From d2cc90817d91550ed57177dc560ca7decd24c168 Mon Sep 17 00:00:00 2001
From: wangxiyuan
Date: Fri, 10 Jan 2025 18:02:38 +0800
Subject: [PATCH] [platform] support pytorch custom op pluggable (#11328)

Signed-off-by: wangxiyuan
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 vllm/model_executor/custom_op.py | 7 +++++++
 vllm/platforms/interface.py      | 4 ++++
 2 files changed, 11 insertions(+)

diff --git a/vllm/model_executor/custom_op.py b/vllm/model_executor/custom_op.py
index 401606e8c76f0..96995c56bf504 100644
--- a/vllm/model_executor/custom_op.py
+++ b/vllm/model_executor/custom_op.py
@@ -57,6 +57,11 @@ def forward_hpu(self, *args, **kwargs):
         # PyTorch-native implementation.
         return self.forward_native(*args, **kwargs)
 
+    def forward_oot(self, *args, **kwargs):
+        # By default, we assume that OOT ops are compatible with the
+        # PyTorch-native implementation.
+        return self.forward_native(*args, **kwargs)
+
     def dispatch_forward(self):
         # NOTE(woosuk): Here we assume that vLLM was built for only one
         # specific backend. Currently, we do not support dynamic dispatching.
@@ -81,6 +86,8 @@ def dispatch_forward(self):
             return self.forward_tpu
         elif current_platform.is_xpu():
             return self.forward_xpu
+        elif current_platform.is_out_of_tree():
+            return self.forward_oot
         else:
             return self.forward_cuda
 
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index f440358f65fbb..01d753408e6d0 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -45,6 +45,7 @@ class PlatformEnum(enum.Enum):
     CPU = enum.auto()
     NEURON = enum.auto()
     OPENVINO = enum.auto()
+    OOT = enum.auto()
     UNSPECIFIED = enum.auto()
 
 
@@ -107,6 +108,9 @@ def is_neuron(self) -> bool:
     def is_openvino(self) -> bool:
         return self._enum == PlatformEnum.OPENVINO
 
+    def is_out_of_tree(self) -> bool:
+        return self._enum == PlatformEnum.OOT
+
     def is_cuda_alike(self) -> bool:
         """Stateless version of :func:`torch.cuda.is_available`."""
         return self._enum in (PlatformEnum.CUDA, PlatformEnum.ROCM)
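
Notes (not part of the patch): a minimal sketch of how an out-of-tree (OOT)
platform plugin could use the hooks introduced above. The class names
MyOOTPlatform and MySiluAndMul are hypothetical; only forward_oot,
PlatformEnum.OOT, and is_out_of_tree() come from this change.

import torch

from vllm.model_executor.custom_op import CustomOp
from vllm.platforms.interface import Platform, PlatformEnum


class MyOOTPlatform(Platform):
    # Hypothetical plugin platform. Setting _enum to PlatformEnum.OOT makes
    # is_out_of_tree() return True, which in turn makes
    # CustomOp.dispatch_forward() select forward_oot.
    _enum = PlatformEnum.OOT


class MySiluAndMul(CustomOp):
    """Hypothetical custom op with an out-of-tree kernel override."""

    def forward_native(self, x: torch.Tensor) -> torch.Tensor:
        # Reference PyTorch implementation. Ops that do not override
        # forward_oot fall back to this path automatically, per the
        # default forward_oot added in this patch.
        d = x.shape[-1] // 2
        return torch.nn.functional.silu(x[..., :d]) * x[..., d:]

    def forward_oot(self, x: torch.Tensor) -> torch.Tensor:
        # A real plugin would call its own compiled kernel here (e.g. a
        # hypothetical my_ops.silu_and_mul(x)). Delegating to the native
        # path keeps this sketch runnable without custom kernels.
        return self.forward_native(x)

Dispatch still happens once, when dispatch_forward() runs (see custom_op.py
above), so the forward_oot path is only taken when vLLM's current_platform
actually reports is_out_of_tree(); there is no per-call dynamic dispatch.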