vllm-project · MengqingCao · Dec 17, 2024
diff --git a/vllm/executor/ray_gpu_executor.py b/vllm/executor/ray_gpu_executor.py
@@ -194,7 +194,7 @@ def sort_by_driver_then_worker_ip(worker):
                 # driver_dummy_worker can be None when using ray spmd worker.
                 continue
             worker_node_and_gpu_ids.append(
-                ray.get(worker.get_node_and_gpu_ids.remote()) \
+                ray.get(worker.get_node_and_accelerator_ids.remote()) \
             ) # type: ignore
 
         node_workers = defaultdict(list)  # node id -> list of worker ranks

diff --git a/vllm/executor/ray_hpu_executor.py b/vllm/executor/ray_hpu_executor.py
@@ -169,7 +169,7 @@ def sort_by_driver_then_worker_ip(worker):
                 # driver_dummy_worker can be None when using ray spmd worker.
                 continue
             worker_node_and_gpu_ids.append(
-                ray.get(worker.get_node_and_gpu_ids.remote()) \
+                ray.get(worker.get_node_and_accelerator_ids.remote()) \
             ) # type: ignore
 
         node_workers = defaultdict(list)  # node id -> list of worker ranks

diff --git a/vllm/executor/ray_tpu_executor.py b/vllm/executor/ray_tpu_executor.py
@@ -143,7 +143,7 @@ def sort_by_driver_then_worker_ip(worker):
                 # driver_dummy_worker can be None when using ray spmd worker.
                 continue
             worker_node_and_gpu_ids.append(
-                ray.get(worker.get_node_and_gpu_ids.remote()) \
+                ray.get(worker.get_node_and_accelerator_ids.remote()) \
             ) # type: ignore
 
         node_workers = defaultdict(list)

diff --git a/vllm/executor/ray_utils.py b/vllm/executor/ray_utils.py
@@ -46,10 +46,14 @@ def __init__(self, *args, **kwargs) -> None:
         def get_node_ip(self) -> str:
             return get_ip()
 
-        def get_node_and_gpu_ids(self) -> Tuple[str, List[int]]:
+        def get_node_and_accelerator_ids(self) -> Tuple[str, List[int]]:
+            """Return the Ray node ID and this worker's accelerator IDs."""
             node_id = ray.get_runtime_context().get_node_id()
-            gpu_ids = ray.get_gpu_ids()
-            return node_id, gpu_ids
+            # Ray keys accelerator IDs by resource name, so default to "GPU".
+            device_key = current_platform.ray_device_key or "GPU"
+            accelerator_ids = ray.get_runtime_context().get_accelerator_ids(
+            )[device_key]
+            return node_id, accelerator_ids
 
         def execute_model_spmd(
             self, req_or_tuple: Union[bytes,
@@ -249,11 +253,12 @@ def initialize_ray_cluster(
         # Placement group is already set.
         return
 
-    device_str = "GPU"
-    if current_platform.is_tpu():
-        device_str = "TPU"
-    elif current_platform.is_hpu():
-        device_str = 'HPU'
+    device_str = current_platform.ray_device_key
+    if not device_str:
+        # Log before the fallback assignment so %s names the real platform.
+        logger.warning("No ray device key for platform %s, using \"GPU\".",
+                       current_platform.device_name)
+        device_str = "GPU"
     # Create placement group for worker processes
     current_placement_group = ray.util.get_current_placement_group()
     if current_placement_group:

diff --git a/vllm/executor/ray_xpu_executor.py b/vllm/executor/ray_xpu_executor.py
@@ -22,7 +22,9 @@ def _get_env_vars_to_be_updated(self):
                 # driver_dummy_worker can be None when using ray spmd worker.
                 continue
             worker_node_and_gpu_ids.append(
-                ray.get(worker.get_node_and_gpu_ids.remote()))  # type: ignore
+                ray.get(
+                    worker.get_node_and_accelerator_ids.remote(  # type: ignore
+                    )))
 
         # Set environment variables for the driver and workers.
         all_args_to_update_environment_variables = [({

diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py
@@ -77,6 +77,7 @@ class CudaPlatformBase(Platform):
     device_name: str = "cuda"
     device_type: str = "cuda"
     dispatch_key: str = "CUDA"
+    ray_device_key: str = "GPU"
 
     @classmethod
     def get_device_capability(cls,

diff --git a/vllm/platforms/hpu.py b/vllm/platforms/hpu.py
@@ -19,6 +19,7 @@ class HpuPlatform(Platform):
     device_name: str = "hpu"
     device_type: str = "hpu"
     dispatch_key: str = "HPU"
+    ray_device_key: str = "HPU"
 
     @classmethod
     def get_default_attn_backend(cls, selected_backend: _Backend) -> _Backend:

diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
@@ -81,6 +81,9 @@ class Platform:
     # check https://github.com/pytorch/pytorch/blob/313dac6c1ca0fa0cde32477509cce32089f8532a/torchgen/model.py#L134 # noqa
     # use "CPU" as a fallback for platforms not registered in PyTorch
     dispatch_key: str = "CPU"
+    # ray resource key of this platform ("" when none is defined), see:
+    # https://github.com/ray-project/ray/blob/master/python/ray/_private/ray_constants.py#L441 # noqa
+    ray_device_key: str = ""
     supported_quantization: list[str] = []
 
     def is_cuda(self) -> bool:

diff --git a/vllm/platforms/neuron.py b/vllm/platforms/neuron.py
@@ -16,6 +16,7 @@ class NeuronPlatform(Platform):
     _enum = PlatformEnum.NEURON
     device_name: str = "neuron"
     device_type: str = "neuron"
+    ray_device_key: str = "neuron_cores"
     supported_quantization: list[str] = ["neuron_quant"]
 
     @classmethod

diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
@@ -39,6 +39,7 @@ class RocmPlatform(Platform):
     device_name: str = "rocm"
     device_type: str = "cuda"
     dispatch_key: str = "CUDA"
+    ray_device_key: str = "GPU"  # Ray exposes AMD GPUs as "GPU"
     supported_quantization: list[str] = [
         "awq", "gptq", "fp8", "compressed_tensors", "compressed-tensors",
         "fbgemm_fp8", "gguf"

diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
@@ -19,6 +19,7 @@ class TpuPlatform(Platform):
     device_name: str = "tpu"
     device_type: str = "tpu"
     dispatch_key: str = "XLA"
+    ray_device_key: str = "TPU"
     supported_quantization: list[str] = ["tpu_int8"]
 
     @classmethod