From 8f01e650a38c253024a87a42cb7b88d3d8bc6ec3 Mon Sep 17 00:00:00 2001 From: yisheng <yi.sheng@intel.com> Date: Tue, 31 Dec 2024 14:19:16 +0800 Subject: [PATCH] make pp group initialized to avoid point-to-point communication as the first call --- vllm/worker/xpu_worker.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vllm/worker/xpu_worker.py b/vllm/worker/xpu_worker.py index 1295666055b04..e9cb623c8eb45 100644 --- a/vllm/worker/xpu_worker.py +++ b/vllm/worker/xpu_worker.py @@ -11,6 +11,7 @@ from vllm.config import VllmConfig from vllm.distributed import (ensure_model_parallel_initialized, init_distributed_environment) +from vllm.distributed.parallel_state import get_pp_group from vllm.logger import init_logger from vllm.model_executor import set_random_seed from vllm.platforms import current_platform @@ -176,3 +177,8 @@ def init_worker_distributed_environment(self) -> None: parallel_config.pipeline_parallel_size) # global all_reduce needed for overall oneccl warm up torch.distributed.all_reduce(torch.zeros(1).xpu()) + + if parallel_config.pipeline_parallel_size > 1: + # Add pp group init to avoid + # p2p communication as the first call + get_pp_group().all_reduce(torch.zeros(1).xpu())