Skip to content

Commit

Permalink
load punica wrapper obj dynamically
Browse files (browse the repository at this point in the history)
Signed-off-by: Shanshan Shen <[email protected]>
  • Loading branch information
shen-shanshan committed Dec 30, 2024
1 parent abf1f7e commit e930130
Show file tree
Hide file tree
Showing 5 changed files with 14 additions and 21 deletions.
8 changes: 7 additions & 1 deletion vllm/lora/punica_wrapper/punica_selector.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
from vllm.platforms import current_platform
from vllm.utils import print_info_once, resolve_obj_by_qualname

from .punica_base import PunicaWrapperBase


def get_punica_wrapper(*args, **kwargs) -> PunicaWrapperBase:
return current_platform.get_punica_wrapper(*args, **kwargs)
punica_wrapper_qualname = current_platform.get_punica_wrapper()
punica_wrapper_cls = resolve_obj_by_qualname(punica_wrapper_qualname)
punica_wrapper = punica_wrapper_cls(*args, **kwargs)
assert punica_wrapper is not None
print_info_once("Using " + punica_wrapper_qualname.rsplit(".", 1)[1] + ".")
return punica_wrapper
8 changes: 2 additions & 6 deletions vllm/platforms/cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
import vllm._C # noqa
import vllm.envs as envs
from vllm.logger import init_logger
from vllm.lora.punica_wrapper.punica_base import PunicaWrapperBase
from vllm.lora.punica_wrapper.punica_gpu import PunicaWrapperGPU
from vllm.utils import print_info_once

from .interface import DeviceCapability, Platform, PlatformEnum

Expand Down Expand Up @@ -145,9 +142,8 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
cache_config.block_size = 16

@classmethod
def get_punica_wrapper(cls, *args, **kwargs) -> PunicaWrapperBase:
print_info_once("Using PunicaWrapperGPU.")
return PunicaWrapperGPU(*args, **kwargs)
def get_punica_wrapper(cls) -> str:
return "vllm.lora.punica_wrapper.punica_gpu.PunicaWrapperGPU"


# NVML utils
Expand Down
8 changes: 2 additions & 6 deletions vllm/platforms/hpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@
import torch

from vllm.logger import init_logger
from vllm.lora.punica_wrapper.punica_base import PunicaWrapperBase
from vllm.lora.punica_wrapper.punica_hpu import PunicaWrapperHPU
from vllm.utils import print_info_once

from .interface import Platform, PlatformEnum, _Backend

Expand Down Expand Up @@ -63,6 +60,5 @@ def is_pin_memory_available(cls):
return False

@classmethod
def get_punica_wrapper(cls, *args, **kwargs) -> PunicaWrapperBase:
print_info_once("Using PunicaWrapperHPU.")
return PunicaWrapperHPU(*args, **kwargs)
def get_punica_wrapper(cls) -> str:
return "vllm.lora.punica_wrapper.punica_hpu.PunicaWrapperHPU"
3 changes: 1 addition & 2 deletions vllm/platforms/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import torch

from vllm.logger import init_logger
from vllm.lora.punica_wrapper.punica_base import PunicaWrapperBase

if TYPE_CHECKING:
from vllm.config import VllmConfig
Expand Down Expand Up @@ -240,7 +239,7 @@ def is_pin_memory_available(cls) -> bool:
return True

@classmethod
def get_punica_wrapper(cls, *args, **kwargs) -> PunicaWrapperBase:
def get_punica_wrapper(cls) -> str:
"""
Return the fully qualified class name of the punica wrapper for the current platform.
"""
Expand Down
8 changes: 2 additions & 6 deletions vllm/platforms/rocm.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@

import vllm.envs as envs
from vllm.logger import init_logger
from vllm.lora.punica_wrapper.punica_base import PunicaWrapperBase
from vllm.lora.punica_wrapper.punica_gpu import PunicaWrapperGPU
from vllm.utils import print_info_once

from .interface import DeviceCapability, Platform, PlatformEnum, _Backend

Expand Down Expand Up @@ -115,6 +112,5 @@ def verify_quantization(cls, quant: str) -> None:
envs.VLLM_USE_TRITON_AWQ = True

@classmethod
def get_punica_wrapper(cls, *args, **kwargs) -> PunicaWrapperBase:
print_info_once("Using PunicaWrapperGPU.")
return PunicaWrapperGPU(*args, **kwargs)
def get_punica_wrapper(cls) -> str:
return "vllm.lora.punica_wrapper.punica_gpu.PunicaWrapperGPU"

0 comments on commit e930130

Please sign in to comment.