
Commit f645eb6
[Bugfix] Add checks for LoRA and CPU offload (#11810)
Signed-off-by: Jee Jee Li <[email protected]>
jeejeelee authored Jan 8, 2025
1 parent f4923cb · commit f645eb6
Showing 1 changed file with 6 additions and 0 deletions.
vllm/config.py
@@ -2051,6 +2051,11 @@ def __post_init__(self):
                 f"max_cpu_loras ({self.max_cpu_loras}) must be >= "
                 f"max_loras ({self.max_loras})")
 
+    def verify_with_cache_config(self, cache_config: CacheConfig):
+        # TODO: support CPU offload together with LoRA.
+        if cache_config.cpu_offload_gb > 0:
+            raise ValueError("CPU offload is not supported with LoRA yet.")
+
     def verify_with_model_config(self, model_config: ModelConfig):
         if self.lora_dtype in (None, "auto"):
             self.lora_dtype = model_config.dtype
@@ -3138,6 +3143,7 @@ def __post_init__(self):
         self.cache_config.verify_with_parallel_config(self.parallel_config)
 
         if self.lora_config:
+            self.lora_config.verify_with_cache_config(self.cache_config)
             self.lora_config.verify_with_model_config(self.model_config)
             self.lora_config.verify_with_scheduler_config(
                 self.scheduler_config)
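For context, here is a minimal sketch of how the new check surfaces to users. It assumes the standard vllm.LLM entry point and that the enable_lora and cpu_offload_gb arguments are forwarded into the LoRAConfig and CacheConfig validated by the __post_init__ hunk above; the model name is purely illustrative.

    from vllm import LLM

    # Requesting LoRA and CPU offload together should now fail fast during
    # config validation (verify_with_cache_config) instead of surfacing as
    # a harder-to-diagnose failure later at runtime.
    try:
        llm = LLM(
            model="meta-llama/Llama-2-7b-hf",  # illustrative model
            enable_lora=True,      # creates a LoRAConfig
            cpu_offload_gb=4,      # makes cache_config.cpu_offload_gb > 0
        )
    except ValueError as err:
        print(err)  # -> "CPU offload is not supported with LoRA yet."

Placing the check in a dedicated verify_with_cache_config method keeps LoRA compatibility constraints in one place, alongside the existing verify_with_model_config and verify_with_scheduler_config calls.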
