Skip to content

Commit

Permalink
Move FP8 to SGLang (#2370)
Browse files: browse the repository at this point in the history
Co-authored-by: HaiShaw <[email protected]>
  • Loading branch information
zhyncs and HaiShaw authored Dec 6, 2024
1 parent: 3d32e4a; commit: 84d96b3
Show file tree
Hide file tree
Showing 2 changed files with 561 additions and 2 deletions.
4 changes: 2 additions & 2 deletions python/sglang/srt/layers/quantization/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,7 +13,6 @@
from vllm.model_executor.layers.quantization.deepspeedfp import DeepSpeedFPConfig
from vllm.model_executor.layers.quantization.experts_int8 import ExpertsInt8Config
from vllm.model_executor.layers.quantization.fbgemm_fp8 import FBGEMMFp8Config
from vllm.model_executor.layers.quantization.fp8 import Fp8Config, Fp8MoEMethod
from vllm.model_executor.layers.quantization.gguf import GGUFConfig
from vllm.model_executor.layers.quantization.gptq import GPTQConfig
from vllm.model_executor.layers.quantization.gptq_marlin import GPTQMarlinConfig
Expand All @@ -23,6 +22,7 @@
from vllm.model_executor.layers.quantization.tpu_int8 import Int8TpuConfig

from sglang.srt.layers.quantization.base_config import QuantizationConfig
from sglang.srt.layers.quantization.fp8 import Fp8Config, Fp8MoEMethod

QUANTIZATION_METHODS: Dict[str, Type[QuantizationConfig]] = {
"aqlm": AQLMConfig,
Expand Down Expand Up @@ -100,13 +100,13 @@ def fp8_moe_apply(
def fp8_get_quant_method(self, layer, prefix):
"""Enhanced get_quant_method for FP8 config."""
from vllm.model_executor.layers.linear import LinearBase
from vllm.model_executor.layers.quantization.fp8 import Fp8LinearMethod
from vllm.model_executor.layers.quantization.utils.quant_utils import (
is_layer_skipped,
)

from sglang.srt.layers.fused_moe_triton.layer import FusedMoE
from sglang.srt.layers.linear import UnquantizedLinearMethod
from sglang.srt.layers.quantization.fp8 import Fp8LinearMethod

if isinstance(layer, LinearBase):
if is_layer_skipped(prefix, self.ignored_layers):
Expand Down
Loading

0 comments on commit 84d96b3

Please sign in to comment.