From b532a5fd16d0c2b0d7945bffdc3beab1d7018975 Mon Sep 17 00:00:00 2001
From: xiaobochen <35516720+xiaobochen123@users.noreply.github.com>
Date: Mon, 16 Dec 2024 20:54:02 +0800
Subject: [PATCH] fix moe-ep accuracy issue for fp8 (#2489)

---
 python/sglang/srt/layers/ep_moe/layer.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/sglang/srt/layers/ep_moe/layer.py b/python/sglang/srt/layers/ep_moe/layer.py
index eca119845a7..3c477fdc2ef 100644
--- a/python/sglang/srt/layers/ep_moe/layer.py
+++ b/python/sglang/srt/layers/ep_moe/layer.py
@@ -644,6 +644,10 @@ def process_weights_after_loading(self, layer: Module) -> None:
                     "QuantConfig has static quantization, but found "
                     "activation scales are None."
                 )
+            layer.w13_weight_scale = torch.nn.Parameter(
+                torch.max(layer.w13_weight_scale, dim=1).values,
+                requires_grad=False,
+            )
             return
 
     def apply(
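
A minimal sketch of the effect of the added torch.max call, assuming w13_weight_scale is loaded as a per-expert, per-shard tensor of shape [num_experts, 2] (one FP8 scale each for the gate and up projections; the shapes and values below are illustrative, not taken from the patch). Reducing over dim=1 keeps the larger of the two shard scales, so a single per-expert scale can be applied to the fused w13 weight.

    import torch

    # Hypothetical example: 4 experts, 2 shards (gate_proj, up_proj), each with
    # its own weight scale as loaded from the checkpoint.
    w13_weight_scale = torch.tensor(
        [[0.010, 0.020],
         [0.015, 0.012],
         [0.030, 0.025],
         [0.008, 0.009]]
    )

    # Keep only the larger shard scale per expert, matching what the patch
    # stores back into layer.w13_weight_scale.
    per_expert_scale = torch.max(w13_weight_scale, dim=1).values
    print(per_expert_scale)        # tensor([0.0200, 0.0150, 0.0300, 0.0090])
    print(per_expert_scale.shape)  # torch.Size([4])

Using the maximum of the shard scales is the conservative choice: requantizing both shards with the larger scale avoids overflow, at the cost of slightly coarser resolution for the shard that originally had the smaller scale.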