fix moe-ep accuracy issue for fp8 (#2489)
xiaobochen123 authored Dec 16, 2024
1 parent a0592c0 · commit b532a5f
Showing 1 changed file with 4 additions and 0 deletions.
python/sglang/srt/layers/ep_moe/layer.py (4 additions, 0 deletions)
@@ -644,6 +644,10 @@ def process_weights_after_loading(self, layer: Module) -> None:
                 "QuantConfig has static quantization, but found "
                 "activation scales are None."
             )
+        layer.w13_weight_scale = torch.nn.Parameter(
+            torch.max(layer.w13_weight_scale, dim=1).values,
+            requires_grad=False,
+        )
         return
 
     def apply(
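Why the one-line fix works, as a minimal sketch (the shapes below are assumptions for illustration, not taken from the commit): in the fp8 EP MoE path, w13_weight_scale plausibly holds one quantization scale per expert per fused shard (gate_proj and up_proj), e.g. shape [num_experts, 2], while the downstream kernel presumably consumes a single scale per expert. torch.max(..., dim=1) returns a (values, indices) named tuple, and .values keeps the larger of the two shard scales for each expert.

import torch

# Hypothetical shapes for illustration: 4 experts, 2 shards (gate and up proj).
num_experts = 4
w13_weight_scale = torch.rand(num_experts, 2)  # per-expert, per-shard fp8 scales

# torch.max along dim=1 returns (values, indices); .values collapses the two
# shard scales into one scale per expert, matching a kernel that expects a
# single per-expert scale.
per_expert_scale = torch.max(w13_weight_scale, dim=1).values
assert per_expert_scale.shape == (num_experts,)

Taking the max rather than, say, the mean is the conservative choice when fused fp8 weights must share a scale: dequantizing both shards with the larger scale loses a little precision on the smaller-scaled shard but cannot underestimate either shard's dynamic range.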
