Commit
add tests
Signed-off-by: Avshalom <[email protected]>
avshalomman committed Jan 8, 2025
1 parent 3a2f669 commit 83b6002
Showing 2 changed files with 8 additions and 0 deletions.
7 changes: 7 additions & 0 deletions tests/kernels/test_moe.py
@@ -14,6 +14,8 @@
 from vllm.model_executor.layers.fused_moe import fused_moe
 from vllm.model_executor.layers.fused_moe.fused_moe import (
     fused_topk, moe_align_block_size)
+from vllm.model_executor.layers.fused_moe.moe_torch_iterative import (
+    fused_moe as iterative_moe)
 from vllm.model_executor.layers.quantization.utils.marlin_utils_test import (
     marlin_quantize)
 from vllm.model_executor.models.mixtral import MixtralMoE
@@ -46,6 +48,11 @@ def test_fused_moe(
     triton_output = fused_moe(a, w1, w2, score, topk, renormalize=False)
     torch_output = torch_moe(a, w1, w2, score, topk)
     torch.testing.assert_close(triton_output, torch_output, atol=2e-2, rtol=0)
+    iterative_output = iterative_moe(a, w1, w2, score, topk)
+    torch.testing.assert_close(iterative_output,
+                               torch_output,
+                               atol=2e-2,
+                               rtol=0)
 
 
 @pytest.mark.parametrize("dtype",
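For context on what the new assertion compares, the sketch below is a minimal, self-contained reference MoE forward in the same spirit as the torch_moe helper used above. It is not the code from tests/kernels/test_moe.py or moe_torch_iterative.py; the weight layout (w1 as stacked gate/up projections, w2 as the down projection) and the SiLU-and-mul expert MLP are assumptions carried over from Mixtral-style MoE layers, and the function name is hypothetical.

import torch
import torch.nn.functional as F


def reference_moe(a: torch.Tensor, w1: torch.Tensor, w2: torch.Tensor,
                  score: torch.Tensor, topk: int) -> torch.Tensor:
    """Naive per-expert MoE forward, written for clarity rather than speed."""
    # a:     [num_tokens, hidden]
    # w1:    [num_experts, 2 * intermediate, hidden]  (gate/up projections, assumed layout)
    # w2:    [num_experts, hidden, intermediate]      (down projection, assumed layout)
    # score: [num_tokens, num_experts]                (router logits)
    probs = torch.softmax(score, dim=-1, dtype=torch.float32)
    topk_weight, topk_ids = torch.topk(probs, topk, dim=-1)
    out = torch.zeros_like(a)
    for expert in range(w1.shape[0]):
        # Tokens (and the top-k slot they used) that routed to this expert.
        token_idx, slot_idx = (topk_ids == expert).nonzero(as_tuple=True)
        if token_idx.numel() == 0:
            continue
        x = a[token_idx]
        gate, up = (x @ w1[expert].t()).chunk(2, dim=-1)
        y = (F.silu(gate) * up) @ w2[expert].t()
        # Weight each expert's output by its (un-renormalized) routing probability.
        weight = topk_weight[token_idx, slot_idx].unsqueeze(-1).to(y.dtype)
        out[token_idx] += y * weight
    return out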
1 change: 1 addition & 0 deletions vllm/model_executor/layers/fused_moe/layer.py
@@ -19,6 +19,7 @@
 else:
     fused_experts = None  # type: ignore
 if current_platform.is_tpu():
+    # the iterative moe implementation is used until the moe_pallas is fixed
     from .moe_torch_iterative import fused_moe as fused_moe_pallas
 else:
     fused_moe_pallas = None  # type: ignore
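The hunk above follows the same conditional-import pattern already used for fused_experts: bind the platform-specific kernel to a module-level name when the platform matches, otherwise bind None. The snippet below is a generic, hypothetical sketch of that pattern, not vLLM's actual layer.py; the _is_tpu helper, fused_moe_impl, and run_moe names are stand-ins introduced only for illustration.

from typing import Callable, Optional


def _is_tpu() -> bool:
    # Stand-in for current_platform.is_tpu(); hard-coded so the sketch runs anywhere.
    return False


fused_moe_impl: Optional[Callable] = None
if _is_tpu():
    # On a TPU build this is where the platform kernel would be imported,
    # mirroring `from .moe_torch_iterative import fused_moe as fused_moe_pallas`.
    pass


def run_moe(*args, **kwargs):
    # Fail loudly only if the unsupported path is actually exercised at runtime.
    if fused_moe_impl is None:
        raise NotImplementedError("No MoE kernel is available on this platform.")
    return fused_moe_impl(*args, **kwargs)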
