From 6128f7cff5e61517f69fafa6aec148d8d40657cf Mon Sep 17 00:00:00 2001
From: Yineng Zhang
Date: Sun, 8 Dec 2024 20:07:30 +0800
Subject: [PATCH] fix: specify dtype with begin_forward aka plan (#2404)

---
 python/sglang/srt/layers/attention/flashinfer_backend.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/sglang/srt/layers/attention/flashinfer_backend.py b/python/sglang/srt/layers/attention/flashinfer_backend.py
index f89bc2ccaa2..536358fbc94 100644
--- a/python/sglang/srt/layers/attention/flashinfer_backend.py
+++ b/python/sglang/srt/layers/attention/flashinfer_backend.py
@@ -678,6 +678,7 @@ def call_begin_forward(
                 self.num_qo_heads,
                 self.num_kv_heads,
                 self.head_dim,
+                q_data_type=self.q_data_type,
             )
 
         # cached part
@@ -691,6 +692,7 @@ def call_begin_forward(
             self.num_kv_heads,
             self.head_dim,
             1,
+            q_data_type=self.q_data_type,
         )