From 61f92667640030c95e04b4b5720d70b7cc118278 Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Mon, 16 Dec 2024 18:38:52 -0800
Subject: [PATCH] Fix

---
 python/sglang/srt/managers/schedule_policy.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/sglang/srt/managers/schedule_policy.py b/python/sglang/srt/managers/schedule_policy.py
index 03634bd3913..0f645d1497d 100644
--- a/python/sglang/srt/managers/schedule_policy.py
+++ b/python/sglang/srt/managers/schedule_policy.py
@@ -75,7 +75,7 @@ def calc_priority(self, waiting_queue: List[Req]):
         prefix_computed = False
         if policy == "lpm" or policy == "dfs-weight":
             # rid to deprioritize in the current run for in-batch prefix caching.
-            temporary_deprioritized = {}
+            temporary_deprioritized = set()
             self.waiting_queue_radix_tree.reset()
 
             for r in waiting_queue:
@@ -100,8 +100,9 @@ def calc_priority(self, waiting_queue: List[Req]):
                         len(in_batch_matching_prefixes)
                         >= IN_BATCH_PREFIX_CACHING_DEPRIORITIZE_THRESHOLD
                     ):
-                        temporary_deprioritized[r.rid] = r
+                        temporary_deprioritized.add(r.rid)
                     else:
+                        # Insert with a dummy key
                         self.waiting_queue_radix_tree.insert(
                             prefix_ids, torch.empty(len(prefix_ids), dtype=torch.bool)
                         )