diff --git a/python/sglang/srt/managers/schedule_policy.py b/python/sglang/srt/managers/schedule_policy.py index 03634bd3913..0f645d1497d 100644 --- a/python/sglang/srt/managers/schedule_policy.py +++ b/python/sglang/srt/managers/schedule_policy.py @@ -75,7 +75,7 @@ def calc_priority(self, waiting_queue: List[Req]): prefix_computed = False if policy == "lpm" or policy == "dfs-weight": # rid to deprioritize in the current run for in-batch prefix caching. - temporary_deprioritized = {} + temporary_deprioritized = set() self.waiting_queue_radix_tree.reset() for r in waiting_queue: @@ -100,8 +100,9 @@ def calc_priority(self, waiting_queue: List[Req]): len(in_batch_matching_prefixes) >= IN_BATCH_PREFIX_CACHING_DEPRIORITIZE_THRESHOLD ): - temporary_deprioritized[r.rid] = r + temporary_deprioritized.add(r.rid) else: + # Insert with a dummy key self.waiting_queue_radix_tree.insert( prefix_ids, torch.empty(len(prefix_ids), dtype=torch.bool) )