diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py index e56d5cddce424..1893f9f753550 100644 --- a/vllm/core/scheduler.py +++ b/vllm/core/scheduler.py @@ -1504,6 +1504,7 @@ def _preempt_by_recompute( seq.status = SequenceStatus.WAITING self.free_seq(seq) seq.reset_state_for_recompute() + self._free_seq_group_cross_attn_blocks(seq_group) def _preempt_by_swap( self,