diff --git a/vllm/v1/core/scheduler.py b/vllm/v1/core/scheduler.py index 614558155e45e..08e7c0fd4dc9b 100644 --- a/vllm/v1/core/scheduler.py +++ b/vllm/v1/core/scheduler.py @@ -208,6 +208,7 @@ def schedule(self) -> "SchedulerOutput": num_new_tokens = self.block_size computed_blocks.pop() num_new_tokens = min(num_new_tokens, token_budget) + assert num_new_tokens > 0 # Schedule encoder inputs. (encoder_inputs_to_schedule, num_new_tokens,