Switch prefill-phase checks to assertions and remove merge-conflict leftovers

Signed-off-by: rajveerb <[email protected]>
vllm-project · Dec 23, 2024 · 08e8e26 · 08e8e26
1 parent d7e39e8
commit 08e8e26
Showing 1 changed file with 0 additions and 16 deletions.
diff --git a/vllm/sequence.py b/vllm/sequence.py
@@ -765,18 +765,10 @@ def init_multi_step_from_lookahead_slots(self, num_lookahead_slots: int,
 
     def set_last_token_time(self, now: float) -> None:
         """Sets the last token time for Request level timings."""
-<<<<<<< HEAD
         # If still in prefill phase, assertion fails.
         assert not self.is_prefill(), (
             "seq_group.set_last_token_time() should not be called "
             "if the seq_group is in prefill phase.")
-=======
-        # If still in prefill phase, raise Error.
-        if self.is_prefill():
-            raise ValueError(
-                "seq_group.set_last_token_time() should not be called "
-                "if the seq_group is in prefill phase.")
->>>>>>> set function for setting last token latency and time
         self.last_token_latency = now - self.metrics.last_token_time
         self.metrics.last_token_time = now
 
@@ -787,14 +779,6 @@ def get_last_token_latency(self) -> float:
             "if the seq_group is in prefill phase.")
         return self.last_token_latency
 
-    def get_last_token_latency(self) -> float:
-        """Returns the latency of the last token."""
-        if self.is_prefill():
-            raise ValueError(
-                "seq_group.get_last_token_latency() should not be called "
-                "if the seq_group is in prefill phase.")
-        return self.last_token_latency
-
     def maybe_set_first_token_time(self, time: float) -> None:
         """Sets the first token time for Request level timings."""
         # Note: in a case where a sequence_group is swapped and