From 33fc1e2e86ce5d60940463f8f71daaa61728d3b7 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com> Date: Sun, 5 Jan 2025 16:35:01 -0500 Subject: [PATCH] [Frontend] Improve `StreamingResponse` Exception Handling (#11752) --- vllm/entrypoints/openai/serving_chat.py | 4 ++-- vllm/entrypoints/openai/serving_completion.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 9ba5eeb7709c9..89a119ac65695 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -301,7 +301,7 @@ async def chat_completion_stream_generator( ] * num_choices else: tool_parsers = [None] * num_choices - except RuntimeError as e: + except Exception as e: logger.exception("Error in tool parser creation.") data = self.create_streaming_error_response(str(e)) yield f"data: {data}\n\n" @@ -591,7 +591,7 @@ async def chat_completion_stream_generator( completion_tokens=num_completion_tokens, total_tokens=num_prompt_tokens + num_completion_tokens) - except ValueError as e: + except Exception as e: # TODO: Use a vllm-specific Validation Error logger.exception("Error in chat completion stream generator.") data = self.create_streaming_error_response(str(e)) diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index 17197dce8da23..2c9c20caf8119 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -371,7 +371,7 @@ async def completion_stream_generator( # report to FastAPI middleware aggregate usage across all choices request_metadata.final_usage_info = final_usage_info - except ValueError as e: + except Exception as e: # TODO: Use a vllm-specific Validation Error data = self.create_streaming_error_response(str(e)) yield f"data: {data}\n\n"