From d9547da98d070c58932ca5aa51d1e9f646eb6abd Mon Sep 17 00:00:00 2001
From: "rshaw@neuralmagic.com"
Date: Sun, 5 Jan 2025 17:56:30 +0000
Subject: [PATCH] updated

---
 vllm/entrypoints/openai/serving_chat.py         | 14 +++++++-------
 vllm/entrypoints/openai/serving_completion.py   |  8 ++++----
 vllm/entrypoints/openai/serving_embedding.py    |  6 +++---
 vllm/entrypoints/openai/serving_pooling.py      |  6 +++---
 vllm/entrypoints/openai/serving_score.py        |  6 +++---
 vllm/entrypoints/openai/serving_tokenization.py |  2 +-
 6 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py
index 9ba5eeb7709c9..687c8c66dbcbe 100644
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -171,7 +171,7 @@ async def create_chat_completion(
                 truncate_prompt_tokens=request.truncate_prompt_tokens,
                 add_special_tokens=request.add_special_tokens,
             )
-        except ValueError as e:
+        except Exception as e:
             logger.exception("Error in preprocessing prompt inputs")
             return self.create_error_response(str(e))

@@ -228,7 +228,7 @@ async def create_chat_completion(
                 )

                 generators.append(generator)
-        except ValueError as e:
+        except Exception as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))

@@ -245,7 +245,7 @@ async def create_chat_completion(
             return await self.chat_completion_full_generator(
                 request, result_generator, request_id, model_name,
                 conversation, tokenizer, request_metadata)
-        except ValueError as e:
+        except Exception as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))

@@ -301,7 +301,7 @@ async def chat_completion_stream_generator(
                 ] * num_choices
             else:
                 tool_parsers = [None] * num_choices
-        except RuntimeError as e:
+        except Exception as e:
             logger.exception("Error in tool parser creation.")
             data = self.create_streaming_error_response(str(e))
             yield f"data: {data}\n\n"
@@ -591,7 +591,7 @@ async def chat_completion_stream_generator(
                 completion_tokens=num_completion_tokens,
                 total_tokens=num_prompt_tokens + num_completion_tokens)

-        except ValueError as e:
+        except Exception as e:
             # TODO: Use a vllm-specific Validation Error
             logger.exception("Error in chat completion stream generator.")
             data = self.create_streaming_error_response(str(e))
@@ -618,7 +618,7 @@ async def chat_completion_full_generator(
                 final_res = res
         except asyncio.CancelledError:
             return self.create_error_response("Client disconnected")
-        except ValueError as e:
+        except Exception as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))

@@ -682,7 +682,7 @@ async def chat_completion_full_generator(

                 try:
                     tool_parser = self.tool_parser(tokenizer)
-                except RuntimeError as e:
+                except Exception as e:
                     logger.exception("Error in tool parser creation.")
                     return self.create_error_response(str(e))

diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py
index 17197dce8da23..53ae1b134590a 100644
--- a/vllm/entrypoints/openai/serving_completion.py
+++ b/vllm/entrypoints/openai/serving_completion.py
@@ -106,7 +106,7 @@ async def create_completion(
                 truncate_prompt_tokens=request.truncate_prompt_tokens,
                 add_special_tokens=request.add_special_tokens,
             )
-        except ValueError as e:
+        except Exception as e:
             logger.exception("Error in preprocessing prompt inputs")
             return self.create_error_response(str(e))

@@ -158,7 +158,7 @@ async def create_completion(
                 )

                 generators.append(generator)
-        except ValueError as e:
+        except Exception as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))

@@ -215,7 +215,7 @@ async def create_completion(
             )
         except asyncio.CancelledError:
             return self.create_error_response("Client disconnected")
-        except ValueError as e:
+        except Exception as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))

@@ -371,7 +371,7 @@ async def completion_stream_generator(
             # report to FastAPI middleware aggregate usage across all choices
             request_metadata.final_usage_info = final_usage_info

-        except ValueError as e:
+        except Exception as e:
             # TODO: Use a vllm-specific Validation Error
             data = self.create_streaming_error_response(str(e))
             yield f"data: {data}\n\n"
diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py
index e7116a3d95d10..fe8ba5eb95b9d 100644
--- a/vllm/entrypoints/openai/serving_embedding.py
+++ b/vllm/entrypoints/openai/serving_embedding.py
@@ -136,7 +136,7 @@ async def create_embedding(
                 truncate_prompt_tokens=truncate_prompt_tokens,
                 add_special_tokens=request.add_special_tokens,
             )
-        except ValueError as e:
+        except Exception as e:
             logger.exception("Error in preprocessing prompt inputs")
             return self.create_error_response(str(e))

@@ -167,7 +167,7 @@ async def create_embedding(
                 )

                 generators.append(generator)
-        except ValueError as e:
+        except Exception as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))

@@ -196,7 +196,7 @@ async def create_embedding(
             )
         except asyncio.CancelledError:
             return self.create_error_response("Client disconnected")
-        except ValueError as e:
+        except Exception as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))

diff --git a/vllm/entrypoints/openai/serving_pooling.py b/vllm/entrypoints/openai/serving_pooling.py
index 5830322071e58..3441071344f4d 100644
--- a/vllm/entrypoints/openai/serving_pooling.py
+++ b/vllm/entrypoints/openai/serving_pooling.py
@@ -132,7 +132,7 @@ async def create_pooling(
                 truncate_prompt_tokens=truncate_prompt_tokens,
                 add_special_tokens=request.add_special_tokens,
             )
-        except ValueError as e:
+        except Exception as e:
             logger.exception("Error in preprocessing prompt inputs")
             return self.create_error_response(str(e))

@@ -163,7 +163,7 @@ async def create_pooling(
                 )

                 generators.append(generator)
-        except ValueError as e:
+        except Exception as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))

@@ -192,7 +192,7 @@ async def create_pooling(
             )
         except asyncio.CancelledError:
             return self.create_error_response("Client disconnected")
-        except ValueError as e:
+        except Exception as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))

diff --git a/vllm/entrypoints/openai/serving_score.py b/vllm/entrypoints/openai/serving_score.py
index 5d3e7139d7a17..9b5aa13bda841 100644
--- a/vllm/entrypoints/openai/serving_score.py
+++ b/vllm/entrypoints/openai/serving_score.py
@@ -101,7 +101,7 @@ async def create_score(
             if not self.model_config.is_cross_encoder:
                 raise ValueError("Model is not cross encoder.")

-        except ValueError as e:
+        except Exception as e:
             logger.exception("Error in preprocessing prompt inputs")
             return self.create_error_response(str(e))

@@ -155,7 +155,7 @@ async def create_score(
                 )

                 generators.append(generator)
-        except ValueError as e:
+        except Exception as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))

@@ -184,7 +184,7 @@ async def create_score(
             )
         except asyncio.CancelledError:
             return self.create_error_response("Client disconnected")
-        except ValueError as e:
+        except Exception as e:
             # TODO: Use a vllm-specific Validation Error
             return self.create_error_response(str(e))

diff --git a/vllm/entrypoints/openai/serving_tokenization.py b/vllm/entrypoints/openai/serving_tokenization.py
index b67ecfb01316f..a3dc42ff8f023 100644
--- a/vllm/entrypoints/openai/serving_tokenization.py
+++ b/vllm/entrypoints/openai/serving_tokenization.py
@@ -86,7 +86,7 @@ async def create_tokenize(
                 request.prompt,
                 add_special_tokens=request.add_special_tokens,
             )
-        except ValueError as e:
+        except Exception as e:
             logger.exception("Error in preprocessing prompt inputs")
             return self.create_error_response(str(e))
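
Taken together, the patch applies one pattern across all six OpenAI serving entrypoints: the narrow "except ValueError" handlers (and the two "except RuntimeError" guards around tool-parser creation) become "except Exception", so any failure raised while preprocessing prompts or generating output is logged and converted into an error response instead of escaping the route handler. The sketch below illustrates that pattern in isolation; it is a hypothetical stand-in written for this note, not vLLM code, and the ServingStub/ErrorResponse names are invented for the example.

    import asyncio
    import logging

    logger = logging.getLogger(__name__)


    class ErrorResponse:
        """Stand-in for the OpenAI-style error payload returned to the client."""

        def __init__(self, message: str) -> None:
            self.message = message


    class ServingStub:
        """Hypothetical handler showing the broadened exception handling."""

        def create_error_response(self, message: str) -> ErrorResponse:
            return ErrorResponse(message)

        async def create_completion(self, request) -> ErrorResponse:
            try:
                prompt = self._preprocess(request)   # may raise any exception
                return await self._generate(prompt)  # may raise any exception
            except Exception as e:
                # With "except ValueError", a TypeError raised here would have
                # propagated out of the endpoint; catching Exception turns it
                # into a client-visible error response instead.
                logger.exception("Error while handling request")
                return self.create_error_response(str(e))

        def _preprocess(self, request) -> str:
            # Deliberately not a ValueError, to show the broadened handler.
            raise TypeError("unexpected prompt type")

        async def _generate(self, prompt: str) -> ErrorResponse:
            return ErrorResponse(prompt)


    if __name__ == "__main__":
        result = asyncio.run(ServingStub().create_completion(request=None))
        print(result.message)  # -> "unexpected prompt type"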