From 361ea8d9120879b6eed517416fc70db829c8ed2e Mon Sep 17 00:00:00 2001
From: Lianmin Zheng
Date: Tue, 17 Dec 2024 04:14:14 -0800
Subject: [PATCH] Fix openai protocols and pass top_k, min_p (#2499)

---
 python/sglang/srt/openai_api/adapter.py  |  4 ++++
 python/sglang/srt/openai_api/protocol.py | 14 +++++++++-----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/python/sglang/srt/openai_api/adapter.py b/python/sglang/srt/openai_api/adapter.py
index dfb7d4f18bf..6aff0b45822 100644
--- a/python/sglang/srt/openai_api/adapter.py
+++ b/python/sglang/srt/openai_api/adapter.py
@@ -510,6 +510,8 @@ def v1_generate_request(
                 "stop": request.stop,
                 "stop_token_ids": request.stop_token_ids,
                 "top_p": request.top_p,
+                "top_k": request.top_k,
+                "min_p": request.min_p,
                 "presence_penalty": request.presence_penalty,
                 "frequency_penalty": request.frequency_penalty,
                 "repetition_penalty": request.repetition_penalty,
@@ -926,6 +928,8 @@ def v1_chat_generate_request(
             "stop": stop,
             "stop_token_ids": request.stop_token_ids,
             "top_p": request.top_p,
+            "top_k": request.top_k,
+            "min_p": request.min_p,
             "presence_penalty": request.presence_penalty,
             "frequency_penalty": request.frequency_penalty,
             "repetition_penalty": request.repetition_penalty,
diff --git a/python/sglang/srt/openai_api/protocol.py b/python/sglang/srt/openai_api/protocol.py
index 5d5d430cd17..9fe3f25d5c4 100644
--- a/python/sglang/srt/openai_api/protocol.py
+++ b/python/sglang/srt/openai_api/protocol.py
@@ -166,17 +166,19 @@ class CompletionRequest(BaseModel):
     temperature: float = 1.0
     top_p: float = 1.0
     user: Optional[str] = None
-    lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None

     # Extra parameters for SRT backend only and will be ignored by OpenAI models.
-    json_schema: Optional[str] = None
-    regex: Optional[str] = None
+    top_k: int = -1
+    min_p: float = 0.0
     min_tokens: int = 0
+    regex: Optional[str] = None
+    json_schema: Optional[str] = None
     repetition_penalty: float = 1.0
     stop_token_ids: Optional[List[int]] = None
     no_stop_trim: bool = False
     ignore_eos: bool = False
     skip_special_tokens: bool = True
+    lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None


 class CompletionResponseChoice(BaseModel):
@@ -276,14 +278,16 @@ class ChatCompletionRequest(BaseModel):
     user: Optional[str] = None

     # Extra parameters for SRT backend only and will be ignored by OpenAI models.
-    lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None
-    regex: Optional[str] = None
+    top_k: int = -1
+    min_p: float = 0.0
     min_tokens: int = 0
+    regex: Optional[str] = None
     repetition_penalty: float = 1.0
     stop_token_ids: Optional[List[int]] = None
     no_stop_trim: bool = False
     ignore_eos: bool = False
     skip_special_tokens: bool = True
+    lora_path: Optional[Union[List[Optional[str]], Optional[str]]] = None


 class ChatMessage(BaseModel):
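
Usage note (illustrative sketch, not part of the patch): with top_k and min_p now declared on CompletionRequest / ChatCompletionRequest and forwarded by v1_generate_request / v1_chat_generate_request, an OpenAI-compatible client can pass them as extra request-body fields. The server URL, port, model name, and sampling values below are placeholder assumptions.

# Client-side sketch, assuming a local SGLang server on its default port
# and the openai>=1.0 Python SDK; the model name is a placeholder.
import openai

client = openai.OpenAI(base_url="http://localhost:30000/v1", api_key="EMPTY")

response = client.chat.completions.create(
    model="default",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    temperature=0.7,
    top_p=0.9,
    # SRT-only sampling parameters enabled by this patch; they travel in the
    # request body and are ignored by upstream OpenAI models.
    extra_body={"top_k": 40, "min_p": 0.05},
)
print(response.choices[0].message.content)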