From 8f241e77bc0b0aba2a20f7341ffddd3851472afe Mon Sep 17 00:00:00 2001
From: Maximilien de Bayser
Date: Thu, 9 Jan 2025 00:05:43 -0300
Subject: [PATCH] treat do_lower_case in the same way as the
 sentence-transformers library (#11815)

Signed-off-by: Max de Bayser
Signed-off-by: Fred Reiss
---
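Notes: sentence-transformers applies do_lower_case to the raw input text
before tokenization, while the code removed below forwarded the flag to
the tokenizer constructor instead. The new checks in serving_engine.py
and preprocess.py lowercase the prompt itself, matching that behavior.
A minimal sketch of what is being matched (simplified for illustration;
this is a hypothetical stand-in, not the actual sentence-transformers
source):

    def tokenize(texts: list[str], tokenizer, do_lower_case: bool = False):
        # Lowercase the raw strings first and leave the tokenizer's own
        # configuration untouched, as sentence-transformers does in its
        # Transformer module.
        if do_lower_case:
            texts = [text.lower() for text in texts]
        return tokenizer(texts)
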
 tests/entrypoints/openai/test_serving_chat.py       | 1 +
 tests/models/embedding/language/test_embedding.py   | 1 +
 vllm/entrypoints/openai/serving_engine.py           | 5 +++++
 vllm/inputs/preprocess.py                           | 6 ++++++
 vllm/transformers_utils/tokenizer_group/__init__.py | 5 -----
 5 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py
index 97248f1150979..f431d1065e0eb 100644
--- a/tests/entrypoints/openai/test_serving_chat.py
+++ b/tests/entrypoints/openai/test_serving_chat.py
@@ -35,6 +35,7 @@ class MockModelConfig:
     logits_processor_pattern = None
     diff_sampling_param: Optional[dict] = None
     allowed_local_media_path: str = ""
+    encoder_config = None
 
     def get_diff_sampling_param(self):
         return self.diff_sampling_param or {}
diff --git a/tests/models/embedding/language/test_embedding.py b/tests/models/embedding/language/test_embedding.py
index f458ef5ef556d..7749806548cd9 100644
--- a/tests/models/embedding/language/test_embedding.py
+++ b/tests/models/embedding/language/test_embedding.py
@@ -15,6 +15,7 @@
     # [Encoder-only]
     pytest.param("BAAI/bge-base-en-v1.5",
                  marks=[pytest.mark.core_model, pytest.mark.cpu_model]),
+    pytest.param("sentence-transformers/all-MiniLM-L12-v2"),
     pytest.param("intfloat/multilingual-e5-large"),
     # [Encoder-decoder]
     pytest.param("intfloat/e5-mistral-7b-instruct",
diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py
index 319f869240036..88859255f202a 100644
--- a/vllm/entrypoints/openai/serving_engine.py
+++ b/vllm/entrypoints/openai/serving_engine.py
@@ -160,6 +160,11 @@ def _normalize_prompt_text_to_input(
         truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]],
         add_special_tokens: bool,
     ) -> TextTokensPrompt:
+        if (self.model_config.encoder_config is not None
+                and self.model_config.encoder_config.get(
+                    "do_lower_case", False)):
+            prompt = prompt.lower()
+
         if truncate_prompt_tokens is None:
             encoded = tokenizer(prompt, add_special_tokens=add_special_tokens)
         else:
diff --git a/vllm/inputs/preprocess.py b/vllm/inputs/preprocess.py
index 6ddc1eb76f10d..3e92d5821e645 100644
--- a/vllm/inputs/preprocess.py
+++ b/vllm/inputs/preprocess.py
@@ -190,6 +190,12 @@ def _tokenize_prompt(
             # on the task and language of their request. Also needed to avoid
             # appending an EOS token to the prompt which disrupts generation.
             add_special_tokens = False
+
+        if (self.model_config.encoder_config is not None
+                and self.model_config.encoder_config.get(
+                    "do_lower_case", False)):
+            prompt = prompt.lower()
+
         return tokenizer.encode(request_id=request_id,
                                 prompt=prompt,
                                 lora_request=lora_request,
diff --git a/vllm/transformers_utils/tokenizer_group/__init__.py b/vllm/transformers_utils/tokenizer_group/__init__.py
index c0b3d2585a962..d400276796996 100644
--- a/vllm/transformers_utils/tokenizer_group/__init__.py
+++ b/vllm/transformers_utils/tokenizer_group/__init__.py
@@ -26,11 +26,6 @@ def init_tokenizer_from_configs(model_config: ModelConfig,
                        trust_remote_code=model_config.trust_remote_code,
                        revision=model_config.tokenizer_revision)
 
-    if (model_config.encoder_config is not None
-            and "do_lower_case" in model_config.encoder_config):
-        init_kwargs["do_lower_case"] = model_config.encoder_config[
-            "do_lower_case"]
-
     return get_tokenizer_group(parallel_config.tokenizer_pool_config,
                                **init_kwargs)