From 53f69b14a3459aa61ce48a30c3c0a036a9175777 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Sat, 11 Jan 2025 21:27:24 +0800
Subject: [PATCH] [Doc] Basic guide for writing unit tests for new models
 (#11951)

Signed-off-by: DarkLight1337
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 docs/source/contributing/model/basic.md        |  2 +-
 docs/source/contributing/model/index.md        |  1 +
 .../source/contributing/model/registration.md  |  3 +-
 docs/source/contributing/model/tests.md        | 63 +++++++++++++++++++
 tests/models/registry.py                       |  5 ++
 tests/models/test_initialization.py            | 10 +++
 6 files changed, 81 insertions(+), 3 deletions(-)
 create mode 100644 docs/source/contributing/model/tests.md

diff --git a/docs/source/contributing/model/basic.md b/docs/source/contributing/model/basic.md
index 002808ac5fbbd..5c2dc486c8bea 100644
--- a/docs/source/contributing/model/basic.md
+++ b/docs/source/contributing/model/basic.md
@@ -1,6 +1,6 @@
 (new-model-basic)=
 
-# Basic Implementation
+# Implementing a Basic Model
 
 This guide walks you through the steps to implement a basic vLLM model.
 
diff --git a/docs/source/contributing/model/index.md b/docs/source/contributing/model/index.md
index 245e13b795ec4..fe018b61b08cf 100644
--- a/docs/source/contributing/model/index.md
+++ b/docs/source/contributing/model/index.md
@@ -10,6 +10,7 @@ This section provides more information on how to integrate a [PyTorch](https://p
 
 basic
 registration
+tests
 multimodal
 ```
 
diff --git a/docs/source/contributing/model/registration.md b/docs/source/contributing/model/registration.md
index 6a9262669cd29..d6c9e4181dfee 100644
--- a/docs/source/contributing/model/registration.md
+++ b/docs/source/contributing/model/registration.md
@@ -1,6 +1,6 @@
 (new-model-registration)=
 
-# Model Registration
+# Registering a Model to vLLM
 
 vLLM relies on a model registry to determine how to run each model.
 A list of pre-registered architectures can be found [here](#supported-models).
@@ -15,7 +15,6 @@ This gives you the ability to modify the codebase and test your model.
 
 After you have implemented your model (see [tutorial](#new-model-basic)), put it into the  directory.
 Then, add your model class to `_VLLM_MODELS` in  so that it is automatically registered upon importing vLLM.
-You should also include an example HuggingFace repository for this model in  to run the unit tests.
 Finally, update our [list of supported models](#supported-models) to promote your model!
 
 ```{important}
diff --git a/docs/source/contributing/model/tests.md b/docs/source/contributing/model/tests.md
new file mode 100644
index 0000000000000..74c933b2f45da
--- /dev/null
+++ b/docs/source/contributing/model/tests.md
@@ -0,0 +1,63 @@
+(new-model-tests)=
+
+# Writing Unit Tests
+
+This page explains how to write unit tests to verify the implementation of your model.
+
+## Required Tests
+
+These tests are necessary to get your PR merged into the vLLM library.
+Without them, the CI for your PR will fail.
+
+### Model loading
+
+Include an example HuggingFace repository for your model in .
+This enables a unit test that loads dummy weights to ensure that the model can be initialized in vLLM.
+
+```{important}
+The list of models in each section should be maintained in alphabetical order.
+```
+
+```{tip}
+If your model requires a development version of HF Transformers, you can set
+`min_transformers_version` to skip the test in CI until the model is released.
+```
+
+## Optional Tests
+
+These tests are optional to get your PR merged into the vLLM library.
+Passing these tests provides more confidence that your implementation is correct and helps avoid future regressions.
+
+### Model correctness
+
+These tests compare the model outputs of vLLM against [HF Transformers](https://github.com/huggingface/transformers). You can add new tests under the subdirectories of .
+
+#### Generative models
+
+For [generative models](#generative-models), there are two levels of correctness tests, as defined in :
+
+- Exact correctness (`check_outputs_equal`): The text output by vLLM should exactly match the text output by HF.
+- Logprobs similarity (`check_logprobs_close`): The logprobs output by vLLM should be among the top-k logprobs output by HF, and vice versa.
+
+#### Pooling models
+
+For [pooling models](#pooling-models), we simply check the cosine similarity, as defined in .
+
+(mm-processing-tests)=
+
+### Multi-modal processing
+
+#### Common tests
+
+Adding your model to  verifies that the following input combinations result in the same outputs:
+
+- Text + multi-modal data
+- Tokens + multi-modal data
+- Text + cached multi-modal data
+- Tokens + cached multi-modal data
+
+#### Model-specific tests
+
+You can add a new file under  to run tests that only apply to your model.
+
+For example, if the HF processor for your model accepts user-specified keyword arguments, you can verify that the keyword arguments are being applied correctly, such as in .
diff --git a/tests/models/registry.py b/tests/models/registry.py
index dcb8bfa0f9510..f5aaa8eb071f9 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -22,6 +22,11 @@ class _HfExamplesInfo:
     for speculative decoding.
     """
 
+    min_transformers_version: Optional[str] = None
+    """
+    The minimum version of HF Transformers that is required to run this model.
+    """
+
     is_available_online: bool = True
     """
     Set this to ``False`` if the name of this architecture no longer exists on
diff --git a/tests/models/test_initialization.py b/tests/models/test_initialization.py
index 3b728f2744fca..7a564c1f4a1d0 100644
--- a/tests/models/test_initialization.py
+++ b/tests/models/test_initialization.py
@@ -1,7 +1,9 @@
 from unittest.mock import patch
 
 import pytest
+from packaging.version import Version
 from transformers import PretrainedConfig
+from transformers import __version__ as TRANSFORMERS_VERSION
 
 from vllm import LLM
 
@@ -13,6 +15,14 @@ def test_can_initialize(model_arch):
     model_info = HF_EXAMPLE_MODELS.get_hf_info(model_arch)
     if not model_info.is_available_online:
         pytest.skip("Model is not available online")
+    if model_info.min_transformers_version is not None:
+        current_version = TRANSFORMERS_VERSION
+        required_version = model_info.min_transformers_version
+        if Version(current_version) < Version(required_version):
+            pytest.skip(
+                f"You have `transformers=={current_version}` installed, but "
+                f"`transformers>={required_version}` is required to run this "
+                "model")
 
     # Avoid OOM
     def hf_overrides(hf_config: PretrainedConfig) -> PretrainedConfig:
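---

Editor's note: below is a minimal, illustrative sketch of how the `min_transformers_version` field introduced by this patch is intended to be used when registering an example HuggingFace repository for a new architecture in `tests/models/registry.py`. The dataclass here is a simplified stand-in for the real `_HfExamplesInfo`, and the architecture name, repository, and version are placeholders, not actual registry entries.

```python
# Illustrative sketch only -- a simplified stand-in for _HfExamplesInfo in
# tests/models/registry.py; the repository and version below are placeholders.
from dataclasses import dataclass
from typing import Optional

from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION


@dataclass(frozen=True)
class _HfExamplesInfo:
    default: str                                    # example HF repository to load
    min_transformers_version: Optional[str] = None  # skip in CI below this version
    is_available_online: bool = True


# A contributor registers one entry like this for their new architecture:
example = _HfExamplesInfo(
    "my-org/my-new-model",            # placeholder HuggingFace repository
    min_transformers_version="4.49",  # placeholder version requirement
)

# test_can_initialize then skips the model when the installed HF Transformers
# is too old, mirroring the check added in tests/models/test_initialization.py:
if (example.min_transformers_version is not None
        and Version(TRANSFORMERS_VERSION) < Version(example.min_transformers_version)):
    print(f"skip: transformers>={example.min_transformers_version} is required")
else:
    print(f"ok: transformers=={TRANSFORMERS_VERSION} satisfies the requirement")
```

With an entry like this in place, the loading test covers the model automatically and is skipped in CI until the required HF Transformers release is available.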