vllm-project · DarkLight1337 · Jan 9, 2025 · Jan 9, 2025
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
@@ -38,7 +38,7 @@ steps:
   - pip install -r requirements-docs.txt
   - SPHINXOPTS=\"-W\" make html
   # Check API reference (if it fails, you may have missing mock imports)
-  - grep \"sig sig-object py\" build/html/api/params.html
+  - grep \"sig sig-object py\" build/html/api/inference_params.html
 
 - label: Async Engine, Inputs, Utils, Worker Test # 24min
   fast_check: true

diff --git a/docs/source/api/params.md → docs/source/api/inference_params.md b/docs/source/api/params.md → docs/source/api/inference_params.md
@@ -1,6 +1,6 @@
-# Optional Parameters
+# Inference Parameters
 
-Optional parameters for vLLM APIs.
+Inference parameters for vLLM APIs.
 
 (sampling-params)=
 
@@ -19,4 +19,3 @@ Optional parameters for vLLM APIs.
 .. autoclass:: vllm.PoolingParams
     :members:
 ```
-
diff --git a/docs/source/api/model/adapters.md b/docs/source/api/model/adapters.md
@@ -0,0 +1,9 @@
+# Model Adapters
+
+## Module Contents
+
+```{eval-rst}
+.. automodule:: vllm.model_executor.models.adapters
+    :members:
+    :member-order: bysource
+```
diff --git a/docs/source/api/model/index.md b/docs/source/api/model/index.md
@@ -0,0 +1,12 @@
+# Model Development
+
+## Submodules
+
+```{toctree}
+:maxdepth: 1
+
+interfaces_base
+interfaces
+adapters
+```
+
diff --git a/docs/source/api/model/interfaces.md b/docs/source/api/model/interfaces.md
@@ -0,0 +1,9 @@
+# Optional Interfaces
+
+## Module Contents
+
+```{eval-rst}
+.. automodule:: vllm.model_executor.models.interfaces
+    :members:
+    :member-order: bysource
+```
diff --git a/docs/source/api/model/interfaces_base.md b/docs/source/api/model/interfaces_base.md
@@ -0,0 +1,9 @@
+# Base Model Interfaces
+
+## Module Contents
+
+```{eval-rst}
+.. automodule:: vllm.model_executor.models.interfaces_base
+    :members:
+    :member-order: bysource
+```
diff --git a/docs/source/index.md b/docs/source/index.md
@@ -139,8 +139,9 @@ community/sponsors
 
 api/offline_inference/index
 api/engine/index
+api/inference_params
 api/multimodal/index
-api/params
+api/model/index
 ```
 
 % Design Documents: Details about vLLM internals

diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py
@@ -38,13 +38,15 @@ def get_multimodal_embeddings(self, **kwargs) -> Optional[T]:
         to be merged with text embeddings.
 
         The output embeddings must be one of the following formats:
+
         - A list or tuple of 2D tensors, where each tensor corresponds to 
-          each input multimodal data item (e.g, image).
+            each input multimodal data item (e.g., image).
         - A single 3D tensor, with the batch dimension grouping the 2D tensors.
 
-        NOTE: The returned multimodal embeddings must be in the same order as 
-        the appearances of their corresponding multimodal data item in the 
-        input prompt.
+        Note:
+            The returned multimodal embeddings must be in the same order as
+            their corresponding multimodal data items appear in the input
+            prompt.
         """
         ...
 
@@ -59,6 +61,7 @@ def get_input_embeddings(
     ) -> torch.Tensor:
         ...
 
+    @overload
     def get_input_embeddings(
         self,
         input_ids: torch.Tensor,

diff --git a/vllm/model_executor/models/interfaces_base.py b/vllm/model_executor/models/interfaces_base.py
@@ -35,6 +35,7 @@
 
 @runtime_checkable
 class VllmModel(Protocol[C_co, T_co]):
+    """The interface required for all models in vLLM."""
 
     def __init__(
         self,
@@ -97,6 +98,7 @@ def is_vllm_model(
 
 @runtime_checkable
 class VllmModelForTextGeneration(VllmModel[C_co, T], Protocol[C_co, T]):
+    """The interface required for all generative models in vLLM."""
 
     def compute_logits(
         self,
@@ -142,6 +144,7 @@ def is_text_generation_model(
 
 @runtime_checkable
 class VllmModelForPooling(VllmModel[C_co, T], Protocol[C_co, T]):
+    """The interface required for all pooling models in vLLM."""
 
     def pooler(
         self,