[Model] Add support for deepseek-vl2-tiny model (#12068)
Signed-off-by: Isotr0py <[email protected]>
Isotr0py authored Jan 16, 2025
1 parent 5fd24ec commit 62b06ba
Showing 6 changed files with 22 additions and 21 deletions.
5 changes: 2 additions & 3 deletions docs/source/models/supported_models.md
@@ -618,7 +618,7 @@ See [this page](#generative-models) for more information on how to use generative models.
 * - `DeepseekVLV2ForCausalLM`
   - DeepSeek-VL2
   - T + I<sup>+</sup>
-  - `deepseek-ai/deepseek-vl2-tiny`(WIP), `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2` etc. (see note)
+  - `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2` etc. (see note)
   -
   - ✅︎
   - ✅︎
@@ -768,9 +768,8 @@ See [this page](#generative-models) for more information on how to use generative models.
 <sup>+</sup> Multiple items can be input per text prompt for this modality.
 
 ````{note}
-The `deepseek-ai/deepseek-vl2-tiny` is not supported yet.
 To use `DeepSeek-VL2` series models, you need to install a fork of the `deepseek_vl2` package:
 ```shell
 pip install git+https://github.com/Isotr0py/DeepSeek-VL2.git
 ```
2 changes: 1 addition & 1 deletion examples/offline_inference/vision_language.py
@@ -70,7 +70,7 @@ def run_chameleon(question: str, modality: str):
 def run_deepseek_vl2(question: str, modality: str):
     assert modality == "image"
 
-    model_name = "deepseek-ai/deepseek-vl2-small"
+    model_name = "deepseek-ai/deepseek-vl2-tiny"
 
     llm = LLM(model=model_name,
               max_model_len=4096,
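For orientation, here is a minimal end-to-end sketch of how the patched example can be driven. It is an illustration under stated assumptions, not the file's actual contents: the `hf_overrides` value mirrors the architecture override visible in the test config further down, the prompt template follows the `<|User|>:`/`<|Assistant|>:` format used there, and `stop_sign.jpg` is a placeholder image path.

```python
from PIL import Image

from vllm import LLM, SamplingParams

# Hypothetical driver in the spirit of run_deepseek_vl2; all values are
# assumptions except the model name and the architecture override, both of
# which appear elsewhere in this commit.
llm = LLM(
    model="deepseek-ai/deepseek-vl2-tiny",
    max_model_len=4096,
    max_num_seqs=2,
    hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
)

image = Image.open("stop_sign.jpg")  # placeholder local image
prompt = "<|User|>: <image>\nWhat's the content of the image?\n\n<|Assistant|>:"

outputs = llm.generate(
    {"prompt": prompt, "multi_modal_data": {"image": image}},
    SamplingParams(temperature=0.0, max_tokens=64),
)
print(outputs[0].outputs[0].text)
```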
2 changes: 1 addition & 1 deletion examples/offline_inference/vision_language_multi_image.py
@@ -55,7 +55,7 @@ def load_aria(question, image_urls: List[str]) -> ModelRequestData:


 def load_deepseek_vl2(question: str, image_urls: List[str]):
-    model_name = "deepseek-ai/deepseek-vl2-small"
+    model_name = "deepseek-ai/deepseek-vl2-tiny"
 
     llm = LLM(model=model_name,
               max_model_len=4096,
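A companion sketch for the multi-image path, again hedged: `fetch_image` is the image loader vLLM's example scripts use, the URLs are placeholders, and `limit_mm_per_prompt` raises the per-prompt image cap. The numbered `image_N:<image>` placeholders follow the convention of the multi-image prompt in the test config.

```python
from vllm import LLM, SamplingParams
from vllm.multimodal.utils import fetch_image

# Placeholder URLs; any two images will do.
image_urls = [
    "https://example.com/car.jpg",
    "https://example.com/tower.jpg",
]

llm = LLM(
    model="deepseek-ai/deepseek-vl2-tiny",
    max_model_len=4096,
    max_num_seqs=2,
    hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]},
    limit_mm_per_prompt={"image": len(image_urls)},
)

# Number the placeholders the same way the diff's multi-image prompt does.
placeholders = "".join(f"image_{i}:<image>\n"
                       for i, _ in enumerate(image_urls, start=1))
prompt = f"<|User|>: {placeholders}Describe the two images.\n\n<|Assistant|>:"

outputs = llm.generate(
    {
        "prompt": prompt,
        "multi_modal_data": {"image": [fetch_image(url) for url in image_urls]},
    },
    SamplingParams(temperature=0.0, max_tokens=64),
)
print(outputs[0].outputs[0].text)
```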
20 changes: 9 additions & 11 deletions tests/models/decoder_only/vision_language/test_models.py
@@ -9,6 +9,7 @@

 import pytest
 from transformers import AutoModelForVision2Seq
+from transformers import __version__ as TRANSFORMERS_VERSION
 from transformers.utils import is_flash_attn_2_available
 
 from vllm.platforms import current_platform
@@ -189,30 +190,27 @@
dtype="bfloat16",
),
"deepseek_vl_v2": VLMTestInfo(
models=["deepseek-ai/deepseek-vl2-small"],
models=["deepseek-ai/deepseek-vl2-tiny"],
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
dtype="bfloat16",
prompt_formatter=lambda img_prompt: f"<|User|>: {img_prompt}\n\n<|Assistant|>: ", # noqa: E501
max_model_len=4096,
max_num_seqs=2,
single_image_prompts=IMAGE_ASSETS.prompts({
"stop_sign": "<image>\nWhat's the color of the stop sign and car?",
"cherry_blossom": "<image>\nWhat's the color of the tower?",
"stop_sign": "<image>\nWhat's the content in the center of the image?", # noqa: E501
"cherry_blossom": "<image>\nPlease infer the season with reason in details.", # noqa: E501
}),
multi_image_prompt="image_1:<image>\nimage_2:<image>\nDescribe the two images shortly.", # noqa: E501
multi_image_prompt="image_1:<image>\nimage_2:<image>\nWhich image can we see the car and the tower?", # noqa: E501
vllm_runner_kwargs={"hf_overrides": {"architectures": ["DeepseekVLV2ForCausalLM"]}}, # noqa: E501
image_size_factors=[(0.10, 0.15)],
patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
postprocess_inputs=model_utils.cast_dtype_post_processor("images"),
hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
stop_str=["<|end▁of▁sentence|>", "<|begin▁of▁sentence|>"], # noqa: E501
num_logprobs=5,
image_size_factors=[(), (1.0, ), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
marks=[
pytest.mark.skipif(
not is_flash_attn_2_available(),
reason="Model needs flash-attn for numeric convergence.",
),
large_gpu_mark(min_gb=48),
TRANSFORMERS_VERSION >= "4.48.0",
reason="HF model is not compatible with transformers>=4.48.0",
)
],
),
"fuyu": VLMTestInfo(
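The notable change in the marks is the swap from a flash-attn availability gate to a transformers version gate. A minimal, self-contained sketch of that skip pattern follows; the test name is invented, and the plain string comparison reproduces the committed check rather than `packaging.version` parsing.

```python
import pytest
from transformers import __version__ as TRANSFORMERS_VERSION


@pytest.mark.skipif(
    TRANSFORMERS_VERSION >= "4.48.0",
    reason="HF model is not compatible with transformers>=4.48.0",
)
def test_deepseek_vl2_tiny_smoke():
    # Body elided; the mark alone decides whether the test runs.
    ...
```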
3 changes: 1 addition & 2 deletions tests/models/registry.py
@@ -181,8 +181,7 @@ class _HfExamplesInfo:
                                                trust_remote_code=True),
     "ChatGLMForConditionalGeneration": _HfExamplesInfo("chatglm2-6b",
                                                        is_available_online=False),
-    # TODO(Isotr0py): Use deepseek-vl2-tiny for test after it's supported
-    "DeepseekVLV2ForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-vl2-small"),  # noqa: E501
+    "DeepseekVLV2ForCausalLM": _HfExamplesInfo("deepseek-ai/deepseek-vl2-tiny"),  # noqa: E501
     "FuyuForCausalLM": _HfExamplesInfo("adept/fuyu-8b"),
     "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m"),
     "InternVLChatModel": _HfExamplesInfo("OpenGVLab/InternVL2-1B",
11 changes: 8 additions & 3 deletions vllm/model_executor/models/deepseek_vl2.py
@@ -356,13 +356,18 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
f"Only 2D tile_tag is supported currently, got: {self.tile_tag}"
)

if self.text_config.topk_method == "noaux_tc":
architectures = ["DeepseekV3ForCausalLM"]
elif not self.text_config.use_mla:
architectures = ["DeepseekForCausalLM"]
else:
architectures = ["DeepseekV2ForCausalLM"]

self.language_model = init_vllm_registered_model(
vllm_config=vllm_config,
hf_config=self.text_config,
prefix=maybe_prefix(prefix, "language"),
architectures=["DeepseekV3ForCausalLM"]
if self.text_config.topk_method == "noaux_tc" else
["DeepseekV2ForCausalLM"],
architectures=architectures,
)

self.make_empty_intermediate_tensors = (
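This refactored dispatch is the functional core of the change: `noaux_tc` top-k routing selects the DeepSeek-V3 backbone, a text config without MLA (the `deepseek-vl2-tiny` case) selects the original `DeepseekForCausalLM`, and everything else falls back to DeepSeek-V2. A toy restatement with a stand-in config object; the attribute values are illustrative, not taken from any real checkpoint.

```python
from types import SimpleNamespace


def pick_architecture(text_config) -> list[str]:
    # Same three-way dispatch as the patched __init__ above.
    if text_config.topk_method == "noaux_tc":
        return ["DeepseekV3ForCausalLM"]
    elif not text_config.use_mla:
        return ["DeepseekForCausalLM"]
    else:
        return ["DeepseekV2ForCausalLM"]


# Illustrative configs only.
tiny_like = SimpleNamespace(topk_method="greedy", use_mla=False)
v2_like = SimpleNamespace(topk_method="greedy", use_mla=True)
v3_like = SimpleNamespace(topk_method="noaux_tc", use_mla=True)

assert pick_architecture(tiny_like) == ["DeepseekForCausalLM"]
assert pick_architecture(v2_like) == ["DeepseekV2ForCausalLM"]
assert pick_architecture(v3_like) == ["DeepseekV3ForCausalLM"]
```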
