Fix Max Token ID for Qwen-VL-Chat (#11980)
Signed-off-by: Alex-Brooks <[email protected]>
alex-jw-brooks authored Jan 13, 2025
1 parent 89ce62a commit 5340a30
Showing 1 changed file with 9 additions and 0 deletions.
vllm/transformers_utils/tokenizer.py (9 additions, 0 deletions)
@@ -1,3 +1,4 @@
+import contextlib
 import os
 import warnings
 from pathlib import Path
@@ -67,7 +68,15 @@ def get_cached_tokenizer(tokenizer: AnyTokenizer) -> AnyTokenizer:
         tokenizer.all_special_tokens_extended)
     tokenizer_all_special_tokens = set(tokenizer.all_special_tokens)
     tokenizer_len = len(tokenizer)
 
+    max_token_id = max(tokenizer.get_vocab().values())
+    # Some tokenizers (e.g., QwenTokenizer) have special tokens that
+    # are added and included in the implementation of the vocab_size
+    # property, but not in get_vocab(); if there is an implementation
+    # of vocab size, we should take the greater value.
+    if hasattr(tokenizer, "vocab_size"):
+        with contextlib.suppress(NotImplementedError):
+            max_token_id = max(max_token_id, tokenizer.vocab_size)
 
     class CachedTokenizer(tokenizer.__class__):  # type: ignore
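For illustration, here is a minimal sketch (not vLLM code) of the mismatch this commit guards against. FakeQwenTokenizer is a hypothetical stand-in for tokenizers such as QwenTokenizer, whose vocab_size property counts added special tokens that do not appear in get_vocab():

import contextlib

class FakeQwenTokenizer:
    """Hypothetical tokenizer mimicking the QwenTokenizer quirk."""

    def get_vocab(self):
        # Base vocabulary only; the added special tokens are absent here.
        return {"hello": 0, "world": 1}

    @property
    def vocab_size(self):
        # Counts the base vocab plus two added special tokens.
        return 4

tokenizer = FakeQwenTokenizer()

# Before this fix: the max token ID was derived from get_vocab() alone,
# so the special-token IDs looked out of range.
max_token_id = max(tokenizer.get_vocab().values())
assert max_token_id == 1

# After this fix: also consult vocab_size when it is implemented.
if hasattr(tokenizer, "vocab_size"):
    with contextlib.suppress(NotImplementedError):
        max_token_id = max(max_token_id, tokenizer.vocab_size)
assert max_token_id == 4

The contextlib.suppress(NotImplementedError) guard covers tokenizers that declare vocab_size but raise NotImplementedError when it is accessed; for those, the value from get_vocab() is kept.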
