Skip to content

Commit

Permalink
[Bugfix] Check that number of images matches number of <|image|> tokens with mllama (#11939)
Browse files Browse the repository at this point in the history

Signed-off-by: Travis Johnson <[email protected]>
  • Loading branch information
tjohnson31415 authored Jan 10, 2025
1 parent 8a57940 commit d45cbe7
Showing 1 changed file with 9 additions and 0 deletions.
9 changes: 9 additions & 0 deletions vllm/model_executor/models/mllama.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,13 @@ def input_processor_for_mllama(

assert is_list_of(image_data, Image.Image)

num_image_tokens = dec_inputs['prompt_token_ids'].count(
MLLAMA_IMAGE_TOKEN_ID)
if num_image_tokens != len(image_data):
raise ValueError(
f"The number of image tokens ({num_image_tokens}) must be"
f" the same as the number of images ({len(image_data)})")

# Since only the last group of consecutive images
# are attended by the decoded tokens, we only need to
# get the number of tiles for those images.
Expand Down Expand Up @@ -1493,6 +1500,8 @@ def convert_sparse_cross_attention_mask_to_dense(
dense_mask[seq_start + start:seq_start + end,
tile_start:tile_start + tile] = 1
tile_start += tile
assert ts != -1
assert td != 0
tile_range_for_decode.append((ts, ts + td))
seq_start += length

Expand Down

0 comments on commit d45cbe7

Please sign in to comment.