Skip to content

Commit

Permalink
Fix
Browse files — browse the repository at this point in the history
Signed-off-by: DarkLight1337 <[email protected]>
  • Loading branch information
DarkLight1337 committed Jan 7, 2025
1 parent 7d394b5 commit aac372e
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
9 changes: 6 additions & 3 deletions vllm/model_executor/models/llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
ProcessingMixin, PromptReplacement)
from vllm.multimodal.profiling import BaseProfilingInfo, ProcessorInputs
from vllm.sequence import IntermediateTensors
from vllm.utils import is_list_of

from .clip import CLIPVisionModel
from .interfaces import SupportsMultiModal, SupportsPP
Expand Down Expand Up @@ -521,7 +522,7 @@ def sampler(self):
return get_sampler()

def _validate_pixel_values(self, data: torch.Tensor) -> torch.Tensor:
# The image size may be different for Pixtral-HF
# Only the longest edge is equal to image_size for Pixtral-HF
if self.config.vision_config.model_type == "pixtral":
return data

Expand Down Expand Up @@ -550,10 +551,12 @@ def _parse_and_validate_image_input(
raise ValueError("Incorrect type of pixel values. "
f"Got type: {type(pixel_values)}")

pixel_values = flatten_bn(pixel_values,
concat=is_list_of(pixel_values, list))

return LlavaImagePixelInputs(
type="pixel_values",
data=self._validate_pixel_values(
flatten_bn(pixel_values, concat=True)),
data=self._validate_pixel_values(pixel_values),
)

if image_embeds is not None:
Expand Down
9 changes: 9 additions & 0 deletions vllm/model_executor/models/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,15 @@ def flatten_bn(
...


@overload
def flatten_bn(
    x: Union[List[torch.Tensor], torch.Tensor],
    *,
    concat: bool = False,
) -> Union[List[torch.Tensor], torch.Tensor]:
    # Catch-all overload: when ``concat`` is an arbitrary (non-literal) bool,
    # the static return type cannot be narrowed, so callers get the union of
    # both possible results — a flat list of per-item tensors or a single
    # concatenated tensor. The runtime behavior lives in the non-overloaded
    # ``flatten_bn`` implementation defined below (not fully visible here).
    ...


def flatten_bn(
x: Union[List[torch.Tensor], torch.Tensor],
*,
Expand Down

0 comments on commit aac372e

Please sign in to comment.