Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Frontend] Enable decord to load video from base64 #11492

Merged
merged 3 commits into from
Dec 25, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 8 additions & 16 deletions vllm/multimodal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,16 +125,6 @@ async def async_fetch_image(image_url: str,
return image.convert(image_mode)


def _load_video_frames_from_bytes(b: bytes):
    """Open the raw bytes ``b`` with ``Image.open`` and return a NumPy array.

    The bytes are wrapped in an in-memory ``BytesIO`` stream so nothing is
    written to disk; the opened image is converted with ``np.array``.
    """
    stream = BytesIO(b)
    return np.array(Image.open(stream))


def load_video_frames_from_base64(frame: Union[bytes, str]):
    """Decode one base64-encoded frame and load it from the resulting bytes.

    Args:
        frame: Base64 payload, as ``bytes`` or ``str``.

    Returns:
        Whatever ``_load_video_frames_from_bytes`` produces for the decoded
        payload.
    """
    raw_frame = base64.b64decode(frame)
    return _load_video_frames_from_bytes(raw_frame)


def _load_video_from_bytes(b: bytes, num_frames: int = 32):
_, decord = try_import_video_packages()

Expand All @@ -156,12 +146,9 @@ def _load_video_from_bytes(b: bytes, num_frames: int = 32):


def _load_video_from_data_url(video_url: str):
    """Decode every comma-separated base64 frame in ``video_url`` and stack them.

    The text before the first comma is discarded; each remaining
    comma-separated piece is decoded as one frame, and the frames are
    combined with ``np.stack``.
    """
    # Split on every comma and drop the leading segment: the remainder is a
    # list of per-frame base64 payloads, not a single payload.
    encoded_frames = video_url.split(",")[1:]
    decoded_frames = [
        load_video_frames_from_base64(encoded) for encoded in encoded_frames
    ]
    return np.stack(decoded_frames)
Isotr0py marked this conversation as resolved.
Show resolved Hide resolved
# Only split once and assume the second part is the base64 encoded video
_, video_base64 = video_url.split(",", 1)
return load_video_from_base64(video_base64)


def fetch_video(video_url: str, *, num_frames: int = 32) -> npt.NDArray:
Expand Down Expand Up @@ -393,6 +380,11 @@ def encode_video_base64(frames: npt.NDArray):
return ",".join(base64_frames)


def load_video_from_base64(video: Union[bytes, str]) -> npt.NDArray:
    """Load a video from its base64 encoding.

    Args:
        video: Base64-encoded video payload, as ``bytes`` or ``str``.

    Returns:
        The result of ``_load_video_from_bytes`` on the decoded payload.
        This is annotated as an array (not ``Image.Image``) because the value
        is what ``_load_video_from_data_url`` hands back to ``fetch_video``,
        which is declared to return ``npt.NDArray``.
    """
    video_bytes = base64.b64decode(video)
    return _load_video_from_bytes(video_bytes)


def resolve_visual_encoder_outputs(
encoder_outputs: Union[torch.Tensor, list[torch.Tensor]],
feature_sample_layers: Optional[list[int]],
Expand Down
Loading