Skip to content

Commit

Permalink
[Frontend] Enable decord to load video from base64 (#11492)
Browse files Browse the repository at this point in the history
Signed-off-by: DarkLight1337 <[email protected]>
  • Loading branch information
DarkLight1337 authored Dec 25, 2024
1 parent fc60166 commit b689ada
Showing 1 changed file with 19 additions and 20 deletions.
39 changes: 19 additions & 20 deletions vllm/multimodal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,17 +125,7 @@ async def async_fetch_image(image_url: str,
return image.convert(image_mode)


def _load_video_frames_from_bytes(b: bytes):
    """Decode one encoded image from raw bytes into a NumPy array.

    Args:
        b: Encoded image bytes; they are wrapped in a ``BytesIO`` and
            handed to ``Image.open``.

    Returns:
        The result of ``np.array`` applied to the opened image.
    """
    buffer = BytesIO(b)
    return np.array(Image.open(buffer))


def load_video_frames_from_base64(frame: Union[bytes, str]):
    """Load a single frame from its base64 representation.

    Args:
        frame: Base64-encoded frame data, as ``bytes`` or ``str``.

    Returns:
        Whatever ``_load_video_frames_from_bytes`` produces for the
        decoded bytes.
    """
    raw_bytes = base64.b64decode(frame)
    return _load_video_frames_from_bytes(raw_bytes)


def _load_video_from_bytes(b: bytes, num_frames: int = 32):
def _load_video_from_bytes(b: bytes, num_frames: int = 32) -> npt.NDArray:
_, decord = try_import_video_packages()

video_path = BytesIO(b)
Expand All @@ -155,13 +145,17 @@ def _load_video_from_bytes(b: bytes, num_frames: int = 32):
return frames


def _load_video_from_data_url(video_url: str):
    """Build a stacked frame array from a comma-separated data URL.

    The text before the first comma is discarded; every remaining
    comma-separated piece is decoded as one base64-encoded frame and the
    decoded frames are combined with ``np.stack``.
    """
    # ``split`` always yields at least one element, so the leading piece
    # can be dropped by unpacking.
    _, *encoded_frames = video_url.split(",")

    decoded_frames = []
    for encoded in encoded_frames:
        decoded_frames.append(load_video_frames_from_base64(encoded))
    return np.stack(decoded_frames)
def _load_video_from_data_url(video_url: str) -> npt.NDArray:
    """Load video data embedded in a data URL.

    Args:
        video_url: A data URL whose payload follows the first comma.

    Returns:
        For URLs starting with ``data:video/jpeg;``, the ``np.stack`` of
        each comma-separated payload piece loaded through
        ``load_image_from_base64``. Otherwise, the result of
        ``load_video_from_base64`` on the whole payload.
    """
    # Split only at the first comma: the header precedes it and the
    # base64 payload (which may itself contain commas) follows.
    _, payload = video_url.split(",", 1)

    if not video_url.startswith("data:video/jpeg;"):
        return load_video_from_base64(payload)

    # JPEG "video": the payload is a comma-joined list of base64 images.
    frames = []
    for encoded_frame in payload.split(","):
        frames.append(np.array(load_image_from_base64(encoded_frame)))
    return np.stack(frames)


def fetch_video(video_url: str, *, num_frames: int = 32) -> npt.NDArray:
Expand Down Expand Up @@ -342,7 +336,7 @@ def rescale_image_size(image: Image.Image,
return image


def try_import_video_packages() -> Any:
def try_import_video_packages():
try:
import cv2
import decord
Expand Down Expand Up @@ -384,7 +378,7 @@ def sample_frames_from_video(frames: npt.NDArray,
return sampled_frames


def encode_video_base64(frames: npt.NDArray):
def encode_video_base64(frames: npt.NDArray) -> str:
base64_frames = []
frames_list = [frames[i] for i in range(frames.shape[0])]
for frame in frames_list:
Expand All @@ -393,6 +387,11 @@ def encode_video_base64(frames: npt.NDArray):
return ",".join(base64_frames)


def load_video_from_base64(video: Union[bytes, str]) -> npt.NDArray:
    """Load a video from its base64 representation.

    Args:
        video: Base64-encoded video data, as ``bytes`` or ``str``.

    Returns:
        The array returned by ``_load_video_from_bytes`` for the decoded
        bytes.
    """
    video_bytes = base64.b64decode(video)
    return _load_video_from_bytes(video_bytes)


def resolve_visual_encoder_outputs(
encoder_outputs: Union[torch.Tensor, list[torch.Tensor]],
feature_sample_layers: Optional[list[int]],
Expand Down

0 comments on commit b689ada

Please sign in to comment.