diff --git a/examples/vlm/dataflow.yml b/examples/vlm/dataflow.yml index 51371b67d..bfb61821b 100644 --- a/examples/vlm/dataflow.yml +++ b/examples/vlm/dataflow.yml @@ -18,7 +18,7 @@ nodes: image: source: camera/image queue_size: 1 - tick: dora/timer/millis/300 + tick: dora/timer/millis/100 outputs: - text - tick diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index c6c735b5a..7a2b61195 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -5,10 +5,19 @@ import numpy as np import pyarrow as pa from PIL import Image +from pathlib import Path import cv2 DEFAULT_PATH = "Qwen/Qwen2-VL-2B-Instruct" -CUSTOM_MODEL_PATH = os.getenv("CUSTOM_MODEL_PATH", DEFAULT_PATH) + +MODEL_NAME_OR_PATH = os.getenv("MODEL_NAME_OR_PATH", DEFAULT_PATH) + +if bool(os.getenv("MODELSCOPE")) is True: + from modelscope import snapshot_download + + if not Path(MODEL_NAME_OR_PATH).exists(): + MODEL_NAME_OR_PATH = snapshot_download(MODEL_NAME_OR_PATH) + DEFAULT_QUESTION = os.getenv( "DEFAULT_QUESTION", "Describe this image", @@ -20,14 +29,14 @@ import flash_attn as _ model = Qwen2VLForConditionalGeneration.from_pretrained( - CUSTOM_MODEL_PATH, + MODEL_NAME_OR_PATH, torch_dtype="auto", device_map="auto", attn_implementation="flash_attention_2", ) except (ImportError, ModuleNotFoundError): model = Qwen2VLForConditionalGeneration.from_pretrained( - CUSTOM_MODEL_PATH, + MODEL_NAME_OR_PATH, torch_dtype="auto", device_map="auto", ) @@ -38,7 +47,7 @@ # default processor -processor = AutoProcessor.from_pretrained(CUSTOM_MODEL_PATH) +processor = AutoProcessor.from_pretrained(MODEL_NAME_OR_PATH) def generate(frames: dict, question): @@ -101,6 +110,16 @@ def main(): event_type = event["type"] if event_type == "INPUT": + + # pylint: disable=fixme + # TODO: Remove this after https://github.com/dora-rs/dora/pull/652 + while True: + next_event = node.next(timeout=0.001) + if next_event is not None and next_event["type"] == "INPUT": + event = next_event + else: + break + event_id = event["id"] if "image" in event_id: diff --git a/node-hub/dora-qwenvl/pyproject.toml b/node-hub/dora-qwenvl/pyproject.toml index eac559469..f4f9f1bd6 100644 --- a/node-hub/dora-qwenvl/pyproject.toml +++ b/node-hub/dora-qwenvl/pyproject.toml @@ -20,6 +20,7 @@ transformers = "^4.45" qwen-vl-utils = "^0.0.2" accelerate = "^0.33" opencv-python = ">= 4.1.1" +modelscope = "^1.18.1" # flash_attn = "^2.6.1" # Install using: pip install -U flash-attn --no-build-isolation