From ab77fb8874b84b349ecb0dc473b293c3217522a5 Mon Sep 17 00:00:00 2001
From: haixuanTao
Date: Sun, 1 Sep 2024 12:42:27 +0200
Subject: [PATCH 01/10] Adding qwenvl2 and its data recorder

---
 examples/camera/.gitignore | 1 +
 examples/camera/README.md | 9 +
 examples/camera/dataflow.yml | 20 ++
 examples/camera/run.rs | 98 +++++++++
 examples/vlm/.gitignore | 1 +
 examples/vlm/README.md | 1 +
 examples/vlm/dataflow.yml | 31 +++
 examples/vlm/run.rs | 98 +++++++++
 node-hub/dora-qwenvl/README.md | 3 +
 node-hub/dora-qwenvl/dora_qwenvl/__init__.py | 11 +
 node-hub/dora-qwenvl/dora_qwenvl/main.py | 145 +++++++++++++
 node-hub/dora-qwenvl/pyproject.toml | 30 +++
 .../dora-qwenvl/tests/test_dora_qwenvl.py | 9 +
 node-hub/dora-rerun/src/main.rs | 2 +-
 node-hub/llama-factory-recorder/README.md | 3 +
 .../llama_factory_recorder/__init__.py | 11 +
 .../llama_factory_recorder/main.py | 193 ++++++++++++++++++
 .../llama-factory-recorder/pyproject.toml | 23 +++
 .../tests/test_llama_factory_recorder.py | 9 +
 19 files changed, 697 insertions(+), 1 deletion(-)
 create mode 100644 examples/camera/.gitignore
 create mode 100644 examples/camera/README.md
 create mode 100644 examples/camera/dataflow.yml
 create mode 100644 examples/camera/run.rs
 create mode 100644 examples/vlm/.gitignore
 create mode 100644 examples/vlm/README.md
 create mode 100644 examples/vlm/dataflow.yml
 create mode 100644 examples/vlm/run.rs
 create mode 100644 node-hub/dora-qwenvl/README.md
 create mode 100644 node-hub/dora-qwenvl/dora_qwenvl/__init__.py
 create mode 100644 node-hub/dora-qwenvl/dora_qwenvl/main.py
 create mode 100644 node-hub/dora-qwenvl/pyproject.toml
 create mode 100644 node-hub/dora-qwenvl/tests/test_dora_qwenvl.py
 create mode 100644 node-hub/llama-factory-recorder/README.md
 create mode 100644 node-hub/llama-factory-recorder/llama_factory_recorder/__init__.py
 create mode 100644 node-hub/llama-factory-recorder/llama_factory_recorder/main.py
 create mode 100644 node-hub/llama-factory-recorder/pyproject.toml
 create mode 100644 node-hub/llama-factory-recorder/tests/test_llama_factory_recorder.py

diff --git a/examples/camera/.gitignore b/examples/camera/.gitignore
new file mode 100644
index 00000000..eede66d8
--- /dev/null
+++ b/examples/camera/.gitignore
@@ -0,0 +1 @@
+*.pt
\ No newline at end of file
diff --git a/examples/camera/README.md b/examples/camera/README.md
new file mode 100644
index 00000000..1899f8bb
--- /dev/null
+++ b/examples/camera/README.md
@@ -0,0 +1,9 @@
+# Quick example on how to use a camera
+
+Make sure to have dora and pip installed.
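+The commands below start the dora coordinator and daemon, run each node's `build` command (the pip installs declared in the dataflow), and launch the stream: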
+
+```bash
+dora up
+dora build dataflow.yml
+dora start dataflow.yml
+```
diff --git a/examples/camera/dataflow.yml b/examples/camera/dataflow.yml
new file mode 100644
index 00000000..35a589da
--- /dev/null
+++ b/examples/camera/dataflow.yml
@@ -0,0 +1,20 @@
+nodes:
+  - id: camera
+    build: pip install ../../node-hub/opencv-video-capture
+    path: opencv-video-capture
+    inputs:
+      tick: dora/timer/millis/20
+    outputs:
+      - image
+    env:
+      CAPTURE_PATH: 0
+      IMAGE_WIDTH: 640
+      IMAGE_HEIGHT: 480
+
+  - id: plot
+    build: pip install ../../node-hub/opencv-plot
+    path: opencv-plot
+    inputs:
+      image:
+        source: camera/image
+        queue_size: 1
diff --git a/examples/camera/run.rs b/examples/camera/run.rs
new file mode 100644
index 00000000..b575234d
--- /dev/null
+++ b/examples/camera/run.rs
@@ -0,0 +1,98 @@
+use dora_core::{get_pip_path, get_python_path, run};
+use dora_tracing::set_up_tracing;
+use eyre::{bail, ContextCompat, WrapErr};
+use std::path::Path;
+
+#[tokio::main]
+async fn main() -> eyre::Result<()> {
+    set_up_tracing("python-dataflow-runner")?;
+
+    let root = Path::new(env!("CARGO_MANIFEST_DIR"));
+    std::env::set_current_dir(root.join(file!()).parent().unwrap())
+        .wrap_err("failed to set working dir")?;
+
+    run(
+        get_python_path().context("Could not get python binary")?,
+        &["-m", "venv", "../.env"],
+        None,
+    )
+    .await
+    .context("failed to create venv")?;
+    let venv = &root.join("examples").join(".env");
+    std::env::set_var(
+        "VIRTUAL_ENV",
+        venv.to_str().context("venv path not valid unicode")?,
+    );
+    let orig_path = std::env::var("PATH")?;
+    // bin folder is named Scripts on windows.
+    // 🤦‍♂️ See: https://github.com/pypa/virtualenv/commit/993ba1316a83b760370f5a3872b3f5ef4dd904c1
+    let venv_bin = if cfg!(windows) {
+        venv.join("Scripts")
+    } else {
+        venv.join("bin")
+    };
+
+    if cfg!(windows) {
+        std::env::set_var(
+            "PATH",
+            format!(
+                "{};{orig_path}",
+                venv_bin.to_str().context("venv path not valid unicode")?
+            ),
+        );
+    } else {
+        std::env::set_var(
+            "PATH",
+            format!(
+                "{}:{orig_path}",
+                venv_bin.to_str().context("venv path not valid unicode")?
+            ),
+        );
+    }
+
+    run(
+        get_pip_path().context("Could not get pip binary")?,
+        &["install", "maturin"],
+        Some(venv),
+    )
+    .await
+    .context("pip install maturin failed")?;
+
+    run(
+        "maturin",
+        &["develop"],
+        Some(&root.join("apis").join("python").join("node")),
+    )
+    .await
+    .context("maturin develop failed")?;
+
+    let dataflow = Path::new("dataflow.yml");
+    run_dataflow(dataflow).await?;
+
+    Ok(())
+}
+
+async fn run_dataflow(dataflow: &Path) -> eyre::Result<()> {
+    let cargo = std::env::var("CARGO").unwrap();
+
+    // First build the dataflow (install requirements)
+    let mut cmd = tokio::process::Command::new(&cargo);
+    cmd.arg("run");
+    cmd.arg("--package").arg("dora-cli");
+    cmd.arg("--").arg("build").arg(dataflow);
+    if !cmd.status().await?.success() {
+        bail!("failed to run dataflow");
+    };
+
+    let mut cmd = tokio::process::Command::new(&cargo);
+    cmd.arg("run");
+    cmd.arg("--package").arg("dora-cli");
+    cmd.arg("--")
+        .arg("daemon")
+        .arg("--run-dataflow")
+        .arg(dataflow);
+    if !cmd.status().await?.success() {
+        bail!("failed to run dataflow");
+    };
+    Ok(())
+}
diff --git a/examples/vlm/.gitignore b/examples/vlm/.gitignore
new file mode 100644
index 00000000..eede66d8
--- /dev/null
+++ b/examples/vlm/.gitignore
@@ -0,0 +1 @@
+*.pt
\ No newline at end of file
diff --git a/examples/vlm/README.md b/examples/vlm/README.md
new file mode 100644
index 00000000..ab6eec8c
--- /dev/null
+++ b/examples/vlm/README.md
@@ -0,0 +1 @@
+# Quick example on using a VLM with dora-rs
diff --git a/examples/vlm/dataflow.yml b/examples/vlm/dataflow.yml
new file mode 100644
index 00000000..ac12cce7
--- /dev/null
+++ b/examples/vlm/dataflow.yml
@@ -0,0 +1,31 @@
+nodes:
+  - id: camera
+    build: pip install ../../node-hub/opencv-video-capture
+    path: opencv-video-capture
+    inputs:
+      tick: dora/timer/millis/20
+    outputs:
+      - image
+    env:
+      CAPTURE_PATH: 0
+      IMAGE_WIDTH: 640
+      IMAGE_HEIGHT: 480
+
+  - id: dora-qwenvl
+    build: pip install -e ../../node-hub/dora-qwenvl
+    path: dora-qwenvl
+    inputs:
+      image:
+        source: camera/image
+        queue_size: 1
+    outputs:
+      - text
+
+  - id: plot
+    build: pip install ../../node-hub/opencv-plot
+    path: opencv-plot
+    inputs:
+      image:
+        source: camera/image
+        queue_size: 1
+      text: dora-qwenvl/text
diff --git a/examples/vlm/run.rs b/examples/vlm/run.rs
new file mode 100644
index 00000000..b575234d
--- /dev/null
+++ b/examples/vlm/run.rs
@@ -0,0 +1,98 @@
+use dora_core::{get_pip_path, get_python_path, run};
+use dora_tracing::set_up_tracing;
+use eyre::{bail, ContextCompat, WrapErr};
+use std::path::Path;
+
+#[tokio::main]
+async fn main() -> eyre::Result<()> {
+    set_up_tracing("python-dataflow-runner")?;
+
+    let root = Path::new(env!("CARGO_MANIFEST_DIR"));
+    std::env::set_current_dir(root.join(file!()).parent().unwrap())
+        .wrap_err("failed to set working dir")?;
+
+    run(
+        get_python_path().context("Could not get python binary")?,
+        &["-m", "venv", "../.env"],
+        None,
+    )
+    .await
+    .context("failed to create venv")?;
+    let venv = &root.join("examples").join(".env");
+    std::env::set_var(
+        "VIRTUAL_ENV",
+        venv.to_str().context("venv path not valid unicode")?,
+    );
+    let orig_path = std::env::var("PATH")?;
+    // bin folder is named Scripts on windows.
+    // 🤦‍♂️ See: https://github.com/pypa/virtualenv/commit/993ba1316a83b760370f5a3872b3f5ef4dd904c1
+    let venv_bin = if cfg!(windows) {
+        venv.join("Scripts")
+    } else {
+        venv.join("bin")
+    };
+
+    if cfg!(windows) {
+        std::env::set_var(
+            "PATH",
+            format!(
+                "{};{orig_path}",
+                venv_bin.to_str().context("venv path not valid unicode")?
+            ),
+        );
+    } else {
+        std::env::set_var(
+            "PATH",
+            format!(
+                "{}:{orig_path}",
+                venv_bin.to_str().context("venv path not valid unicode")?
+            ),
+        );
+    }
+
+    run(
+        get_pip_path().context("Could not get pip binary")?,
+        &["install", "maturin"],
+        Some(venv),
+    )
+    .await
+    .context("pip install maturin failed")?;
+
+    run(
+        "maturin",
+        &["develop"],
+        Some(&root.join("apis").join("python").join("node")),
+    )
+    .await
+    .context("maturin develop failed")?;
+
+    let dataflow = Path::new("dataflow.yml");
+    run_dataflow(dataflow).await?;
+
+    Ok(())
+}
+
+async fn run_dataflow(dataflow: &Path) -> eyre::Result<()> {
+    let cargo = std::env::var("CARGO").unwrap();
+
+    // First build the dataflow (install requirements)
+    let mut cmd = tokio::process::Command::new(&cargo);
+    cmd.arg("run");
+    cmd.arg("--package").arg("dora-cli");
+    cmd.arg("--").arg("build").arg(dataflow);
+    if !cmd.status().await?.success() {
+        bail!("failed to run dataflow");
+    };
+
+    let mut cmd = tokio::process::Command::new(&cargo);
+    cmd.arg("run");
+    cmd.arg("--package").arg("dora-cli");
+    cmd.arg("--")
+        .arg("daemon")
+        .arg("--run-dataflow")
+        .arg(dataflow);
+    if !cmd.status().await?.success() {
+        bail!("failed to run dataflow");
+    };
+    Ok(())
+}
diff --git a/node-hub/dora-qwenvl/README.md b/node-hub/dora-qwenvl/README.md
new file mode 100644
index 00000000..88f4e564
--- /dev/null
+++ b/node-hub/dora-qwenvl/README.md
@@ -0,0 +1,3 @@
+# Dora QwenVL2 node
+
+Experimental node for using a VLM within dora.
diff --git a/node-hub/dora-qwenvl/dora_qwenvl/__init__.py b/node-hub/dora-qwenvl/dora_qwenvl/__init__.py
new file mode 100644
index 00000000..ac3cbef9
--- /dev/null
+++ b/node-hub/dora-qwenvl/dora_qwenvl/__init__.py
@@ -0,0 +1,11 @@
+import os
+
+# Define the path to the README file relative to the package directory
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Read the content of the README file
+try:
+    with open(readme_path, "r", encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py
new file mode 100644
index 00000000..904d2926
--- /dev/null
+++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py
@@ -0,0 +1,145 @@
+import os
+from dora import Node
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor
+from qwen_vl_utils import process_vision_info
+import numpy as np
+import pyarrow as pa
+from PIL import Image
+
+DEFAULT_PATH = "Qwen/Qwen2-VL-2B-Instruct"
+CUSTOM_MODEL_PATH = os.getenv("CUSTOM_MODEL_PATH", DEFAULT_PATH)
+DEFAULT_QUESTION = os.getenv(
+    "DEFAULT_QUESTION",
+    "Describe this image",
+)
+
+# default: Load the model on the available device(s)
+model = Qwen2VLForConditionalGeneration.from_pretrained(
+    PATH,
+    torch_dtype="auto",
+    device_map="auto",
+    attn_implementation="flash_attention_2",
+)
+
+# default processer
+processor = AutoProcessor.from_pretrained(DEFAULT_PATH)
+
+
+def generate(image: np.array, question):
+    """
+    Generate the response to the question given the image using Qwen2 model.
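+
+    Parameters:
+    - image: np.ndarray, the frame to describe.
+    - question: str, the text prompt passed to the chat template.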
+ """ + image = Image.fromarray(image) + + messages = [ + { + "role": "user", + "content": [ + { + "type": "image", + "image": image, + }, + {"type": "text", "text": question}, + ], + } + ] + + # Preparation for inference + text = processor.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + image_inputs, video_inputs = process_vision_info(messages) + inputs = processor( + text=[text], + images=image_inputs, + videos=video_inputs, + padding=True, + return_tensors="pt", + ) + inputs = inputs.to("cuda") + + # Inference: Generation of the output + generated_ids = model.generate(**inputs, max_new_tokens=128) + generated_ids_trimmed = [ + out_ids[len(in_ids) :] + for in_ids, out_ids in zip(inputs.input_ids, generated_ids) + ] + output_text = processor.batch_decode( + generated_ids_trimmed, + skip_special_tokens=True, + clean_up_tokenization_spaces=False, + ) + return output_text[0] + + +def main(): + node = Node() + + question = DEFAULT_QUESTION + frame = None + pa.array([]) # initialize pyarrow array + + for event in node: + event_type = event["type"] + + if event_type == "INPUT": + event_id = event["id"] + + if event_id == "image": + storage = event["value"] + metadata = event["metadata"] + encoding = metadata["encoding"] + width = metadata["width"] + height = metadata["height"] + + if encoding == "bgr8": + channels = 3 + storage_type = np.uint8 + elif encoding == "rgb8": + channels = 3 + storage_type = np.uint8 + else: + raise RuntimeError(f"Unsupported image encoding: {encoding}") + + frame = ( + storage.to_numpy() + .astype(storage_type) + .reshape((height, width, channels)) + ) + if encoding == "bgr8": + frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB) + elif encoding == "rgb8": + pass + else: + raise RuntimeError(f"Unsupported image encoding: {encoding}") + + elif event_id == "tick": + if frame is None: + continue + response = generate(frame, question) + node.send_output( + "text", + pa.array([response]), + metadata, + ) + + elif event_id == "text": + text = event["value"][0].as_py() + if text != "": + question = text + if frame is None: + continue + # set the max number of tiles in `max_num` + response = generate(frame, question) + node.send_output( + "text", + pa.array([response]), + metadata, + ) + + elif event_type == "ERROR": + raise RuntimeError(event["error"]) + + +if __name__ == "__main__": + main() diff --git a/node-hub/dora-qwenvl/pyproject.toml b/node-hub/dora-qwenvl/pyproject.toml new file mode 100644 index 00000000..cbd5e131 --- /dev/null +++ b/node-hub/dora-qwenvl/pyproject.toml @@ -0,0 +1,30 @@ +[tool.poetry] +name = "dora-qwenvl" +version = "0.3.6-rc0" +authors = [ + "Haixuan Xavier Tao ", + "Enzo Le Van ", +] +description = "Dora Node for VLM" +readme = "README.md" + +packages = [{ include = "dora_qwenvl" }] + +[tool.poetry.dependencies] +python = "^3.7" +dora-rs = "^0.3.6" +numpy = "< 2.0.0" +torch = "^2.4.0" +torchvision = "^0.19" +transformers = { git = "https://github.com/huggingface/transformers" } +qwen-vl-utils = "^0.0.2" +accelerate = "^0.33" +flash-attention = "^2.6.1" + + +[tool.poetry.scripts] +dora-qwenvl = "dora_qwenvl.main:main" + +[build-system] +requires = ["poetry-core>=1.8.0"] +build-backend = "poetry.core.masonry.api" diff --git a/node-hub/dora-qwenvl/tests/test_dora_qwenvl.py b/node-hub/dora-qwenvl/tests/test_dora_qwenvl.py new file mode 100644 index 00000000..4105676f --- /dev/null +++ b/node-hub/dora-qwenvl/tests/test_dora_qwenvl.py @@ -0,0 +1,9 @@ +import pytest + + +def test_import_main(): + from dora_qwenvl.main 
import main + + # Check that everything is working, and catch dora Runtime Exception as we're not running in a dora dataflow. + with pytest.raises(RuntimeError): + main() diff --git a/node-hub/dora-rerun/src/main.rs b/node-hub/dora-rerun/src/main.rs index a3b9b895..b16272d9 100644 --- a/node-hub/dora-rerun/src/main.rs +++ b/node-hub/dora-rerun/src/main.rs @@ -104,7 +104,7 @@ fn main() -> Result<()> { rec.log(id.as_str(), &image) .context("could not log image")?; - } else if id.as_str().contains("textlog") { + } else if id.as_str().contains("text") { let buffer: StringArray = data.to_data().into(); buffer.iter().try_for_each(|string| -> Result<()> { if let Some(str) = string { diff --git a/node-hub/llama-factory-recorder/README.md b/node-hub/llama-factory-recorder/README.md new file mode 100644 index 00000000..9279a6d6 --- /dev/null +++ b/node-hub/llama-factory-recorder/README.md @@ -0,0 +1,3 @@ +# Dora Llama factory recorder + +Experimental node for recording for training llama based model. diff --git a/node-hub/llama-factory-recorder/llama_factory_recorder/__init__.py b/node-hub/llama-factory-recorder/llama_factory_recorder/__init__.py new file mode 100644 index 00000000..ac3cbef9 --- /dev/null +++ b/node-hub/llama-factory-recorder/llama_factory_recorder/__init__.py @@ -0,0 +1,11 @@ +import os + +# Define the path to the README file relative to the package directory +readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md") + +# Read the content of the README file +try: + with open(readme_path, "r", encoding="utf-8") as f: + __doc__ = f.read() +except FileNotFoundError: + __doc__ = "README file not found." diff --git a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py new file mode 100644 index 00000000..aa8deb10 --- /dev/null +++ b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py @@ -0,0 +1,193 @@ +import os +import json +from dora import Node +import numpy as np +import pyarrow as pa +from PIL import Image +from pathlib import Path + +DEFAULT_QUESTION = os.getenv( + "DEFAULT_QUESTION", + "Describe this image", +) +ENTRY_NAME = "dora_demo" +LLAMA_FACTORY_ROOT_PATH = Path(os.getenv("LLAMA_FACTORY_ROOT_PATH")) / "data" + + +# If JSON already exists, append incremental suffix to avoid overwriting +if (LLAMA_FACTORY_ROOT_PATH / ENTRY_NAME).exists(): + i = 1 + while (LLAMA_FACTORY_ROOT_PATH / f"{ENTRY_NAME}_{i}.json").exists(): + i += 1 + ENTRY_NAME = f"{ENTRY_NAME}_{i}" + + +DEFAULT_RECORD_IMAGE_ROOT_PATH = LLAMA_FACTORY_ROOT_PATH / ENTRY_NAME +DEFAULT_RECORD_JSON_PATH = LLAMA_FACTORY_ROOT_PATH / (ENTRY_NAME + ".json") + + +def write_dict_to_json(file_path, key: str, new_data): + """ + Writes a dictionary to a JSON file. If the file already contains a list of entries, + the new data will be appended to that list. Otherwise, it will create a new list. + + Parameters: + - file_path: str, the path to the JSON file. + - new_data: dict, the dictionary to add to the JSON file. 
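+    - key: str, the top-level key under which new_data is stored.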
+ """ + try: + # Open the JSON file and load its content + with open(file_path, "r+", encoding="utf-8") as file: + try: + data = json.load(file) + except json.JSONDecodeError: + data = {} + + data[key] = new_data + # Write the updated data back to the file + file.seek(0) + json.dump(data, file, indent=4, ensure_ascii=False) + file.truncate() + + except FileNotFoundError: + # If the file doesn't exist, create it and write the new data as a list + with open(file_path, "w", encoding="utf-8") as file: + json.dump({key: new_data}, file, indent=4, ensure_ascii=False) + + +write_dict_to_json( + LLAMA_FACTORY_ROOT_PATH / "dataset_info.json", + ENTRY_NAME, + { + "file_name": ENTRY_NAME + ".json", + "formatting": "sharegpt", + "columns": {"messages": "messages", "images": "images"}, + "tags": { + "role_tag": "role", + "content_tag": "content", + "user_tag": "user", + "assistant_tag": "assistant", + }, + }, +) + + +def save_image_and_add_to_json( + image_array, root_path, llama_root_path, jsonl_file, messages +): + """ + Saves an image from a NumPy array and adds a new JSON object as a line to a JSONL file. + The function generates a sequential numeric image filename starting from 0 and + follows the provided template structure. + + Parameters: + - image_array: numpy.ndarray, the image data as a NumPy array. + - root_path: str, the root directory where the image will be saved. + - jsonl_file: str, the path to the JSONL file. + - messages: list of dicts, each containing 'content' and 'role'. + + The image is saved as a PNG file, and the JSONL entry includes the 'messages' and 'images' keys. + """ + + # Create the root directory if it doesn't exist + os.makedirs(llama_root_path / root_path, exist_ok=True) + + # Get the current image ID by counting existing files + image_id = len( + [ + name + for name in os.listdir(llama_root_path / root_path) + if os.path.isfile(os.path.join(llama_root_path / root_path, name)) + ] + ) + + # Define the image filename + image_filename = f"{image_id}.png" + image_path = os.path.join(root_path, image_filename) + + # Save the image + image = Image.fromarray(image_array) + image.save(llama_root_path / image_path) + + # Create the JSON entry with 'messages' and 'images' + new_entry = {"messages": messages, "images": [image_path]} + + # Add the entry to the JSONL file with UTF-8 encoding + with open(jsonl_file, "a", encoding="utf-8") as f: + json_line = json.dumps(new_entry) + f.write(json_line + "\n") + + +def main(): + pa.array([]) # initialize pyarrow array + node = Node() + + question = DEFAULT_QUESTION + frame = None + + for event in node: + event_type = event["type"] + + if event_type == "INPUT": + event_id = event["id"] + + if event_id == "image": + storage = event["value"] + metadata = event["metadata"] + encoding = metadata["encoding"] + width = metadata["width"] + height = metadata["height"] + + if encoding == "bgr8": + channels = 3 + storage_type = np.uint8 + elif encoding == "rgb8": + channels = 3 + storage_type = np.uint8 + else: + raise RuntimeError(f"Unsupported image encoding: {encoding}") + + frame = ( + storage.to_numpy() + .astype(storage_type) + .reshape((height, width, channels)) + ) + if encoding == "bgr8": + frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB) + elif encoding == "rgb8": + pass + else: + raise RuntimeError(f"Unsupported image encoding: {encoding}") + + elif event_id == "text": + text = event["value"][0].as_py() + if text != "": + question = text + elif event_id == "ground_truth": + if frame is None: + continue + ground_truth = 
event["value"][0].as_py() + + messages = [ + {"content": "" + question, "role": "user"}, + { + "content": ground_truth, + "role": "assistant", + }, + ] + + save_image_and_add_to_json( + image_array=frame, + root_path=ENTRY_NAME, + llama_root_path=LLAMA_FACTORY_ROOT_PATH, + jsonl_file=DEFAULT_RECORD_JSON_PATH, + messages=messages, + ) + node.send_output( + "text", + pa.array([ground_truth]), + metadata, + ) + + elif event_type == "ERROR": + raise RuntimeError(event["error"]) diff --git a/node-hub/llama-factory-recorder/pyproject.toml b/node-hub/llama-factory-recorder/pyproject.toml new file mode 100644 index 00000000..34a55c0a --- /dev/null +++ b/node-hub/llama-factory-recorder/pyproject.toml @@ -0,0 +1,23 @@ +[tool.poetry] +name = "llama-factory-recorder" +version = "0.3.6-rc0" +authors = [ + "Haixuan Xavier Tao ", + "Enzo Le Van ", +] +description = "Dora Node for VLM" +readme = "README.md" + +packages = [{ include = "llama_factory_recorder" }] + +[tool.poetry.dependencies] +python = "^3.7" +dora-rs = "^0.3.6" +pillow = "^10.4.0" + +[tool.poetry.scripts] +llama-factory-recorder = "llama_factory_recorder.main:main" + +[build-system] +requires = ["poetry-core>=1.8.0"] +build-backend = "poetry.core.masonry.api" diff --git a/node-hub/llama-factory-recorder/tests/test_llama_factory_recorder.py b/node-hub/llama-factory-recorder/tests/test_llama_factory_recorder.py new file mode 100644 index 00000000..34dee34c --- /dev/null +++ b/node-hub/llama-factory-recorder/tests/test_llama_factory_recorder.py @@ -0,0 +1,9 @@ +import pytest + + +def test_import_main(): + from llama_factory_recorder.main import main + + # Check that everything is working, and catch dora Runtime Exception as we're not running in a dora dataflow. + with pytest.raises(RuntimeError): + main() From 526c168aa5d99daa31203894c037fa4892f7c98f Mon Sep 17 00:00:00 2001 From: haixuanTao Date: Mon, 2 Sep 2024 18:09:47 +0200 Subject: [PATCH 02/10] Minor fix --- node-hub/dora-qwenvl/dora_qwenvl/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index 904d2926..65112584 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -15,7 +15,7 @@ # default: Load the model on the available device(s) model = Qwen2VLForConditionalGeneration.from_pretrained( - PATH, + CUSTOM_MODEL_PATH, torch_dtype="auto", device_map="auto", attn_implementation="flash_attention_2", @@ -118,7 +118,7 @@ def main(): continue response = generate(frame, question) node.send_output( - "text", + "tick", pa.array([response]), metadata, ) From 006ee0122de9957b4946236280a24496df5890f6 Mon Sep 17 00:00:00 2001 From: haixuanTao Date: Tue, 3 Sep 2024 10:09:38 +0200 Subject: [PATCH 03/10] Update keyboard version --- node-hub/dora-keyboard/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/node-hub/dora-keyboard/pyproject.toml b/node-hub/dora-keyboard/pyproject.toml index 29b9308a..83b7c615 100644 --- a/node-hub/dora-keyboard/pyproject.toml +++ b/node-hub/dora-keyboard/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "dora-keyboard" -version = "0.3.5" +version = "0.3.6" authors = [ "Haixuan Xavier Tao ", "Enzo Le Van ", @@ -13,7 +13,7 @@ readme = "README.md" packages = [{ include = "dora_keyboard" }] [tool.poetry.dependencies] -dora-rs = "0.3.5" +dora-rs = "^0.3.6" numpy = "< 2.0.0" pyarrow = ">= 5.0.0" pynput = "^1.7.6" From 27e6833967e64d27835232b95c9a8b940f28dc13 Mon Sep 17 00:00:00 2001 
From: haixuanTao Date: Wed, 4 Sep 2024 08:29:44 +0200 Subject: [PATCH 04/10] Fix flash_attn naming --- node-hub/dora-qwenvl/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node-hub/dora-qwenvl/pyproject.toml b/node-hub/dora-qwenvl/pyproject.toml index cbd5e131..d5ca3b7b 100644 --- a/node-hub/dora-qwenvl/pyproject.toml +++ b/node-hub/dora-qwenvl/pyproject.toml @@ -19,7 +19,7 @@ torchvision = "^0.19" transformers = { git = "https://github.com/huggingface/transformers" } qwen-vl-utils = "^0.0.2" accelerate = "^0.33" -flash-attention = "^2.6.1" +flash_attn = "^2.6.1" [tool.poetry.scripts] From 39f3fd445261f684e1d954f0af06bfcd91387305 Mon Sep 17 00:00:00 2001 From: haixuanTao Date: Wed, 11 Sep 2024 11:58:10 +0200 Subject: [PATCH 05/10] Remove flash_attn from pyproject --- node-hub/dora-qwenvl/dora_qwenvl/main.py | 39 ++++++++++++++++-------- node-hub/dora-qwenvl/pyproject.toml | 2 +- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index 65112584..d659b4df 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -13,23 +13,32 @@ "Describe this image", ) -# default: Load the model on the available device(s) -model = Qwen2VLForConditionalGeneration.from_pretrained( - CUSTOM_MODEL_PATH, - torch_dtype="auto", - device_map="auto", - attn_implementation="flash_attention_2", -) +# Check if flash_attn is installed +try: + import flash_attn + + model = Qwen2VLForConditionalGeneration.from_pretrained( + CUSTOM_MODEL_PATH, + torch_dtype="auto", + device_map="auto", + attn_implementation="flash_attention_2", + ) +except ImportError: + model = Qwen2VLForConditionalGeneration.from_pretrained( + CUSTOM_MODEL_PATH, + torch_dtype="auto", + device_map="auto", + ) + # default processer processor = AutoProcessor.from_pretrained(DEFAULT_PATH) -def generate(image: np.array, question): +def generate(frames: dict, question): """ Generate the response to the question given the image using Qwen2 model. 
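 
-    Parameters:
-    - image: np.ndarray, the frame to describe.
+    Parameters:
+    - frames: dict mapping image input IDs to PIL images; all stored frames are sent.
     - question: str, the text prompt passed to the chat template.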
""" - image = Image.fromarray(image) messages = [ { @@ -38,7 +47,10 @@ def generate(image: np.array, question): { "type": "image", "image": image, - }, + } + for image in frames.values() + ] + + [ {"type": "text", "text": question}, ], } @@ -73,11 +85,11 @@ def generate(image: np.array, question): def main(): + pa.array([]) # initialize pyarrow array node = Node() question = DEFAULT_QUESTION - frame = None - pa.array([]) # initialize pyarrow array + frames = {} for event in node: event_type = event["type"] @@ -85,7 +97,7 @@ def main(): if event_type == "INPUT": event_id = event["id"] - if event_id == "image": + if "image" in event_id: storage = event["value"] metadata = event["metadata"] encoding = metadata["encoding"] @@ -112,6 +124,7 @@ def main(): pass else: raise RuntimeError(f"Unsupported image encoding: {encoding}") + frames[event_id] = Image.fromarray(frame) elif event_id == "tick": if frame is None: diff --git a/node-hub/dora-qwenvl/pyproject.toml b/node-hub/dora-qwenvl/pyproject.toml index d5ca3b7b..8df2d80b 100644 --- a/node-hub/dora-qwenvl/pyproject.toml +++ b/node-hub/dora-qwenvl/pyproject.toml @@ -19,7 +19,7 @@ torchvision = "^0.19" transformers = { git = "https://github.com/huggingface/transformers" } qwen-vl-utils = "^0.0.2" accelerate = "^0.33" -flash_attn = "^2.6.1" +# flash_attn = "^2.6.1" # Install using: pip install -U flash-attn --no-build-isolation [tool.poetry.scripts] From 28a7df85196cbcae26e44b3f59ceaa25b52bb32a Mon Sep 17 00:00:00 2001 From: haixuanTao Date: Wed, 11 Sep 2024 12:28:38 +0200 Subject: [PATCH 06/10] Fix pylint --- node-hub/dora-qwenvl/dora_qwenvl/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index d659b4df..92294b48 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -15,7 +15,7 @@ # Check if flash_attn is installed try: - import flash_attn + import flash_attn as _ model = Qwen2VLForConditionalGeneration.from_pretrained( CUSTOM_MODEL_PATH, From 3936dfaac11291f3a9db44d8252441fd04d9335e Mon Sep 17 00:00:00 2001 From: haixuanTao Date: Wed, 11 Sep 2024 12:45:55 +0200 Subject: [PATCH 07/10] Fix llama factory errir --- .../llama_factory_recorder/main.py | 68 +++++++++---------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py index aa8deb10..ceeeebbf 100644 --- a/node-hub/llama-factory-recorder/llama_factory_recorder/main.py +++ b/node-hub/llama-factory-recorder/llama_factory_recorder/main.py @@ -10,20 +10,6 @@ "DEFAULT_QUESTION", "Describe this image", ) -ENTRY_NAME = "dora_demo" -LLAMA_FACTORY_ROOT_PATH = Path(os.getenv("LLAMA_FACTORY_ROOT_PATH")) / "data" - - -# If JSON already exists, append incremental suffix to avoid overwriting -if (LLAMA_FACTORY_ROOT_PATH / ENTRY_NAME).exists(): - i = 1 - while (LLAMA_FACTORY_ROOT_PATH / f"{ENTRY_NAME}_{i}.json").exists(): - i += 1 - ENTRY_NAME = f"{ENTRY_NAME}_{i}" - - -DEFAULT_RECORD_IMAGE_ROOT_PATH = LLAMA_FACTORY_ROOT_PATH / ENTRY_NAME -DEFAULT_RECORD_JSON_PATH = LLAMA_FACTORY_ROOT_PATH / (ENTRY_NAME + ".json") def write_dict_to_json(file_path, key: str, new_data): @@ -55,23 +41,6 @@ def write_dict_to_json(file_path, key: str, new_data): json.dump({key: new_data}, file, indent=4, ensure_ascii=False) -write_dict_to_json( - LLAMA_FACTORY_ROOT_PATH / "dataset_info.json", - ENTRY_NAME, - { - "file_name": ENTRY_NAME 
+ ".json", - "formatting": "sharegpt", - "columns": {"messages": "messages", "images": "images"}, - "tags": { - "role_tag": "role", - "content_tag": "content", - "user_tag": "user", - "assistant_tag": "assistant", - }, - }, -) - - def save_image_and_add_to_json( image_array, root_path, llama_root_path, jsonl_file, messages ): @@ -122,6 +91,37 @@ def main(): pa.array([]) # initialize pyarrow array node = Node() + assert os.getenv( + "LLAMA_FACTORY_ROOT_PATH" + ), "LLAMA_FACTORY_ROOT_PATH is not set, Either git clone the repo or set the environment variable" + llama_factory_root_path = Path(os.getenv("LLAMA_FACTORY_ROOT_PATH")) / "data" + + entry_name = os.getenv("ENTRY_NAME", "dora_demo") + # If JSON already exists, append incremental suffix to avoid overwriting + if (llama_factory_root_path / entry_name).exists(): + i = 1 + while (llama_factory_root_path / f"{entry_name}_{i}.json").exists(): + i += 1 + entry_name = f"{entry_name}_{i}" + + default_record_json_path = llama_factory_root_path / (entry_name + ".json") + + write_dict_to_json( + llama_factory_root_path / "dataset_info.json", + entry_name, + { + "file_name": entry_name + ".json", + "formatting": "sharegpt", + "columns": {"messages": "messages", "images": "images"}, + "tags": { + "role_tag": "role", + "content_tag": "content", + "user_tag": "user", + "assistant_tag": "assistant", + }, + }, + ) + question = DEFAULT_QUESTION frame = None @@ -178,9 +178,9 @@ def main(): save_image_and_add_to_json( image_array=frame, - root_path=ENTRY_NAME, - llama_root_path=LLAMA_FACTORY_ROOT_PATH, - jsonl_file=DEFAULT_RECORD_JSON_PATH, + root_path=entry_name, + llama_root_path=llama_factory_root_path, + jsonl_file=default_record_json_path, messages=messages, ) node.send_output( From ae622094ec504277f699f752adefccebba01073e Mon Sep 17 00:00:00 2001 From: haixuanTao Date: Wed, 11 Sep 2024 13:48:00 +0200 Subject: [PATCH 08/10] Fix typo --- node-hub/dora-qwenvl/dora_qwenvl/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index 92294b48..59235559 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -31,7 +31,7 @@ ) -# default processer +# default processor processor = AutoProcessor.from_pretrained(DEFAULT_PATH) From c99dcf5c303cf227e7726b01377b321c612b741c Mon Sep 17 00:00:00 2001 From: haixuanTao Date: Thu, 12 Sep 2024 10:42:46 +0200 Subject: [PATCH 09/10] Add support for multi-image inference --- node-hub/dora-qwenvl/dora_qwenvl/main.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/node-hub/dora-qwenvl/dora_qwenvl/main.py b/node-hub/dora-qwenvl/dora_qwenvl/main.py index 59235559..bb182e0b 100644 --- a/node-hub/dora-qwenvl/dora_qwenvl/main.py +++ b/node-hub/dora-qwenvl/dora_qwenvl/main.py @@ -127,27 +127,27 @@ def main(): frames[event_id] = Image.fromarray(frame) elif event_id == "tick": - if frame is None: + if len(frames.keys()) == 0: continue - response = generate(frame, question) + response = generate(frames, question) node.send_output( "tick", pa.array([response]), - metadata, + {}, ) elif event_id == "text": text = event["value"][0].as_py() if text != "": question = text - if frame is None: + if len(frames.keys()) == 0: continue # set the max number of tiles in `max_num` - response = generate(frame, question) + response = generate(frames, question) node.send_output( "text", pa.array([response]), - metadata, + {}, ) elif event_type == "ERROR": From 
e1742f9ee24d75423982bf2a934bc98b646b2758 Mon Sep 17 00:00:00 2001 From: haixuanTao Date: Fri, 13 Sep 2024 10:12:03 +0200 Subject: [PATCH 10/10] Adding tick within the VLM example --- examples/vlm/dataflow.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/examples/vlm/dataflow.yml b/examples/vlm/dataflow.yml index ac12cce7..51371b67 100644 --- a/examples/vlm/dataflow.yml +++ b/examples/vlm/dataflow.yml @@ -18,8 +18,12 @@ nodes: image: source: camera/image queue_size: 1 + tick: dora/timer/millis/300 outputs: - text + - tick + env: + DEFAULT_QUESTION: Describe the image. - id: plot build: pip install ../../node-hub/opencv-plot @@ -28,4 +32,4 @@ nodes: image: source: camera/image queue_size: 1 - text: dora-qwenvl/text + text: dora-qwenvl/tick
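
For completeness, here is one way the new llama-factory-recorder node could be wired into the VLM dataflow above. This sketch is an illustrative assumption rather than part of the series: the input names mirror the `image`, `text`, and `ground_truth` events handled in its main.py, and the `LLAMA_FACTORY_ROOT_PATH` value is a placeholder that must point at a local LLaMA-Factory checkout.

```yaml
  - id: llama-factory-recorder
    build: pip install -e ../../node-hub/llama-factory-recorder
    path: llama-factory-recorder
    inputs:
      image:
        source: camera/image
        queue_size: 1
      ground_truth: dora-qwenvl/tick # hypothetical: record the VLM's own replies as answers
    outputs:
      - text
    env:
      LLAMA_FACTORY_ROOT_PATH: /path/to/LLaMA-Factory # placeholder path
```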