From 539e84c58f8ac08d8764573f402057e58ca6d7d2 Mon Sep 17 00:00:00 2001
From: Chen Zhang <zhangch99@outlook.com>
Date: Mon, 30 Dec 2024 19:43:18 -0800
Subject: [PATCH 1/2] simplify vision hash

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
---
 tests/v1/core/test_prefix_caching.py | 8 ++++----
 vllm/v1/core/kv_cache_utils.py       | 7 ++++---
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tests/v1/core/test_prefix_caching.py b/tests/v1/core/test_prefix_caching.py
index ed04f0a373c51..b7e39735b662f 100644
--- a/tests/v1/core/test_prefix_caching.py
+++ b/tests/v1/core/test_prefix_caching.py
@@ -469,9 +469,9 @@ def test_mm_prefix_caching():
     # Completed block should have hashes with extra keys.
     assert not computed_blocks
     assert len(req0.kv_block_hashes) == 3
-    assert req0.kv_block_hashes[0].extra_keys == (("aaa", 0), )
-    assert req0.kv_block_hashes[1].extra_keys == (("aaa", 5), ("bbb", 0))
-    assert req0.kv_block_hashes[2].extra_keys == (("bbb", 2), )
+    assert req0.kv_block_hashes[0].extra_keys == ("aaa", )
+    assert req0.kv_block_hashes[1].extra_keys == ("bbb", )
+    assert req0.kv_block_hashes[2].extra_keys == tuple()
 
     blocks = manager.allocate_slots(req0, 59, computed_blocks)
     assert [b.block_id for b in blocks] == [0, 1, 2, 3, 4]
@@ -485,7 +485,7 @@ def test_mm_prefix_caching():
 
     # The just completed block should have hashes with extra keys.
     assert len(req0.kv_block_hashes) == 4
-    assert req0.kv_block_hashes[3].extra_keys == (("ccc", 0), )
+    assert req0.kv_block_hashes[3].extra_keys == ("ccc", )
 
     # Cache hit.
     unique_token_ids = [-1] * 7 + [200] * 5
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index 9ddbff7c9a604..7a4b675261f42 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -217,9 +217,10 @@ def generate_block_hash_extra_keys(
                 curr_mm_idx += 1
                 continue
 
-            # The block contains the current mm input.
-            mm_start = max(0, start_token_idx - offset)
-            extra_keys.append((mm_hashes[curr_mm_idx], mm_start))
+            if start_token_idx <= offset:
+                # This block contains the start of the current mm input.
+                extra_keys.append(mm_hashes[curr_mm_idx])
+
             if end_token_idx >= offset + length:
                 # If this block contains the end of the current mm input,
                 # move to the next mm input as this block may also contain

From 8537a9bae7189a9204facf476e00fb435680c8b9 Mon Sep 17 00:00:00 2001
From: Chen Zhang <zhangch99@outlook.com>
Date: Mon, 30 Dec 2024 20:21:15 -0800
Subject: [PATCH 2/2] only remove offset from mm extra keys

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
---
 tests/v1/core/test_prefix_caching.py | 4 ++--
 vllm/v1/core/kv_cache_utils.py       | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/v1/core/test_prefix_caching.py b/tests/v1/core/test_prefix_caching.py
index b7e39735b662f..c21606057fa02 100644
--- a/tests/v1/core/test_prefix_caching.py
+++ b/tests/v1/core/test_prefix_caching.py
@@ -470,8 +470,8 @@ def test_mm_prefix_caching():
     assert not computed_blocks
     assert len(req0.kv_block_hashes) == 3
     assert req0.kv_block_hashes[0].extra_keys == ("aaa", )
-    assert req0.kv_block_hashes[1].extra_keys == ("bbb", )
-    assert req0.kv_block_hashes[2].extra_keys == tuple()
+    assert req0.kv_block_hashes[1].extra_keys == ("aaa", "bbb")
+    assert req0.kv_block_hashes[2].extra_keys == ("bbb", )
 
     blocks = manager.allocate_slots(req0, 59, computed_blocks)
     assert [b.block_id for b in blocks] == [0, 1, 2, 3, 4]
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index 7a4b675261f42..84ff48bf428a0 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -217,9 +217,8 @@ def generate_block_hash_extra_keys(
                 curr_mm_idx += 1
                 continue
 
-            if start_token_idx <= offset:
-                # This block contains the start of the current mm input.
-                extra_keys.append(mm_hashes[curr_mm_idx])
+            # The block overlaps (fully or partly) the current mm input.
+            extra_keys.append(mm_hashes[curr_mm_idx])
 
             if end_token_idx >= offset + length:
                 # If this block contains the end of the current mm input,