From 0f3c74f02bc834e2112fb42515f22cdcc0c54394 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 27 Dec 2024 14:54:58 +0800
Subject: [PATCH 01/12] trigger edge case on internvl intentionally
Signed-off-by: Isotr0py <2037008807@qq.com>
---
vllm/model_executor/models/intern_vit.py | 12 ++++++++++--
vllm/model_executor/models/internvl.py | 14 +++++++++++++-
2 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/vllm/model_executor/models/intern_vit.py b/vllm/model_executor/models/intern_vit.py
index 7ff68bd60e8ad..508d3557db2fb 100644
--- a/vllm/model_executor/models/intern_vit.py
+++ b/vllm/model_executor/models/intern_vit.py
@@ -155,13 +155,21 @@ def __init__(
self.tp_size)
self.scale = self.head_dim**-0.5
- self.qkv = QKVParallelLinear(
+ # self.qkv = QKVParallelLinear(
+ # self.embed_dim,
+ # self.head_dim,
+ # num_dummy_heads + self.num_heads,
+ # bias=config.qkv_bias,
+ # quant_config=quant_config,
+ # prefix=f"{prefix}.qkv",
+ # )
+ self.qkv_proj = QKVParallelLinear(
self.embed_dim,
self.head_dim,
num_dummy_heads + self.num_heads,
bias=config.qkv_bias,
quant_config=quant_config,
- prefix=f"{prefix}.qkv",
+ prefix=f"{prefix}.qkv_proj",
)
self.qk_normalization = config.qk_normalization
diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py
index f4b7e4478c164..b4a91c05650e5 100644
--- a/vllm/model_executor/models/internvl.py
+++ b/vllm/model_executor/models/internvl.py
@@ -34,7 +34,7 @@
from .clip import (dummy_image_for_clip, dummy_seq_data_for_clip,
get_clip_num_patches)
from .interfaces import SupportsMultiModal, SupportsPP
-from .utils import (AutoWeightsLoader, flatten_bn, init_vllm_registered_model,
+from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, init_vllm_registered_model,
maybe_prefix, merge_multimodal_embeddings)
IMG_START = ''
@@ -473,6 +473,18 @@ def dummy_data(
@INPUT_REGISTRY.register_input_processor(input_pipeline.input_processor)
class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP):
+ # BitandBytes specific attributes
+ bitsandbytes_stacked_params_mapping = {
+ # shard_name, weight_name, index
+ "q_proj": ("qkv_proj", 0),
+ "k_proj": ("qkv_proj", 1),
+ "v_proj": ("qkv_proj", 2),
+ "gate_proj": ("gate_up_proj", 0),
+ "up_proj": ("gate_up_proj", 1),
+ }
+
+ hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={".qkv.": ".qkv_proj."})
+
def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
super().__init__()
From c5eac0816fb9c9ae88c1deec6d8fc5afdac78957 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 27 Dec 2024 15:15:00 +0800
Subject: [PATCH 02/12] trigger edge case on internvl intentionally
Signed-off-by: Isotr0py <2037008807@qq.com>
---
vllm/model_executor/models/internvl.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py
index b4a91c05650e5..89bdff56e396b 100644
--- a/vllm/model_executor/models/internvl.py
+++ b/vllm/model_executor/models/internvl.py
@@ -483,7 +483,7 @@ class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP):
"up_proj": ("gate_up_proj", 1),
}
- hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={".qkv.": ".qkv_proj."})
+ hf_to_vllm_mapper = WeightsMapper(orig_to_new_substr={".qkv.": ".qkv_proj."})
def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
super().__init__()
From 4e8ed740570373c32cdc57815e59671dba2e4f06 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 27 Dec 2024 16:01:53 +0800
Subject: [PATCH 03/12] trigger edge case on internvl intentionally
Signed-off-by: Isotr0py <2037008807@qq.com>
---
vllm/model_executor/models/internvl.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py
index 89bdff56e396b..42b50543fb85a 100644
--- a/vllm/model_executor/models/internvl.py
+++ b/vllm/model_executor/models/internvl.py
@@ -786,4 +786,4 @@ def sample(
def load_weights(self, weights: Iterable[Tuple[str,
torch.Tensor]]) -> Set[str]:
loader = AutoWeightsLoader(self)
- return loader.load_weights(weights)
+ return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
From 1b0edd3e06f3e0fb3fa82af49e367e4bfb0e65ea Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 27 Dec 2024 16:12:42 +0800
Subject: [PATCH 04/12] trigger edge case on internvl intentionally
Signed-off-by: Isotr0py <2037008807@qq.com>
---
vllm/model_executor/models/intern_vit.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/vllm/model_executor/models/intern_vit.py b/vllm/model_executor/models/intern_vit.py
index 508d3557db2fb..e9cc3d7394e87 100644
--- a/vllm/model_executor/models/intern_vit.py
+++ b/vllm/model_executor/models/intern_vit.py
@@ -207,7 +207,8 @@ def _apply_qk_norm(self, q: torch.Tensor, k: torch.Tensor):
def forward(self, x: torch.Tensor) -> torch.Tensor:
B, N, _ = x.shape
- qkv, _ = self.qkv(x)
+ # qkv, _ = self.qkv(x)
+ qkv, _ = self.qkv_proj(x)
q, k, v = qkv.chunk(3, dim=-1)
if self.qk_normalization:
From 02f8a5d86112cb1aad751b57a6031b992799e3b1 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 27 Dec 2024 16:50:24 +0800
Subject: [PATCH 05/12] handle target modules
Signed-off-by: Isotr0py <2037008807@qq.com>
---
vllm/model_executor/model_loader/loader.py | 3 +--
vllm/model_executor/models/phi3.py | 4 ----
2 files changed, 1 insertion(+), 6 deletions(-)
diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
index f2d9293b31a83..272ca628a2a58 100644
--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -995,8 +995,7 @@ def _get_bnb_target_modules(self, model: nn.Module) -> None:
for sub_name in sub_modules:
self.target_modules.append(
name.replace(last_name, sub_name))
- else:
- self.target_modules.append(name)
+ self.target_modules.append(name)
assert (self.target_modules
), "vllm currently does not support BNB quantization for"
f" {type(model).__name__}"
diff --git a/vllm/model_executor/models/phi3.py b/vllm/model_executor/models/phi3.py
index 937858ee3b8c2..34141511ea791 100644
--- a/vllm/model_executor/models/phi3.py
+++ b/vllm/model_executor/models/phi3.py
@@ -14,7 +14,3 @@ class Phi3ForCausalLM(LlamaForCausalLM):
"gate_up_proj",
],
}
-
- # BitandBytes specific attributes
- # Initialize an empty dict when there is no stacked parameter mapping.
- bitsandbytes_stacked_params_mapping = {}
From d2dee347a8e14e002ea66a9c67477a0950f80d02 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 27 Dec 2024 16:58:12 +0800
Subject: [PATCH 06/12] revert phi3
Signed-off-by: Isotr0py <2037008807@qq.com>
---
vllm/model_executor/models/phi3.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/vllm/model_executor/models/phi3.py b/vllm/model_executor/models/phi3.py
index 34141511ea791..937858ee3b8c2 100644
--- a/vllm/model_executor/models/phi3.py
+++ b/vllm/model_executor/models/phi3.py
@@ -14,3 +14,7 @@ class Phi3ForCausalLM(LlamaForCausalLM):
"gate_up_proj",
],
}
+
+ # BitandBytes specific attributes
+ # Initialize an empty dict when there is no stacked parameter mapping.
+ bitsandbytes_stacked_params_mapping = {}
From d732bb0fed299eb57800941ebc5e25ff68582b17 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 27 Dec 2024 17:07:05 +0800
Subject: [PATCH 07/12] add comments
Signed-off-by: Isotr0py <2037008807@qq.com>
---
vllm/model_executor/model_loader/loader.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
index 272ca628a2a58..ee405a30e10fd 100644
--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -995,7 +995,11 @@ def _get_bnb_target_modules(self, model: nn.Module) -> None:
for sub_name in sub_modules:
self.target_modules.append(
name.replace(last_name, sub_name))
+ # we also add original module name in case that model has
+ # a mixture of disk-merged and disk-splitted weights with
+ # same last name.
self.target_modules.append(name)
+
assert (self.target_modules
), "vllm currently does not support BNB quantization for"
f" {type(model).__name__}"
From 5f4934f3ab3112de7802bb51349cf719455aec18 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 27 Dec 2024 17:09:48 +0800
Subject: [PATCH 08/12] revert internvl
Signed-off-by: Isotr0py <2037008807@qq.com>
---
vllm/model_executor/models/intern_vit.py | 17 ++++-------------
vllm/model_executor/models/internvl.py | 16 ++--------------
2 files changed, 6 insertions(+), 27 deletions(-)
diff --git a/vllm/model_executor/models/intern_vit.py b/vllm/model_executor/models/intern_vit.py
index e9cc3d7394e87..20cd180ea8a01 100644
--- a/vllm/model_executor/models/intern_vit.py
+++ b/vllm/model_executor/models/intern_vit.py
@@ -155,21 +155,13 @@ def __init__(
self.tp_size)
self.scale = self.head_dim**-0.5
- # self.qkv = QKVParallelLinear(
- # self.embed_dim,
- # self.head_dim,
- # num_dummy_heads + self.num_heads,
- # bias=config.qkv_bias,
- # quant_config=quant_config,
- # prefix=f"{prefix}.qkv",
- # )
- self.qkv_proj = QKVParallelLinear(
+ self.qkv = QKVParallelLinear(
self.embed_dim,
self.head_dim,
num_dummy_heads + self.num_heads,
bias=config.qkv_bias,
quant_config=quant_config,
- prefix=f"{prefix}.qkv_proj",
+ prefix=f"{prefix}.qkv",
)
self.qk_normalization = config.qk_normalization
@@ -207,8 +199,7 @@ def _apply_qk_norm(self, q: torch.Tensor, k: torch.Tensor):
def forward(self, x: torch.Tensor) -> torch.Tensor:
B, N, _ = x.shape
- # qkv, _ = self.qkv(x)
- qkv, _ = self.qkv_proj(x)
+ qkv, _ = self.qkv(x)
q, k, v = qkv.chunk(3, dim=-1)
if self.qk_normalization:
@@ -480,4 +471,4 @@ def load_weights(self, weights: Iterable[Tuple[str,
default_weight_loader)
weight_loader(param, loaded_weight)
loaded_params.add(name)
- return loaded_params
+ return loaded_params
\ No newline at end of file
diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py
index 42b50543fb85a..d4d67a0bc4c33 100644
--- a/vllm/model_executor/models/internvl.py
+++ b/vllm/model_executor/models/internvl.py
@@ -34,7 +34,7 @@
from .clip import (dummy_image_for_clip, dummy_seq_data_for_clip,
get_clip_num_patches)
from .interfaces import SupportsMultiModal, SupportsPP
-from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, init_vllm_registered_model,
+from .utils import (AutoWeightsLoader, flatten_bn, init_vllm_registered_model,
maybe_prefix, merge_multimodal_embeddings)
IMG_START = ''
@@ -473,18 +473,6 @@ def dummy_data(
@INPUT_REGISTRY.register_input_processor(input_pipeline.input_processor)
class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP):
- # BitandBytes specific attributes
- bitsandbytes_stacked_params_mapping = {
- # shard_name, weight_name, index
- "q_proj": ("qkv_proj", 0),
- "k_proj": ("qkv_proj", 1),
- "v_proj": ("qkv_proj", 2),
- "gate_proj": ("gate_up_proj", 0),
- "up_proj": ("gate_up_proj", 1),
- }
-
- hf_to_vllm_mapper = WeightsMapper(orig_to_new_substr={".qkv.": ".qkv_proj."})
-
def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
super().__init__()
@@ -786,4 +774,4 @@ def sample(
def load_weights(self, weights: Iterable[Tuple[str,
torch.Tensor]]) -> Set[str]:
loader = AutoWeightsLoader(self)
- return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
+ return loader.load_weights(weights)
\ No newline at end of file
From e2381087f2d0943609e7b9306e83a9d722f275d3 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 27 Dec 2024 17:10:21 +0800
Subject: [PATCH 09/12] code format
Signed-off-by: Isotr0py <2037008807@qq.com>
---
vllm/model_executor/model_loader/loader.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
index ee405a30e10fd..c209926cfa7c8 100644
--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -995,7 +995,7 @@ def _get_bnb_target_modules(self, model: nn.Module) -> None:
for sub_name in sub_modules:
self.target_modules.append(
name.replace(last_name, sub_name))
- # we also add original module name in case that model has
+ # we also add original module name in case that model has
# a mixture of disk-merged and disk-splitted weights with
# same last name.
self.target_modules.append(name)
From 0df51657c11818653d25adeadcb1e62fa097e9e8 Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 27 Dec 2024 17:11:15 +0800
Subject: [PATCH 10/12] revert internvl
Signed-off-by: Isotr0py <2037008807@qq.com>
---
vllm/model_executor/models/intern_vit.py | 2 +-
vllm/model_executor/models/internvl.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/vllm/model_executor/models/intern_vit.py b/vllm/model_executor/models/intern_vit.py
index 20cd180ea8a01..7ff68bd60e8ad 100644
--- a/vllm/model_executor/models/intern_vit.py
+++ b/vllm/model_executor/models/intern_vit.py
@@ -471,4 +471,4 @@ def load_weights(self, weights: Iterable[Tuple[str,
default_weight_loader)
weight_loader(param, loaded_weight)
loaded_params.add(name)
- return loaded_params
\ No newline at end of file
+ return loaded_params
diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py
index d4d67a0bc4c33..f4b7e4478c164 100644
--- a/vllm/model_executor/models/internvl.py
+++ b/vllm/model_executor/models/internvl.py
@@ -774,4 +774,4 @@ def sample(
def load_weights(self, weights: Iterable[Tuple[str,
torch.Tensor]]) -> Set[str]:
loader = AutoWeightsLoader(self)
- return loader.load_weights(weights)
\ No newline at end of file
+ return loader.load_weights(weights)
From c372d3faba888bc611a37d08950f6fc29ee4ebdb Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 27 Dec 2024 21:55:55 +0800
Subject: [PATCH 11/12] update comments
Signed-off-by: Isotr0py <2037008807@qq.com>
---
vllm/model_executor/model_loader/loader.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
index c209926cfa7c8..d0c77c841ede7 100644
--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -995,9 +995,9 @@ def _get_bnb_target_modules(self, model: nn.Module) -> None:
for sub_name in sub_modules:
self.target_modules.append(
name.replace(last_name, sub_name))
- # we also add original module name in case that model has
- # a mixture of disk-merged and disk-splitted weights with
- # same last name.
+ # Add original module name even if the module has stacked map,
+ # in case model has a mixture of disk-merged and disk-splitted
+ # weights with same last name.
self.target_modules.append(name)
assert (self.target_modules
From b89cee37a794cb1aa8019c803d667d75ea0d40bb Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Fri, 27 Dec 2024 22:01:15 +0800
Subject: [PATCH 12/12] code format
Signed-off-by: Isotr0py <2037008807@qq.com>
---
vllm/model_executor/model_loader/loader.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/vllm/model_executor/model_loader/loader.py b/vllm/model_executor/model_loader/loader.py
index d0c77c841ede7..1fe887f3c40f8 100644
--- a/vllm/model_executor/model_loader/loader.py
+++ b/vllm/model_executor/model_loader/loader.py
@@ -996,7 +996,7 @@ def _get_bnb_target_modules(self, model: nn.Module) -> None:
self.target_modules.append(
name.replace(last_name, sub_name))
# Add original module name even if the module has stacked map,
- # in case model has a mixture of disk-merged and disk-splitted
+ # in case model has a mixture of disk-merged and disk-split
# weights with same last name.
self.target_modules.append(name)