From 9dd02d85ca801c99241317a8061bd025c726af93 Mon Sep 17 00:00:00 2001
From: Siyuan Li <94890248+liaoyanqing666@users.noreply.github.com>
Date: Mon, 13 Jan 2025 14:24:10 +0800
Subject: [PATCH] [Bug] Fix usage of `.transpose()` and `.view()` consecutively. (#11979)

---
 vllm/attention/layer.py                   | 2 +-
 vllm/model_executor/models/intern_vit.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py
index 55e4e14027f79..b8afd428f2cc0 100644
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -230,7 +230,7 @@ def forward(
                                              value,
                                              scale=self.scale)
         out = out.transpose(1, 2)
-        return out.view(bsz, q_len, -1)
+        return out.reshape(bsz, q_len, -1)
 
 
 def unified_attention(
diff --git a/vllm/model_executor/models/intern_vit.py b/vllm/model_executor/models/intern_vit.py
index 7ff68bd60e8ad..8ad009d5101e4 100644
--- a/vllm/model_executor/models/intern_vit.py
+++ b/vllm/model_executor/models/intern_vit.py
@@ -271,7 +271,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         v = v.transpose(1, 2)
 
         x = F.scaled_dot_product_attention(q, k, v, scale=self.scale)
-        x = x.transpose(1, 2).view(B, N, -1)
+        x = x.transpose(1, 2).reshape(B, N, -1)
 
         x = self.proj(x)
         return x
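
Note (not part of the patch): a minimal standalone sketch of why the change is needed, using made-up tensor shapes rather than values from the patch. `.transpose()` returns a non-contiguous view of the tensor, so a subsequent `.view()` raises a RuntimeError, whereas `.reshape()` falls back to a copy when the memory layout requires it.

    import torch

    # Illustration only: shapes are arbitrary, not taken from the patch.
    bsz, num_heads, q_len, head_dim = 2, 8, 4, 16
    out = torch.randn(bsz, num_heads, q_len, head_dim)
    out = out.transpose(1, 2)  # (bsz, q_len, num_heads, head_dim), non-contiguous

    try:
        out.view(bsz, q_len, -1)  # view() requires contiguous memory -> raises
    except RuntimeError as err:
        print("view() failed:", err)

    merged = out.reshape(bsz, q_len, -1)  # reshape() copies when needed
    print(merged.shape)  # torch.Size([2, 4, 128])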