-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AArch64] Fix arm neon vstx lane memVT size
The memory size recorded for StN lane stores was set too large, which caused alias analysis to produce wrong results. Fixes llvm/llvm-project#64696. Differential Revision: https://reviews.llvm.org/D158611 (cherry picked from commit db8f6c009e5a17d304be7404e50eb20b2dd0c75b)
- Loading branch information
Showing
4 changed files
with
185 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
; RUN: llc < %s -mtriple=arm64-none-linux-gnu -mattr=+neon -O2 | FileCheck %s | ||
|
||
; The st2 must be emitted before the two ldrb instructions. | ||
; Previously, one ldrb could be placed before the st2 because of the conservative memVT set for st2lane, | ||
; which led basic-aa to a wrong conclusion. | ||
|
||
; test_vst2_lane_u8: stores lane 6 of both input vectors into a 2-byte alloca with st2lane, | ||
; reloads both bytes, and compares each with the same lane extracted from its source vector. | ||
; Returns 1 if either reloaded byte differs from the extracted lane, otherwise 0. | ||
define dso_local i32 @test_vst2_lane_u8([2 x <8 x i8>] %vectors.coerce) local_unnamed_addr { | ||
; CHECK-LABEL: test_vst2_lane_u8: | ||
; Expected order in the output: the st2 lane store, then umov, then the two byte loads. | ||
; NOTE(review): the W2 capture is defined by both ldrb patterns and no capture is reused later. | ||
; CHECK: st2 { v[[V1:[0-9]+]].b, v[[V2:[0-9]+]].b }[6], [x8] | ||
; CHECK-NEXT: umov w[[W1:[0-9]+]], v[[V12:[0-9]+]].b[6] | ||
; CHECK-NEXT: ldrb w[[W2:[0-9]+]], [sp, #12] | ||
; CHECK-NEXT: ldrb w[[W2:[0-9]+]], [sp, #13] | ||
entry: | ||
%temp = alloca [2 x i8], align 4 | ||
; Split the coerced two-element array argument into its two <8 x i8> vectors. | ||
%vectors.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %vectors.coerce, 0 | ||
%vectors.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %vectors.coerce, 1 | ||
; NOTE(review): attribute group #4 is referenced on the lifetime calls but is not defined in the visible lines. | ||
call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %temp) #4 | ||
; Lane index 6 of both vectors is written to %temp by the st2lane intrinsic. | ||
call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %vectors.coerce.fca.0.extract, <8 x i8> %vectors.coerce.fca.1.extract, i64 6, ptr nonnull %temp) | ||
; Reload byte 0 of %temp and compare it with lane 6 of the first vector. | ||
%0 = load i8, ptr %temp, align 4 | ||
%vget_lane = extractelement <8 x i8> %vectors.coerce.fca.0.extract, i64 6 | ||
%cmp8.not = icmp ne i8 %0, %vget_lane | ||
; Reload byte 1 of %temp and compare it with lane 6 of the second vector. | ||
%arrayidx3.1 = getelementptr inbounds [2 x i8], ptr %temp, i64 0, i64 1 | ||
%1 = load i8, ptr %arrayidx3.1, align 1 | ||
%vget_lane.1 = extractelement <8 x i8> %vectors.coerce.fca.1.extract, i64 6 | ||
%cmp8.not.1 = icmp ne i8 %1, %vget_lane.1 | ||
; select(a, true, b) yields true when either mismatch flag is set. | ||
%or.cond = select i1 %cmp8.not, i1 true, i1 %cmp8.not.1 | ||
%cmp.lcssa = zext i1 %or.cond to i32 | ||
call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %temp) #4 | ||
ret i32 %cmp.lcssa | ||
} | ||
|
||
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2 | ||
declare void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8>, <8 x i8>, i64, ptr nocapture) #2 | ||
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
; RUN: llc -mtriple=aarch64-linux-gnu -stop-after=instruction-select < %s | FileCheck %s | ||
|
||
%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } | ||
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } | ||
%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } | ||
|
||
declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld2.v2f32.p0f32(float*) | ||
declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld3.v2f32.p0f32(float*) | ||
declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld4.v2f32.p0f32(float*) | ||
|
||
declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*) | ||
declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*) | ||
declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*) | ||
|
||
declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld2r.v2f32.p0f32(float*) | ||
declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld3r.v2f32.p0f32(float*) | ||
declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld4r.v2f32.p0f32(float*) | ||
|
||
declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) | ||
declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) | ||
declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) | ||
|
||
|
||
; test_ld2: calls the ld2 intrinsic on %addr and returns the resulting struct of two <2 x float>. | ||
; The pattern below expects LD2Twov2s with a 128-bit load memory operand (2 vectors x 64 bits). | ||
define %struct.__neon_float32x2x2_t @test_ld2(float* %addr) { | ||
; CHECK-LABEL: name: test_ld2 | ||
; CHECK: LD2Twov2s {{.*}} :: (load (s128) {{.*}}) | ||
%val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %addr) | ||
ret %struct.__neon_float32x2x2_t %val | ||
} | ||
|
||
; test_ld3: calls the ld3 intrinsic on %addr and returns the resulting struct of three <2 x float>. | ||
; The pattern below expects LD3Threev2s with a 192-bit load memory operand (3 vectors x 64 bits). | ||
define %struct.__neon_float32x2x3_t @test_ld3(float* %addr) { | ||
; CHECK-LABEL: name: test_ld3 | ||
; CHECK: LD3Threev2s {{.*}} :: (load (s192) {{.*}}) | ||
%val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %addr) | ||
ret %struct.__neon_float32x2x3_t %val | ||
} | ||
|
||
; test_ld4: calls the ld4 intrinsic on %addr and returns the resulting struct of four <2 x float>. | ||
; The pattern below expects LD4Fourv2s with a 256-bit load memory operand (4 vectors x 64 bits). | ||
define %struct.__neon_float32x2x4_t @test_ld4(float* %addr) { | ||
; CHECK-LABEL: name: test_ld4 | ||
; CHECK: LD4Fourv2s {{.*}} :: (load (s256) {{.*}}) | ||
%val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %addr) | ||
ret %struct.__neon_float32x2x4_t %val | ||
} | ||
|
||
; test_ld1x2: calls the ld1x2 intrinsic on %addr and returns the resulting struct of two <2 x float>. | ||
; The pattern below expects LD1Twov2s with a 128-bit load memory operand (2 vectors x 64 bits). | ||
define %struct.__neon_float32x2x2_t @test_ld1x2(float* %addr) { | ||
; CHECK-LABEL: name: test_ld1x2 | ||
; CHECK: LD1Twov2s {{.*}} :: (load (s128) {{.*}}) | ||
%val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %addr) | ||
ret %struct.__neon_float32x2x2_t %val | ||
} | ||
|
||
; test_ld1x3: calls the ld1x3 intrinsic on %addr and returns the resulting struct of three <2 x float>. | ||
; The pattern below expects LD1Threev2s with a 192-bit load memory operand (3 vectors x 64 bits). | ||
define %struct.__neon_float32x2x3_t @test_ld1x3(float* %addr) { | ||
; CHECK-LABEL: name: test_ld1x3 | ||
; CHECK: LD1Threev2s {{.*}} :: (load (s192) {{.*}}) | ||
%val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %addr) | ||
ret %struct.__neon_float32x2x3_t %val | ||
} | ||
|
||
; test_ld1x4: calls the ld1x4 intrinsic on %addr and returns the resulting struct of four <2 x float>. | ||
; The pattern below expects LD1Fourv2s with a 256-bit load memory operand (4 vectors x 64 bits). | ||
define %struct.__neon_float32x2x4_t @test_ld1x4(float* %addr) { | ||
; CHECK-LABEL: name: test_ld1x4 | ||
; CHECK: LD1Fourv2s {{.*}} :: (load (s256) {{.*}}) | ||
%val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %addr) | ||
ret %struct.__neon_float32x2x4_t %val | ||
} | ||
|
||
; test_ld2r: calls the ld2r intrinsic on %addr and returns the resulting struct of two <2 x float>. | ||
; The pattern below expects LD2Rv2s with a 64-bit load memory operand | ||
; (presumably one 32-bit float read per result vector; confirm against the ld2r definition). | ||
define %struct.__neon_float32x2x2_t @test_ld2r(float* %addr) { | ||
; CHECK-LABEL: name: test_ld2r | ||
; CHECK: LD2Rv2s {{.*}} :: (load (s64) {{.*}}) | ||
%val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %addr) | ||
ret %struct.__neon_float32x2x2_t %val | ||
} | ||
|
||
; test_ld3r: calls the ld3r intrinsic on %addr and returns the resulting struct of three <2 x float>. | ||
; The pattern below expects LD3Rv2s with a 96-bit load memory operand | ||
; (presumably one 32-bit float read per result vector; confirm against the ld3r definition). | ||
define %struct.__neon_float32x2x3_t @test_ld3r(float* %addr) { | ||
; CHECK-LABEL: name: test_ld3r | ||
; CHECK: LD3Rv2s {{.*}} :: (load (s96) {{.*}}) | ||
%val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %addr) | ||
ret %struct.__neon_float32x2x3_t %val | ||
} | ||
|
||
; test_ld4r: calls the ld4r intrinsic on %addr and returns the resulting struct of four <2 x float>. | ||
; The pattern below expects LD4Rv2s with a 128-bit load memory operand | ||
; (presumably one 32-bit float read per result vector; confirm against the ld4r definition). | ||
define %struct.__neon_float32x2x4_t @test_ld4r(float* %addr) { | ||
; CHECK-LABEL: name: test_ld4r | ||
; CHECK: LD4Rv2s {{.*}} :: (load (s128) {{.*}}) | ||
%val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %addr) | ||
ret %struct.__neon_float32x2x4_t %val | ||
} | ||
|
||
; test_ld2lane: calls the ld2lane intrinsic with vectors %a and %b, lane index 1, and %addr, | ||
; and returns the resulting struct of two <2 x float>. | ||
; NOTE(review): the pattern below only matches the LD2i32 opcode name; unlike the non-lane | ||
; tests in this file, it does not match a memory-operand size. | ||
define %struct.__neon_float32x2x2_t @test_ld2lane(<2 x float> %a, <2 x float> %b, float* %addr) { | ||
; CHECK-LABEL: name: test_ld2lane | ||
; CHECK: {{.*}} LD2i32 {{.*}} | ||
%val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %a, <2 x float> %b, i64 1, float* %addr) | ||
ret %struct.__neon_float32x2x2_t %val | ||
} | ||
|
||
; test_ld3lane: calls the ld3lane intrinsic with vectors %a, %b and %c, lane index 1, and %addr, | ||
; and returns the resulting struct of three <2 x float>. | ||
; NOTE(review): the pattern below only matches the LD3i32 opcode name; unlike the non-lane | ||
; tests in this file, it does not match a memory-operand size. | ||
define %struct.__neon_float32x2x3_t @test_ld3lane(<2 x float> %a, <2 x float> %b, <2 x float> %c, float* %addr) { | ||
; CHECK-LABEL: name: test_ld3lane | ||
; CHECK: {{.*}} LD3i32 {{.*}} | ||
%val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, i64 1, float* %addr) | ||
ret %struct.__neon_float32x2x3_t %val | ||
} | ||
|
||
; test_ld4lane: calls the ld4lane intrinsic with vectors %a, %b, %c and %d, lane index 1, and %addr, | ||
; and returns the resulting struct of four <2 x float>. | ||
; NOTE(review): the pattern below only matches the LD4i32 opcode name; unlike the non-lane | ||
; tests in this file, it does not match a memory-operand size. | ||
define %struct.__neon_float32x2x4_t @test_ld4lane(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d, float* %addr) { | ||
; CHECK-LABEL: name: test_ld4lane | ||
; CHECK: {{.*}} LD4i32 {{.*}} | ||
%val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d, i64 1, float* %addr) | ||
ret %struct.__neon_float32x2x4_t %val | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters