Skip to content

Commit

Permalink
[SLP]Fix PR70004: Do not change insert point for reduction gather nodes.
Browse files Browse the repository at this point in the history
No need to change the insert point for reduction gather node, we can use
the ReductionRoot as insert point instead to avoid possible crashes.

(cherry picked from commit d79051f894129428ec36dedc6bbfdfdcc1bd0c17)
  • Loading branch information
alexey-bataev authored and tru committed Nov 13, 2023
1 parent 69b3baf commit 529aa6e
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 21 deletions.
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10118,7 +10118,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}

if (E->State == TreeEntry::NeedToGather) {
if (E->getMainOp() && E->Idx == 0)
// Set insert point for non-reduction initial nodes.
if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList)
setInsertPointAfterBundle(E);
Value *Vec = createBuildVector(E);
E->VectorizedValue = Vec;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-sie-ps5 < %s | FileCheck %s

define void @tes() {
; CHECK-LABEL: define void @tes() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = fcmp ole <2 x double> zeroinitializer, zeroinitializer
; CHECK-NEXT: br label [[TMP1:%.*]]
; CHECK: 1:
; CHECK-NEXT: [[TMP2:%.*]] = select i1 false, i1 false, i1 false
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i1> zeroinitializer, <2 x i1> [[TMP0]], <4 x i32> <i32 0, i32 0, i32 0, i32 2>
; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP4]])
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP5]], i1 false, i1 false
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP2]], i1 [[OP_RDX]], i1 false
; CHECK-NEXT: br i1 [[OP_RDX1]], label [[TMP6:%.*]], label [[TMP7:%.*]]
; CHECK: 6:
; CHECK-NEXT: ret void
; CHECK: 7:
; CHECK-NEXT: ret void
;
entry:
%0 = extractelement <2 x i1> zeroinitializer, i64 0
%1 = extractelement <2 x i1> zeroinitializer, i64 0
%2 = fcmp ole <2 x double> zeroinitializer, zeroinitializer
%3 = extractelement <2 x i1> %2, i64 0
%4 = extractelement <2 x i1> zeroinitializer, i64 0
br label %5

5:
%6 = select i1 false, i1 false, i1 false
%7 = select i1 %6, i1 %0, i1 false
%8 = select i1 %7, i1 %1, i1 false
%9 = select i1 %8, i1 false, i1 false
%10 = select i1 %9, i1 %3, i1 false
%11 = select i1 %10, i1 %4, i1 false
br i1 %11, label %12, label %13

12:
ret void

13:
ret void
}
40 changes: 20 additions & 20 deletions llvm/test/Transforms/SLPVectorizer/X86/reduction-transpose.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@
define i32 @reduce_and4(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
; SSE2-LABEL: @reduce_and4(
; SSE2-NEXT: entry:
; SSE2-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
; SSE2-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP3]]
; SSE2-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
; SSE2-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP3]]
; SSE2-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
; SSE2-NEXT: ret i32 [[OP_RDX1]]
;
Expand All @@ -40,11 +40,11 @@ define i32 @reduce_and4(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <
;
; AVX-LABEL: @reduce_and4(
; AVX-NEXT: entry:
; AVX-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
; AVX-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP3]]
; AVX-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
; AVX-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP0]])
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP1]], [[TMP3]]
; AVX-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
; AVX-NEXT: ret i32 [[OP_RDX1]]
;
Expand Down Expand Up @@ -94,11 +94,11 @@ entry:

define i32 @reduce_and4_transpose(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
; SSE2-LABEL: @reduce_and4_transpose(
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; SSE2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
; SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[TMP4]]
; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; SSE2-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
; SSE2-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; SSE2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
; SSE2-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP4]]
; SSE2-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
; SSE2-NEXT: ret i32 [[OP_RDX1]]
;
Expand All @@ -114,11 +114,11 @@ define i32 @reduce_and4_transpose(i32 %acc, <4 x i32> %v1, <4 x i32> %v2, <4 x i
; SSE42-NEXT: ret i32 [[OP_RDX3]]
;
; AVX-LABEL: @reduce_and4_transpose(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; AVX-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP2]])
; AVX-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP3]], [[TMP4]]
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V4:%.*]], <4 x i32> [[V3:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; AVX-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP1]])
; AVX-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[V2:%.*]], <4 x i32> [[V1:%.*]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
; AVX-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v8i32(<8 x i32> [[TMP3]])
; AVX-NEXT: [[OP_RDX:%.*]] = and i32 [[TMP2]], [[TMP4]]
; AVX-NEXT: [[OP_RDX1:%.*]] = and i32 [[OP_RDX]], [[ACC:%.*]]
; AVX-NEXT: ret i32 [[OP_RDX1]]
;
Expand Down

0 comments on commit 529aa6e

Please sign in to comment.