[X86] combineVSelectToBLENDV - handle vselect(vXi1,A,B) -> blendv(sext(vXi1),A,B)

For pre-AVX512 targets, attempt to sign-extend a vXi1 condition mask to pass to an X86ISD::BLENDV node

Fixes Issue #53760
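
X86's BLENDV nodes (vblendvps and friends) select each result lane from their second source when the sign bit of the matching condition lane is set, so a vXi1 mask has to be widened into full-width all-ones/all-zeros lanes first. Below is a minimal C sketch with AVX2 intrinsics of the expansion this combine enables for an i8 bitmask condition; blend_by_bitmask and its shapes are illustrative only, mirroring fadd_v8f32_cast_cond in the updated tests:

#include <immintrin.h>

/* Illustrative only: expand an 8-bit scalar mask into a per-lane sign
   mask (vpbroadcastb + vpand + vpcmpeqd) and feed it to vblendvps,
   which keys purely off the sign bit of each 32-bit condition lane. */
__m256 blend_by_bitmask(unsigned char pb, __m256 x, __m256 y) {
  __m256i bcast = _mm256_set1_epi8((char)pb);               /* vpbroadcastb */
  __m256i bits = _mm256_setr_epi32(1, 2, 4, 8, 16, 32, 64, 128);
  /* Lane i becomes all-ones iff bit i of pb is set. */
  __m256i cond = _mm256_cmpeq_epi32(_mm256_and_si256(bcast, bits), bits);
  return _mm256_blendv_ps(y, x, _mm256_castsi256_ps(cond)); /* vblendvps */
}

This is the vpbroadcastb + vpand + vpcmpeqd + vblendvps sequence the updated CHECK lines below expect, replacing the old per-bit shift/insert expansion.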
RKSimon committed Feb 11, 2022
1 parent c7eb846 commit 20af71f
Showing 2 changed files with 42 additions and 189 deletions.
28 changes: 20 additions & 8 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43438,19 +43438,17 @@ static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
 /// This function will also call SimplifyDemandedBits on already created
 /// BLENDV to perform additional simplifications.
 static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
-                                      TargetLowering::DAGCombinerInfo &DCI,
-                                      const X86Subtarget &Subtarget) {
+                                      TargetLowering::DAGCombinerInfo &DCI,
+                                      const X86Subtarget &Subtarget) {
   SDValue Cond = N->getOperand(0);
   if ((N->getOpcode() != ISD::VSELECT &&
        N->getOpcode() != X86ISD::BLENDV) ||
       ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
     return SDValue();

-  // Don't optimize before the condition has been transformed to a legal type
-  // and don't ever optimize vector selects that map to AVX512 mask-registers.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   unsigned BitWidth = Cond.getScalarValueSizeInBits();
-  if (BitWidth < 8 || BitWidth > 64)
-    return SDValue();
+  EVT VT = N->getValueType(0);

   // We can only handle the cases where VSELECT is directly legal on the
   // subtarget. We custom lower VSELECT nodes with constant conditions and
@@ -43462,8 +43460,6 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
   // Potentially, we should combine constant-condition vselect nodes
   // pre-legalization into shuffles and not mark as many types as custom
   // lowered.
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  EVT VT = N->getValueType(0);
   if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
     return SDValue();
   // FIXME: We don't support i16-element blends currently. We could and
@@ -43481,6 +43477,22 @@ static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
   if (VT.is512BitVector())
     return SDValue();

+  // PreAVX512, without mask-registers, attempt to sign-extend bool vectors to
+  // allow us to use BLENDV.
+  if (!Subtarget.hasAVX512() && BitWidth == 1) {
+    EVT CondVT = VT.changeVectorElementTypeToInteger();
+    if (SDValue ExtCond = combineToExtendBoolVectorInReg(
+            ISD::SIGN_EXTEND, SDLoc(N), CondVT, Cond, DAG, DCI, Subtarget)) {
+      return DAG.getNode(X86ISD::BLENDV, SDLoc(N), VT, ExtCond,
+                         N->getOperand(1), N->getOperand(2));
+    }
+  }
+
+  // Don't optimize before the condition has been transformed to a legal type
+  // and don't ever optimize vector selects that map to AVX512 mask-registers.
+  if (BitWidth < 8 || BitWidth > 64)
+    return SDValue();
+
   auto OnlyUsedAsSelectCond = [](SDValue Cond) {
     for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
          UI != UE; ++UI)
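
The new BitWidth == 1 path exists because BLENDV tests only the sign bit of each condition lane: a mask lane holding 0 or 1 has its sign bit clear either way and would always pick the first source. A scalar reference model of the 32-bit-lane semantics (blendv32_ref is a hypothetical helper, for illustration only):

#include <stdint.h>

/* Reference model of BLENDV on 32-bit lanes: lane i of the result comes
   from b when the sign bit of cond[i] is set, otherwise from a. A 0/1
   condition value never sets the sign bit, which is why a vXi1 mask must
   be sign-extended to all-ones/all-zeros lanes before feeding BLENDV. */
static void blendv32_ref(const int32_t cond[8], const float a[8],
                         const float b[8], float out[8]) {
  for (int i = 0; i < 8; ++i)
    out[i] = (cond[i] < 0) ? b[i] : a[i];
}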
203 changes: 22 additions & 181 deletions llvm/test/CodeGen/X86/vector-bo-select.ll
@@ -507,51 +507,11 @@ define <16 x float> @fdiv_v16f32_commute_swap(<16 x i1> %b, <16 x float> noundef
 define <8 x float> @fadd_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
 ; AVX2-LABEL: fadd_v8f32_cast_cond:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: shrb $4, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $7, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX2-NEXT: shrb $3, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
+; AVX2-NEXT: vmovd %edi, %xmm2
+; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
 ; AVX2-NEXT: vaddps %ymm1, %ymm0, %ymm0
@@ -644,52 +604,13 @@ define <8 x double> @fadd_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> n
 define <8 x float> @fsub_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
 ; AVX2-LABEL: fsub_v8f32_cast_cond:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: shrb $4, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $7, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX2-NEXT: shrb $3, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
-; AVX2-NEXT: vpand %ymm1, %ymm2, %ymm1
+; AVX2-NEXT: vmovd %edi, %xmm2
+; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
 ; AVX2-NEXT: vsubps %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT: retq
 ;
@@ -780,51 +701,11 @@ define <8 x double> @fsub_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> n
 define <8 x float> @fmul_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
 ; AVX2-LABEL: fmul_v8f32_cast_cond:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: shrb $4, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $7, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX2-NEXT: shrb $3, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
+; AVX2-NEXT: vmovd %edi, %xmm2
+; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
 ; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
 ; AVX2-NEXT: vmulps %ymm1, %ymm0, %ymm0
@@ -917,51 +798,11 @@ define <8 x double> @fmul_v8f64_cast_cond(i8 noundef zeroext %pb, <8 x double> n
 define <8 x float> @fdiv_v8f32_cast_cond(i8 noundef zeroext %pb, <8 x float> noundef %x, <8 x float> noundef %y) {
 ; AVX2-LABEL: fdiv_v8f32_cast_cond:
 ; AVX2: # %bb.0:
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $5, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: movl %edi, %ecx
-; AVX2-NEXT: shrb $4, %cl
-; AVX2-NEXT: movzbl %cl, %ecx
-; AVX2-NEXT: andl $1, %ecx
-; AVX2-NEXT: negl %ecx
-; AVX2-NEXT: vmovd %ecx, %xmm2
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $6, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $7, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vmovd %eax, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3
-; AVX2-NEXT: movl %edi, %eax
-; AVX2-NEXT: shrb $2, %al
-; AVX2-NEXT: movzbl %al, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $2, %eax, %xmm3, %xmm3
-; AVX2-NEXT: shrb $3, %dil
-; AVX2-NEXT: movzbl %dil, %eax
-; AVX2-NEXT: andl $1, %eax
-; AVX2-NEXT: negl %eax
-; AVX2-NEXT: vpinsrd $3, %eax, %xmm3, %xmm3
-; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
+; AVX2-NEXT: vmovd %edi, %xmm2
+; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128]
+; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
 ; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm3, %ymm1
 ; AVX2-NEXT: vdivps %ymm1, %ymm0, %ymm0
