
Revert "Add SIMD to LowerCallMemcmp (#84530)" (#84595)
This reverts commit eda1c3a.
sbomer authored Apr 11, 2023
1 parent f211984 commit f8435f4
Showing 2 changed files with 15 additions and 59 deletions.
src/coreclr/jit/gentree.cpp (6 changes: 3 additions & 3 deletions)

@@ -7384,9 +7384,9 @@ GenTree* Compiler::gtNewZeroConNode(var_types type)
 #ifdef FEATURE_SIMD
     if (varTypeIsSIMD(type))
     {
-        GenTreeVecCon* vecCon = gtNewVconNode(type);
-        vecCon->gtSimdVal = simd_t::Zero();
-        return vecCon;
+        GenTreeVecCon* allBitsSet = gtNewVconNode(type);
+        allBitsSet->gtSimdVal = simd_t::Zero();
+        return allBitsSet;
     }
 #endif // FEATURE_SIMD

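The only effect in this hunk is the local's name: the reverted change had renamed allBitsSet (a leftover, misleading name, since the node holds zero rather than all-bits-set) to vecCon, and the revert restores the old name. Either way the helper returns a vector constant with a zeroed payload, roughly as in this standalone sketch (simd16_t and VecConNode are illustrative stand-ins, not the JIT's actual types):

    #include <cstdint>
    #include <cstring>

    struct simd16_t   { uint8_t  bytes[16]; }; // illustrative 16-byte vector payload
    struct VecConNode { simd16_t gtSimdVal; }; // stand-in for GenTreeVecCon

    // Roughly what the SIMD arm of gtNewZeroConNode does: allocate a
    // vector-constant node and zero-fill its payload (simd_t::Zero() above).
    VecConNode* newZeroVecCon()
    {
        VecConNode* vecCon = new VecConNode();
        std::memset(&vecCon->gtSimdVal, 0, sizeof(vecCon->gtSimdVal));
        return vecCon;
    }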
src/coreclr/jit/lower.cpp (68 changes: 12 additions & 56 deletions)

@@ -1902,20 +1902,8 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
         {
             GenTree* lArg = call->gtArgs.GetUserArgByIndex(0)->GetNode();
             GenTree* rArg = call->gtArgs.GetUserArgByIndex(1)->GetNode();
 
-            ssize_t MaxUnrollSize = 16;
-#ifdef FEATURE_SIMD
-            MaxUnrollSize = 32;
-#ifdef TARGET_XARCH
-            if (comp->compOpportunisticallyDependsOn(InstructionSet_Vector256))
-            {
-                MaxUnrollSize = 64;
-            }
-            // TODO-XARCH-AVX512: Consider enabling this for AVX512
-#endif
-#endif
-
-            if (cnsSize <= MaxUnrollSize)
+            // TODO: Add SIMD path for [16..128] via GT_HWINTRINSIC nodes
+            if (cnsSize <= 16)
             {
                 unsigned loadWidth = 1 << BitOperations::Log2((unsigned)cnsSize);
                 var_types loadType;
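The deleted lines scaled the unroll cap with the widest usable vector: 16 bytes with scalar loads only, 32 under FEATURE_SIMD, and 64 on xarch when Vector256 is available; the revert pins the cap back at 16 and leaves larger sizes to the restored TODO comment. A standalone sketch of the removed selection logic, with kHasSimd and kHasVector256 as hypothetical stand-ins for the FEATURE_SIMD define and the Vector256 ISA check:

    #include <cstdio>

    constexpr bool kHasSimd      = true; // stand-in for FEATURE_SIMD
    constexpr bool kHasVector256 = true; // stand-in for compOpportunisticallyDependsOn(InstructionSet_Vector256)

    long maxUnrollSize()
    {
        long maxUnrollSize = 16;    // scalar baseline; what the revert restores unconditionally
        if (kHasSimd)
        {
            maxUnrollSize = 32;     // a pair of 16-byte vector loads
            if (kHasVector256)
            {
                maxUnrollSize = 64; // a pair of 32-byte vector loads (xarch only)
            }
        }
        return maxUnrollSize;
    }

    int main()
    {
        std::printf("max unrollable memcmp size: %ld bytes\n", maxUnrollSize());
    }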
@@ -1931,25 +1919,11 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
                 {
                     loadType = TYP_INT;
                 }
-                else if ((loadWidth == 8) || (MaxUnrollSize == 16))
+                else if ((loadWidth == 8) || (loadWidth == 16))
                 {
                     loadWidth = 8;
                     loadType = TYP_LONG;
                 }
-#ifdef FEATURE_SIMD
-                else if ((loadWidth == 16) || (MaxUnrollSize == 32))
-                {
-                    loadWidth = 16;
-                    loadType = TYP_SIMD16;
-                }
-#ifdef TARGET_XARCH
-                else if ((loadWidth == 32) || (MaxUnrollSize == 64))
-                {
-                    loadWidth = 32;
-                    loadType = TYP_SIMD32;
-                }
-#endif // TARGET_XARCH
-#endif // FEATURE_SIMD
                 else
                 {
                     unreached();
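loadWidth starts as cnsSize rounded down to a power of two, and with the SIMD arms deleted, widths of 8 and 16 both collapse to 8-byte TYP_LONG loads; sizes in (8..16] are then covered by the two overlapping loads built further down. A runnable model of the post-revert selection (the TYP_UBYTE and TYP_USHORT arms sit in the collapsed part of the hunk and are assumed here to follow the same pattern; the enum is illustrative):

    #include <bit>
    #include <cstdio>

    enum VarType { TYP_UBYTE, TYP_USHORT, TYP_INT, TYP_LONG };

    int main()
    {
        const char* names[] = {"TYP_UBYTE", "TYP_USHORT", "TYP_INT", "TYP_LONG"};
        for (unsigned cnsSize = 1; cnsSize <= 16; cnsSize++)
        {
            // std::bit_floor matches 1 << BitOperations::Log2(cnsSize):
            // round the size down to a power of two.
            unsigned loadWidth = std::bit_floor(cnsSize);
            VarType  loadType;
            if (loadWidth == 1)
                loadType = TYP_UBYTE;
            else if (loadWidth == 2)
                loadType = TYP_USHORT;
            else if (loadWidth == 4)
                loadType = TYP_INT;
            else // loadWidth is 8 or 16: clamp to one or two 8-byte loads
            {
                loadWidth = 8;
                loadType  = TYP_LONG;
            }
            std::printf("cnsSize=%2u -> loadWidth=%u, %s\n", cnsSize, loadWidth, names[loadType]);
        }
    }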
@@ -1958,26 +1932,8 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
 
                 GenTree* result = nullptr;
 
-                auto newBinaryOp = [](Compiler* comp, genTreeOps oper, var_types type, GenTree* op1,
-                                      GenTree* op2) -> GenTree* {
-#ifdef FEATURE_SIMD
-                    if (varTypeIsSIMD(op1))
-                    {
-                        if (GenTree::OperIsCmpCompare(oper))
-                        {
-                            assert(type == TYP_INT);
-                            return comp->gtNewSimdCmpOpAllNode(oper, TYP_BOOL, op1, op2, CORINFO_TYPE_NATIVEUINT,
-                                                               genTypeSize(op1), false);
-                        }
-                        return comp->gtNewSimdBinOpNode(oper, op1->TypeGet(), op1, op2, CORINFO_TYPE_NATIVEUINT,
-                                                        genTypeSize(op1), false);
-                    }
-#endif
-                    return comp->gtNewOperNode(oper, type, op1, op2);
-                };
-
                 // loadWidth == cnsSize means a single load is enough for both args
-                if (loadWidth == (unsigned)cnsSize)
+                if ((loadWidth == (unsigned)cnsSize) && (loadWidth <= 8))
                 {
                     // We're going to emit something like the following:
                     //
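The deleted newBinaryOp helper existed only for the SIMD path: for vector operands it routed ordinary ops to gtNewSimdBinOpNode and compares to gtNewSimdCmpOpAllNode, which folds the per-lane comparison results into one scalar truth value; with only scalar operands left, plain gtNewOperNode calls suffice. (The restored (loadWidth <= 8) guard is always satisfied after the clamp above, so it reads as a defensive condition.) Why compares needed the all-lanes form can be seen in this plain scalar model, with no intrinsics assumed:

    #include <array>
    #include <cstdint>
    #include <cstdio>

    using Vec16 = std::array<uint8_t, 16>;

    // A vector "==" is per-lane; equality of two whole 16-byte values needs
    // an all-lanes reduction, which is what gtNewSimdCmpOpAllNode expressed.
    bool allLanesEqual(const Vec16& a, const Vec16& b)
    {
        bool all = true;
        for (size_t i = 0; i < a.size(); i++)
        {
            all &= (a[i] == b[i]); // lane-wise compare, folded into one bool
        }
        return all;
    }

    int main()
    {
        Vec16 x{};
        Vec16 y{};
        y[15] = 1;
        std::printf("%d %d\n", allLanesEqual(x, x), allLanesEqual(x, y)); // prints: 1 0
    }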
@@ -1987,7 +1943,7 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
                     //
                     GenTree* lIndir = comp->gtNewIndir(loadType, lArg);
                     GenTree* rIndir = comp->gtNewIndir(loadType, rArg);
-                    result = newBinaryOp(comp, GT_EQ, TYP_INT, lIndir, rIndir);
+                    result = comp->gtNewOperNode(GT_EQ, TYP_INT, lIndir, rIndir);
 
                     BlockRange().InsertAfter(lArg, lIndir);
                     BlockRange().InsertAfter(rArg, rIndir);
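In plain C++ terms the single-load path emits the moral equivalent of the following for an 8-byte compare; the JIT of course builds IND and EQ nodes rather than calling a function, so this is purely an illustration:

    #include <cstdint>
    #include <cstring>

    // memcmp(l, r, 8) == 0, lowered: one load per argument, one compare.
    bool memeq8(const void* l, const void* r)
    {
        uint64_t lv;
        uint64_t rv;
        std::memcpy(&lv, l, sizeof(lv)); // IND<long>(lArg)
        std::memcpy(&rv, r, sizeof(rv)); // IND<long>(rArg)
        return lv == rv;                 // EQ(lIndir, rIndir)
    }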
@@ -2034,17 +1990,17 @@ GenTree* Lowering::LowerCallMemcmp(GenTreeCall* call)
                     //
                     GenTree* l1Indir = comp->gtNewIndir(loadType, lArgUse.Def());
                     GenTree* r1Indir = comp->gtNewIndir(loadType, rArgUse.Def());
-                    GenTree* lXor = newBinaryOp(comp, GT_XOR, actualLoadType, l1Indir, r1Indir);
+                    GenTree* lXor = comp->gtNewOperNode(GT_XOR, actualLoadType, l1Indir, r1Indir);
                     GenTree* l2Offs = comp->gtNewIconNode(cnsSize - loadWidth, TYP_I_IMPL);
-                    GenTree* l2AddOffs = newBinaryOp(comp, GT_ADD, lArg->TypeGet(), lArgClone, l2Offs);
+                    GenTree* l2AddOffs = comp->gtNewOperNode(GT_ADD, lArg->TypeGet(), lArgClone, l2Offs);
                     GenTree* l2Indir = comp->gtNewIndir(loadType, l2AddOffs);
                     GenTree* r2Offs = comp->gtCloneExpr(l2Offs); // offset is the same
-                    GenTree* r2AddOffs = newBinaryOp(comp, GT_ADD, rArg->TypeGet(), rArgClone, r2Offs);
+                    GenTree* r2AddOffs = comp->gtNewOperNode(GT_ADD, rArg->TypeGet(), rArgClone, r2Offs);
                     GenTree* r2Indir = comp->gtNewIndir(loadType, r2AddOffs);
-                    GenTree* rXor = newBinaryOp(comp, GT_XOR, actualLoadType, l2Indir, r2Indir);
-                    GenTree* resultOr = newBinaryOp(comp, GT_OR, actualLoadType, lXor, rXor);
-                    GenTree* zeroCns = comp->gtNewZeroConNode(actualLoadType);
-                    result = newBinaryOp(comp, GT_EQ, TYP_INT, resultOr, zeroCns);
+                    GenTree* rXor = comp->gtNewOperNode(GT_XOR, actualLoadType, l2Indir, r2Indir);
+                    GenTree* resultOr = comp->gtNewOperNode(GT_OR, actualLoadType, lXor, rXor);
+                    GenTree* zeroCns = comp->gtNewIconNode(0, actualLoadType);
+                    result = comp->gtNewOperNode(GT_EQ, TYP_INT, resultOr, zeroCns);
 
                     BlockRange().InsertAfter(rArgClone, l1Indir, r1Indir, l2Offs, l2AddOffs);
                     BlockRange().InsertAfter(l2AddOffs, l2Indir, r2Offs, r2AddOffs, r2Indir);
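The two-load shape handles sizes that are not powers of two by letting the second pair of loads overlap the first: XOR each pair, OR the two difference masks, and compare the result with zero. With SIMD gone, that zero can again be a plain integer constant (gtNewIconNode) instead of gtNewZeroConNode. For cnsSize = 6 and loadWidth = 4, the nodes built above correspond to this illustrative C++ (not JIT code):

    #include <cstdint>
    #include <cstring>

    // memcmp(l, r, 6) == 0 via two overlapping 4-byte loads per argument:
    // bytes [0..4) and [2..6), matching offset cnsSize - loadWidth == 2.
    bool memeq6(const void* l, const void* r)
    {
        uint32_t l1, r1, l2, r2;
        std::memcpy(&l1, l, 4);                  // IND<int>(lArg)
        std::memcpy(&r1, r, 4);                  // IND<int>(rArg)
        std::memcpy(&l2, (const char*)l + 2, 4); // IND<int>(ADD(lArg, 2))
        std::memcpy(&r2, (const char*)r + 2, 4); // IND<int>(ADD(rArg, 2))
        return ((l1 ^ r1) | (l2 ^ r2)) == 0;     // EQ(OR(XOR, XOR), 0)
    }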
