Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ShuffleUnsafe methods and optimise Shuffle for non-constant indices #99596

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3366,11 +3366,19 @@ class Compiler
GenTree* gtNewSimdRoundNode(
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);

GenTree* gtNewSimdShuffleNodeVariable(var_types type,
GenTree* op1,
GenTree* op2,
CorInfoType simdBaseJitType,
unsigned simdSize,
bool isUnsafe);

GenTree* gtNewSimdShuffleNode(var_types type,
GenTree* op1,
GenTree* op2,
CorInfoType simdBaseJitType,
unsigned simdSize);
unsigned simdSize,
bool isUnsafe);

GenTree* gtNewSimdSqrtNode(
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);
Expand Down Expand Up @@ -4680,7 +4688,7 @@ class Compiler
bool mustExpand);

#ifdef FEATURE_HW_INTRINSICS
bool IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_types simdBaseType) const;
bool IsValidForShuffle(GenTree* indices, unsigned simdSize, var_types simdBaseType) const;

GenTree* impHWIntrinsic(NamedIntrinsic intrinsic,
CORINFO_CLASS_HANDLE clsHnd,
Expand Down
822 changes: 759 additions & 63 deletions src/coreclr/jit/gentree.cpp

Large diffs are not rendered by default.

35 changes: 24 additions & 11 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2151,38 +2151,51 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

case NI_Vector64_Shuffle:
case NI_Vector128_Shuffle:
case NI_Vector64_ShuffleUnsafe:
case NI_Vector128_ShuffleUnsafe:
{
assert((sig->numArgs == 2) || (sig->numArgs == 3));
assert((simdSize == 8) || (simdSize == 16));

GenTree* indices = impStackTop(0).val;

if (!indices->IsCnsVec() || !IsValidForShuffle(indices->AsVecCon(), simdSize, simdBaseType))
if (!IsValidForShuffle(indices, simdSize, simdBaseType))
{
break;
}

if (!indices->IsCnsVec())
{
assert(sig->numArgs == 2);

if (!opts.OptimizationEnabled())
if (opts.OptimizationEnabled())
{
// Only enable late stage rewriting if optimizations are enabled
// as we won't otherwise encounter a constant at the later point
return nullptr;
}
op2 = impSIMDPopStack();
op1 = impSIMDPopStack();

op2 = impSIMDPopStack();
op1 = impSIMDPopStack();
retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);

retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize);

retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint));
break;
retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint));
break;
}
}

if (sig->numArgs == 2)
{
op2 = impSIMDPopStack();
op1 = impSIMDPopStack();

retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize);
bool isUnsafe = intrinsic == NI_Vector64_ShuffleUnsafe || intrinsic == NI_Vector128_ShuffleUnsafe;
if (indices->IsCnsVec())
{
retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize, isUnsafe);
}
else
{
retNode = gtNewSimdShuffleNodeVariable(retType, op1, op2, simdBaseJitType, simdSize, isUnsafe);
}
}
break;
}
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ HARDWARE_INTRINSIC(Vector64, MultiplyAddEstimate,
HARDWARE_INTRINSIC(Vector64, Narrow, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector64, Round, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector64, ShuffleUnsafe, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector64, Sqrt, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector64, StoreAligned, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, StoreAlignedNonTemporal, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
Expand Down Expand Up @@ -195,6 +196,7 @@ HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate,
HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, ShuffleUnsafe, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate,
HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, ShuffleUnsafe, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
Expand Down Expand Up @@ -210,6 +211,7 @@ HARDWARE_INTRINSIC(Vector256, MultiplyAddEstimate,
HARDWARE_INTRINSIC(Vector256, Narrow, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector256, Round, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector256, ShuffleUnsafe, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector256, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, StoreAligned, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, StoreAlignedNonTemporal, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible)
Expand Down Expand Up @@ -321,6 +323,7 @@ HARDWARE_INTRINSIC(Vector512, MultiplyAddEstimate,
HARDWARE_INTRINSIC(Vector512, Narrow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, Round, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector512, ShuffleUnsafe, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector512, Sqrt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
Expand Down
Loading
Loading