Updating Vector<T> to support opt-in 512-bit widths (#97460)
* Updating Vector<T> to support opt-in 512-bit widths

* Apply formatting patch
tannergooding authored Jan 24, 2024
1 parent e568f75 commit be6c9f6
Showing 12 changed files with 438 additions and 788 deletions.
21 changes: 13 additions & 8 deletions src/coreclr/jit/compiler.h
@@ -3232,12 +3232,14 @@ class Compiler
GenTree* op4,
CorInfoType simdBaseJitType,
unsigned simdSize);
GenTree* gtNewSimdToScalarNode(var_types type,
GenTree* op1,
CorInfoType simdBaseJitType,
unsigned simdSize);
#endif // TARGET_XARCH


GenTree* gtNewSimdToScalarNode(var_types type,
GenTree* op1,
CorInfoType simdBaseJitType,
unsigned simdSize);

GenTree* gtNewSimdUnOpNode(genTreeOps op,
var_types type,
GenTree* op1,
@@ -8832,10 +8834,13 @@
CLANG_FORMAT_COMMENT_ANCHOR;

#if defined(TARGET_XARCH)
// TODO-XArch: Add support for 512-bit Vector<T>
assert(!compIsaSupportedDebugOnly(InstructionSet_VectorT512));

if (compExactlyDependsOn(InstructionSet_VectorT256))
if (compExactlyDependsOn(InstructionSet_VectorT512))
{
assert(!compIsaSupportedDebugOnly(InstructionSet_VectorT256));
assert(!compIsaSupportedDebugOnly(InstructionSet_VectorT128));
return ZMM_REGSIZE_BYTES;
}
else if (compExactlyDependsOn(InstructionSet_VectorT256))
{
assert(!compIsaSupportedDebugOnly(InstructionSet_VectorT128));
return YMM_REGSIZE_BYTES;
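For orientation, here is a minimal, self-contained C++ sketch of the Vector<T> width selection that the compiler.h hunk above introduces. The enum and the byte values below are local stand-ins for the JIT's InstructionSet_VectorT* checks and the XMM/YMM/ZMM_REGSIZE_BYTES constants (assumptions made purely for illustration); only the control flow mirrors the change, with 512-bit chosen only when the VectorT512 ISA is explicitly enabled.

// Sketch only: local stand-ins for the JIT's InstructionSet_VectorT* sets and
// the XMM/YMM/ZMM_REGSIZE_BYTES constants; the cascade mirrors the hunk above.
#include <cstdio>

enum class VectorTIsa
{
    VectorT128, // baseline 128-bit Vector<T>
    VectorT256, // 256-bit Vector<T>
    VectorT512  // opt-in 512-bit Vector<T> enabled by this change
};

static unsigned getVectorTByteLength(VectorTIsa enabled)
{
    if (enabled == VectorTIsa::VectorT512)
    {
        return 64; // ZMM_REGSIZE_BYTES
    }
    if (enabled == VectorTIsa::VectorT256)
    {
        return 32; // YMM_REGSIZE_BYTES
    }
    return 16; // XMM_REGSIZE_BYTES
}

int main()
{
    // With the 512-bit ISA opted in, Vector<T> spans a full ZMM register.
    std::printf("Vector<T> byte length: %u\n", getVectorTByteLength(VectorTIsa::VectorT512));
    return 0;
}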
54 changes: 18 additions & 36 deletions src/coreclr/jit/fgbasic.cpp
@@ -1373,7 +1373,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_Vector128_Create:
case NI_Vector128_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
case NI_VectorT128_CreateBroadcast:
case NI_VectorT_CreateBroadcast:
#if defined(TARGET_XARCH)
case NI_BMI1_TrailingZeroCount:
case NI_BMI1_X64_TrailingZeroCount:
@@ -1387,7 +1386,6 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_Vector512_CreateScalar:
case NI_Vector256_CreateScalarUnsafe:
case NI_Vector512_CreateScalarUnsafe:
case NI_VectorT256_CreateBroadcast:
case NI_X86Base_BitScanForward:
case NI_X86Base_X64_BitScanForward:
case NI_X86Base_BitScanReverse:
@@ -1629,20 +1628,20 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_Vector128_AsUInt64:
case NI_Vector128_AsVector4:
case NI_Vector128_op_UnaryPlus:
case NI_VectorT128_As:
case NI_VectorT128_AsVectorByte:
case NI_VectorT128_AsVectorDouble:
case NI_VectorT128_AsVectorInt16:
case NI_VectorT128_AsVectorInt32:
case NI_VectorT128_AsVectorInt64:
case NI_VectorT128_AsVectorNInt:
case NI_VectorT128_AsVectorNUInt:
case NI_VectorT128_AsVectorSByte:
case NI_VectorT128_AsVectorSingle:
case NI_VectorT128_AsVectorUInt16:
case NI_VectorT128_AsVectorUInt32:
case NI_VectorT128_AsVectorUInt64:
case NI_VectorT128_op_UnaryPlus:
case NI_VectorT_As:
case NI_VectorT_AsVectorByte:
case NI_VectorT_AsVectorDouble:
case NI_VectorT_AsVectorInt16:
case NI_VectorT_AsVectorInt32:
case NI_VectorT_AsVectorInt64:
case NI_VectorT_AsVectorNInt:
case NI_VectorT_AsVectorNUInt:
case NI_VectorT_AsVectorSByte:
case NI_VectorT_AsVectorSingle:
case NI_VectorT_AsVectorUInt16:
case NI_VectorT_AsVectorUInt32:
case NI_VectorT_AsVectorUInt64:
case NI_VectorT_op_UnaryPlus:
#if defined(TARGET_XARCH)
case NI_Vector256_As:
case NI_Vector256_AsByte:
@@ -1658,20 +1657,6 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_Vector256_AsUInt32:
case NI_Vector256_AsUInt64:
case NI_Vector256_op_UnaryPlus:
case NI_VectorT256_As:
case NI_VectorT256_AsVectorByte:
case NI_VectorT256_AsVectorDouble:
case NI_VectorT256_AsVectorInt16:
case NI_VectorT256_AsVectorInt32:
case NI_VectorT256_AsVectorInt64:
case NI_VectorT256_AsVectorNInt:
case NI_VectorT256_AsVectorNUInt:
case NI_VectorT256_AsVectorSByte:
case NI_VectorT256_AsVectorSingle:
case NI_VectorT256_AsVectorUInt16:
case NI_VectorT256_AsVectorUInt32:
case NI_VectorT256_AsVectorUInt64:
case NI_VectorT256_op_UnaryPlus:
case NI_Vector512_As:
case NI_Vector512_AsByte:
case NI_Vector512_AsDouble:
@@ -1716,19 +1701,16 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_Vector128_get_AllBitsSet:
case NI_Vector128_get_One:
case NI_Vector128_get_Zero:
case NI_VectorT128_get_AllBitsSet:
case NI_VectorT128_get_One:
case NI_VectorT128_get_Zero:
case NI_VectorT_get_AllBitsSet:
case NI_VectorT_get_One:
case NI_VectorT_get_Zero:
#if defined(TARGET_XARCH)
case NI_Vector256_get_AllBitsSet:
case NI_Vector256_get_One:
case NI_Vector256_get_Zero:
case NI_Vector512_get_AllBitsSet:
case NI_Vector512_get_One:
case NI_Vector512_get_Zero:
case NI_VectorT256_get_AllBitsSet:
case NI_VectorT256_get_One:
case NI_VectorT256_get_Zero:
#endif // TARGET_XARCH
#endif // FEATURE_HW_INTRINSICS
{
73 changes: 39 additions & 34 deletions src/coreclr/jit/gentree.cpp
@@ -22617,18 +22617,7 @@ GenTree* Compiler::gtNewSimdGetElementNode(

if (useToScalar)
{
intrinsicId = NI_Vector128_ToScalar;

if (simdSize == 64)
{
intrinsicId = NI_Vector512_ToScalar;
}
else if (simdSize == 32)
{
intrinsicId = NI_Vector256_ToScalar;
}

return gtNewSimdHWIntrinsicNode(type, op1, intrinsicId, simdBaseJitType, simdSize);
return gtNewSimdToScalarNode(type, op1, simdBaseJitType, simdSize);
}

switch (simdBaseType)
@@ -22671,14 +22660,7 @@ GenTree* Compiler::gtNewSimdGetElementNode(
#elif defined(TARGET_ARM64)
if (op2->IsIntegralConst(0))
{
intrinsicId = NI_Vector128_ToScalar;

if (simdSize == 8)
{
intrinsicId = NI_Vector64_ToScalar;
}

return gtNewSimdHWIntrinsicNode(type, op1, intrinsicId, simdBaseJitType, simdSize);
return gtNewSimdToScalarNode(type, op1, simdBaseJitType, simdSize);
}

if (simdSize == 8)
@@ -24575,7 +24557,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si
case TYP_USHORT:
{
tmp = gtNewSimdHWIntrinsicNode(simdType, op1, NI_AdvSimd_Arm64_AddAcross, simdBaseJitType, simdSize);
return gtNewSimdHWIntrinsicNode(type, tmp, NI_Vector64_ToScalar, simdBaseJitType, 8);
return gtNewSimdToScalarNode(type, tmp, simdBaseJitType, 8);
}

case TYP_INT:
@@ -24590,7 +24572,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si
{
tmp = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_AdvSimd_Arm64_AddAcross, simdBaseJitType, 16);
}
return gtNewSimdHWIntrinsicNode(type, tmp, NI_Vector64_ToScalar, simdBaseJitType, 8);
return gtNewSimdToScalarNode(type, tmp, simdBaseJitType, 8);
}

case TYP_FLOAT:
@@ -24612,7 +24594,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si
simdSize);
}
}
return gtNewSimdHWIntrinsicNode(type, op1, NI_Vector128_ToScalar, simdBaseJitType, simdSize);
return gtNewSimdToScalarNode(type, op1, simdBaseJitType, simdSize);
}

case TYP_DOUBLE:
Expand All @@ -24624,7 +24606,7 @@ GenTree* Compiler::gtNewSimdSumNode(var_types type, GenTree* op1, CorInfoType si
op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_AdvSimd_Arm64_AddPairwiseScalar, simdBaseJitType,
simdSize);
}
return gtNewSimdHWIntrinsicNode(type, op1, NI_Vector64_ToScalar, simdBaseJitType, 8);
return gtNewSimdToScalarNode(type, op1, simdBaseJitType, 8);
}
default:
{
@@ -24696,7 +24678,6 @@ GenTree* Compiler::gtNewSimdTernaryLogicNode(var_types type,
}
#endif // TARGET_XARCH

#if defined(TARGET_XARCH)
//----------------------------------------------------------------------------------------------
// Compiler::gtNewSimdToScalarNode: Creates a new simd ToScalar node.
//
@@ -24711,9 +24692,19 @@ GenTree* Compiler::gtNewSimdTernaryLogicNode(var_types type,
//
GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
{
assert(IsBaselineSimdIsaSupportedDebugOnly());
assert(varTypeIsArithmetic(type));

assert(op1 != nullptr);
assert(varTypeIsSIMD(op1));

#if defined(TARGET_X86)
var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
assert(varTypeIsArithmetic(simdBaseType));

NamedIntrinsic intrinsic = NI_Illegal;

#ifdef TARGET_XARCH
#if defined(TARGET_X86)
if (varTypeIsLong(simdBaseType))
{
// We need SSE41 to handle long, use software fallback
@@ -24724,23 +24715,37 @@ GenTree* Compiler::gtNewSimdToScalarNode(var_types type, GenTree* op1, CorInfoTy
return gtNewSimdGetElementNode(type, op1, op2, simdBaseJitType, simdSize);
}
#endif // TARGET_X86
// Ensure MOVD/MOVQ support exists
assert(compIsaSupportedDebugOnly(InstructionSet_SSE2));
NamedIntrinsic intrinsic = NI_Vector128_ToScalar;

if (simdSize == 32)
if (simdSize == 64)
{
assert(IsBaselineVector512IsaSupportedDebugOnly());
intrinsic = NI_Vector512_ToScalar;
}
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
intrinsic = NI_Vector256_ToScalar;
}
else if (simdSize == 64)
else
{
assert(IsBaselineVector512IsaSupportedDebugOnly());
intrinsic = NI_Vector512_ToScalar;
intrinsic = NI_Vector128_ToScalar;
}
#elif defined(TARGET_ARM64)
if (simdSize == 8)
{
intrinsic = NI_Vector64_ToScalar;
}
else
{
intrinsic = NI_Vector128_ToScalar;
}
#else
#error Unsupported platform
#endif // !TARGET_XARCH && !TARGET_ARM64

assert(intrinsic != NI_Illegal);
return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
}
#endif // TARGET_XARCH

GenTree* Compiler::gtNewSimdUnOpNode(
genTreeOps op, var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize)
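As a companion to the gtNewSimdToScalarNode hunk above, the sketch below models the simdSize-to-intrinsic selection that the helper now centralizes for its callers. The enum values are local placeholders for the JIT's NamedIntrinsic IDs, and the xarch/arm64 split is expressed as a plain flag instead of the real #ifdef blocks; treat it as an illustration of the dispatch shape under those assumptions, not the actual implementation.

// Sketch only: models how a ToScalar intrinsic could be picked from the SIMD
// size, mirroring the shape of the new helper. Names are local placeholders.
#include <cassert>
#include <cstdio>

enum class ToScalarIntrinsic
{
    Vector64_ToScalar,  // arm64, 8-byte vectors
    Vector128_ToScalar, // 16-byte vectors on both targets
    Vector256_ToScalar, // xarch, 32-byte vectors
    Vector512_ToScalar  // xarch, 64-byte vectors
};

static ToScalarIntrinsic selectToScalar(unsigned simdSize, bool isXArch)
{
    if (isXArch)
    {
        if (simdSize == 64)
        {
            return ToScalarIntrinsic::Vector512_ToScalar;
        }
        if (simdSize == 32)
        {
            return ToScalarIntrinsic::Vector256_ToScalar;
        }
        return ToScalarIntrinsic::Vector128_ToScalar;
    }

    // arm64 only has 8-byte and 16-byte SIMD types.
    assert((simdSize == 8) || (simdSize == 16));
    return (simdSize == 8) ? ToScalarIntrinsic::Vector64_ToScalar : ToScalarIntrinsic::Vector128_ToScalar;
}

int main()
{
    std::printf("%d\n", static_cast<int>(selectToScalar(64, /* isXArch */ true)));
    return 0;
}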
9 changes: 3 additions & 6 deletions src/coreclr/jit/hwintrinsicarm64.cpp
@@ -1073,17 +1073,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,

op1 = gtNewSimdGetLowerNode(TYP_SIMD8, op1, simdBaseJitType, simdSize);
op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_AdvSimd_Arm64_AddAcross, simdBaseJitType, 8);
op1 = gtNewSimdHWIntrinsicNode(genActualType(simdBaseType), op1, NI_Vector64_ToScalar, simdBaseJitType,
8);
op1 = gtNewSimdToScalarNode(genActualType(simdBaseType), op1, simdBaseJitType, 8);
op1 = gtNewCastNode(TYP_INT, op1, /* isUnsigned */ true, TYP_INT);

GenTree* zero = gtNewZeroConNode(TYP_SIMD16);
ssize_t index = 8 / genTypeSize(simdBaseType);

op2 = gtNewSimdGetUpperNode(TYP_SIMD8, op2, simdBaseJitType, simdSize);
op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op2, NI_AdvSimd_Arm64_AddAcross, simdBaseJitType, 8);
op2 = gtNewSimdHWIntrinsicNode(genActualType(simdBaseType), op2, NI_Vector64_ToScalar, simdBaseJitType,
8);
op2 = gtNewSimdToScalarNode(genActualType(simdBaseType), op2, simdBaseJitType, 8);
op2 = gtNewCastNode(TYP_INT, op2, /* isUnsigned */ true, TYP_INT);

op2 = gtNewOperNode(GT_LSH, TYP_INT, op2, gtNewIconNode(8));
@@ -1112,8 +1110,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
simdSize);
}

retNode = gtNewSimdHWIntrinsicNode(genActualType(simdBaseType), op1, NI_Vector64_ToScalar,
simdBaseJitType, 8);
retNode = gtNewSimdToScalarNode(genActualType(simdBaseType), op1, simdBaseJitType, 8);

if ((simdBaseType != TYP_INT) && (simdBaseType != TYP_UINT))
{
8 changes: 4 additions & 4 deletions src/coreclr/jit/importercalls.cpp
@@ -3542,7 +3542,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
GenTree* res =
gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_FMA_MultiplyAddScalar, callJitType, 16);

retNode = gtNewSimdHWIntrinsicNode(callType, res, NI_Vector128_ToScalar, callJitType, 16);
retNode = gtNewSimdToScalarNode(callType, res, callJitType, 16);
break;
}
#elif defined(TARGET_ARM64)
Expand All @@ -3566,7 +3566,7 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op3, op2, op1, NI_AdvSimd_FusedMultiplyAddScalar,
callJitType, 8);

retNode = gtNewSimdHWIntrinsicNode(callType, retNode, NI_Vector64_ToScalar, callJitType, 8);
retNode = gtNewSimdToScalarNode(callType, retNode, callJitType, 8);
break;
}
#endif
@@ -8478,7 +8478,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
retNode->AsHWIntrinsic()->Op(2) = op1;
}

return gtNewSimdHWIntrinsicNode(callType, retNode, NI_Vector128_ToScalar, callJitType, 16);
return gtNewSimdToScalarNode(callType, retNode, callJitType, 16);
}
}
#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH
@@ -8643,7 +8643,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
callJitType, 16);
}

return gtNewSimdHWIntrinsicNode(callType, tmp, NI_Vector128_ToScalar, callJitType, 16);
return gtNewSimdToScalarNode(callType, tmp, callJitType, 16);
}
#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH

(Diffs for the remaining changed files did not load and are not shown here.)
