From 95b8eb8443919de6b25df56d1b95685b9d7458bb Mon Sep 17 00:00:00 2001 From: Hamish Arblaster Date: Tue, 6 Aug 2024 22:38:16 +1000 Subject: [PATCH 1/6] Squash into 1 commit --- src/coreclr/jit/compiler.h | 12 +- src/coreclr/jit/gentree.cpp | 728 +++++++++++-- src/coreclr/jit/hwintrinsicarm64.cpp | 35 +- src/coreclr/jit/hwintrinsiclistarm64.h | 2 + src/coreclr/jit/hwintrinsiclistxarch.h | 3 + src/coreclr/jit/hwintrinsicxarch.cpp | 37 +- src/coreclr/jit/rationalize.cpp | 25 +- .../System/Runtime/Intrinsics/Vector128.cs | 171 ++- .../System/Runtime/Intrinsics/Vector256.cs | 140 +++ .../System/Runtime/Intrinsics/Vector512.cs | 126 +++ .../src/System/Runtime/Intrinsics/Vector64.cs | 89 ++ .../ref/System.Runtime.Intrinsics.cs | 52 + .../tests/Vectors/Vector128Tests.cs | 480 +++++++++ .../tests/Vectors/Vector256Tests.cs | 640 +++++++++++ .../tests/Vectors/Vector512Tests.cs | 990 ++++++++++++++++++ .../tests/Vectors/Vector64Tests.cs | 336 ++++++ src/mono/mono/mini/simd-intrinsics.c | 4 +- src/mono/mono/mini/simd-methods.h | 1 + 18 files changed, 3762 insertions(+), 109 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4b295f7a6aa0e..ae0da2d4a815e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3366,11 +3366,19 @@ class Compiler GenTree* gtNewSimdRoundNode( var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize); + GenTree* gtNewSimdShuffleNodeVariable(var_types type, + GenTree* op1, + GenTree* op2, + CorInfoType simdBaseJitType, + unsigned simdSize, + bool isUnsafe); + GenTree* gtNewSimdShuffleNode(var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, - unsigned simdSize); + unsigned simdSize, + bool isUnsafe); GenTree* gtNewSimdSqrtNode( var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize); @@ -4680,7 +4688,7 @@ class Compiler bool mustExpand); #ifdef FEATURE_HW_INTRINSICS - bool IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, 
var_types simdBaseType) const; + bool IsValidForShuffle(GenTree* indices, unsigned simdSize, var_types simdBaseType) const; GenTree* impHWIntrinsic(NamedIntrinsic intrinsic, CORINFO_CLASS_HANDLE clsHnd, diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 5c496b0e080de..adcfa9cd615fb 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -18373,7 +18373,7 @@ unsigned GenTreeVecCon::ElementCount(unsigned simdSize, var_types simdBaseType) return simdSize / genTypeSize(simdBaseType); } -bool Compiler::IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_types simdBaseType) const +bool Compiler::IsValidForShuffle(GenTree* indices, unsigned simdSize, var_types simdBaseType) const { #if defined(TARGET_XARCH) size_t elementSize = genTypeSize(simdBaseType); @@ -18387,41 +18387,6 @@ bool Compiler::IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_t // it's likely not worth it overall given that IsHardwareAccelerated reports false return false; } - else if ((varTypeIsByte(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI_VL)) || - (varTypeIsShort(simdBaseType) && !compOpportunisticallyDependsOn(InstructionSet_AVX512BW_VL))) - { - bool crossLane = false; - - for (size_t index = 0; index < elementCount; index++) - { - uint64_t value = vecCon->GetIntegralVectorConstElement(index, simdBaseType); - - if (value >= elementCount) - { - continue; - } - - if (index < (elementCount / 2)) - { - if (value >= (elementCount / 2)) - { - crossLane = true; - break; - } - } - else if (value < (elementCount / 2)) - { - crossLane = true; - break; - } - } - - if (crossLane) - { - // TODO-XARCH-CQ: We should emulate cross-lane shuffling for byte/sbyte and short/ushort - return false; - } - } } else if (simdSize == 64) { @@ -18440,6 +18405,12 @@ bool Compiler::IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_t // TYP_BYTE, TYP_UBYTE, TYP_SHORT, and TYP_USHORT need SSSE3 to be able to 
shuffle any operation return false; } + + if (!indices->IsCnsVec() && !compOpportunisticallyDependsOn(InstructionSet_SSSE3)) + { + // the variable implementation for Vector128 Shuffle always needs SSSE3 + return false; + } } #endif // TARGET_XARCH @@ -24863,8 +24834,546 @@ GenTree* Compiler::gtNewSimdRoundNode(var_types type, GenTree* op1, CorInfoType return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize); } +GenTree* Compiler::gtNewSimdShuffleNodeVariable( + var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize, bool isUnsafe) +{ + assert(IsBaselineSimdIsaSupportedDebugOnly()); + + assert(varTypeIsSIMD(type)); + assert(getSIMDTypeForSize(simdSize) == type); + + assert(op1 != nullptr); + assert(op1->TypeIs(type)); + + var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); + assert(op2 != nullptr); + assert(op2->TypeIs(type)); + assert(!op2->IsCnsVec() || isUnsafe); + + GenTree* retNode = nullptr; + GenTree* cnsNode = nullptr; + + size_t elementSize = genTypeSize(simdBaseType); + size_t elementCount = simdSize / elementSize; + +#if defined(TARGET_XARCH) + // duplicate operand 2 for non-isUnsafe implementation later + GenTree* op2DupSafe = isUnsafe ? 
nullptr : fgMakeMultiUse(&op2); + + // TODO-XARCH-CQ: If we have known min/max or set/unset bits for the indices, we could further optimise many cases + // below + + if (simdSize == 64) + { + if (elementSize == 1) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512VBMI)); + + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512VBMI_PermuteVar64x8, simdBaseJitType, simdSize); + } + else if (elementSize == 2) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); + + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512BW_PermuteVar32x16, simdBaseJitType, simdSize); + } + else if (elementSize == 4) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512F_PermuteVar16x32, simdBaseJitType, simdSize); + } + else + { + assert(elementSize == 8); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512F_PermuteVar8x64, simdBaseJitType, simdSize); + } + } + else if (elementSize == 1 && simdSize == 16) + { + assert(compIsaSupportedDebugOnly(InstructionSet_SSSE3)); + + retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSSE3_Shuffle, simdBaseJitType, simdSize); + } + else if (elementSize == 1 && simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI_VL)) + { + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512VBMI_VL_PermuteVar32x8, simdBaseJitType, simdSize); + } + else if (elementSize == 1 && simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) + { + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, 
NI_AVX10v1_PermuteVar32x8, simdBaseJitType, simdSize); + } + else if (elementSize == 2 && compOpportunisticallyDependsOn(InstructionSet_AVX512BW_VL)) + { + assert(simdSize == 16 || simdSize == 32); + + // swap the operands to match the encoding requirements + NamedIntrinsic intrinsic = simdSize == 16 ? NI_AVX512BW_VL_PermuteVar8x16 : NI_AVX512BW_VL_PermuteVar16x16; + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, intrinsic, simdBaseJitType, simdSize); + } + else if (elementSize == 2 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) + { + assert(simdSize == 16 || simdSize == 32); + + // swap the operands to match the encoding requirements + NamedIntrinsic intrinsic = simdSize == 16 ? NI_AVX10v1_PermuteVar8x16 : NI_AVX10v1_PermuteVar16x16; + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, intrinsic, simdBaseJitType, simdSize); + } + else if (elementSize == 4 && (simdSize == 32 || compOpportunisticallyDependsOn(InstructionSet_AVX))) + { + assert(simdSize == 16 || simdSize == 32); + + if (simdSize == 32) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); + + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize); + } + else + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); + + retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX_PermuteVar, CORINFO_TYPE_FLOAT, simdSize); + } + } + else if (elementSize == 8 && simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) + { + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX512F_VL_PermuteVar4x64, simdBaseJitType, simdSize); + } + else if (elementSize == 8 && simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) + { + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX10v1_PermuteVar4x64, simdBaseJitType, 
simdSize); + } + else + { + assert((elementSize == 1 && simdSize == 32) || elementSize == 2 || (elementSize == 4 && simdSize == 16) || + elementSize == 8); + + if (elementSize == 8 && (simdSize == 32 || compOpportunisticallyDependsOn(InstructionSet_AVX))) + { + assert(simdSize == 16 || simdSize == 32); + if (simdSize == 32) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); + } + else + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX)); + } + + // the below is implemented for integral types + if (varTypeIsFloating(simdBaseType)) + { + assert(elementSize == 8); + simdBaseJitType = CORINFO_TYPE_ULONG; + } + + // shift all indices to the left by 1 (long to int index) + cnsNode = gtNewIconNode(1, TYP_INT); + if (simdSize == 32) + { + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AVX2_ShiftLeftLogical, simdBaseJitType, simdSize); + } + else + { + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_SSE2_ShiftLeftLogical, simdBaseJitType, simdSize); + } + + // the below are implemented with float/int/uint + simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? 
CORINFO_TYPE_UINT : CORINFO_TYPE_INT; + if (varTypeIsFloating(simdBaseType)) + { + simdBaseJitType = CORINFO_TYPE_FLOAT; + } + + // shuffle & manipulate the long indices to int indices + simd_t shufCns = {}; + for (size_t index = 0; index < simdSize / 4; index++) + { + shufCns.u32[index] = index & 6; + } + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = shufCns; + + if (simdSize == 32) + { + // swap the operands to match the encoding requirements + op2 = gtNewSimdHWIntrinsicNode(type, cnsNode, op2, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize); + } + else + { + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AVX_PermuteVar, CORINFO_TYPE_FLOAT, simdSize); + } + + simd_t orCns = {}; + for (size_t index = 0; index < simdSize / 4; index++) + { + orCns.u32[index] = index & 1; + } + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = orCns; + + op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize); + + // perform the shuffle with our int indices + if (simdSize == 32) + { + // swap the operands to match the encoding requirements + retNode = gtNewSimdHWIntrinsicNode(type, op2, op1, NI_AVX2_PermuteVar8x32, simdBaseJitType, simdSize); + } + else + { + retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX_PermuteVar, CORINFO_TYPE_FLOAT, simdSize); + } + } + else if (simdSize == 32) + { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX2)); + assert(elementSize <= 2); + + // the idea is the following (for bytes, short indices are first converted to byte indices): + // 1. we have vector, and we create a vectorSwapped from it (which the 128-bit lanes are swapped) + // 2. we then shuffle each using Avx2.Shuffle (which masks with 0x0F for index, or 0x80 for zeroing) + // 3. we now have vector and vectorSwapped shuffled with Avx2.Shuffle - which only shuffles within the lane + // 4. 
for Shuffle, invalid indices are explicitly zeroed later, so no need to worry about anything outside + // [0, 31], and for ShuffleUnsafe, we don't guarantee any particular value. + // 5. since we only care about [0, 31], notably we have that for each element either vector or vectorSwapped + // (not both) will have the value we actually want, since one is effectively index A = i & 0x0F, and + // the other is effectively B = (i & 0x0F) | 0x10. (vector is A for left lane and B for right lane, + // and vectorSwapped is B for left lane and A for right lane) + // 6. we can use a conditional select to get the appropriate value if we know what mask to use. + // 7. we can use the following mask: + // (indices ^ V256.Create(V128.Create((byte)0), V128.Create((byte)0x10))) > V256.Create((byte)0x0F) + // since this detects whether the index value is in the same lane as V256.Create((byte)0, 1, ..., 31) + // would be (which we know we can always use vector for). this is because it normalises the 0x10 bit + // to mean '0 = in vector, 1 = in vectorSwapped', and then we can use > 0x0F to detect when this is + // the case (we use > on sbyte, since it is the primitive operation on x86/x64 avx2 hardware). + // 8. since we use Avx2.Shuffle with the indices, we get that any with the 0x80 bit set automatically get 0. + // 9. for other out of range cases, we will have (indices ^ ...) > 0x0F will always be true, meaning we + // always select from vectorSwapped in the case of out-of-bounds (but no 0x80). we normalise + // explicitly for Shuffle always anyway, and for ShuffleUnsafe, this behaviour is fine (since it's + // out of bounds). + + // we want ShuffleUnsafe to be at least as good as Shuffle (at least in sensible cases), so for constant + // indices vector we special case some cases to use normal Shuffle to ensure it gets the additional + // optimisations available there (to avoid double Avx2.Shuffle-ing when possible). 
all these here cases are + // equivalent to normal shuffle (we only take ones that are in range, or would trivially give 0 for both). + if (isUnsafe && op2->IsCnsVec()) + { + // get the byte indices we will use for shuffling + bool isValidForNormalShuffle = true; + simd_t indicesCns = op2->AsVecCon()->gtSimdVal; + if (elementSize > 1) + { + for (size_t i = 0; i < simdSize / 2; i++) + { + uint16_t index = indicesCns.u16[i]; + if (index >= 128) + { + // if the index is 128 or more, then we would have to synthesise a new op2, which we will + // not do, since there is no reason to put in extra effort to support such out-of-bounds + // cases for ShuffleUnsafe. + isValidForNormalShuffle = false; + break; + } + indicesCns.u8[i * 2] = (uint8_t)(index << 1); + indicesCns.u8[i * 2 + 1] = (uint8_t)((index << 1) | 1); + } + } + + if (isValidForNormalShuffle) + { + // check they are all within valid range (or have high bit set since it also trivially behaves same) + for (size_t i = 0; i < simdSize; i++) + { + if ((indicesCns.u8[i] & ~0x9F) != 0) + { + isValidForNormalShuffle = false; + break; + } + } + + // if valid, call to gtNewSimdShuffleNode + if (isValidForNormalShuffle) + { + // note: this does not cause an infinite loop, since we call with isUnsafe: false, which never + // enters gtNewSimdShuffleNodeVariable. + return gtNewSimdShuffleNode(type, op1, op2, simdBaseJitType, simdSize, false); + } + } + } + + // if we have elementSize > 1, we need to convert op2 (short indices) to byte indices + if (elementSize > 1) + { + // shift all indices to the left by tzcnt(size) = 1 + cnsNode = gtNewIconNode(1, TYP_INT); + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AVX2_ShiftLeftLogical, simdBaseJitType, simdSize); + + // the below are implemented with byte/sbyte + simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; + + // shuffle with a pattern like 0 0 2 2 4 4 6 6 ... 0 0 2 2 ... 
(for shorts) + // (note: the 0x10 bit is ignored for Avx2.Shuffle) + simd_t shufCns = {}; + shufCns.u64[0] = 0x0606040402020000; + shufCns.u64[1] = 0x0E0E0C0C0A0A0808; + shufCns.u64[2] = 0x0606040402020000; + shufCns.u64[3] = 0x0E0E0C0C0A0A0808; + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = shufCns; + + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + + // or every second index with 1 (short) + simd_t orCns = {}; + for (size_t index = 0; index < simdSize; index++) + { + orCns.u8[index] = static_cast(index & (elementSize - 1)); + } + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = orCns; + + op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize); + } + + // get the indices, and xor the cross-lane bit on the high 128-bit lane part of indices. we begin computing + // this (and the comparison) early as it seems to be one of the slower calculations (it can be done in + // parallel to other operations ideally). 
+ // V256 indicesXord = indices ^ V256.Create(V128.Create((byte)0), V128.Create((byte)0x10))); + simd_t xorCns = {}; + xorCns.u64[0] = 0; + xorCns.u64[1] = 0; + xorCns.u64[2] = 0x1010101010101010; + xorCns.u64[3] = 0x1010101010101010; + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = xorCns; + + GenTree* op2Dup1 = fgMakeMultiUse(&op2); + GenTree* op2Dup2 = fgMakeMultiUse(&op2Dup1); + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + GenTree* op1Dup2 = fgMakeMultiUse(&op1Dup1); + GenTree* indicesXord = gtNewSimdBinOpNode(GT_XOR, type, op2Dup2, cnsNode, simdBaseJitType, simdSize); + + // compare our modified indices to 0x0F (highest value not swapping lane), we get 0xFF when we are swapping + // lane and 0x00 otherwise + // V256 selection = Avx2.CompareGreaterThan(indicesXord.AsSByte(), V256.Create((sbyte)0x0F)).AsByte(); + cnsNode = gtNewSimdCreateBroadcastNode(type, gtNewIconNode(0x0F, TYP_INT), CORINFO_TYPE_UBYTE, simdSize); + GenTree* selection = gtNewSimdCmpOpNode(GT_GT, type, indicesXord, cnsNode, CORINFO_TYPE_BYTE, simdSize); + + // swap the low and high 128-bit lanes + // calculate swap before shuf1 so they can be computed in parallel + // Vector256 swap = Avx2.Permute2x128(vector, vector, 0b00000001); + uint8_t control = 1; + cnsNode = gtNewIconNode(control, TYP_INT); + GenTree* swap = gtNewSimdHWIntrinsicNode(type, op1Dup1, op1Dup2, cnsNode, NI_AVX2_Permute2x128, + simdBaseJitType, simdSize); + + // shuffle with both the normal and swapped values + // Vector256 shuf1 = Avx2.Shuffle(vector, indices); + // Vector256 shuf2 = Avx2.Shuffle(swap, indices); + GenTree* shuf1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + GenTree* shuf2 = gtNewSimdHWIntrinsicNode(type, swap, op2Dup1, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + + // blend our two shuffles based on whether each element swaps lanes or not + // return Avx2.BlendVariable(shuf1, shuf2, selection); + retNode = gtNewSimdHWIntrinsicNode(type, shuf1, 
shuf2, selection, NI_AVX2_BlendVariable, simdBaseJitType, + simdSize); + } + else + { + assert(compIsaSupportedDebugOnly(InstructionSet_SSSE3)); + assert(simdSize == 16); + assert(elementSize > 1); + + // the below is implemented for integral types + if (varTypeIsFloating(simdBaseType)) + { + if (elementSize == 4) + { + simdBaseJitType = CORINFO_TYPE_UINT; + } + else + { + assert(elementSize == 8); + simdBaseJitType = CORINFO_TYPE_ULONG; + } + } + + // shift all indices to the left by tzcnt(size) + cnsNode = gtNewIconNode(BitOperations::TrailingZeroCount(static_cast(elementSize)), TYP_INT); + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_SSE2_ShiftLeftLogical, simdBaseJitType, simdSize); + + // the below are implemented with byte/sbyte + simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; + + // we need to convert the indices to byte indices + // shuffle with a pattern like 0 0 2 2 4 4 6 6 ... (for short, and similar for larger) + + simd_t shufCns = {}; + for (size_t index = 0; index < elementCount; index++) + { + for (size_t i = 0; i < elementSize; i++) + { + shufCns.u8[(index * elementSize) + i] = static_cast(index * elementSize); + } + } + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = shufCns; + + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_SSSE3_Shuffle, simdBaseJitType, simdSize); + + // or the relevant bits + + simd_t orCns = {}; + for (size_t index = 0; index < simdSize; index++) + { + orCns.u8[index] = static_cast(index & (elementSize - 1)); + } + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = orCns; + + op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize); + + // apply normal byte shuffle now that we've converted it + + retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSSE3_Shuffle, simdBaseJitType, simdSize); + } + } +#elif defined(TARGET_ARM64) + NamedIntrinsic lookupIntrinsic = NI_AdvSimd_VectorTableLookup; + + if (simdSize 
== 16) + { + lookupIntrinsic = NI_AdvSimd_Arm64_VectorTableLookup; + } + + // fix-up indices for non-byte sized element types: + // if we have short / int / long, then we want to VectorTableLookup the least-significant byte to all bytes of that + // index element, and then shift left by the applicable amount, then or on the bits for the elements + // if it's not isUnsafe, we also need to then fix-up the out-of-range indices + GenTree* op2DupSafe = (isUnsafe || elementSize == 1) ? nullptr : fgMakeMultiUse(&op2); + if (elementSize > 1) + { + // AdvSimd.ShiftLeftLogical is only valid on integral types, excluding Vector128 + if (varTypeIsFloating(simdBaseType)) + { + if (elementSize == 4) + { + simdBaseJitType = CORINFO_TYPE_INT; + } + else + { + assert(elementSize == 8); + simdBaseJitType = CORINFO_TYPE_LONG; + } + } + if (simdSize == 16 && simdBaseJitType == CORINFO_TYPE_INT) + { + simdBaseJitType = CORINFO_TYPE_UINT; + } + + // shift all indices to the left by tzcnt(size) + cnsNode = gtNewIconNode(BitOperations::TrailingZeroCount(static_cast(elementSize)), TYP_INT); + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, NI_AdvSimd_ShiftLeftLogical, simdBaseJitType, simdSize); + + // VectorTableLookup is only valid on byte/sbyte + simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? 
CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; + + simd_t shufCns = {}; + for (size_t index = 0; index < elementCount; index++) + { + for (size_t i = 0; i < elementSize; i++) + { + shufCns.u8[(index * elementSize) + i] = static_cast(index * elementSize); + } + } + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = shufCns; + + op2 = gtNewSimdHWIntrinsicNode(type, op2, cnsNode, lookupIntrinsic, simdBaseJitType, simdSize); + + simd_t orCns = {}; + for (size_t index = 0; index < simdSize; index++) + { + orCns.u8[index] = static_cast(index & (elementSize - 1)); + } + + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = orCns; + + op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize); + } + + retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, lookupIntrinsic, simdBaseJitType, simdSize); +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + assert(retNode != nullptr); + +#if defined(TARGET_XARCH) + if (!isUnsafe) +#elif defined(TARGET_ARM64) + if (!isUnsafe && elementSize > 1) +#else +#error Unsupported platform +#endif // !TARGET_XARCH && !TARGET_ARM64 + { + // we need to ensure indices larger than elementCount become 0 for larger element types + + assert(op2DupSafe != nullptr); + + // get the CorInfoType used for the index comparison + CorInfoType corType = CORINFO_TYPE_UBYTE; + if (elementSize == 2) + { + corType = CORINFO_TYPE_USHORT; + } + else if (elementSize == 4) + { + corType = CORINFO_TYPE_UINT; + } + else if (elementSize == 8) + { + corType = CORINFO_TYPE_ULONG; + } + + assert(genTypeSize(JitType2PreciseVarType(corType)) == elementSize); + + // create the comparand node, and the mask node (op2 < comparand), and the result node (mask & unsafeResult) + GenTree* comparand = + gtNewSimdCreateBroadcastNode(type, gtNewIconNode(elementCount, TYP_INT), corType, simdSize); + GenTree* mask = gtNewSimdCmpOpNode(GT_LT, type, op2DupSafe, comparand, corType, simdSize); + retNode = 
gtNewSimdBinOpNode(GT_AND, type, mask, retNode, simdBaseJitType, simdSize); + } + else + { + assert(op2DupSafe == nullptr); + } + + return retNode; +} + GenTree* Compiler::gtNewSimdShuffleNode( - var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize) + var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize, bool isUnsafe) { assert(IsBaselineSimdIsaSupportedDebugOnly()); @@ -24881,6 +25390,13 @@ GenTree* Compiler::gtNewSimdShuffleNode( var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); assert(varTypeIsArithmetic(simdBaseType)); + if (isUnsafe) + { + // For ShuffleUnsafe, delegate to the variable implementation to get the same behaviour for + // ShuffleUnsafe with constant vs variable indices for free. + return gtNewSimdShuffleNodeVariable(type, op1, op2, simdBaseJitType, simdSize, isUnsafe); + } + if (op2->IsVectorAllBitsSet()) { // AllBitsSet represents indices that are always "out of range" which means zero should be @@ -24981,42 +25497,123 @@ GenTree* Compiler::gtNewSimdShuffleNode( (varTypeIsShort(simdBaseType) && !compIsEvexOpportunisticallySupported(isV512Supported, InstructionSet_AVX512BW_VL))) { - if (crossLane) + // if we have short, we want to treat it like byte here + if (varTypeIsShort(simdBaseType)) { - // TODO-XARCH-CQ: We should emulate cross-lane shuffling for byte/sbyte and short/ushort - unreached(); + simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? 
CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; } - // If we aren't crossing lanes, then we can decompose the byte/sbyte - // and short/ushort operations into 2x 128-bit operations + uint8_t leftWants = 0; // result left lane wants which lanes bitfield (1 - left, 2 - right) + uint8_t rightWants = 0; // result right lane wants which lanes bitfield (1 - left, 2 - right) + bool nonDefaultShuffleMask = + false; // tracks whether any element in vecCns is not the default value: 0->15, 0->15 - // We want to build what is essentially the following managed code: - // var op1Lower = op1.GetLower(); - // op1Lower = Ssse3.Shuffle(op1Lower, Vector128.Create(...)); - // - // var op1Upper = op1.GetUpper(); - // op1Upper = Ssse3.Shuffle(op1Upper, Vector128.Create(...)); - // - // return Vector256.Create(op1Lower, op1Upper); + simd_t selCns = {}; + for (size_t index = 0; index < simdSize; index++) + { + // get pointer to our leftWants/rightWants + uint8_t* wants = (index < 16) ? &leftWants : &rightWants; - simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE; + // update our wants based on which values we use + value = vecCns.u8[index]; + if (value < 16) + { + *wants |= 1; + } + else if (value < 32) + { + *wants |= 2; + } - GenTree* op1Dup = fgMakeMultiUse(&op1); - GenTree* op1Lower = gtNewSimdGetLowerNode(TYP_SIMD16, op1, simdBaseJitType, simdSize); + // update our conditional select mask for if we need 2 shuffles + value ^= static_cast(index & 0x10); + selCns.u8[index] = (value < 32 && value >= 16) ? 
0xFF : 0; - op2 = gtNewVconNode(TYP_SIMD16); - op2->AsVecCon()->gtSimd16Val = vecCns.v128[0]; + // normalise our shuffle mask, and check if it's default + if (vecCns.u8[index] < 32) + { + vecCns.u8[index] &= 0x0F; + } + if (vecCns.u8[index] != (index & 0x0F)) + { + nonDefaultShuffleMask = true; + } + } + + // we might be able to get away with only 1 shuffle, this is the case if neither leftWants nor + // rightWants are 3 (indicating only 0/1 side used) + if (leftWants != 3 && rightWants != 3) + { + // set result to its initial value + retNode = op1; + + // get the permutation control + uint8_t control = 0; + if (leftWants == 2) + { + // if left wants right lane, then set that bit + control |= 1; + } + if (rightWants != 1) + { + // if right wants right lane (or neither), then set the bit for right lane + control |= 16; + } - op1Lower = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Lower, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16); + // create the permutation node + // if we have 16, then we don't need to actually permute, since that's what we start with + if (control != 16) + { + GenTree* retNodeDup = fgMakeMultiUse(&retNode); - GenTree* op1Upper = gtNewSimdGetUpperNode(TYP_SIMD16, op1Dup, simdBaseJitType, simdSize); + cnsNode = gtNewIconNode(control); + retNode = gtNewSimdHWIntrinsicNode(type, retNode, retNodeDup, cnsNode, NI_AVX2_Permute2x128, + simdBaseJitType, simdSize); + } - op2 = gtNewVconNode(TYP_SIMD16); - op2->AsVecCon()->gtSimd16Val = vecCns.v128[1]; + // if we have a non-default shuffle mask, we need to do Avx2.Shuffle + if (nonDefaultShuffleMask) + { + op2 = gtNewVconNode(type); + op2->AsVecCon()->gtSimdVal = vecCns; - op1Upper = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1Upper, op2, NI_SSSE3_Shuffle, simdBaseJitType, 16); + retNode = gtNewSimdHWIntrinsicNode(type, retNode, op2, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + } + } + else + { + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + GenTree* op1Dup2 = fgMakeMultiUse(&op1Dup1); + + // create the control for 
swapping + uint8_t control = 1; // 0b00000001 + cnsNode = gtNewIconNode(control); + GenTree* swap = gtNewSimdHWIntrinsicNode(type, op1, op1Dup1, cnsNode, NI_AVX2_Permute2x128, + simdBaseJitType, simdSize); + + // if we have non-default shuffle mask + if (nonDefaultShuffleMask) + { + // create the shuffle indices node + op2 = gtNewVconNode(type); + op2->AsVecCon()->gtSimdVal = vecCns; + + GenTree* op2Dup = fgMakeMultiUse(&op2); + + // shuffle both op1 and swap(op1) + op1 = gtNewSimdHWIntrinsicNode(type, op1Dup2, op2, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + swap = gtNewSimdHWIntrinsicNode(type, swap, op2Dup, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + } + + // select the appropriate values + GenTree* selNode = gtNewVconNode(type); + selNode->AsVecCon()->gtSimdVal = selCns; + retNode = gtNewSimdHWIntrinsicNode(type, op1, swap, selNode, NI_AVX2_BlendVariable, simdBaseJitType, + simdSize); + } - return gtNewSimdWithUpperNode(type, op1Lower, op1Upper, simdBaseJitType, simdSize); + assert(retNode != nullptr); + return retNode; } if (elementSize == 4) @@ -29430,15 +30027,20 @@ bool GenTreeHWIntrinsic::ShouldConstantProp(GenTree* operand, GenTreeVecCon* vec #endif // TARGET_XARCH case NI_Vector128_Shuffle: + case NI_Vector128_ShuffleUnsafe: #if defined(TARGET_XARCH) case NI_Vector256_Shuffle: + case NI_Vector256_ShuffleUnsafe: case NI_Vector512_Shuffle: + case NI_Vector512_ShuffleUnsafe: #elif defined(TARGET_ARM64) case NI_Vector64_Shuffle: + case NI_Vector64_ShuffleUnsafe: #endif { - // The shuffle indices need to be constant so we can preserve - // the node as a hwintrinsic instead of rewriting as a user call. + // The shuffle indices ideally are constant so we can get the best + // codegen possible. There are also some case/s where it would have + // to rewrite as a user call instead depending on available intrinsics. 
assert(GetOperandCount() == 2); return IsUserCall() && (operand == Op(2)); } diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index e7bde27b11d12..3c3b929f19cfa 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2151,30 +2151,35 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector64_Shuffle: case NI_Vector128_Shuffle: + case NI_Vector64_ShuffleUnsafe: + case NI_Vector128_ShuffleUnsafe: { assert((sig->numArgs == 2) || (sig->numArgs == 3)); assert((simdSize == 8) || (simdSize == 16)); GenTree* indices = impStackTop(0).val; - if (!indices->IsCnsVec() || !IsValidForShuffle(indices->AsVecCon(), simdSize, simdBaseType)) + if (!IsValidForShuffle(indices, simdSize, simdBaseType)) + { + break; + } + + if (!indices->IsCnsVec()) { assert(sig->numArgs == 2); - if (!opts.OptimizationEnabled()) + if (opts.OptimizationEnabled()) { // Only enable late stage rewriting if optimizations are enabled // as we won't otherwise encounter a constant at the later point - return nullptr; - } + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - - retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); - break; + retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); + break; + } } if (sig->numArgs == 2) @@ -2182,7 +2187,15 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize); + bool isUnsafe = intrinsic == NI_Vector64_ShuffleUnsafe || intrinsic == NI_Vector128_ShuffleUnsafe; + if (indices->IsCnsVec()) + { + retNode = gtNewSimdShuffleNode(retType, op1, 
op2, simdBaseJitType, simdSize, isUnsafe); + } + else + { + retNode = gtNewSimdShuffleNodeVariable(retType, op1, op2, simdBaseJitType, simdSize, isUnsafe); + } } break; } diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index c3649cb64beb5..1cba9cc914384 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -82,6 +82,7 @@ HARDWARE_INTRINSIC(Vector64, MultiplyAddEstimate, HARDWARE_INTRINSIC(Vector64, Narrow, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Round, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector64, ShuffleUnsafe, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, Sqrt, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, StoreAligned, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, StoreAlignedNonTemporal, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -195,6 +196,7 @@ HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector128, ShuffleUnsafe, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 691b8f6bd4521..99805b8030cfb 
100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -100,6 +100,7 @@ HARDWARE_INTRINSIC(Vector128, MultiplyAddEstimate, HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Round, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector128, ShuffleUnsafe, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) @@ -210,6 +211,7 @@ HARDWARE_INTRINSIC(Vector256, MultiplyAddEstimate, HARDWARE_INTRINSIC(Vector256, Narrow, 32, 2, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, Round, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector256, ShuffleUnsafe, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector256, Sqrt, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, StoreAligned, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, StoreAlignedNonTemporal, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) @@ -321,6 +323,7 @@ HARDWARE_INTRINSIC(Vector512, MultiplyAddEstimate, HARDWARE_INTRINSIC(Vector512, Narrow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Round, 64, 
-1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Vector512, ShuffleUnsafe, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector512, Sqrt, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, StoreAlignedNonTemporal, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 3c4474599cbc7..a90ba0819872d 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -3461,30 +3461,36 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector128_Shuffle: case NI_Vector256_Shuffle: case NI_Vector512_Shuffle: + case NI_Vector128_ShuffleUnsafe: + case NI_Vector256_ShuffleUnsafe: + case NI_Vector512_ShuffleUnsafe: { assert((sig->numArgs == 2) || (sig->numArgs == 3)); assert((simdSize == 16) || (simdSize == 
32) || (simdSize == 64)); GenTree* indices = impStackTop(0).val; - if (!indices->IsCnsVec() || !IsValidForShuffle(indices->AsVecCon(), simdSize, simdBaseType)) + if (!IsValidForShuffle(indices, simdSize, simdBaseType)) + { + break; + } + + if (!indices->IsCnsVec()) { assert(sig->numArgs == 2); - if (!opts.OptimizationEnabled()) + if (opts.OptimizationEnabled()) { // Only enable late stage rewriting if optimizations are enabled // as we won't otherwise encounter a constant at the later point - return nullptr; - } - - op2 = impSIMDPopStack(); - op1 = impSIMDPopStack(); + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); - break; + retNode->AsHWIntrinsic()->SetMethodHandle(this, method R2RARG(*entryPoint)); + break; + } } if (sig->numArgs == 2) @@ -3492,7 +3498,16 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize); + bool isUnsafe = intrinsic == NI_Vector128_ShuffleUnsafe || intrinsic == NI_Vector256_ShuffleUnsafe || + intrinsic == NI_Vector512_ShuffleUnsafe; + if (indices->IsCnsVec()) + { + retNode = gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize, isUnsafe); + } + else + { + retNode = gtNewSimdShuffleNodeVariable(retType, op1, op2, simdBaseJitType, simdSize, isUnsafe); + } } break; } diff --git a/src/coreclr/jit/rationalize.cpp b/src/coreclr/jit/rationalize.cpp index 708de9b8b8c34..d20c5504b033c 100644 --- a/src/coreclr/jit/rationalize.cpp +++ b/src/coreclr/jit/rationalize.cpp @@ -323,11 +323,15 @@ void Rationalizer::RewriteHWIntrinsicAsUserCall(GenTree** use, ArrayStackIsCnsVec() && comp->IsValidForShuffle(op2->AsVecCon(), simdSize, 
simdBaseType)) + if (!comp->IsValidForShuffle(op2, simdSize, simdBaseType)) { - result = comp->gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize); + break; + } + + bool isUnsafe = intrinsicId == NI_Vector128_ShuffleUnsafe; +#if defined(TARGET_XARCH) + isUnsafe = + isUnsafe || intrinsicId == NI_Vector256_ShuffleUnsafe || intrinsicId == NI_Vector512_ShuffleUnsafe; +#elif defined(TARGET_ARM64) + isUnsafe = isUnsafe || intrinsicId == NI_Vector64_ShuffleUnsafe; +#endif + + if (op2->IsCnsVec()) + { + result = comp->gtNewSimdShuffleNode(retType, op1, op2, simdBaseJitType, simdSize, isUnsafe); + } + else + { + result = comp->gtNewSimdShuffleNodeVariable(retType, op1, op2, simdBaseJitType, simdSize, isUnsafe); } break; } diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs index 3f022bc476467..8d63e143860fd 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs @@ -2976,31 +2976,31 @@ public static Vector128 Shuffle(Vector128 vector, Vector128 /// The input vector from which values are selected. /// The per-element indices used to select a value from . /// A new vector containing the values from selected by the given . - /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 15]. - /// On hardware with support, indices are treated as modulo 16, and if the high bit is set, the result will be set to 0 for that element. - /// On hardware with or support, this method behaves the same as Shuffle. - [MethodImpl(MethodImplOptions.AggressiveInlining)] + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 15]. 
+ [Intrinsic] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] [CompExactlyDependsOn(typeof(PackedSimd))] - internal static Vector128 ShuffleUnsafe(Vector128 vector, Vector128 indices) + public static Vector128 ShuffleUnsafe(Vector128 vector, Vector128 indices) { - if (Ssse3.IsSupported) - { - return Ssse3.Shuffle(vector, indices); - } - - if (AdvSimd.Arm64.IsSupported) - { - return AdvSimd.Arm64.VectorTableLookup(vector, indices); - } - - if (PackedSimd.IsSupported) - { - return PackedSimd.Swizzle(vector, indices); - } + return Shuffle(vector, indices); + } + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// Behavior is platform-dependent for out-of-range indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 15]. + [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] + public static Vector128 ShuffleUnsafe(Vector128 vector, Vector128 indices) + { return Shuffle(vector, indices); } @@ -3053,6 +3053,41 @@ public static Vector128 Shuffle(Vector128 vector, Vector128Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 7]. 
+ [Intrinsic] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(Avx10v1))] + [CompExactlyDependsOn(typeof(Avx512BW.VL))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] + public static Vector128 ShuffleUnsafe(Vector128 vector, Vector128 indices) + { + return Shuffle(vector, indices); + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 7]. + [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(Avx10v1))] + [CompExactlyDependsOn(typeof(Avx512BW.VL))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] + public static Vector128 ShuffleUnsafe(Vector128 vector, Vector128 indices) + { + return Shuffle(vector, indices); + } + /// Creates a new vector by selecting values from an input vector using a set of indices. /// The input vector from which values are selected. /// The per-element indices used to select a value from . @@ -3126,6 +3161,55 @@ public static Vector128 Shuffle(Vector128 vector, Vector128 i return result; } + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 3]. 
+ [Intrinsic] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] + public static Vector128 ShuffleUnsafe(Vector128 vector, Vector128 indices) + { + return Shuffle(vector, indices); + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 3]. + [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] + public static Vector128 ShuffleUnsafe(Vector128 vector, Vector128 indices) + { + return Shuffle(vector, indices); + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 3]. + [Intrinsic] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] + public static Vector128 ShuffleUnsafe(Vector128 vector, Vector128 indices) + { + return Shuffle(vector, indices); + } + /// Creates a new vector by selecting values from an input vector using a set of indices. 
/// The input vector from which values are selected. /// The per-element indices used to select a value from . @@ -3199,6 +3283,55 @@ public static Vector128 Shuffle(Vector128 vector, Vector128Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 1]. + [Intrinsic] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] + public static Vector128 ShuffleUnsafe(Vector128 vector, Vector128 indices) + { + return Shuffle(vector, indices); + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 1]. + [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] + public static Vector128 ShuffleUnsafe(Vector128 vector, Vector128 indices) + { + return Shuffle(vector, indices); + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . 
+ /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 1]. + [Intrinsic] + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(Avx))] + [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] + public static Vector128 ShuffleUnsafe(Vector128 vector, Vector128 indices) + { + return Shuffle(vector, indices); + } + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128 Sin(Vector128 vector) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs index 2edd04c3fc99c..ff5ca2da1e84a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs @@ -2887,6 +2887,37 @@ public static Vector256 Shuffle(Vector256 vector, Vector256 return result; } + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// Behavior is platform-dependent for out-of-range indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 31]. + [Intrinsic] + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512Vbmi.VL))] + [CompExactlyDependsOn(typeof(Avx10v1))] + public static Vector256 ShuffleUnsafe(Vector256 vector, Vector256 indices) + { + return Shuffle(vector, indices); + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// Behavior is platform-dependent for out-of-range indices. 
+ /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 31]. + [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512Vbmi.VL))] + [CompExactlyDependsOn(typeof(Avx10v1))] + public static Vector256 ShuffleUnsafe(Vector256 vector, Vector256 indices) + { + return Shuffle(vector, indices); + } + /// Creates a new vector by selecting values from an input vector using a set of indices. /// The input vector from which values are selected. /// The per-element indices used to select a value from . @@ -2936,6 +2967,35 @@ public static Vector256 Shuffle(Vector256 vector, Vector256Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 15]. + [Intrinsic] + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512BW.VL))] + [CompExactlyDependsOn(typeof(Avx10v1))] + public static Vector256 ShuffleUnsafe(Vector256 vector, Vector256 indices) + { + return Shuffle(vector, indices); + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 15]. 
+ [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512BW.VL))] + [CompExactlyDependsOn(typeof(Avx10v1))] + public static Vector256 ShuffleUnsafe(Vector256 vector, Vector256 indices) + { + return Shuffle(vector, indices); + } + /// Creates a new vector by selecting values from an input vector using a set of indices. /// The input vector from which values are selected. /// The per-element indices used to select a value from . @@ -3009,6 +3069,43 @@ public static Vector256 Shuffle(Vector256 vector, Vector256 i return result; } + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 7]. + [Intrinsic] + [CompExactlyDependsOn(typeof(Avx2))] + public static Vector256 ShuffleUnsafe(Vector256 vector, Vector256 indices) + { + return Shuffle(vector, indices); + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 7]. + [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(Avx2))] + public static Vector256 ShuffleUnsafe(Vector256 vector, Vector256 indices) + { + return Shuffle(vector, indices); + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . 
+ /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 7]. + [Intrinsic] + [CompExactlyDependsOn(typeof(Avx2))] + public static Vector256 ShuffleUnsafe(Vector256 vector, Vector256 indices) + { + return Shuffle(vector, indices); + } + /// Creates a new vector by selecting values from an input vector using a set of indices. /// The input vector from which values are selected. /// The per-element indices used to select a value from . @@ -3082,6 +3179,49 @@ public static Vector256 Shuffle(Vector256 vector, Vector256Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 3]. + [Intrinsic] + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512F.VL))] + [CompExactlyDependsOn(typeof(Avx10v1))] + public static Vector256 ShuffleUnsafe(Vector256 vector, Vector256 indices) + { + return Shuffle(vector, indices); + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 3]. 
+ [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512F.VL))] + [CompExactlyDependsOn(typeof(Avx10v1))] + public static Vector256 ShuffleUnsafe(Vector256 vector, Vector256 indices) + { + return Shuffle(vector, indices); + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 3]. + [Intrinsic] + [CompExactlyDependsOn(typeof(Avx2))] + [CompExactlyDependsOn(typeof(Avx512F.VL))] + [CompExactlyDependsOn(typeof(Avx10v1))] + public static Vector256 ShuffleUnsafe(Vector256 vector, Vector256 indices) + { + return Shuffle(vector, indices); + } + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Sin(Vector256 vector) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs index 10e6d278594be..85845397850a0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector512.cs @@ -2928,6 +2928,33 @@ public static Vector512 Shuffle(Vector512 vector, Vector512 return result; } + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// Behavior is platform-dependent for out-of-range indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . 
+ /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 63]. + [Intrinsic] + [CompExactlyDependsOn(typeof(Avx512Vbmi))] + public static Vector512 ShuffleUnsafe(Vector512 vector, Vector512 indices) + { + return Shuffle(vector, indices); + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// Behavior is platform-dependent for out-of-range indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 63]. + [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(Avx512Vbmi))] + public static Vector512 ShuffleUnsafe(Vector512 vector, Vector512 indices) + { + return Shuffle(vector, indices); + } + /// Creates a new vector by selecting values from an input vector using a set of indices. /// The input vector from which values are selected. /// The per-element indices used to select a value from . @@ -2977,6 +3004,31 @@ public static Vector512 Shuffle(Vector512 vector, Vector512Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. + /// The per-element indices used to select a value from . + /// A new vector containing the values from selected by the given . + /// Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that are normalized to [0, 31]. + [Intrinsic] + [CompExactlyDependsOn(typeof(Avx512BW))] + public static Vector512 ShuffleUnsafe(Vector512 vector, Vector512 indices) + { + return Shuffle(vector, indices); + } + + /// Creates a new vector by selecting values from an input vector using a set of indices. + /// The input vector from which values are selected. 
+ /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns> + /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 31].</remarks> + [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(Avx512BW))] + public static Vector512 ShuffleUnsafe(Vector512 vector, Vector512 indices) + { + return Shuffle(vector, indices); + } + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> /// <param name="vector">The input vector from which values are selected.</param> /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> @@ -3050,6 +3102,43 @@ public static Vector512 Shuffle(Vector512 vector, Vector512 i return result; } + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> + /// <param name="vector">The input vector from which values are selected.</param> + /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns> + /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 15].</remarks> + [Intrinsic] + [CompExactlyDependsOn(typeof(Avx512F))] + public static Vector512 ShuffleUnsafe(Vector512 vector, Vector512 indices) + { + return Shuffle(vector, indices); + } + + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> + /// <param name="vector">The input vector from which values are selected.</param> + /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns> + /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 15].</remarks>
+ [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(Avx512F))] + public static Vector512 ShuffleUnsafe(Vector512 vector, Vector512 indices) + { + return Shuffle(vector, indices); + } + + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> + /// <param name="vector">The input vector from which values are selected.</param> + /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns> + /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 15].</remarks> + [Intrinsic] + [CompExactlyDependsOn(typeof(Avx512F))] + public static Vector512 ShuffleUnsafe(Vector512 vector, Vector512 indices) + { + return Shuffle(vector, indices); + } + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> /// <param name="vector">The input vector from which values are selected.</param> /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> @@ -3123,6 +3212,43 @@ public static Vector512 Shuffle(Vector512 vector, Vector512Creates a new vector by selecting values from an input vector using a set of indices. + /// <param name="vector">The input vector from which values are selected.</param> + /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns> + /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 7].</remarks> + [Intrinsic] + [CompExactlyDependsOn(typeof(Avx512F))] + public static Vector512 ShuffleUnsafe(Vector512 vector, Vector512 indices) + { + return Shuffle(vector, indices); + } + + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> + /// <param name="vector">The input vector from which values are selected.</param> + /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns>
+ /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 7].</remarks> + [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(Avx512F))] + public static Vector512 ShuffleUnsafe(Vector512 vector, Vector512 indices) + { + return Shuffle(vector, indices); + } + + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> + /// <param name="vector">The input vector from which values are selected.</param> + /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns> + /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 7].</remarks> + [Intrinsic] + [CompExactlyDependsOn(typeof(Avx512F))] + public static Vector512 ShuffleUnsafe(Vector512 vector, Vector512 indices) + { + return Shuffle(vector, indices); + } + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector512 Sin(Vector512 vector) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs index dfd95f4021df1..dc53f4c298fe4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs @@ -2947,6 +2947,33 @@ public static Vector64 Shuffle(Vector64 vector, Vector64 in return result; } + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices. + /// Behavior is platform-dependent for out-of-range indices.</summary> + /// <param name="vector">The input vector from which values are selected.</param> + /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns> + /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 7].</remarks>
+ [Intrinsic] + [CompExactlyDependsOn(typeof(AdvSimd))] + public static Vector64 ShuffleUnsafe(Vector64 vector, Vector64 indices) + { + return Shuffle(vector, indices); + } + + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices. + /// Behavior is platform-dependent for out-of-range indices.</summary> + /// <param name="vector">The input vector from which values are selected.</param> + /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns> + /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 7].</remarks> + [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(AdvSimd))] + public static Vector64 ShuffleUnsafe(Vector64 vector, Vector64 indices) + { + return Shuffle(vector, indices); + } + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> /// <param name="vector">The input vector from which values are selected.</param> /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> @@ -2996,6 +3023,31 @@ public static Vector64 Shuffle(Vector64 vector, Vector64 return result; } + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> + /// <param name="vector">The input vector from which values are selected.</param> + /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns> + /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 3].</remarks> + [Intrinsic] + [CompExactlyDependsOn(typeof(AdvSimd))] + public static Vector64 ShuffleUnsafe(Vector64 vector, Vector64 indices) + { + return Shuffle(vector, indices); + } + + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> + /// <param name="vector">The input vector from which values are selected.</param> + /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns>
+ /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 3].</remarks> + [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(AdvSimd))] + public static Vector64 ShuffleUnsafe(Vector64 vector, Vector64 indices) + { + return Shuffle(vector, indices); + } + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> /// <param name="vector">The input vector from which values are selected.</param> /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> @@ -3069,6 +3121,43 @@ public static Vector64 Shuffle(Vector64 vector, Vector64 indi return result; } + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> + /// <param name="vector">The input vector from which values are selected.</param> + /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns> + /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 1].</remarks> + [Intrinsic] + [CompExactlyDependsOn(typeof(AdvSimd))] + public static Vector64 ShuffleUnsafe(Vector64 vector, Vector64 indices) + { + return Shuffle(vector, indices); + } + + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> + /// <param name="vector">The input vector from which values are selected.</param> + /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns> + /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 1].</remarks> + [Intrinsic] + [CLSCompliant(false)] + [CompExactlyDependsOn(typeof(AdvSimd))] + public static Vector64 ShuffleUnsafe(Vector64 vector, Vector64 indices) + { + return Shuffle(vector, indices); + } + + /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary> + /// <param name="vector">The input vector from which values are selected.</param>
+ /// <param name="indices">The per-element indices used to select a value from <paramref name="vector" />.</param> + /// <returns>A new vector containing the values from <paramref name="vector" /> selected by the given <paramref name="indices" />.</returns> + /// <remarks>Unlike Shuffle, this method delegates to the underlying hardware intrinsic without ensuring that <paramref name="indices" /> are normalized to [0, 1].</remarks> + [Intrinsic] + [CompExactlyDependsOn(typeof(AdvSimd))] + public static Vector64 ShuffleUnsafe(Vector64 vector, Vector64 indices) + { + return Shuffle(vector, indices); + } + internal static Vector64 Sin(Vector64 vector) where T : ITrigonometricFunctions { diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index d7d401d2f5c83..7890eb1e6724f 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -301,6 +301,20 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector128 vector, [System.CLSCompliantAttribute(false)] public static System.Runtime.Intrinsics.Vector128 Shuffle(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } public static System.Runtime.Intrinsics.Vector128 Shuffle(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector128 ShuffleUnsafe(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector128 ShuffleUnsafe(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector128 ShuffleUnsafe(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector128
ShuffleUnsafe(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector128 ShuffleUnsafe(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector128 ShuffleUnsafe(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector128 ShuffleUnsafe(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector128 ShuffleUnsafe(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector128 ShuffleUnsafe(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector128 ShuffleUnsafe(System.Runtime.Intrinsics.Vector128 vector, System.Runtime.Intrinsics.Vector128 indices) { throw null; } public static System.Runtime.Intrinsics.Vector128 Sin(System.Runtime.Intrinsics.Vector128 vector) { throw null; } public static System.Runtime.Intrinsics.Vector128 Sin(System.Runtime.Intrinsics.Vector128 vector) { throw null; } public static (System.Runtime.Intrinsics.Vector128 Sin, System.Runtime.Intrinsics.Vector128 Cos) SinCos(System.Runtime.Intrinsics.Vector128 vector) { throw null; } @@ -680,6 +694,20 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector256 vector, [System.CLSCompliantAttribute(false)] public static System.Runtime.Intrinsics.Vector256 Shuffle(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } public static System.Runtime.Intrinsics.Vector256 
Shuffle(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector256 ShuffleUnsafe(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector256 ShuffleUnsafe(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector256 ShuffleUnsafe(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector256 ShuffleUnsafe(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector256 ShuffleUnsafe(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector256 ShuffleUnsafe(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector256 ShuffleUnsafe(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector256 ShuffleUnsafe(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector256 ShuffleUnsafe(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector256 ShuffleUnsafe(System.Runtime.Intrinsics.Vector256 vector, System.Runtime.Intrinsics.Vector256 indices) { throw null; } public static 
System.Runtime.Intrinsics.Vector256 Sin(System.Runtime.Intrinsics.Vector256 vector) { throw null; } public static System.Runtime.Intrinsics.Vector256 Sin(System.Runtime.Intrinsics.Vector256 vector) { throw null; } public static (System.Runtime.Intrinsics.Vector256 Sin, System.Runtime.Intrinsics.Vector256 Cos) SinCos(System.Runtime.Intrinsics.Vector256 vector) { throw null; } @@ -1060,6 +1088,20 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector512 vector, [System.CLSCompliantAttribute(false)] public static System.Runtime.Intrinsics.Vector512 Shuffle(System.Runtime.Intrinsics.Vector512 vector, System.Runtime.Intrinsics.Vector512 indices) { throw null; } public static System.Runtime.Intrinsics.Vector512 Shuffle(System.Runtime.Intrinsics.Vector512 vector, System.Runtime.Intrinsics.Vector512 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector512 ShuffleUnsafe(System.Runtime.Intrinsics.Vector512 vector, System.Runtime.Intrinsics.Vector512 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector512 ShuffleUnsafe(System.Runtime.Intrinsics.Vector512 vector, System.Runtime.Intrinsics.Vector512 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector512 ShuffleUnsafe(System.Runtime.Intrinsics.Vector512 vector, System.Runtime.Intrinsics.Vector512 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector512 ShuffleUnsafe(System.Runtime.Intrinsics.Vector512 vector, System.Runtime.Intrinsics.Vector512 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector512 ShuffleUnsafe(System.Runtime.Intrinsics.Vector512 vector, System.Runtime.Intrinsics.Vector512 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector512 ShuffleUnsafe(System.Runtime.Intrinsics.Vector512 vector, System.Runtime.Intrinsics.Vector512 indices) { throw null; } + public 
static System.Runtime.Intrinsics.Vector512 ShuffleUnsafe(System.Runtime.Intrinsics.Vector512 vector, System.Runtime.Intrinsics.Vector512 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector512 ShuffleUnsafe(System.Runtime.Intrinsics.Vector512 vector, System.Runtime.Intrinsics.Vector512 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector512 ShuffleUnsafe(System.Runtime.Intrinsics.Vector512 vector, System.Runtime.Intrinsics.Vector512 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector512 ShuffleUnsafe(System.Runtime.Intrinsics.Vector512 vector, System.Runtime.Intrinsics.Vector512 indices) { throw null; } public static System.Runtime.Intrinsics.Vector512 Sin(System.Runtime.Intrinsics.Vector512 vector) { throw null; } public static System.Runtime.Intrinsics.Vector512 Sin(System.Runtime.Intrinsics.Vector512 vector) { throw null; } public static (System.Runtime.Intrinsics.Vector512 Sin, System.Runtime.Intrinsics.Vector512 Cos) SinCos(System.Runtime.Intrinsics.Vector512 vector) { throw null; } @@ -1405,6 +1447,16 @@ public static void CopyTo(this System.Runtime.Intrinsics.Vector64 vector, [System.CLSCompliantAttribute(false)] public static System.Runtime.Intrinsics.Vector64 Shuffle(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } public static System.Runtime.Intrinsics.Vector64 Shuffle(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector64 ShuffleUnsafe(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector64 ShuffleUnsafe(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector64 
ShuffleUnsafe(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector64 ShuffleUnsafe(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector64 ShuffleUnsafe(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } + [System.CLSCompliantAttribute(false)] + public static System.Runtime.Intrinsics.Vector64 ShuffleUnsafe(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } + public static System.Runtime.Intrinsics.Vector64 ShuffleUnsafe(System.Runtime.Intrinsics.Vector64 vector, System.Runtime.Intrinsics.Vector64 indices) { throw null; } public static System.Runtime.Intrinsics.Vector64 Sin(System.Runtime.Intrinsics.Vector64 vector) { throw null; } public static System.Runtime.Intrinsics.Vector64 Sin(System.Runtime.Intrinsics.Vector64 vector) { throw null; } public static (System.Runtime.Intrinsics.Vector64 Sin, System.Runtime.Intrinsics.Vector64 Cos) SinCos(System.Runtime.Intrinsics.Vector64 vector) { throw null; } diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs index 27955248daac8..53b0a0b55a71f 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs @@ -2776,6 +2776,486 @@ public void Vector128UInt64ShuffleOneInputWithZeroIndicesTest() } } + [Fact] + public void Vector128ByteShuffleUnsafeOneInputTest() + { + Vector128 vector = Vector128.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Create((byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 
0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((byte)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128DoubleShuffleUnsafeOneInputTest() + { + Vector128 vector = Vector128.Create((double)1, 2); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Create((long)1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((double)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int16ShuffleUnsafeOneInputTest() + { + Vector128 vector = Vector128.Create((short)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Create((short)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((short)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int32ShuffleUnsafeOneInputTest() + { + Vector128 vector = Vector128.Create((int)1, 2, 3, 4); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Create((int)3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((int)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int64ShuffleUnsafeOneInputTest() + { + Vector128 vector = Vector128.Create((long)1, 2); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Create((long)1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((long)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128SByteShuffleUnsafeOneInputTest() + { + Vector128 vector = Vector128.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Create((sbyte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + 
Assert.Equal((sbyte)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128SingleShuffleUnsafeOneInputTest() + { + Vector128 vector = Vector128.Create((float)1, 2, 3, 4); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Create((int)3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((float)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt16ShuffleUnsafeOneInputTest() + { + Vector128 vector = Vector128.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ushort)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt32ShuffleUnsafeOneInputTest() + { + Vector128 vector = Vector128.Create((uint)1, 2, 3, 4); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Create((uint)3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((uint)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt64ShuffleUnsafeOneInputTest() + { + Vector128 vector = Vector128.Create((ulong)1, 2); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Create((ulong)1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ulong)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128ByteShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.ShuffleUnsafe(Vector128.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), Vector128.Create((byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((byte)(Vector128.Count - index), result.GetElement(index)); + } + } + + 
[Fact] + public void Vector128DoubleShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.ShuffleUnsafe(Vector128.Create((double)1, 2), Vector128.Create((long)1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((double)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int16ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.ShuffleUnsafe(Vector128.Create((short)1, 2, 3, 4, 5, 6, 7, 8), Vector128.Create((short)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((short)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int32ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.ShuffleUnsafe(Vector128.Create((int)1, 2, 3, 4), Vector128.Create((int)3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((int)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int64ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.ShuffleUnsafe(Vector128.Create((long)1, 2), Vector128.Create((long)1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((long)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128SByteShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.ShuffleUnsafe(Vector128.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), Vector128.Create((sbyte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((sbyte)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128SingleShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.ShuffleUnsafe(Vector128.Create((float)1, 
2, 3, 4), Vector128.Create((int)3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((float)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt16ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.ShuffleUnsafe(Vector128.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8), Vector128.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ushort)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt32ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.ShuffleUnsafe(Vector128.Create((uint)1, 2, 3, 4), Vector128.Create((uint)3, 2, 1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((uint)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt64ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector128 result = Vector128.ShuffleUnsafe(Vector128.Create((ulong)1, 2), Vector128.Create((ulong)1, 0)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ulong)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128ByteShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector128 indices = Vector128.Create((byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector128 result = Vector128.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((byte)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128DoubleShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((double)1, 2); + Vector128 indices = Vector128.Create((long)1, 0); + Vector128 result = 
Vector128.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((double)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int16ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((short)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 indices = Vector128.Create((short)7, 6, 5, 4, 3, 2, 1, 0); + Vector128 result = Vector128.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((short)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int32ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((int)1, 2, 3, 4); + Vector128 indices = Vector128.Create((int)3, 2, 1, 0); + Vector128 result = Vector128.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((int)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int64ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((long)1, 2); + Vector128 indices = Vector128.Create((long)1, 0); + Vector128 result = Vector128.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((long)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128SByteShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector128 indices = Vector128.Create((sbyte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector128 result = Vector128.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((sbyte)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void 
Vector128SingleShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((float)1, 2, 3, 4); + Vector128 indices = Vector128.Create((int)3, 2, 1, 0); + Vector128 result = Vector128.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((float)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt16ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 indices = Vector128.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0); + Vector128 result = Vector128.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ushort)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt32ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((uint)1, 2, 3, 4); + Vector128 indices = Vector128.Create((uint)3, 2, 1, 0); + Vector128 result = Vector128.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((uint)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt64ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector128 vector = Vector128.Create((ulong)1, 2); + Vector128 indices = Vector128.Create((ulong)1, 0); + Vector128 result = Vector128.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ulong)(Vector128.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector128ByteShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((byte)1, 
result.GetElement(index)); + } + } + + [Fact] + public void Vector128DoubleShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((double)1, 2); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((double)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int16ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((short)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((short)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int32ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((int)1, 2, 3, 4); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((int)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128Int64ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((long)1, 2); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((long)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128SByteShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((sbyte)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128SingleShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((float)1, 2, 3, 4); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Zero); + + for (int index = 0; index 
< Vector128.Count; index++) + { + Assert.Equal((float)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt16ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ushort)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt32ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((uint)1, 2, 3, 4); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((uint)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector128UInt64ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector128 vector = Vector128.Create((ulong)1, 2); + Vector128 result = Vector128.ShuffleUnsafe(vector, Vector128.Zero); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ulong)1, result.GetElement(index)); + } + } + [Fact] public unsafe void Vector128ByteStoreTest() { diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs index d16e5eed48180..60feaf02f3803 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs @@ -3402,6 +3402,646 @@ public void Vector256UInt64ShuffleOneInputWithZeroIndicesTest() } } + [Fact] + public void Vector256ByteShuffleUnsafeOneInputTest() + { + Vector256 vector = Vector256.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Create((byte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 
12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((byte)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256DoubleShuffleUnsafeOneInputTest() + { + Vector256 vector = Vector256.Create((double)1, 2, 3, 4); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Create((long)3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((double)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int16ShuffleUnsafeOneInputTest() + { + Vector256 vector = Vector256.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Create((short)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((short)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int32ShuffleUnsafeOneInputTest() + { + Vector256 vector = Vector256.Create((int)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Create((int)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((int)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int64ShuffleUnsafeOneInputTest() + { + Vector256 vector = Vector256.Create((long)1, 2, 3, 4); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Create((long)3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((long)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SByteShuffleUnsafeOneInputTest() + { + Vector256 vector = Vector256.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + 
Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Create((sbyte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((sbyte)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SingleShuffleUnsafeOneInputTest() + { + Vector256 vector = Vector256.Create((float)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Create((int)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((float)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt16ShuffleUnsafeOneInputTest() + { + Vector256 vector = Vector256.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Create((ushort)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ushort)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt32ShuffleUnsafeOneInputTest() + { + Vector256 vector = Vector256.Create((uint)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Create((uint)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((uint)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt64ShuffleUnsafeOneInputTest() + { + Vector256 vector = Vector256.Create((ulong)1, 2, 3, 4); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Create((ulong)3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ulong)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void 
Vector256ByteShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), Vector256.Create((byte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((byte)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256DoubleShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((double)1, 2, 3, 4), Vector256.Create((long)3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((double)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int16ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), Vector256.Create((short)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((short)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int32ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((int)1, 2, 3, 4, 5, 6, 7, 8), Vector256.Create((int)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((int)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int64ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((long)1, 2, 3, 4), Vector256.Create((long)3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + 
Assert.Equal((long)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SByteShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), Vector256.Create((sbyte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((sbyte)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SingleShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((float)1, 2, 3, 4, 5, 6, 7, 8), Vector256.Create((int)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((float)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt16ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), Vector256.Create((ushort)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ushort)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt32ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((uint)1, 2, 3, 4, 5, 6, 7, 8), Vector256.Create((uint)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((uint)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt64ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((ulong)1, 
2, 3, 4), Vector256.Create((ulong)3, 2, 1, 0)); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ulong)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256ByteShuffleUnsafeOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), Vector256.Create((byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((byte)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((byte)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256DoubleShuffleUnsafeOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((double)1, 2, 3, 4), Vector256.Create((long)1, 0, 3, 2)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((double)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((double)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int16ShuffleUnsafeOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), Vector256.Create((short)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((short)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < 
Vector256.Count; index++) + { + Assert.Equal((short)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int32ShuffleUnsafeOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((int)1, 2, 3, 4, 5, 6, 7, 8), Vector256.Create((int)3, 2, 1, 0, 7, 6, 5, 4)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((int)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((int)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int64ShuffleUnsafeOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((long)1, 2, 3, 4), Vector256.Create((long)1, 0, 3, 2)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((long)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((long)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SByteShuffleUnsafeOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), Vector256.Create((sbyte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((sbyte)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((sbyte)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] 
+ public void Vector256SingleShuffleUnsafeOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((float)1, 2, 3, 4, 5, 6, 7, 8), Vector256.Create((int)3, 2, 1, 0, 7, 6, 5, 4)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((float)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((float)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt16ShuffleUnsafeOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), Vector256.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ushort)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((ushort)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt32ShuffleUnsafeOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((uint)1, 2, 3, 4, 5, 6, 7, 8), Vector256.Create((uint)3, 2, 1, 0, 7, 6, 5, 4)); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((uint)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((uint)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt64ShuffleUnsafeOneInputWithDirectVectorAndNoCrossLaneTest() + { + Vector256 result = Vector256.ShuffleUnsafe(Vector256.Create((ulong)1, 2, 3, 4), Vector256.Create((ulong)1, 0, 3, 2)); + + for (int index = 0; 
index < Vector128.Count; index++) + { + Assert.Equal((ulong)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((ulong)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector256ByteShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector256 indices = Vector256.Create((byte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((byte)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256DoubleShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((double)1, 2, 3, 4); + Vector256 indices = Vector256.Create((long)3, 2, 1, 0); + Vector256 result = Vector256.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((double)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int16ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector256 indices = Vector256.Create((short)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((short)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int32ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((int)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 
indices = Vector256.Create((int)7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((int)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int64ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((long)1, 2, 3, 4); + Vector256 indices = Vector256.Create((long)3, 2, 1, 0); + Vector256 result = Vector256.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((long)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SByteShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector256 indices = Vector256.Create((sbyte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((sbyte)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256SingleShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((float)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 indices = Vector256.Create((int)7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((float)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt16ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector256 indices = Vector256.Create((ushort)15, 14, 13, 12, 11, 10, 9, 
8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ushort)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt32ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((uint)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 indices = Vector256.Create((uint)7, 6, 5, 4, 3, 2, 1, 0); + Vector256 result = Vector256.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((uint)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt64ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector256 vector = Vector256.Create((ulong)1, 2, 3, 4); + Vector256 indices = Vector256.Create((ulong)3, 2, 1, 0); + Vector256 result = Vector256.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ulong)(Vector256.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector256ByteShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector256 vector = Vector256.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Zero); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((byte)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector256DoubleShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector256 vector = Vector256.Create((double)1, 2, 3, 4); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Zero); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((double)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int16ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector256 vector = Vector256.Create((short)1, 
2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Zero); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((short)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int32ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector256 vector = Vector256.Create((int)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Zero); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((int)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector256Int64ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector256 vector = Vector256.Create((long)1, 2, 3, 4); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Zero); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((long)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector256SByteShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector256 vector = Vector256.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Zero); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((sbyte)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector256SingleShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector256 vector = Vector256.Create((float)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Zero); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((float)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt16ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector256 vector = Vector256.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Zero); + + for (int index = 0; 
index < Vector256.Count; index++) + { + Assert.Equal((ushort)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt32ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector256 vector = Vector256.Create((uint)1, 2, 3, 4, 5, 6, 7, 8); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Zero); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((uint)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector256UInt64ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector256 vector = Vector256.Create((ulong)1, 2, 3, 4); + Vector256 result = Vector256.ShuffleUnsafe(vector, Vector256.Zero); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ulong)1, result.GetElement(index)); + } + } + [Fact] public unsafe void Vector256ByteStoreTest() { diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs index f996d82369615..edef98388892a 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector512Tests.cs @@ -3394,6 +3394,996 @@ public void Vector512UInt64ShuffleOneInputWithZeroIndicesTest() } } + [Fact] + public void Vector512ByteShuffleUnsafeOneInputTest() + { + Vector512 vector = Vector512.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Create((byte)63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; 
index < Vector512.Count; index++) + { + Assert.Equal((byte)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512DoubleShuffleUnsafeOneInputTest() + { + Vector512 vector = Vector512.Create((double)1, 2, 3, 4, 5, 6, 7, 8); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Create((long)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((double)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int16ShuffleUnsafeOneInputTest() + { + Vector512 vector = Vector512.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Create((short)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((short)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int32ShuffleUnsafeOneInputTest() + { + Vector512 vector = Vector512.Create((int)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Create((int)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((int)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int64ShuffleUnsafeOneInputTest() + { + Vector512 vector = Vector512.Create((long)1, 2, 3, 4, 5, 6, 7, 8); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Create((long)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((long)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512SByteShuffleUnsafeOneInputTest() + { + 
Vector512 vector = Vector512.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Create((sbyte)63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((sbyte)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512SingleShuffleUnsafeOneInputTest() + { + Vector512 vector = Vector512.Create((float)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Create((int)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((float)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt16ShuffleUnsafeOneInputTest() + { + Vector512 vector = Vector512.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Create((ushort)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((ushort)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt32ShuffleUnsafeOneInputTest() + { + Vector512 vector = Vector512.Create((uint)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector512 result = 
Vector512.ShuffleUnsafe(vector, Vector512.Create((uint)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((uint)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt64ShuffleUnsafeOneInputTest() + { + Vector512 vector = Vector512.Create((ulong)1, 2, 3, 4, 5, 6, 7, 8); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Create((ulong)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((ulong)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512ByteShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64), + Vector512.Create((byte)63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + ); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((byte)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512DoubleShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((double)1, 2, 3, 4, 5, 6, 7, 8), + Vector512.Create((long)7, 6, 5, 4, 3, 2, 1, 0) + ); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((double)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int16ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + 
Vector512.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), + Vector512.Create((short)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + ); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((short)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int32ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((int)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), + Vector512.Create((int)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + ); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((int)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int64ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((long)1, 2, 3, 4, 5, 6, 7, 8), + Vector512.Create((long)7, 6, 5, 4, 3, 2, 1, 0) + ); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((long)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512SByteShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64), + Vector512.Create((sbyte)63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + ); + + for (int index = 0; index < Vector512.Count; 
index++) + { + Assert.Equal((sbyte)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512SingleShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((float)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), + Vector512.Create((int)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + ); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((float)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt16ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), + Vector512.Create((ushort)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + ); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((ushort)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt32ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((uint)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), + Vector512.Create((uint)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + ); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((uint)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt64ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((ulong)1, 2, 3, 4, 5, 6, 7, 8), + Vector512.Create((ulong)7, 6, 5, 4, 3, 2, 1, 0) + ); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((ulong)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void 
Vector512ByteShuffleUnsafeOneInputWithDirectVectorAndNoCross128BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64), + Vector512.Create((byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48) + ); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((byte)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((byte)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count - Vector128.Count; index++) + { + Assert.Equal((byte)(Vector512.Count - Vector128.Count - (index - Vector256.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count + Vector128.Count; index < Vector512.Count; index++) + { + Assert.Equal((byte)(Vector512.Count - (index - Vector256.Count - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512DoubleShuffleUnsafeOneInputWithDirectVectorAndNoCross128BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((double)1, 2, 3, 4, 5, 6, 7, 8), + Vector512.Create((long)1, 0, 3, 2, 5, 4, 7, 6) + ); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((double)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((double)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + 
+ for (int index = Vector256.Count; index < Vector512.Count - Vector128.Count; index++) + { + Assert.Equal((double)(Vector512.Count - Vector128.Count - (index - Vector256.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count + Vector128.Count; index < Vector512.Count; index++) + { + Assert.Equal((double)(Vector512.Count - (index - Vector256.Count - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int16ShuffleUnsafeOneInputWithDirectVectorAndNoCross128BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), + Vector512.Create((short)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 23, 22, 21, 20, 19, 18, 17, 16, 31, 30, 29, 28, 27, 26, 25, 24) + ); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((short)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((short)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count - Vector128.Count; index++) + { + Assert.Equal((short)(Vector512.Count - Vector128.Count - (index - Vector256.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count + Vector128.Count; index < Vector512.Count; index++) + { + Assert.Equal((short)(Vector512.Count - (index - Vector256.Count - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int32ShuffleUnsafeOneInputWithDirectVectorAndNoCross128BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((int)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), + Vector512.Create((int)3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12) + ); + + for (int index = 0; index < Vector128.Count; index++) + { 
+ Assert.Equal((int)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((int)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count - Vector128.Count; index++) + { + Assert.Equal((int)(Vector512.Count - Vector128.Count - (index - Vector256.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count + Vector128.Count; index < Vector512.Count; index++) + { + Assert.Equal((int)(Vector512.Count - (index - Vector256.Count - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int64ShuffleUnsafeOneInputWithDirectVectorAndNoCross128BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((long)1, 2, 3, 4, 5, 6, 7, 8), + Vector512.Create((long)1, 0, 3, 2, 5, 4, 7, 6) + ); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((long)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((long)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count - Vector128.Count; index++) + { + Assert.Equal((long)(Vector512.Count - Vector128.Count - (index - Vector256.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count + Vector128.Count; index < Vector512.Count; index++) + { + Assert.Equal((long)(Vector512.Count - (index - Vector256.Count - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512SByteShuffleUnsafeOneInputWithDirectVectorAndNoCross128BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64), + Vector512.Create((sbyte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48) + ); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((sbyte)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((sbyte)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count - Vector128.Count; index++) + { + Assert.Equal((sbyte)(Vector512.Count - Vector128.Count - (index - Vector256.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count + Vector128.Count; index < Vector512.Count; index++) + { + Assert.Equal((sbyte)(Vector512.Count - (index - Vector256.Count - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512SingleShuffleUnsafeOneInputWithDirectVectorAndNoCross128BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((float)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), + Vector512.Create((int)3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12) + ); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((float)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((float)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count - Vector128.Count; index++) + { + Assert.Equal((float)(Vector512.Count - Vector128.Count - (index - Vector256.Count)), result.GetElement(index)); + } + + for (int index = 
Vector256.Count + Vector128.Count; index < Vector512.Count; index++) + { + Assert.Equal((float)(Vector512.Count - (index - Vector256.Count - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt16ShuffleUnsafeOneInputWithDirectVectorAndNoCross128BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), + Vector512.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 23, 22, 21, 20, 19, 18, 17, 16, 31, 30, 29, 28, 27, 26, 25, 24) + ); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ushort)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((ushort)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count - Vector128.Count; index++) + { + Assert.Equal((ushort)(Vector512.Count - Vector128.Count - (index - Vector256.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count + Vector128.Count; index < Vector512.Count; index++) + { + Assert.Equal((ushort)(Vector512.Count - (index - Vector256.Count - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt32ShuffleUnsafeOneInputWithDirectVectorAndNoCross128BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((uint)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), + Vector512.Create((uint)3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12) + ); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((uint)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((uint)(Vector256.Count - (index - Vector128.Count)), 
result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count - Vector128.Count; index++) + { + Assert.Equal((uint)(Vector512.Count - Vector128.Count - (index - Vector256.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count + Vector128.Count; index < Vector512.Count; index++) + { + Assert.Equal((uint)(Vector512.Count - (index - Vector256.Count - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt64ShuffleUnsafeOneInputWithDirectVectorAndNoCross128BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((ulong)1, 2, 3, 4, 5, 6, 7, 8), + Vector512.Create((ulong)1, 0, 3, 2, 5, 4, 7, 6) + ); + + for (int index = 0; index < Vector128.Count; index++) + { + Assert.Equal((ulong)(Vector128.Count - index), result.GetElement(index)); + } + + for (int index = Vector128.Count; index < Vector256.Count; index++) + { + Assert.Equal((ulong)(Vector256.Count - (index - Vector128.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count - Vector128.Count; index++) + { + Assert.Equal((ulong)(Vector512.Count - Vector128.Count - (index - Vector256.Count)), result.GetElement(index)); + } + + for (int index = Vector256.Count + Vector128.Count; index < Vector512.Count; index++) + { + Assert.Equal((ulong)(Vector512.Count - (index - Vector256.Count - Vector128.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512ByteShuffleUnsafeOneInputWithDirectVectorAndNoCross256BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64), + Vector512.Create((byte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 
10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32) + ); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((byte)(Vector256.Count - index), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count; index++) + { + Assert.Equal((byte)(Vector512.Count - (index - Vector256.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512DoubleShuffleUnsafeOneInputWithDirectVectorAndNoCross256BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((double)1, 2, 3, 4, 5, 6, 7, 8), + Vector512.Create((long)3, 2, 1, 0, 7, 6, 5, 4) + ); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((double)(Vector256.Count - index), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count; index++) + { + Assert.Equal((double)(Vector512.Count - (index - Vector256.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int16ShuffleUnsafeOneInputWithDirectVectorAndNoCross256BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), + Vector512.Create((short)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16) + ); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((short)(Vector256.Count - index), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count; index++) + { + Assert.Equal((short)(Vector512.Count - (index - Vector256.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int32ShuffleUnsafeOneInputWithDirectVectorAndNoCross256BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + 
Vector512.Create((int)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), + Vector512.Create((int)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8) + ); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((int)(Vector256.Count - index), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count; index++) + { + Assert.Equal((int)(Vector512.Count - (index - Vector256.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int64ShuffleUnsafeOneInputWithDirectVectorAndNoCross256BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((long)1, 2, 3, 4, 5, 6, 7, 8), + Vector512.Create((long)3, 2, 1, 0, 7, 6, 5, 4) + ); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((long)(Vector256.Count - index), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count; index++) + { + Assert.Equal((long)(Vector512.Count - (index - Vector256.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512SByteShuffleUnsafeOneInputWithDirectVectorAndNoCross256BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64), + Vector512.Create((sbyte)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32) + ); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((sbyte)(Vector256.Count - index), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count; index++) + { + 
Assert.Equal((sbyte)(Vector512.Count - (index - Vector256.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512SingleShuffleUnsafeOneInputWithDirectVectorAndNoCross256BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((float)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), + Vector512.Create((int)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8) + ); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((float)(Vector256.Count - index), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count; index++) + { + Assert.Equal((float)(Vector512.Count - (index - Vector256.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt16ShuffleUnsafeOneInputWithDirectVectorAndNoCross256BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), + Vector512.Create((ushort)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16) + ); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ushort)(Vector256.Count - index), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count; index++) + { + Assert.Equal((ushort)(Vector512.Count - (index - Vector256.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt32ShuffleUnsafeOneInputWithDirectVectorAndNoCross256BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((uint)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), + Vector512.Create((uint)7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8) + ); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((uint)(Vector256.Count - index), result.GetElement(index)); + } + + for (int index = 
Vector256.Count; index < Vector512.Count; index++) + { + Assert.Equal((uint)(Vector512.Count - (index - Vector256.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt64ShuffleUnsafeOneInputWithDirectVectorAndNoCross256BitLaneTest() + { + Vector512 result = Vector512.ShuffleUnsafe( + Vector512.Create((ulong)1, 2, 3, 4, 5, 6, 7, 8), + Vector512.Create((ulong)3, 2, 1, 0, 7, 6, 5, 4) + ); + + for (int index = 0; index < Vector256.Count; index++) + { + Assert.Equal((ulong)(Vector256.Count - index), result.GetElement(index)); + } + + for (int index = Vector256.Count; index < Vector512.Count; index++) + { + Assert.Equal((ulong)(Vector512.Count - (index - Vector256.Count)), result.GetElement(index)); + } + } + + [Fact] + public void Vector512ByteShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector512 vector = Vector512.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + Vector512 indices = Vector512.Create((byte)63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector512 result = Vector512.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((byte)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512DoubleShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector512 vector = Vector512.Create((double)1, 2, 3, 4, 5, 6, 7, 8); + Vector512 indices = Vector512.Create((long)7, 6, 5, 4, 3, 2, 1, 0); + Vector512 result = Vector512.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector512.Count; index++) + { + 
Assert.Equal((double)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int16ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector512 vector = Vector512.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector512 indices = Vector512.Create((short)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector512 result = Vector512.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((short)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int32ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector512 vector = Vector512.Create((int)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector512 indices = Vector512.Create((int)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector512 result = Vector512.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((int)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int64ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector512 vector = Vector512.Create((long)1, 2, 3, 4, 5, 6, 7, 8); + Vector512 indices = Vector512.Create((long)7, 6, 5, 4, 3, 2, 1, 0); + Vector512 result = Vector512.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((long)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512SByteShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector512 vector = Vector512.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 
61, 62, 63, 64); + Vector512 indices = Vector512.Create((sbyte)63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector512 result = Vector512.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((sbyte)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512SingleShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector512 vector = Vector512.Create((float)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector512 indices = Vector512.Create((int)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector512 result = Vector512.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((float)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt16ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector512 vector = Vector512.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector512 indices = Vector512.Create((ushort)31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector512 result = Vector512.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((ushort)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt32ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector512 vector = Vector512.Create((uint)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector512 indices = Vector512.Create((uint)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + Vector512 result = Vector512.ShuffleUnsafe(vector, indices); + 
+ for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((uint)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt64ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector512 vector = Vector512.Create((ulong)1, 2, 3, 4, 5, 6, 7, 8); + Vector512 indices = Vector512.Create((ulong)7, 6, 5, 4, 3, 2, 1, 0); + Vector512 result = Vector512.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((ulong)(Vector512.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector512ByteShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector512 vector = Vector512.Create((byte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Zero); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((byte)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector512DoubleShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector512 vector = Vector512.Create((double)1, 2, 3, 4, 5, 6, 7, 8); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Zero); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((double)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int16ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector512 vector = Vector512.Create((short)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Zero); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((short)1, result.GetElement(index)); + } + } + + [Fact] + public void 
Vector512Int32ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector512 vector = Vector512.Create((int)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Zero); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((int)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector512Int64ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector512 vector = Vector512.Create((long)1, 2, 3, 4, 5, 6, 7, 8); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Zero); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((long)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector512SByteShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector512 vector = Vector512.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Zero); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((sbyte)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector512SingleShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector512 vector = Vector512.Create((float)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Zero); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((float)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt16ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector512 vector = Vector512.Create((ushort)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Zero); + + for (int index = 0; index 
< Vector512.Count; index++) + { + Assert.Equal((ushort)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt32ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector512 vector = Vector512.Create((uint)1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Zero); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((uint)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector512UInt64ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector512 vector = Vector512.Create((ulong)1, 2, 3, 4, 5, 6, 7, 8); + Vector512 result = Vector512.ShuffleUnsafe(vector, Vector512.Zero); + + for (int index = 0; index < Vector512.Count; index++) + { + Assert.Equal((ulong)1, result.GetElement(index)); + } + } + [Fact] public unsafe void Vector512ByteStoreTest() { diff --git a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs index c049c08515a08..0e62bc9f52016 100644 --- a/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs +++ b/src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs @@ -2398,6 +2398,342 @@ public void Vector64UInt32ShuffleOneInputWithZeroIndicesTest() } } + [Fact] + public void Vector64ByteShuffleUnsafeOneInputTest() + { + Vector64 vector = Vector64.Create((byte)1, 2, 3, 4, 5, 6, 7, 8); + Vector64 result = Vector64.ShuffleUnsafe(vector, Vector64.Create((byte)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((byte)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64Int16ShuffleUnsafeOneInputTest() + { + Vector64 vector = Vector64.Create((short)1, 2, 3, 4); + Vector64 result = Vector64.ShuffleUnsafe(vector, Vector64.Create((short)3, 2, 1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + 
Assert.Equal((short)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64Int32ShuffleUnsafeOneInputTest() + { + Vector64 vector = Vector64.Create((int)1, 2); + Vector64 result = Vector64.ShuffleUnsafe(vector, Vector64.Create((int)1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((int)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64SByteShuffleUnsafeOneInputTest() + { + Vector64 vector = Vector64.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8); + Vector64 result = Vector64.ShuffleUnsafe(vector, Vector64.Create((sbyte)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((sbyte)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64SingleShuffleUnsafeOneInputTest() + { + Vector64 vector = Vector64.Create((float)1, 2); + Vector64 result = Vector64.ShuffleUnsafe(vector, Vector64.Create((int)1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((float)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64UInt16ShuffleUnsafeOneInputTest() + { + Vector64 vector = Vector64.Create((ushort)1, 2, 3, 4); + Vector64 result = Vector64.ShuffleUnsafe(vector, Vector64.Create((ushort)3, 2, 1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((ushort)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64UInt32ShuffleUnsafeOneInputTest() + { + Vector64 vector = Vector64.Create((uint)1, 2); + Vector64 result = Vector64.ShuffleUnsafe(vector, Vector64.Create((uint)1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((uint)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64ByteShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector64 result = 
Vector64.ShuffleUnsafe(Vector64.Create((byte)1, 2, 3, 4, 5, 6, 7, 8), Vector64.Create((byte)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((byte)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64Int16ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector64 result = Vector64.ShuffleUnsafe(Vector64.Create((short)1, 2, 3, 4), Vector64.Create((short)3, 2, 1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((short)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64Int32ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector64 result = Vector64.ShuffleUnsafe(Vector64.Create((int)1, 2), Vector64.Create((int)1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((int)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64SByteShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector64 result = Vector64.ShuffleUnsafe(Vector64.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8), Vector64.Create((sbyte)7, 6, 5, 4, 3, 2, 1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((sbyte)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64SingleShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector64 result = Vector64.ShuffleUnsafe(Vector64.Create((float)1, 2), Vector64.Create((int)1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((float)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64UInt16ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector64 result = Vector64.ShuffleUnsafe(Vector64.Create((ushort)1, 2, 3, 4), Vector64.Create((ushort)3, 2, 1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((ushort)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] 
+ public void Vector64UInt32ShuffleUnsafeOneInputWithDirectVectorTest() + { + Vector64 result = Vector64.ShuffleUnsafe(Vector64.Create((uint)1, 2), Vector64.Create((uint)1, 0)); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((uint)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64ByteShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector64 vector = Vector64.Create((byte)1, 2, 3, 4, 5, 6, 7, 8); + Vector64 indices = Vector64.Create((byte)7, 6, 5, 4, 3, 2, 1, 0); + Vector64 result = Vector64.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((byte)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64Int16ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector64 vector = Vector64.Create((short)1, 2, 3, 4); + Vector64 indices = Vector64.Create((short)3, 2, 1, 0); + Vector64 result = Vector64.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((short)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64Int32ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector64 vector = Vector64.Create((int)1, 2); + Vector64 indices = Vector64.Create((int)1, 0); + Vector64 result = Vector64.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((int)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64SByteShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector64 vector = Vector64.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8); + Vector64 indices = Vector64.Create((sbyte)7, 6, 5, 4, 3, 2, 1, 0); + Vector64 result = Vector64.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((sbyte)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void 
Vector64SingleShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector64 vector = Vector64.Create((float)1, 2); + Vector64 indices = Vector64.Create((int)1, 0); + Vector64 result = Vector64.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((float)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64UInt16ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector64 vector = Vector64.Create((ushort)1, 2, 3, 4); + Vector64 indices = Vector64.Create((ushort)3, 2, 1, 0); + Vector64 result = Vector64.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((ushort)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64UInt32ShuffleUnsafeOneInputWithLocalIndicesTest() + { + Vector64 vector = Vector64.Create((uint)1, 2); + Vector64 indices = Vector64.Create((uint)1, 0); + Vector64 result = Vector64.ShuffleUnsafe(vector, indices); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((uint)(Vector64.Count - index), result.GetElement(index)); + } + } + + [Fact] + public void Vector64ByteShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector64 vector = Vector64.Create((byte)1, 2, 3, 4, 5, 6, 7, 8); + Vector64 result = Vector64.ShuffleUnsafe(vector, Vector64.Zero); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((byte)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector64Int16ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector64 vector = Vector64.Create((short)1, 2, 3, 4); + Vector64 result = Vector64.ShuffleUnsafe(vector, Vector64.Zero); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal(1, result.GetElement(index)); + } + } + + [Fact] + public void Vector64Int32ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector64 vector = Vector64.Create((int)1, 2); + Vector64 result = 
Vector64.ShuffleUnsafe(vector, Vector64.Zero); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((int)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector64SByteShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector64 vector = Vector64.Create((sbyte)1, 2, 3, 4, 5, 6, 7, 8); + Vector64 result = Vector64.ShuffleUnsafe(vector, Vector64.Zero); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((sbyte)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector64SingleShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector64 vector = Vector64.Create((float)1, 2); + Vector64 result = Vector64.ShuffleUnsafe(vector, Vector64.Zero); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((float)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector64UInt16ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector64 vector = Vector64.Create((ushort)1, 2, 3, 4); + Vector64 result = Vector64.ShuffleUnsafe(vector, Vector64.Zero); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((ushort)1, result.GetElement(index)); + } + } + + [Fact] + public void Vector64UInt32ShuffleUnsafeOneInputWithZeroIndicesTest() + { + Vector64 vector = Vector64.Create((uint)1, 2); + Vector64 result = Vector64.ShuffleUnsafe(vector, Vector64.Zero); + + for (int index = 0; index < Vector64.Count; index++) + { + Assert.Equal((uint)1, result.GetElement(index)); + } + } + [Fact] public unsafe void Vector64ByteStoreTest() { diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c index 47b1df99798cd..4d597add5b27b 100644 --- a/src/mono/mono/mini/simd-intrinsics.c +++ b/src/mono/mono/mini/simd-intrinsics.c @@ -1685,6 +1685,7 @@ static guint16 sri_vector_methods [] = { SN_Negate, SN_OnesComplement, SN_Shuffle, + SN_ShuffleUnsafe, SN_Sqrt, SN_Subtract, SN_Sum, @@ -2847,7 +2848,8 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, 
MonoMethodSignature *fsi return NULL; return emit_simd_ins_for_unary_op (cfg, klass, fsig, args, arg0_type, id); } - case SN_Shuffle: { + case SN_Shuffle: + case SN_ShuffleUnsafe: { MonoType *etype = get_vector_t_elem_type (fsig->ret); if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype)) return NULL; diff --git a/src/mono/mono/mini/simd-methods.h b/src/mono/mono/mini/simd-methods.h index fd7f56e4c7200..2c6d0ae1978a8 100644 --- a/src/mono/mono/mini/simd-methods.h +++ b/src/mono/mono/mini/simd-methods.h @@ -108,6 +108,7 @@ METHOD(ExtractMostSignificantBits) METHOD(GetElement) METHOD(GetLower) METHOD(GetUpper) +METHOD(ShuffleUnsafe) METHOD(Sum) METHOD(ToScalar) METHOD(ToVector128) From 57e488423f5d9b8ee4d06352bf319fea35dfa67e Mon Sep 17 00:00:00 2001 From: Hamish Arblaster Date: Tue, 6 Aug 2024 22:39:18 +1000 Subject: [PATCH 2/6] Remove internal dependency on ShuffleUnsafe's behaviour wrt high bit --- .../Buffers/Text/Base64Helper/Base64DecoderHelper.cs | 6 +----- .../System/Buffers/Text/Base64Helper/Base64Helper.cs | 10 +++++++++- .../src/System/SearchValues/IndexOfAnyAsciiSearcher.cs | 6 +++++- .../System/SearchValues/Strings/Helpers/TeddyHelper.cs | 8 +++++++- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64DecoderHelper.cs b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64DecoderHelper.cs index 97765e340acff..4599aa887fde4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64DecoderHelper.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Buffers/Text/Base64Helper/Base64DecoderHelper.cs @@ -832,11 +832,7 @@ internal static Vector128 SimdShuffle(Vector128 left, Vector128 utf16Vector) // If a non-ASCII bit is set in any WORD of the vector, we have seen non-ASCII data. 
return zeroIsAscii != Vector512.Zero; } +#endif +#if NET + // same as ShuffleUnsafe, except that we guarantee that if the high bit is set, it gives 0 [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static Vector128 ShuffleUnsafe(Vector128 vector, Vector128 indices) +#if NET9_0_OR_GREATER + [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] +#endif + internal static Vector128 ShuffleUnsafeModified(Vector128 vector, Vector128 indices) { if (Ssse3.IsSupported) { diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs index 36965c24da7e5..8e13e0ac22384 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs @@ -1,6 +1,7 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Buffers.Text; using System.Diagnostics; using System.Numerics; using System.Runtime.CompilerServices; @@ -1018,7 +1019,9 @@ public static int LastIndexOfAny(ref byte searchSpace, int searchSpace [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Sse2))] + [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] [CompExactlyDependsOn(typeof(PackedSimd))] private static Vector128 IndexOfAnyLookup(Vector128 source0, Vector128 source1, Vector128 bitmapLookup) where TNegator : struct, INegator @@ -1052,7 +1055,7 @@ private static Vector128 IndexOfAnyLookupCore(Vector128 source, Vect // The bitmapLookup represents a 8x16 table of bits, indicating whether a character is present in the needle. 
// Lookup the rows via the lower nibble and the column via the higher nibble. - Vector128 bitMask = Vector128.ShuffleUnsafe(bitmapLookup, lowNibbles); + Vector128 bitMask = Base64Helper.ShuffleUnsafeModified(bitmapLookup, lowNibbles); // For values above 127, the high nibble will be above 7. We construct the positions vector for the shuffle such that those values map to 0. Vector128 bitPositions = Vector128.ShuffleUnsafe(Vector128.Create(0x8040201008040201, 0).AsByte(), highNibbles); @@ -1089,6 +1092,7 @@ private static Vector256 IndexOfAnyLookupCore(Vector256 source, Vect [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd))] + [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] [CompExactlyDependsOn(typeof(PackedSimd))] private static Vector128 IndexOfAnyLookup(Vector128 source, Vector128 bitmapLookup0, Vector128 bitmapLookup1) where TNegator : struct, INegator diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/TeddyHelper.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/TeddyHelper.cs index a162bad30bcf4..4f346f5a369cf 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/TeddyHelper.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/TeddyHelper.cs @@ -1,9 +1,11 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. 
+using System.Buffers.Text; using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.Wasm; using System.Runtime.Intrinsics.X86; namespace System.Buffers @@ -17,6 +19,7 @@ internal static class TeddyHelper [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] public static (Vector128 Result, Vector128 Prev0) ProcessInputN2( Vector128 input, Vector128 prev0, @@ -89,7 +92,9 @@ public static (Vector512 Result, Vector512 Prev0) ProcessInputN2( [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] + [CompExactlyDependsOn(typeof(AdvSimd))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] public static (Vector128 Result, Vector128 Prev0, Vector128 Prev1) ProcessInputN3( Vector128 input, Vector128 prev0, Vector128 prev1, @@ -293,9 +298,10 @@ private static (Vector512 Low, Vector512 High) GetNibbles(Vector512< [MethodImpl(MethodImplOptions.AggressiveInlining)] [CompExactlyDependsOn(typeof(Ssse3))] [CompExactlyDependsOn(typeof(AdvSimd.Arm64))] + [CompExactlyDependsOn(typeof(PackedSimd))] private static Vector128 Shuffle(Vector128 maskLow, Vector128 maskHigh, Vector128 low, Vector128 high) { - return Vector128.ShuffleUnsafe(maskLow, low) & Vector128.ShuffleUnsafe(maskHigh, high); + return Base64Helper.ShuffleUnsafeModified(maskLow, low) & Vector128.ShuffleUnsafe(maskHigh, high); } [MethodImpl(MethodImplOptions.AggressiveInlining)] From b9be44ec64d8514c69d6d914fc5dc7d0051aa563 Mon Sep 17 00:00:00 2001 From: Hamish Arblaster Date: Wed, 7 Aug 2024 19:09:02 +1000 Subject: [PATCH 3/6] Optimise some codegen - Optimise comparison in `gtNewSimdShuffleNodeVariable` for xarch - Optimise for constant vector in Vector256.Shuffle{Unsafe} when have AVX2 only --- src/coreclr/jit/gentree.cpp | 133 
+++++++++++++++++++++++++++++------- 1 file changed, 108 insertions(+), 25 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index adcfa9cd615fb..afd1e59b66d46 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -24857,6 +24857,10 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( size_t elementCount = simdSize / elementSize; #if defined(TARGET_XARCH) + // on xarch, signed comparison is cheaper, so whenever we are able to use it in the result & (indices < elementCount) + // step for Shuffle, we do. + bool canUseSignedComparisonHint = false; + // duplicate operand 2 for non-isUnsafe implementation later GenTree* op2DupSafe = isUnsafe ? nullptr : fgMakeMultiUse(&op2); @@ -24900,6 +24904,8 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( assert(compIsaSupportedDebugOnly(InstructionSet_SSSE3)); retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSSE3_Shuffle, simdBaseJitType, simdSize); + + canUseSignedComparisonHint = true; // high bit on index gives 0 already } else if (elementSize == 1 && simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI_VL)) { @@ -25067,6 +25073,8 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( // explicitly for Shuffle always anyway, and for ShuffleUnsafe, this behaviour is fine (since it's // out of bounds). + if (elementSize == 1) canUseSignedComparisonHint = true; // high bit on index gives 0 already + // we want ShuffleUnsafe to be at least as good as Shuffle (at least in sensible cases), so for constant // indices vector we special case some cases to use normal Shuffle to ensure it gets the additional // optimisations available there (to avoid double Avx2.Shuffle-ing when possible). 
all these here cases are @@ -25152,9 +25160,46 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( op2 = gtNewSimdBinOpNode(GT_OR, type, op2, cnsNode, simdBaseJitType, simdSize); } - // get the indices, and xor the cross-lane bit on the high 128-bit lane part of indices. we begin computing - // this (and the comparison) early as it seems to be one of the slower calculations (it can be done in - // parallel to other operations ideally). + // create required clones of op2 + GenTree* op2Dup1 = fgMakeMultiUse(&op2); + GenTree* op2Dup2 = fgMakeMultiUse(&op2Dup1); + + // swap the low and high 128-bit lanes + // calculate swap before shuf1 so they can be computed in parallel + // Vector256 swap = Avx2.Permute2x128(vector, vector, 0b00000001); + GenTree* swap; + if (!op1->IsCnsVec()) + { + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + GenTree* op1Dup2 = fgMakeMultiUse(&op1Dup1); + + uint8_t control = 1; + cnsNode = gtNewIconNode(control, TYP_INT); + swap = gtNewSimdHWIntrinsicNode(type, op1Dup1, op1Dup2, cnsNode, NI_AVX2_Permute2x128, + simdBaseJitType, simdSize); + } + else + { + // if we have a constant, keep it constant + GenTree* op1Dup1 = fgMakeMultiUse(&op1); + swap = op1Dup1; + + simd_t* cnsPtr = &op1Dup1->AsVecCon()->gtSimdVal; + uint64_t tmp = cnsPtr->u64[0]; + cnsPtr->u64[0] = cnsPtr->u64[2]; + cnsPtr->u64[2] = tmp; + tmp = cnsPtr->u64[1]; + cnsPtr->u64[1] = cnsPtr->u64[3]; + cnsPtr->u64[3] = tmp; + } + + // shuffle with both the normal and swapped values + // Vector256 shuf1 = Avx2.Shuffle(vector, indices); + // Vector256 shuf2 = Avx2.Shuffle(swap, indices); + GenTree* shuf1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + GenTree* shuf2 = gtNewSimdHWIntrinsicNode(type, swap, op2Dup1, NI_AVX2_Shuffle, simdBaseJitType, simdSize); + + // get the indices, and xor the cross-lane bit on the high 128-bit lane part of indices. 
// V256 indicesXord = indices ^ V256.Create(V128.Create((byte)0), V128.Create((byte)0x10))); simd_t xorCns = {}; xorCns.u64[0] = 0; @@ -25164,32 +25209,20 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( cnsNode = gtNewVconNode(type); cnsNode->AsVecCon()->gtSimdVal = xorCns; - GenTree* op2Dup1 = fgMakeMultiUse(&op2); - GenTree* op2Dup2 = fgMakeMultiUse(&op2Dup1); - GenTree* op1Dup1 = fgMakeMultiUse(&op1); - GenTree* op1Dup2 = fgMakeMultiUse(&op1Dup1); GenTree* indicesXord = gtNewSimdBinOpNode(GT_XOR, type, op2Dup2, cnsNode, simdBaseJitType, simdSize); // compare our modified indices to 0x0F (highest value not swapping lane), we get 0xFF when we are swapping // lane and 0x00 otherwise // V256 selection = Avx2.CompareGreaterThan(indicesXord.AsSByte(), V256.Create((sbyte)0x0F)).AsByte(); - cnsNode = gtNewSimdCreateBroadcastNode(type, gtNewIconNode(0x0F, TYP_INT), CORINFO_TYPE_UBYTE, simdSize); + simd_t comparandCnd = {}; + comparandCnd.u64[0] = 0x0F0F0F0F0F0F0F0F; + comparandCnd.u64[1] = 0x0F0F0F0F0F0F0F0F; + comparandCnd.u64[2] = 0x0F0F0F0F0F0F0F0F; + comparandCnd.u64[3] = 0x0F0F0F0F0F0F0F0F; + cnsNode = gtNewVconNode(type); + cnsNode->AsVecCon()->gtSimdVal = comparandCnd; GenTree* selection = gtNewSimdCmpOpNode(GT_GT, type, indicesXord, cnsNode, CORINFO_TYPE_BYTE, simdSize); - // swap the low and high 128-bit lanes - // calculate swap before shuf1 so they can be computed in parallel - // Vector256 swap = Avx2.Permute2x128(vector, vector, 0b00000001); - uint8_t control = 1; - cnsNode = gtNewIconNode(control, TYP_INT); - GenTree* swap = gtNewSimdHWIntrinsicNode(type, op1Dup1, op1Dup2, cnsNode, NI_AVX2_Permute2x128, - simdBaseJitType, simdSize); - - // shuffle with both the normal and swapped values - // Vector256 shuf1 = Avx2.Shuffle(vector, indices); - // Vector256 shuf2 = Avx2.Shuffle(swap, indices); - GenTree* shuf1 = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_AVX2_Shuffle, simdBaseJitType, simdSize); - GenTree* shuf2 = gtNewSimdHWIntrinsicNode(type, swap, 
op2Dup1, NI_AVX2_Shuffle, simdBaseJitType, simdSize); - // blend our two shuffles based on whether each element swaps lanes or not // return Avx2.BlendVariable(shuf1, shuf2, selection); retNode = gtNewSimdHWIntrinsicNode(type, shuf1, shuf2, selection, NI_AVX2_BlendVariable, simdBaseJitType, @@ -25356,11 +25389,61 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( corType = CORINFO_TYPE_ULONG; } + bool subComparandNode = false; + +#if defined(TARGET_XARCH) + // check if we have hardware accelerated unsigned comparison + bool hardwareAcceleratedUnsignedComparison = simdSize == 64; + if (simdSize == 32 || simdSize == 16) + { + if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) hardwareAcceleratedUnsignedComparison = true; + if (elementSize < 4) hardwareAcceleratedUnsignedComparison = compOpportunisticallyDependsOn(InstructionSet_AVX512BW_VL); + else hardwareAcceleratedUnsignedComparison = compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL); + } + + // if the hardware doesn't support direct unsigned comparison, we attempt to use signed comparison + if (!hardwareAcceleratedUnsignedComparison) + { + corType = CORINFO_TYPE_BYTE; + if (elementSize == 2) + { + corType = CORINFO_TYPE_SHORT; + } + else if (elementSize == 4) + { + corType = CORINFO_TYPE_INT; + } + else if (elementSize == 8) + { + corType = CORINFO_TYPE_LONG; + } + + // if we can't use signed comparison for free, update the comparand and op2DupSafe appropriately. + // doing this manually allows the comparand to still be a constant. 
+ if (!canUseSignedComparisonHint) + { + subComparandNode = true; + uint64_t subtractionValue = static_cast(1) << (elementSize * 8 - 1); + GenTree* subtraction = + gtNewSimdCreateBroadcastNode(type, gtNewLconNode(subtractionValue), corType, simdSize); + + op2DupSafe = gtNewSimdBinOpNode(GT_SUB, type, op2DupSafe, subtraction, corType, simdSize); + } + } +#endif + + // create the comparand node + uint64_t comparandValue = static_cast(elementCount); + if (subComparandNode) + { + uint64_t subtraction = (uint64_t)1 << (elementSize * 8 - 1); + comparandValue -= subtraction; + } + GenTree* comparand = gtNewSimdCreateBroadcastNode(type, gtNewLconNode(comparandValue), corType, simdSize); + assert(genTypeSize(JitType2PreciseVarType(corType)) == elementSize); - // create the comparand node, and the mask node (op2 < comparand), and the result node (mask & unsafeResult) - GenTree* comparand = - gtNewSimdCreateBroadcastNode(type, gtNewIconNode(elementCount, TYP_INT), corType, simdSize); + // create the mask node (op2 < comparand), and the result node (mask & unsafeResult) GenTree* mask = gtNewSimdCmpOpNode(GT_LT, type, op2DupSafe, comparand, corType, simdSize); retNode = gtNewSimdBinOpNode(GT_AND, type, mask, retNode, simdBaseJitType, simdSize); } From 1423e85202381025adc9f080cec6583d9b617f50 Mon Sep 17 00:00:00 2001 From: Hamish Arblaster Date: Wed, 7 Aug 2024 19:49:20 +1000 Subject: [PATCH 4/6] jit format --- src/coreclr/jit/gentree.cpp | 51 ++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index afd1e59b66d46..ad683d18d4c14 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -24857,8 +24857,8 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( size_t elementCount = simdSize / elementSize; #if defined(TARGET_XARCH) - // on xarch, signed comparison is cheaper, so whenever we are able to use it in the result & (indices < elementCount) - // step 
for Shuffle, we do. + // on xarch, signed comparison is cheaper, so whenever we are able to use it in the + // result & (indices < elementCount) step for Shuffle, we do. bool canUseSignedComparisonHint = false; // duplicate operand 2 for non-isUnsafe implementation later @@ -24905,7 +24905,8 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( retNode = gtNewSimdHWIntrinsicNode(type, op1, op2, NI_SSSE3_Shuffle, simdBaseJitType, simdSize); - canUseSignedComparisonHint = true; // high bit on index gives 0 already + // high bit on index gives 0 already + canUseSignedComparisonHint = true; } else if (elementSize == 1 && simdSize == 32 && compOpportunisticallyDependsOn(InstructionSet_AVX512VBMI_VL)) { @@ -25073,7 +25074,8 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( // explicitly for Shuffle always anyway, and for ShuffleUnsafe, this behaviour is fine (since it's // out of bounds). - if (elementSize == 1) canUseSignedComparisonHint = true; // high bit on index gives 0 already + // high bit on index gives 0 already + if (elementSize == 1) canUseSignedComparisonHint = true; // we want ShuffleUnsafe to be at least as good as Shuffle (at least in sensible cases), so for constant // indices vector we special case some cases to use normal Shuffle to ensure it gets the additional @@ -25161,8 +25163,8 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( } // create required clones of op2 - GenTree* op2Dup1 = fgMakeMultiUse(&op2); - GenTree* op2Dup2 = fgMakeMultiUse(&op2Dup1); + GenTree* op2Dup1 = fgMakeMultiUse(&op2); + GenTree* op2Dup2 = fgMakeMultiUse(&op2Dup1); // swap the low and high 128-bit lanes // calculate swap before shuf1 so they can be computed in parallel @@ -25174,9 +25176,9 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( GenTree* op1Dup2 = fgMakeMultiUse(&op1Dup1); uint8_t control = 1; - cnsNode = gtNewIconNode(control, TYP_INT); - swap = gtNewSimdHWIntrinsicNode(type, op1Dup1, op1Dup2, cnsNode, NI_AVX2_Permute2x128, - simdBaseJitType, simdSize); + 
cnsNode = gtNewIconNode(control, TYP_INT); + swap = gtNewSimdHWIntrinsicNode(type, op1Dup1, op1Dup2, cnsNode, NI_AVX2_Permute2x128, simdBaseJitType, + simdSize); } else { @@ -25184,13 +25186,13 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( GenTree* op1Dup1 = fgMakeMultiUse(&op1); swap = op1Dup1; - simd_t* cnsPtr = &op1Dup1->AsVecCon()->gtSimdVal; - uint64_t tmp = cnsPtr->u64[0]; - cnsPtr->u64[0] = cnsPtr->u64[2]; - cnsPtr->u64[2] = tmp; - tmp = cnsPtr->u64[1]; - cnsPtr->u64[1] = cnsPtr->u64[3]; - cnsPtr->u64[3] = tmp; + simd_t* cnsPtr = &op1Dup1->AsVecCon()->gtSimdVal; + uint64_t tmp = cnsPtr->u64[0]; + cnsPtr->u64[0] = cnsPtr->u64[2]; + cnsPtr->u64[2] = tmp; + tmp = cnsPtr->u64[1]; + cnsPtr->u64[1] = cnsPtr->u64[3]; + cnsPtr->u64[3] = tmp; } // shuffle with both the normal and swapped values @@ -25396,9 +25398,18 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( bool hardwareAcceleratedUnsignedComparison = simdSize == 64; if (simdSize == 32 || simdSize == 16) { - if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) hardwareAcceleratedUnsignedComparison = true; - if (elementSize < 4) hardwareAcceleratedUnsignedComparison = compOpportunisticallyDependsOn(InstructionSet_AVX512BW_VL); - else hardwareAcceleratedUnsignedComparison = compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL); + if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1)) + { + hardwareAcceleratedUnsignedComparison = true; + } + else if (elementSize < 4) // keep the AVX10v1 result instead of overwriting it + { + hardwareAcceleratedUnsignedComparison = compOpportunisticallyDependsOn(InstructionSet_AVX512BW_VL); + } + else + { + hardwareAcceleratedUnsignedComparison = compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL); + } } // if the hardware doesn't support direct unsigned comparison, we attempt to use signed comparison @@ -25422,7 +25433,7 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( // doing this manually allows the comparand to still be a constant. 
if (!canUseSignedComparisonHint) { - subComparandNode = true; + subComparandNode = true; uint64_t subtractionValue = static_cast(1) << (elementSize * 8 - 1); GenTree* subtraction = gtNewSimdCreateBroadcastNode(type, gtNewLconNode(subtractionValue), corType, simdSize); From ff76287c5c8ea70d671ed8652aead8d92c48ebc4 Mon Sep 17 00:00:00 2001 From: Hamish Arblaster Date: Wed, 7 Aug 2024 20:02:26 +1000 Subject: [PATCH 5/6] jit format --- src/coreclr/jit/gentree.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ad683d18d4c14..e55473fd1a14e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -25075,7 +25075,10 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( // out of bounds). // high bit on index gives 0 already - if (elementSize == 1) canUseSignedComparisonHint = true; + if (elementSize == 1) + { + canUseSignedComparisonHint = true; + } // we want ShuffleUnsafe to be at least as good as Shuffle (at least in sensible cases), so for constant // indices vector we special case some cases to use normal Shuffle to ensure it gets the additional @@ -25176,9 +25179,9 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( GenTree* op1Dup2 = fgMakeMultiUse(&op1Dup1); uint8_t control = 1; - cnsNode = gtNewIconNode(control, TYP_INT); - swap = gtNewSimdHWIntrinsicNode(type, op1Dup1, op1Dup2, cnsNode, NI_AVX2_Permute2x128, simdBaseJitType, - simdSize); + cnsNode = gtNewIconNode(control, TYP_INT); + swap = gtNewSimdHWIntrinsicNode(type, op1Dup1, op1Dup2, cnsNode, NI_AVX2_Permute2x128, simdBaseJitType, + simdSize); } else { From ca1a5fafb228586445e5b330da1995723f1e6bd4 Mon Sep 17 00:00:00 2001 From: Hamish Arblaster Date: Wed, 7 Aug 2024 20:20:21 +1000 Subject: [PATCH 6/6] Simplify logic for using Shuffle for ShuffleUnsafe --- src/coreclr/jit/gentree.cpp | 75 +++++++++++-------------------------- 1 file changed, 22 insertions(+), 53 deletions(-) diff --git 
a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index e55473fd1a14e..a7785a9e37318 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -25080,55 +25080,6 @@ GenTree* Compiler::gtNewSimdShuffleNodeVariable( canUseSignedComparisonHint = true; } - // we want ShuffleUnsafe to be at least as good as Shuffle (at least in sensible cases), so for constant - // indices vector we special case some cases to use normal Shuffle to ensure it gets the additional - // optimisations available there (to avoid double Avx2.Shuffle-ing when possible). all these here cases are - // equivalent to normal shuffle (we only take ones that are in range, or would trivially give 0 for both). - if (isUnsafe && op2->IsCnsVec()) - { - // get the byte indices we will use for shuffling - bool isValidForNormalShuffle = true; - simd_t indicesCns = op2->AsVecCon()->gtSimdVal; - if (elementSize > 1) - { - for (size_t i = 0; i < simdSize / 2; i++) - { - uint16_t index = indicesCns.u16[i]; - if (index >= 128) - { - // if the index is 128 or more, then we would have to synthesise a new op2, which we will - // not do, since there is no reason to put in extra effort to support such out-of-bounds - // cases for ShuffleUnsafe. - isValidForNormalShuffle = false; - break; - } - indicesCns.u8[i * 2] = (uint8_t)(index << 1); - indicesCns.u8[i * 2 + 1] = (uint8_t)((index << 1) | 1); - } - } - - if (isValidForNormalShuffle) - { - // check they are all within valid range (or have high bit set since it also trivially behaves same) - for (size_t i = 0; i < simdSize; i++) - { - if ((indicesCns.u8[i] & ~0x9F) != 0) - { - isValidForNormalShuffle = false; - break; - } - } - - // if valid, call to gtNewSimdShuffleNode - if (isValidForNormalShuffle) - { - // note: this does not cause an infinite loop, since we call with isUnsafe: false, which never - // enters gtNewSimdShuffleNodeVariable. 
- return gtNewSimdShuffleNode(type, op1, op2, simdBaseJitType, simdSize, false); - } - } - } - // if we have elementSize > 1, we need to convert op2 (short indices) to byte indices if (elementSize > 1) { @@ -25487,11 +25438,32 @@ GenTree* Compiler::gtNewSimdShuffleNode( var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); assert(varTypeIsArithmetic(simdBaseType)); + size_t elementSize = genTypeSize(simdBaseType); + size_t elementCount = simdSize / elementSize; + if (isUnsafe) { // For ShuffleUnsafe, delegate to the variable implementation to get the same behaviour for // ShuffleUnsafe with constant vs variable indices for free. - return gtNewSimdShuffleNodeVariable(type, op1, op2, simdBaseJitType, simdSize, isUnsafe); + + // We want ShuffleUnsafe to be at least as good as Shuffle (at least in non out-of-range cases), + // so if we have all values in range, then just treat it like Shuffle. + bool gotInvalidIndex = false; + for (size_t index = 0; index < elementCount; index++) + { + uint64_t value = op2->GetIntegralVectorConstElement(index, simdBaseType); + if (value >= elementCount) + { + gotInvalidIndex = true; + break; + } + } + + if (gotInvalidIndex) + { + // Call variable implementation. + return gtNewSimdShuffleNodeVariable(type, op1, op2, simdBaseJitType, simdSize, isUnsafe); + } } if (op2->IsVectorAllBitsSet()) @@ -25510,9 +25482,6 @@ GenTree* Compiler::gtNewSimdShuffleNode( GenTree* retNode = nullptr; GenTreeIntConCommon* cnsNode = nullptr; - size_t elementSize = genTypeSize(simdBaseType); - size_t elementCount = simdSize / elementSize; - #if defined(TARGET_XARCH) uint8_t control = 0; bool crossLane = false;