diff --git a/src/mono/mono/arch/arm64/arm64-codegen.h b/src/mono/mono/arch/arm64/arm64-codegen.h
index a6ea6c3faa350..b348d274e6025 100644
--- a/src/mono/mono/arch/arm64/arm64-codegen.h
+++ b/src/mono/mono/arch/arm64/arm64-codegen.h
@@ -1139,19 +1139,16 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 #define arm_neon_dup_g_4s(p, rd, rn) arm_neon_cpy_opcode ((p), VREG_FULL, 0b0, 0b00100, 0b0001, (rd), (rn))
 #define arm_neon_dup_g_2d(p, rd, rn) arm_neon_cpy_opcode ((p), VREG_FULL, 0b0, 0b00100, 0b0001, (rd), (rn))
 
-// the opcode is smov, but we define variants smovs and smovd by whether they fill a 32 or 64-bit reg.
-#define arm_neon_smovs_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00001 | ((index) << 1), 0b0101, (rd), (rn))
-#define arm_neon_smovs_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00010 | ((index) << 2), 0b0101, (rd), (rn))
-#define arm_neon_smovd_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00001 | ((index) << 1), 0b0101, (rd), (rn))
-#define arm_neon_smovd_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00010 | ((index) << 2), 0b0101, (rd), (rn))
-#define arm_neon_smovd_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00100 | ((index) << 3), 0b0101, (rd), (rn))
-
-// the opcode is umov, but we define variants smovs and smovd by whether they fill a 32 or 64-bit reg.
-#define arm_neon_umovs_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00001 | ((index) << 1), 0b0111, (rd), (rn))
-#define arm_neon_umovs_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b00010 | ((index) << 2), 0b0111, (rd), (rn))
-#define arm_neon_umovd_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00001 | ((index) << 1), 0b0111, (rd), (rn))
-#define arm_neon_umovd_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00010 | ((index) << 2), 0b0111, (rd), (rn))
-#define arm_neon_umovd_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b00100 | ((index) << 3), 0b0111, (rd), (rn))
+#define arm_neon_smov_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00001 | ((index) << 1), 0b0101, (rd), (rn))
+#define arm_neon_smov_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00010 | ((index) << 2), 0b0101, (rd), (rn))
+#define arm_neon_smov_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00100 | ((index) << 3), 0b0101, (rd), (rn))
+#define arm_neon_smov_d(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b0, 0b01000 | ((index) << 4), 0b0101, (rd), (rn))
+
+#define arm_neon_umov_b(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00001 | ((index) << 1), 0b0111, (rd), (rn))
+#define arm_neon_umov_h(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00010 | ((index) << 2), 0b0111, (rd), (rn))
+#define arm_neon_umov_s(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b0, 0b0, 0b00100 | ((index) << 3), 0b0111, (rd), (rn))
+#define arm_neon_umov_d(p, rd, rn, index) arm_neon_cpy_opcode ((p), 0b1, 0b0, 0b01000 | ((index) << 4), 0b0111, (rd), (rn))
+
 
 /* NEON :: 3-register same FP16 */
 // TODO
@@ -1576,6 +1573,9 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 
 /* NEON :: across lanes */
 #define arm_neon_xln_opcode(p, q, u, size, opcode, rd, rn) arm_neon_opcode_2reg ((p), (q), 0b00001110001100000000100000000000 | (u) << 29 | (size) << 22 | (opcode) << 12, (rd), (rn))
+#define arm_neon_umaxv(p, width, type, rd, rn) arm_neon_xln_opcode ((p), (width), 0b1, (type), 0b01010, (rd), (rn))
+#define arm_neon_uminv(p, width, type, rd, rn) arm_neon_xln_opcode ((p), (width), 0b1, (type), 0b11010, (rd), (rn))
+
 // contrary to most other opcodes, the suffix is the type of source
 #define arm_neon_saddlv_8b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b00011, (rd), (rn))
 #define arm_neon_saddlv_16b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b00011, (rd), (rn))
@@ -1609,18 +1609,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 #define arm_neon_uaddlv_8h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b00011, (rd), (rn))
 #define arm_neon_uaddlv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b00011, (rd), (rn))
 
-#define arm_neon_umaxv_8b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b01010, (rd), (rn))
-#define arm_neon_umaxv_16b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01010, (rd), (rn))
-#define arm_neon_umaxv_4h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b01010, (rd), (rn))
-#define arm_neon_umaxv_8h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b01010, (rd), (rn))
-#define arm_neon_umaxv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b01010, (rd), (rn))
-
-#define arm_neon_uminv_8b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b11010, (rd), (rn))
-#define arm_neon_uminv_16b(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b11010, (rd), (rn))
-#define arm_neon_uminv_4h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b11010, (rd), (rn))
-#define arm_neon_uminv_8h(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b11010, (rd), (rn))
-#define arm_neon_uminv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b11010, (rd), (rn))
-
 #define arm_neon_fmaxnmv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01100, (rd), (rn))
 #define arm_neon_fmaxv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01111, (rd), (rn))
 #define arm_neon_fminnmv_4s(p, rd, rn) arm_neon_xln_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_1, 0b01100, (rd), (rn))
@@ -2313,6 +2301,15 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 	arm_neon_shimm_opcode ((p), (q), (u), (__temp_emit0 >> 3) & 0b1111, __temp_emit0 & 0b111, (opcode), (rd), (rn)) \
 } while (0)
 
+#define arm_neon_shimm_shl_immh_immb(size, shift) (((shift) + (8 << (size))) & 0b01111111)
+#define arm_neon_shimm_shl_opcode(p, q, u, size, opcode, rd, rn, shift) do { \
+	int32_t ___temp_emit0 = arm_neon_shimm_shl_immh_immb ((size), (shift)); \
+	arm_neon_shimm_opcode ((p), (q), (u), (__temp_emit0 >> 3) & 0b1111, __temp_emit0 & 0b111, (opcode), (rd), (rn)) \
+} while (0)
+
+#define arm_neon_sli(p, width, type, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), (width), 0b1, (type), 0b01010, (rd), (rn), (shift))
+#define arm_neon_shrn(p, type, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_LOW, 0b0, (type), 0b10000, (rd), (rn), (shift))
+
 #define arm_neon_sshr_8b(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b00000, (rd), (rn), (shift))
 #define arm_neon_sshr_16b(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b00000, (rd), (rn), (shift))
 #define arm_neon_sshr_4h(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_LOW, 0b0, SIZE_2, 0b00000, (rd), (rn), (shift))
@@ -2345,12 +2342,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 #define arm_neon_srsra_4s(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b0, SIZE_4, 0b00110, (rd), (rn), (shift))
 #define arm_neon_srsra_2d(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b0, SIZE_8, 0b00110, (rd), (rn), (shift))
 
-#define arm_neon_shimm_shl_immh_immb(size, shift) (((shift) + (8 << (size))) & 0b01111111)
-#define arm_neon_shimm_shl_opcode(p, q, u, size, opcode, rd, rn, shift) do { \
-	int32_t ___temp_emit0 = arm_neon_shimm_shl_immh_immb ((size), (shift)); \
-	arm_neon_shimm_opcode ((p), (q), (u), (__temp_emit0 >> 3) & 0b1111, __temp_emit0 & 0b111, (opcode), (rd), (rn)) \
-} while (0)
-
 #define arm_neon_shl_8b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b0, SIZE_1, 0b01010, (rd), (rn), (shift))
 #define arm_neon_shl_16b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b0, SIZE_1, 0b01010, (rd), (rn), (shift))
 #define arm_neon_shl_4h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b0, SIZE_2, 0b01010, (rd), (rn), (shift))
@@ -2454,14 +2445,6 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 #define arm_neon_sri_4s(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b01000, (rd), (rn), (shift))
 #define arm_neon_sri_2d(p, rd, rn, shift) arm_neon_shimm_shr_opcode ((p), VREG_FULL, 0b1, SIZE_8, 0b01000, (rd), (rn), (shift))
 
-#define arm_neon_sli_8b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b01010, (rd), (rn), (shift))
-#define arm_neon_sli_16b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01010, (rd), (rn), (shift))
-#define arm_neon_sli_4h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b01010, (rd), (rn), (shift))
-#define arm_neon_sli_8h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_2, 0b01010, (rd), (rn), (shift))
-#define arm_neon_sli_2s(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_4, 0b01010, (rd), (rn), (shift))
-#define arm_neon_sli_4s(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_4, 0b01010, (rd), (rn), (shift))
-#define arm_neon_sli_2d(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_8, 0b01010, (rd), (rn), (shift))
-
 #define arm_neon_sqshlu_8b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_1, 0b01100, (rd), (rn), (shift))
 #define arm_neon_sqshlu_16b(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_FULL, 0b1, SIZE_1, 0b01100, (rd), (rn), (shift))
 #define arm_neon_sqshlu_4h(p, rd, rn, shift) arm_neon_shimm_shl_opcode ((p), VREG_LOW, 0b1, SIZE_2, 0b01100, (rd), (rn), (shift))
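The left-shift immediate helper relocated above packs the element size and the shift amount into the 7-bit immh:immb field: arm_neon_shimm_shl_immh_immb (size, shift) evaluates to (8 << size) + shift, so the position of the leading set bit of immh selects the element width while the remaining bits carry the shift. A minimal standalone sketch of that packing (the helper name and the printed split are illustrative, not part of the header; it assumes the usual NEON size encoding of 0 = 8-bit through 3 = 64-bit lanes):

#include <stdio.h>

/* Mirrors arm_neon_shimm_shl_immh_immb: for left shifts the 7-bit immh:immb field
 * is esize + shift, where esize = 8 << size; the decoder recovers shift as
 * immh:immb - esize, and the leading set bit of immh gives the element width. */
static unsigned shl_immh_immb (int size, int shift)
{
	return (unsigned)(shift + (8 << size)) & 0x7f;
}

int main (void)
{
	/* 16-bit lanes (size = 1), shift left by 3:
	 * esize = 16, immh:immb = 16 + 3 = 19 = 0b0010011 -> immh = 0b0010, immb = 0b011 */
	unsigned packed = shl_immh_immb (1, 3);
	printf ("immh = 0x%x, immb = 0x%x\n", (packed >> 3) & 0xf, packed & 0x7);
	return 0;
}

The same immh width-selection trick is what lets the new arm_neon_sli and arm_neon_shrn wrappers take the element size as a parameter instead of needing one macro per arrangement.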
diff --git a/src/mono/mono/mini/cpu-arm64.mdesc b/src/mono/mono/mini/cpu-arm64.mdesc
index a084543ebb5b2..b6f8f23a64c25 100644
--- a/src/mono/mono/mini/cpu-arm64.mdesc
+++ b/src/mono/mono/mini/cpu-arm64.mdesc
@@ -503,6 +503,7 @@ xcompare: dest:x src1:x src2:x len:4
 xcompare_fp: dest:x src1:x src2:x len:4
 negate: dest:x src1:x len:4
 ones_complement: dest:x src1:x len:4
+xextract: dest:i src1:x len:12
 xbinop_forceint: dest:x src1:x src2:x len:4
 xcast: dest:x src1:x len:4 clob:1
 
diff --git a/src/mono/mono/mini/mini-arm64.c b/src/mono/mono/mini/mini-arm64.c
index d54b3137a0f77..57b140565ff6b 100644
--- a/src/mono/mono/mini/mini-arm64.c
+++ b/src/mono/mono/mini/mini-arm64.c
@@ -3396,6 +3396,27 @@ emit_move_return_value (MonoCompile *cfg, guint8 * code, MonoInst *ins)
 	return code;
 }
 
+static guint8*
+emit_xextract (guint8* code, int width, int mode, int dreg, int sreg1)
+{
+	switch (mode) {
+	case SIMD_EXTR_IS_ANY_SET:
+		arm_neon_umaxv (code, width, TYPE_I8, FP_TEMP_REG, sreg1);
+		arm_neon_umov_b (code, dreg, FP_TEMP_REG, 0);
+		arm_lsrw(code, dreg, dreg, 7); // dreg contains 0xff for TRUE or 0x0 for FALSE, normalize to 0x1/0x0
+		break;
+	case SIMD_EXTR_ARE_ALL_SET:
+		arm_neon_uminv (code, width, TYPE_I8, FP_TEMP_REG, sreg1);
+		arm_neon_umov_b (code, dreg, FP_TEMP_REG, 0);
+		arm_lsrw(code, dreg, dreg, 7);
+		break;
+	default:
+		g_assert_not_reached ();
+	}
+
+	return code;
+}
+
 /*
  * emit_branch_island:
  *
@@ -3822,6 +3843,9 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 		case OP_XZERO:
 			arm_neon_eor_16b (code, dreg, dreg, dreg);
 			break;
+		case OP_XEXTRACT:
+			code = emit_xextract (code, VREG_FULL, ins->inst_c0, dreg, sreg1);
+			break;
 
 		/* ALU */
 		case OP_IADD:
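emit_xextract above relies on the invariant that OP_XCOMPARE produces a mask whose lanes are either all-ones or all-zero: UMAXV over the byte arrangement is 0xff exactly when at least one lane is set, UMINV is 0xff exactly when every lane is set, and moving byte 0 into a general-purpose register followed by a logical shift right by 7 normalizes 0xff/0x00 to 1/0. A plain-C model of the two reductions (a sketch with illustrative names, not code from the patch):

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Scalar model of the SIMD_EXTR_IS_ANY_SET / SIMD_EXTR_ARE_ALL_SET lowering.
 * 'mask' stands for the 16-byte result of a vector comparison, where every
 * lane is either all-ones or all-zero. */
static bool model_is_any_set (const uint8_t mask[16])
{
	uint8_t max = 0;
	for (int i = 0; i < 16; i++)   /* umaxv.16b */
		max = mask[i] > max ? mask[i] : max;
	return (max >> 7) != 0;        /* umov b0 + lsr #7 */
}

static bool model_are_all_set (const uint8_t mask[16])
{
	uint8_t min = 0xff;
	for (int i = 0; i < 16; i++)   /* uminv.16b */
		min = mask[i] < min ? mask[i] : min;
	return (min >> 7) != 0;        /* umov b0 + lsr #7 */
}

int main (void)
{
	uint8_t mask[16];
	memset (mask, 0xff, sizeof (mask));
	mask[5] = 0x00; /* one lane failed the comparison */
	return (model_is_any_set (mask) && !model_are_all_set (mask)) ? 0 : 1;
}

Reducing over bytes regardless of the element type is sound here because a wider lane is all-ones exactly when each of its bytes is 0xff, so the byte-wise max/min agrees with the lane-wise answer.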
diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h
index 020e446a4bf78..1303bfebf9ff8 100644
--- a/src/mono/mono/mini/mini-ops.h
+++ b/src/mono/mono/mini/mini-ops.h
@@ -1479,6 +1479,10 @@ MINI_OP(OP_XCOMPARE_SCALAR, "xcompare_scalar", XREG, XREG, XREG)
 MINI_OP(OP_XCOMPARE_FP, "xcompare_fp", XREG, XREG, XREG)
 MINI_OP(OP_XCOMPARE_FP_SCALAR, "xcompare_fp_scalar", XREG, XREG, XREG)
 
+/* Extract from XREG into IREG.
+ * inst_c0 - specific instruction, one of SIMD_EXTR_... */
+MINI_OP(OP_XEXTRACT, "xextract", IREG, XREG, NONE)
+
 /*
  * Generic SIMD operations, the rest of the JIT doesn't care about the exact operation.
  */
@@ -1486,6 +1490,7 @@ MINI_OP(OP_XBINOP, "xbinop", XREG, XREG, XREG)
 MINI_OP(OP_XBINOP_FORCEINT, "xbinop_forceint", XREG, XREG, XREG)
 MINI_OP(OP_XBINOP_SCALAR, "xbinop_scalar", XREG, XREG, XREG)
 MINI_OP(OP_XBINOP_BYSCALAR, "xbinop_byscalar", XREG, XREG, XREG)
+
 /* inst_c0 contains an INTRINS_ enum, inst_c1 might contain additional data */
 MINI_OP(OP_XOP, "xop", NONE, NONE, NONE)
 MINI_OP(OP_XOP_X_I, "xop_x_i", XREG, IREG, NONE)
diff --git a/src/mono/mono/mini/mini.h b/src/mono/mono/mini/mini.h
index 41a6a3b6d8e54..943242eadb494 100644
--- a/src/mono/mono/mini/mini.h
+++ b/src/mono/mono/mini/mini.h
@@ -2933,6 +2933,11 @@ enum {
 	SIMD_PREFETCH_MODE_2,
 };
 
+enum {
+	SIMD_EXTR_IS_ANY_SET,
+	SIMD_EXTR_ARE_ALL_SET
+};
+
 int mini_primitive_type_size (MonoTypeEnum type);
 MonoTypeEnum mini_get_simd_type_info (MonoClass *klass, guint32 *nelems);
 
diff --git a/src/mono/mono/mini/simd-intrinsics.c b/src/mono/mono/mini/simd-intrinsics.c
index 73121fc4a3668..747e6b237b4b8 100644
--- a/src/mono/mono/mini/simd-intrinsics.c
+++ b/src/mono/mono/mini/simd-intrinsics.c
@@ -509,11 +509,18 @@ static MonoInst*
 emit_xequal (MonoCompile *cfg, MonoClass *klass, MonoInst *arg1, MonoInst *arg2)
 {
 #ifdef TARGET_ARM64
-	int size = mono_class_value_size (klass, NULL);
-	if (size == 16)
+	if (!COMPILE_LLVM (cfg)) {
+		MonoTypeEnum elemt = get_underlying_type (m_class_get_this_arg (arg1->klass));
+		MonoInst* cmp = emit_xcompare (cfg, arg1->klass, elemt, arg1, arg2);
+		MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1);
+		ret->inst_c0 = SIMD_EXTR_ARE_ALL_SET;
+		ret->inst_c1 = mono_class_value_size (klass, NULL);
+		return ret;
+	} else if (mono_class_value_size (klass, NULL) == 16) {
 		return emit_simd_ins (cfg, klass, OP_XEQUAL_ARM64_V128_FAST, arg1->dreg, arg2->dreg);
-	else
+	} else {
 		return emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg);
+	}
 #else
 	MonoInst *ins = emit_simd_ins (cfg, klass, OP_XEQUAL, arg1->dreg, arg2->dreg);
 	if (!COMPILE_LLVM (cfg))
@@ -1201,9 +1208,9 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		return NULL;
 	}
 
-	if (!strcmp (m_class_get_name (cfg->method->klass), "Vector256"))
-		return NULL; // TODO: Fix Vector256.WithUpper/WithLower
-
+	if (!strcmp (m_class_get_name (cfg->method->klass), "Vector256") || !strcmp (m_class_get_name (cfg->method->klass), "Vector512"))
+		return NULL;
+
 	// FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64
 #ifdef TARGET_ARM64
 	if (!COMPILE_LLVM (cfg)) {
@@ -1216,6 +1223,16 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		case SN_LessThanOrEqual:
 		case SN_Negate:
 		case SN_OnesComplement:
+		case SN_EqualsAny:
+		case SN_GreaterThanAny:
+		case SN_GreaterThanOrEqualAny:
+		case SN_LessThanAny:
+		case SN_LessThanOrEqualAny:
+		case SN_EqualsAll:
+		case SN_GreaterThanAll:
+		case SN_GreaterThanOrEqualAll:
+		case SN_LessThanAll:
+		case SN_LessThanOrEqualAll:
 		case SN_Subtract:
 		case SN_BitwiseAnd:
 		case SN_BitwiseOr:
@@ -1488,18 +1505,27 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		if (!is_element_type_primitive (fsig->params [0]))
 			return NULL;
 		MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
-		switch (id) {
-		case SN_Equals:
-			return emit_xcompare (cfg, klass, arg0_type, args [0], args [1]);
-		case SN_EqualsAll:
-			return emit_xequal (cfg, arg_class, args [0], args [1]);
-		case SN_EqualsAny: {
-			MonoInst *cmp_eq = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]);
-			MonoInst *zero = emit_xzero (cfg, arg_class);
-			return emit_not_xequal (cfg, arg_class, cmp_eq, zero);
+		if (id == SN_Equals)
+			return emit_xcompare (cfg, klass, arg0_type, args [0], args [1]);
+
+		if (COMPILE_LLVM (cfg)) {
+			switch (id) {
+			case SN_EqualsAll:
+				return emit_xequal (cfg, arg_class, args [0], args [1]);
+			case SN_EqualsAny: {
+				MonoInst *cmp_eq = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]);
+				MonoInst *zero = emit_xzero (cfg, arg_class);
+				return emit_not_xequal (cfg, arg_class, cmp_eq, zero);
+			}
 		}
-		default: g_assert_not_reached ();
+		} else {
+			MonoInst* cmp = emit_xcompare (cfg, arg_class, arg0_type, args [0], args [1]);
+			MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1);
+			ret->inst_c0 = (id == SN_EqualsAll) ? SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET;
+			ret->inst_c1 = mono_class_value_size (klass, NULL);
+			return ret;
 		}
+		g_assert_not_reached ();
 	}
 	case SN_ExtractMostSignificantBits: {
 		if (!is_element_type_primitive (fsig->params [0]) || type_enum_is_float (arg0_type))
 			return NULL;
@@ -1567,34 +1593,40 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 			fsig->ret->type == MONO_TYPE_BOOLEAN &&
 			mono_metadata_type_equal (fsig->params [0], fsig->params [1]));
 
-		MonoInst *cmp = emit_xcompare_for_intrinsic (cfg, klass, id, arg0_type, args [0], args [1]);
-		MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
-
+		gboolean is_all = FALSE;
 		switch (id) {
 		case SN_GreaterThanAll:
 		case SN_GreaterThanOrEqualAll:
 		case SN_LessThanAll:
-		case SN_LessThanOrEqualAll: {
-			// for floating point numbers all ones is NaN and so
-			// they must be treated differently than integer types
-			if (type_enum_is_float (arg0_type)) {
+		case SN_LessThanOrEqualAll:
+			is_all = TRUE;
+			break;
+		}
+
+		MonoClass *arg_class = mono_class_from_mono_type_internal (fsig->params [0]);
+		if (COMPILE_LLVM (cfg)) {
+			MonoInst *cmp = emit_xcompare_for_intrinsic (cfg, klass, id, arg0_type, args [0], args [1]);
+			if (is_all) {
+				// for floating point numbers all ones is NaN and so
+				// they must be treated differently than integer types
+				if (type_enum_is_float (arg0_type)) {
+					MonoInst *zero = emit_xzero (cfg, arg_class);
+					MonoInst *inverted_cmp = emit_xcompare (cfg, klass, arg0_type, cmp, zero);
+					return emit_xequal (cfg, arg_class, inverted_cmp, zero);
+				}
+
+				MonoInst *ones = emit_xones (cfg, arg_class);
+				return emit_xequal (cfg, arg_class, cmp, ones);
+			} else {
 				MonoInst *zero = emit_xzero (cfg, arg_class);
-				MonoInst *inverted_cmp = emit_xcompare (cfg, klass, arg0_type, cmp, zero);
-				return emit_xequal (cfg, arg_class, inverted_cmp, zero);
+				return emit_not_xequal (cfg, arg_class, cmp, zero);
 			}
-
-			MonoInst *ones = emit_xones (cfg, arg_class);
-			return emit_xequal (cfg, arg_class, cmp, ones);
-		}
-		case SN_GreaterThanAny:
-		case SN_GreaterThanOrEqualAny:
-		case SN_LessThanAny:
-		case SN_LessThanOrEqualAny: {
-			MonoInst *zero = emit_xzero (cfg, arg_class);
-			return emit_not_xequal (cfg, arg_class, cmp, zero);
-		}
-		default:
-			g_assert_not_reached ();
+		} else {
+			MonoInst* cmp = emit_xcompare_for_intrinsic (cfg, arg_class, id, arg0_type, args [0], args [1]);
+			MonoInst* ret = emit_simd_ins (cfg, mono_defaults.boolean_class, OP_XEXTRACT, cmp->dreg, -1);
+			ret->inst_c0 = is_all ? SIMD_EXTR_ARE_ALL_SET : SIMD_EXTR_IS_ANY_SET;
+			ret->inst_c1 = mono_class_value_size (klass, NULL);
+			return ret;
 		}
 	}
 	case SN_Narrow: {
@@ -1908,6 +1940,8 @@ emit_vector64_vector128_t (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSign
 		case SN_op_BitwiseAnd:
 		case SN_op_BitwiseOr:
 		case SN_op_ExclusiveOr:
+		case SN_op_Equality:
+		case SN_op_Inequality:
 			break;
 		default:
 			return NULL;