dotnet · LeVladIonescu · Mar 23, 2023 · Mar 9, 2023 · Mar 9, 2023 · Mar 17, 2023
@@ -1573,6 +1573,9 @@ arm_encode_arith_imm (int imm, guint32 *shift)
 #define arm_neon_fsqrt_4s(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_1, 0b11101, (rd), (rn))
 #define arm_neon_fsqrt_2d(p, rd, rn) arm_neon_2mvec_opcode ((p), VREG_FULL, 0b1, 0b10 | SIZE_2, 0b11101, (rd), (rn))
 
+#define arm_neon_frintm(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b0, (type), 0b11001, (rd), (rn))
+#define arm_neon_frintp(p, width, type, rd, rn) arm_neon_2mvec_opcode ((p), (width), 0b0, 0b10 | (type), 0b11000, (rd), (rn))
+
 /* NEON :: across lanes */
 #define arm_neon_xln_opcode(p, q, u, size, opcode, rd, rn) arm_neon_opcode_2reg ((p), (q), 0b00001110001100000000100000000000 | (u) << 29 | (size) << 22 | (opcode) << 12, (rd), (rn))
 

@@ -504,6 +504,7 @@ xcompare_fp: dest:x src1:x src2:x len:4
 negate: dest:x src1:x len:4
 ones_complement: dest:x src1:x len:4
 xbinop_forceint: dest:x src1:x src2:x len:4
+xunop: dest:x src1:x len:4
 
 generic_class_init: src1:a len:44 clob:c
 gc_safe_point: src1:i len:12 clob:c

@@ -36,6 +36,7 @@
 #define EXPAND_FUN(m, ...) EXPAND(m PARENTHESIZE(__VA_ARGS__))
 #define OPFMT_WDSS _w, dreg, sreg1, sreg2
 #define OPFMT_WTDSS _w, _t, dreg, sreg1, sreg2
+#define OPFMT_WTDS _w, _t, dreg, sreg1
 #define OPFMT_WTDSS_REV _w, _t, dreg, sreg2, sreg1
 #define _UNDEF(...) g_assert_not_reached ()
 #define SIMD_OP_CODE(reg_w, op, c) ((reg_w << 31) | (op) << 16 | (c))

diff --git a/src/mono/mono/mini/mini-ops.h b/src/mono/mono/mini/mini-ops.h
@@ -1483,6 +1483,7 @@ MINI_OP(OP_XCOMPARE_FP_SCALAR, "xcompare_fp_scalar", XREG, XREG, XREG)
  * Generic SIMD operations, the rest of the JIT doesn't care about the exact operation.
  */
 MINI_OP(OP_XBINOP, "xbinop", XREG, XREG, XREG)
+MINI_OP(OP_XUNOP, "xunop", XREG, XREG, NONE)
 MINI_OP(OP_XBINOP_FORCEINT, "xbinop_forceint", XREG, XREG, XREG)
 MINI_OP(OP_XBINOP_SCALAR, "xbinop_scalar", XREG, XREG, XREG)
 MINI_OP(OP_XBINOP_BYSCALAR, "xbinop_byscalar", XREG, XREG, XREG)

@@ -56,3 +56,5 @@ SIMD_OP  (128, OP_XBINOP,      OP_FSUB,              WTDSS,              _UNDEF,
 SIMD_OP  (128, OP_XBINOP_FORCEINT,    XBINOP_FORCEINT_AND,    WDSS,      arm_neon_and,     arm_neon_and,     arm_neon_and,    arm_neon_and,      arm_neon_and,     arm_neon_and)
 SIMD_OP  (128, OP_XBINOP_FORCEINT,    XBINOP_FORCEINT_OR,     WDSS,      arm_neon_orr,     arm_neon_orr,     arm_neon_orr,    arm_neon_orr,      arm_neon_orr,     arm_neon_orr)
 SIMD_OP  (128, OP_XBINOP_FORCEINT,    XBINOP_FORCEINT_XOR,    WDSS,      arm_neon_eor,     arm_neon_eor,     arm_neon_eor,    arm_neon_eor,      arm_neon_eor,     arm_neon_eor)
+SIMD_OP  (128, OP_XUNOP,    OP_FLOOR,    WTDS,    _UNDEF,    _UNDEF,            _UNDEF,          _UNDEF,            arm_neon_frintp,  arm_neon_frintp)
+SIMD_OP  (128, OP_XUNOP,    OP_CEIL,    WTDS,    _UNDEF,    _UNDEF,            _UNDEF,          _UNDEF,            arm_neon_frintm,  arm_neon_frintm)
@@ -1190,10 +1190,10 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		return NULL;
 #endif
 // FIXME: This limitation could be removed once everything here are supported by mini JIT on arm64
-#ifdef TARGET_ARM64
-	if (!(cfg->compile_aot && cfg->full_aot && !cfg->interp))
-		return NULL;
-#endif
+// #ifdef TARGET_ARM64
+// 	if (!(cfg->compile_aot && cfg->full_aot && !cfg->interp))
+// 		return NULL;
+// #endif
 
 	int id = lookup_intrins (sri_vector_methods, sizeof (sri_vector_methods), cmethod);
 	if (id == -1) {
@@ -1220,6 +1220,8 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		case SN_BitwiseAnd:
 		case SN_BitwiseOr:
 		case SN_Xor:
+		case SN_Floor:
+		case SN_Ceiling:
 			break;
 		default: 
 			return NULL;
@@ -1325,8 +1327,14 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
 		if (!type_enum_is_float (arg0_type))
 			return NULL;
 #ifdef TARGET_ARM64
-		int ceil_or_floor = id == SN_Ceiling ? INTRINS_AARCH64_ADV_SIMD_FRINTP : INTRINS_AARCH64_ADV_SIMD_FRINTM;
-		return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X, ceil_or_floor, arg0_type, fsig, args);
+		if (!COMPILE_LLVM(cfg)) {
+			int ceil_or_floor = id == SN_Ceiling ? OP_CEIL : OP_FLOOR;
+			return emit_simd_ins_for_sig (cfg, klass, OP_XUNOP, ceil_or_floor, arg0_type, fsig, args);
+		}
+		else {
+			int ceil_or_floor = id == SN_Ceiling ? INTRINS_AARCH64_ADV_SIMD_FRINTP : INTRINS_AARCH64_ADV_SIMD_FRINTM;
+			return emit_simd_ins_for_sig (cfg, klass, OP_XOP_OVR_X_X, ceil_or_floor, arg0_type, fsig, args);
+		}
 #elif defined(TARGET_AMD64)
 		if (!is_SIMD_feature_supported (cfg, MONO_CPU_X86_SSE41))
 			return NULL;