use simd_cast in vcvt_s*_f*

rust-lang · Mar 16, 2021 · commit 1997d18
1 parent 85718e2

Showing 4 changed files with 39 additions and 152 deletions.
diff --git a/crates/core_arch/src/aarch64/neon/generated.rs b/crates/core_arch/src/aarch64/neon/generated.rs
@@ -1157,52 +1157,32 @@ pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtzs))]
-pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzs.v2i32.v2f32")]
-        fn vcvt_s32_f32_(a: float32x2_t) -> int32x2_t;
-    }
-    vcvt_s32_f32_(a)
+pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t {
+    simd_cast(a)
 }
 
 /// Floating-point convert to signed fixed-point, rounding toward zero
 #[inline]
 #[target_feature(enable = "neon")]
 #[cfg_attr(test, assert_instr(fcvtzs))]
-pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzs.v4i32.v4f32")]
-        fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t;
-    }
-    vcvtq_s32_f32_(a)
+pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t {
+    simd_cast(a)
 }
 
-/// Floating-point convert to signed fixed-point, rounding toward zero
+/// Floating-point convert to unsigned fixed-point, rounding toward zero
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzs))]
+#[cfg_attr(test, assert_instr(fcvtzu))]
-pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzs.v1i64.v1f64")]
-        fn vcvt_s64_f64_(a: float64x1_t) -> int64x1_t;
-    }
-    vcvt_s64_f64_(a)
+pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t {
+    simd_cast(a)
 }
 
-/// Floating-point convert to signed fixed-point, rounding toward zero
+/// Floating-point convert to unsigned fixed-point, rounding toward zero
 #[inline]
 #[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzs))]
+#[cfg_attr(test, assert_instr(fcvtzu))]
-pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzs.v2i64.v2f64")]
-        fn vcvtq_s64_f64_(a: float64x2_t) -> int64x2_t;
-    }
-    vcvtq_s64_f64_(a)
+pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
+    simd_cast(a)
 }
 
 /// Floating-point convert to signed integer, rounding to nearest with ties to away
@@ -1413,58 +1393,6 @@ pub unsafe fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t {
     vcvtpq_s64_f64_(a)
 }
 
-/// Floating-point convert to unsigned fixed-point, rounding toward zero
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzu))]
-pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzu.v2i32.v2f32")]
-        fn vcvt_u32_f32_(a: float32x2_t) -> uint32x2_t;
-    }
-    vcvt_u32_f32_(a)
-}
-
-/// Floating-point convert to unsigned fixed-point, rounding toward zero
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzu))]
-pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzu.v4i32.v4f32")]
-        fn vcvtq_u32_f32_(a: float32x4_t) -> uint32x4_t;
-    }
-    vcvtq_u32_f32_(a)
-}
-
-/// Floating-point convert to unsigned fixed-point, rounding toward zero
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzu))]
-pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzu.v1i64.v1f64")]
-        fn vcvt_u64_f64_(a: float64x1_t) -> uint64x1_t;
-    }
-    vcvt_u64_f64_(a)
-}
-
-/// Floating-point convert to unsigned fixed-point, rounding toward zero
-#[inline]
-#[target_feature(enable = "neon")]
-#[cfg_attr(test, assert_instr(fcvtzu))]
-pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
-    #[allow(improper_ctypes)]
-    extern "C" {
-        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzu.v2i64.v2f64")]
-        fn vcvtq_u64_f64_(a: float64x2_t) -> uint64x2_t;
-    }
-    vcvtq_u64_f64_(a)
-}
-
 /// Floating-point convert to unsigned integer, rounding to nearest with ties to away
 #[inline]
 #[target_feature(enable = "neon")]
@@ -2990,22 +2918,6 @@ mod test {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcvt_s32_f32() {
-        let a: f32x2 = f32x2::new(-1.0, 2.0);
-        let e: i32x2 = i32x2::new(-1, 2);
-        let r: i32x2 = transmute(vcvt_s32_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcvtq_s32_f32() {
-        let a: f32x4 = f32x4::new(-1.0, 2.0, -3.0, 4.0);
-        let e: i32x4 = i32x4::new(-1, 2, -3, 4);
-        let r: i32x4 = transmute(vcvtq_s32_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
     #[simd_test(enable = "neon")]
     unsafe fn test_vcvt_s64_f64() {
         let a: f64 = -1.0;
@@ -3022,6 +2934,22 @@ mod test {
         assert_eq!(r, e);
     }
 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcvt_u64_f64() {
+        let a: f64 = 1.0;
+        let e: u64x1 = u64x1::new(1);
+        let r: u64x1 = transmute(vcvt_u64_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vcvtq_u64_f64() {
+        let a: f64x2 = f64x2::new(1.0, 2.0);
+        let e: u64x2 = u64x2::new(1, 2);
+        let r: u64x2 = transmute(vcvtq_u64_f64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
     #[simd_test(enable = "neon")]
     unsafe fn test_vcvta_s32_f32() {
         let a: f32x2 = f32x2::new(-1.0, 2.0);
@@ -3150,38 +3078,6 @@ mod test {
         assert_eq!(r, e);
     }
 
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcvt_u32_f32() {
-        let a: f32x2 = f32x2::new(1.0, 2.0);
-        let e: u32x2 = u32x2::new(1, 2);
-        let r: u32x2 = transmute(vcvt_u32_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcvtq_u32_f32() {
-        let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, 4.0);
-        let e: u32x4 = u32x4::new(1, 2, 3, 4);
-        let r: u32x4 = transmute(vcvtq_u32_f32(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcvt_u64_f64() {
-        let a: f64 = 1.0;
-        let e: u64x1 = u64x1::new(1);
-        let r: u64x1 = transmute(vcvt_u64_f64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
-    #[simd_test(enable = "neon")]
-    unsafe fn test_vcvtq_u64_f64() {
-        let a: f64x2 = f64x2::new(1.0, 2.0);
-        let e: u64x2 = u64x2::new(1, 2);
-        let r: u64x2 = transmute(vcvtq_u64_f64(transmute(a)));
-        assert_eq!(r, e);
-    }
-
     #[simd_test(enable = "neon")]
     unsafe fn test_vcvta_u32_f32() {
         let a: f32x2 = f32x2::new(1.0, 2.0);

diff --git a/crates/core_arch/src/arm/neon/generated.rs b/crates/core_arch/src/arm/neon/generated.rs
@@ -1946,6 +1946,7 @@ pub unsafe fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
 pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
     simd_cast(a)
 }
@@ -1955,6 +1956,7 @@ pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
 pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
     simd_cast(a)
 }
@@ -1964,6 +1966,7 @@ pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
 pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
     simd_cast(a)
 }
@@ -1973,6 +1976,7 @@ pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
 #[target_feature(enable = "neon")]
 #[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
 #[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
 pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
     simd_cast(a)
 }

diff --git a/crates/stdarch-gen/neon.spec b/crates/stdarch-gen/neon.spec
@@ -659,6 +659,9 @@ fn = simd_cast
 a = -1.0, 2.0, -3.0, 4.0
 validate -1, 2, -3, 4
 
+aarch64 = fcvtzs
+generate float64x1_t:int64x1_t, float64x2_t:int64x2_t
+
 arm = vcvt
 generate float32x2_t:int32x2_t, float32x4_t:int32x4_t
 
@@ -669,19 +672,12 @@ fn = simd_cast
 a = 1.0, 2.0, 3.0, 4.0
 validate 1, 2, 3, 4
 
+aarch64 = fcvtzu
+generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
+
 arm = vcvt
 generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t
 
-/// Floating-point convert to signed fixed-point, rounding toward zero
-name = vcvt
-double-suffixes
-a = -1.0, 2.0, -3.0, 4.0
-validate -1, 2, -3, 4
-
-aarch64 = fcvtzs
-link-aarch64 = fcvtzs._EXT2_._EXT_
-generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t
-
 /// Floating-point convert to signed integer, rounding to nearest with ties to away
 name = vcvta
 double-suffixes
@@ -722,16 +718,6 @@ aarch64 = fcvtps
 link-aarch64 = fcvtps._EXT2_._EXT_
 generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t
 
-/// Floating-point convert to unsigned fixed-point, rounding toward zero
-name = vcvt
-double-suffixes
-a = 1.0, 2.0, 3.0, 4.0
-validate 1, 2, 3, 4
-
-aarch64 = fcvtzu
-link-aarch64 = fcvtzu._EXT2_._EXT_
-generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t
-
 /// Floating-point convert to unsigned integer, rounding to nearest with ties to away
 name = vcvta
 double-suffixes

diff --git a/crates/stdarch-gen/src/main.rs b/crates/stdarch-gen/src/main.rs
@@ -1190,7 +1190,7 @@ mod test {
     tests_aarch64.push('}');
     tests_aarch64.push('\n');
 
-    let arm_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap())
+    let arm_out_path: PathBuf = PathBuf::from("./crates/core_arch")
         .join("src")
         .join("arm")
         .join("neon");
@@ -1200,7 +1200,8 @@ mod test {
     file_arm.write_all(out_arm.as_bytes())?;
     file_arm.write_all(tests_arm.as_bytes())?;
 
-    let aarch64_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap())
+    // NOTE(review): relative path replaces OUT_DIR; presumably the generator must now run from the repo root. Confirm.
+    let aarch64_out_path: PathBuf = PathBuf::from("./crates/core_arch")
         .join("src")
         .join("aarch64")
         .join("neon");