Skip to content

Commit

Permalink
use simd_cast in vcvt_s*_f*
Browse files Browse the repository at this point in the history
  • Loading branch information
SparrowLii committed Mar 16, 2021
1 parent 85718e2 commit 1997d18
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 152 deletions.
156 changes: 26 additions & 130 deletions crates/core_arch/src/aarch64/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1157,52 +1157,32 @@ pub unsafe fn vcvtx_high_f32_f64(a: float32x2_t, b: float64x2_t) -> float32x4_t
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzs))]
pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzs.v2i32.v2f32")]
fn vcvt_s32_f32_(a: float32x2_t) -> int32x2_t;
}
vcvt_s32_f32_(a)
pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t {
simd_cast(a)
}

/// Floating-point convert to signed fixed-point, rounding toward zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzs))]
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzs.v4i32.v4f32")]
fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t;
}
vcvtq_s32_f32_(a)
pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t {
simd_cast(a)
}

/// Floating-point convert to signed fixed-point, rounding toward zero
/// Floating-point convert to unsigned fixed-point, rounding toward zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzs))]
pub unsafe fn vcvt_s64_f64(a: float64x1_t) -> int64x1_t {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzs.v1i64.v1f64")]
fn vcvt_s64_f64_(a: float64x1_t) -> int64x1_t;
}
vcvt_s64_f64_(a)
pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t {
simd_cast(a)
}

/// Floating-point convert to signed fixed-point, rounding toward zero
/// Floating-point convert to unsigned fixed-point, rounding toward zero
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(fcvtzs))]
pub unsafe fn vcvtq_s64_f64(a: float64x2_t) -> int64x2_t {
#[allow(improper_ctypes)]
extern "C" {
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzs.v2i64.v2f64")]
fn vcvtq_s64_f64_(a: float64x2_t) -> int64x2_t;
}
vcvtq_s64_f64_(a)
pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
simd_cast(a)
}

/// Floating-point convert to signed integer, rounding to nearest with ties to away
Expand Down Expand Up @@ -1413,58 +1393,6 @@ pub unsafe fn vcvtpq_s64_f64(a: float64x2_t) -> int64x2_t {
vcvtpq_s64_f64_(a)
}

/// Floating-point convert to unsigned fixed-point, rounding toward zero
///
/// Takes a `float32x2_t` and returns a `uint32x2_t` by forwarding to the
/// `llvm.aarch64.neon.fcvtzu.v2i32.v2f32` symbol declared inside the body.
#[inline]
#[target_feature(enable = "neon")]
// In test builds, check that the generated code contains `fcvtzu`.
#[cfg_attr(test, assert_instr(fcvtzu))]
pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
#[allow(improper_ctypes)]
extern "C" {
// The link name is only attached when compiling for aarch64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzu.v2i32.v2f32")]
fn vcvt_u32_f32_(a: float32x2_t) -> uint32x2_t;
}
// Forward the argument unchanged; the declared function's result is returned as-is.
vcvt_u32_f32_(a)
}

/// Floating-point convert to unsigned fixed-point, rounding toward zero
///
/// Takes a `float32x4_t` and returns a `uint32x4_t` by forwarding to the
/// `llvm.aarch64.neon.fcvtzu.v4i32.v4f32` symbol declared inside the body.
#[inline]
#[target_feature(enable = "neon")]
// In test builds, check that the generated code contains `fcvtzu`.
#[cfg_attr(test, assert_instr(fcvtzu))]
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
#[allow(improper_ctypes)]
extern "C" {
// The link name is only attached when compiling for aarch64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzu.v4i32.v4f32")]
fn vcvtq_u32_f32_(a: float32x4_t) -> uint32x4_t;
}
// Forward the argument unchanged; the declared function's result is returned as-is.
vcvtq_u32_f32_(a)
}

/// Floating-point convert to unsigned fixed-point, rounding toward zero
///
/// Takes a `float64x1_t` and returns a `uint64x1_t` by forwarding to the
/// `llvm.aarch64.neon.fcvtzu.v1i64.v1f64` symbol declared inside the body.
#[inline]
#[target_feature(enable = "neon")]
// In test builds, check that the generated code contains `fcvtzu`.
#[cfg_attr(test, assert_instr(fcvtzu))]
pub unsafe fn vcvt_u64_f64(a: float64x1_t) -> uint64x1_t {
#[allow(improper_ctypes)]
extern "C" {
// The link name is only attached when compiling for aarch64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzu.v1i64.v1f64")]
fn vcvt_u64_f64_(a: float64x1_t) -> uint64x1_t;
}
// Forward the argument unchanged; the declared function's result is returned as-is.
vcvt_u64_f64_(a)
}

/// Floating-point convert to unsigned fixed-point, rounding toward zero
///
/// Takes a `float64x2_t` and returns a `uint64x2_t` by forwarding to the
/// `llvm.aarch64.neon.fcvtzu.v2i64.v2f64` symbol declared inside the body.
#[inline]
#[target_feature(enable = "neon")]
// In test builds, check that the generated code contains `fcvtzu`.
#[cfg_attr(test, assert_instr(fcvtzu))]
pub unsafe fn vcvtq_u64_f64(a: float64x2_t) -> uint64x2_t {
#[allow(improper_ctypes)]
extern "C" {
// The link name is only attached when compiling for aarch64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fcvtzu.v2i64.v2f64")]
fn vcvtq_u64_f64_(a: float64x2_t) -> uint64x2_t;
}
// Forward the argument unchanged; the declared function's result is returned as-is.
vcvtq_u64_f64_(a)
}

/// Floating-point convert to unsigned integer, rounding to nearest with ties to away
#[inline]
#[target_feature(enable = "neon")]
Expand Down Expand Up @@ -2990,22 +2918,6 @@ mod test {
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvt_s32_f32() {
    // Whole-number lanes of both signs; each should map to the same integer.
    let input = f32x2::new(-1.0, 2.0);
    let expected = i32x2::new(-1, 2);
    let actual: i32x2 = transmute(vcvt_s32_f32(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvtq_s32_f32() {
    // Four whole-number lanes with alternating sign.
    let input = f32x4::new(-1.0, 2.0, -3.0, 4.0);
    let expected = i32x4::new(-1, 2, -3, 4);
    let actual: i32x4 = transmute(vcvtq_s32_f32(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvt_s64_f64() {
let a: f64 = -1.0;
Expand All @@ -3022,6 +2934,22 @@ mod test {
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvt_u64_f64() {
    // The one-lane vector argument is produced by transmuting a plain f64.
    let input: f64 = 1.0;
    let expected = u64x1::new(1);
    let actual: u64x1 = transmute(vcvt_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvtq_u64_f64() {
    // Two non-negative whole-number lanes.
    let input = f64x2::new(1.0, 2.0);
    let expected = u64x2::new(1, 2);
    let actual: u64x2 = transmute(vcvtq_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvta_s32_f32() {
let a: f32x2 = f32x2::new(-1.0, 2.0);
Expand Down Expand Up @@ -3150,38 +3078,6 @@ mod test {
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvt_u32_f32() {
    // Two non-negative whole-number lanes.
    let input = f32x2::new(1.0, 2.0);
    let expected = u32x2::new(1, 2);
    let actual: u32x2 = transmute(vcvt_u32_f32(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvtq_u32_f32() {
    // Four non-negative whole-number lanes.
    let input = f32x4::new(1.0, 2.0, 3.0, 4.0);
    let expected = u32x4::new(1, 2, 3, 4);
    let actual: u32x4 = transmute(vcvtq_u32_f32(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvt_u64_f64() {
    // The one-lane vector argument is produced by transmuting a plain f64.
    let input: f64 = 1.0;
    let expected = u64x1::new(1);
    let actual: u64x1 = transmute(vcvt_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvtq_u64_f64() {
    // Two non-negative whole-number lanes.
    let input = f64x2::new(1.0, 2.0);
    let expected = u64x2::new(1, 2);
    let actual: u64x2 = transmute(vcvtq_u64_f64(transmute(input)));
    assert_eq!(actual, expected);
}

#[simd_test(enable = "neon")]
unsafe fn test_vcvta_u32_f32() {
let a: f32x2 = f32x2::new(1.0, 2.0);
Expand Down
4 changes: 4 additions & 0 deletions crates/core_arch/src/arm/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1946,6 +1946,7 @@ pub unsafe fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
// The instruction checked in test builds depends on the architecture:
// `vcvt` on arm, `fcvtzs` on aarch64.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
// Convert the `float32x2_t` input to `int32x2_t` through `simd_cast`.
simd_cast(a)
}
Expand All @@ -1955,6 +1956,7 @@ pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
// The instruction checked in test builds depends on the architecture:
// `vcvt` on arm, `fcvtzs` on aarch64.
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))]
pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
// Convert the `float32x4_t` input to `int32x4_t` through `simd_cast`.
simd_cast(a)
}
Expand All @@ -1964,6 +1966,7 @@ pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
// Unsigned conversion: on aarch64 the expected instruction is `fcvtzu`;
// `fcvtzs` is the signed counterpart and must not be asserted here.
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
    // Convert the `float32x2_t` input to `uint32x2_t` through `simd_cast`.
    simd_cast(a)
}
Expand All @@ -1973,6 +1976,7 @@ pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t {
#[target_feature(enable = "neon")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))]
// Unsigned conversion: on aarch64 the expected instruction is `fcvtzu`;
// `fcvtzs` is the signed counterpart and must not be asserted here.
#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))]
pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
    // Convert the `float32x4_t` input to `uint32x4_t` through `simd_cast`.
    simd_cast(a)
}
Expand Down
26 changes: 6 additions & 20 deletions crates/stdarch-gen/neon.spec
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,9 @@ fn = simd_cast
a = -1.0, 2.0, -3.0, 4.0
validate -1, 2, -3, 4

aarch64 = fcvtzs
generate float64x1_t:int64x1_t, float64x2_t:int64x2_t

arm = vcvt
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t

Expand All @@ -669,19 +672,12 @@ fn = simd_cast
a = 1.0, 2.0, 3.0, 4.0
validate 1, 2, 3, 4

aarch64 = fcvtzu
generate float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

arm = vcvt
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t

/// Floating-point convert to signed fixed-point, rounding toward zero
name = vcvt
double-suffixes
a = -1.0, 2.0, -3.0, 4.0
validate -1, 2, -3, 4

aarch64 = fcvtzs
link-aarch64 = fcvtzs._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t

/// Floating-point convert to signed integer, rounding to nearest with ties to away
name = vcvta
double-suffixes
Expand Down Expand Up @@ -722,16 +718,6 @@ aarch64 = fcvtps
link-aarch64 = fcvtps._EXT2_._EXT_
generate float32x2_t:int32x2_t, float32x4_t:int32x4_t, float64x1_t:int64x1_t, float64x2_t:int64x2_t

/// Floating-point convert to unsigned fixed-point, rounding toward zero
name = vcvt
double-suffixes
a = 1.0, 2.0, 3.0, 4.0
validate 1, 2, 3, 4

aarch64 = fcvtzu
link-aarch64 = fcvtzu._EXT2_._EXT_
generate float32x2_t:uint32x2_t, float32x4_t:uint32x4_t, float64x1_t:uint64x1_t, float64x2_t:uint64x2_t

/// Floating-point convert to unsigned integer, rounding to nearest with ties to away
name = vcvta
double-suffixes
Expand Down
5 changes: 3 additions & 2 deletions crates/stdarch-gen/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1190,7 +1190,7 @@ mod test {
tests_aarch64.push('}');
tests_aarch64.push('\n');

let arm_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap())
let arm_out_path: PathBuf = PathBuf::from("./crates/core_arch")
.join("src")
.join("arm")
.join("neon");
Expand All @@ -1200,7 +1200,8 @@ mod test {
file_arm.write_all(out_arm.as_bytes())?;
file_arm.write_all(tests_arm.as_bytes())?;

let aarch64_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap())
//let aarch64_out_path: PathBuf = PathBuf::from(env::var("OUT_DIR").unwrap())
let aarch64_out_path: PathBuf = PathBuf::from("./crates/core_arch")
.join("src")
.join("aarch64")
.join("neon");
Expand Down

0 comments on commit 1997d18

Please sign in to comment.