From 16d710f5f1572a6e67c8e87f0da3f89fed07ae32 Mon Sep 17 00:00:00 2001 From: NamorNiradnug Date: Sat, 4 May 2024 15:47:53 +0300 Subject: [PATCH] optimize round() --- src/math/f32/trig.rs | 5 ++++- src/math/f64/trig.rs | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/math/f32/trig.rs b/src/math/f32/trig.rs index f737622..821c273 100644 --- a/src/math/f32/trig.rs +++ b/src/math/f32/trig.rs @@ -30,7 +30,10 @@ where abs_x = abs_x .simd_lt(Simd::splat(INPUT_LIMIT)) .select(abs_x, Simd::default()); - let quadrants_float = (abs_x * Simd::splat(FRAC_2_PI)).round(); + // (abs_x * Simd::splat(FRAC_2_PI)).round() generates more instructions and hence is slower + let quadrants_float = abs_x + .mul_add(Simd::splat(FRAC_2_PI), Simd::splat(0.5)) + .trunc(); // SAFETY: INPUT_LIMIT guaratees that `quadrants_float` are representable in u32 let quadrants = unsafe { quadrants_float.to_int_unchecked::().cast() }; diff --git a/src/math/f64/trig.rs b/src/math/f64/trig.rs index 6ed9591..5c2d5db 100644 --- a/src/math/f64/trig.rs +++ b/src/math/f64/trig.rs @@ -26,7 +26,10 @@ where abs_x = abs_x .simd_lt(Simd::splat(INPUT_LIMIT)) .select(abs_x, Simd::default()); - let quadrants_float = (abs_x * Simd::splat(FRAC_2_PI)).round(); + // (abs_x * Simd::splat(FRAC_2_PI)).round() generates more instructions and hence is slower + let quadrants_float = abs_x + .mul_add(Simd::splat(FRAC_2_PI), Simd::splat(0.5)) + .trunc(); // SAFETY: INPUT_LIMIT guarantees that values in `quadrants_float` are representable in u64 let quadrants = unsafe { quadrants_float.to_int_unchecked::().cast() };