Skip to content

Commit

Permalink
optimize round()
Browse files Browse the repository at this point in the history
  • Loading branch information
NamorNiradnug committed May 4, 2024
1 parent fd0448b commit 16d710f
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 2 deletions.
5 changes: 4 additions & 1 deletion src/math/f32/trig.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ where
abs_x = abs_x
.simd_lt(Simd::splat(INPUT_LIMIT))
.select(abs_x, Simd::default());
let quadrants_float = (abs_x * Simd::splat(FRAC_2_PI)).round();
// (abs_x * Simd::splat(FRAC_2_PI)).round() generates more instructions and hence is slower
let quadrants_float = abs_x
.mul_add(Simd::splat(FRAC_2_PI), Simd::splat(0.5))
.trunc();

// SAFETY: INPUT_LIMIT guarantees that values in `quadrants_float` are representable in u32
let quadrants = unsafe { quadrants_float.to_int_unchecked::<i32>().cast() };
Expand Down
5 changes: 4 additions & 1 deletion src/math/f64/trig.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ where
abs_x = abs_x
.simd_lt(Simd::splat(INPUT_LIMIT))
.select(abs_x, Simd::default());
let quadrants_float = (abs_x * Simd::splat(FRAC_2_PI)).round();
// (abs_x * Simd::splat(FRAC_2_PI)).round() generates more instructions and hence is slower
let quadrants_float = abs_x
.mul_add(Simd::splat(FRAC_2_PI), Simd::splat(0.5))
.trunc();

// SAFETY: INPUT_LIMIT guarantees that values in `quadrants_float` are representable in u64
let quadrants = unsafe { quadrants_float.to_int_unchecked::<i64>().cast() };
Expand Down

0 comments on commit 16d710f

Please sign in to comment.