Skip to content

Commit

Permalink
Merge pull request #4158 from pleroy/TresVieux
Browse files Browse the repository at this point in the history
Improve argument reduction
  • Loading branch information
pleroy authored Jan 7, 2025
2 parents 8c5eb1e + bbad4e6 commit 535e54c
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 9 deletions.
39 changes: 37 additions & 2 deletions functions/sin_cos_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,47 @@ TEST_F(SinCosTest, AccurateTableIndex) {

for (std::int64_t i = 0; i < iterations; ++i) {
double const x = uniformly_at(random);
auto const n = _mm_cvtsd_si64(_mm_set_sd(x * table_spacing_reciprocal));
auto const m = _mm_cvtsi128_si64(_mm_castpd_si128(

std::int64_t const n =
_mm_cvtsd_si64(_mm_set_sd(x * table_spacing_reciprocal));

std::int64_t const m = _mm_cvtsi128_si64(_mm_castpd_si128(
_mm_and_pd(mantissa_index_bits,
_mm_set_sd(x + (1LL << (std::numeric_limits<double>::digits -
table_spacing_bits - 1))))));

EXPECT_EQ(n, m);
}
}

TEST_F(SinCosTest, ReduceIndex) {
static constexpr std::int64_t iterations = 100;

static constexpr std::int64_t π_over_2_zeroes = 18;
static const __m128d sign_bit =
_mm_castsi128_pd(_mm_cvtsi64_si128(0x8000'0000'0000'0000));
static constexpr double mantissa_reduce_shifter =
1LL << (std::numeric_limits<double>::digits - 1);
std::mt19937_64 random(42);
std::uniform_real_distribution<> uniformly_at(-1000.0, 1000.0);

for (std::int64_t i = 0; i < iterations; ++i) {
double const θ = uniformly_at(random);

__m128d const n_128d = _mm_round_sd(
_mm_setzero_pd(), _mm_set_sd(θ * (2 / π)), _MM_FROUND_RINT);
double const n_double = _mm_cvtsd_f64(n_128d);
std::int64_t const n = _mm_cvtsd_si64(n_128d);

double const abs_θ = std::abs(θ);
__m128d const sign = _mm_and_pd(sign_bit, _mm_set_sd(θ));
double const m_shifted = abs_θ * (2 / π) + mantissa_reduce_shifter;
double const m_double = _mm_cvtsd_f64(
_mm_xor_pd(_mm_set_sd(m_shifted - mantissa_reduce_shifter), sign));
std::int64_t const m = _mm_cvtsd_si64(_mm_set_sd(m_double));

EXPECT_EQ(n, m);
EXPECT_EQ(m, m_double);
}
}

Expand Down
23 changes: 16 additions & 7 deletions numerics/sin_cos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ constexpr std::int64_t π_over_2_zeroes = 18;
constexpr Argument π_over_2_threshold = π / 2 * (1 << π_over_2_zeroes);
constexpr Argument π_over_2_high = 0x1.921fb'5444p0;
constexpr Argument π_over_2_low = 0x2.d1846'9898c'c5170'1b839p-40;
constexpr Argument mantissa_reduce_shifter =
1LL << (std::numeric_limits<double>::digits - 1);

template<FMAPolicy fma_policy>
using Polynomial1 = HornerEvaluator<Value, Argument, 1, fma_policy>;
Expand Down Expand Up @@ -149,6 +151,7 @@ inline double DetectDangerousRounding(double const x, double const Δx) {
}
}

template<FMAPolicy fma_policy>
inline void Reduce(Argument const θ,
DoublePrecision<Argument>& θ_reduced,
std::int64_t& quadrant) {
Expand All @@ -163,11 +166,15 @@ inline void Reduce(Argument const θ,
// in the worst case it could cause the reduced angle to jump from the
// vicinity of π / 4 to the vicinity of -π / 4 with appropriate adjustment
// of the quadrant.
__m128d const n_128d = _mm_round_sd(
_mm_setzero_pd(), _mm_set_sd(θ * (2 / π)), _MM_FROUND_RINT);
double const n_double = _mm_cvtsd_f64(n_128d);
std::int64_t const n = _mm_cvtsd_si64(n_128d);
Argument const value = θ - n_double * π_over_2_high;
__m128d const sign = _mm_and_pd(masks::sign_bit, _mm_set_sd(θ));
double const n_shifted =
FusedMultiplyAdd<fma_policy>(abs_θ, (2 / π), mantissa_reduce_shifter);
double const n_double = _mm_cvtsd_f64(
_mm_xor_pd(_mm_set_sd(n_shifted - mantissa_reduce_shifter), sign));
std::int64_t const n = _mm_cvtsd_si64(_mm_set_sd(n_double));

Argument const value =
FusedNegatedMultiplyAdd<fma_policy>(n_double, π_over_2_high, θ);
Argument const error = n_double * π_over_2_low;
θ_reduced = QuickTwoDifference(value, error);
// TODO(phl): Error analysis needed to find the right bounds.
Expand Down Expand Up @@ -285,15 +292,16 @@ Value __cdecl Sin(Argument θ) {
OSACA_FUNCTION_BEGIN(θ);
DoublePrecision<Argument> θ_reduced;
std::int64_t quadrant;
Reduce(θ, θ_reduced, quadrant);
double value;
OSACA_IF(UseHardwareFMA) {
Reduce<FMAPolicy::Force>(θ, θ_reduced, quadrant);
OSACA_IF(quadrant & 0b1) {
value = CosImplementation<FMAPolicy::Force>(θ_reduced);
} else {
value = SinImplementation<FMAPolicy::Force>(θ_reduced);
}
} else {
Reduce<FMAPolicy::Disallow>(θ, θ_reduced, quadrant);
OSACA_IF(quadrant & 0b1) {
value = CosImplementation<FMAPolicy::Disallow>(θ_reduced);
} else {
Expand All @@ -313,15 +321,16 @@ Value __cdecl Cos(Argument θ) {
OSACA_FUNCTION_BEGIN(θ);
DoublePrecision<Argument> θ_reduced;
std::int64_t quadrant;
Reduce(θ, θ_reduced, quadrant);
double value;
OSACA_IF(UseHardwareFMA) {
Reduce<FMAPolicy::Force>(θ, θ_reduced, quadrant);
OSACA_IF(quadrant & 0b1) {
value = SinImplementation<FMAPolicy::Force>(θ_reduced);
} else {
value = CosImplementation<FMAPolicy::Force>(θ_reduced);
}
} else {
Reduce<FMAPolicy::Disallow>(θ, θ_reduced, quadrant);
OSACA_IF(quadrant & 0b1) {
value = SinImplementation<FMAPolicy::Disallow>(θ_reduced);
} else {
Expand Down

0 comments on commit 535e54c

Please sign in to comment.