Skip to content

Commit

Permalink
Update overflow bound in log1p(f)
Browse files Browse the repository at this point in the history
And add tests in this region.

Fix functions in region between previous
overflow bound and maximum float.

Accuracy is maintained by relying on high
accuracy log (in branch), however performance
drops slightly. Ideally the core approximation
should also work above threshold.
  • Loading branch information
blapie committed Dec 6, 2024
1 parent b997b2f commit ccd4fd4
Show file tree
Hide file tree
Showing 6 changed files with 33 additions and 18 deletions.
9 changes: 8 additions & 1 deletion src/common/misc.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,17 @@

#define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f

// Overflow bound for exp and pow
// Overflow bounds

// - exp(x) overflows for x over (also used in pow)
#define LOG_DBL_MAX 0x1.62e42fefa39efp+9 /* 709.782712893384 */

// Other bounds

// - log1p(f)(x) approximation holds up to x equals
#define LOG1PF_BOUND 0x1.2ced32p+126 /* 1.0e+38 */
#define LOG1P_BOUND 0x1.c7b1f3cac7433p+1019 /* 1.0e+307 */

//

#ifndef MIN
Expand Down
2 changes: 2 additions & 0 deletions src/libm-tester/tester.c
Original file line number Diff line number Diff line change
Expand Up @@ -4145,6 +4145,7 @@ void do_test() {

fprintf(stderr, "log1p : ");
for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log1p, child_log1p, d, 1.0);
for(d = 1.0e+307;d < DBL_MAX && success;d += 1.0e+306) checkAccuracy_d(mpfr_log1p, child_log1p, d, 1.0);
showResult(success);

//
Expand Down Expand Up @@ -5018,6 +5019,7 @@ void do_test() {

fprintf(stderr, "log1pf : ");
for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log1p, child_log1pf, d, 1.0);
for(d = 1.0e+38;d < FLT_MAX && success;d += 1.0e+37) checkAccuracy_f(mpfr_log1p, child_log1pf, d, 1.0);
showResult(success);

//
Expand Down
11 changes: 6 additions & 5 deletions src/libm/sleefdp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2145,16 +2145,18 @@ EXPORT CONST double xlog1p(double d) {
double m, t, x2;
int e;

if (d > LOG1P_BOUND) return xlog_u1(d); // ~log(d)

double dp1 = d + 1;

int o = dp1 < DBL_MIN;
if (o) dp1 *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32);

e = ilogb2k(dp1 * (1.0/0.75));

t = ldexp3k(1, -e);
m = mla(d, t, t - 1);

if (o) e -= 64;

x = dddiv_d2_d2_d2(dd(m, 0), ddadd_d2_d_d(2, m));
Expand All @@ -2175,8 +2177,7 @@ EXPORT CONST double xlog1p(double d) {
s = ddadd_d2_d2_d(s, x2 * x.x * t);

double r = s.x + s.y;

if (d > 1e+307) r = SLEEF_INFINITY;

if (d < -1 || xisnan(d)) r = SLEEF_NAN;
if (d == -1) r = -SLEEF_INFINITY;
if (xisnegzero(d)) r = -0.0;
Expand Down
10 changes: 6 additions & 4 deletions src/libm/sleefsimddp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2987,17 +2987,19 @@ EXPORT CONST VECTOR_CC vdouble xlog1p(vdouble d) {
0.2857142932794299317e+0,
0.3999999999635251990e+0,
0.6666666666667333541e+0);

s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2)));
s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x)), t));

vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s));

r = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d, vcast_vd_d(1e+307)), vcast_vd_d(SLEEF_INFINITY), r);

// Use log(d) if d too large to use core approximation.
vopmask ocore = vle_vo_vd_vd(d, vcast_vd_d(LOG1P_BOUND));
if(!LIKELY(vtestallones_i_vo64 (ocore))) r = vsel_vd_vo_vd_vd(ocore, r, xlog_u1(d));
r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(-1)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r);
r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(-1)), vcast_vd_d(-SLEEF_INFINITY), r);
r = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), r);

return r;
}

Expand Down
8 changes: 5 additions & 3 deletions src/libm/sleefsimdsp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2873,13 +2873,15 @@ EXPORT CONST VECTOR_CC vfloat xlog1pf(vfloat d) {
t = vcast_vf_f(+0.3027294874e+0f);
t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.3996108174e+0f));
t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.6666694880e+0f));

s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2)));
s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)), t));

vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(s), vf2gety_vf_vf2(s));

r = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(d, vcast_vf_f(1e+38)), vcast_vf_f(SLEEF_INFINITYf), r);

// Use log(d) if d too large to use core approximation.
vopmask ocore = vle_vo_vf_vf(d, vcast_vf_f(LOG1PF_BOUND));
if(!LIKELY(vtestallones_i_vo32 (ocore))) r = vsel_vf_vo_vf_vf(ocore, r, xlogf_u1(d));
r = vreinterpret_vf_vm(vor_vm_vo32_vm(vgt_vo_vf_vf(vcast_vf_f(-1), d), vreinterpret_vm_vf(r)));
r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(-1)), vcast_vf_f(-SLEEF_INFINITYf), r);
r = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), r);
Expand Down
11 changes: 6 additions & 5 deletions src/libm/sleefsp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1733,18 +1733,20 @@ EXPORT CONST float xlog1pf(float d) {
float m, t, x2;
int e;

if (d > LOG1PF_BOUND) return xlogf(d); // ~log(d)

float dp1 = d + 1;

int o = dp1 < FLT_MIN;
if (o) dp1 *= (float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32);

e = ilogb2kf(dp1 * (1.0f/0.75f));

t = ldexp3kf(1, -e);
m = mlaf(d, t, t-1);

if (o) e -= 64;

x = dfdiv_f2_f2_f2(df(m, 0), dfadd_f2_f_f(2, m));
x2 = x.x * x.x;

Expand All @@ -1757,8 +1759,7 @@ EXPORT CONST float xlog1pf(float d) {
s = dfadd_f2_f2_f(s, x2 * x.x * t);

float r = s.x + s.y;

if (d > 1e+38) r = SLEEF_INFINITYf;

if (d < -1) r = SLEEF_NANf;
if (d == -1) r = -SLEEF_INFINITYf;
if (xisnegzerof(d)) r = -0.0f;
Expand Down

0 comments on commit ccd4fd4

Please sign in to comment.