diff --git a/src/common/misc.h b/src/common/misc.h index 258d39c6..6b571cb7 100644 --- a/src/common/misc.h +++ b/src/common/misc.h @@ -143,10 +143,17 @@ #define R_LN2f 1.442695040888963407359924681001892137426645954152985934135449406931f -// Overflow bound for exp and pow +// Overflow bounds +// - exp(x) overflows for x over (also used in pow) #define LOG_DBL_MAX 0x1.62e42fefa39efp+9 /* 709.782712893384 */ +// Other bounds + +// - log1p(f)(x) approximation holds up to x equals +#define LOG1PF_BOUND 0x1.2ced32p+126 /* 1.0e+38 */ +#define LOG1P_BOUND 0x1.c7b1f3cac7433p+1019 /* 1.0e+307 */ + // #ifndef MIN diff --git a/src/libm-tester/tester.c b/src/libm-tester/tester.c index 2831056f..2a5ed003 100644 --- a/src/libm-tester/tester.c +++ b/src/libm-tester/tester.c @@ -4145,6 +4145,7 @@ void do_test() { fprintf(stderr, "log1p : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_d(mpfr_log1p, child_log1p, d, 1.0); + for(d = 1.0e+307;d < DBL_MAX && success;d += 1.0e+306) checkAccuracy_d(mpfr_log1p, child_log1p, d, 1.0); showResult(success); // @@ -5018,6 +5019,7 @@ void do_test() { fprintf(stderr, "log1pf : "); for(d = 0.0001;d < 10 && success;d += 0.001) checkAccuracy_f(mpfr_log1p, child_log1pf, d, 1.0); + for(d = 1.0e+38;d < FLT_MAX && success;d += 1.0e+37) checkAccuracy_f(mpfr_log1p, child_log1pf, d, 1.0); showResult(success); // diff --git a/src/libm/sleefdp.c b/src/libm/sleefdp.c index 09a40de1..f03fb9ab 100644 --- a/src/libm/sleefdp.c +++ b/src/libm/sleefdp.c @@ -2145,16 +2145,18 @@ EXPORT CONST double xlog1p(double d) { double m, t, x2; int e; + if (d > LOG1P_BOUND) return xlog_u1(d); // ~log(d) + double dp1 = d + 1; - + int o = dp1 < DBL_MIN; if (o) dp1 *= (double)(INT64_C(1) << 32) * (double)(INT64_C(1) << 32); - + e = ilogb2k(dp1 * (1.0/0.75)); t = ldexp3k(1, -e); m = mla(d, t, t - 1); - + if (o) e -= 64; x = dddiv_d2_d2_d2(dd(m, 0), ddadd_d2_d_d(2, m)); @@ -2175,8 +2177,7 @@ EXPORT CONST double xlog1p(double d) { s = ddadd_d2_d2_d(s, x2 * x.x * t); double r = s.x + s.y; - - if (d > 1e+307) r = SLEEF_INFINITY; + if (d < -1 || xisnan(d)) r = SLEEF_NAN; if (d == -1) r = -SLEEF_INFINITY; if (xisnegzero(d)) r = -0.0; diff --git a/src/libm/sleefsimddp.c b/src/libm/sleefsimddp.c index 4dfc434e..8186ff32 100644 --- a/src/libm/sleefsimddp.c +++ b/src/libm/sleefsimddp.c @@ -2987,17 +2987,19 @@ EXPORT CONST VECTOR_CC vdouble xlog1p(vdouble d) { 0.2857142932794299317e+0, 0.3999999999635251990e+0, 0.6666666666667333541e+0); - + s = ddadd_vd2_vd2_vd2(s, ddscale_vd2_vd2_vd(x, vcast_vd_d(2))); s = ddadd_vd2_vd2_vd(s, vmul_vd_vd_vd(vmul_vd_vd_vd(x2, vd2getx_vd_vd2(x)), t)); vdouble r = vadd_vd_vd_vd(vd2getx_vd_vd2(s), vd2gety_vd_vd2(s)); - - r = vsel_vd_vo_vd_vd(vgt_vo_vd_vd(d, vcast_vd_d(1e+307)), vcast_vd_d(SLEEF_INFINITY), r); + + // Use log(d) if d too large to use core approximation. + vopmask ocore = vle_vo_vd_vd(d, vcast_vd_d(LOG1P_BOUND)); + if(!LIKELY(vtestallones_i_vo64 (ocore))) r = vsel_vd_vo_vd_vd(ocore, r, xlog_u1(d)); r = vsel_vd_vo_vd_vd(vor_vo_vo_vo(vlt_vo_vd_vd(d, vcast_vd_d(-1)), visnan_vo_vd(d)), vcast_vd_d(SLEEF_NAN), r); r = vsel_vd_vo_vd_vd(veq_vo_vd_vd(d, vcast_vd_d(-1)), vcast_vd_d(-SLEEF_INFINITY), r); r = vsel_vd_vo_vd_vd(visnegzero_vo_vd(d), vcast_vd_d(-0.0), r); - + return r; } diff --git a/src/libm/sleefsimdsp.c b/src/libm/sleefsimdsp.c index 21187988..27a33f98 100644 --- a/src/libm/sleefsimdsp.c +++ b/src/libm/sleefsimdsp.c @@ -2873,13 +2873,15 @@ EXPORT CONST VECTOR_CC vfloat xlog1pf(vfloat d) { t = vcast_vf_f(+0.3027294874e+0f); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.3996108174e+0f)); t = vmla_vf_vf_vf_vf(t, x2, vcast_vf_f(+0.6666694880e+0f)); - + s = dfadd_vf2_vf2_vf2(s, dfscale_vf2_vf2_vf(x, vcast_vf_f(2))); s = dfadd_vf2_vf2_vf(s, vmul_vf_vf_vf(vmul_vf_vf_vf(x2, vf2getx_vf_vf2(x)), t)); vfloat r = vadd_vf_vf_vf(vf2getx_vf_vf2(s), vf2gety_vf_vf2(s)); - - r = vsel_vf_vo_vf_vf(vgt_vo_vf_vf(d, vcast_vf_f(1e+38)), vcast_vf_f(SLEEF_INFINITYf), r); + + // Use log(d) if d too large to use core approximation. + vopmask ocore = vle_vo_vf_vf(d, vcast_vf_f(LOG1PF_BOUND)); + if(!LIKELY(vtestallones_i_vo32 (ocore))) r = vsel_vf_vo_vf_vf(ocore, r, xlogf_u1(d)); r = vreinterpret_vf_vm(vor_vm_vo32_vm(vgt_vo_vf_vf(vcast_vf_f(-1), d), vreinterpret_vm_vf(r))); r = vsel_vf_vo_vf_vf(veq_vo_vf_vf(d, vcast_vf_f(-1)), vcast_vf_f(-SLEEF_INFINITYf), r); r = vsel_vf_vo_vf_vf(visnegzero_vo_vf(d), vcast_vf_f(-0.0f), r); diff --git a/src/libm/sleefsp.c b/src/libm/sleefsp.c index 1f80522e..1df87f4d 100644 --- a/src/libm/sleefsp.c +++ b/src/libm/sleefsp.c @@ -1733,18 +1733,20 @@ EXPORT CONST float xlog1pf(float d) { float m, t, x2; int e; + if (d > LOG1PF_BOUND) return xlogf(d); // ~log(d) + float dp1 = d + 1; - + int o = dp1 < FLT_MIN; if (o) dp1 *= (float)(INT64_C(1) << 32) * (float)(INT64_C(1) << 32); - + e = ilogb2kf(dp1 * (1.0f/0.75f)); t = ldexp3kf(1, -e); m = mlaf(d, t, t-1); if (o) e -= 64; - + x = dfdiv_f2_f2_f2(df(m, 0), dfadd_f2_f_f(2, m)); x2 = x.x * x.x; @@ -1757,8 +1759,7 @@ EXPORT CONST float xlog1pf(float d) { s = dfadd_f2_f2_f(s, x2 * x.x * t); float r = s.x + s.y; - - if (d > 1e+38) r = SLEEF_INFINITYf; + if (d < -1) r = SLEEF_NANf; if (d == -1) r = -SLEEF_INFINITYf; if (xisnegzerof(d)) r = -0.0f;