This patch introduces erf, erfc, lgamma, tgamma and sinpi. #23

Merged: 4 commits, May 15, 2017
53 changes: 46 additions & 7 deletions src/arch/helperadvsimd.h
@@ -130,11 +130,11 @@ static INLINE vfloat vsqrt_vf_vf(vfloat d) { return vsqrtq_f32(d); }

// Multiply accumulate: z = z + x * y
static INLINE vfloat vmla_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
return vmlaq_f32(z, x, y);
return vfmaq_f32(z, x, y);
Collaborator:
What's the reason behind this change?

Owner (author):
We can safely assume that FMA is faster than any other combination of multiplication and addition. I checked the assembly output from the compiler, and vmlaq is converted into separate multiplication and addition instructions.

Collaborator:
Oh! Strange. Could you please provide a minimal example that shows this seemingly inconsistent behavior? You might have found a bug in GCC.

Owner (author):
Multiply-accumulate instructions are all fused on AArch64, so this is not a bug.

Collaborator:
This LGTM; I was just asking you to provide an example of code that generated separate multiplication and addition instructions from the vmlaq_f32 intrinsic.

No worries if you don't have time.
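
Not part of the thread, but for illustration: a minimal sketch of the kind of test case being asked for. The two functions below differ only in the intrinsic used; compiling for AArch64 (e.g. gcc -O2 -S) and comparing the generated assembly shows whether vmlaq_f32 is lowered to a single fused fmla or to separate fmul and fadd instructions. The function names are made up for this example.

#include <arm_neon.h>

// z + x * y via the (non-fused) multiply-accumulate intrinsic; the compiler
// may or may not contract it into a fused fmla.
float32x4_t mla_version(float32x4_t x, float32x4_t y, float32x4_t z) {
  return vmlaq_f32(z, x, y);
}

// z + x * y via the explicitly fused multiply-add intrinsic.
float32x4_t fma_version(float32x4_t x, float32x4_t y, float32x4_t z) {
  return vfmaq_f32(z, x, y);
}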

}
// Multiply subtract: z = z - x * y
static INLINE vfloat vmlanp_vf_vf_vf_vf(vfloat x, vfloat y, vfloat z) {
return vmlsq_f32(z, x, y);
return vfmsq_f32(z, x, y);
Collaborator:
Why?

}

// |x|, -x
@@ -290,11 +290,7 @@ static INLINE vdouble vmin_vd_vd_vd(vdouble x, vdouble y) {

// Multiply accumulate: z = z + x * y
static INLINE vdouble vmla_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
return vmlaq_f64(z, x, y);
}
//[z = x * y - z]
static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
return vsub_vd_vd_vd(vmul_vd_vd_vd(x, y), z);
return vfmaq_f64(z, x, y);
}

static INLINE vdouble vmlanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
@@ -309,6 +305,11 @@ static INLINE vdouble vfmanp_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // z
return vfmsq_f64(z, x, y);
}

//[z = x * y - z]
static INLINE vdouble vmlapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) {
return vneg_vd_vd(vfmanp_vd_vd_vd_vd(x, y, z));
}

static INLINE vdouble vfmapn_vd_vd_vd_vd(vdouble x, vdouble y, vdouble z) { // x * y - z
return vneg_vd_vd(vfmanp_vd_vd_vd_vd(x, y, z));
}
@@ -350,6 +351,44 @@ static INLINE vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) {
return vbslq_f64(vreinterpretq_u64_u32(mask), x, y);
}

#if 1
static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) {
return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0));
}

static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2));
}

static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3)));
}
#else
// This implementation is slower on current CPU models (as of May 2017).
// I (Naoki Shibata) expect that on future CPU models with hardware similar to the Super Shuffle Engine, this implementation will be faster.
static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double d0, double d1) {
uint8x16_t idx = vbslq_u8(vreinterpretq_u8_u32(o), (uint8x16_t) { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 },
(uint8x16_t) { 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15 });

uint8x16_t tab = (uint8x16_t) (float64x2_t) { d0, d1 };
return (vdouble) vqtbl1q_u8(tab, idx);
}

static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
Collaborator:
Same here - I think this is not optimal. Isn't the AVX code better?

uint8x16_t idx = vbslq_u8(vreinterpretq_u8_u32(o0), (uint8x16_t) { 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 },
vbslq_u8(vreinterpretq_u8_u32(o1), (uint8x16_t) { 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15 },
vbslq_u8(vreinterpretq_u8_u32(o2), (uint8x16_t) { 16, 17, 18, 19, 20, 21, 22, 23, 16, 17, 18, 19, 20, 21, 22, 23 },
(uint8x16_t) { 24, 25, 26, 27, 28, 29, 30, 31, 24, 25, 26, 27, 28, 29, 30, 31 })));

uint8x16x2_t tab = { { (uint8x16_t) (float64x2_t) { d0, d1 }, (uint8x16_t) (float64x2_t) { d2, d3 } } };
return (vdouble) vqtbl2q_u8(tab, idx);
}

static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
return vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o1, d0, d1, d2, d2);
}
#endif
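
For reference (not part of the patch): the blend-based variant in the #if 1 branch and the table-lookup variant in the #else branch above compute the same nested selection in each vector lane. A scalar sketch of the assumed semantics:

// Assumed per-lane semantics of vsel_vd_vo_vo_vo_d_d_d_d: pick d0 where o0 is
// set, otherwise d1 where o1 is set, otherwise d2 where o2 is set, otherwise d3.
static double sel4_scalar(int o0, int o1, int o2,
                          double d0, double d1, double d2, double d3) {
  return o0 ? d0 : (o1 ? d1 : (o2 ? d2 : d3));
}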

static INLINE vdouble vrint_vd_vd(vdouble d) {
return vrndnq_f64(d);
}
12 changes: 12 additions & 0 deletions src/arch/helperavx.h
@@ -250,6 +250,18 @@ static INLINE vint vsel_vi_vo_vi_vi(vopmask o, vint x, vint y) { return _mm_blen

static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return _mm256_blendv_pd(y, x, _mm256_castsi256_pd(o)); }

static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) {
return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0));
}

static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2));
}

static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3)));
}

static INLINE vopmask visinf_vo_vd(vdouble d) {
return vreinterpret_vm_vd(_mm256_cmp_pd(vabs_vd_vd(d), _mm256_set1_pd(INFINITY), _CMP_EQ_OQ));
}
13 changes: 13 additions & 0 deletions src/arch/helperavx2.h
@@ -202,6 +202,19 @@ static INLINE vopmask vgt_vo_vi_vi(vint x, vint y) { return _mm256_castsi128_si2
static INLINE vint vsel_vi_vo_vi_vi(vopmask m, vint x, vint y) { return _mm_blendv_epi8(y, x, _mm256_castsi256_si128(m)); }

static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return _mm256_blendv_pd(y, x, _mm256_castsi256_pd(o)); }
static INLINE vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) { return _mm256_permutevar_pd(_mm256_set_pd(v1, v0, v1, v0), o); }

static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
__m256i v = _mm256_castpd_si256(vsel_vd_vo_vd_vd(o0, _mm256_castsi256_pd(_mm256_set_epi32(1, 0, 1, 0, 1, 0, 1, 0)),
vsel_vd_vo_vd_vd(o1, _mm256_castsi256_pd(_mm256_set_epi32(3, 2, 3, 2, 3, 2, 3, 2)),
vsel_vd_vo_vd_vd(o2, _mm256_castsi256_pd(_mm256_set_epi32(5, 4, 5, 4, 5, 4, 5, 4)),
_mm256_castsi256_pd(_mm256_set_epi32(7, 6, 7, 6, 7, 6, 7, 6))))));
return _mm256_castsi256_pd(_mm256_permutevar8x32_epi32(_mm256_castpd_si256(_mm256_set_pd(d3, d2, d1, d0)), v));
}

static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
return vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o1, d0, d1, d2, d2);
}

static INLINE vopmask visinf_vo_vd(vdouble d) {
return vreinterpret_vm_vd(_mm256_cmp_pd(vabs_vd_vd(d), _mm256_set1_pd(INFINITY), _CMP_EQ_OQ));
27 changes: 27 additions & 0 deletions src/arch/helperavx512f.h
@@ -199,6 +199,33 @@ static INLINE vdouble vsel_vd_vo_vd_vd(vopmask mask, vdouble x, vdouble y) {
return _mm512_mask_blend_pd(mask, y, x);
}

static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) {
return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0));
}

#if 1
// Probably this is faster
static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
__m512i v = _mm512_castpd_si512(vsel_vd_vo_vd_vd(o0, _mm512_castsi512_pd(_mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0)),
vsel_vd_vo_vd_vd(o1, _mm512_castsi512_pd(_mm512_set_epi64(1, 1, 1, 1, 1, 1, 1, 1)),
vsel_vd_vo_vd_vd(o2, _mm512_castsi512_pd(_mm512_set_epi64(2, 2, 2, 2, 2, 2, 2, 2)),
_mm512_castsi512_pd(_mm512_set_epi64(3, 3, 3, 3, 3, 3, 3, 3))))));
return _mm512_permutexvar_pd(v, _mm512_castpd256_pd512(_mm256_set_pd(d3, d2, d1, d0)));
}

static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
return vsel_vd_vo_vo_vo_d_d_d_d(o0, o1, o1, d0, d1, d2, d2);
}
#else
static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2));
}

static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3)));
}
#endif

static INLINE vopmask visinf_vo_vd(vdouble d) {
return _mm512_cmp_pd_mask(vabs_vd_vd(d), _mm512_set1_pd(INFINITY), _CMP_EQ_OQ);
}
12 changes: 12 additions & 0 deletions src/arch/helperpurec.h
@@ -207,6 +207,18 @@ static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { vmask ret; for(
static INLINE vdouble vsel_vd_vo_vd_vd (vopmask o, vdouble x, vdouble y) { vdouble ret; for(int i=0;i<VECTLENDP*2;i++) ret.u[i] = (o.u[i] & x.u[i]) | (y.u[i] & ~o.u[i]); return ret; }
static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask o, vint2 x, vint2 y) { vint2 ret; for(int i=0;i<VECTLENDP*2;i++) ret.u[i] = (o.u[i] & x.u[i]) | (y.u[i] & ~o.u[i]); return ret; }

static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) {
return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0));
}

static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2));
}

static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3)));
}

static INLINE vdouble vcast_vd_vi(vint vi) { vdouble ret; for(int i=0;i<VECTLENDP;i++) ret.d[i] = vi.i[i]; return ret; }
static INLINE vint vtruncate_vi_vd(vdouble vd) { vint ret; for(int i=0;i<VECTLENDP;i++) ret.i[i] = (int)vd.d[i]; return ret; }
static INLINE vint vrint_vi_vd(vdouble vd) { vint ret; for(int i=0;i<VECTLENDP;i++) ret.i[i] = vd.d[i] > 0 ? (int)(vd.d[i] + 0.5) : (int)(vd.d[i] - 0.5); return ret; }
12 changes: 12 additions & 0 deletions src/arch/helpersse2.h
@@ -230,6 +230,18 @@ static INLINE vdouble vsel_vd_vo_vd_vd(vopmask opmask, vdouble x, vdouble y) {
}
#endif

static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) {
return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0));
}

static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2));
}

static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3)));
}

static INLINE vopmask visinf_vo_vd(vdouble d) {
return vreinterpret_vm_vd(_mm_cmpeq_pd(vabs_vd_vd(d), _mm_set1_pd(INFINITY)));
}
12 changes: 12 additions & 0 deletions src/arch/helpervecext.h
@@ -437,6 +437,18 @@ static INLINE vmask vxor_vm_vo32_vm(vopmask x, vmask y) { return x ^ y; }
static INLINE vdouble vsel_vd_vo_vd_vd(vopmask o, vdouble x, vdouble y) { return (vdouble)(((vmask)o & (vmask)x) | ((vmask)y & ~(vmask)o)); }
static INLINE vint2 vsel_vi2_vo_vi2_vi2(vopmask o, vint2 x, vint2 y) { return (vint2)(((vmask)o & (vmask)x) | ((vmask)y & ~(vmask)o)); }

static INLINE CONST vdouble vsel_vd_vo_d_d(vopmask o, double v1, double v0) {
return vsel_vd_vo_vd_vd(o, vcast_vd_d(v1), vcast_vd_d(v0));
}

static INLINE vdouble vsel_vd_vo_vo_d_d_d(vopmask o0, vopmask o1, double d0, double d1, double d2) {
return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_d_d(o1, d1, d2));
}

static INLINE vdouble vsel_vd_vo_vo_vo_d_d_d_d(vopmask o0, vopmask o1, vopmask o2, double d0, double d1, double d2, double d3) {
return vsel_vd_vo_vd_vd(o0, vcast_vd_d(d0), vsel_vd_vo_vd_vd(o1, vcast_vd_d(d1), vsel_vd_vo_d_d(o2, d2, d3)));
}

static INLINE vdouble vcast_vd_vi(vint vi) {
#if defined(__clang__)
return __builtin_convertvector(vi, vdouble);
27 changes: 27 additions & 0 deletions src/libm-tester/iut.c
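
A note on the tester protocol (not part of the patch): iut.c reads one command per line on standard input — a function name followed by the argument encoded as the hexadecimal bit pattern of a double — evaluates the function, and prints the result's bit pattern, so the new branches below simply wire up the added functions. A sketch of u2d/d2u style bit-pattern conversions this relies on, assuming the usual memcpy-based punning (the actual helpers are defined elsewhere in the tester sources):

#include <stdint.h>
#include <string.h>

// Reinterpret a 64-bit pattern as a double and back; this is what lets the
// tester exchange exact double values as hex strings.
static double   u2d_sketch(uint64_t u) { double d;   memcpy(&d, &u, sizeof d); return d; }
static uint64_t d2u_sketch(double d)   { uint64_t u; memcpy(&u, &d, sizeof u); return u; }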
@@ -80,6 +80,11 @@ int main(int argc, char **argv) {
sscanf(buf, "sincospi_u35 %" PRIx64, &u);
Sleef_double2 x = xsincospi_u35(u2d(u));
printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y));
} else if (startsWith(buf, "sinpi_u05 ")) {
uint64_t u;
sscanf(buf, "sinpi_u05 %" PRIx64, &u);
u = d2u(xsinpi_u05(u2d(u)));
printf("%" PRIx64 "\n", u);
} else if (startsWith(buf, "tan ")) {
uint64_t u;
sscanf(buf, "tan %" PRIx64, &u);
@@ -314,6 +319,28 @@ int main(int argc, char **argv) {
printf("%" PRIx64 " %" PRIx64 "\n", d2u(x.x), d2u(x.y));
}

else if (startsWith(buf, "tgamma_u1 ")) {
uint64_t u;
sscanf(buf, "tgamma_u1 %" PRIx64, &u);
u = d2u(xtgamma_u1(u2d(u)));
printf("%" PRIx64 "\n", u);
} else if (startsWith(buf, "lgamma_u1 ")) {
uint64_t u;
sscanf(buf, "lgamma_u1 %" PRIx64, &u);
u = d2u(xlgamma_u1(u2d(u)));
printf("%" PRIx64 "\n", u);
} else if (startsWith(buf, "erf_u1 ")) {
uint64_t u;
sscanf(buf, "erf_u1 %" PRIx64, &u);
u = d2u(xerf_u1(u2d(u)));
printf("%" PRIx64 "\n", u);
} else if (startsWith(buf, "erfc_u15 ")) {
uint64_t u;
sscanf(buf, "erfc_u15 %" PRIx64, &u);
u = d2u(xerfc_u15(u2d(u)));
printf("%" PRIx64 "\n", u);
}

else if (startsWith(buf, "sinf ")) {
uint32_t u;
sscanf(buf, "sinf %x", &u);
6 changes: 6 additions & 0 deletions src/libm-tester/iutsimd.c
@@ -377,6 +377,7 @@ int main(int argc, char **argv) {
func_d_d("sin_u1", xsin_u1);
func_d_d("cos_u1", xcos_u1);
func_d_d("tan_u1", xtan_u1);
func_d_d("sinpi_u05", xsinpi_u05);
func_d_d("asin_u1", xasin_u1);
func_d_d("acos_u1", xacos_u1);
func_d_d("atan_u1", xatan_u1);
@@ -426,6 +427,11 @@ int main(int argc, char **argv) {
func_d_d_d("fmod", xfmod);

func_d2_d("modf", xmodf);

func_d_d("tgamma_u1", xtgamma_u1);
func_d_d("lgamma_u1", xlgamma_u1);
func_d_d("erf_u1", xerf_u1);
func_d_d("erfc_u15", xerfc_u15);
#endif

#ifdef ENABLE_SP
77 changes: 77 additions & 0 deletions src/libm-tester/tester.c
@@ -156,6 +156,7 @@ double child_expm1(double x) { child_d_d("expm1", x); }

Sleef_double2 child_sincospi_u05(double x) { child_d2_d("sincospi_u05", x); }
Sleef_double2 child_sincospi_u35(double x) { child_d2_d("sincospi_u35", x); }
double child_sinpi_u05(double x) { child_d_d("sinpi_u05", x); }

double child_hypot_u05(double x, double y) { child_d_d_d("hypot_u05", x, y); }
double child_hypot_u35(double x, double y) { child_d_d_d("hypot_u35", x, y); }
@@ -174,6 +175,11 @@ double child_rint(double x) { child_d_d("rint", x); }
double child_frfrexp(double x) { child_d_d("frfrexp", x); }
Sleef_double2 child_modf(double x) { child_d2_d("modf", x); }

double child_tgamma_u1(double x) { child_d_d("tgamma_u1", x); }
double child_lgamma_u1(double x) { child_d_d("lgamma_u1", x); }
double child_erf_u1(double x) { child_d_d("erf_u1", x); }
double child_erfc_u15(double x) { child_d_d("erfc_u15", x); }

//

double child_ldexp(double x, int q) {
@@ -2212,6 +2218,13 @@ void do_test() {
showResult(success);
}

{
fprintf(stderr, "sinpi_u05 denormal/nonnumber test : ");
double xa[] = { +0.0, -0.0, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN };
for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) cmpDenorm_d(mpfr_sinpi, child_sinpi_u05, xa[i]);
showResult(success);
}

//

{
@@ -2619,6 +2632,35 @@ void do_test() {
for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) cmpDenorm_d(mpfr_rint, child_rint, xa[i]);
showResult(success);
}


{
fprintf(stderr, "lgamma_u1 denormal/nonnumber test : ");
double xa[] = { -4, -3, -2, -1, +0.0, -0.0, +1, +2, +1e+10, -1e+10, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN };
for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) cmpDenorm_d(mpfr_lgamma_nosign, child_lgamma_u1, xa[i]);
showResult(success);
}

{
fprintf(stderr, "tgamma_u1 denormal/nonnumber test : ");
double xa[] = { -4, -3, -2, -1, +0.0, -0.0, +1, +2, +1e+10, -1e+10, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN };
for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) cmpDenorm_d(mpfr_gamma, child_tgamma_u1, xa[i]);
showResult(success);
}

{
fprintf(stderr, "erf_u1 denormal/nonnumber test : ");
double xa[] = { -1, +0.0, -0.0, +1, +1e+10, -1e+10, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN };
for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) cmpDenorm_d(mpfr_erf, child_erf_u1, xa[i]);
showResult(success);
}

{
fprintf(stderr, "erfc_u15 denormal/nonnumber test : ");
double xa[] = { -1, +0.0, -0.0, +1, +1e+10, -1e+10, POSITIVE_INFINITY, NEGATIVE_INFINITY, NAN };
for(i=0;i<sizeof(xa)/sizeof(double) && success;i++) cmpDenorm_d(mpfr_erfc, child_erfc_u15, xa[i]);
showResult(success);
}
}

if (enableSP) {
@@ -3339,6 +3381,17 @@ void do_test() {
}
showResult(success);

//

fprintf(stderr, "sinpi_u05 : ");
for(d = -10.1;d < 10 && success;d += 0.0021) checkAccuracy_d(mpfr_sinpi, child_sinpi_u05, d, 0.506);
for(d = -1e+8-0.1;d < 1e+8 && success;d += (1e+10 + 0.1)) checkAccuracy_d(mpfr_sinpi, child_sinpi_u05, d, 0.506);
for(i=1;i<10000 && success;i+=31) {
double start = u2d(d2u(i)-20), end = u2d(d2u(i)+20);
for(d = start;d <= end;d = u2d(d2u(d)+1)) checkAccuracy_d(mpfr_sinpi, child_sinpi_u05, d, 0.506);
}
showResult(success);

mpfr_set_default_prec(128);

//
@@ -3637,6 +3690,30 @@ void do_test() {

//

fprintf(stderr, "lgamma_u1 : ");
for(d = -5000;d < 5000 && success;d += 1.1) checkAccuracy_d(mpfr_lgamma_nosign, child_lgamma_u1, d, 1.0);
showResult(success);

//

fprintf(stderr, "tgamma_u1 : ");
for(d = -10;d < 10 && success;d += 0.002) checkAccuracy_d(mpfr_gamma, child_tgamma_u1, d, 1.0);
showResult(success);

//

fprintf(stderr, "erf_u1 : ");
for(d = -100;d < 100 && success;d += 0.02) checkAccuracy_d(mpfr_erf, child_erf_u1, d, 1.0);
showResult(success);

//

fprintf(stderr, "erfc_u15 : ");
for(d = -1;d < 100 && success;d += 0.01) checkAccuracy_d(mpfr_erfc, child_erfc_u15, d, 1.5);
showResult(success);
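
Each accuracy test compares the library result against an MPFR reference and checks that the error stays below the stated ULP bound (0.506 for the _u05 function, 1.0 for _u1, 1.5 for _u15). A self-contained sketch of that kind of ULP-error measurement, with long double standing in for the MPFR reference (the tester's own checkAccuracy_d works with MPFR directly):

#include <math.h>

// Error of `computed` relative to a higher-precision reference, in ULPs of the
// reference's magnitude. A sketch only; special cases are handled minimally.
static double ulp_error(double computed, long double ref) {
  if (isnan(computed) && isnan((double)ref)) return 0;          // both NaN: exact
  if (isinf(computed) && computed == (double)ref) return 0;      // same infinity: exact
  int e;
  frexpl(ref, &e);                                   // ref = m * 2^e, 0.5 <= |m| < 1
  long double ulp = ldexpl(1.0L, e - 53);            // spacing of doubles near |ref|
  long double denorm_min = ldexpl(1.0L, -1074);      // smallest positive double
  if (ref == 0 || ulp < denorm_min) ulp = denorm_min; // clamp in the denormal range
  return (double)(fabsl((long double)computed - ref) / ulp);
}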

//

{
fprintf(stderr, "ilogb : ");

Expand Down