diff --git a/README.md b/README.md index 0b28b8a..5793986 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ alt="SLEEF Logo" width="420"> A pure Julia port of the [SLEEF math library](https://github.com/shibatch/sleef). **History** -- Release [v0.1.0](https://github.com/musm/Sleef.jl/releases/tag/v0.1.0) based on SLEEF version v2.80 +- Release [v0.2.0](https://github.com/musm/Sleef.jl/releases/tag/v0.2.0) based on SLEEF v2.90 +- Release [v0.1.0](https://github.com/musm/Sleef.jl/releases/tag/v0.1.0) based on SLEEF v2.80

[![Travis Build Status](https://travis-ci.org/musm/Sleef.jl.svg?branch=master)](https://travis-ci.org/musm/Sleef.jl) @@ -40,14 +41,22 @@ julia> Sleef.exp(3f0) The available functions include (within 1 ulp) ```julia sin, cos, tan, asin, acos, atan, atan2, sincos, sinh, cosh, tanh, - asinh, acosh, atanh, log, log2, log10, log1p, ilog2, exp, exp2, exp10, expm1, ldexp, cbrt, pow + asinh, acosh, atanh, log, log2, log10, log1p, ilogb, exp, exp2, exp10, expm1, ldexp, cbrt, pow ``` - Faster variants include (within 3 ulp) +Faster variants (within 3 ulp) ```julia sin_fast, cos_fast, tan_fast, sincos_fast, asin_fast, acos_fast, atan_fast, atan2_fast, log_fast, cbrt_fast ``` +## Notes + +The trigonometric functions are tested to return values with specified +accuracy when the argument is within the following range: + +- Double (Float64) precision trigonometric functions : `|arg| <= 10000000` +- Single (Float32) precision trigonometric functions : `|arg| <= 10000` + # Benchmarks You can benchmark the performance of the Sleef.jl math library on your machine by running @@ -55,3 +64,4 @@ You can benchmark the performance of the Sleef.jl math library on your machine b ```julia include(joinpath(Pkg.dir("Sleef"), "benchmark", "benchmark.jl")) ``` + diff --git a/src/double.jl b/src/double.jl index 454ac9f..f19df72 100644 --- a/src/double.jl +++ b/src/double.jl @@ -4,10 +4,10 @@ struct Double{T<:IEEEFloat} <: Number hi::T lo::T end -Double(x::T) where {T <: IEEEFloat} = Double(x, zero(T)) +Double(x::T) where {T<:IEEEFloat} = Double(x, zero(T)) -convert(::Type{Tuple{T,T}}, x::Double{T}) where {T <: IEEEFloat} = (x.hi, x.lo) -convert(::Type{T}, x::Double) where {T <: IEEEFloat} = x.hi + x.lo +convert(::Type{Tuple{T,T}}, x::Double{T}) where {T<:IEEEFloat} = (x.hi, x.lo) +convert(::Type{T}, x::Double) where {T<:IEEEFloat} = x.hi + x.lo @inline trunclo(x::Float64) = reinterpret(Float64, reinterpret(UInt64, x) & 0xffff_ffff_f800_0000) # clear lower 27 bits (leave upper 26 bits) @@ -23,107 +23,107 @@ end Double(r, (x.hi - r) + x.lo) end -@inline flipsign(x::Double{T}, y::T) where {T <: IEEEFloat} = Double(flipsign(x.hi, y), flipsign(x.lo, y)) +@inline flipsign(x::Double{T}, y::T) where {T<:IEEEFloat} = Double(flipsign(x.hi, y), flipsign(x.lo, y)) -@inline scale(x::Double{T}, s::T) where {T <: IEEEFloat} = Double(s * x.hi, s * x.lo) +@inline scale(x::Double{T}, s::T) where {T<:IEEEFloat} = Double(s * x.hi, s * x.lo) -@inline (-)(x::Double{T}) where {T <: IEEEFloat} = Double(-x.hi, -x.lo) +@inline (-)(x::Double{T}) where {T<:IEEEFloat} = Double(-x.hi, -x.lo) -@inline function (<)(x::Double{T}, y::Double{T}) where {T <: IEEEFloat} +@inline function (<)(x::Double{T}, y::Double{T}) where {T<:IEEEFloat} x.hi < y.hi end -@inline function (<)(x::Double{T}, y::Number) where {T <: IEEEFloat} +@inline function (<)(x::Double{T}, y::Number) where {T<:IEEEFloat} x.hi < y end -@inline function (<)(x::Number, y::Double{T}) where {T <: IEEEFloat} +@inline function (<)(x::Number, y::Double{T}) where {T<:IEEEFloat} x < y.hi end # quick-two-sum x+y -@inline function dadd(x::T, y::T) where {T <: IEEEFloat} #WARNING |x| >= |y| +@inline function dadd(x::T, y::T) where {T<:IEEEFloat} #WARNING |x| >= |y| s = x + y Double(s, (x - s) + y) end -@inline function dadd(x::T, y::Double{T}) where {T <: IEEEFloat} #WARNING |x| >= |y| +@inline function dadd(x::T, y::Double{T}) where {T<:IEEEFloat} #WARNING |x| >= |y| s = x + y.hi Double(s, (x - s) + y.hi + y.lo) end -@inline function dadd(x::Double{T}, y::T) where {T <: IEEEFloat} #WARNING |x| >= |y| +@inline function dadd(x::Double{T}, y::T) where {T<:IEEEFloat} #WARNING |x| >= |y| s = x.hi + y Double(s, (x.hi - s) + y + x.lo) end -@inline function dadd(x::Double{T}, y::Double{T}) where {T <: IEEEFloat} #WARNING |x| >= |y| +@inline function dadd(x::Double{T}, y::Double{T}) where {T<:IEEEFloat} #WARNING |x| >= |y| s = x.hi + y.hi Double(s, (x.hi - s) + y.hi + y.lo + x.lo) end -@inline function dsub(x::Double{T}, y::Double{T}) where {T <: IEEEFloat} #WARNING |x| >= |y| +@inline function dsub(x::Double{T}, y::Double{T}) where {T<:IEEEFloat} #WARNING |x| >= |y| s = x.hi - y.hi Double(s, (x.hi - s) - y.hi - y.lo + x.lo) end -@inline function dsub(x::Double{T}, y::T) where {T <: IEEEFloat} #WARNING |x| >= |y| +@inline function dsub(x::Double{T}, y::T) where {T<:IEEEFloat} #WARNING |x| >= |y| s = x.hi - y Double(s, (x.hi - s) - y + x.lo) end -@inline function dsub(x::T, y::Double{T}) where {T <: IEEEFloat} #WARNING |x| >= |y| +@inline function dsub(x::T, y::Double{T}) where {T<:IEEEFloat} #WARNING |x| >= |y| s = x - y.hi Double(s, (x - s) - y.hi - y.lo) end -@inline function dsub(x::T, y::T) where {T <: IEEEFloat} #WARNING |x| >= |y| +@inline function dsub(x::T, y::T) where {T<:IEEEFloat} #WARNING |x| >= |y| s = x - y Double(s, (x - s) - y) end # two-sum x+y NO BRANCH -@inline function dadd2(x::T, y::T) where {T <: IEEEFloat} +@inline function dadd2(x::T, y::T) where {T<:IEEEFloat} s = x + y v = s - x Double(s, (x - (s - v)) + (y - v)) end -@inline function dadd2(x::T, y::Double{T}) where {T <: IEEEFloat} +@inline function dadd2(x::T, y::Double{T}) where {T<:IEEEFloat} s = x + y.hi v = s - x Double(s, (x - (s - v)) + (y.hi - v) + y.lo) end -@inline dadd2(x::Double{T}, y::T) where {T <: IEEEFloat} = dadd2(y, x) +@inline dadd2(x::Double{T}, y::T) where {T<:IEEEFloat} = dadd2(y, x) -@inline function dadd2(x::Double{T}, y::Double{T}) where {T <: IEEEFloat} +@inline function dadd2(x::Double{T}, y::Double{T}) where {T<:IEEEFloat} s = x.hi + y.hi v = s - x.hi Double(s, (x.hi - (s - v)) + (y.hi - v) + x.lo + y.lo) end -@inline function dsub2(x::T, y::T) where {T <: IEEEFloat} +@inline function dsub2(x::T, y::T) where {T<:IEEEFloat} s = x - y v = s - x Double(s, (x - (s - v)) + (-y - v)) end -@inline function dsub2(x::T, y::Double{T}) where {T <: IEEEFloat} +@inline function dsub2(x::T, y::Double{T}) where {T<:IEEEFloat} s = x - y.hi v = s - x Double(s, (x - (s - v)) + (-y.hi - v) - y.lo) end -@inline function dsub2(x::Double{T}, y::T) where {T <: IEEEFloat} +@inline function dsub2(x::Double{T}, y::T) where {T<:IEEEFloat} s = x.hi - y v = s - x.hi Double(s, (x.hi - (s - v)) + (-y - v) + x.lo) end -@inline function dsub2(x::Double{T}, y::Double{T}) where {T <: IEEEFloat} +@inline function dsub2(x::Double{T}, y::Double{T}) where {T<:IEEEFloat} s = x.hi - y.hi v = s - x.hi Double(s, (x.hi - (s - v)) + (-y.hi - v) + x.lo - y.lo) @@ -134,60 +134,60 @@ end if FMA_FAST # two-prod-fma - @inline function dmul(x::T, y::T) where {T <: IEEEFloat} + @inline function dmul(x::T, y::T) where {T<:IEEEFloat} z = x * y Double(z, fma(x, y, -z)) end - @inline function dmul(x::Double{T}, y::T) where {T <: IEEEFloat} + @inline function dmul(x::Double{T}, y::T) where {T<:IEEEFloat} z = x.hi * y Double(z, fma(x.hi, y, -z) + x.lo * y) end - @inline dmul(x::T, y::Double{T}) where {T <: IEEEFloat} = dmul(y, x) + @inline dmul(x::T, y::Double{T}) where {T<:IEEEFloat} = dmul(y, x) - @inline function dmul(x::Double{T}, y::Double{T}) where {T <: IEEEFloat} + @inline function dmul(x::Double{T}, y::Double{T}) where {T<:IEEEFloat} z = x.hi * y.hi Double(z, fma(x.hi, y.hi, -z) + x.hi * y.lo + x.lo * y.hi) end # x^2 - @inline function dsqu(x::T) where {T <: IEEEFloat} + @inline function dsqu(x::T) where {T<:IEEEFloat} z = x * x Double(z, fma(x, x, -z)) end - @inline function dsqu(x::Double{T}) where {T <: IEEEFloat} + @inline function dsqu(x::Double{T}) where {T<:IEEEFloat} z = x.hi * x.hi Double(z, fma(x.hi, x.hi, -z) + x.hi * (x.lo + x.lo)) end # sqrt(x) - @inline function dsqrt(x::Double{T}) where {T <: IEEEFloat} + @inline function dsqrt(x::Double{T}) where {T<:IEEEFloat} zhi = _sqrt(x.hi) Double(zhi, (x.lo + fma(-zhi, zhi, x.hi)) / (zhi + zhi)) end # x/y - @inline function ddiv(x::Double{T}, y::Double{T}) where {T <: IEEEFloat} + @inline function ddiv(x::Double{T}, y::Double{T}) where {T<:IEEEFloat} invy = 1 / y.hi zhi = x.hi * invy Double(zhi, (fma(-zhi, y.hi, x.hi) + fma(-zhi, y.lo, x.lo)) * invy) end - @inline function ddiv(x::T, y::T) where {T <: IEEEFloat} + @inline function ddiv(x::T, y::T) where {T<:IEEEFloat} ry = 1 / y r = x * ry Double(r, fma(-r, y, x) * ry) end # 1/x - @inline function ddrec(x::T) where {T <: IEEEFloat} + @inline function ddrec(x::T) where {T<:IEEEFloat} zhi = 1 / x Double(zhi, fma(-zhi, x, one(T)) * zhi) end - @inline function ddrec(x::Double{T}) where {T <: IEEEFloat} + @inline function ddrec(x::Double{T}) where {T<:IEEEFloat} zhi = 1 / x.hi Double(zhi, (fma(-zhi, x.hi, one(T)) + -zhi * x.lo) * zhi) end @@ -195,23 +195,23 @@ if FMA_FAST else #two-prod x*y - @inline function dmul(x::T, y::T) where {T <: IEEEFloat} + @inline function dmul(x::T, y::T) where {T<:IEEEFloat} hx, lx = splitprec(x) hy, ly = splitprec(y) z = x * y Double(z, ((hx * hy - z) + lx * hy + hx * ly) + lx * ly) end - @inline function dmul(x::Double{T}, y::T) where {T <: IEEEFloat} + @inline function dmul(x::Double{T}, y::T) where {T<:IEEEFloat} hx, lx = splitprec(x.hi) hy, ly = splitprec(y) z = x.hi * y Double(z, (hx * hy - z) + lx * hy + hx * ly + lx * ly + x.lo * y) end - @inline dmul(x::T, y::Double{T}) where {T <: IEEEFloat} = dmul(y, x) + @inline dmul(x::T, y::Double{T}) where {T<:IEEEFloat} = dmul(y, x) - @inline function dmul(x::Double{T}, y::Double{T}) where {T <: IEEEFloat} + @inline function dmul(x::Double{T}, y::Double{T}) where {T<:IEEEFloat} hx, lx = splitprec(x.hi) hy, ly = splitprec(y.hi) z = x.hi * y.hi @@ -219,34 +219,34 @@ else end # x^2 - @inline function dsqu(x::T) where {T <: IEEEFloat} + @inline function dsqu(x::T) where {T<:IEEEFloat} hx, lx = splitprec(x) z = x * x Double(z, (hx * hx - z) + lx * (hx + hx) + lx * lx) end - @inline function dsqu(x::Double{T}) where {T <: IEEEFloat} + @inline function dsqu(x::Double{T}) where {T<:IEEEFloat} hx, lx = splitprec(x.hi) z = x.hi * x.hi Double(z, (hx * hx - z) + lx * (hx + hx) + lx * lx + x.hi * (x.lo + x.lo)) end # sqrt(x) - @inline function dsqrt(x::Double{T}) where {T <: IEEEFloat} + @inline function dsqrt(x::Double{T}) where {T<:IEEEFloat} c = _sqrt(x.hi) u = dsqu(c) Double(c, (x.hi - u.hi - u.lo + x.lo) / (c + c)) end # x/y - @inline function ddiv(x::Double{T}, y::Double{T}) where {T <: IEEEFloat} + @inline function ddiv(x::Double{T}, y::Double{T}) where {T<:IEEEFloat} invy = 1 / y.hi c = x.hi * invy u = dmul(c, y.hi) Double(c, ((((x.hi - u.hi) - u.lo) + x.lo) - c * y.lo) * invy) end - @inline function ddiv(x::T, y::T) where {T <: IEEEFloat} + @inline function ddiv(x::T, y::T) where {T<:IEEEFloat} ry = 1 / y r = x * ry hx, lx = splitprec(r) @@ -256,13 +256,13 @@ else # 1/x - @inline function ddrec(x::T) where {T <: IEEEFloat} + @inline function ddrec(x::T) where {T<:IEEEFloat} c = 1 / x u = dmul(c, x) Double(c, (one(T) - u.hi - u.lo) * c) end - @inline function ddrec(x::Double{T}) where {T <: IEEEFloat} + @inline function ddrec(x::Double{T}) where {T<:IEEEFloat} c = 1 / x.hi u = dmul(c, x.hi) Double(c, (one(T) - u.hi - u.lo - c * x.lo) * c) diff --git a/src/exp.jl b/src/exp.jl index 742279d..96e747e 100644 --- a/src/exp.jl +++ b/src/exp.jl @@ -17,7 +17,7 @@ const over_e2(::Type{Float32}) = 128f0 Compute the base-`2` exponential of `x`, that is `2ˣ`. """ -function exp2(x::T) where {T <: IEEEFloat} +function exp2(x::T) where {T<:IEEEFloat} u = expk(dmul(MDLN2(T), x)) x > over_e2(T) && (u = T(Inf)) isninf(x) && (u = T(0)) @@ -34,7 +34,7 @@ const over_e10(::Type{Float32}) = 38f0 Compute the base-`10` exponential of `x`, that is `10ˣ`. """ -function exp10(x::T) where {T <: IEEEFloat} +function exp10(x::T) where {T<:IEEEFloat} u = expk(dmul(MDLN10(T), x)) x > over_e10(T) && (u = T(Inf)) isninf(x) && (u = T(0)) @@ -95,7 +95,7 @@ const c1f = 0.499999850988388061523438f0 global @inline exp_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d global @inline exp_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f -function exp(x::T) where {T <: IEEEFloat} +function exp(x::T) where {T<:IEEEFloat} q = roundi(T(MLN2E) * x) s = muladd(q, -LN2U(T), x) s = muladd(q, -LN2L(T), s) diff --git a/src/hyp.jl b/src/hyp.jl index 4b7e961..a334de0 100644 --- a/src/hyp.jl +++ b/src/hyp.jl @@ -8,7 +8,7 @@ over_sch(::Type{Float32}) = 89f0 Compute hyperbolic sine of `x`. """ -function sinh(x::T) where {T <: IEEEFloat} +function sinh(x::T) where {T<:IEEEFloat} u = abs(x) d = expk2(Double(u)) d = dsub(d, ddrec(d)) @@ -27,7 +27,7 @@ end Compute hyperbolic cosine of `x`. """ -function cosh(x::T) where {T <: IEEEFloat} +function cosh(x::T) where {T<:IEEEFloat} u = abs(x) d = expk2(Double(u)) d = dadd(d, ddrec(d)) @@ -48,7 +48,7 @@ over_th(::Type{Float32}) = 8.664339742f0 Compute hyperbolic tangent of `x`. """ -function tanh(x::T) where {T <: IEEEFloat} +function tanh(x::T) where {T<:IEEEFloat} u = abs(x) d = expk2(Double(u)) e = ddrec(d) @@ -68,7 +68,7 @@ end Compute the inverse hyperbolic sine of `x`. """ -function asinh(x::T) where {T <: IEEEFloat} +function asinh(x::T) where {T<:IEEEFloat} u = abs(x) d = logk2(dadd(dsqrt(dadd2(dsqu(u), T(1))), u)) u = T(d) @@ -85,7 +85,7 @@ end Compute the inverse hyperbolic cosine of `x`. """ -function acosh(x::T) where {T <: IEEEFloat} +function acosh(x::T) where {T<:IEEEFloat} d = logk2(dadd2(dsqrt(dsub2(dsqu(x), T(1))), x)) u = T(d) u = isinf(x) || isnan(u) ? T(Inf) : u @@ -102,7 +102,7 @@ end Compute the inverse hyperbolic tangent of `x`. """ -function atanh(x::T) where {T <: IEEEFloat} +function atanh(x::T) where {T<:IEEEFloat} u = abs(x) d = logk2(ddiv(dadd2(T(1), u), dsub2(T(1), u))) u = u > T(1) ? T(NaN) : (u == T(1) ? T(Inf) : T(d) * T(0.5)) diff --git a/src/misc.jl b/src/misc.jl index d6ef725..a533928 100644 --- a/src/misc.jl +++ b/src/misc.jl @@ -4,7 +4,7 @@ Exponentiation operator, returns `x` raised to the power `y`. """ -function pow(x::T, y::T) where {T <: IEEEFloat} +function pow(x::T, y::T) where {T<:IEEEFloat} yi = unsafe_trunc(Int, y) yint = yi == y yodd = isodd(yi) && yint @@ -108,7 +108,7 @@ end Compute the hypotenuse `\sqrt{x^2+y^2}` avoiding overflow and underflow. """ -function hypot(x::T, y::T) where {T <: IEEEFloat} +function hypot(x::T, y::T) where {T<:IEEEFloat} x = abs(x) y = abs(y) if x < y diff --git a/src/priv.jl b/src/priv.jl index efd87e8..45bcca4 100644 --- a/src/priv.jl +++ b/src/priv.jl @@ -31,7 +31,7 @@ end Computes `a × 2^n`. """ -@inline function ldexpk(x::T, q::Int) where {T <: IEEEFloat} +@inline function ldexpk(x::T, q::Int) where {T<:IEEEFloat} bias = exponent_bias(T) emax = exponent_max(T) m, q = split_exponent(T, q) @@ -45,7 +45,7 @@ Computes `a × 2^n`. x * u end -@inline ldexpk2(x::T, e::Int) where {T <: IEEEFloat} = x * pow2i(T, e >> UInt(1)) * pow2i(T, e - (e >> UInt(1))) +@inline ldexpk2(x::T, e::Int) where {T<:IEEEFloat} = x * pow2i(T, e >> UInt(1)) * pow2i(T, e - (e >> UInt(1))) @@ -108,7 +108,7 @@ const c1f = -0.333332866430282592773438f0 global @inline atan2k_fast_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d c17d c18d c19d c20d global @inline atan2k_fast_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f c7f c8f c9f -@inline function atan2k_fast(y::T, x::T) where {T <: IEEEFloat} +@inline function atan2k_fast(y::T, x::T) where {T<:IEEEFloat} q = 0 if x < 0 x = -x @@ -130,7 +130,7 @@ end global @inline atan2k_kernel(x::Double{Float64}) = @horner x.hi c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d c17d c18d c19d c20d global @inline atan2k_kernel(x::Double{Float32}) = dadd(c1f, x.hi * (@horner x.hi c2f c3f c4f c5f c6f c7f c8f c9f)) -@inline function atan2k(y::Double{T}, x::Double{T}) where {T <: IEEEFloat} +@inline function atan2k(y::Double{T}, x::Double{T}) where {T<:IEEEFloat} q = 0 if x < 0 x = -x @@ -174,7 +174,7 @@ const c1f = 0.499999850988388061523438f0 global @inline expk_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d global @inline expk_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f -@inline function expk(d::Double{T}) where {T <: IEEEFloat} +@inline function expk(d::Double{T}) where {T<:IEEEFloat} q = round(T(d) * T(MLN2E)) s = dadd(d, -q * LN2U(T)) s = dadd(s, -q * LN2L(T)) @@ -185,7 +185,7 @@ global @inline expk_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f end -@inline function expk2(d::Double{T}) where {T <: IEEEFloat} +@inline function expk2(d::Double{T}) where {T<:IEEEFloat} q = round(T(d) * T(MLN2E)) s = dadd(d, -q * LN2U(T)) s = dadd(s, -q * LN2L(T)) diff --git a/src/trig.jl b/src/trig.jl index a9e6851..bc5eab1 100644 --- a/src/trig.jl +++ b/src/trig.jl @@ -35,7 +35,7 @@ const c1f = -0.166666597127914428710938f0 global @inline sincos_kernel(x::Double{Float64}) = dadd(c1d, x.hi * (@horner x.hi c2d c3d c4d c5d c6d c7d c8d)) global @inline sincos_kernel(x::Double{Float32}) = dadd(c1f, x.hi * (@horner x.hi c2f c3f c4f)) -function sin(x::T) where {T <: IEEEFloat} +function sin(x::T) where {T<:IEEEFloat} q = round(x * T(M1PI)) s = dsub2(x, q * PI4A(T) * 4) @@ -53,7 +53,7 @@ function sin(x::T) where {T <: IEEEFloat} return u end -function cos(x::T) where {T <: IEEEFloat} +function cos(x::T) where {T<:IEEEFloat} x = abs(x) q = muladd(T(2), round(x * T(M1PI) - T(0.5)), T(1)) s = dsub2(x, q * PI4A(T) * 2) @@ -133,7 +133,7 @@ function sin_fast(x::T) where {T<:IEEEFloat} return u end -function cos_fast(x::T) where {T <: IEEEFloat} +function cos_fast(x::T) where {T<:IEEEFloat} q = muladd(2, roundi(x * T(M1PI) - T(0.5)), 1) x = muladd(q, -PI4A(T) * 2, x) x = muladd(q, -PI4B(T) * 2, x) @@ -198,7 +198,7 @@ global @inline sincos_a_kernel(x::Float32) = @horner x a1f a2f a3f global @inline sincos_b_kernel(x::Float64) = @horner x b1d b2d b3d b4d b5d b6d b7d global @inline sincos_b_kernel(x::Float32) = @horner x b1f b2f b3f b4f b5f -function sincos_fast(d::T) where {T <: IEEEFloat} +function sincos_fast(d::T) where {T<:IEEEFloat} q = roundi(d * T(M2PI)) s = d s = muladd(q, -PI4A(T) * 2, s) @@ -373,7 +373,7 @@ end Compute the inverse tangent of `x`, where the output is in radians. """ -function atan(x::T) where {T <: IEEEFloat} +function atan(x::T) where {T<:IEEEFloat} u = T(atan2k(Double(abs(x)), Double(T(1)))) isinf(x) && (u = T(MPI2)) flipsign(u, x) @@ -421,7 +421,7 @@ const c1f = -0.333331018686294555664062f0 global @inline _atan_fast(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d c17d c18d c19d global @inline _atan_fast(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f c7f c8f -function atan_fast(x::T) where {T <: IEEEFloat} +function atan_fast(x::T) where {T<:IEEEFloat} q = 0 if signbit(x) x = -x @@ -447,7 +447,7 @@ end Compute the inverse tangent of `x/y`, using the signs of both `x` and `y` to determine the quadrant of the return value. """ -function atan2(x::T, y::T) where {T <: IEEEFloat} +function atan2(x::T, y::T) where {T<:IEEEFloat} r = T(atan2k(Double(abs(x)), Double(y))) r = flipsign(r, y) if isinf(y) || y == 0 @@ -468,7 +468,7 @@ end Compute the inverse tangent of `x/y`, using the signs of both `x` and `y` to determine the quadrant of the return value. """ -function atan2_fast(x::T, y::T) where {T <: IEEEFloat} +function atan2_fast(x::T, y::T) where {T<:IEEEFloat} r = atan2k_fast(abs(x), y) r = flipsign(r, y) if isinf(y) || y == 0 @@ -490,7 +490,7 @@ end Compute the inverse sine of `x`, where the output is in radians. """ -function asin(x::T) where {T <: IEEEFloat} +function asin(x::T) where {T<:IEEEFloat} d = atan2k(Double(abs(x)), dsqrt(dmul(dadd(T(1), x), dsub(T(1), x)))) u = T(d) abs(x) == 1 && (u = T(MPI2)) @@ -503,7 +503,7 @@ end Compute the inverse sine of `x`, where the output is in radians. """ -function asin_fast(x::T) where {T <: IEEEFloat} +function asin_fast(x::T) where {T<:IEEEFloat} flipsign(atan2k_fast(abs(x), _sqrt((1 + x) * (1 - x))), x) end @@ -514,7 +514,7 @@ end Compute the inverse cosine of `x`, where the output is in radians. """ -function acos(x::T) where {T <: IEEEFloat} +function acos(x::T) where {T<:IEEEFloat} d = atan2k(dsqrt(dmul(dadd(T(1), x), dsub(T(1), x))), Double(abs(x))) d = flipsign(d, x) abs(x) == 1 && (d = Double(T(0))) @@ -528,6 +528,6 @@ end Compute the inverse cosine of `x`, where the output is in radians. """ -function acos_fast(x::T) where {T <: IEEEFloat} +function acos_fast(x::T) where {T<:IEEEFloat} flipsign(atan2k_fast(_sqrt((1 + x) * (1 - x)), abs(x)), x) + (signbit(x) ? T(MPI) : T(0)) end diff --git a/src/utils.jl b/src/utils.jl index 2a930d4..fe05395 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -7,7 +7,7 @@ end const FMA_FAST = is_fma_fast(Float64) && is_fma_fast(Float32) -@inline exponent_max(::Type{T}) where {T <: IEEEFloat} = Int(exponent_mask(T) >> significand_bits(T)) +@inline exponent_max(::Type{T}) where {T<:IEEEFloat} = Int(exponent_mask(T) >> significand_bits(T)) @inline isnegzero(x::T) where {T<:IEEEFloat} = x === T(-0.0) @@ -15,9 +15,9 @@ const FMA_FAST = is_fma_fast(Float64) && is_fma_fast(Float32) @inline isninf(x::T) where {T<:IEEEFloat} = x == -T(Inf) # _sign emits better native code than sign but does not properly handle the Inf/NaN cases -@inline _sign(d::T) where {T <: IEEEFloat} = flipsign(one(T), d) +@inline _sign(d::T) where {T<:IEEEFloat} = flipsign(one(T), d) -@inline roundi(x::T) where {T <: IEEEFloat} = unsafe_trunc(Int, round(x)) +@inline roundi(x::T) where {T<:IEEEFloat} = unsafe_trunc(Int, round(x)) @inline integer2float(::Type{Float64}, m::Int) = reinterpret(Float64, (m % Int64) << significand_bits(Float64)) @inline integer2float(::Type{Float32}, m::Int) = reinterpret(Float32, (m % Int32) << significand_bits(Float32)) @@ -25,7 +25,7 @@ const FMA_FAST = is_fma_fast(Float64) && is_fma_fast(Float32) @inline float2integer(d::Float64) = (reinterpret(Int64, d) >> significand_bits(Float64)) % Int @inline float2integer(d::Float32) = (reinterpret(Int32, d) >> significand_bits(Float32)) % Int -@inline pow2i(::Type{T}, q::Int) where {T <: IEEEFloat} = integer2float(T, q + exponent_bias(T)) +@inline pow2i(::Type{T}, q::Int) where {T<:IEEEFloat} = integer2float(T, q + exponent_bias(T)) # sqrt without the domain checks which we don't need since we handle the checks ourselves _sqrt(x::T) where {T<:IEEEFloat} = Base.sqrt_llvm_fast(x) diff --git a/test/accuracy.jl b/test/accuracy.jl index fce8135..4cb1c85 100644 --- a/test/accuracy.jl +++ b/test/accuracy.jl @@ -15,160 +15,157 @@ IntF(::Type{Float32}) = Int32 test_acc(T, fun_table, xx, tol) + xx = map(T, vcat(-10:0.0002:10, -1000:0.02:1000)) + fun_table = Dict(Sleef.asinh => Base.asinh, Sleef.atanh => Base.atanh) + tol = 1 + test_acc(T, fun_table, xx, tol) + + + xx = map(T, vcat(1:0.0002:10, 1:0.02:1000)) + fun_table = Dict(Sleef.acosh => Base.acosh) + tol = 1 + test_acc(T, fun_table, xx, tol) + + + xx = T[] + for i = 1:10000 + s = reinterpret(T, reinterpret(IntF(T), T(pi)/4 * i) - IntF(T)(20)) + e = reinterpret(T, reinterpret(IntF(T), T(pi)/4 * i) + IntF(T)(20)) + d = s + while d <= e + append!(xx, d) + d = reinterpret(T, reinterpret(IntF(T), d) + IntF(T)(1)) + end + end + xx = append!(xx, -10:0.0002:10) + xx = append!(xx, -MRANGE(T):200.1:MRANGE(T)) + + fun_table = Dict(Sleef.sin => Base.sin, Sleef.cos => Base.cos, Sleef.tan => Base.tan) + tol = 1 + test_acc(T, fun_table, xx, tol) + + fun_table = Dict(Sleef.sin_fast => Base.sin, Sleef.cos_fast => Base.cos, Sleef.tan_fast => Base.tan) + tol = 4 + test_acc(T, fun_table, xx, tol) - # xx = map(T, vcat(-10:0.0002:10, -1000:0.02:1000)) - # fun_table = Dict(Sleef.asinh => Base.asinh, Sleef.atanh => Base.atanh) - # tol = 1 - # test_acc(T, fun_table, xx, tol) - - - # xx = map(T, vcat(1:0.0002:10, 1:0.02:1000)) - # fun_table = Dict(Sleef.acosh => Base.acosh) - # tol = 1 - # test_acc(T, fun_table, xx, tol) - - - # xx = T[] - # for i = 1:10000 - # s = reinterpret(T, reinterpret(IntF(T), T(pi)/4 * i) - IntF(T)(20)) - # e = reinterpret(T, reinterpret(IntF(T), T(pi)/4 * i) + IntF(T)(20)) - # d = s - # while d <= e - # append!(xx, d) - # d = reinterpret(T, reinterpret(IntF(T), d) + IntF(T)(1)) - # end - # end - # xx = append!(xx, -10:0.0002:10) - # xx = append!(xx, -MRANGE(T):200.1:MRANGE(T)) - - # fun_table = Dict(Sleef.sin => Base.sin, Sleef.cos => Base.cos, Sleef.tan => Base.tan) - # tol = 1 - # test_acc(T, fun_table, xx, tol) - - # fun_table = Dict(Sleef.sin_fast => Base.sin, Sleef.cos_fast => Base.cos, Sleef.tan_fast => Base.tan) - # tol = 4 - # test_acc(T, fun_table, xx, tol) - - - # sin_sincos_fast(x) = Sleef.sincos_fast(x)[1] - # cos_sincos_fast(x) = Sleef.sincos_fast(x)[2] - # fun_table = Dict(sin_sincos_fast => Base.sin, cos_sincos_fast => Base.cos) - # tol = 4 - # test_acc(T, fun_table, xx, tol) - - # sin_sincos(x) = Sleef.sincos(x)[1] - # cos_sincos(x) = Sleef.sincos(x)[2] - # fun_table = Dict(sin_sincos => Base.sin, cos_sincos => Base.cos) - # tol = 1 - # test_acc(T, fun_table, xx, tol) - - - # xx = map(T, vcat(-1:0.00002:1)) - # fun_table = Dict(Sleef.asin_fast => Base.asin, Sleef.acos_fast => Base.acos) - # tol = 3 - # test_acc(T, fun_table, xx, tol) - # fun_table = Dict(Sleef.asin => asin, Sleef.acos => Base.acos) - # tol = 1 - # test_acc(T, fun_table, xx, tol) + sin_sincos_fast(x) = Sleef.sincos_fast(x)[1] + cos_sincos_fast(x) = Sleef.sincos_fast(x)[2] + fun_table = Dict(sin_sincos_fast => Base.sin, cos_sincos_fast => Base.cos) + tol = 4 + test_acc(T, fun_table, xx, tol) + sin_sincos(x) = Sleef.sincos(x)[1] + cos_sincos(x) = Sleef.sincos(x)[2] + fun_table = Dict(sin_sincos => Base.sin, cos_sincos => Base.cos) + tol = 1 + test_acc(T, fun_table, xx, tol) + + + xx = map(T, vcat(-1:0.00002:1)) + fun_table = Dict(Sleef.asin_fast => Base.asin, Sleef.acos_fast => Base.acos) + tol = 3 + test_acc(T, fun_table, xx, tol) - # xx = map(T, vcat(-10:0.0002:10, -10000:0.2:10000, -10000:0.201:10000)) - # fun_table = Dict(Sleef.atan_fast => Base.atan) - # tol = 3 - # test_acc(T, fun_table, xx, tol) + fun_table = Dict(Sleef.asin => asin, Sleef.acos => Base.acos) + tol = 1 + test_acc(T, fun_table, xx, tol) - # fun_table = Dict(Sleef.atan => Base.atan) - # tol = 1 - # test_acc(T, fun_table, xx, tol) + xx = map(T, vcat(-10:0.0002:10, -10000:0.2:10000, -10000:0.201:10000)) + fun_table = Dict(Sleef.atan_fast => Base.atan) + tol = 3 + test_acc(T, fun_table, xx, tol) - # xx1 = map(Tuple{T,T}, [zip(-10:0.050:10, -10:0.050:10)...]) - # xx2 = map(Tuple{T,T}, [zip(-10:0.051:10, -10:0.052:10)...]) - # xx3 = map(Tuple{T,T}, [zip(-100:0.51:100, -100:0.51:100)...]) - # xx4 = map(Tuple{T,T}, [zip(-100:0.51:100, -100:0.52:100)...]) - # xx = vcat(xx1, xx2, xx3, xx4) + fun_table = Dict(Sleef.atan => Base.atan) + tol = 1 + test_acc(T, fun_table, xx, tol) - # fun_table = Dict(Sleef.atan2_fast => Base.atan2) - # tol = 2.5 - # test_acc(T, fun_table, xx, tol) - # fun_table = Dict(Sleef.atan2 => Base.atan2) - # tol = 1 - # test_acc(T, fun_table, xx, tol) + xx1 = map(Tuple{T,T}, [zip(-10:0.050:10, -10:0.050:10)...]) + xx2 = map(Tuple{T,T}, [zip(-10:0.051:10, -10:0.052:10)...]) + xx3 = map(Tuple{T,T}, [zip(-100:0.51:100, -100:0.51:100)...]) + xx4 = map(Tuple{T,T}, [zip(-100:0.51:100, -100:0.52:100)...]) + xx = vcat(xx1, xx2, xx3, xx4) + fun_table = Dict(Sleef.atan2_fast => Base.atan2) + tol = 2.5 + test_acc(T, fun_table, xx, tol) - # xx = map(T, vcat(0.0001:0.0001:10, 0.001:0.1:10000, 1.1.^(-1000:1000), 2.1.^(-1000:1000))) - # fun_table = Dict(Sleef.log_fast => Base.log) - # tol = 3 - # test_acc(T, fun_table, xx, tol) + fun_table = Dict(Sleef.atan2 => Base.atan2) + tol = 1 + test_acc(T, fun_table, xx, tol) - # fun_table = Dict(Sleef.log => Base.log) - # tol = 1 - # test_acc(T, fun_table, xx, tol) + xx = map(T, vcat(0.0001:0.0001:10, 0.001:0.1:10000, 1.1.^(-1000:1000), 2.1.^(-1000:1000))) + fun_table = Dict(Sleef.log_fast => Base.log) + tol = 3 + test_acc(T, fun_table, xx, tol) - # xx = map(T, vcat(0.0001:0.0001:10, 0.0001:0.1:10000)) - # fun_table = Dict(Sleef.log10 => Base.log10, Sleef.log2 => Base.log2) - # tol = 1 - # test_acc(T, fun_table, xx, tol) + fun_table = Dict(Sleef.log => Base.log) + tol = 1 + test_acc(T, fun_table, xx, tol) - # xx = map(T, vcat(0.0001:0.0001:10, 0.0001:0.1:10000, 10.0.^-(0:0.02:300), -10.0.^-(0:0.02:300))) - # fun_table = Dict(Sleef.log1p => Base.log1p) - # tol = 1 - # test_acc(T, fun_table, xx, tol) + xx = map(T, vcat(0.0001:0.0001:10, 0.0001:0.1:10000)) + fun_table = Dict(Sleef.log10 => Base.log10, Sleef.log2 => Base.log2) + tol = 1 + test_acc(T, fun_table, xx, tol) - # xx1 = map(Tuple{T,T}, [(x,y) for x = -100:0.20:100, y = 0.1:0.20:100])[:] - # xx2 = map(Tuple{T,T}, [(x,y) for x = -100:0.21:100, y = 0.1:0.22:100])[:] - # xx3 = map(Tuple{T,T}, [(x,y) for x = 2.1, y = -1000:0.1:1000]) - # xx = vcat(xx1, xx2, xx2) - # fun_table = Dict(Sleef.pow => Base.:^) - # tol = 1 - # test_acc(T, fun_table, xx, tol) + xx = map(T, vcat(0.0001:0.0001:10, 0.0001:0.1:10000, 10.0.^-(0:0.02:300), -10.0.^-(0:0.02:300))) + fun_table = Dict(Sleef.log1p => Base.log1p) + tol = 1 + test_acc(T, fun_table, xx, tol) - # xx = map(T, vcat(-10000:0.2:10000, 1.1.^(-1000:1000), 2.1.^(-1000:1000))) - # fun_table = Dict(Sleef.cbrt_fast => Base.cbrt) - # tol = 2 - # test_acc(T, fun_table, xx, tol) + xx1 = map(Tuple{T,T}, [(x,y) for x = -100:0.20:100, y = 0.1:0.20:100])[:] + xx2 = map(Tuple{T,T}, [(x,y) for x = -100:0.21:100, y = 0.1:0.22:100])[:] + xx3 = map(Tuple{T,T}, [(x,y) for x = 2.1, y = -1000:0.1:1000]) + xx = vcat(xx1, xx2, xx2) + fun_table = Dict(Sleef.pow => Base.:^) + tol = 1 + test_acc(T, fun_table, xx, tol) - # fun_table = Dict(Sleef.cbrt => Base.cbrt) - # tol = 1 - # test_acc(T, fun_table, xx, tol) + xx = map(T, vcat(-10000:0.2:10000, 1.1.^(-1000:1000), 2.1.^(-1000:1000))) + fun_table = Dict(Sleef.cbrt_fast => Base.cbrt) + tol = 2 + test_acc(T, fun_table, xx, tol) + fun_table = Dict(Sleef.cbrt => Base.cbrt) + tol = 1 + test_acc(T, fun_table, xx, tol) - # xx = map(T, vcat(-10:0.0002:10, -120:0.023:1000, -1000:0.02:2000)) - # fun_table = Dict(Sleef.exp2 => Base.exp2) - # tol = 1 - # test_acc(T, fun_table, xx, tol) + xx = map(T, vcat(-10:0.0002:10, -120:0.023:1000, -1000:0.02:2000)) + fun_table = Dict(Sleef.exp2 => Base.exp2) + tol = 1 + test_acc(T, fun_table, xx, tol) - # xx = map(T, vcat(-10:0.0002:10, -35:0.023:1000, -300:0.01:300)) - # fun_table = Dict(Sleef.exp10 => Base.exp10) - # tol = 1 - # test_acc(T, fun_table, xx, tol) + xx = map(T, vcat(-10:0.0002:10, -35:0.023:1000, -300:0.01:300)) + fun_table = Dict(Sleef.exp10 => Base.exp10) + tol = 1 + test_acc(T, fun_table, xx, tol) - # xx = map(T, vcat(-10:0.0002:10, -1000:0.021:1000, -1000:0.023:1000, - # 10.0.^-(0:0.02:300), -10.0.^-(0:0.02:300), 10.0.^(0:0.021:300), -10.0.^-(0:0.021:300))) - # fun_table = Dict(Sleef.expm1 => Base.expm1) - # tol = 2 - # test_acc(T, fun_table, xx, tol) + xx = map(T, vcat(-10:0.0002:10, -1000:0.021:1000, -1000:0.023:1000, + 10.0.^-(0:0.02:300), -10.0.^-(0:0.02:300), 10.0.^(0:0.021:300), -10.0.^-(0:0.021:300))) + fun_table = Dict(Sleef.expm1 => Base.expm1) + tol = 2 + test_acc(T, fun_table, xx, tol) - # xx = map(T, vcat(-10:0.0002:10, -1000:0.02:1000)) - # fun_table = Dict(Sleef.sinh => Base.sinh, Sleef.cosh => Base.cosh, Sleef.tanh => Base.tanh) - # tol = 1 - # test_acc(T, fun_table, xx, tol) + xx = map(T, vcat(-10:0.0002:10, -1000:0.02:1000)) + fun_table = Dict(Sleef.sinh => Base.sinh, Sleef.cosh => Base.cosh, Sleef.tanh => Base.tanh) + tol = 1 + test_acc(T, fun_table, xx, tol) - @testset "xilogb at arbitrary values" begin + @testset "xilogb at arbitrary values" begin xd = Dict{T,Int}(T(1e-30) => -100, T(2.31e-11) => -36, T(-1.0) => 0, T(1.0) => 0, - T(2.31e11) => 37, T(1e30) => 99) + T(2.31e11) => 37, T(1e30) => 99) for (i,j) in xd @test Sleef.ilogb(i) === j end diff --git a/test/dnml_nan.jl b/test/dnml_nan.jl index c8b0035..a68b91b 100644 --- a/test/dnml_nan.jl +++ b/test/dnml_nan.jl @@ -245,7 +245,7 @@ end fun_table = Dict(Sleef.cos_fast => Base.cos, Sleef.cos => Base.cos) @testset "denormal/nonnumber $xtrig" for (xtrig, trig) in fun_table - xa = T[NaN, Inf, -Inf] + xa = T[NaN, -0.0, 0.0, Inf, -Inf] for x in xa @test cmpdenorm(xtrig(x), trig(BigFloat(x))) end @@ -262,7 +262,7 @@ end @testset "denormal/nonnumber cos in $xsincos"for xsincos in (Sleef.sincos_fast, Sleef.sincos) - xa = T[NaN, Inf, -Inf] + xa = T[NaN, -0.0, 0.0, Inf, -Inf] for x in xa q = xsincos(x)[2] @test cmpdenorm(q, Base.cos(BigFloat(x))) @@ -271,7 +271,7 @@ end @testset "denormal/nonnumber $xtan" for xtan in (Sleef.tan_fast, Sleef.tan) - xa = T[NaN, Inf, -Inf, -0.0, 0.0, pi/2, -pi/2] + xa = T[NaN, Inf, -Inf, -0.0, 0.0, pi/2, -pi/2, -0.0, 0.0] for x in xa @test cmpdenorm(xtan(x), Base.tan(BigFloat(x))) end @@ -280,7 +280,7 @@ end fun_table = Dict(Sleef.asin => Base.asin, Sleef.asin_fast => Base.asin, Sleef.acos => Base.acos, Sleef.acos_fast => Base.acos) @testset "denormal/nonnumber $xatrig" for (xatrig, atrig) in fun_table - xa = T[NaN, Inf, -Inf, 2, -2, 1, -1] + xa = T[NaN, Inf, -Inf, 2, -2, 1, -1, -0.0, 0.0] for x in xa @test cmpdenorm(xatrig(x), atrig(BigFloat(x))) end @@ -288,7 +288,7 @@ end @testset "denormal/nonnumber $xatan" for xatan in (Sleef.atan, Sleef.atan_fast) - xa = T[NaN, Inf, -Inf] + xa = T[NaN, Inf, -Inf, -0.0, 0.0] for x in xa @test cmpdenorm(xatan(x), Base.atan(BigFloat(x))) end @@ -434,4 +434,5 @@ end end + end #denormal/nonnumber diff --git a/test/exceptional.jl b/test/exceptional.jl new file mode 100644 index 0000000..0cc7957 --- /dev/null +++ b/test/exceptional.jl @@ -0,0 +1,438 @@ +@testset "exceptional $T" for T in (Float32, Float64) + +@testset "exceptional $xatan2" for xatan2 in (Sleef.atan2_fast, Sleef.atan2) + + @test xatan2(T(0.0), T(-0.0)) === T(pi) + @test xatan2(T(-0.0), T(-0.0)) === -T(pi) + @test ispzero(xatan2(T(0.0), T(0.0))) + @test isnzero(xatan2(T(-0.0), T(0.0))) + @test xatan2( T(Inf), -T(Inf)) === T(3*pi/4) + @test xatan2(-T(Inf), -T(Inf)) === T(-3*pi/4) + @test xatan2( T(Inf), T(Inf)) === T(pi/4) + @test xatan2(-T(Inf), T(Inf)) === T(-pi/4) + + + y = T(0.0) + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5] + for x in xa + @test xatan2(y,x) === T(pi) + end + + + y = T(-0.0) + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5] + for x in xa + @test xatan2(y,x) === T(-pi) + end + + + ya = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5] + xa = T[T(0.0), T(-0.0)] + for x in xa, y in ya + @test xatan2(y,x) === T(-pi/2) + end + + + ya = T[100000.5, 100000, 3, 2.5, 2, 1.5, 1.0, 0.5] + xa = T[T(0.0), T(-0.0)] + for x in xa, y in ya + @test xatan2(y,x) === T(pi/2) + end + + + y = T(Inf) + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + for x in xa + @test xatan2(y,x) === T(pi/2) + end + + + y = T(-Inf) + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + for x in xa + @test xatan2(y,x) === T(-pi/2) + end + + + ya = T[0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + x = T(Inf) + for y in ya + @test ispzero(xatan2(y,x)) + end + + + ya = T[-0.5, -1.5, -2.0, -2.5, -3.0, -100000, -100000.5] + x = T(Inf) + for y in ya + @test isnzero(xatan2(y,x)) + end + + + ya = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NaN] + x = T(NaN) + for y in ya + @test isnan(xatan2(y,x)) + end + + + y = T(NaN) + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5, NaN] + for x in xa + @test isnan(xatan2(y,x)) + end + +end # denormal/nonumber atan2 + + +@testset "exceptional xpow" begin + + @test Sleef.pow(T(1), T(NaN)) === T(1) + @test Sleef.pow( T(NaN), T(0)) === T(1) + @test Sleef.pow(-T(1), T(Inf)) === T(1) + @test Sleef.pow(-T(1), T(-Inf)) === T(1) + + + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5] + ya = T[-100000.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 100000.5] + for x in xa, y in ya + @test isnan(Sleef.pow(x,y)) + end + + + x = T(NaN) + ya = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + for y in ya + @test isnan(Sleef.pow(x,y)) + end + + + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5, -0.0, +0.0, 0.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + y = T(NaN) + for x in xa + @test isnan(Sleef.pow(x,y)) + end + + + x = T(0.0) + ya = T[1, 3, 5, 7, 100001] + for y in ya + @test ispzero(Sleef.pow(x,y)) + end + + + x = T(-0.0) + ya = T[1, 3, 5, 7, 100001] + for y in ya + @test isnzero(Sleef.pow(x,y)) + end + + + xa = T[0.0, -0.0] + ya = T[0.5, 1.5, 2.0, 2.5, 4.0, 100000, 100000.5] + for x in xa, y in ya + @test ispzero(Sleef.pow(x,y)) + end + + + xa = T[-0.999, -0.5, -0.0, +0.0, +0.5, +0.999] + y = T(-Inf) + for x in xa + @test Sleef.pow(x,y) === T(Inf) + end + + + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + y = T(-Inf) + for x in xa + @test ispzero(Sleef.pow(x,y)) + end + + + xa = T[-0.999, -0.5, -0.0, +0.0, +0.5, +0.999] + y = T(Inf) + for x in xa + @test ispzero(Sleef.pow(x,y)) + end + + + xa = T[-100000.5, -100000, -3, -2.5, -2, -1.5, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + y = T(Inf) + for x in xa + @test Sleef.pow(x,y) === T(Inf) + end + + + x = T(-Inf) + ya = T[-100001, -5, -3, -1] + for y in ya + @test isnzero(Sleef.pow(x,y)) + end + + + x = T(-Inf) + ya = T[-100000.5, -100000, -4, -2.5, -2, -1.5, -0.5] + for y in ya + @test ispzero(Sleef.pow(x,y)) + end + + + x = T(-Inf) + ya = T[1, 3, 5, 7, 100001] + for y in ya + @test Sleef.pow(x,y) === T(-Inf) + end + + + x = T(-Inf) + ya = T[0.5, 1.5, 2, 2.5, 3.5, 4, 100000, 100000.5] + for y in ya + @test Sleef.pow(x,y) === T(Inf) + end + + + x = T(Inf) + ya = T[-100000.5, -100000, -3, -2.5, -2, -1.5, -1.0, -0.5] + for y in ya + @test ispzero(Sleef.pow(x,y)) + end + + + x = T(Inf) + ya = T[0.5, 1, 1.5, 2.0, 2.5, 3.0, 100000, 100000.5] + for y in ya + @test Sleef.pow(x,y) === T(Inf) + end + + + x = T(0.0) + ya = T[-100001, -5, -3, -1] + for y in ya + @test Sleef.pow(x,y) === T(Inf) + end + + + x = T(-0.0) + ya = T[-100001, -5, -3, -1] + for y in ya + @test Sleef.pow(x,y) === T(-Inf) + end + + + xa = T[0.0, -0.0] + ya = T[-100000.5, -100000, -4, -2.5, -2, -1.5, -0.5] + for x in xa, y in ya + @test Sleef.pow(x,y) === T(Inf) + end + + + xa = T[1000, -1000] + ya = T[1000, 1000.5, 1001] + for x in xa, y in ya + @test cmpdenorm(Sleef.pow(x,y), Base.:^(BigFloat(x),BigFloat(y))) + end + +end # denormal/nonumber pow + + +fun_table = Dict(Sleef.sin_fast => Base.sin, Sleef.sin => Base.sin) +@testset "exceptional $xtrig" for (xtrig, trig) in fun_table + xa = T[NaN, -0.0, 0.0, Inf, -Inf] + for x in xa + @test cmpdenorm(xtrig(x), trig(BigFloat(x))) + end +end + + +fun_table = Dict(Sleef.cos_fast => Base.cos, Sleef.cos => Base.cos) +@testset "exceptional $xtrig" for (xtrig, trig) in fun_table + xa = T[NaN, -0.0, 0.0, Inf, -Inf] + for x in xa + @test cmpdenorm(xtrig(x), trig(BigFloat(x))) + end +end + + +@testset "exceptional sin in $xsincos"for xsincos in (Sleef.sincos_fast, Sleef.sincos) + xa = T[NaN, -0.0, 0.0, Inf, -Inf] + for x in xa + q = xsincos(x)[1] + @test cmpdenorm(q, Base.sin(BigFloat(x))) + end +end + + +@testset "exceptional cos in $xsincos"for xsincos in (Sleef.sincos_fast, Sleef.sincos) + xa = T[NaN, -0.0, 0.0, Inf, -Inf] + for x in xa + q = xsincos(x)[2] + @test cmpdenorm(q, Base.cos(BigFloat(x))) + end +end + + +@testset "exceptional $xtan" for xtan in (Sleef.tan_fast, Sleef.tan) + xa = T[NaN, Inf, -Inf, -0.0, 0.0, pi/2, -pi/2, -0.0, 0.0] + for x in xa + @test cmpdenorm(xtan(x), Base.tan(BigFloat(x))) + end +end + + +fun_table = Dict(Sleef.asin => Base.asin, Sleef.asin_fast => Base.asin, Sleef.acos => Base.acos, Sleef.acos_fast => Base.acos) +@testset "exceptional $xatrig" for (xatrig, atrig) in fun_table + xa = T[NaN, Inf, -Inf, 2, -2, 1, -1, -0.0, 0.0] + for x in xa + @test cmpdenorm(xatrig(x), atrig(BigFloat(x))) + end +end + + +@testset "exceptional $xatan" for xatan in (Sleef.atan, Sleef.atan_fast) + xa = T[NaN, Inf, -Inf, -0.0, 0.0] + for x in xa + @test cmpdenorm(xatan(x), Base.atan(BigFloat(x))) + end +end + + +@testset "exceptional exp" begin + xa = T[NaN, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.exp(x), Base.exp(BigFloat(x))) + end +end + + +@testset "exceptional sinh" begin + xa = T[NaN, 0.0, -0.0, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.sinh(x), Base.sinh(BigFloat(x))) + end +end + + +@testset "exceptional cosh" begin + xa = T[NaN, 0.0, -0.0, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.cosh(x), Base.cosh(BigFloat(x))) + end +end + + +@testset "exceptional tanh" begin + xa = T[NaN, 0.0, -0.0, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.tanh(x), Base.tanh(BigFloat(x))) + end +end + + +@testset "exceptional asinh" begin + xa = T[NaN, 0.0, -0.0, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.asinh(x), Base.asinh(BigFloat(x))) + end +end + + +@testset "exceptional acosh" begin + xa = T[NaN, 0.0, -0.0, 1.0, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.acosh(x), Base.acosh(BigFloat(x))) + end +end + + +@testset "exceptional atanh" begin + xa = T[NaN, 0.0, -0.0, 1.0, -1.0, Inf, -Inf, 10000, -10000] + for x in xa + @test cmpdenorm(Sleef.atanh(x), Base.atanh(BigFloat(x))) + end +end + + +@testset "exceptional $xcbrt" for xcbrt = (Sleef.cbrt, Sleef.cbrt_fast) + xa = T[NaN, Inf, -Inf, 0.0, -0.0] + for x in xa + @test cmpdenorm(Sleef.cbrt(x), Base.cbrt(BigFloat(x))) + end +end + + +@testset "exceptional exp2" begin + xa = T[NaN, Inf, -Inf] + for x in xa + @test cmpdenorm(Sleef.exp2(x), Base.exp2(BigFloat(x))) + end +end + + +@testset "exceptional exp10" begin + xa = T[NaN, Inf, -Inf] + for x in xa + @test cmpdenorm(Sleef.exp10(x), Base.exp10(BigFloat(x))) + end +end + + +@testset "exceptional expm1" begin + xa = T[NaN, Inf, -Inf, 0.0, -0.0] + for x in xa + @test cmpdenorm(Sleef.expm1(x), Base.expm1(BigFloat(x))) + end +end + + + +@testset "exceptional $xlog" for xlog in (Sleef.log, Sleef.log_fast) + xa = T[NaN, Inf, -Inf, 0, -1] + for x in xa + @test cmpdenorm(xlog(x), Base.log(BigFloat(x))) + end +end + + +@testset "exceptional log10" begin + xa = T[NaN, Inf, -Inf, 0, -1] + for x in xa + @test cmpdenorm(Sleef.log10(x), Base.log10(BigFloat(x))) + end +end + + +@testset "exceptional log2" begin + xa = T[NaN, Inf, -Inf, 0, -1] + for x in xa + @test cmpdenorm(Sleef.log2(x), Base.log2(BigFloat(x))) + end +end + + +@testset "exceptional log1p" begin + xa = T[NaN, Inf, -Inf, 0.0, -0.0, -1.0, -2.0] + for x in xa + @test cmpdenorm(Sleef.log1p(x), Base.log1p(BigFloat(x))) + end +end + + +@testset "exceptional ldexp" begin + for i = -10000:10000 + a = Sleef.ldexp(T(1.0), i) + b = Base.ldexp(BigFloat(1.0), i) + @test (isfinite(b) && a == b || cmpdenorm(a,b)) + end +end + + +@testset "exceptional ilogb" begin + @test Sleef.ilogb(+T(Inf)) == Sleef.INT_MAX + @test Sleef.ilogb(-T(Inf)) == Sleef.INT_MAX + @test Sleef.ilogb(+T(0.0)) == Sleef.FP_ILOGB0 + @test Sleef.ilogb(-T(0.0)) == Sleef.FP_ILOGB0 + @test Sleef.ilogb( T(NaN)) == Sleef.FP_ILOGBNAN +end + + + +end #exceptional diff --git a/test/runtests.jl b/test/runtests.jl index ca5f553..654cd6b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,8 @@ using Sleef using Base.Test +using Base.Math.significand_bits + isnzero(x::T) where {T <: AbstractFloat} = signbit(x) ispzero(x::T) where {T <: AbstractFloat} = !signbit(x) @@ -22,28 +24,39 @@ end # the following compares the ulp between x and y. # First it promotes them to the larger of the two types x,y -infh(::Type{Float64}) = 1e+300 -infh(::Type{Float32}) = 1e+37 +const infh(::Type{Float64}) = 1e300 +const infh(::Type{Float32}) = 1e37 function countulp(T, x::AbstractFloat, y::AbstractFloat) X, Y = promote(x, y) x, y = T(X), T(Y) # Cast to smaller type (isnan(x) && isnan(y)) && return 0 (isnan(x) || isnan(y)) && return 10000 if isinf(x) - (sign(x) == sign(y) && abs(y) > infh(T)) && return 0 # Relaxed infinity handling + (sign(x) == sign(y) && abs(y) > infh(T)) && return 0 # relaxed infinity handling return 10001 end (x == Inf && y == Inf) && return 0 (x == -Inf && y == -Inf) && return 0 if y == 0 - (x == 0) && return 0 + x == 0 && return 0 return 10002 end if isfinite(x) && isfinite(y) - return T(abs(X - Y) / eps(y)) + return T(abs(X - Y) / ulp(y)) end return 10003 end + +const DENORMAL_MIN(::Type{Float64}) = 2.0^-1074 +const DENORMAL_MIN(::Type{Float32}) = 2f0^-149 + +function ulp(x::T) where {T<:AbstractFloat} + x = abs(x) + x == T(0.0) && return DENORMAL_MIN(T) + val, e = frexp(x) + return max(ldexp(T(1.0), e - significand_bits(T) - 1), DENORMAL_MIN(T)) +end + countulp(x::T, y::T) where {T <: AbstractFloat} = countulp(T, x, y) # get rid off annoying warnings from overwritten function @@ -108,8 +121,8 @@ end function runtests() @testset "Sleef" begin - include(joinpath(@__DIR__, "dnml_nan.jl")) - include(joinpath(@__DIR__, "accuracy.jl")) + include("exceptional.jl") + include("accuracy.jl") end end