From 9a3bacf92ca1f22257141c386dced64afefe40f7 Mon Sep 17 00:00:00 2001 From: Mus M Date: Fri, 10 Nov 2017 10:59:35 -0500 Subject: [PATCH] Simplify kernel constructs --- src/exp.jl | 111 ++++++++--------- src/log.jl | 46 ++++--- src/priv.jl | 132 ++++++++++---------- src/trig.jl | 347 +++++++++++++++++++++++++--------------------------- 4 files changed, 309 insertions(+), 327 deletions(-) diff --git a/src/exp.jl b/src/exp.jl index 265ebd5..ab7b031 100644 --- a/src/exp.jl +++ b/src/exp.jl @@ -14,39 +14,36 @@ const max_exp2(::Type{Float32}) = 128f0 const min_exp2(::Type{Float64}) = -1075 const min_exp2(::Type{Float32}) = -150f0 +@inline function exp2_kernel(x::Float64) + c11d = 0.4434359082926529454e-9 + c10d = 0.7073164598085707425e-8 + c9d = 0.1017819260921760451e-6 + c8d = 0.1321543872511327615e-5 + c7d = 0.1525273353517584730e-4 + c6d = 0.1540353045101147808e-3 + c5d = 0.1333355814670499073e-2 + c4d = 0.9618129107597600536e-2 + c3d = 0.5550410866482046596e-1 + c2d = 0.2402265069591012214 + c1d = 0.6931471805599452862 + return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d +end + +@inline function exp2_kernel(x::Float32) + c6f = 0.1535920892f-3 + c5f = 0.1339262701f-2 + c4f = 0.9618384764f-2 + c3f = 0.5550347269f-1 + c2f = 0.2402264476f0 + c1f = 0.6931471825f0 + return @horner x c1f c2f c3f c4f c5f c6f +end + """ exp2(x) Compute the base-`2` exponential of `x`, that is `2ˣ`. """ -function exp2 end - -let -global exp2 - - -c11d = 0.4434359082926529454e-9 -c10d = 0.7073164598085707425e-8 -c9d = 0.1017819260921760451e-6 -c8d = 0.1321543872511327615e-5 -c7d = 0.1525273353517584730e-4 -c6d = 0.1540353045101147808e-3 -c5d = 0.1333355814670499073e-2 -c4d = 0.9618129107597600536e-2 -c3d = 0.5550410866482046596e-1 -c2d = 0.2402265069591012214 -c1d = 0.6931471805599452862 - -c6f = 0.1535920892f-3 -c5f = 0.1339262701f-2 -c4f = 0.9618384764f-2 -c3f = 0.5550347269f-1 -c2f = 0.2402264476f0 -c1f = 0.6931471825f0 - -global @inline exp2_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d -global @inline exp2_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f - function exp2(d::T) where {T<:Union{Float32,Float64}} q = unsafe_trunc(Int, round(d)) s = d - q @@ -60,7 +57,7 @@ function exp2(d::T) where {T<:Union{Float32,Float64}} d < min_exp2(T) && (u = T(0.0)) return u end -end + const max_exp10(::Type{Float64}) = 3.08254715559916743851e2 # log 2^1023*(2-2^-52) const max_exp10(::Type{Float32}) = 38.531839419103626f0 # log 2^127 *(2-2^-23) @@ -97,44 +94,43 @@ function expm1(x::T) where {T<:Union{Float32,Float64}} return u end + const max_exp(::Type{Float64}) = 709.78271114955742909217217426 # log 2^1023*(2-2^-52) const max_exp(::Type{Float32}) = 88.72283905206835f0 # log 2^127 *(2-2^-23) const min_exp(::Type{Float64}) = -7.451332191019412076235e2 # log 2^-1075 const min_exp(::Type{Float32}) = -103.97208f0 # ≈ log 2^-150 +@inline function exp_kernel(x::Float64) + c11d = 2.08860621107283687536341e-09 + c10d = 2.51112930892876518610661e-08 + c9d = 2.75573911234900471893338e-07 + c8d = 2.75572362911928827629423e-06 + c7d = 2.4801587159235472998791e-05 + c6d = 0.000198412698960509205564975 + c5d = 0.00138888888889774492207962 + c4d = 0.00833333333331652721664984 + c3d = 0.0416666666666665047591422 + c2d = 0.166666666666666851703837 + c1d = 0.50 + return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d +end + +@inline function exp_kernel(x::Float32) + c6f = 0.000198527617612853646278381f0 + c5f = 0.00139304355252534151077271f0 + c4f = 0.00833336077630519866943359f0 + c3f = 0.0416664853692054748535156f0 + c2f = 0.166666671633720397949219f0 + c1f = 0.5f0 + return @horner x c1f c2f c3f c4f c5f c6f +end + """ exp(x) Compute the base-`e` exponential of `x`, that is `eˣ`. """ -function exp end - -let -global exp - -c11d = 2.08860621107283687536341e-09 -c10d = 2.51112930892876518610661e-08 -c9d = 2.75573911234900471893338e-07 -c8d = 2.75572362911928827629423e-06 -c7d = 2.4801587159235472998791e-05 -c6d = 0.000198412698960509205564975 -c5d = 0.00138888888889774492207962 -c4d = 0.00833333333331652721664984 -c3d = 0.0416666666666665047591422 -c2d = 0.166666666666666851703837 -c1d = 0.50 - -c6f = 0.000198527617612853646278381f0 -c5f = 0.00139304355252534151077271f0 -c4f = 0.00833336077630519866943359f0 -c3f = 0.0416664853692054748535156f0 -c2f = 0.166666671633720397949219f0 -c1f = 0.5f0 - -global @inline exp_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d -global @inline exp_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f - function exp(d::T) where {T<:Union{Float32,Float64}} q = unsafe_trunc(Int, round(T(MLN2E) * d)) s = muladd(q, -L2U(T), d) @@ -145,9 +141,8 @@ function exp(d::T) where {T<:Union{Float32,Float64}} u = s * s * u + s + 1 u = ldexp2k(u, q) - d < min_exp(T) && (u = T(0)) d > max_exp(T) && (u = T(Inf)) + d < min_exp(T) && (u = T(0)) return u end -end diff --git a/src/log.jl b/src/log.jl index c23829c..3406db1 100644 --- a/src/log.jl +++ b/src/log.jl @@ -111,35 +111,34 @@ end # That being said, since this converges faster when the argument is close to # 1, we multiply `m` by `2` and subtract 1 for the exponent `e` when `m` is # less than `sqrt(2)/2` + +@inline function log_fast_kernel(x::Float64) + c8d = 0.153487338491425068243146 + c7d = 0.152519917006351951593857 + c6d = 0.181863266251982985677316 + c5d = 0.222221366518767365905163 + c4d = 0.285714294746548025383248 + c3d = 0.399999999950799600689777 + c2d = 0.6666666666667778740063 + c1d = 2.0 + return @horner x c1d c2d c3d c4d c5d c6d c7d c8d +end + +@inline function log_fast_kernel(x::Float32) + c5f = 0.2392828464508056640625f0 + c4f = 0.28518211841583251953125f0 + c3f = 0.400005877017974853515625f0 + c2f = 0.666666686534881591796875f0 + c1f = 2f0 + return @horner x c1f c2f c3f c4f c5f +end + """ log_fast(x) Compute the natural logarithm of `x`. The inverse of the natural logarithm is the natural expoenential function `exp(x)` """ -function log_fast end - -let -global log_fast - -c8d = 0.153487338491425068243146 -c7d = 0.152519917006351951593857 -c6d = 0.181863266251982985677316 -c5d = 0.222221366518767365905163 -c4d = 0.285714294746548025383248 -c3d = 0.399999999950799600689777 -c2d = 0.6666666666667778740063 -c1d = 2.0 - -c5f = 0.2392828464508056640625f0 -c4f = 0.28518211841583251953125f0 -c3f = 0.400005877017974853515625f0 -c2f = 0.666666686534881591796875f0 -c1f = 2f0 - -global @inline log_fast_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d -global @inline log_fast_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f - function log_fast(d::T) where {T<:Union{Float32,Float64}} o = d < realmin(T) o && (d *= T(Int64(1) << 32) * T(Int64(1) << 32)) @@ -161,4 +160,3 @@ function log_fast(d::T) where {T<:Union{Float32,Float64}} return x end -end diff --git a/src/priv.jl b/src/priv.jl index 00ef8b5..2fe3a82 100644 --- a/src/priv.jl +++ b/src/priv.jl @@ -170,32 +170,31 @@ end end -let -global expk -global expk2 - -c10d = 2.51069683420950419527139e-08 -c9d = 2.76286166770270649116855e-07 -c8d = 2.75572496725023574143864e-06 -c7d = 2.48014973989819794114153e-05 -c6d = 0.000198412698809069797676111 -c5d = 0.0013888888939977128960529 -c4d = 0.00833333333332371417601081 -c3d = 0.0416666666665409524128449 -c2d = 0.166666666666666740681535 -c1d = 0.500000000000000999200722 - -c5f = 0.00136324646882712841033936f0 -c4f = 0.00836596917361021041870117f0 -c3f = 0.0416710823774337768554688f0 -c2f = 0.166665524244308471679688f0 -c1f = 0.499999850988388061523438f0 - -global @inline expk_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d -global @inline expk_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f - -global under_expk(::Type{Float64}) = -1000.0 -global under_expk(::Type{Float32}) = -104f0 +const under_expk(::Type{Float64}) = -1000.0 +const under_expk(::Type{Float32}) = -104f0 + +@inline function expk_kernel(x::Float64) + c10d = 2.51069683420950419527139e-08 + c9d = 2.76286166770270649116855e-07 + c8d = 2.75572496725023574143864e-06 + c7d = 2.48014973989819794114153e-05 + c6d = 0.000198412698809069797676111 + c5d = 0.0013888888939977128960529 + c4d = 0.00833333333332371417601081 + c3d = 0.0416666666665409524128449 + c2d = 0.166666666666666740681535 + c1d = 0.500000000000000999200722 + return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d +end + +@inline function expk_kernel(x::Float32) + c5f = 0.00136324646882712841033936f0 + c4f = 0.00836596917361021041870117f0 + c3f = 0.0416710823774337768554688f0 + c2f = 0.166665524244308471679688f0 + c1f = 0.499999850988388061523438f0 + return @horner x c1f c2f c3f c4f c5f +end @inline function expk(d::Double{T}) where {T<:Union{Float32,Float64}} q = round(T(d) * T(MLN2E)) @@ -232,28 +231,28 @@ end t = dadd(T(1.0), t) return scale(scale(t, T(2.0)), pow2i(T, unsafe_trunc(Int, q - 1))) end -end -let -global logk2 - -c8d = 0.13860436390467167910856 -c7d = 0.131699838841615374240845 -c6d = 0.153914168346271945653214 -c5d = 0.181816523941564611721589 -c4d = 0.22222224632662035403996 -c3d = 0.285714285511134091777308 -c2d = 0.400000000000914013309483 -c1d = 0.666666666666664853302393 -c4f = 0.240320354700088500976562f0 -c3f = 0.285112679004669189453125f0 -c2f = 0.400007992982864379882812f0 -c1f = 0.666666686534881591796875f0 +@inline function logk2_kernel(x::Float64) + c8d = 0.13860436390467167910856 + c7d = 0.131699838841615374240845 + c6d = 0.153914168346271945653214 + c5d = 0.181816523941564611721589 + c4d = 0.22222224632662035403996 + c3d = 0.285714285511134091777308 + c2d = 0.400000000000914013309483 + c1d = 0.666666666666664853302393 + return @horner x c1d c2d c3d c4d c5d c6d c7d c8d +end -global @inline logk2_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d -global @inline logk2_kernel(x::Float32) = @horner x c1f c2f c3f c4f +@inline function logk2_kernel(x::Float32) + c4f = 0.240320354700088500976562f0 + c3f = 0.285112679004669189453125f0 + c2f = 0.400007992982864379882812f0 + c1f = 0.666666686534881591796875f0 + return @horner x c1f c2f c3f c4f +end @inline function logk2(d::Double{T}) where {T<:Union{Float32,Float64}} e = ilogbk(d.hi * T(1.0/0.75)) @@ -266,28 +265,30 @@ global @inline logk2_kernel(x::Float32) = @horner x c1f c2f c3f c4f dadd(dmul(MDLN2(T), T(e)), dadd(scale(x, T(2.0)), dmul(dmul(x2, x), t))) end + + + +@inline function logk_kernel(x::Double{Float64}) + c10d = 0.116255524079935043668677 + c9d = 0.103239680901072952701192 + c8d = 0.117754809412463995466069 + c7d = 0.13332981086846273921509 + c6d = 0.153846227114512262845736 + c5d = 0.181818180850050775676507 + c4d = 0.222222222230083560345903 + c3d = 0.285714285714249172087875 + c2d = 0.400000000000000077715612 + c1dd = Double(0.666666666666666629659233, 3.80554962542412056336616e-17) + dadd2(c1dd, dmul(x, @horner x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d)) end -let -global logk -c10d = 0.116255524079935043668677 -c9d = 0.103239680901072952701192 -c8d = 0.117754809412463995466069 -c7d = 0.13332981086846273921509 -c6d = 0.153846227114512262845736 -c5d = 0.181818180850050775676507 -c4d = 0.222222222230083560345903 -c3d = 0.285714285714249172087875 -c2d = 0.400000000000000077715612 -c1dd = Double(0.666666666666666629659233, 3.80554962542412056336616e-17); - -c4f = 0.240320354700088500976562f0 -c3f = 0.285112679004669189453125f0 -c2f = 0.400007992982864379882812f0 -c1fd = Double(0.66666662693023681640625f0, 3.69183861259614332084311f-9) - -global @inline logk_kernel(x::Double{Float64}) = dadd2(c1dd, dmul(x, @horner x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d)) -global @inline logk_kernel(x::Double{Float32}) = dadd2(c1fd, dmul(x, @horner x.hi c2f c3f c4f)) +@inline function logk_kernel(x::Double{Float32}) + c4f = 0.240320354700088500976562f0 + c3f = 0.285112679004669189453125f0 + c2f = 0.400007992982864379882812f0 + c1fd = Double(0.66666662693023681640625f0, 3.69183861259614332084311f-9) + dadd2(c1fd, dmul(x, @horner x.hi c2f c3f c4f)) +end @inline function logk(d::T) where {T<:Union{Float32,Float64}} o = d < realmin(T) @@ -305,6 +306,3 @@ global @inline logk_kernel(x::Double{Float32}) = dadd2(c1fd, dmul(x, @horner x.h dadd(dmul(MDLN2(T), T(e)), dadd(scale(x, T(2.0)), dmul(dmul(x2, x), t))) end - - -end diff --git a/src/trig.jl b/src/trig.jl index 8b4c129..d30b3d5 100644 --- a/src/trig.jl +++ b/src/trig.jl @@ -14,26 +14,25 @@ Compute the cosine of `x`, where the output is in radians. """ function cos end -let -global sin -global cos - -c8d = 2.72052416138529567917983e-15 -c7d = -7.64292594113954471900203e-13 -c6d = 1.60589370117277896211623e-10 -c5d = -2.5052106814843123359368e-08 -c4d = 2.75573192104428224777379e-06 -c3d = -0.000198412698412046454654947 -c2d = 0.00833333333333318056201922 -c1d = -0.166666666666666657414808 - -c4f = 2.6083159809786593541503f-06 -c3f = -0.0001981069071916863322258f0 -c2f = 0.00833307858556509017944336f0 -c1f = -0.166666597127914428710938f0 - -global @inline sincos_kernel(x::Double{Float64}) = dadd(c1d, x.hi * (@horner x.hi c2d c3d c4d c5d c6d c7d c8d)) -global @inline sincos_kernel(x::Double{Float32}) = dadd(c1f, x.hi * (@horner x.hi c2f c3f c4f)) +@inline function sincos_kernel(x::Double{Float64}) + c8d = 2.72052416138529567917983e-15 + c7d = -7.64292594113954471900203e-13 + c6d = 1.60589370117277896211623e-10 + c5d = -2.5052106814843123359368e-08 + c4d = 2.75573192104428224777379e-06 + c3d = -0.000198412698412046454654947 + c2d = 0.00833333333333318056201922 + c1d = -0.166666666666666657414808 + return dadd(c1d, x.hi * (@horner x.hi c2d c3d c4d c5d c6d c7d c8d)) +end + +@inline function sincos_kernel(x::Double{Float32}) + c4f = 2.6083159809786593541503f-06 + c3f = -0.0001981069071916863322258f0 + c2f = 0.00833307858556509017944336f0 + c1f = -0.166666597127914428710938f0 + return dadd(c1f, x.hi * (@horner x.hi c2f c3f c4f)) +end function sin(d::T) where {T<:Float64} qh = trunc(d * (T(M_1_PI) / (1 << 24))) @@ -140,7 +139,6 @@ function cos(d::T) where {T<:Float32} return u end -end """ @@ -157,27 +155,6 @@ Compute the cosine of `x`, where the output is in radians. """ function cos_fast end -let -global sin_fast -global cos_fast - -c9d = -7.97255955009037868891952e-18 -c8d = 2.81009972710863200091251e-15 -c7d = -7.64712219118158833288484e-13 -c6d = 1.60590430605664501629054e-10 -c5d = -2.50521083763502045810755e-08 -c4d = 2.75573192239198747630416e-06 -c3d = -0.000198412698412696162806809 -c2d = 0.00833333333333332974823815 -c1d = -0.166666666666666657414808 - -# c5f is 0f0 to handle Inf32 case, Float64 doesn't need this since it comes -# out properly (add another neg constant and remove this zero constant) -c4f = 2.6083159809786593541503f-06 -c3f = -0.0001981069071916863322258f0 -c2f = 0.00833307858556509017944336f0 -c1f = -0.166666597127914428710938f0 - # Argument is first reduced to the domain 0 < s < π/4 # We return the correct sign using `q & 1 != 0` i.e. q is odd (this works for @@ -185,8 +162,25 @@ c1f = -0.166666597127914428710938f0 # we are now in the negative branch of sin(x). Recall that q is just the integer # part of d/π and thus we can determine the correct sign using this information. -global @inline sincos_fast_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d -global @inline sincos_fast_kernel(x::Float32) = @horner x c1f c2f c3f c4f +@inline function sincos_fast_kernel(x::Float64) + c9d = -7.97255955009037868891952e-18 + c8d = 2.81009972710863200091251e-15 + c7d = -7.64712219118158833288484e-13 + c6d = 1.60590430605664501629054e-10 + c5d = -2.50521083763502045810755e-08 + c4d = 2.75573192239198747630416e-06 + c3d = -0.000198412698412696162806809 + c2d = 0.00833333333333332974823815 + c1d = -0.166666666666666657414808 + return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d +end +@inline function sincos_fast_kernel(x::Float32) + c4f = 2.6083159809786593541503f-06 + c3f = -0.0001981069071916863322258f0 + c2f = 0.00833307858556509017944336f0 + c1f = -0.166666597127914428710938f0 + return @horner x c1f c2f c3f c4f +end function sin_fast(d::T) where {T<:Float64} t = d @@ -289,7 +283,6 @@ function cos_fast(d::T) where {T<:Float32} return u end -end @@ -309,39 +302,42 @@ radians, returning a tuple. """ function sincos_fast end -let -global sincos -global sincos_fast - -a6d = 1.58938307283228937328511e-10 -a5d = -2.50506943502539773349318e-08 -a4d = 2.75573131776846360512547e-06 -a3d = -0.000198412698278911770864914 -a2d = 0.0083333333333191845961746 -a1d = -0.166666666666666130709393 - -a3f = -0.000195169282960705459117889f0 -a2f = 0.00833215750753879547119141f0 -a1f = -0.166666537523269653320312f0 - -b7d = -1.13615350239097429531523e-11 -b6d = 2.08757471207040055479366e-09 -b5d = -2.75573144028847567498567e-07 -b4d = 2.48015872890001867311915e-05 -b3d = -0.00138888888888714019282329 -b2d = 0.0416666666666665519592062 -b1d = -0.50 - -b5f = -2.71811842367242206819355f-07 -b4f = 2.47990446951007470488548f-05 -b3f = -0.00138888787478208541870117f0 -b2f = 0.0416666641831398010253906f0 -b1f = -0.5f0 - -global @inline sincos_a_kernel(x::Float64) = @horner x a1d a2d a3d a4d a5d a6d -global @inline sincos_a_kernel(x::Float32) = @horner x a1f a2f a3f -global @inline sincos_b_kernel(x::Float64) = @horner x b1d b2d b3d b4d b5d b6d b7d -global @inline sincos_b_kernel(x::Float32) = @horner x b1f b2f b3f b4f b5f +@inline function sincos_a_kernel(x::Float64) + a6d = 1.58938307283228937328511e-10 + a5d = -2.50506943502539773349318e-08 + a4d = 2.75573131776846360512547e-06 + a3d = -0.000198412698278911770864914 + a2d = 0.0083333333333191845961746 + a1d = -0.166666666666666130709393 + return @horner x a1d a2d a3d a4d a5d a6d +end + +@inline function sincos_a_kernel(x::Float32) + a3f = -0.000195169282960705459117889f0 + a2f = 0.00833215750753879547119141f0 + a1f = -0.166666537523269653320312f0 + return @horner x a1f a2f a3f +end + +@inline function sincos_b_kernel(x::Float64) + b7d = -1.13615350239097429531523e-11 + b6d = 2.08757471207040055479366e-09 + b5d = -2.75573144028847567498567e-07 + b4d = 2.48015872890001867311915e-05 + b3d = -0.00138888888888714019282329 + b2d = 0.0416666666666665519592062 + b1d = -0.50 + return @horner x b1d b2d b3d b4d b5d b6d b7d +end + +@inline function sincos_b_kernel(x::Float32) + b5f = -2.71811842367242206819355f-07 + b4f = 2.47990446951007470488548f-05 + b3f = -0.00138888787478208541870117f0 + b2f = 0.0416666641831398010253906f0 + b1f = -0.5f0 + return @horner x b1f b2f b3f b4f b5f +end function sincos_fast(d::T) where {T<:Float64} s = d @@ -497,7 +493,6 @@ function sincos(d::T) where {T<:Float32} return (rx, ry) end -end """ @@ -514,36 +509,36 @@ Compute the tangent of `x`, where the output is in radians. """ function tan_fast end -let -global tan_fast - -c16d = 9.99583485362149960784268e-06 -c15d = -4.31184585467324750724175e-05 -c14d = 0.000103573238391744000389851 -c13d = -0.000137892809714281708733524 -c12d = 0.000157624358465342784274554 -c11d = -6.07500301486087879295969e-05 -c10d = 0.000148898734751616411290179 -c9d = 0.000219040550724571513561967 -c8d = 0.000595799595197098359744547 -c7d = 0.00145461240472358871965441 -c6d = 0.0035923150771440177410343 -c5d = 0.00886321546662684547901456 -c4d = 0.0218694899718446938985394 -c3d = 0.0539682539049961967903002 -c2d = 0.133333333334818976423364 -c1d = 0.333333333333320047664472 - -c7f = 0.00446636462584137916564941f0 -c6f = -8.3920182078145444393158f-05 -c5f = 0.0109639242291450500488281f0 -c4f = 0.0212360303848981857299805f0 -c3f = 0.0540687143802642822265625f0 -c2f = 0.133325666189193725585938f0 -c1f = 0.33333361148834228515625f0 - -global @inline tan_fast_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d -global @inline tan_fast_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f c7f +@inline function tan_fast_kernel(x::Float64) + c16d = 9.99583485362149960784268e-06 + c15d = -4.31184585467324750724175e-05 + c14d = 0.000103573238391744000389851 + c13d = -0.000137892809714281708733524 + c12d = 0.000157624358465342784274554 + c11d = -6.07500301486087879295969e-05 + c10d = 0.000148898734751616411290179 + c9d = 0.000219040550724571513561967 + c8d = 0.000595799595197098359744547 + c7d = 0.00145461240472358871965441 + c6d = 0.0035923150771440177410343 + c5d = 0.00886321546662684547901456 + c4d = 0.0218694899718446938985394 + c3d = 0.0539682539049961967903002 + c2d = 0.133333333334818976423364 + c1d = 0.333333333333320047664472 + return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d +end + +@inline function tan_fast_kernel(x::Float32) + c7f = 0.00446636462584137916564941f0 + c6f = -8.3920182078145444393158f-05 + c5f = 0.0109639242291450500488281f0 + c4f = 0.0212360303848981857299805f0 + c3f = 0.0540687143802642822265625f0 + c2f = 0.133325666189193725585938f0 + c1f = 0.33333361148834228515625f0 + return @horner x c1f c2f c3f c4f c5f c6f c7f +end function tan_fast(d::T) where {T<:Float64} qh = trunc(d * (2 * T(M_1_PI)) / (1 << 24)) @@ -598,37 +593,37 @@ function tan_fast(d::T) where {T<:Float32} return u end + + +@inline function tan_kernel(x::Double{Float64}) + c15d = 1.01419718511083373224408e-05 + c14d = -2.59519791585924697698614e-05 + c13d = 5.23388081915899855325186e-05 + c12d = -3.05033014433946488225616e-05 + c11d = 7.14707504084242744267497e-05 + c10d = 8.09674518280159187045078e-05 + c9d = 0.000244884931879331847054404 + c8d = 0.000588505168743587154904506 + c7d = 0.00145612788922812427978848 + c6d = 0.00359208743836906619142924 + c5d = 0.00886323944362401618113356 + c4d = 0.0218694882853846389592078 + c3d = 0.0539682539781298417636002 + c2d = 0.133333333333125941821962 + c1d = 0.333333333333334980164153 + return dadd(c1d, x.hi * (@horner x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d)) end -let -global tan - -c15d = 1.01419718511083373224408e-05 -c14d = -2.59519791585924697698614e-05 -c13d = 5.23388081915899855325186e-05 -c12d = -3.05033014433946488225616e-05 -c11d = 7.14707504084242744267497e-05 -c10d = 8.09674518280159187045078e-05 -c9d = 0.000244884931879331847054404 -c8d = 0.000588505168743587154904506 -c7d = 0.00145612788922812427978848 -c6d = 0.00359208743836906619142924 -c5d = 0.00886323944362401618113356 -c4d = 0.0218694882853846389592078 -c3d = 0.0539682539781298417636002 -c2d = 0.133333333333125941821962 -c1d = 0.333333333333334980164153 - -c7f = 0.00446636462584137916564941f0 -c6f = -8.3920182078145444393158f-05 -c5f = 0.0109639242291450500488281f0 -c4f = 0.0212360303848981857299805f0 -c3f = 0.0540687143802642822265625f0 -c2f = 0.133325666189193725585938f0 -c1f = 0.33333361148834228515625f0 - -global @inline tan_kernel(x::Double{Float64}) = dadd(c1d, x.hi * (@horner x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d)) -global @inline tan_kernel(x::Double{Float32}) = dadd(c1f, x.hi * (@horner x.hi c2f c3f c4f c5f c6f c7f)) +@inline function tan_kernel(x::Double{Float32}) + c7f = 0.00446636462584137916564941f0 + c6f = -8.3920182078145444393158f-05 + c5f = 0.0109639242291450500488281f0 + c4f = 0.0212360303848981857299805f0 + c3f = 0.0540687143802642822265625f0 + c2f = 0.133325666189193725585938f0 + c1f = 0.33333361148834228515625f0 + return dadd(c1f, x.hi * (@horner x.hi c2f c3f c4f c5f c6f c7f)) +end function tan(d::T) where {T<:Float64} qh = trunc(d * (T(M_2_PI)) / (1 << 24)) @@ -692,8 +687,6 @@ function tan(d::T) where {T<:Float32} return v end -end - """ @@ -708,47 +701,46 @@ function atan(x::T) where {T<:Union{Float32,Float64}} end +@inline function atan_fast_kernel(x::Float64) + c19d = -1.88796008463073496563746e-05 + c18d = 0.000209850076645816976906797 + c17d = -0.00110611831486672482563471 + c16d = 0.00370026744188713119232403 + c15d = -0.00889896195887655491740809 + c14d = 0.016599329773529201970117 + c13d = -0.0254517624932312641616861 + c12d = 0.0337852580001353069993897 + c11d = -0.0407629191276836500001934 + c10d = 0.0466667150077840625632675 + c9d = -0.0523674852303482457616113 + c8d = 0.0587666392926673580854313 + c7d = -0.0666573579361080525984562 + c6d = 0.0769219538311769618355029 + c5d = -0.090908995008245008229153 + c4d = 0.111111105648261418443745 + c3d = -0.14285714266771329383765 + c2d = 0.199999999996591265594148 + c1d = -0.333333333333311110369124 + return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d c17d c18d c19d +end + +@inline function atan_fast_kernel(x::Float32) + c8f = 0.00282363896258175373077393f0 + c7f = -0.0159569028764963150024414f0 + c6f = 0.0425049886107444763183594f0 + c5f = -0.0748900920152664184570312f0 + c4f = 0.106347933411598205566406f0 + c3f = -0.142027363181114196777344f0 + c2f = 0.199926957488059997558594f0 + c1f = -0.333331018686294555664062f0 + return @horner x c1f c2f c3f c4f c5f c6f c7f c8f +end + """ atan_fast(x) Compute the inverse tangent of `x`, where the output is in radians. """ -function atan_fast end -let -global atan_fast - -c19d = -1.88796008463073496563746e-05 -c18d = 0.000209850076645816976906797 -c17d = -0.00110611831486672482563471 -c16d = 0.00370026744188713119232403 -c15d = -0.00889896195887655491740809 -c14d = 0.016599329773529201970117 -c13d = -0.0254517624932312641616861 -c12d = 0.0337852580001353069993897 -c11d = -0.0407629191276836500001934 -c10d = 0.0466667150077840625632675 -c9d = -0.0523674852303482457616113 -c8d = 0.0587666392926673580854313 -c7d = -0.0666573579361080525984562 -c6d = 0.0769219538311769618355029 -c5d = -0.090908995008245008229153 -c4d = 0.111111105648261418443745 -c3d = -0.14285714266771329383765 -c2d = 0.199999999996591265594148 -c1d = -0.333333333333311110369124 - -c8f = 0.00282363896258175373077393f0 -c7f = -0.0159569028764963150024414f0 -c6f = 0.0425049886107444763183594f0 -c5f = -0.0748900920152664184570312f0 -c4f = 0.106347933411598205566406f0 -c3f = -0.142027363181114196777344f0 -c2f = 0.199926957488059997558594f0 -c1f = -0.333331018686294555664062f0 - -global @inline _atan_fast(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d c17d c18d c19d -global @inline _atan_fast(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f c7f c8f - function atan_fast(x::T) where {T<:Union{Float32,Float64}} q = 0 if signbit(x) @@ -760,13 +752,12 @@ function atan_fast(x::T) where {T<:Union{Float32,Float64}} q |= 1 end t = x * x - u = _atan_fast(t) + u = atan_fast_kernel(t) t = x + x * t * u q & 1 != 0 && (t = T(PI_2) - t) q & 2 != 0 && (t = -t) return t end -end const under_atan2(::Type{Float64}) = 5.5626846462680083984e-309