From 9a3bacf92ca1f22257141c386dced64afefe40f7 Mon Sep 17 00:00:00 2001
From: Mus M <mus-m@outlook.com>
Date: Fri, 10 Nov 2017 10:59:35 -0500
Subject: [PATCH] Simplify kernel constructs

---
 src/exp.jl  | 111 ++++++++---------
 src/log.jl  |  46 ++++---
 src/priv.jl | 132 ++++++++++----------
 src/trig.jl | 347 +++++++++++++++++++++++++---------------------------
 4 files changed, 309 insertions(+), 327 deletions(-)

diff --git a/src/exp.jl b/src/exp.jl
index 265ebd5..ab7b031 100644
--- a/src/exp.jl
+++ b/src/exp.jl
@@ -14,39 +14,36 @@ const max_exp2(::Type{Float32}) = 128f0
 const min_exp2(::Type{Float64}) = -1075
 const min_exp2(::Type{Float32}) = -150f0
 
+@inline function exp2_kernel(x::Float64)
+    c11d = 0.4434359082926529454e-9
+    c10d = 0.7073164598085707425e-8
+    c9d  = 0.1017819260921760451e-6
+    c8d  = 0.1321543872511327615e-5
+    c7d  = 0.1525273353517584730e-4
+    c6d  = 0.1540353045101147808e-3
+    c5d  = 0.1333355814670499073e-2
+    c4d  = 0.9618129107597600536e-2
+    c3d  = 0.5550410866482046596e-1
+    c2d  = 0.2402265069591012214
+    c1d  = 0.6931471805599452862
+    return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d
+end
+
+@inline function exp2_kernel(x::Float32)
+    c6f = 0.1535920892f-3
+    c5f = 0.1339262701f-2
+    c4f = 0.9618384764f-2
+    c3f = 0.5550347269f-1
+    c2f = 0.2402264476f0
+    c1f = 0.6931471825f0
+    return @horner x c1f c2f c3f c4f c5f c6f
+end
+
 """
     exp2(x)
 
 Compute the base-`2` exponential of `x`, that is `2ˣ`.
 """
-function exp2 end
-
-let
-global exp2
-
-
-c11d = 0.4434359082926529454e-9
-c10d = 0.7073164598085707425e-8
-c9d  = 0.1017819260921760451e-6
-c8d  = 0.1321543872511327615e-5
-c7d  = 0.1525273353517584730e-4
-c6d  = 0.1540353045101147808e-3
-c5d  = 0.1333355814670499073e-2
-c4d  = 0.9618129107597600536e-2
-c3d  = 0.5550410866482046596e-1
-c2d  = 0.2402265069591012214
-c1d  = 0.6931471805599452862
-
-c6f = 0.1535920892f-3
-c5f = 0.1339262701f-2
-c4f = 0.9618384764f-2
-c3f = 0.5550347269f-1
-c2f = 0.2402264476f0
-c1f = 0.6931471825f0
-
-global @inline exp2_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d
-global @inline exp2_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f
-
 function exp2(d::T) where {T<:Union{Float32,Float64}}
     q = unsafe_trunc(Int, round(d))
     s = d - q
@@ -60,7 +57,7 @@ function exp2(d::T) where {T<:Union{Float32,Float64}}
     d < min_exp2(T) && (u = T(0.0))
     return u
 end
-end
+
 
 const max_exp10(::Type{Float64}) = 3.08254715559916743851e2 # log 2^1023*(2-2^-52)
 const max_exp10(::Type{Float32}) = 38.531839419103626f0 # log 2^127 *(2-2^-23) 
@@ -97,44 +94,43 @@ function expm1(x::T) where {T<:Union{Float32,Float64}}
     return u
 end
 
+
 const max_exp(::Type{Float64}) = 709.78271114955742909217217426  # log 2^1023*(2-2^-52)
 const max_exp(::Type{Float32}) = 88.72283905206835f0             # log 2^127 *(2-2^-23)
 
 const min_exp(::Type{Float64}) = -7.451332191019412076235e2 # log 2^-1075
 const min_exp(::Type{Float32}) = -103.97208f0               # ≈ log 2^-150
 
+@inline function exp_kernel(x::Float64)
+    c11d = 2.08860621107283687536341e-09
+    c10d = 2.51112930892876518610661e-08
+    c9d  = 2.75573911234900471893338e-07
+    c8d  = 2.75572362911928827629423e-06
+    c7d  = 2.4801587159235472998791e-05
+    c6d  = 0.000198412698960509205564975
+    c5d  = 0.00138888888889774492207962
+    c4d  = 0.00833333333331652721664984
+    c3d  = 0.0416666666666665047591422
+    c2d  = 0.166666666666666851703837
+    c1d  = 0.50
+    return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d
+end
+
+@inline function exp_kernel(x::Float32)
+    c6f = 0.000198527617612853646278381f0
+    c5f = 0.00139304355252534151077271f0
+    c4f = 0.00833336077630519866943359f0
+    c3f = 0.0416664853692054748535156f0
+    c2f = 0.166666671633720397949219f0
+    c1f = 0.5f0
+    return @horner x c1f c2f c3f c4f c5f c6f
+end
+
 """
     exp(x)
 
 Compute the base-`e` exponential of `x`, that is `eˣ`.
 """
-function exp end
-
-let
-global exp
-
-c11d = 2.08860621107283687536341e-09
-c10d = 2.51112930892876518610661e-08
-c9d  = 2.75573911234900471893338e-07
-c8d  = 2.75572362911928827629423e-06
-c7d  = 2.4801587159235472998791e-05
-c6d  = 0.000198412698960509205564975
-c5d  = 0.00138888888889774492207962
-c4d  = 0.00833333333331652721664984
-c3d  = 0.0416666666666665047591422
-c2d  = 0.166666666666666851703837
-c1d  = 0.50
-
-c6f = 0.000198527617612853646278381f0
-c5f = 0.00139304355252534151077271f0
-c4f = 0.00833336077630519866943359f0
-c3f = 0.0416664853692054748535156f0
-c2f = 0.166666671633720397949219f0
-c1f = 0.5f0
-
-global @inline exp_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d  
-global @inline exp_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f
-
 function exp(d::T) where {T<:Union{Float32,Float64}}
     q = unsafe_trunc(Int, round(T(MLN2E) * d))
     s = muladd(q, -L2U(T), d)
@@ -145,9 +141,8 @@ function exp(d::T) where {T<:Union{Float32,Float64}}
     u = s * s * u + s + 1
     u = ldexp2k(u, q)
 
-    d < min_exp(T) && (u = T(0))
     d > max_exp(T) && (u = T(Inf))
+    d < min_exp(T) && (u = T(0))
 
     return u
 end
-end
diff --git a/src/log.jl b/src/log.jl
index c23829c..3406db1 100644
--- a/src/log.jl
+++ b/src/log.jl
@@ -111,35 +111,34 @@ end
 # That being said, since this converges faster when the argument is close to
 # 1, we multiply  `m` by `2` and subtract 1 for the exponent `e` when `m` is
 # less than `sqrt(2)/2`
+
+@inline function log_fast_kernel(x::Float64)
+    c8d = 0.153487338491425068243146
+    c7d = 0.152519917006351951593857
+    c6d = 0.181863266251982985677316
+    c5d = 0.222221366518767365905163
+    c4d = 0.285714294746548025383248
+    c3d = 0.399999999950799600689777
+    c2d = 0.6666666666667778740063
+    c1d = 2.0
+    return @horner x c1d c2d c3d c4d c5d c6d c7d c8d
+end
+
+@inline function log_fast_kernel(x::Float32)
+    c5f = 0.2392828464508056640625f0
+    c4f = 0.28518211841583251953125f0
+    c3f = 0.400005877017974853515625f0
+    c2f = 0.666666686534881591796875f0
+    c1f = 2f0
+    return @horner x c1f c2f c3f c4f c5f
+end
+
 """
     log_fast(x)
 
 Compute the natural logarithm of `x`. The inverse of the natural logarithm is
 the natural expoenential function `exp(x)`
 """
-function log_fast end
-
-let
-global log_fast
-
-c8d = 0.153487338491425068243146
-c7d = 0.152519917006351951593857
-c6d = 0.181863266251982985677316
-c5d = 0.222221366518767365905163
-c4d = 0.285714294746548025383248
-c3d = 0.399999999950799600689777
-c2d = 0.6666666666667778740063
-c1d = 2.0
-
-c5f = 0.2392828464508056640625f0
-c4f = 0.28518211841583251953125f0
-c3f = 0.400005877017974853515625f0
-c2f = 0.666666686534881591796875f0
-c1f = 2f0
-
-global @inline log_fast_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d
-global @inline log_fast_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f
-
 function log_fast(d::T) where {T<:Union{Float32,Float64}}
     o = d < realmin(T)
     o && (d *= T(Int64(1) << 32) * T(Int64(1) << 32))
@@ -161,4 +160,3 @@ function log_fast(d::T) where {T<:Union{Float32,Float64}}
 
     return x
 end
-end
diff --git a/src/priv.jl b/src/priv.jl
index 00ef8b5..2fe3a82 100644
--- a/src/priv.jl
+++ b/src/priv.jl
@@ -170,32 +170,31 @@ end
 end
 
 
-let
-global expk
-global expk2
-
-c10d = 2.51069683420950419527139e-08
-c9d  = 2.76286166770270649116855e-07
-c8d  = 2.75572496725023574143864e-06
-c7d  = 2.48014973989819794114153e-05
-c6d  = 0.000198412698809069797676111
-c5d  = 0.0013888888939977128960529
-c4d  = 0.00833333333332371417601081
-c3d  = 0.0416666666665409524128449
-c2d  = 0.166666666666666740681535
-c1d  = 0.500000000000000999200722
-
-c5f = 0.00136324646882712841033936f0
-c4f = 0.00836596917361021041870117f0
-c3f = 0.0416710823774337768554688f0
-c2f = 0.166665524244308471679688f0
-c1f = 0.499999850988388061523438f0
-
-global @inline expk_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d
-global @inline expk_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f
-
-global under_expk(::Type{Float64}) = -1000.0
-global under_expk(::Type{Float32}) = -104f0
+const under_expk(::Type{Float64}) = -1000.0
+const under_expk(::Type{Float32}) = -104f0
+
+@inline function  expk_kernel(x::Float64)
+    c10d = 2.51069683420950419527139e-08
+    c9d  = 2.76286166770270649116855e-07
+    c8d  = 2.75572496725023574143864e-06
+    c7d  = 2.48014973989819794114153e-05
+    c6d  = 0.000198412698809069797676111
+    c5d  = 0.0013888888939977128960529
+    c4d  = 0.00833333333332371417601081
+    c3d  = 0.0416666666665409524128449
+    c2d  = 0.166666666666666740681535
+    c1d  = 0.500000000000000999200722
+    return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d
+end
+
+@inline function  expk_kernel(x::Float32)
+    c5f = 0.00136324646882712841033936f0
+    c4f = 0.00836596917361021041870117f0
+    c3f = 0.0416710823774337768554688f0
+    c2f = 0.166665524244308471679688f0
+    c1f = 0.499999850988388061523438f0
+    return @horner x c1f c2f c3f c4f c5f
+end
 
 @inline function expk(d::Double{T}) where {T<:Union{Float32,Float64}}
     q = round(T(d) * T(MLN2E))
@@ -232,28 +231,28 @@ end
     t = dadd(T(1.0), t)
     return scale(scale(t, T(2.0)), pow2i(T, unsafe_trunc(Int, q - 1)))
 end
-end
 
 
-let
-global logk2
-
-c8d = 0.13860436390467167910856
-c7d = 0.131699838841615374240845
-c6d = 0.153914168346271945653214
-c5d = 0.181816523941564611721589
-c4d = 0.22222224632662035403996
-c3d = 0.285714285511134091777308
-c2d = 0.400000000000914013309483
-c1d = 0.666666666666664853302393
 
-c4f = 0.240320354700088500976562f0
-c3f = 0.285112679004669189453125f0
-c2f = 0.400007992982864379882812f0
-c1f = 0.666666686534881591796875f0
+@inline function logk2_kernel(x::Float64)
+    c8d = 0.13860436390467167910856
+    c7d = 0.131699838841615374240845
+    c6d = 0.153914168346271945653214
+    c5d = 0.181816523941564611721589
+    c4d = 0.22222224632662035403996
+    c3d = 0.285714285511134091777308
+    c2d = 0.400000000000914013309483
+    c1d = 0.666666666666664853302393
+    return @horner x c1d c2d c3d c4d c5d c6d c7d c8d
+end
 
-global @inline logk2_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d
-global @inline logk2_kernel(x::Float32) = @horner x c1f c2f c3f c4f
+@inline function logk2_kernel(x::Float32)
+    c4f = 0.240320354700088500976562f0
+    c3f = 0.285112679004669189453125f0
+    c2f = 0.400007992982864379882812f0
+    c1f = 0.666666686534881591796875f0
+    return @horner x c1f c2f c3f c4f
+end
 
 @inline function logk2(d::Double{T}) where {T<:Union{Float32,Float64}}
     e  = ilogbk(d.hi * T(1.0/0.75))
@@ -266,28 +265,30 @@ global @inline logk2_kernel(x::Float32) = @horner x c1f c2f c3f c4f
 
     dadd(dmul(MDLN2(T), T(e)), dadd(scale(x, T(2.0)), dmul(dmul(x2, x), t)))
 end
+
+
+
+@inline function logk_kernel(x::Double{Float64})
+    c10d = 0.116255524079935043668677
+    c9d = 0.103239680901072952701192
+    c8d = 0.117754809412463995466069
+    c7d = 0.13332981086846273921509
+    c6d = 0.153846227114512262845736
+    c5d = 0.181818180850050775676507
+    c4d = 0.222222222230083560345903
+    c3d = 0.285714285714249172087875
+    c2d = 0.400000000000000077715612
+    c1dd = Double(0.666666666666666629659233, 3.80554962542412056336616e-17)
+    dadd2(c1dd, dmul(x, @horner x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d))
 end
 
-let
-global logk
-c10d = 0.116255524079935043668677
-c9d = 0.103239680901072952701192
-c8d = 0.117754809412463995466069
-c7d = 0.13332981086846273921509
-c6d = 0.153846227114512262845736
-c5d = 0.181818180850050775676507
-c4d = 0.222222222230083560345903
-c3d = 0.285714285714249172087875
-c2d = 0.400000000000000077715612
-c1dd = Double(0.666666666666666629659233, 3.80554962542412056336616e-17);
-
-c4f = 0.240320354700088500976562f0
-c3f = 0.285112679004669189453125f0
-c2f = 0.400007992982864379882812f0
-c1fd = Double(0.66666662693023681640625f0, 3.69183861259614332084311f-9)
-
-global @inline logk_kernel(x::Double{Float64}) = dadd2(c1dd, dmul(x, @horner x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d))
-global @inline logk_kernel(x::Double{Float32}) = dadd2(c1fd, dmul(x, @horner x.hi c2f c3f c4f))
+@inline function logk_kernel(x::Double{Float32})
+    c4f = 0.240320354700088500976562f0
+    c3f = 0.285112679004669189453125f0
+    c2f = 0.400007992982864379882812f0
+    c1fd = Double(0.66666662693023681640625f0, 3.69183861259614332084311f-9)    
+    dadd2(c1fd, dmul(x, @horner x.hi c2f c3f c4f))
+end
 
 @inline function logk(d::T) where {T<:Union{Float32,Float64}}
     o = d < realmin(T)
@@ -305,6 +306,3 @@ global @inline logk_kernel(x::Double{Float32}) = dadd2(c1fd, dmul(x, @horner x.h
 
     dadd(dmul(MDLN2(T), T(e)), dadd(scale(x, T(2.0)), dmul(dmul(x2, x), t)))
 end
-
-
-end
diff --git a/src/trig.jl b/src/trig.jl
index 8b4c129..d30b3d5 100644
--- a/src/trig.jl
+++ b/src/trig.jl
@@ -14,26 +14,25 @@ Compute the cosine of `x`, where the output is in radians.
 """
 function cos end
 
-let
-global sin
-global cos
-
-c8d =  2.72052416138529567917983e-15
-c7d = -7.64292594113954471900203e-13
-c6d =  1.60589370117277896211623e-10
-c5d = -2.5052106814843123359368e-08
-c4d =  2.75573192104428224777379e-06
-c3d = -0.000198412698412046454654947
-c2d =  0.00833333333333318056201922
-c1d = -0.166666666666666657414808
-
-c4f =  2.6083159809786593541503f-06
-c3f = -0.0001981069071916863322258f0
-c2f =  0.00833307858556509017944336f0
-c1f = -0.166666597127914428710938f0
-
-global @inline sincos_kernel(x::Double{Float64}) = dadd(c1d, x.hi * (@horner x.hi c2d c3d c4d c5d c6d c7d c8d))
-global @inline sincos_kernel(x::Double{Float32}) = dadd(c1f, x.hi * (@horner x.hi c2f c3f c4f))
+@inline function sincos_kernel(x::Double{Float64})
+    c8d =  2.72052416138529567917983e-15
+    c7d = -7.64292594113954471900203e-13
+    c6d =  1.60589370117277896211623e-10
+    c5d = -2.5052106814843123359368e-08
+    c4d =  2.75573192104428224777379e-06
+    c3d = -0.000198412698412046454654947
+    c2d =  0.00833333333333318056201922
+    c1d = -0.166666666666666657414808
+    return dadd(c1d, x.hi * (@horner x.hi c2d c3d c4d c5d c6d c7d c8d))
+end
+
+@inline function sincos_kernel(x::Double{Float32})
+    c4f =  2.6083159809786593541503f-06
+    c3f = -0.0001981069071916863322258f0
+    c2f =  0.00833307858556509017944336f0
+    c1f = -0.166666597127914428710938f0
+    return dadd(c1f, x.hi * (@horner x.hi c2f c3f c4f))
+end
 
 function sin(d::T) where {T<:Float64}
     qh = trunc(d * (T(M_1_PI) / (1 << 24)))
@@ -140,7 +139,6 @@ function cos(d::T) where {T<:Float32}
 
     return u
 end
-end
 
 
 """
@@ -157,27 +155,6 @@ Compute the cosine of `x`, where the output is in radians.
 """
 function cos_fast end
 
-let 
-global sin_fast
-global cos_fast
-
-c9d = -7.97255955009037868891952e-18
-c8d =  2.81009972710863200091251e-15
-c7d = -7.64712219118158833288484e-13
-c6d =  1.60590430605664501629054e-10
-c5d = -2.50521083763502045810755e-08
-c4d =  2.75573192239198747630416e-06
-c3d = -0.000198412698412696162806809
-c2d =  0.00833333333333332974823815
-c1d = -0.166666666666666657414808
-
-# c5f is 0f0 to handle Inf32 case, Float64 doesn't need this since it comes
-# out properly (add another neg constant and remove this zero constant)
-c4f =  2.6083159809786593541503f-06
-c3f = -0.0001981069071916863322258f0
-c2f =  0.00833307858556509017944336f0
-c1f = -0.166666597127914428710938f0
-
 # Argument is first reduced to the domain 0 < s < π/4
 
 # We return the correct sign using `q & 1 != 0` i.e. q is odd (this works for
@@ -185,8 +162,25 @@ c1f = -0.166666597127914428710938f0
 # we are now in the negative branch of sin(x). Recall that q is just the integer
 # part of d/π and thus we can determine the correct sign using this information.
 
-global @inline sincos_fast_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d
-global @inline sincos_fast_kernel(x::Float32) = @horner x c1f c2f c3f c4f
+@inline function sincos_fast_kernel(x::Float64)
+    c9d = -7.97255955009037868891952e-18
+    c8d =  2.81009972710863200091251e-15
+    c7d = -7.64712219118158833288484e-13
+    c6d =  1.60590430605664501629054e-10
+    c5d = -2.50521083763502045810755e-08
+    c4d =  2.75573192239198747630416e-06
+    c3d = -0.000198412698412696162806809
+    c2d =  0.00833333333333332974823815
+    c1d = -0.166666666666666657414808
+    return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d
+end
+@inline function sincos_fast_kernel(x::Float32)
+    c4f =  2.6083159809786593541503f-06
+    c3f = -0.0001981069071916863322258f0
+    c2f =  0.00833307858556509017944336f0
+    c1f = -0.166666597127914428710938f0   
+    return @horner x c1f c2f c3f c4f
+end
 
 function sin_fast(d::T) where {T<:Float64}
     t = d
@@ -289,7 +283,6 @@ function cos_fast(d::T) where {T<:Float32}
 
     return u
 end
-end
 
 
 
@@ -309,39 +302,42 @@ radians, returning a tuple.
 """
 function sincos_fast end
 
-let
-global sincos
-global sincos_fast
-
-a6d =  1.58938307283228937328511e-10
-a5d = -2.50506943502539773349318e-08
-a4d =  2.75573131776846360512547e-06
-a3d = -0.000198412698278911770864914
-a2d =  0.0083333333333191845961746
-a1d = -0.166666666666666130709393
-
-a3f = -0.000195169282960705459117889f0
-a2f =  0.00833215750753879547119141f0
-a1f = -0.166666537523269653320312f0
-
-b7d = -1.13615350239097429531523e-11
-b6d =  2.08757471207040055479366e-09
-b5d = -2.75573144028847567498567e-07
-b4d =  2.48015872890001867311915e-05
-b3d = -0.00138888888888714019282329
-b2d =  0.0416666666666665519592062
-b1d = -0.50
-
-b5f = -2.71811842367242206819355f-07
-b4f =  2.47990446951007470488548f-05
-b3f = -0.00138888787478208541870117f0
-b2f =  0.0416666641831398010253906f0
-b1f = -0.5f0
-
-global @inline sincos_a_kernel(x::Float64) = @horner x a1d a2d a3d a4d a5d a6d
-global @inline sincos_a_kernel(x::Float32) = @horner x a1f a2f a3f
-global @inline sincos_b_kernel(x::Float64) = @horner x b1d b2d b3d b4d b5d b6d b7d
-global @inline sincos_b_kernel(x::Float32) = @horner x b1f b2f b3f b4f b5f
+@inline function sincos_a_kernel(x::Float64)
+    a6d =  1.58938307283228937328511e-10
+    a5d = -2.50506943502539773349318e-08
+    a4d =  2.75573131776846360512547e-06
+    a3d = -0.000198412698278911770864914
+    a2d =  0.0083333333333191845961746
+    a1d = -0.166666666666666130709393
+    return @horner x a1d a2d a3d a4d a5d a6d
+end
+
+@inline function sincos_a_kernel(x::Float32)
+    a3f = -0.000195169282960705459117889f0
+    a2f =  0.00833215750753879547119141f0
+    a1f = -0.166666537523269653320312f0
+    return @horner x a1f a2f a3f
+end
+
+@inline function sincos_b_kernel(x::Float64)
+    b7d = -1.13615350239097429531523e-11
+    b6d =  2.08757471207040055479366e-09
+    b5d = -2.75573144028847567498567e-07
+    b4d =  2.48015872890001867311915e-05
+    b3d = -0.00138888888888714019282329
+    b2d =  0.0416666666666665519592062
+    b1d = -0.50
+    return @horner x b1d b2d b3d b4d b5d b6d b7d
+end
+
+@inline function sincos_b_kernel(x::Float32)
+    b5f = -2.71811842367242206819355f-07
+    b4f =  2.47990446951007470488548f-05
+    b3f = -0.00138888787478208541870117f0
+    b2f =  0.0416666641831398010253906f0
+    b1f = -0.5f0
+    return @horner x b1f b2f b3f b4f b5f
+end
 
 function sincos_fast(d::T) where {T<:Float64}
     s = d
@@ -497,7 +493,6 @@ function sincos(d::T) where {T<:Float32}
 
     return (rx, ry)
 end
-end
 
 
 """
@@ -514,36 +509,36 @@ Compute the tangent of `x`, where the output is in radians.
 """
 function tan_fast end
 
-let
-global tan_fast
-
-c16d =  9.99583485362149960784268e-06
-c15d = -4.31184585467324750724175e-05
-c14d =  0.000103573238391744000389851
-c13d = -0.000137892809714281708733524
-c12d =  0.000157624358465342784274554
-c11d =  -6.07500301486087879295969e-05
-c10d  =  0.000148898734751616411290179
-c9d  =  0.000219040550724571513561967
-c8d  =  0.000595799595197098359744547
-c7d  =  0.00145461240472358871965441
-c6d  =  0.0035923150771440177410343
-c5d  =  0.00886321546662684547901456
-c4d  =  0.0218694899718446938985394
-c3d  =  0.0539682539049961967903002
-c2d  =  0.133333333334818976423364
-c1d  =  0.333333333333320047664472
-
-c7f =  0.00446636462584137916564941f0
-c6f = -8.3920182078145444393158f-05
-c5f =  0.0109639242291450500488281f0
-c4f =  0.0212360303848981857299805f0
-c3f =  0.0540687143802642822265625f0
-c2f =  0.133325666189193725585938f0
-c1f =  0.33333361148834228515625f0
-
-global @inline tan_fast_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d
-global @inline tan_fast_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f c7f
+@inline function tan_fast_kernel(x::Float64)
+    c16d =  9.99583485362149960784268e-06
+    c15d = -4.31184585467324750724175e-05
+    c14d =  0.000103573238391744000389851
+    c13d = -0.000137892809714281708733524
+    c12d =  0.000157624358465342784274554
+    c11d =  -6.07500301486087879295969e-05
+    c10d  =  0.000148898734751616411290179
+    c9d  =  0.000219040550724571513561967
+    c8d  =  0.000595799595197098359744547
+    c7d  =  0.00145461240472358871965441
+    c6d  =  0.0035923150771440177410343
+    c5d  =  0.00886321546662684547901456
+    c4d  =  0.0218694899718446938985394
+    c3d  =  0.0539682539049961967903002
+    c2d  =  0.133333333334818976423364
+    c1d  =  0.333333333333320047664472
+    return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d
+end
+
+@inline function tan_fast_kernel(x::Float32)
+    c7f =  0.00446636462584137916564941f0
+    c6f = -8.3920182078145444393158f-05
+    c5f =  0.0109639242291450500488281f0
+    c4f =  0.0212360303848981857299805f0
+    c3f =  0.0540687143802642822265625f0
+    c2f =  0.133325666189193725585938f0
+    c1f =  0.33333361148834228515625f0
+    return @horner x c1f c2f c3f c4f c5f c6f c7f
+end
 
 function tan_fast(d::T) where {T<:Float64}
     qh = trunc(d * (2 * T(M_1_PI)) / (1 << 24))
@@ -598,37 +593,37 @@ function tan_fast(d::T) where {T<:Float32}
 
     return u
 end
+
+
+@inline function tan_kernel(x::Double{Float64})
+    c15d = 1.01419718511083373224408e-05
+    c14d = -2.59519791585924697698614e-05
+    c13d = 5.23388081915899855325186e-05
+    c12d = -3.05033014433946488225616e-05
+    c11d = 7.14707504084242744267497e-05
+    c10d = 8.09674518280159187045078e-05
+    c9d = 0.000244884931879331847054404
+    c8d = 0.000588505168743587154904506
+    c7d = 0.00145612788922812427978848
+    c6d = 0.00359208743836906619142924
+    c5d = 0.00886323944362401618113356
+    c4d = 0.0218694882853846389592078
+    c3d = 0.0539682539781298417636002
+    c2d = 0.133333333333125941821962
+    c1d = 0.333333333333334980164153
+    return dadd(c1d, x.hi * (@horner x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d))
 end
 
-let
-global tan
-
-c15d = 1.01419718511083373224408e-05
-c14d = -2.59519791585924697698614e-05
-c13d = 5.23388081915899855325186e-05
-c12d = -3.05033014433946488225616e-05
-c11d = 7.14707504084242744267497e-05
-c10d = 8.09674518280159187045078e-05
-c9d = 0.000244884931879331847054404
-c8d = 0.000588505168743587154904506
-c7d = 0.00145612788922812427978848
-c6d = 0.00359208743836906619142924
-c5d = 0.00886323944362401618113356
-c4d = 0.0218694882853846389592078
-c3d = 0.0539682539781298417636002
-c2d = 0.133333333333125941821962
-c1d = 0.333333333333334980164153
-
-c7f =  0.00446636462584137916564941f0
-c6f = -8.3920182078145444393158f-05
-c5f =  0.0109639242291450500488281f0
-c4f =  0.0212360303848981857299805f0
-c3f =  0.0540687143802642822265625f0
-c2f =  0.133325666189193725585938f0
-c1f =  0.33333361148834228515625f0
-
-global @inline tan_kernel(x::Double{Float64}) = dadd(c1d, x.hi * (@horner x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d))
-global @inline tan_kernel(x::Double{Float32}) = dadd(c1f,  x.hi * (@horner x.hi c2f c3f c4f c5f c6f c7f))
+@inline function tan_kernel(x::Double{Float32})
+    c7f =  0.00446636462584137916564941f0
+    c6f = -8.3920182078145444393158f-05
+    c5f =  0.0109639242291450500488281f0
+    c4f =  0.0212360303848981857299805f0
+    c3f =  0.0540687143802642822265625f0
+    c2f =  0.133325666189193725585938f0
+    c1f =  0.33333361148834228515625f0
+    return dadd(c1f,  x.hi * (@horner x.hi c2f c3f c4f c5f c6f c7f))
+end
 
 function tan(d::T) where {T<:Float64}
     qh = trunc(d * (T(M_2_PI)) / (1 << 24))
@@ -692,8 +687,6 @@ function tan(d::T) where {T<:Float32}
 
     return v
 end
-end
-
 
 
 """
@@ -708,47 +701,46 @@ function atan(x::T) where {T<:Union{Float32,Float64}}
 end
 
 
+@inline function atan_fast_kernel(x::Float64)
+    c19d = -1.88796008463073496563746e-05
+    c18d =  0.000209850076645816976906797
+    c17d = -0.00110611831486672482563471
+    c16d =  0.00370026744188713119232403
+    c15d = -0.00889896195887655491740809
+    c14d =  0.016599329773529201970117
+    c13d = -0.0254517624932312641616861
+    c12d =  0.0337852580001353069993897
+    c11d = -0.0407629191276836500001934
+    c10d =  0.0466667150077840625632675
+    c9d  = -0.0523674852303482457616113
+    c8d  =  0.0587666392926673580854313
+    c7d  = -0.0666573579361080525984562
+    c6d  =  0.0769219538311769618355029
+    c5d  = -0.090908995008245008229153
+    c4d  =  0.111111105648261418443745
+    c3d  = -0.14285714266771329383765
+    c2d  =  0.199999999996591265594148
+    c1d  = -0.333333333333311110369124
+    return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d c17d c18d c19d
+end
+
+@inline function atan_fast_kernel(x::Float32)
+    c8f =  0.00282363896258175373077393f0
+    c7f = -0.0159569028764963150024414f0
+    c6f =  0.0425049886107444763183594f0
+    c5f = -0.0748900920152664184570312f0
+    c4f =  0.106347933411598205566406f0
+    c3f = -0.142027363181114196777344f0
+    c2f =  0.199926957488059997558594f0
+    c1f = -0.333331018686294555664062f0
+    return @horner x c1f c2f c3f c4f c5f c6f c7f c8f
+end
+
 """
     atan_fast(x)
 
 Compute the inverse tangent of `x`, where the output is in radians.
 """
-function atan_fast end
-let 
-global atan_fast
-
-c19d = -1.88796008463073496563746e-05
-c18d =  0.000209850076645816976906797
-c17d = -0.00110611831486672482563471
-c16d =  0.00370026744188713119232403
-c15d = -0.00889896195887655491740809
-c14d =  0.016599329773529201970117
-c13d = -0.0254517624932312641616861
-c12d =  0.0337852580001353069993897
-c11d = -0.0407629191276836500001934
-c10d =  0.0466667150077840625632675
-c9d  = -0.0523674852303482457616113
-c8d  =  0.0587666392926673580854313
-c7d  = -0.0666573579361080525984562
-c6d  =  0.0769219538311769618355029
-c5d  = -0.090908995008245008229153
-c4d  =  0.111111105648261418443745
-c3d  = -0.14285714266771329383765
-c2d  =  0.199999999996591265594148
-c1d  = -0.333333333333311110369124
-
-c8f =  0.00282363896258175373077393f0
-c7f = -0.0159569028764963150024414f0
-c6f =  0.0425049886107444763183594f0
-c5f = -0.0748900920152664184570312f0
-c4f =  0.106347933411598205566406f0
-c3f = -0.142027363181114196777344f0
-c2f =  0.199926957488059997558594f0
-c1f = -0.333331018686294555664062f0
-
-global @inline _atan_fast(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d c12d c13d c14d c15d c16d c17d c18d c19d
-global @inline _atan_fast(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f c7f c8f
-
 function atan_fast(x::T) where {T<:Union{Float32,Float64}}
     q = 0
     if signbit(x)
@@ -760,13 +752,12 @@ function atan_fast(x::T) where {T<:Union{Float32,Float64}}
         q |= 1
     end
     t = x * x
-    u = _atan_fast(t)
+    u = atan_fast_kernel(t)
     t = x + x * t * u
     q & 1 != 0 && (t = T(PI_2) - t)
     q & 2 != 0 && (t = -t)
     return t
 end
-end
 
 
 const under_atan2(::Type{Float64}) = 5.5626846462680083984e-309