Simplify kernel constructs

musm · Nov 10, 2017 · 9a3bacf · 9a3bacf
1 parent 6e6cf88
commit 9a3bacf
Show file tree

Hide file tree

Showing 4 changed files with 309 additions and 327 deletions.
diff --git a/src/exp.jl b/src/exp.jl
@@ -14,39 +14,36 @@ const max_exp2(::Type{Float32}) = 128f0
 const min_exp2(::Type{Float64}) = -1075
 const min_exp2(::Type{Float32}) = -150f0
 
+@inline function exp2_kernel(x::Float64)
+    c11d = 0.4434359082926529454e-9
+    c10d = 0.7073164598085707425e-8
+    c9d  = 0.1017819260921760451e-6
+    c8d  = 0.1321543872511327615e-5
+    c7d  = 0.1525273353517584730e-4
+    c6d  = 0.1540353045101147808e-3
+    c5d  = 0.1333355814670499073e-2
+    c4d  = 0.9618129107597600536e-2
+    c3d  = 0.5550410866482046596e-1
+    c2d  = 0.2402265069591012214
+    c1d  = 0.6931471805599452862
+    return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d
+end
+
+@inline function exp2_kernel(x::Float32)
+    c6f = 0.1535920892f-3
+    c5f = 0.1339262701f-2
+    c4f = 0.9618384764f-2
+    c3f = 0.5550347269f-1
+    c2f = 0.2402264476f0
+    c1f = 0.6931471825f0
+    return @horner x c1f c2f c3f c4f c5f c6f
+end
+
 """
     exp2(x)
 
 Compute the base-`2` exponential of `x`, that is `2ˣ`.
 """
-function exp2 end
-
-let
-global exp2
-
-
-c11d = 0.4434359082926529454e-9
-c10d = 0.7073164598085707425e-8
-c9d  = 0.1017819260921760451e-6
-c8d  = 0.1321543872511327615e-5
-c7d  = 0.1525273353517584730e-4
-c6d  = 0.1540353045101147808e-3
-c5d  = 0.1333355814670499073e-2
-c4d  = 0.9618129107597600536e-2
-c3d  = 0.5550410866482046596e-1
-c2d  = 0.2402265069591012214
-c1d  = 0.6931471805599452862
-
-c6f = 0.1535920892f-3
-c5f = 0.1339262701f-2
-c4f = 0.9618384764f-2
-c3f = 0.5550347269f-1
-c2f = 0.2402264476f0
-c1f = 0.6931471825f0
-
-global @inline exp2_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d
-global @inline exp2_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f
-
 function exp2(d::T) where {T<:Union{Float32,Float64}}
     q = unsafe_trunc(Int, round(d))
     s = d - q
@@ -60,7 +57,7 @@ function exp2(d::T) where {T<:Union{Float32,Float64}}
     d < min_exp2(T) && (u = T(0.0))
     return u
 end
-end
+
 
 const max_exp10(::Type{Float64}) = 3.08254715559916743851e2 # log 2^1023*(2-2^-52)
 const max_exp10(::Type{Float32}) = 38.531839419103626f0 # log 2^127 *(2-2^-23) 
@@ -97,44 +94,43 @@ function expm1(x::T) where {T<:Union{Float32,Float64}}
     return u
 end
 
+
 const max_exp(::Type{Float64}) = 709.78271114955742909217217426  # log 2^1023*(2-2^-52)
 const max_exp(::Type{Float32}) = 88.72283905206835f0             # log 2^127 *(2-2^-23)
 
 const min_exp(::Type{Float64}) = -7.451332191019412076235e2 # log 2^-1075
 const min_exp(::Type{Float32}) = -103.97208f0               # ≈ log 2^-150
 
+@inline function exp_kernel(x::Float64)
+    c11d = 2.08860621107283687536341e-09
+    c10d = 2.51112930892876518610661e-08
+    c9d  = 2.75573911234900471893338e-07
+    c8d  = 2.75572362911928827629423e-06
+    c7d  = 2.4801587159235472998791e-05
+    c6d  = 0.000198412698960509205564975
+    c5d  = 0.00138888888889774492207962
+    c4d  = 0.00833333333331652721664984
+    c3d  = 0.0416666666666665047591422
+    c2d  = 0.166666666666666851703837
+    c1d  = 0.50
+    return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d
+end
+
+@inline function exp_kernel(x::Float32)
+    c6f = 0.000198527617612853646278381f0
+    c5f = 0.00139304355252534151077271f0
+    c4f = 0.00833336077630519866943359f0
+    c3f = 0.0416664853692054748535156f0
+    c2f = 0.166666671633720397949219f0
+    c1f = 0.5f0
+    return @horner x c1f c2f c3f c4f c5f c6f
+end
+
 """
     exp(x)
 
 Compute the base-`e` exponential of `x`, that is `eˣ`.
 """
-function exp end
-
-let
-global exp
-
-c11d = 2.08860621107283687536341e-09
-c10d = 2.51112930892876518610661e-08
-c9d  = 2.75573911234900471893338e-07
-c8d  = 2.75572362911928827629423e-06
-c7d  = 2.4801587159235472998791e-05
-c6d  = 0.000198412698960509205564975
-c5d  = 0.00138888888889774492207962
-c4d  = 0.00833333333331652721664984
-c3d  = 0.0416666666666665047591422
-c2d  = 0.166666666666666851703837
-c1d  = 0.50
-
-c6f = 0.000198527617612853646278381f0
-c5f = 0.00139304355252534151077271f0
-c4f = 0.00833336077630519866943359f0
-c3f = 0.0416664853692054748535156f0
-c2f = 0.166666671633720397949219f0
-c1f = 0.5f0
-
-global @inline exp_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d  
-global @inline exp_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f c6f
-
 function exp(d::T) where {T<:Union{Float32,Float64}}
     q = unsafe_trunc(Int, round(T(MLN2E) * d))
     s = muladd(q, -L2U(T), d)
@@ -145,9 +141,8 @@ function exp(d::T) where {T<:Union{Float32,Float64}}
     u = s * s * u + s + 1
     u = ldexp2k(u, q)
 
-    d < min_exp(T) && (u = T(0))
     d > max_exp(T) && (u = T(Inf))
+    d < min_exp(T) && (u = T(0))
 
     return u
 end
-end
diff --git a/src/log.jl b/src/log.jl
@@ -111,35 +111,34 @@ end
 # That being said, since this converges faster when the argument is close to
 # 1, we multiply  `m` by `2` and subtract 1 for the exponent `e` when `m` is
 # less than `sqrt(2)/2`
+
+@inline function log_fast_kernel(x::Float64)
+    c8d = 0.153487338491425068243146
+    c7d = 0.152519917006351951593857
+    c6d = 0.181863266251982985677316
+    c5d = 0.222221366518767365905163
+    c4d = 0.285714294746548025383248
+    c3d = 0.399999999950799600689777
+    c2d = 0.6666666666667778740063
+    c1d = 2.0
+    return @horner x c1d c2d c3d c4d c5d c6d c7d c8d
+end
+
+@inline function log_fast_kernel(x::Float32)
+    c5f = 0.2392828464508056640625f0
+    c4f = 0.28518211841583251953125f0
+    c3f = 0.400005877017974853515625f0
+    c2f = 0.666666686534881591796875f0
+    c1f = 2f0
+    return @horner x c1f c2f c3f c4f c5f
+end
+
 """
     log_fast(x)
 
 Compute the natural logarithm of `x`. The inverse of the natural logarithm is
 the natural expoenential function `exp(x)`
 """
-function log_fast end
-
-let
-global log_fast
-
-c8d = 0.153487338491425068243146
-c7d = 0.152519917006351951593857
-c6d = 0.181863266251982985677316
-c5d = 0.222221366518767365905163
-c4d = 0.285714294746548025383248
-c3d = 0.399999999950799600689777
-c2d = 0.6666666666667778740063
-c1d = 2.0
-
-c5f = 0.2392828464508056640625f0
-c4f = 0.28518211841583251953125f0
-c3f = 0.400005877017974853515625f0
-c2f = 0.666666686534881591796875f0
-c1f = 2f0
-
-global @inline log_fast_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d
-global @inline log_fast_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f
-
 function log_fast(d::T) where {T<:Union{Float32,Float64}}
     o = d < realmin(T)
     o && (d *= T(Int64(1) << 32) * T(Int64(1) << 32))
@@ -161,4 +160,3 @@ function log_fast(d::T) where {T<:Union{Float32,Float64}}
 
     return x
 end
-end
diff --git a/src/priv.jl b/src/priv.jl
@@ -170,32 +170,31 @@ end
 end
 
 
-let
-global expk
-global expk2
-
-c10d = 2.51069683420950419527139e-08
-c9d  = 2.76286166770270649116855e-07
-c8d  = 2.75572496725023574143864e-06
-c7d  = 2.48014973989819794114153e-05
-c6d  = 0.000198412698809069797676111
-c5d  = 0.0013888888939977128960529
-c4d  = 0.00833333333332371417601081
-c3d  = 0.0416666666665409524128449
-c2d  = 0.166666666666666740681535
-c1d  = 0.500000000000000999200722
-
-c5f = 0.00136324646882712841033936f0
-c4f = 0.00836596917361021041870117f0
-c3f = 0.0416710823774337768554688f0
-c2f = 0.166665524244308471679688f0
-c1f = 0.499999850988388061523438f0
-
-global @inline expk_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d
-global @inline expk_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f
-
-global under_expk(::Type{Float64}) = -1000.0
-global under_expk(::Type{Float32}) = -104f0
+const under_expk(::Type{Float64}) = -1000.0
+const under_expk(::Type{Float32}) = -104f0
+
+@inline function  expk_kernel(x::Float64)
+    c10d = 2.51069683420950419527139e-08
+    c9d  = 2.76286166770270649116855e-07
+    c8d  = 2.75572496725023574143864e-06
+    c7d  = 2.48014973989819794114153e-05
+    c6d  = 0.000198412698809069797676111
+    c5d  = 0.0013888888939977128960529
+    c4d  = 0.00833333333332371417601081
+    c3d  = 0.0416666666665409524128449
+    c2d  = 0.166666666666666740681535
+    c1d  = 0.500000000000000999200722
+    return @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d
+end
+
+@inline function  expk_kernel(x::Float32)
+    c5f = 0.00136324646882712841033936f0
+    c4f = 0.00836596917361021041870117f0
+    c3f = 0.0416710823774337768554688f0
+    c2f = 0.166665524244308471679688f0
+    c1f = 0.499999850988388061523438f0
+    return @horner x c1f c2f c3f c4f c5f
+end
 
 @inline function expk(d::Double{T}) where {T<:Union{Float32,Float64}}
     q = round(T(d) * T(MLN2E))
@@ -232,28 +231,28 @@ end
     t = dadd(T(1.0), t)
     return scale(scale(t, T(2.0)), pow2i(T, unsafe_trunc(Int, q - 1)))
 end
-end
 
 
-let
-global logk2
-
-c8d = 0.13860436390467167910856
-c7d = 0.131699838841615374240845
-c6d = 0.153914168346271945653214
-c5d = 0.181816523941564611721589
-c4d = 0.22222224632662035403996
-c3d = 0.285714285511134091777308
-c2d = 0.400000000000914013309483
-c1d = 0.666666666666664853302393
 
-c4f = 0.240320354700088500976562f0
-c3f = 0.285112679004669189453125f0
-c2f = 0.400007992982864379882812f0
-c1f = 0.666666686534881591796875f0
+@inline function logk2_kernel(x::Float64)
+    c8d = 0.13860436390467167910856
+    c7d = 0.131699838841615374240845
+    c6d = 0.153914168346271945653214
+    c5d = 0.181816523941564611721589
+    c4d = 0.22222224632662035403996
+    c3d = 0.285714285511134091777308
+    c2d = 0.400000000000914013309483
+    c1d = 0.666666666666664853302393
+    return @horner x c1d c2d c3d c4d c5d c6d c7d c8d
+end
 
-global @inline logk2_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d
-global @inline logk2_kernel(x::Float32) = @horner x c1f c2f c3f c4f
+@inline function logk2_kernel(x::Float32)
+    c4f = 0.240320354700088500976562f0
+    c3f = 0.285112679004669189453125f0
+    c2f = 0.400007992982864379882812f0
+    c1f = 0.666666686534881591796875f0
+    return @horner x c1f c2f c3f c4f
+end
 
 @inline function logk2(d::Double{T}) where {T<:Union{Float32,Float64}}
     e  = ilogbk(d.hi * T(1.0/0.75))
@@ -266,28 +265,30 @@ global @inline logk2_kernel(x::Float32) = @horner x c1f c2f c3f c4f
 
     dadd(dmul(MDLN2(T), T(e)), dadd(scale(x, T(2.0)), dmul(dmul(x2, x), t)))
 end
+
+
+
+@inline function logk_kernel(x::Double{Float64})
+    c10d = 0.116255524079935043668677
+    c9d = 0.103239680901072952701192
+    c8d = 0.117754809412463995466069
+    c7d = 0.13332981086846273921509
+    c6d = 0.153846227114512262845736
+    c5d = 0.181818180850050775676507
+    c4d = 0.222222222230083560345903
+    c3d = 0.285714285714249172087875
+    c2d = 0.400000000000000077715612
+    c1dd = Double(0.666666666666666629659233, 3.80554962542412056336616e-17)
+    dadd2(c1dd, dmul(x, @horner x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d))
 end
 
-let
-global logk
-c10d = 0.116255524079935043668677
-c9d = 0.103239680901072952701192
-c8d = 0.117754809412463995466069
-c7d = 0.13332981086846273921509
-c6d = 0.153846227114512262845736
-c5d = 0.181818180850050775676507
-c4d = 0.222222222230083560345903
-c3d = 0.285714285714249172087875
-c2d = 0.400000000000000077715612
-c1dd = Double(0.666666666666666629659233, 3.80554962542412056336616e-17);
-
-c4f = 0.240320354700088500976562f0
-c3f = 0.285112679004669189453125f0
-c2f = 0.400007992982864379882812f0
-c1fd = Double(0.66666662693023681640625f0, 3.69183861259614332084311f-9)
-
-global @inline logk_kernel(x::Double{Float64}) = dadd2(c1dd, dmul(x, @horner x.hi c2d c3d c4d c5d c6d c7d c8d c9d c10d))
-global @inline logk_kernel(x::Double{Float32}) = dadd2(c1fd, dmul(x, @horner x.hi c2f c3f c4f))
+@inline function logk_kernel(x::Double{Float32})
+    c4f = 0.240320354700088500976562f0
+    c3f = 0.285112679004669189453125f0
+    c2f = 0.400007992982864379882812f0
+    c1fd = Double(0.66666662693023681640625f0, 3.69183861259614332084311f-9)    
+    dadd2(c1fd, dmul(x, @horner x.hi c2f c3f c4f))
+end
 
 @inline function logk(d::T) where {T<:Union{Float32,Float64}}
     o = d < realmin(T)
@@ -305,6 +306,3 @@ global @inline logk_kernel(x::Double{Float32}) = dadd2(c1fd, dmul(x, @horner x.h
 
     dadd(dmul(MDLN2(T), T(e)), dadd(scale(x, T(2.0)), dmul(dmul(x2, x), t)))
 end
-
-
-end