Skip to content

Commit

Permalink
Updates and tweaks for SLEEF 2.9
Browse files Browse the repository at this point in the history
  • Loading branch information
musm committed May 30, 2017
1 parent 383d790 commit 9af7677
Show file tree
Hide file tree
Showing 12 changed files with 674 additions and 215 deletions.
16 changes: 13 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ alt="SLEEF Logo" width="420"></img> </div>
A pure Julia port of the [SLEEF math library](https://github.com/shibatch/sleef).

**History**
- Release [v0.1.0](https://github.com/musm/Sleef.jl/releases/tag/v0.1.0) based on SLEEF version v2.80
- Release [v0.2.0](https://github.com/musm/Sleef.jl/releases/tag/v0.2.0) based on SLEEF v2.90
- Release [v0.1.0](https://github.com/musm/Sleef.jl/releases/tag/v0.1.0) based on SLEEF v2.80

<br><br>
[![Travis Build Status](https://travis-ci.org/musm/Sleef.jl.svg?branch=master)](https://travis-ci.org/musm/Sleef.jl)
Expand Down Expand Up @@ -40,18 +41,27 @@ julia> Sleef.exp(3f0)
The available functions include (within 1 ulp)
```julia
sin, cos, tan, asin, acos, atan, atan2, sincos, sinh, cosh, tanh,
asinh, acosh, atanh, log, log2, log10, log1p, ilog2, exp, exp2, exp10, expm1, ldexp, cbrt, pow
asinh, acosh, atanh, log, log2, log10, log1p, ilogb, exp, exp2, exp10, expm1, ldexp, cbrt, pow
```
Faster variants include (within 3 ulp)

Faster variants (within 3 ulp)
```julia
sin_fast, cos_fast, tan_fast, sincos_fast, asin_fast, acos_fast, atan_fast, atan2_fast, log_fast, cbrt_fast
```

## Notes

The trigonometric functions are tested to return values with specified
accuracy when the argument is within the following range:

- Double (Float64) precision trigonometric functions : `|arg| <= 10000000`
- Single (Float32) precision trigonometric functions : `|arg| <= 10000`

# Benchmarks

You can benchmark the performance of the Sleef.jl math library on your machine by running

```julia
include(joinpath(Pkg.dir("Sleef"), "benchmark", "benchmark.jl"))
```

94 changes: 47 additions & 47 deletions src/double.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ struct Double{T<:IEEEFloat} <: Number
hi::T
lo::T
end
Double(x::T) where {T <: IEEEFloat} = Double(x, zero(T))
Double(x::T) where {T<:IEEEFloat} = Double(x, zero(T))

convert(::Type{Tuple{T,T}}, x::Double{T}) where {T <: IEEEFloat} = (x.hi, x.lo)
convert(::Type{T}, x::Double) where {T <: IEEEFloat} = x.hi + x.lo
convert(::Type{Tuple{T,T}}, x::Double{T}) where {T<:IEEEFloat} = (x.hi, x.lo)
convert(::Type{T}, x::Double) where {T<:IEEEFloat} = x.hi + x.lo


@inline trunclo(x::Float64) = reinterpret(Float64, reinterpret(UInt64, x) & 0xffff_ffff_f800_0000) # clear lower 27 bits (leave upper 26 bits)
Expand All @@ -23,107 +23,107 @@ end
Double(r, (x.hi - r) + x.lo)
end

@inline flipsign(x::Double{T}, y::T) where {T <: IEEEFloat} = Double(flipsign(x.hi, y), flipsign(x.lo, y))
@inline flipsign(x::Double{T}, y::T) where {T<:IEEEFloat} = Double(flipsign(x.hi, y), flipsign(x.lo, y))

@inline scale(x::Double{T}, s::T) where {T <: IEEEFloat} = Double(s * x.hi, s * x.lo)
@inline scale(x::Double{T}, s::T) where {T<:IEEEFloat} = Double(s * x.hi, s * x.lo)


@inline (-)(x::Double{T}) where {T <: IEEEFloat} = Double(-x.hi, -x.lo)
@inline (-)(x::Double{T}) where {T<:IEEEFloat} = Double(-x.hi, -x.lo)

@inline function (<)(x::Double{T}, y::Double{T}) where {T <: IEEEFloat}
@inline function (<)(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
x.hi < y.hi
end

@inline function (<)(x::Double{T}, y::Number) where {T <: IEEEFloat}
@inline function (<)(x::Double{T}, y::Number) where {T<:IEEEFloat}
x.hi < y
end

@inline function (<)(x::Number, y::Double{T}) where {T <: IEEEFloat}
@inline function (<)(x::Number, y::Double{T}) where {T<:IEEEFloat}
x < y.hi
end

# quick-two-sum x+y
@inline function dadd(x::T, y::T) where {T <: IEEEFloat} #WARNING |x| >= |y|
@inline function dadd(x::T, y::T) where {T<:IEEEFloat} #WARNING |x| >= |y|
s = x + y
Double(s, (x - s) + y)
end

@inline function dadd(x::T, y::Double{T}) where {T <: IEEEFloat} #WARNING |x| >= |y|
@inline function dadd(x::T, y::Double{T}) where {T<:IEEEFloat} #WARNING |x| >= |y|
s = x + y.hi
Double(s, (x - s) + y.hi + y.lo)
end

@inline function dadd(x::Double{T}, y::T) where {T <: IEEEFloat} #WARNING |x| >= |y|
@inline function dadd(x::Double{T}, y::T) where {T<:IEEEFloat} #WARNING |x| >= |y|
s = x.hi + y
Double(s, (x.hi - s) + y + x.lo)
end

@inline function dadd(x::Double{T}, y::Double{T}) where {T <: IEEEFloat} #WARNING |x| >= |y|
@inline function dadd(x::Double{T}, y::Double{T}) where {T<:IEEEFloat} #WARNING |x| >= |y|
s = x.hi + y.hi
Double(s, (x.hi - s) + y.hi + y.lo + x.lo)
end

@inline function dsub(x::Double{T}, y::Double{T}) where {T <: IEEEFloat} #WARNING |x| >= |y|
@inline function dsub(x::Double{T}, y::Double{T}) where {T<:IEEEFloat} #WARNING |x| >= |y|
s = x.hi - y.hi
Double(s, (x.hi - s) - y.hi - y.lo + x.lo)
end

@inline function dsub(x::Double{T}, y::T) where {T <: IEEEFloat} #WARNING |x| >= |y|
@inline function dsub(x::Double{T}, y::T) where {T<:IEEEFloat} #WARNING |x| >= |y|
s = x.hi - y
Double(s, (x.hi - s) - y + x.lo)
end

@inline function dsub(x::T, y::Double{T}) where {T <: IEEEFloat} #WARNING |x| >= |y|
@inline function dsub(x::T, y::Double{T}) where {T<:IEEEFloat} #WARNING |x| >= |y|
s = x - y.hi
Double(s, (x - s) - y.hi - y.lo)
end

@inline function dsub(x::T, y::T) where {T <: IEEEFloat} #WARNING |x| >= |y|
@inline function dsub(x::T, y::T) where {T<:IEEEFloat} #WARNING |x| >= |y|
s = x - y
Double(s, (x - s) - y)
end


# two-sum x+y NO BRANCH
@inline function dadd2(x::T, y::T) where {T <: IEEEFloat}
@inline function dadd2(x::T, y::T) where {T<:IEEEFloat}
s = x + y
v = s - x
Double(s, (x - (s - v)) + (y - v))
end

@inline function dadd2(x::T, y::Double{T}) where {T <: IEEEFloat}
@inline function dadd2(x::T, y::Double{T}) where {T<:IEEEFloat}
s = x + y.hi
v = s - x
Double(s, (x - (s - v)) + (y.hi - v) + y.lo)
end

@inline dadd2(x::Double{T}, y::T) where {T <: IEEEFloat} = dadd2(y, x)
@inline dadd2(x::Double{T}, y::T) where {T<:IEEEFloat} = dadd2(y, x)

@inline function dadd2(x::Double{T}, y::Double{T}) where {T <: IEEEFloat}
@inline function dadd2(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
s = x.hi + y.hi
v = s - x.hi
Double(s, (x.hi - (s - v)) + (y.hi - v) + x.lo + y.lo)
end

@inline function dsub2(x::T, y::T) where {T <: IEEEFloat}
@inline function dsub2(x::T, y::T) where {T<:IEEEFloat}
s = x - y
v = s - x
Double(s, (x - (s - v)) + (-y - v))
end

@inline function dsub2(x::T, y::Double{T}) where {T <: IEEEFloat}
@inline function dsub2(x::T, y::Double{T}) where {T<:IEEEFloat}
s = x - y.hi
v = s - x
Double(s, (x - (s - v)) + (-y.hi - v) - y.lo)
end

@inline function dsub2(x::Double{T}, y::T) where {T <: IEEEFloat}
@inline function dsub2(x::Double{T}, y::T) where {T<:IEEEFloat}
s = x.hi - y
v = s - x.hi
Double(s, (x.hi - (s - v)) + (-y - v) + x.lo)
end

@inline function dsub2(x::Double{T}, y::Double{T}) where {T <: IEEEFloat}
@inline function dsub2(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
s = x.hi - y.hi
v = s - x.hi
Double(s, (x.hi - (s - v)) + (-y.hi - v) + x.lo - y.lo)
Expand All @@ -134,119 +134,119 @@ end
if FMA_FAST

# two-prod-fma
@inline function dmul(x::T, y::T) where {T <: IEEEFloat}
@inline function dmul(x::T, y::T) where {T<:IEEEFloat}
z = x * y
Double(z, fma(x, y, -z))
end

@inline function dmul(x::Double{T}, y::T) where {T <: IEEEFloat}
@inline function dmul(x::Double{T}, y::T) where {T<:IEEEFloat}
z = x.hi * y
Double(z, fma(x.hi, y, -z) + x.lo * y)
end

@inline dmul(x::T, y::Double{T}) where {T <: IEEEFloat} = dmul(y, x)
@inline dmul(x::T, y::Double{T}) where {T<:IEEEFloat} = dmul(y, x)

@inline function dmul(x::Double{T}, y::Double{T}) where {T <: IEEEFloat}
@inline function dmul(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
z = x.hi * y.hi
Double(z, fma(x.hi, y.hi, -z) + x.hi * y.lo + x.lo * y.hi)
end

# x^2
@inline function dsqu(x::T) where {T <: IEEEFloat}
@inline function dsqu(x::T) where {T<:IEEEFloat}
z = x * x
Double(z, fma(x, x, -z))
end

@inline function dsqu(x::Double{T}) where {T <: IEEEFloat}
@inline function dsqu(x::Double{T}) where {T<:IEEEFloat}
z = x.hi * x.hi
Double(z, fma(x.hi, x.hi, -z) + x.hi * (x.lo + x.lo))
end

# sqrt(x)
@inline function dsqrt(x::Double{T}) where {T <: IEEEFloat}
@inline function dsqrt(x::Double{T}) where {T<:IEEEFloat}
zhi = _sqrt(x.hi)
Double(zhi, (x.lo + fma(-zhi, zhi, x.hi)) / (zhi + zhi))
end

# x/y
@inline function ddiv(x::Double{T}, y::Double{T}) where {T <: IEEEFloat}
@inline function ddiv(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
invy = 1 / y.hi
zhi = x.hi * invy
Double(zhi, (fma(-zhi, y.hi, x.hi) + fma(-zhi, y.lo, x.lo)) * invy)
end

@inline function ddiv(x::T, y::T) where {T <: IEEEFloat}
@inline function ddiv(x::T, y::T) where {T<:IEEEFloat}
ry = 1 / y
r = x * ry
Double(r, fma(-r, y, x) * ry)
end

# 1/x
@inline function ddrec(x::T) where {T <: IEEEFloat}
@inline function ddrec(x::T) where {T<:IEEEFloat}
zhi = 1 / x
Double(zhi, fma(-zhi, x, one(T)) * zhi)
end

@inline function ddrec(x::Double{T}) where {T <: IEEEFloat}
@inline function ddrec(x::Double{T}) where {T<:IEEEFloat}
zhi = 1 / x.hi
Double(zhi, (fma(-zhi, x.hi, one(T)) + -zhi * x.lo) * zhi)
end

else

#two-prod x*y
@inline function dmul(x::T, y::T) where {T <: IEEEFloat}
@inline function dmul(x::T, y::T) where {T<:IEEEFloat}
hx, lx = splitprec(x)
hy, ly = splitprec(y)
z = x * y
Double(z, ((hx * hy - z) + lx * hy + hx * ly) + lx * ly)
end

@inline function dmul(x::Double{T}, y::T) where {T <: IEEEFloat}
@inline function dmul(x::Double{T}, y::T) where {T<:IEEEFloat}
hx, lx = splitprec(x.hi)
hy, ly = splitprec(y)
z = x.hi * y
Double(z, (hx * hy - z) + lx * hy + hx * ly + lx * ly + x.lo * y)
end

@inline dmul(x::T, y::Double{T}) where {T <: IEEEFloat} = dmul(y, x)
@inline dmul(x::T, y::Double{T}) where {T<:IEEEFloat} = dmul(y, x)

@inline function dmul(x::Double{T}, y::Double{T}) where {T <: IEEEFloat}
@inline function dmul(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
hx, lx = splitprec(x.hi)
hy, ly = splitprec(y.hi)
z = x.hi * y.hi
Double(z, (((hx * hy - z) + lx * hy + hx * ly) + lx * ly) + x.hi * y.lo + x.lo * y.hi)
end

# x^2
@inline function dsqu(x::T) where {T <: IEEEFloat}
@inline function dsqu(x::T) where {T<:IEEEFloat}
hx, lx = splitprec(x)
z = x * x
Double(z, (hx * hx - z) + lx * (hx + hx) + lx * lx)
end

@inline function dsqu(x::Double{T}) where {T <: IEEEFloat}
@inline function dsqu(x::Double{T}) where {T<:IEEEFloat}
hx, lx = splitprec(x.hi)
z = x.hi * x.hi
Double(z, (hx * hx - z) + lx * (hx + hx) + lx * lx + x.hi * (x.lo + x.lo))
end

# sqrt(x)
@inline function dsqrt(x::Double{T}) where {T <: IEEEFloat}
@inline function dsqrt(x::Double{T}) where {T<:IEEEFloat}
c = _sqrt(x.hi)
u = dsqu(c)
Double(c, (x.hi - u.hi - u.lo + x.lo) / (c + c))
end

# x/y
@inline function ddiv(x::Double{T}, y::Double{T}) where {T <: IEEEFloat}
@inline function ddiv(x::Double{T}, y::Double{T}) where {T<:IEEEFloat}
invy = 1 / y.hi
c = x.hi * invy
u = dmul(c, y.hi)
Double(c, ((((x.hi - u.hi) - u.lo) + x.lo) - c * y.lo) * invy)
end

@inline function ddiv(x::T, y::T) where {T <: IEEEFloat}
@inline function ddiv(x::T, y::T) where {T<:IEEEFloat}
ry = 1 / y
r = x * ry
hx, lx = splitprec(r)
Expand All @@ -256,13 +256,13 @@ else


# 1/x
@inline function ddrec(x::T) where {T <: IEEEFloat}
@inline function ddrec(x::T) where {T<:IEEEFloat}
c = 1 / x
u = dmul(c, x)
Double(c, (one(T) - u.hi - u.lo) * c)
end

@inline function ddrec(x::Double{T}) where {T <: IEEEFloat}
@inline function ddrec(x::Double{T}) where {T<:IEEEFloat}
c = 1 / x.hi
u = dmul(c, x.hi)
Double(c, (one(T) - u.hi - u.lo - c * x.lo) * c)
Expand Down
6 changes: 3 additions & 3 deletions src/exp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ const over_e2(::Type{Float32}) = 128f0
Compute the base-`2` exponential of `x`, that is `2ˣ`.
"""
function exp2(x::T) where {T <: IEEEFloat}
function exp2(x::T) where {T<:IEEEFloat}
u = expk(dmul(MDLN2(T), x))
x > over_e2(T) && (u = T(Inf))
isninf(x) && (u = T(0))
Expand All @@ -34,7 +34,7 @@ const over_e10(::Type{Float32}) = 38f0
Compute the base-`10` exponential of `x`, that is `10ˣ`.
"""
function exp10(x::T) where {T <: IEEEFloat}
function exp10(x::T) where {T<:IEEEFloat}
u = expk(dmul(MDLN10(T), x))
x > over_e10(T) && (u = T(Inf))
isninf(x) && (u = T(0))
Expand Down Expand Up @@ -95,7 +95,7 @@ const c1f = 0.499999850988388061523438f0
global @inline exp_kernel(x::Float64) = @horner x c1d c2d c3d c4d c5d c6d c7d c8d c9d c10d c11d
global @inline exp_kernel(x::Float32) = @horner x c1f c2f c3f c4f c5f

function exp(x::T) where {T <: IEEEFloat}
function exp(x::T) where {T<:IEEEFloat}
q = roundi(T(MLN2E) * x)
s = muladd(q, -LN2U(T), x)
s = muladd(q, -LN2L(T), s)
Expand Down
Loading

0 comments on commit 9af7677

Please sign in to comment.