Skip to content

Commit

Permalink
Adding vfmsubadd
Browse files Browse the repository at this point in the history
  • Loading branch information
dannys4 committed Jul 24, 2021
1 parent e27c75d commit 1e6b689
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 14 deletions.
26 changes: 13 additions & 13 deletions src/LLVM_intrinsics.jl
Original file line number Diff line number Diff line change
Expand Up @@ -431,24 +431,24 @@ for f in MULADD_INTRINSICS
end
end


for (t, N, T) in [("d" , 2, Float64), ("s" , 4, Float32),
# Table of x86 FMA intrinsic variants, one entry per supported vector shape:
# (intrinsic name suffix, number of lanes, element type).
const AVX_EXTS = [
    ("d", 2, Float64),
    ("s", 4, Float32),
    ("d.256", 4, Float64),
    ("s.256", 8, Float32),
    # These don't seem supported by LLVM yet:
    # ("d.512", 8, Float64), ("s.512", 16, Float32)
]
@eval @generated function fmaddsub(a::LVec{$N, $T}, b::LVec{$N, $T}, c::LVec{$N, $T})
ff = "llvm.x86.fma.vfmaddsub.p"*$t
return :(
$(Expr(:meta, :inline));
ccall($ff, llvmcall, LVec{$($N), $($T)}, (LVec{$($N), $($T)}, LVec{$($N), $($T)}, LVec{$($N), $($T)}), a, b, c)
)
]
# Each symbol is used both as the name of the generated Julia function and as the
# middle component of the LLVM intrinsic name "llvm.x86.fma.<name>.p<suffix>".
const MULALTADD_INTRINSICS = [:vfmaddsub, :vfmsubadd]

# Generate one method per (intrinsic, AVX_EXTS entry) pair. Every method takes three
# `LVec{N, T}` operands and forwards them to the matching LLVM intrinsic.
for f in MULALTADD_INTRINSICS
    for (t, N, T) in AVX_EXTS
        @eval @generated function ($f)(a::LVec{$N, $T}, b::LVec{$N, $T}, c::LVec{$N, $T})
            # The intrinsic name is assembled from the loop's Symbol while this
            # definition is evaluated, so it never depends on how a function object
            # happens to print.
            ff = $(string("llvm.x86.fma.", f, ".p", t))
            return :(
                $(Expr(:meta, :inline));
                ccall($ff, llvmcall, LVec{$($N), $($T)}, (LVec{$($N), $($T)}, LVec{$($N), $($T)}, LVec{$($N), $($T)}), a, b, c)
            )
        end
    end
end

# function fmaddsub(a::LVec{4, Float64}, b::LVec{4, Float64}, c::LVec{4, Float64}) where N
# ccall("llvm.x86.fma.vfmaddsub.pd.256", llvmcall, LVec{4, Float64}, (LVec{4, Float64}, LVec{4, Float64}, LVec{4, Float64}), a, b, c)
# end

################
# Load / store #
################
Expand Down
2 changes: 1 addition & 1 deletion src/SIMD.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ using Base: @propagate_inbounds

export Vec, vload, vloada, vloadnt, vloadx, vstore, vstorea, vstorent, vstorec,
vgather, vgathera, vscatter, vscattera, shufflevector, vifelse, valloc,
VecRange
VecRange, vfmaddsub, vfmsubadd

const VE = Base.VecElement
const LVec{N, T} = NTuple{N, VE{T}}
Expand Down
3 changes: 3 additions & 0 deletions src/simdvec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,9 @@ for (op, llvmop) in [(:fma, Intrinsics.fma), (:muladd, Intrinsics.fmuladd)]
end
end

"""
    vfmaddsub(a::Vec{N,T}, b::Vec{N,T}, c::Vec{N,T})

Apply `Intrinsics.vfmaddsub` to the underlying data of `a`, `b` and `c` and wrap the
result in a `Vec{N,T}`.

NOTE(review): presumably the x86 FMA operation that multiplies `a` and `b` and then
alternately adds/subtracts `c` per lane — confirm the lane order against the
intrinsic's documentation.
"""
@inline function vfmaddsub(a::Vec{N,T}, b::Vec{N,T}, c::Vec{N,T}) where {N, T <: FloatingTypes}
    raw = Intrinsics.vfmaddsub(a.data, b.data, c.data)
    return Vec{N,T}(raw)
end
"""
    vfmsubadd(a::Vec{N,T}, b::Vec{N,T}, c::Vec{N,T})

Apply `Intrinsics.vfmsubadd` to the underlying data of `a`, `b` and `c` and wrap the
result in a `Vec{N,T}`.

NOTE(review): presumably the x86 FMA operation that multiplies `a` and `b` and then
alternately subtracts/adds `c` per lane (the mirror of `vfmaddsub`) — confirm the
lane order against the intrinsic's documentation.
"""
@inline function vfmsubadd(a::Vec{N,T}, b::Vec{N,T}, c::Vec{N,T}) where {N, T <: FloatingTypes}
    # Must dispatch to the `vfmsubadd` intrinsic; calling `vfmaddsub` here would make
    # this function an exact duplicate of `vfmaddsub`.
    return Vec{N,T}(Intrinsics.vfmsubadd(a.data, b.data, c.data))
end

if isdefined(Base, :bitrotate)
@inline Base.bitrotate(x::Vec, k::Vec) = Vec(Intrinsics.fshl(x.data, x.data, k.data))
@inline Base.bitrotate(x::Vec{N, T}, k::Integer) where {N, T} = bitrotate(x, Vec{N, T}(k))
Expand Down

0 comments on commit 1e6b689

Please sign in to comment.