Skip to content

Commit

Permalink
Merge pull request #44 from tkf/contiguous
Browse files Browse the repository at this point in the history
Make indexing aware of SubArray memory layout
  • Loading branch information
eschnett authored Mar 2, 2019
2 parents 5ce2964 + 1c6c76e commit dcd617f
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 30 deletions.
108 changes: 84 additions & 24 deletions src/SIMD.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,48 @@ uinttype{T}(::Type{T}) = uinttype(Val{8*sizeof(T)})
=#

# Array types for SIMD

using Base: Slice, ScalarIndex

"""
    ContiguousSubArray{T,N,P,I,L}

A `SubArray` whose index tuple `I` either starts with a `Slice` or an
`AbstractUnitRange`, or is made up of scalar indices only.

Like `Base.FastContiguousSubArray` but without requirement for linear
indexing (i.e., type parameter `L` can be `false`).

# Examples
```
julia> A = view(ones(5, 5), :, [1,3]);

julia> A isa Base.FastContiguousSubArray
false

julia> A isa SIMD.ContiguousSubArray
true
```
"""
const ContiguousSubArray{
    T, N, P,
    I <: Union{Tuple{Vararg{ScalarIndex}},
               Tuple{Union{AbstractUnitRange, Slice}, Vararg{Any}}},
    L
} = SubArray{T, N, P, I, L}

"""
    ContiguousArray{T,N}

Array types with contiguous first dimension: the union of `DenseArray{T,N}`
and `ContiguousSubArray{T,N}`.
"""
const ContiguousArray{T,N} = Union{ContiguousSubArray{T,N}, DenseArray{T,N}}

"""
    FastContiguousArray{T,N}

The array types for which `pointer(A, i)` works: the union of
`DenseArray{T,N}` and `Base.FastContiguousSubArray{T,N}`.
"""
const FastContiguousArray{T,N} =
    Union{Base.FastContiguousSubArray{T,N}, DenseArray{T,N}}
# https://github.com/eschnett/SIMD.jl/pull/40#discussion_r254131184
# https://github.com/JuliaArrays/MappedArrays.jl/pull/24#issuecomment-460568978

# The Julia SIMD vector type

const BoolTypes = Union{Bool}
Expand Down Expand Up @@ -1311,14 +1353,14 @@ end
vload(Vec{N,T}, ptr, Val{true})

@inline function vload(::Type{Vec{N,T}},
arr::Union{Array{T,1},SubArray{T,1}},
arr::FastContiguousArray{T,1},
i::Integer,
::Type{Val{Aligned}} = Val{false}) where {N,T,Aligned}
#TODO @boundscheck 1 <= i <= length(arr) - (N-1) || throw(BoundsError())
vload(Vec{N,T}, pointer(arr, i), Val{Aligned})
end
@inline function vloada(::Type{Vec{N,T}},
arr::Union{Array{T,1},SubArray{T,1}},
arr::FastContiguousArray{T,1},
i::Integer) where {N,T}
vload(Vec{N,T}, arr, i, Val{true})
end
Expand Down Expand Up @@ -1369,14 +1411,14 @@ end
vload(Vec{N,T}, ptr, mask, Val{true})

@inline function vload(::Type{Vec{N,T}},
arr::Union{Array{T,1},SubArray{T,1}},
arr::FastContiguousArray{T,1},
i::Integer, mask::Union{Vec{N,Bool}, Nothing},
::Type{Val{Aligned}} = Val{false}) where {N,T,Aligned}
#TODO @boundscheck 1 <= i <= length(arr) - (N-1) || throw(BoundsError())
vload(Vec{N,T}, pointer(arr, i), mask, Val{Aligned})
end
@inline function vloada(::Type{Vec{N,T}},
arr::Union{Array{T,1},SubArray{T,1}}, i::Integer,
arr::FastContiguousArray{T,1}, i::Integer,
mask::Union{Vec{N,Bool}, Nothing}) where {N,T}
vload(Vec{N,T}, arr, i, mask, Val{true})
end
Expand Down Expand Up @@ -1423,18 +1465,18 @@ end
@inline vstorent(v::Vec{N,T}, ptr::Ptr{T}) where {N,T} = vstore(v, ptr, Val{true}, Val{true})

@inline function vstore(v::Vec{N,T},
arr::Union{Array{T,1},SubArray{T,1}},
arr::FastContiguousArray{T,1},
i::Integer,
::Type{Val{Aligned}} = Val{false},
::Type{Val{Nontemporal}} = Val{false}) where {N,T,Aligned,Nontemporal}
@boundscheck 1 <= i <= length(arr) - (N-1) || throw(BoundsError())
vstore(v, pointer(arr, i), Val{Aligned}, Val{Nontemporal})
end
@inline function vstorea(v::Vec{N,T}, arr::Union{Array{T,1},SubArray{T,1}},
@inline function vstorea(v::Vec{N,T}, arr::FastContiguousArray{T,1},
i::Integer) where {N,T}
vstore(v, arr, i, Val{true})
end
@inline function vstorent(v::Vec{N,T}, arr::Union{Array{T,1},SubArray{T,1}},
@inline function vstorent(v::Vec{N,T}, arr::FastContiguousArray{T,1},
i::Integer) where {N,T}
vstore(v, arr, i, Val{true}, Val{true})
end
Expand Down Expand Up @@ -1485,7 +1527,7 @@ end
vstore(v, ptr, mask, Val{true})

@inline function vstore(v::Vec{N,T},
arr::Union{Array{T,1},SubArray{T,1}},
arr::FastContiguousArray{T,1},
i::Integer,
mask::Union{Vec{N,Bool}, Nothing},
::Type{Val{Aligned}} = Val{false},
Expand All @@ -1494,7 +1536,7 @@ end
vstore(v, pointer(arr, i), mask, Val{Aligned}, Val{Nontemporal})
end
@inline function vstorea(v::Vec{N,T},
arr::Union{Array{T,1},SubArray{T,1}},
arr::FastContiguousArray{T,1},
i::Integer,
mask::Union{Vec{N,Bool}, Nothing}) where {N,T}
vstore(v, arr, i, mask, Val{true})
Expand Down Expand Up @@ -1550,15 +1592,15 @@ end
mask::Union{Vec{N,Bool}, Nothing}) where {N,T} =
vgather(Vec{N,T}, ptrs, mask, Val{true})

@inline vgather(arr::Union{Array{T,1},SubArray{T,1}},
@inline vgather(arr::FastContiguousArray{T,1},
idx::Vec{N,<:Integer},
mask::Union{Vec{N,Bool}, Nothing} = nothing,
::Type{Val{Aligned}} = Val{false}) where {N,T,Aligned} =
vgather(Vec{N,T},
pointer(arr) + sizeof(T) * (idx - 1),
mask, Val{Aligned})

@inline vgathera(arr::Union{Array{T,1},SubArray{T,1}},
@inline vgathera(arr::FastContiguousArray{T,1},
idx::Vec{N,<:Integer},
mask::Union{Vec{N,Bool}, Nothing} = nothing) where {N,T} =
vgather(arr, idx, mask, Val{true})
Expand Down Expand Up @@ -1613,13 +1655,13 @@ end
mask::Union{Vec{N,Bool}, Nothing}) where {N,T} =
vscatter(v, ptrs, mask, Val{true})

@inline vscatter(v::Vec{N,T}, arr::Union{Array{T,1},SubArray{T,1}},
@inline vscatter(v::Vec{N,T}, arr::FastContiguousArray{T,1},
idx::Vec{N,<:Integer},
mask::Union{Vec{N,Bool}, Nothing} = nothing,
::Type{Val{Aligned}} = Val{false}) where {N,T,Aligned} =
vscatter(v, pointer(arr) + sizeof(T) * (idx - 1), mask, Val{Aligned})

@inline vscattera(v::Vec{N,T}, arr::Union{Array{T,1},SubArray{T,1}},
@inline vscattera(v::Vec{N,T}, arr::FastContiguousArray{T,1},
idx::Vec{N,<:Integer},
mask::Union{Vec{N,Bool}, Nothing} = nothing) where {N,T} =
vscatter(v, arr, idx, mask, Val{true})
Expand Down Expand Up @@ -1701,7 +1743,15 @@ Base.checkindex(::Type{Bool}, inds::AbstractUnitRange, idx::Vec) =
# Arity check, accepting case: exactly as many (non-mask) indices as the
# array has dimensions.  `Vararg{Any,N}` matches the same calls as
# `Vararg{<:Any,N}` but does not wrap `Vararg` directly in a `UnionAll`,
# a form that recent Julia releases deprecate.
@inline _checkarity(::AbstractArray{<:Any,N}, ::Vararg{Any,N}) where {N} =
    nothing

@inline _checkarity(::AbstractArray, ::Any) = nothing
# Arity check for a single (non-mask) index: accepted only when the array
# type uses linear indexing; any other array type raises an `ArgumentError`
# stating how many indices are required.
@inline function _checkarity(::T, ::Any) where {T <: AbstractArray}
    IndexStyle(T) isa IndexLinear && return nothing
    throw(ArgumentError("""
        Array type $T does not support indexing with a single index.
        Exactly $(ndims(T)) (non-mask) indices have to be specified.
        """))
end

_checkarity(::AbstractArray{<:Any,N}, ::Vararg{<:Any,M}) where {N,M} =
throw(ArgumentError("""
Expand Down Expand Up @@ -1741,36 +1791,46 @@ Base.@propagate_inbounds function _preprocessindices(arr, idx, args)
return I, mask
end

"""
    _pointer(arr, i, I)

Pointer to the element `arr[i, I...]`.

`i` is the index along the first dimension and `I` is a tuple with the
remaining indices.  Methods exist for every `DenseArray` (not only `Array`)
and for every `SubArray`, so that all members of `ContiguousArray`, on which
the SIMD `getindex`/`setindex!` methods dispatch, are covered.
"""
Base.@propagate_inbounds _pointer(arr::DenseArray, i, I) =
    pointer(arr, LinearIndices(arr)[i, I...])
# Views with fast contiguous linear indexing take the same route as dense
# arrays: translate `(i, I...)` to a linear index and use `pointer(arr, k)`.
Base.@propagate_inbounds _pointer(arr::Base.FastContiguousSubArray, i, I) =
    pointer(arr, LinearIndices(arr)[i, I...])
# Any other view: take the view of the single element `arr[1, I...]` and ask
# for the pointer at position `i` relative to it.
Base.@propagate_inbounds _pointer(arr::SubArray, i, I) =
    pointer(Base.unsafe_view(arr, 1, I...), i)

Base.@propagate_inbounds function Base.getindex(
arr::Union{Array{T},SubArray{T}}, idx::VecRange{N},
arr::ContiguousArray{T}, idx::VecRange{N},
args::Vararg{Union{Integer,Vec{N,Bool}}}) where {N,T}
I, mask = _preprocessindices(arr, idx, args)
return vload(Vec{N,T}, pointer(arr, LinearIndices(arr)[idx.i, I...]), mask)
return vload(Vec{N,T}, _pointer(arr, idx.i, I), mask)
end

Base.@propagate_inbounds function Base.setindex!(
arr::Union{Array{T},SubArray{T}}, v::Vec{N,T}, idx::VecRange{N},
arr::ContiguousArray{T}, v::Vec{N,T}, idx::VecRange{N},
args::Vararg{Union{Integer,Vec{N,Bool}}}) where {N,T}
I, mask = _preprocessindices(arr, idx, args)
vstore(v, pointer(arr, LinearIndices(arr)[idx.i, I...]), mask)
vstore(v, _pointer(arr, idx.i, I), mask)
return arr
end

Base.@propagate_inbounds function Base.getindex(
arr::Union{Array{T},SubArray{T}}, idx::Vec{N,<:Integer},
arr::ContiguousArray{T}, idx::Vec{N,<:Integer},
args::Vararg{Union{Integer,Vec{N,Bool}}}) where {N,T}
I, mask = _preprocessindices(arr, idx, args)
ptrs = pointer(arr, LinearIndices(arr)[1, I...]) - sizeof(T) +
sizeof(T) * idx
ptrs = _pointer(arr, 1, I) - sizeof(T) + sizeof(T) * idx
return vgather(Vec{N,T}, ptrs, mask)
end

Base.@propagate_inbounds function Base.setindex!(
arr::Union{Array{T},SubArray{T}}, v::Vec{N,T}, idx::Vec{N,<:Integer},
arr::ContiguousArray{T}, v::Vec{N,T}, idx::Vec{N,<:Integer},
args::Vararg{Union{Integer,Vec{N,Bool}}}) where {N,T}
I, mask = _preprocessindices(arr, idx, args)
ptrs = pointer(arr, LinearIndices(arr)[1, I...]) - sizeof(T) +
sizeof(T) * idx
ptrs = _pointer(arr, 1, I) - sizeof(T) + sizeof(T) * idx
vscatter(v, ptrs, mask)
return arr
end
Expand Down
54 changes: 48 additions & 6 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -413,21 +413,50 @@ llvm_ir(f, args) = sprint(code_llvm, f, Base.typesof(args...))
end
end

@testset "Matrix ($VT)" begin
arr .= 1:length(arr)
mat = repeat(arr, outer=(1, 3))
arr .= 1:length(arr)
@testset "$name" for (name, mat) in [
("Matrix ($VT)", repeat(arr, outer=(1, 3))),
("Matrix ($VT) with non-strided row",
view(repeat(arr, outer=(1, 5)), :, [2, 1, 5])),
]
idx = VecRange{length(VT)}(1)
@test mat[idx, 1] === VT(Tuple(1:length(VT)))
@test mat[idx, 2] === VT(Tuple(1:length(VT)))
@test mat[idx] === VT(Tuple(1:length(VT)))
if mat isa SIMD.FastContiguousArray
@test mat[idx] === VT(Tuple(1:length(VT)))
else
err = try
mat[idx]
nothing
catch err
err
end
@test err isa ArgumentError
@test occursin(
"Exactly 2 (non-mask) indices have to be specified.",
sprint(showerror, err))
end

maskarr = zeros(Bool, length(VT))
maskarr[1] = true
mask = Vec(Tuple(maskarr))
varr = zeros(length(VT))
varr[1] = 1
@test mat[idx, 1, mask] === VT(Tuple(varr))
@test mat[idx, mask] === VT(Tuple(varr))
if mat isa SIMD.FastContiguousArray
    @test mat[idx, mask] === VT(Tuple(varr))
else
    # Masked access with a single non-mask index must be rejected for
    # arrays without linear indexing.  Exercise `mat[idx, mask]` here; the
    # unmasked `mat[idx]` case is checked earlier in this test set.
    err = try
        mat[idx, mask]
        nothing
    catch err
        err
    end
    @test err isa ArgumentError
    @test occursin(
        "Exactly 2 (non-mask) indices have to be specified.",
        sprint(showerror, err))
end

@test_throws ArgumentError mat[idx, 1, 1]
@test_throws ArgumentError mat[idx, 1, 1, mask]
Expand All @@ -437,7 +466,20 @@ llvm_ir(f, args) = sprint(code_llvm, f, Base.typesof(args...))
lane = VecRange{length(VT)}(0)
@test_throws BoundsError mat[lane, 1]
@test_throws BoundsError mat[lane + end, 1]
@test_throws BoundsError mat[lane + end]
if mat isa SIMD.FastContiguousArray
@test_throws BoundsError mat[lane + end]
else
err = try
mat[lane + end]
nothing
catch err
err
end
@test err isa ArgumentError
@test occursin(
"Exactly 2 (non-mask) indices have to be specified.",
sprint(showerror, err))
end

# Out-of-bound access
varr = collect(1:length(VT))
Expand Down

0 comments on commit dcd617f

Please sign in to comment.