diff --git a/Project.toml b/Project.toml index bd61e63..57de6c2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Transforms" uuid = "8fd68953-04b8-4117-ac19-158bf6de9782" authors = ["Invenia Technical Computing Corporation"] -version = "0.1.0" +version = "0.1.1" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" diff --git a/src/one_hot_encoding.jl b/src/one_hot_encoding.jl index 5d8b722..2d43adc 100644 --- a/src/one_hot_encoding.jl +++ b/src/one_hot_encoding.jl @@ -32,7 +32,7 @@ function _apply(x, encoding::OneHotEncoding; kwargs...) results = zeros(Int, length(x), n_categories) - for (i, value) in enumerate(x) + @views for (i, value) in enumerate(x) col_pos = encoding.categories[value] results[i, col_pos] = 1 end diff --git a/src/periodic.jl b/src/periodic.jl index beaab77..342d1ff 100644 --- a/src/periodic.jl +++ b/src/periodic.jl @@ -43,11 +43,6 @@ function _apply(x, P::Periodic{T}; kwargs...) where T <: Period map(xi -> _periodic(P.f, xi, P.period, P.phase_shift), x) end -function _apply!(x::AbstractArray{T}, P::Periodic; kwargs...) where T <: Real - x[:] = _apply(x, P; kwargs...) - return x -end - """ _periodic(f, instant, period, phase_shift=Day(0)) diff --git a/src/power.jl b/src/power.jl index d628e85..0871d86 100644 --- a/src/power.jl +++ b/src/power.jl @@ -10,8 +10,3 @@ end function _apply(x::AbstractArray{T}, P::Power; kwargs...) where T <: Real return x .^ P.exponent end - -function _apply!(x::AbstractArray{T}, P::Power; kwargs...) where T <: Real - x[:] = _apply(x, P; kwargs...) - return x -end diff --git a/src/scaling.jl b/src/scaling.jl index b738c38..729e612 100644 --- a/src/scaling.jl +++ b/src/scaling.jl @@ -74,7 +74,7 @@ function compute_stats(table; cols=nothing) return (; μ_pairs...), (; σ_pairs...) end -function _apply!( +function _apply( A::AbstractArray, scaling::MeanStdScaling; name=nothing, inverse=false, eps=1e-3, kwargs... ) @@ -82,13 +82,12 @@ function _apply!( μ = scaling.mean[name] σ = scaling.std[name] if inverse - A[:] = μ .+ σ .* A + return μ .+ σ .* A else # Avoid division by 0 # If std is 0 then data was uniform, so the scaled value would end up ≈ 0 # Therefore the particular `eps` value should not matter much. σ_safe = σ == 0 ? eps : σ - A[:] = (A .- μ) ./ σ_safe + return (A .- μ) ./ σ_safe end - return A end diff --git a/src/temporal.jl b/src/temporal.jl index 37152cc..a003a0c 100644 --- a/src/temporal.jl +++ b/src/temporal.jl @@ -5,5 +5,4 @@ Get the hour of day corresponding to the data. """ struct HoD <: Transform end - _apply(x, ::HoD; kwargs...) = hour.(x) diff --git a/src/transformers.jl b/src/transformers.jl index e4521cb..b41c352 100644 --- a/src/transformers.jl +++ b/src/transformers.jl @@ -40,10 +40,10 @@ function apply end """ apply!(data::T, ::Transform; kwargs...) -> T -Applies the [`Transform`](@ref) mutating the input `data`. New transforms should usually -only extend `_apply!` which this method delegates to. +Applies the [`Transform`](@ref) mutating the input `data`. This method delegates to +[`apply`](@ref) under the hood so does not need to be defined separately. -Where necessary, this should be extended for new data types `T`. +If [`Transform`](@ref) does not support mutation, this method will error. """ function apply! end @@ -52,7 +52,11 @@ function apply! end apply(A::AbstractArray, ::Transform; dims=:, inds=:, kwargs...) Applies the [`Transform`](@ref) to the elements of `A`. + Provide the `dims` keyword to apply the [`Transform`](@ref) along a certain dimension. +For example, given a `Matrix`, `dims=1` applies to each column, while `dims=2` applies +to each row. + Provide the `inds` keyword to apply the [`Transform`](@ref) to certain indices along the `dims` specified. @@ -68,17 +72,27 @@ function apply(A::AbstractArray, t::Transform; dims=:, inds=:, kwargs...) if inds === Colon() return _apply(A, t; kwargs...) else - return _apply(A[:][inds], t; kwargs...) + return @views _apply(A[:][inds], t; kwargs...) end end slice_index = 0 - return mapslices(A, dims=dims) do x + return @views mapslices(A, dims=dims) do x slice_index += 1 _apply(x[inds], t; name=Symbol(slice_index), kwargs...) end end +""" + apply!(A::AbstractArray, ::Transform; dims=:, kwargs...) + +Applies the [`Transform`](@ref) to each element of `A`, mutating the data. +""" +function apply!(A::AbstractArray, t::Transform; kwargs...) + A[:] = apply(A, t; kwargs...) + return A +end + """ apply(table, ::Transform; cols=nothing, kwargs...) -> Vector @@ -102,32 +116,6 @@ function apply(table, t::Transform; cols=nothing, kwargs...) ] end -_apply(x, t::Transform; kwargs...) = _apply!(_try_copy(x), t; kwargs...) - - -""" - apply!(A::AbstractArray, ::Transform; dims=:, kwargs...) - -Applies the [`Transform`](@ref) to each element of `A`. -Optionally specify the `dims` to apply the [`Transform`](@ref) along certain dimensions. -For example in a `Matrix`, `dims=1` applies to each column, while `dims=2` applies -to each row. - -!!! note - For arrays with more than 2 dimensions, single `dims` are not supported. -""" -function apply!(A::AbstractArray, t::Transform; dims=:, kwargs...) - dims == Colon() && return _apply!(A, t; kwargs...) - - _dims = invert_dims(A, dims) # opposite convention to iterating `eachslice` - # TODO support multiple _dims https://github.com/invenia/Transforms.jl/issues/21 - for (slice_index, slice) in enumerate(eachslice(A; dims=_dims)) - _apply!(slice, t; name=Symbol(slice_index), kwargs...) - end - - return A -end - """ apply!(table::T, ::Transform; cols=nothing)::T where T diff --git a/src/utils.jl b/src/utils.jl index 703fc55..f908013 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -11,14 +11,3 @@ function _try_copy(data) deepcopy(data) end end - -function invert_dims(A::AbstractArray, dims) - ndims(A) == 1 && return dims - # TODO: support named dims https://github.com/invenia/Transforms.jl/issues/20 - inverted_dims = setdiff(1:ndims(A), dims) - if length(inverted_dims) == 1 - inverted_dims = inverted_dims[1] - end - - return inverted_dims -end