From 88a6441de35adbed9e007968b58e9c38939dbd58 Mon Sep 17 00:00:00 2001 From: Glenn Moynihan Date: Mon, 22 Feb 2021 18:55:37 +0000 Subject: [PATCH 1/7] Directly delegate to apply inside apply! --- src/transformers.jl | 43 +++++++++++++++++-------------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/src/transformers.jl b/src/transformers.jl index e4521cb..249e95a 100644 --- a/src/transformers.jl +++ b/src/transformers.jl @@ -79,6 +79,19 @@ function apply(A::AbstractArray, t::Transform; dims=:, inds=:, kwargs...) end end +""" + apply!(A::AbstractArray, ::Transform; dims=:, kwargs...) + +Applies the [`Transform`](@ref) to each element of `A`, mutating the data. +Optionally specify the `dims` to apply the [`Transform`](@ref) along certain dimensions. +For example in a `Matrix`, `dims=1` applies to each column, while `dims=2` applies +to each row. +""" +function apply!(A::AbstractArray, t::Transform; dims=:, kwargs...) + A[:] = apply(A, t; dims=dims, kwargs...) + return A +end + """ apply(table, ::Transform; cols=nothing, kwargs...) -> Vector @@ -102,32 +115,6 @@ function apply(table, t::Transform; cols=nothing, kwargs...) ] end -_apply(x, t::Transform; kwargs...) = _apply!(_try_copy(x), t; kwargs...) - - -""" - apply!(A::AbstractArray, ::Transform; dims=:, kwargs...) - -Applies the [`Transform`](@ref) to each element of `A`. -Optionally specify the `dims` to apply the [`Transform`](@ref) along certain dimensions. -For example in a `Matrix`, `dims=1` applies to each column, while `dims=2` applies -to each row. - -!!! note - For arrays with more than 2 dimensions, single `dims` are not supported. -""" -function apply!(A::AbstractArray, t::Transform; dims=:, kwargs...) - dims == Colon() && return _apply!(A, t; kwargs...) 
- - _dims = invert_dims(A, dims) # opposite convention to iterating `eachslice` - # TODO support multiple _dims https://github.com/invenia/Transforms.jl/issues/21 - for (slice_index, slice) in enumerate(eachslice(A; dims=_dims)) - _apply!(slice, t; name=Symbol(slice_index), kwargs...) - end - - return A -end - """ apply!(table::T, ::Transform; cols=nothing)::T where T @@ -149,3 +136,7 @@ function apply!(table::T, t::Transform; cols=nothing, kwargs...)::T where T return table end + + +# Fallback method for when _apply is not directly defined +_apply(x, t::Transform; kwargs...) = _apply!(_try_copy(x), t; kwargs...) From 7bd767a5d816077a6485a050aa4e7208494f34f6 Mon Sep 17 00:00:00 2001 From: Glenn Moynihan Date: Mon, 22 Feb 2021 18:55:48 +0000 Subject: [PATCH 2/7] Delete invert_dims --- src/utils.jl | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/utils.jl b/src/utils.jl index 703fc55..f908013 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -11,14 +11,3 @@ function _try_copy(data) deepcopy(data) end end - -function invert_dims(A::AbstractArray, dims) - ndims(A) == 1 && return dims - # TODO: support named dims https://github.com/invenia/Transforms.jl/issues/20 - inverted_dims = setdiff(1:ndims(A), dims) - if length(inverted_dims) == 1 - inverted_dims = inverted_dims[1] - end - - return inverted_dims -end From ab8e001400a89fb896af7d8f59c43a86fe337378 Mon Sep 17 00:00:00 2001 From: Glenn Moynihan Date: Mon, 22 Feb 2021 18:56:01 +0000 Subject: [PATCH 3/7] bump Project --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index bd61e63..57de6c2 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Transforms" uuid = "8fd68953-04b8-4117-ac19-158bf6de9782" authors = ["Invenia Technical Computing Corporation"] -version = "0.1.0" +version = "0.1.1" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" From 1ffcbcafe280c84020ceb1cd86b2871bf555ae77 Mon Sep 17 00:00:00 2001 From: Glenn 
Moynihan Date: Mon, 22 Feb 2021 19:27:50 +0000 Subject: [PATCH 4/7] Update apply! docstring --- src/transformers.jl | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/transformers.jl b/src/transformers.jl index 249e95a..82f26e5 100644 --- a/src/transformers.jl +++ b/src/transformers.jl @@ -40,10 +40,10 @@ function apply end """ apply!(data::T, ::Transform; kwargs...) -> T -Applies the [`Transform`](@ref) mutating the input `data`. New transforms should usually -only extend `_apply!` which this method delegates to. +Applies the [`Transform`](@ref) mutating the input `data`. This method delegates to +[`apply`](@ref) under the hood so does not need to be defined separately. -Where necessary, this should be extended for new data types `T`. +If [`Transform`](@ref) does not support mutation, this method will error. """ function apply! end @@ -136,7 +136,3 @@ function apply!(table::T, t::Transform; cols=nothing, kwargs...)::T where T return table end - - -# Fallback method for when _apply is not directly defined -_apply(x, t::Transform; kwargs...) = _apply!(_try_copy(x), t; kwargs...) From eafb98244fc8cafcf0efd58f762942d85043c045 Mon Sep 17 00:00:00 2001 From: Glenn Moynihan Date: Mon, 22 Feb 2021 19:28:20 +0000 Subject: [PATCH 5/7] Delete unnecessary _apply! methods --- src/periodic.jl | 5 ----- src/power.jl | 5 ----- src/scaling.jl | 7 +++---- src/temporal.jl | 1 - 4 files changed, 3 insertions(+), 15 deletions(-) diff --git a/src/periodic.jl b/src/periodic.jl index beaab77..342d1ff 100644 --- a/src/periodic.jl +++ b/src/periodic.jl @@ -43,11 +43,6 @@ function _apply(x, P::Periodic{T}; kwargs...) where T <: Period map(xi -> _periodic(P.f, xi, P.period, P.phase_shift), x) end -function _apply!(x::AbstractArray{T}, P::Periodic; kwargs...) where T <: Real - x[:] = _apply(x, P; kwargs...) 
- return x -end - """ _periodic(f, instant, period, phase_shift=Day(0)) diff --git a/src/power.jl b/src/power.jl index d628e85..0871d86 100644 --- a/src/power.jl +++ b/src/power.jl @@ -10,8 +10,3 @@ end function _apply(x::AbstractArray{T}, P::Power; kwargs...) where T <: Real return x .^ P.exponent end - -function _apply!(x::AbstractArray{T}, P::Power; kwargs...) where T <: Real - x[:] = _apply(x, P; kwargs...) - return x -end diff --git a/src/scaling.jl b/src/scaling.jl index b738c38..729e612 100644 --- a/src/scaling.jl +++ b/src/scaling.jl @@ -74,7 +74,7 @@ function compute_stats(table; cols=nothing) return (; μ_pairs...), (; σ_pairs...) end -function _apply!( +function _apply( A::AbstractArray, scaling::MeanStdScaling; name=nothing, inverse=false, eps=1e-3, kwargs... ) @@ -82,13 +82,12 @@ function _apply!( μ = scaling.mean[name] σ = scaling.std[name] if inverse - A[:] = μ .+ σ .* A + return μ .+ σ .* A else # Avoid division by 0 # If std is 0 then data was uniform, so the scaled value would end up ≈ 0 # Therefore the particular `eps` value should not matter much. σ_safe = σ == 0 ? eps : σ - A[:] = (A .- μ) ./ σ_safe + return (A .- μ) ./ σ_safe end - return A end diff --git a/src/temporal.jl b/src/temporal.jl index 37152cc..a003a0c 100644 --- a/src/temporal.jl +++ b/src/temporal.jl @@ -5,5 +5,4 @@ Get the hour of day corresponding to the data. """ struct HoD <: Transform end - _apply(x, ::HoD; kwargs...) = hour.(x) From cfaab0687ee75b3cd39f2c2f4df3dc9958795aee Mon Sep 17 00:00:00 2001 From: Glenn Moynihan Date: Tue, 23 Feb 2021 12:15:15 +0000 Subject: [PATCH 6/7] Reword apply! docstring --- src/transformers.jl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/transformers.jl b/src/transformers.jl index 82f26e5..a519b7f 100644 --- a/src/transformers.jl +++ b/src/transformers.jl @@ -52,7 +52,11 @@ function apply! end apply(A::AbstractArray, ::Transform; dims=:, inds=:, kwargs...) 
Applies the [`Transform`](@ref) to the elements of `A`. + Provide the `dims` keyword to apply the [`Transform`](@ref) along a certain dimension. +For example, given a `Matrix`, `dims=1` applies to each column, while `dims=2` applies +to each row. + Provide the `inds` keyword to apply the [`Transform`](@ref) to certain indices along the `dims` specified. @@ -83,12 +87,9 @@ end apply!(A::AbstractArray, ::Transform; dims=:, kwargs...) Applies the [`Transform`](@ref) to each element of `A`, mutating the data. -Optionally specify the `dims` to apply the [`Transform`](@ref) along certain dimensions. -For example in a `Matrix`, `dims=1` applies to each column, while `dims=2` applies -to each row. """ -function apply!(A::AbstractArray, t::Transform; dims=:, kwargs...) - A[:] = apply(A, t; dims=dims, kwargs...) +function apply!(A::AbstractArray, t::Transform; kwargs...) + A[:] = apply(A, t; kwargs...) return A end From 6d438f11486601269177c0a67ae730080ca1815f Mon Sep 17 00:00:00 2001 From: Glenn Moynihan Date: Tue, 23 Feb 2021 12:35:43 +0000 Subject: [PATCH 7/7] Use @views to reduce allocations --- src/one_hot_encoding.jl | 2 +- src/transformers.jl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/one_hot_encoding.jl b/src/one_hot_encoding.jl index 5d8b722..2d43adc 100644 --- a/src/one_hot_encoding.jl +++ b/src/one_hot_encoding.jl @@ -32,7 +32,7 @@ function _apply(x, encoding::OneHotEncoding; kwargs...) results = zeros(Int, length(x), n_categories) - for (i, value) in enumerate(x) + @views for (i, value) in enumerate(x) col_pos = encoding.categories[value] results[i, col_pos] = 1 end diff --git a/src/transformers.jl b/src/transformers.jl index a519b7f..b41c352 100644 --- a/src/transformers.jl +++ b/src/transformers.jl @@ -72,12 +72,12 @@ function apply(A::AbstractArray, t::Transform; dims=:, inds=:, kwargs...) if inds === Colon() return _apply(A, t; kwargs...) else - return _apply(A[:][inds], t; kwargs...) 
+ return @views _apply(A[:][inds], t; kwargs...) end end slice_index = 0 - return mapslices(A, dims=dims) do x + return @views mapslices(A, dims=dims) do x slice_index += 1 _apply(x[inds], t; name=Symbol(slice_index), kwargs...) end