Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transform interface and example Power transformation #1

Merged
merged 17 commits into from
Feb 2, 2021
2 changes: 1 addition & 1 deletion src/power.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ struct Power <: Transform
exponent::Real
end

function _transform!(x::AbstractArray{T}, P::Power; kwargs...) where T <: Real
function _apply!(x::AbstractArray{T}, P::Power; kwargs...) where T <: Real
x[:] = x .^ P.exponent
return x
end
58 changes: 38 additions & 20 deletions src/transformers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,61 +7,79 @@ Abstract supertype for all Transforms.
abstract type Transform end

# Make Transforms callable types
(t::Transform)(x; kwargs...) = transform(x, t; kwargs...)
(t::Transform)(x; kwargs...) = apply(x, t; kwargs...)


"""
transform!(data::T, Transform::Transform; kwargs...) -> T
transform!(::T, data)

Apply the `Transform` mutating the input `data`.
Where possible, this should be extended for new data types `T`.
Defines the feature engineering pipeline for some type `T`, which comprises a collection of
[`Transform`](@ref)s to be performed on the `data`.

`transform!` should be overloaded for custom types `T` that require feature engineering.
"""
function transform! end

"""
transform(data::T, Transform::Transform; kwargs...) -> T
transform(::T, data)

Non-mutating version of [`transform!`](@ref), which it delegates to by default.
Does not need to be extended unless a mutating Transform is not possible.
Non-mutating version of [`transform!`](@ref).
"""
function transform end
glennmoy marked this conversation as resolved.
Show resolved Hide resolved

"""
transform!(A::AbstractArray{T}, ::Transform; dims=:, kwargs...) where T <: Real
Transforms.apply!(data::T, Transform::Transform; kwargs...) -> T
glennmoy marked this conversation as resolved.
Show resolved Hide resolved

Applies the [`Transform`](@ref) mutating the input `data`.
Where possible, this should be extended for new data types `T`.
"""
function apply! end

"""
Transforms.apply(data::T, Transform::Transform; kwargs...) -> T
glennmoy marked this conversation as resolved.
Show resolved Hide resolved

Non-mutating version of [`apply!`](@ref), which it delegates to by default.
Does not need to be extended unless a mutating [`Transform`](@ref) is not possible.
"""
function apply end

"""
apply!(A::AbstractArray{T}, ::Transform; dims=:, kwargs...) where T <: Real

Applies the Transform to each element of `A`.
Optionally specify the `dims` to apply the Transform along certain dimensions.
Applies the [`Transform`](@ref) to each element of `A`.
Optionally specify the `dims` to apply the [`Transform`](@ref) along certain dimensions.
"""
function transform!(
function apply!(
A::AbstractArray{T}, t::Transform; dims=:, kwargs...
) where T <: Real
dims == Colon() && return _transform!(A, t; kwargs...)
dims == Colon() && return _apply!(A, t; kwargs...)

for x in eachslice(A; dims=dims)
_transform!(x, t; kwargs...)
_apply!(x, t; kwargs...)
end

return A
end

transform(x, t::Transform; kwargs...) = transform!(_try_copy(x), t; kwargs...)
apply(x, t::Transform; kwargs...) = apply!(_try_copy(x), t; kwargs...)

"""
transform!(table::T, ::Transform; cols=nothing)::T where T
Transforms.apply!(table::T, ::Transform; cols=nothing)::T where T

Applies the Transform to each of the specified columns in the `table`.
If no `cols` are specified, then the Transform is applied to all columns.
Applies the [`Transform`](@ref) to each of the specified columns in the `table`.
If no `cols` are specified, then the [`Transform`](@ref) is applied to all columns.
"""
function transform!(table::T, t::Transform; cols=nothing)::T where T
function apply!(table::T, t::Transform; cols=nothing)::T where T
# TODO: We could probably handle iterators of tables here
Tables.istable(table) || throw(MethodError(transform!, (table, t)))
Tables.istable(table) || throw(MethodError(apply!, (table, t)))

# Extract a columns iterator that we should be able to use to mutate the data.
# NOTE: Mutation is not guaranteed for all table types, but it avoids copying the data
columntable = Tables.columns(table)

cnames = cols === nothing ? propertynames(columntable) : cols
for cname in cnames
transform!(getproperty(columntable, cname), t)
apply!(getproperty(columntable, cname), t)
end

return table
Expand Down
30 changes: 15 additions & 15 deletions test/power.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
x = [1, 2, 3, 4, 5]
expected = [1, 8, 27, 64, 125]

@test transform(x, p) == expected
@test Transforms.apply(x, p) == expected
@test p(x) == expected

_x = copy(x)
transform!(_x, p)
Transforms.apply!(_x, p)
@test _x == expected
end

Expand All @@ -21,11 +21,11 @@
expected = [1 8 27; 64 125 216]

@testset "dims = $d" for d in (Colon(), 1, 2)
@test transform(M, p; dims=d) == expected
@test Transforms.apply(M, p; dims=d) == expected
@test p(M; dims=d) == expected

_M = copy(M)
transform!(_M, p; dims=d)
Transforms.apply!(_M, p; dims=d)
@test _M == expected
end
end
Expand All @@ -35,23 +35,23 @@
expected = (a = [1, 8, 27], b = [64, 125, 216])

@testset "all cols" begin
@test transform(nt, p) == expected
@test Transforms.apply(nt, p) == expected
@test p(nt) == expected

_nt = deepcopy(nt)
transform!(_nt, p)
Transforms.apply!(_nt, p)
@test _nt == expected
end

@testset "cols = $c" for c in (:a, :b)
nt_mutated = NamedTuple{(Symbol("$c"), )}((expected[c], ))
nt_expected = merge(nt, nt_mutated)

@test transform(nt, p; cols=[c]) == nt_expected
@test Transforms.apply(nt, p; cols=[c]) == nt_expected
@test p(nt; cols=[c]) == nt_expected

_nt = deepcopy(nt)
transform!(_nt, p; cols=[c])
Transforms.apply!(_nt, p; cols=[c])
@test _nt == nt_expected
end
end
Expand All @@ -61,7 +61,7 @@
expected = AxisArray([1 8 27; 64 125 216], foo=["a", "b"], bar=["x", "y", "z"])

@testset "dims = $d" for d in (Colon(), 1, 2)
@test transform(A, p; dims=d) == expected
@test Transforms.apply(A, p; dims=d) == expected
end

end
Expand All @@ -71,24 +71,24 @@
expected = AxisArray([1 8 27; 64 125 216], foo=["a", "b"], bar=["x", "y", "z"])
glennmoy marked this conversation as resolved.
Show resolved Hide resolved

@testset "dims = $d" for d in (Colon(), :foo, :bar)
@test transform(A, p; dims=d) == expected
@test Transforms.apply(A, p; dims=d) == expected
end

_A = copy(A)
transform!(_A, p)
Transforms.apply!(_A, p)
@test _A == expected
end

@testset "DataFrame" begin
df = DataFrame(:a => [1, 2, 3], :b => [4, 5, 6])
expected = DataFrame(:a => [1, 8, 27], :b => [64, 125, 216])

@test transform(df, p) == expected
@test transform(df, p; cols=[:a]) == DataFrame(:a => [1, 8, 27], :b => [4, 5, 6])
@test transform(df, p; cols=[:b]) == DataFrame(:a => [1, 2, 3], :b => [64, 125, 216])
@test Transforms.apply(df, p) == expected
@test Transforms.apply(df, p; cols=[:a]) == DataFrame(:a => [1, 8, 27], :b => [4, 5, 6])
@test Transforms.apply(df, p; cols=[:b]) == DataFrame(:a => [1, 2, 3], :b => [64, 125, 216])

_df = deepcopy(df)
transform!(_df, p)
Transforms.apply!(_df, p)
@test _df == expected
end

glennmoy marked this conversation as resolved.
Show resolved Hide resolved
Expand Down