Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add HoD transform #11

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,24 @@ authors = ["Invenia Technical Computing Corporation"]
version = "0.1.0"

[deps]
AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
AxisArrays = "0.4"
AxisKeys = "0.1"
DataFrames = "0.22"
Tables = "1.3"
TimeZones = "1"
julia = "1.5"

[extras]
AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
TimeZones = "f269a46b-ccf7-5d73-abea-4c690281aa53"

[targets]
test = ["AxisArrays", "AxisKeys", "DataFrames", "Test"]
test = ["AxisArrays", "AxisKeys", "DataFrames", "Test", "TimeZones"]
5 changes: 4 additions & 1 deletion src/Transforms.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
module Transforms

using AxisKeys
using Dates
using Tables

export LinearCombination, Transform, Power
export HoD, LinearCombination, Transform, Power
export transform, transform!

include("utils.jl")
include("transformers.jl")
include("linear_combination.jl")
include("power.jl")
include("temporal.jl")

end
9 changes: 9 additions & 0 deletions src/temporal.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""
HoD <: Transform
nicoleepp marked this conversation as resolved.
Show resolved Hide resolved

Get the hour of day corresponding to the data.
"""
struct HoD <: Transform end  # no fields: behaviour is selected by dispatching on the type


# Element-wise hour extraction for the `HoD` transform.
# NOTE(review): `hour` is presumably `Dates.hour` (the module does `using Dates`) — confirm.
function _apply(x, ::HoD)
    return broadcast(hour, x)
end
75 changes: 62 additions & 13 deletions src/transformers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,30 +28,81 @@ Non-mutating version of [`transform!`](@ref).
function transform end

"""
Transforms.apply!(data::T, ::Transform; kwargs...) -> T
apply(data::T, ::Transform; kwargs...) -> T

Applies the [`Transform`](@ref) to the data. New transforms should usually only extend
`_apply` which this method delegates to.

Where possible, this should be extended for new data types `T`.
"""
function apply end

"""
apply!(data::T, ::Transform; kwargs...) -> T

Applies the [`Transform`](@ref) mutating the input `data`. New transforms should usually
only extend `_apply!` which this method delegates to.

Applies the [`Transform`](@ref) mutating the input `data`.
Where possible, this should be extended for new data types `T`.
"""
function apply! end


"""
Transforms.apply(data::T, ::Transform; kwargs...) -> T
apply(A::AbstractArray, ::Transform; dims=:, inds=:, kwargs...)

Applies the [`Transform`](@ref) to each element of `A`.

Optionally specify the `dims` to apply the [`Transform`](@ref) along certain dimensions
and `inds` will be the indices to apply the Transform to along the `dims` specified.

Non-mutating version of [`apply!`](@ref), which it delegates to by default.
Does not need to be extended unless a mutating [`Transform`](@ref) is not possible.
If `dims` === : (all dimensions), then `inds` will be the global indices of the array,
instead of being relative to a certain dimension.
"""
function apply end
function apply(A::AbstractArray, t::Transform; dims=:, inds=:, kwargs...)
if dims === Colon()
if inds === Colon()
return _apply(A, t; kwargs...)
else
if A isa KeyedArray
# KeyedArrays don't support indexing into them via an array of indices
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, that's inconvenient. There is the syntax of A[dimname1=inds, dimname2=inds, ...] but I'm not sure if that can be written for all dimnames(A) generically. There's also syntax of A(dimname1=[axiskey11, axiskey12], dimname2=[axiskey21, axiskey22], ...) which we could support.

I don't mind this as-is for now.

nicoleepp marked this conversation as resolved.
Show resolved Hide resolved
return [_apply(A[ind], t; kwargs...) for ind in inds]
else
# Apply to global indices `inds`, not `inds` relative to a certain dimension
glennmoy marked this conversation as resolved.
Show resolved Hide resolved
return _apply(A[inds], t; kwargs...)
end
end
end

return mapslices(x -> _apply(x[inds], t; kwargs...), A, dims=dims)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm confused why this works for KeyedArray given

KeyedArrays don't support indexing into them via an array of indices

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inside `mapslices`, `x` is not a KeyedArray; it's a view of the array.

end

"""
apply!(A::AbstractArray{T}, ::Transform; dims=:, kwargs...) where T <: Real
apply(table, ::Transform; cols=nothing, kwargs...)::Vector

Applies the [`Transform`](@ref) to each of the specified columns in the `table`.
If no `cols` are specified, then the [`Transform`](@ref) is applied to all columns.
"""
function apply(table, t::Transform; cols=nothing, kwargs...)
    # Reject non-table inputs. The error names `apply` (the function the caller invoked),
    # not its mutating counterpart `apply!`.
    Tables.istable(table) || throw(MethodError(apply, (table, t)))

    # Column-oriented accessor for the table; columns are only read here and handed on
    # to `_apply`, one call per selected column.
    columntable = Tables.columns(table)

    # `cols === nothing` means "every column"; otherwise use the caller's selection as-is.
    cnames = cols === nothing ? propertynames(columntable) : cols
    return [_apply(getproperty(columntable, cname), t; kwargs...) for cname in cnames]
end

# Generic non-mutating fallback: run the in-place kernel on whatever `_try_copy(x)`
# returns (presumably a copy of `x`), leaving the caller's data untouched.
# Delegates to the internal `_apply!` rather than the public `apply!`, so this internal
# hook does not re-enter the public entry point and its `dims`/`cols` keyword handling.
_apply(x, t::Transform; kwargs...) = _apply!(_try_copy(x), t; kwargs...)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
_apply(x, t::Transform; kwargs...) = apply!(_try_copy(x), t; kwargs...)
_apply(x, t::Transform; kwargs...) = _apply!(_try_copy(x), t; kwargs...)


"""
apply!(A::AbstractArray, ::Transform; dims=:, kwargs...)

Applies the [`Transform`](@ref) to each element of `A`.
Optionally specify the `dims` to apply the [`Transform`](@ref) along certain dimensions.
"""
function apply!(
A::AbstractArray{T}, t::Transform; dims=:, kwargs...
) where T <: Real
function apply!(A::AbstractArray, t::Transform; dims=:, kwargs...)
nicoleepp marked this conversation as resolved.
Show resolved Hide resolved
dims == Colon() && return _apply!(A, t; kwargs...)

for x in eachslice(A; dims=dims)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mapslices here too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This works just as well since it's mutating the array. I briefly tried using mapslices and things were breaking, so I'd opt to leave it as is. It does return the result we expect.

Expand All @@ -61,10 +112,8 @@ function apply!(
return A
end

apply(x, t::Transform; kwargs...) = apply!(_try_copy(x), t; kwargs...)

"""
Transforms.apply!(table::T, ::Transform; cols=nothing)::T where T
apply!(table::T, ::Transform; cols=nothing)::T where T

Applies the [`Transform`](@ref) to each of the specified columns in the `table`.
If no `cols` are specified, then the [`Transform`](@ref) is applied to all columns.
Expand Down
94 changes: 49 additions & 45 deletions test/power.jl
Original file line number Diff line number Diff line change
Expand Up @@ -30,75 +30,79 @@
end
end

@testset "NamedTuple" begin
nt = (a = [1, 2, 3], b = [4, 5, 6])
expected = (a = [1, 8, 27], b = [64, 125, 216])

@testset "all cols" begin
transformed = Transforms.apply(nt, p)
@test transformed isa NamedTuple{(:a, :b)}
@test transformed == expected
@test p(nt) == expected

_nt = deepcopy(nt)
Transforms.apply!(_nt, p)
@test _nt == expected
end

@testset "cols = $c" for c in (:a, :b)
nt_mutated = NamedTuple{(Symbol("$c"), )}((expected[c], ))
nt_expected = merge(nt, nt_mutated)

@test Transforms.apply(nt, p; cols=[c]) == nt_expected
@test p(nt; cols=[c]) == nt_expected

_nt = deepcopy(nt)
Transforms.apply!(_nt, p; cols=[c])
@test _nt == nt_expected
end
end

@testset "AxisArray" begin
A = AxisArray([1 2 3; 4 5 6], foo=["a", "b"], bar=["x", "y", "z"])
expected = AxisArray([1 8 27; 64 125 216], foo=["a", "b"], bar=["x", "y", "z"])
expected = [1 8 27; 64 125 216]
axisarray_expected = AxisArray([1 8 27; 64 125 216], foo=["a", "b"], bar=["x", "y", "z"])

@testset "dims = $d" for d in (Colon(), 1, 2)
transformed = Transforms.apply(A, p; dims=d)
@test transformed isa AxisArray
@test transformed == expected
@test Transforms.apply(A, p; dims=d) == expected
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't figure out why this returns a plain array?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because we haven't asked apply to return a KeyedArray - just an array.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for AxisArray. If I follow apply(A::AbstractArray, t::Transform; dims=:, inds=:, kwargs...) then I think it does _apply (without wrapping in an array), which I thought would preserve the type. But maybe this relates to your suggestion? i.e. it's currently calling apply! when it should be _apply! https://github.com/invenia/Transforms.jl/pull/11/files#r573720397

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for AxisArray

Ah, my mistake. Then this is AxisArrays being annoying and not preserving the type when operating on them.

julia> A = AxisArray([1 2 3; 4 5 6], foo=["a", "b"], bar=["x", "y", "z"]);

julia> A .+ 1
2×3 Array{Int64,2}:
 2  3  4
 5  6  7

end

_A = copy(A)
Transforms.apply!(_A, p)
@test _A == axisarray_expected
@test _A isa AxisArray
end

@testset "AxisKey" begin
A = KeyedArray([1 2 3; 4 5 6], foo=["a", "b"], bar=["x", "y", "z"])
expected = KeyedArray([1 8 27; 64 125 216], foo=["a", "b"], bar=["x", "y", "z"])
expected = [1 8 27; 64 125 216]
axiskey_expected = KeyedArray([1 8 27; 64 125 216], foo=["a", "b"], bar=["x", "y", "z"])

@testset "dims = $d" for d in (Colon(), :foo, :bar)
transformed = Transforms.apply(A, p; dims=d)
@test transformed isa KeyedArray
@test transformed == expected
@test Transforms.apply(A, p; dims=d) == expected
end

_A = copy(A)
Transforms.apply!(_A, p)
@test _A == expected
@test _A == axiskey_expected
@test _A isa KeyedArray
end

# Exercises `Transforms.apply`, the callable form `p(...)`, and `Transforms.apply!` on a
# NamedTuple of column vectors.
# NOTE(review): `p` is defined outside this view; the expected values below are the cubes
# of the inputs, so it is presumably a cubing transform — confirm.
@testset "NamedTuple" begin
nt = (a = [1, 2, 3], b = [4, 5, 6])
# Non-mutating calls are compared against a plain vector of transformed columns ...
expected = [[1, 8, 27], [64, 125, 216]]
# ... while the in-place call is compared against a NamedTuple with the same names.
nt_expected = (a = [1, 8, 27], b = [64, 125, 216])

@testset "all cols" begin
@test Transforms.apply(nt, p) == expected
@test p(nt) == expected

# Work on a deep copy so `nt` stays pristine for the other testsets.
_nt = deepcopy(nt)
Transforms.apply!(_nt, p)
@test _nt == nt_expected
@test _nt isa NamedTuple
end

@testset "cols = $c" for c in (:a, :b)
# Expected in-place result: only column `c` replaced by its transformed values.
nt_mutated = NamedTuple{(Symbol("$c"), )}((nt_expected[c], ))
nt_expected_col = merge(nt, nt_mutated)

# Expected non-mutating result: a one-element vector holding the transformed column.
expected = [nt_expected[c]]
@test Transforms.apply(nt, p; cols=[c]) == expected
@test p(nt; cols=[c]) == expected

_nt = deepcopy(nt)
Transforms.apply!(_nt, p; cols=[c])
@test _nt == nt_expected_col
@test _nt isa NamedTuple
end
end

@testset "DataFrame" begin
df = DataFrame(:a => [1, 2, 3], :b => [4, 5, 6])
expected = DataFrame(:a => [1, 8, 27], :b => [64, 125, 216])
expected = [[1, 8, 27], [64, 125, 216]]
df_expected = DataFrame(:a => [1, 8, 27], :b => [64, 125, 216])

transformed = Transforms.apply(df, p)
@test transformed isa DataFrame
@test transformed == expected
@test Transforms.apply(df, p) == expected

@test Transforms.apply(df, p; cols=[:a]) == DataFrame(:a => [1, 8, 27], :b => [4, 5, 6])
@test Transforms.apply(df, p; cols=[:b]) == DataFrame(:a => [1, 2, 3], :b => [64, 125, 216])
@test Transforms.apply(df, p; cols=[:a]) == [[1, 8, 27]]
@test Transforms.apply(df, p; cols=[:b]) == [[64, 125, 216]]

_df = deepcopy(df)
Transforms.apply!(_df, p)
@test _df == expected
@test _df == df_expected
@test _df isa DataFrame
end

end
5 changes: 4 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
using AxisArrays
using AxisKeys
using DataFrames: DataFrame
using Dates
using Test
using TimeZones
using Transforms
using Transforms: _try_copy
using Test

# Top-level test suite: pulls in each per-feature test file, in this order.
@testset "Transforms.jl" begin
    for testfile in ("linear_combination.jl", "power.jl", "temporal.jl")
        include(testfile)
    end
end
Loading