invenia · bencottier · Apr 7, 2021 · Apr 7, 2021 · Apr 7, 2021 · Apr 9, 2021
diff --git a/Project.toml b/Project.toml
@@ -1,11 +1,12 @@
 name = "AxisSets"
 uuid = "a1a1544e-ba16-4f6d-8861-e833517b754e"
 authors = ["Invenia Technical Computing Corporation"]
-version = "0.1.4"
+version = "0.1.5"
 
 [deps]
 AutoHashEquals = "15f4f7f2-30c1-5605-9d31-71845cf9641f"
 AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5"
+FeatureTransforms = "8fd68953-04b8-4117-ac19-158bf6de9782"
 Impute = "f7bf1975-0170-51b9-8c5f-a992d46b9575"
 NamedDims = "356022a1-0364-5f58-8944-0da4b18d706f"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
@@ -14,11 +15,12 @@ ReadOnlyArrays = "988b38a3-91fc-5605-94a2-ee2116b3bd83"
 [compat]
 AutoHashEquals = "0.2"
 AxisKeys = "0.1"
+FeatureTransforms = "0.3"
 Impute = "0.6"
 NamedDims = "0.2"
 OrderedCollections = "1"
 ReadOnlyArrays = "0.1"
-julia = "1.3"
+julia = "1.5"
 
 [extras]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"

diff --git a/src/AxisSets.jl b/src/AxisSets.jl
@@ -2,6 +2,7 @@ module AxisSets
 
 using AutoHashEquals
 using AxisKeys
+using FeatureTransforms
 using Impute
 using NamedDims
 using OrderedCollections
@@ -88,5 +89,6 @@ include("dataset.jl")
 include("indexing.jl")
 include("functions.jl")
 include("impute.jl")
+include("featuretransforms.jl")
 
 end
diff --git a/src/featuretransforms.jl b/src/featuretransforms.jl
@@ -0,0 +1,154 @@
+FeatureTransforms.is_transformable(::KeyedDataset) = true  # trait opt-in for KeyedDataset
+
+_transform_pattern(::Colon) = Pattern[(:__,)]  # `dims=:` -> the single pattern (:__,)
+_transform_pattern(dim::Symbol) = _transform_pattern((dim,))  # one dim == 1-tuple of dims
+_transform_pattern(dims) = Pattern[(:__, dim) for dim in dims]  # one pattern per dim
+_transform_pattern(keys, dims) = isempty(keys) ? _transform_pattern(dims) : Pattern[keys...]
+
+"""
+    FeatureTransforms.apply(ds::KeyedDataset, t::Transform, [key]; dims=:, kwargs...)
+
+Transform the components of a [`KeyedDataset`](@ref) with the given `Transform`.
+The result is a new dataset with the same constraints, holding the transformed components.
+
+Pass a [`Pattern`](@ref) `key` to restrict the transform to the components it matches.
+When no `key` is given, the components to transform are chosen from `dims` instead.
+
+Keyword arguments, `dims` included, are forwarded to the `FeatureTransforms.apply` call
+made for each selected component.
+
+# Example
+```jldoctest
+julia> using AxisKeys, FeatureTransforms; using AxisSets: KeyedDataset, Pattern, flatten;
+
+julia> ds = KeyedDataset(
+           flatten([
+               :train => [
+                   :load => KeyedArray([7.0 7.7; 8.0 8.2; 9.0 9.9]; time=1:3, loc=[:x, :y]),
+                   :price => KeyedArray([-2.0 4.0; 3.0 2.0; -1.0 -1.0]; time=1:3, id=[:a, :b]),
+               ],
+               :predict => [
+                   :load => KeyedArray([7.0 7.7; 8.1 7.9; 9.0 9.9]; time=1:3, loc=[:x, :y]),
+                   :price => KeyedArray([0.5 -1.0; -5.0 -2.0; 0.0 1.0]; time=1:3, id=[:a, :b]),
+               ]
+           ])...
+       );
+
+julia> p = Power(2);
+
+julia> r = FeatureTransforms.apply(ds, p, (:_, :price, :_));
+
+julia> [k => parent(parent(v)) for (k, v) in r.data]
+4-element Vector{Pair{Tuple{Symbol, Symbol}, Matrix{Float64}}}:
+    (:train, :load) => [7.0 7.7; 8.0 8.2; 9.0 9.9]
+   (:train, :price) => [4.0 16.0; 9.0 4.0; 1.0 1.0]
+  (:predict, :load) => [7.0 7.7; 8.1 7.9; 9.0 9.9]
+ (:predict, :price) => [0.25 1.0; 25.0 4.0; 0.0 1.0]
+```
+"""
+function FeatureTransforms.apply(
+    ds::KeyedDataset, t::Transform, keys...;
+    dims=:, kwargs...
+)
+    patterns = _transform_pattern(keys, dims)
+    transform(component) = FeatureTransforms.apply(component, t; dims=dims, kwargs...)
+    return map(transform, ds, patterns...)
+end
+
+"""
+    FeatureTransforms.apply!(ds::KeyedDataset, t::Transform, [key]; dims=:, kwargs...)
+
+Run `FeatureTransforms.apply!` with the given `Transform` on the components of a
+[`KeyedDataset`](@ref), and return the dataset that `map` produces from the results.
+
+Pass a [`Pattern`](@ref) `key` to restrict the transform to the components it matches.
+When no `key` is given, the components to transform are chosen from `dims` instead.
+
+Keyword arguments, `dims` included, are forwarded to the `FeatureTransforms.apply!` call
+made for each selected component.
+"""
+function FeatureTransforms.apply!(
+    ds::KeyedDataset, t::Transform, keys...;
+    dims=:, kwargs...
+)
+    patterns = _transform_pattern(keys, dims)
+    transform!(component) = FeatureTransforms.apply!(component, t; dims=dims, kwargs...)
+    return map(transform!, ds, patterns...)
+end
+
+"""
+    FeatureTransforms.apply_append(
+        ds::KeyedDataset, t::Transform, [key];
+        dims=:, inner=false, component_name=:component, kwargs...
+    )
+
+Transform components of a [`KeyedDataset`](@ref) and append the results.
+
+Pass a [`Pattern`](@ref) `key` to restrict the transform to the components it matches.
+When no `key` is given, the components to transform are chosen from `dims` instead.
+
+With `inner=true`, `FeatureTransforms.apply_append` is called on each selected component,
+and the result is a new dataset with the same constraints but transformed components.
+
+With `inner=false` (the default), each selected component is instead transformed with
+`FeatureTransforms.apply` and merged in as a new component named `component_name`.
+
+Keyword arguments, `dims` included, are forwarded to whichever `FeatureTransforms` method
+is called for a component.
+
+# Example
+```jldoctest
+julia> using AxisKeys, FeatureTransforms; using AxisSets: KeyedDataset, Pattern, flatten;
+
+julia> ds = KeyedDataset(
+           flatten([
+               :train => [
+                   :load => KeyedArray([7.0 7.7; 8.0 8.2; 9.0 9.9]; time=1:3, loc=[:x, :y]),
+                   :price => KeyedArray([-2.0 4.0; 3.0 2.0; -1.0 -1.0]; time=1:3, id=[:a, :b]),
+               ],
+               :predict => [
+                   :load => KeyedArray([7.0 7.7; 8.1 7.9; 9.0 9.9]; time=1:3, loc=[:x, :y]),
+                   :price => KeyedArray([0.5 -1.0; -5.0 -2.0; 0.0 1.0]; time=1:3, id=[:a, :b]),
+               ]
+           ])...
+       );
+
+julia> p = Power(2);
+
+julia> r = FeatureTransforms.apply_append(ds, p, (:_, :price, :_); component_name=:price2);
+
+julia> [k => parent(parent(v)) for (k, v) in r.data]
+6-element Vector{Pair{Tuple{Symbol, Symbol}, Matrix{Float64}}}:
+     (:train, :load) => [7.0 7.7; 8.0 8.2; 9.0 9.9]
+    (:train, :price) => [-2.0 4.0; 3.0 2.0; -1.0 -1.0]
+   (:predict, :load) => [7.0 7.7; 8.1 7.9; 9.0 9.9]
+  (:predict, :price) => [0.5 -1.0; -5.0 -2.0; 0.0 1.0]
+   (:train, :price2) => [4.0 16.0; 9.0 4.0; 1.0 1.0]
+ (:predict, :price2) => [0.25 1.0; 25.0 4.0; 0.0 1.0]
+```
+"""
+function FeatureTransforms.apply_append(
+    ds::KeyedDataset, t::Transform, keys...;
+    dims=:, inner=false, component_name=:component, kwargs...
+)
+    patterns = _transform_pattern(keys, dims)
+
+    if inner  # inner mode: append within each matching component, then stop
+        return map(ds, patterns...) do component
+            FeatureTransforms.apply_append(component, t; dims=dims, kwargs...)
+        end
+    end
+
+    # Outer mode: dimpaths matching any pattern, minus their last element, give the
+    # (de-duplicated) `ds.data` keys of the components to transform.
+    matched = unique(
+        path[1:end-1] for path in dimpaths(ds) if any(p -> path in p, patterns)
+    )
+
+    # Each result is stored under its source key with the last segment replaced.
+    appended = map(matched) do key
+        transformed = FeatureTransforms.apply(ds.data[key], t; dims=dims, kwargs...)
+        (key[1:end-1]..., component_name) => transformed
+    end
+    return merge(ds, KeyedDataset(appended...))
+end
diff --git a/src/impute.jl b/src/impute.jl
@@ -126,12 +126,12 @@ julia> [k => parent(parent(v)) for (k, v) in Impute.filter(ds; dims=:loc).data]
 """
 Impute.apply(ds::KeyedDataset, f::Filter; dims) = Impute.apply!(deepcopy(ds), f; dims=dims)
 
-_pattern(dims::Pattern) = dims
-_pattern(dims::Tuple) = Pattern(dims)
-_pattern(dims) = Pattern(:__, dims)
+_impute_pattern(dims::Pattern) = dims  # a Pattern is used unchanged
 function Base.mapslices(f::Function, ds::KeyedDataset, keys...; dims) 
     patterns = if isempty(keys) 
         dims isa Symbol ? Pattern[(:__, dims)] : Pattern[(:__, d) for d in dims] 
     else 
         Pattern[keys...] 
     end 
 function Base.mapslices(f::Function, ds::KeyedDataset, keys...; dims) 
     patterns = if isempty(keys) 
         dims isa Symbol ? Pattern[(:__, dims)] : Pattern[(:__, d) for d in dims] 
     else 
         Pattern[keys...] 
     end 
+_impute_pattern(dims::Tuple) = Pattern(dims)  # a Tuple is passed to the Pattern constructor
+_impute_pattern(dims) = Pattern(:__, dims)  # anything else gets a leading `:__` segment
 
 function Impute.apply!(ds::KeyedDataset, f::Filter; dims)
-    pattern = _pattern(dims)
+    pattern = _impute_pattern(dims)
     dim = pattern.segments[end]
 
     dim in (:_, :__) && throw(ArgumentError(