invenia · glennmoy · Apr 16, 2021 · Apr 7, 2021 · Apr 16, 2021 · Apr 16, 2021
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "FeatureTransforms"
 uuid = "8fd68953-04b8-4117-ac19-158bf6de9782"
 authors = ["Invenia Technical Computing Corporation"]
-version = "0.3.2"
+version = "0.3.3-DEV"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"

diff --git a/src/FeatureTransforms.jl b/src/FeatureTransforms.jl
@@ -11,6 +11,7 @@ export Transform
 export is_transformable, transform, transform!
 
 include("utils.jl")
+include("traits.jl")
 include("transform.jl")
 include("apply.jl")
 

diff --git a/src/linear_combination.jl b/src/linear_combination.jl
@@ -7,6 +7,8 @@ struct LinearCombination <: Transform
     coefficients::Vector{Real}
 end
 
+cardinality(::LinearCombination) = ManyToOne()
+
 """
     apply(
         ::AbstractArray{<:Real, N}, ::LinearCombination; dims=1, inds=:

diff --git a/src/one_hot_encoding.jl b/src/one_hot_encoding.jl
@@ -32,6 +32,8 @@ struct OneHotEncoding{R<:Real} <: Transform
     end
 end
 
+cardinality(::OneHotEncoding) = OneToMany()
+
 function OneHotEncoding(possible_values::AbstractVector{T}) where T
     return OneHotEncoding{Bool}(possible_values)
 end

diff --git a/src/periodic.jl b/src/periodic.jl
@@ -33,6 +33,8 @@ struct Periodic{P, S} <: Transform
     end
 end
 
+cardinality(::Periodic) = OneToOne()
+
 """
     Periodic(f, period) -> Periodic
 

diff --git a/src/power.jl b/src/power.jl
@@ -7,4 +7,6 @@ struct Power <: Transform
     exponent::Real
 end
 
+cardinality(::Power) = OneToOne()
+
 _apply(x, P::Power; kwargs...) = x .^ P.exponent
diff --git a/src/scaling.jl b/src/scaling.jl
@@ -13,6 +13,8 @@ Represents the no-op scaling which simply returns the `data` it is applied on.
 struct IdentityScaling <: AbstractScaling end
 IdentityScaling(args...) = IdentityScaling()
 
+cardinality(::IdentityScaling) = OneToOne()
+
 @inline _apply(x, ::IdentityScaling; kwargs...) = x
 
 """
@@ -69,6 +71,8 @@ end
 
 compute_stats(x) = (mean(x), std(x))
 
+cardinality(::MeanStdScaling) = OneToOne()
+
 function _apply(A::AbstractArray, scaling::MeanStdScaling; inverse=false, eps=1e-3, kwargs...)
     inverse && return scaling.μ .+ scaling.σ .* A
     # Avoid division by 0

diff --git a/src/temporal.jl b/src/temporal.jl
@@ -4,5 +4,6 @@
 Get the hour of day corresponding to the data.
 """
 struct HoD <: Transform end
+cardinality(::HoD) = OneToOne()
 
 _apply(x, ::HoD; kwargs...) = hour.(x)
diff --git a/src/traits.jl b/src/traits.jl
@@ -0,0 +1,48 @@
+"""
+    type Cardinality
+
+A trait describing the cardinality of a [`Transform`](@ref). Available cardinalities are:
+[`OneToOne`](@ref), [`ManyToOne`](@ref), [`OneToMany`](@ref), and [`ManyToMany`](@ref).
+"""
+abstract type Cardinality end
+
+"""
+    OneToOne <: Cardinality
+
+Transforms that map each input to exactly one output: `x → y`.
+Examples: [`Power`](@ref), [`Periodic`](@ref).
+"""
+struct OneToOne <: Cardinality end
+
+"""
+    ManyToOne <: Cardinality
+
+Transforms that map many inputs to one output: `(x_1, x_2, ..., x_n) → y`.
+These are typically reduction operations.
+Examples: [`LinearCombination`](@ref).
+"""
+struct ManyToOne <: Cardinality end
+
+"""
+    OneToMany <: Cardinality
+
+Transforms that map one input to many outputs: `x → (y_1, y_2, ..., y_n)`.
+Examples: [`OneHotEncoding`](@ref).
+"""
+struct OneToMany <: Cardinality end
+
+"""
+    ManyToMany <: Cardinality
+
+Transforms that map many inputs to many outputs: `(x_1, x_2, ..., x_m) → (y_1, y_2, ..., y_n)`.
+Examples: Principal Component Analysis (not implemented).
+"""
+struct ManyToMany <: Cardinality end
+
+
+"""
+    cardinality(transform) -> Cardinality
+
+Returns the [`Cardinality`](@ref) of the `transform`.
+"""
+function cardinality end
diff --git a/test/linear_combination.jl b/test/linear_combination.jl
@@ -1,6 +1,8 @@
 @testset "linear combination" begin
 
-    @test LinearCombination([1, -1]) isa Transform
+    lc = LinearCombination([1, -1])
+    @test lc isa Transform
+    @test cardinality(lc) == ManyToOne()
 
     @testset "Vector" begin
 

diff --git a/test/one_hot_encoding.jl b/test/one_hot_encoding.jl
@@ -3,6 +3,7 @@
     categories = ["foo", "bar", "baz"]
     ohe = OneHotEncoding(categories)
     @test ohe isa Transform
+    @test cardinality(ohe) == OneToMany()
 
     @testset "Vector" begin
 

diff --git a/test/periodic.jl b/test/periodic.jl
@@ -322,6 +322,7 @@
                 @test p isa Transform
                 @test p.period == Day(5)
                 @test p.phase_shift == Day(2)
+                @test cardinality(p) == OneToOne()
             end
 
             @testset "No phase_shift" begin

diff --git a/test/power.jl b/test/power.jl
@@ -2,6 +2,7 @@
 
     p = Power(3)
     @test p isa Transform
+    @test cardinality(p) == OneToOne()
 
     # TODO: all of these should be part of some test utils
     @testset "Vector" begin

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -5,6 +5,7 @@ using Dates
 using Documenter: doctest
 using FeatureTransforms
 using FeatureTransforms: _periodic
+using FeatureTransforms: cardinality, OneToOne, OneToMany, ManyToOne, ManyToMany
 using Test
 using TimeZones
 
@@ -19,4 +20,5 @@ using TimeZones
     include("scaling.jl")
     include("temporal.jl")
     include("transform.jl")
+    include("traits.jl")
 end
diff --git a/test/scaling.jl b/test/scaling.jl
@@ -3,6 +3,7 @@
     @testset "IdentityScaling" begin
         scaling = IdentityScaling()
         @test scaling isa Transform
+        @test cardinality(scaling) == OneToOne()
 
         @testset "Arguments do nothing" begin
             @test IdentityScaling(123) == IdentityScaling()
@@ -233,6 +234,7 @@
             @testset "simple" for x in (M, nt)
                 x_copy = deepcopy(x)
                 scaling = MeanStdScaling(x)
+                @test cardinality(scaling) == OneToOne()
                 @test scaling isa Transform
                 @test x == x_copy  # data is not mutated
                 # constructor uses all data by default

diff --git a/test/temporal.jl b/test/temporal.jl
@@ -2,6 +2,7 @@
 
     hod = HoD()
     @test hod isa Transform
+    @test cardinality(hod) == OneToOne()
 
     @testset "Vector" begin
         x = collect(DateTime(2020, 1, 1, 9, 0):Hour(1):DateTime(2020, 5, 7, 9, 0))

diff --git a/test/traits.jl b/test/traits.jl
@@ -0,0 +1,5 @@
+@testset "traits.jl" begin
+    for t in (OneToOne(), OneToMany(), ManyToOne(), ManyToMany())
+        @test t isa FeatureTransforms.Cardinality
+    end
+end
-Original file line number
+Diff line change
@@ Expand Up / @@ -7,6 +7,8 @@ struct LinearCombination <: Transform @@
         coefficients::Vector{Real}
     end
+    cardinality(::LinearCombination) = ManyToOne()
     """
         apply(
             ::AbstractArray{<:Real, N}, ::LinearCombination; dims=1, inds=:
@@ Expand Down @@