diff --git a/Project.toml b/Project.toml
index 075c199..c1fe007 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "FeatureTransforms"
 uuid = "8fd68953-04b8-4117-ac19-158bf6de9782"
 authors = ["Invenia Technical Computing Corporation"]
-version = "0.3.10"
+version = "0.3.11"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
diff --git a/docs/src/api.md b/docs/src/api.md
index 9f96ecf..a34c30e 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -15,7 +15,9 @@ Power
 Periodic
 MeanStdScaling
 IdentityScaling
+InverseHyperbolicSine
 LinearCombination
+LogTransform
 OneHotEncoding
 ```
 
diff --git a/src/FeatureTransforms.jl b/src/FeatureTransforms.jl
index ddb741c..f278731 100644
--- a/src/FeatureTransforms.jl
+++ b/src/FeatureTransforms.jl
@@ -8,6 +8,7 @@ using Tables
 export Transform, transform, transform!
 export HoD, LinearCombination, OneHotEncoding, Periodic, Power
 export AbstractScaling, IdentityScaling, MeanStdScaling
+export LogTransform, InverseHyperbolicSine
 
 include("utils.jl")
 include("traits.jl")
@@ -16,6 +17,7 @@ include("apply.jl")
 
 # Transform implementations
 include("linear_combination.jl")
+include("log.jl")
 include("one_hot_encoding.jl")
 include("periodic.jl")
 include("power.jl")
diff --git a/src/log.jl b/src/log.jl
new file mode 100644
--- /dev/null
+++ b/src/log.jl
@@ -0,0 +1,39 @@
+"""
+    LogTransform <: Transform
+
+Logarithmically transform the data through: sign(x) * log(|x| + 1).
+
+This allows transformations of all real numbers, not just positive ones.
+
+Passing `inverse=true` applies the inverse mapping: sign(x) * (exp(|x|) - 1).
+"""
+struct LogTransform <: Transform end
+cardinality(::LogTransform) = OneToOne()
+
+# `log1p`/`expm1` keep full precision when |x| is far below 1, where `log(abs(x) + 1)`
+# and `exp(abs(x)) - 1` would round to zero.
+_logtransform(x) = sign(x) * log1p(abs(x))
+_invlogtransform(x) = sign(x) * expm1(abs(x))
+
+function _apply(A::AbstractArray, transform::LogTransform; inverse=false, kwargs...)
+    inverse && return _invlogtransform.(A)
+    return _logtransform.(A)
+end
+
+
+"""
+    InverseHyperbolicSine <: Transform
+
+Logarithmically transform the data through: log(x + √(x² + 1)).
+
+This is the inverse hyperbolic sine.
+
+Passing `inverse=true` applies the hyperbolic sine instead.
+"""
+struct InverseHyperbolicSine <: Transform end
+cardinality(::InverseHyperbolicSine) = OneToOne()
+
+function _apply(A::AbstractArray, transform::InverseHyperbolicSine; inverse=false, kwargs...)
+    inverse && return sinh.(A)
+    return asinh.(A)
+end
diff --git a/test/log.jl b/test/log.jl
new file mode 100644
--- /dev/null
+++ b/test/log.jl
@@ -0,0 +1,55 @@
+@testset "log" begin
+
+    V1 = [1; -2; 3; 4; 0; -6]
+    V2 = -V1
+    M = [1 1 0.5; 0.0 1.0 2.0]
+
+    @testset "LogTransform" begin
+
+        logV1 = [log(2); -log(3); log(4); log(5); log(1); -log(7)]
+        logV2 = -logV1
+        logM = [log(2) log(2) log(1.5); log(1) log(2) log(3)]
+
+        @testset "simple" for x in (V1, V2, M)
+            transform = LogTransform()
+            @test cardinality(transform) == OneToOne()
+            @test transform isa Transform
+        end
+
+        @testset "Apply" for (x, y) in ((V1, logV1), (V2, logV2), (M, logM))
+            f = LogTransform()
+            transformed = FeatureTransforms.apply(x, f)
+            @test transformed ≈ y atol=1e-5
+            @test FeatureTransforms.apply(transformed, f; inverse=true) ≈ x atol=1e-5
+        end
+
+        @testset "tiny magnitudes" begin
+            # 1 + 1e-20 rounds to 1 in Float64, so a naive log(|x| + 1) would return 0.
+            f = LogTransform()
+            x = [1e-20, -1e-20]
+            transformed = FeatureTransforms.apply(x, f)
+            @test transformed ≈ x rtol=1e-12
+            @test FeatureTransforms.apply(transformed, f; inverse=true) ≈ x rtol=1e-12
+        end
+    end
+
+    @testset "InverseHyperbolicSine" begin
+
+        logV1 = asinh.(V1)
+        logV2 = asinh.(V2)
+        logM = asinh.(M)
+
+        @testset "simple" for x in (V1, V2, M)
+            transform = InverseHyperbolicSine()
+            @test cardinality(transform) == OneToOne()
+            @test transform isa Transform
+        end
+
+        @testset "Apply" for (x, y) in ((V1, logV1), (V2, logV2), (M, logM))
+            f = InverseHyperbolicSine()
+            transformed = FeatureTransforms.apply(x, f)
+            @test transformed ≈ y atol=1e-5
+            @test FeatureTransforms.apply(transformed, f; inverse=true) ≈ x atol=1e-5
+        end
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 7fedca2..eaa447c 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -17,6 +17,7 @@ using TimeZones
     Sys.WORD_SIZE == 64 && v"1.6" <= VERSION < v"1.7" && doctest(FeatureTransforms)
 
     include("linear_combination.jl")
+    include("log.jl")
     include("one_hot_encoding.jl")
     include("periodic.jl")
     include("power.jl")