From 97e2d772cde5541811cf4f3b9e9f3e41ee3ca434 Mon Sep 17 00:00:00 2001 From: Omar Elrefaei <17922991+Omar-Elrefaei@users.noreply.github.com> Date: Tue, 23 Nov 2021 13:11:50 -0500 Subject: [PATCH] wip: implement Rename transform (#15) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * wip: implement Rename transform * wip: implement Rename transform * fix * Actually address brought up issues * refactor and add a pairs of strings constructor * add tests * confirm to Tables.jl spec * assert that all requested renames exist * update * make it work with a single Pair, and add test * Update src/transforms/rename.jl * Update src/transforms/rename.jl * Update src/transforms/rename.jl * Update src/transforms/rename.jl * Update src/transforms/rename.jl Co-authored-by: Júlio Hoffimann --- src/TableTransforms.jl | 1 + src/transforms.jl | 1 + src/transforms/rename.jl | 46 ++++++++++++++++++++++++++++++++++ test/transforms.jl | 53 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 101 insertions(+) create mode 100644 src/transforms/rename.jl diff --git a/src/TableTransforms.jl b/src/TableTransforms.jl index 060ff79b..9e24008d 100644 --- a/src/TableTransforms.jl +++ b/src/TableTransforms.jl @@ -31,6 +31,7 @@ export # built-in Select, Reject, + Rename, Identity, Center, Scale, diff --git a/src/transforms.jl b/src/transforms.jl index e035b13a..82613ca0 100644 --- a/src/transforms.jl +++ b/src/transforms.jl @@ -213,6 +213,7 @@ end include("transforms/identity.jl") include("transforms/select.jl") +include("transforms/rename.jl") include("transforms/center.jl") include("transforms/scale.jl") include("transforms/zscore.jl") diff --git a/src/transforms/rename.jl b/src/transforms/rename.jl new file mode 100644 index 00000000..db80f8d0 --- /dev/null +++ b/src/transforms/rename.jl @@ -0,0 +1,46 @@ +# ------------------------------------------------------------------ +# Licensed under the MIT License. See LICENSE in the project root. 
+# ------------------------------------------------------------------ + +""" + Rename(:col₁ => :newcol₁, :col₂ => :newcol₂, ..., :colₙ => :newcolₙ) + +The transform that renames `col₁` to `newcol₁`, `col₂` to `newcol₂`, ... +""" +struct Rename <: Stateless + names::Dict{Symbol,Symbol} +end + +pairsyms(x::Pair) = Symbol(first(x)) => Symbol(last(x)) + +Rename(names::Pair) = pairsyms(names) |> Dict |> Rename +Rename(names...) = pairsyms.(names) |> Dict |> Rename + +function apply(transform::Rename, table) + _rename(transform.names, table) +end + +function revert(transform::Rename, table, cache) + # reversing the key-value pairs of the Dict + newnames = Dict(new => old for (old, new) in transform.names) + _rename(newnames, table) |> first +end + + +function _rename(names, table) + oldnames = Tables.columnnames(table) + + # check if requested renames exist in the table + @assert keys(names) ⊆ oldnames "invalid column names" + + # use new names if necessary + newnames = map(oldnames) do oldname + oldname in keys(names) ? names[oldname] : oldname + end + + cols = Tables.columns(table) + vals = [Tables.getcolumn(cols, name) for name in oldnames] + 𝒯 = (; zip(newnames, vals)...) 
|> Tables.materializer(table) + + 𝒯, nothing +end \ No newline at end of file diff --git a/test/transforms.jl b/test/transforms.jl index 16c86cf6..6c5d57e7 100644 --- a/test/transforms.jl +++ b/test/transforms.jl @@ -131,6 +131,59 @@ @test n1 == n2 end + @testset "Rename" begin + a = rand(4000) + b = rand(4000) + c = rand(4000) + d = rand(4000) + t = Table(; a, b, c, d) + + T = Rename(Dict(:a => :x)) + n, c = apply(T, t) + @test Tables.columnnames(n) == (:x, :b, :c, :d) + tₒ = revert(T, n, c) + @test t == tₒ + + T = Rename(Dict(:a => :x, :c => :y)) + n, c = apply(T, t) + @test Tables.columnnames(n) == (:x, :b, :y, :d) + tₒ = revert(T, n, c) + @test t == tₒ + + # rename with string pairs + T = Rename("a" => "x", "c" => "y") + n, c = apply(T, t) + @test Tables.columnnames(n) == (:x, :b, :y, :d) + tₒ = revert(T, n, c) + @test t == tₒ + + # rename with symbol pairs + T = Rename(:a => :x, :c => :y) + n, c = apply(T, t) + @test Tables.columnnames(n) == (:x, :b, :y, :d) + tₒ = revert(T, n, c) + @test t == tₒ + + # rename with mixed pairs + T = Rename("a" => :x) + n, c = apply(T, t) + @test Tables.columnnames(n) == (:x, :b, :c, :d) + tₒ = revert(T, n, c) + @test t == tₒ + + T = Rename("a" => :x, :c => "y") + n, c = apply(T, t) + @test Tables.columnnames(n) == (:x, :b, :y, :d) + tₒ = revert(T, n, c) + @test t == tₒ + + # reapply test + T = Rename(:b => :x, :d => :y) + n1, c1 = apply(T, t) + n2 = reapply(T, t, c1) + @test n1 == n2 + end + @testset "Identity" begin x = rand(4000) y = rand(4000)