From 96310bf018e9c6bd8ff19febe134fe7897c5f975 Mon Sep 17 00:00:00 2001 From: rofinn Date: Fri, 18 Sep 2020 14:16:33 -0500 Subject: [PATCH 1/2] Got minimal tests passing for imputing KeyedArrays. --- Project.toml | 6 ++++-- src/imputors.jl | 17 +++++++++-------- src/imputors/drop.jl | 15 ++++++++------- test/runtests.jl | 10 ++++++++++ 4 files changed, 31 insertions(+), 17 deletions(-) diff --git a/Project.toml b/Project.toml index 2b44ef7..8ac709d 100644 --- a/Project.toml +++ b/Project.toml @@ -16,7 +16,8 @@ TableOperations = "ab02a1b2-a7df-11e8-156e-fb1833f50b87" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] -Distances = "0.9" +AxisKeys = "0.1.5" +Distances = "0.8, 0.9" IterTools = "1.2, 1.3" Missings = "0.4" NearestNeighbors = "0.4" @@ -27,6 +28,7 @@ julia = "1" [extras] AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9" +AxisKeys = "94b1ba4f-4ee9-5380-92f1-94cde586c3c5" Combinatorics = "861a8166-3701-5b0c-9a16-15d98fcdc6aa" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" @@ -35,4 +37,4 @@ RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["AxisArrays", "Combinatorics", "DataFrames", "Dates", "Distances", "RDatasets", "Test"] +test = ["AxisArrays", "AxisKeys", "Combinatorics", "DataFrames", "Dates", "Distances", "RDatasets", "Test"] diff --git a/src/imputors.jl b/src/imputors.jl index cda9f06..78dafa3 100644 --- a/src/imputors.jl +++ b/src/imputors.jl @@ -76,14 +76,8 @@ function impute(data, imp::Imputor; kwargs...) return impute!(deepcopy(data), imp; kwargs...) end -# Wrapper method intended to handle ambiguities between vector and row tables. -function impute!(data::AbstractVector, imp::Imputor) - if istable(data) - return materializer(data)(impute!(Tables.columns(data), imp)) - else - return _impute!(data, imp) - end -end +# Generic fallback for methods that have only defined _impute!(v, imp; kwargs...) 
+impute!(data::AbstractVector, imp::Imputor; kwargs...) = _impute!(data, imp; kwargs...) """ impute!(data::AbstractMatrix, imp::Imputor; kwargs...) @@ -175,6 +169,13 @@ function impute!(table, imp::Imputor) return table end +# Special case row tables +# NOTE: This may introduce ambiguities for specific imputors that have defined a +# `impute!(data, imp)` method +function impute!(data::Vector{<:NamedTuple}, imp::Imputor) + return materializer(data)(impute!(Tables.columns(data), imp)) +end + for file in ("drop.jl", "locf.jl", "nocb.jl", "interp.jl", "fill.jl", "chain.jl", "srs.jl", "svd.jl", "knn.jl") include(joinpath("imputors", file)) end diff --git a/src/imputors/drop.jl b/src/imputors/drop.jl index dc3ccd0..999ee6b 100644 --- a/src/imputors/drop.jl +++ b/src/imputors/drop.jl @@ -34,6 +34,10 @@ function impute!(data::Vector, imp::DropObs) imp.context(c -> filter!(x -> !ismissing!(c, x), data)) end +function impute!(data::Vector{<:NamedTuple}, imp::DropObs) + return materializer(data)(impute(Tables.columns(data), imp)) +end + function impute(data::AbstractVector, imp::DropObs) imp.context(c -> filter(x -> !ismissing!(c, x), data)) end @@ -95,6 +99,10 @@ end # TODO: Switch to using Base.@kwdef on 1.1 DropVars(; context=Context()) = DropVars(context) +function impute!(data::Vector{<:NamedTuple}, imp::DropVars) + return materializer(data)(impute(Tables.columns(data), imp)) +end + function impute(data::AbstractMatrix, imp::DropVars; dims=1) imp.context() do c return filtervars(data; dims=dims) do vars @@ -121,10 +129,3 @@ end # Add impute! 
methods to override the default behaviour in imputors.jl impute!(data::AbstractMatrix, imp::Union{DropObs, DropVars}) = impute(data, imp) impute!(data, imp::Union{DropObs, DropVars}) = impute(data, imp) -function impute!(data::AbstractVector, imp::Union{DropObs, DropVars}) - if istable(data) - return materializer(data)(impute(Tables.columns(data), imp)) - else - throw(MethodError(impute!, (data, imp))) - end -end diff --git a/test/runtests.jl b/test/runtests.jl index 0f9c62e..5f2f8bb 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,5 @@ using AxisArrays +using AxisKeys using Combinatorics using DataFrames using Dates @@ -488,6 +489,15 @@ end # Confirm that we don't have any more missing values @test all(!ismissing, result) end + + @testset "KeyedArray" begin + data = KeyedArray(Matrix(orig); row=1:size(orig, 1), V=names(orig)) + result = Impute.interp(data; context=ctx) |> Impute.locf!() |> Impute.nocb!() + + @test size(result) == size(data) + # Confirm that we don't have any more missing values + @test all(!ismissing, result) + end end @testset "Alternate missing functions" begin From a9da1dfe3788797cfd0de4f869f33505da906078 Mon Sep 17 00:00:00 2001 From: rofinn Date: Mon, 21 Sep 2020 16:08:24 -0500 Subject: [PATCH 2/2] Test on 1.3. --- .appveyor.yml | 2 +- .travis.yml | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 756f881..8b86a93 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,6 +1,6 @@ environment: matrix: - - julia_version: 1.0 + - julia_version: 1.3 - julia_version: nightly platform: diff --git a/.travis.yml b/.travis.yml index 3e44a10..0ff5933 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,8 @@ os: - linux - osx julia: - - 1.0 + # 1.0 should also work, but Pkg.test hit some chmod issues on 1.0 in docker containers + - 1.3 - nightly notifications: email: false