Skip to content

Commit

Permalink
Remove "svd"s in comment
Browse files Browse the repository at this point in the history
  • Loading branch information
appleparan committed Feb 24, 2020
1 parent e4cd596 commit e51cfb5
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 13 deletions.
7 changes: 1 addition & 6 deletions src/imputors/knn.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,6 @@ function KNN(; num_nn=1,
KNN(num_nn, dist, context)
end

"""
impute!(imp::KNN, data::AbstractMatrix)
data : N x D matrix
"""
function impute!(data::AbstractMatrix{<:Union{T, Missing}},
imp::KNN) where T<:Real

Expand All @@ -51,7 +46,7 @@ function impute!(data::AbstractMatrix{<:Union{T, Missing}},
# index of columns
idxs, dists = NearestNeighbors.knn(kdtree, transposed, imp.num_nn, true)
invWdist(i) = dists[i] == 0 ? transposed[idxs[i]] : dists[i]
# TODO : going to parallel?

for (i, x) in enumerate(mdata)
if ndims(transposed) != 1
# ndims(dataT) == 1 means there is only a single row.
Expand Down
9 changes: 2 additions & 7 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -529,29 +529,24 @@ end
@testset "KNN" begin
# Test a case with few variables
# (e.g., only a few variables)
@testset "Data - few variables" begin
@testset "Data with few variables" begin
data = Matrix(dataset("Ecdat", "Electricity"))
X = add_missings(data)

knn_imputed = impute(copy(X), Impute.KNN(; num_nn=3, dist=Euclidean(), context=Context(; limit = 1.0)))
mean_imputed = impute(copy(X), Fill(; context=Context(; limit=1.0)))

# If we don't have enough variables then SVD imputation will probably perform
# about as well as mean imputation.
@test nrmsd(knn_imputed, data) > nrmsd(mean_imputed, data) * 0.9
end

@testset "Data - random variables" begin
@testset "Data with random values" begin
M = rand(100, 200)
data = M * M'
X = add_missings(data)

knn_imputed = impute(copy(X), Impute.KNN(; num_nn=3, dist=Euclidean(), context=Context(; limit = 1.0)))
mean_imputed = impute(copy(X), Fill(; context=Context(; limit=1.0)))

# If most of the variance in the original data can't be explained by a small
# subset of the eigen values in the svd decomposition then our low rank approximations
# won't perform very well.
@test nrmsd(knn_imputed, data) > nrmsd(mean_imputed, data) * 0.9
end
end
Expand Down

0 comments on commit e51cfb5

Please sign in to comment.