Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

handle interp for integers by casting and rounding (#71) #142

Merged
merged 9 commits into from
Apr 19, 2024
54 changes: 43 additions & 11 deletions src/imputors/interp.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Interpolate(; limit=nothing)
Interpolate(; limit=nothing, r=nothing)

Performs linear interpolation between the nearest values in a vector.
The current implementation is univariate, so each variable in a table or matrix will
Expand All @@ -11,6 +11,8 @@ that all missing values will be imputed.

# Keyword Arguments
* `limit::Union{UInt, Nothing}`: Optionally limit the gap sizes that can be interpolated.
* `r::Union{RoundingMode, Nothing}`: Optionally specify a rounding mode.
Avoids `InexactError`s when interpolating over integers.

# Example
```jldoctest
Expand All @@ -34,9 +36,10 @@ julia> impute(M, Interpolate(; limit=2); dims=:rows)
"""
struct Interpolate <: Imputor
# Largest gap size that may be interpolated; `nothing` means gaps of any size are filled.
limit::Union{UInt, Nothing}
# Rounding mode applied when interpolating over integers; `nothing` means no rounding,
# in which case a non-integral interpolated value raises an `InexactError`.
r::Union{RoundingMode, Nothing}
end

# Keyword constructor. Both settings are optional and default to `nothing`
# (no gap-size limit, no rounding mode). This is the only keyword constructor: the
# struct has two fields, so a one-argument `Interpolate(limit)` call would not match.
Interpolate(; limit=nothing, r=nothing) = Interpolate(limit, r)

function _impute!(data::AbstractVector{<:Union{T, Missing}}, imp::Interpolate) where T
@assert !all(ismissing, data)
Expand All @@ -51,15 +54,8 @@ function _impute!(data::AbstractVector{<:Union{T, Missing}}, imp::Interpolate) w
gap_sz = (next_idx - prev_idx) - 1

if imp.limit === nothing || gap_sz <= imp.limit
diff = data[next_idx] - data[prev_idx]
incr = diff / T(gap_sz + 1)
val = data[prev_idx] + incr

# Iteratively fill in the values
for j in i:(next_idx - 1)
data[j] = val
val += incr
end
gen = _gen_interp(data[prev_idx], data[next_idx], gap_sz+1, imp.r)
_gen_set!(data, prev_idx, gen)
end

i = next_idx
Expand All @@ -72,3 +68,39 @@ function _impute!(data::AbstractVector{<:Union{T, Missing}}, imp::Interpolate) w

return data
end

"""
Set a vector slice over the values of a generator, starting from `after+1`
"""
function _gen_set!(v::AbstractVector, after::Integer, gen)
    # The k-th value produced by `gen` is stored at `v[after + k]`, so the element at
    # index `after` itself is never written.
    for (i, val) in enumerate(gen)
        v[after + i] = val
    end
    # `v` is mutated in place; there is no meaningful return value.
    return nothing
end

"""
Return generator over interpolated values.
"""
function _gen_interp(a, b, n, ::Nothing)
    # Generic method (no rounding requested): step from `a` towards `b` in `n` equal
    # increments. The generator yields `n` values, `a + inc * i` for `i = 1, ..., n`,
    # so `a` itself is not produced.
    inc = _calculate_increment(a, b, n)
    return (a + inc * i for i in 1:n)
end

# Fallback for endpoints that are not both of the same `Integer` type: the rounding
# mode is ignored and the unrounded generator is returned.
function _gen_interp(a, b, n, ::RoundingMode)
    return _gen_interp(a, b, n, nothing)
end

function _gen_interp(a::T, b::T, n, ::Nothing) where {T<:Integer}
    # Integer endpoints without a rounding mode: each interpolated value is converted
    # back to `T`. `convert` throws an `InexactError` when a value is not a whole
    # number, which is the signal to the caller that a rounding mode is required.
    inc = _calculate_increment(a, b, n)
    return (convert(T, a + inc * i) for i in 1:n)
end

function _gen_interp(a::T, b::T, n, r::RoundingMode) where {T<:Integer}
    # Integer endpoints with a rounding mode: every interpolated value is rounded to
    # `T` using `r`, so non-integral intermediate values no longer raise.
    step = _calculate_increment(a, b, n)
    return (round(T, a + step * k, r) for k in 1:n)
end

# Size of one interpolation step when going from `a` to `b` in `n` equal steps.
_calculate_increment(a, b, n) = (b - a) / n

# Integer endpoints are converted to floating point before subtracting. This keeps a
# decreasing pair of unsigned values (e.g. `0x3` to `0x1`) from wrapping around in
# `b - a`, and yields a signed, fractional increment.
function _calculate_increment(a::T, b::T, n) where {T<:Integer}
    return _calculate_increment(float(a), float(b), n)
end

51 changes: 50 additions & 1 deletion test/imputors/interp.jl
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,59 @@
@test ismissing(result[1])
@test ismissing(result[20])

# Test inexact error
# Test with UInt
c = [0x1, missing, 0x3, 0x4]
@test Impute.interp(c) == [0x1, 0x2, 0x3, 0x4]

# Test reverse case where the increment is negative
@test Impute.interp(reverse(c)) == [0x4, 0x3, 0x2, 0x1]

# Test inexact error (no rounding mode provided)
# https://github.com/invenia/Impute.jl/issues/71
c = [1, missing, 2, 3]
@test_throws InexactError Impute.interp(c)

# Test with UInt
c = [0x1, missing, 0x2, 0x3]
@test_throws InexactError Impute.interp(c)

# Test reverse case where the increment is negative
@test_throws InexactError Impute.interp(reverse(c))

# Test inexact cases with a rounding mode
c = [1, missing, 2, 3]
@test Impute.interp(c; r=RoundToZero) == [1, 1, 2, 3]

# Test with UInt
c = [0x1, missing, 0x2, 0x3]
@test Impute.interp(c; r=RoundNearest) == [0x1, 0x2, 0x2, 0x3]

# Test reverse case where the increment is negative
@test Impute.interp(reverse(c); r=RoundUp) == [0x3, 0x2, 0x2, 0x1]

# Test rounding doesn't cause values to exceed endpoint values
@test Impute.interp([1, missing, missing, 2]; r=RoundUp) == [1, 2, 2, 2]
@test Impute.interp([2, missing, missing, 1]; r=RoundUp) == [2, 2, 2, 1]
@test Impute.interp([1, missing, missing, 0]; r=RoundDown) == [1, 0, 0, 0]
@test Impute.interp([0x1, missing, missing, 0x0]; r=RoundDown) == [0x1, 0x0, 0x0, 0x0]

# Test long gaps (above .5 increment)
@test Impute.interp([2, fill(missing, 10)..., 8]; r=RoundNearest) == [2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8]
@test Impute.interp([0x2, fill(missing, 10)..., 0x8]; r=RoundNearest) == [0x2, 0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6, 0x7, 0x7, 0x8]
@test Impute.interp([8, fill(missing, 10)..., 2]; r=RoundNearest) == [8, 7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2]
@test Impute.interp([0x8, fill(missing, 10)..., 0x2]; r=RoundNearest) == [0x8, 0x7, 0x7, 0x6, 0x6, 0x5, 0x5, 0x4, 0x4, 0x3, 0x3, 0x2]

# Test long gaps (at .5 increment)
@test Impute.interp([2, fill(missing, 11)..., 8]; r=RoundNearest) == [2, 2, 3, 4, 4, 4, 5, 6, 6, 6, 7, 8, 8]
@test Impute.interp([0x2, fill(missing, 11)..., 0x8]; r=RoundNearest) == [0x2, 0x2, 0x3, 0x4, 0x4, 0x4, 0x5, 0x6, 0x6, 0x6, 0x7, 0x8, 0x8]
@test Impute.interp([8, fill(missing, 11)..., 2]; r=RoundNearest) == [8, 8, 7, 6, 6, 6, 5, 4, 4, 4, 3, 2, 2]
@test Impute.interp([0x8, fill(missing, 11)..., 0x2]; r=RoundNearest) == [0x8, 0x8, 0x7, 0x6, 0x6, 0x6, 0x5, 0x4, 0x4, 0x4, 0x3, 0x2, 0x2]

# Test long gaps (below .5 increment)
@test Impute.interp([2, fill(missing, 12)..., 8]; r=RoundNearest) == [2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8]
@test Impute.interp([0x2, fill(missing, 12)..., 0x8]; r=RoundNearest) == [0x2, 0x2, 0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6, 0x7, 0x7, 0x8, 0x8]
@test Impute.interp([8, fill(missing, 12)..., 2]; r=RoundNearest) == [8, 8, 7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2]
@test Impute.interp([0x8, fill(missing, 12)..., 0x2]; r=RoundNearest) == [0x8, 0x8, 0x7, 0x7, 0x6, 0x6, 0x5, 0x5, 0x4, 0x4, 0x3, 0x3, 0x2, 0x2]
end

# TODO Test error cases on non-numeric types
Expand Down
Loading