invenia · rofinn · Apr 19, 2024 · Mar 5, 2024 · Apr 9, 2024 · Apr 13, 2024
diff --git a/src/imputors/interp.jl b/src/imputors/interp.jl
@@ -1,5 +1,5 @@
 """
-    Interpolate(; limit=nothing)
+    Interpolate(; limit=nothing, r=nothing)
 
 Performs linear interpolation between the nearest values in an vector.
 The current implementation is univariate, so each variable in a table or matrix will
@@ -11,6 +11,8 @@ that all missing values will be imputed.
 
 # Keyword Arguments
 * `limit::Union{UInt, Nothing}`: Optionally limit the gap sizes that can be interpolated.
+* `r::Union{RoundingMode, Nothing}`: Optionally specify a rounding mode for integer data.
+    Without one, interpolating integers throws an `InexactError` for fractional values.
 
 # Example
 ```jldoctest
@@ -34,9 +36,10 @@ julia> impute(M, Interpolate(; limit=2); dims=:rows)
 """
 struct Interpolate <: Imputor
     limit::Union{UInt, Nothing}
+    r::Union{RoundingMode, Nothing}  # rounding mode handed to the interpolation; `nothing` = none
 end
 
-Interpolate(; limit=nothing) = Interpolate(limit)
+Interpolate(; limit=nothing, r=nothing) = Interpolate(limit, r)  # keyword-only constructor
 
 function _impute!(data::AbstractVector{<:Union{T, Missing}}, imp::Interpolate) where T
     @assert !all(ismissing, data)
@@ -51,15 +54,8 @@ function _impute!(data::AbstractVector{<:Union{T, Missing}}, imp::Interpolate) w
                 gap_sz = (next_idx - prev_idx) - 1
 
                 if imp.limit === nothing || gap_sz <= imp.limit
-                    diff = data[next_idx] - data[prev_idx]
-                    incr = diff / T(gap_sz + 1)
-                    val = data[prev_idx] + incr
-
-                    # Iteratively fill in the values
-                    for j in i:(next_idx - 1)
-                        data[j] = val
-                        val += incr
-                    end
+                    gen = _gen_interp(data[prev_idx], data[next_idx], gap_sz+1, imp.r)
+                    _gen_set!(data, prev_idx, gen)
                 end
 
                 i = next_idx
@@ -72,3 +68,39 @@ function _impute!(data::AbstractVector{<:Union{T, Missing}}, imp::Interpolate) w
 
     return data
 end
+
+"""
+Write the values of `gen`, in order, into `v` starting at index `after + 1`.
+"""
+function _gen_set!(v::AbstractVector, after::Integer, gen)
+    for (idx, x) in zip(Iterators.countfrom(after + 1), gen)
+        v[idx] = x
+    end
+end
+
+"""
+Return a generator over the `n - 1` values interpolated strictly between `a` and `b`.
+Integer inputs are converted exactly (`InexactError` if fractional) or rounded with `r`.
+"""
+function _gen_interp(a, b, n, ::Nothing)
+    inc = _calculate_increment(a, b, n)
+    return (a + inc * i for i in 1:(n - 1))  # stop before `b`: it is already known data
+end
+_gen_interp(a, b, n, ::RoundingMode) = _gen_interp(a, b, n, nothing)  # `r` unused here
+
+function _gen_interp(a::T, b::T, n, ::Nothing) where {T<:Integer}
+    inc = _calculate_increment(a, b, n)
+    return (convert(T, a + inc * i) for i in 1:(n - 1))  # throws on fractional values
+end
+
+function _gen_interp(a::T, b::T, n, r::RoundingMode) where {T<:Integer}
+    inc = _calculate_increment(a, b, n)
+    return (round(T, a + inc * i, r) for i in 1:(n - 1))
+end
+
+_calculate_increment(a, b, n) = (b - a) / n  # size of each of the `n` equal steps
+
+function _calculate_increment(a::T, b::T, n) where {T<:Integer}
+    return (float(b) - float(a)) / n  # subtract as floats: unsigned `b - a` wraps if b < a
+end
+
diff --git a/test/imputors/interp.jl b/test/imputors/interp.jl
@@ -90,10 +90,59 @@
         @test ismissing(result[1])
         @test ismissing(result[20])
 
-        # Test inexact error
+        # Test with UInt
+        c = [0x1, missing, 0x3, 0x4]
+        @test Impute.interp(c) == [0x1, 0x2, 0x3, 0x4]
+
+        # Test reverse case where the increment is negative
+        @test Impute.interp(reverse(c)) == [0x4, 0x3, 0x2, 0x1]
+
+        # Test inexact error (no rounding mode provided)
         # https://github.com/invenia/Impute.jl/issues/71
         c = [1, missing, 2, 3]
         @test_throws InexactError Impute.interp(c)
+
+        # Test with UInt
+        c = [0x1, missing, 0x2, 0x3]
+        @test_throws InexactError Impute.interp(c)
+
+        # Test reverse case where the increment is negative
+        @test_throws InexactError Impute.interp(reverse(c))
+
+        # Test inexact cases with a rounding mode
+        c = [1, missing, 2, 3]
+        @test Impute.interp(c; r=RoundToZero) == [1, 1, 2, 3]
+
+        # Test with UInt
+        c = [0x1, missing, 0x2, 0x3]
+        @test Impute.interp(c; r=RoundNearest) == [0x1, 0x2, 0x2, 0x3]
+
+        # Test reverse case where the increment is negative
+        @test Impute.interp(reverse(c); r=RoundUp) == [0x3, 0x2, 0x2, 0x1]
+
+        # Test rounding doesn't cause values to exceed endpoint values
+        @test Impute.interp([1, missing, missing, 2]; r=RoundUp) == [1, 2, 2, 2]
+        @test Impute.interp([2, missing, missing, 1]; r=RoundUp) == [2, 2, 2, 1]
+        @test Impute.interp([1, missing, missing, 0]; r=RoundDown) == [1, 0, 0, 0]
+        @test Impute.interp([0x1, missing, missing, 0x0]; r=RoundDown) == [0x1, 0x0, 0x0, 0x0]
+
+        # Test long gaps (above .5 increment)
+        @test Impute.interp([2, fill(missing, 10)..., 8]; r=RoundNearest) == [2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8]
+        @test Impute.interp([0x2, fill(missing, 10)..., 0x8]; r=RoundNearest) == [0x2, 0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6, 0x7, 0x7, 0x8]
+        @test Impute.interp([8, fill(missing, 10)..., 2]; r=RoundNearest) == [8, 7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2]
+        @test Impute.interp([0x8, fill(missing, 10)..., 0x2]; r=RoundNearest) == [0x8, 0x7, 0x7, 0x6, 0x6, 0x5, 0x5, 0x4, 0x4, 0x3, 0x3, 0x2]
+
+        # Test long gaps (at .5 increment)
+        @test Impute.interp([2, fill(missing, 11)..., 8]; r=RoundNearest) == [2, 2, 3, 4, 4, 4, 5, 6, 6, 6, 7, 8, 8]
+        @test Impute.interp([0x2, fill(missing, 11)..., 0x8]; r=RoundNearest) == [0x2, 0x2, 0x3, 0x4, 0x4, 0x4, 0x5, 0x6, 0x6, 0x6, 0x7, 0x8, 0x8]
+        @test Impute.interp([8, fill(missing, 11)..., 2]; r=RoundNearest) == [8, 8, 7, 6, 6, 6, 5, 4, 4, 4, 3, 2, 2]
+        @test Impute.interp([0x8, fill(missing, 11)..., 0x2]; r=RoundNearest) == [0x8, 0x8, 0x7, 0x6, 0x6, 0x6, 0x5, 0x4, 0x4, 0x4, 0x3, 0x2, 0x2]
+
+        # Test long gaps (below .5 increment)
+        @test Impute.interp([2, fill(missing, 12)..., 8]; r=RoundNearest) == [2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8]
+        @test Impute.interp([0x2, fill(missing, 12)..., 0x8]; r=RoundNearest) == [0x2, 0x2, 0x3, 0x3, 0x4, 0x4, 0x5, 0x5, 0x6, 0x6, 0x7, 0x7, 0x8, 0x8]
+        @test Impute.interp([8, fill(missing, 12)..., 2]; r=RoundNearest) == [8, 8, 7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2]
+        @test Impute.interp([0x8, fill(missing, 12)..., 0x2]; r=RoundNearest) == [0x8, 0x8, 0x7, 0x7, 0x6, 0x6, 0x5, 0x5, 0x4, 0x4, 0x3, 0x3, 0x2, 0x2]
     end
 
     # TODO Test error cases on non-numeric types