Skip to content

Commit

Permalink
Raise on out of bounds range access (#1061)
Browse files Browse the repository at this point in the history
* Change behavior to raise on out of bounds

* Add test of new error message

* Account for slice semantics a different way

* Fix typos

* Also update tests to fix typos there

* Account for "pseudo max" on ranges

If `step > 1`, it's possible for `range.last`
to be pastthe bounds but for the `range`
to still be a subset of the columns.

* Account for reverse ranges in `Enum.slice/2`

This won't be supported until Elixir 2.0. But
we can make it future proof now.

* Both indices need to be in bounds
  • Loading branch information
billylanchantin authored Feb 3, 2025
1 parent 29d7b36 commit 0d59ddc
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 7 deletions.
15 changes: 14 additions & 1 deletion lib/explorer/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,20 @@ defmodule Explorer.Shared do
names
end

def to_existing_columns(%{names: names}, %Range{} = columns, _raise?) do
def to_existing_columns(%{names: names} = df, first..last//_ = columns, raise?) do
if raise? do
# We say a range is in bounds when both limits are valid column indices.
# In certain instances this will reject a valid input to `Enum.slice/2`,
# but there were some ambiguous cases we needed to cover. See
# https://github.com/elixir-explorer/explorer/pull/1061 for a discussion.
n_cols = Explorer.DataFrame.n_columns(df)

if max(abs(first), abs(last)) >= n_cols do
raise ArgumentError,
"range #{inspect(columns)} is out of bounds for a dataframe with #{n_cols} column(s)"
end
end

Enum.slice(names, columns)
end

Expand Down
4 changes: 3 additions & 1 deletion test/explorer/data_frame/lazy_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -901,7 +901,9 @@ defmodule Explorer.DataFrame.LazyTest do
df3 = DF.distinct(df, ..)
assert DF.names(df3) == DF.names(df)

assert df == DF.distinct(df, 100..200)
assert_raise ArgumentError,
"range 100..200 is out of bounds for a dataframe with 10 column(s)",
fn -> DF.distinct(df, 100..200) end
end
end

Expand Down
29 changes: 24 additions & 5 deletions test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -2689,7 +2689,6 @@ defmodule Explorer.DataFrameTest do
assert DF.to_columns(df[["a"]]) == %{"a" => [1, 2, 3]}
assert DF.to_columns(df[[:a, :c]]) == %{"a" => [1, 2, 3], "c" => [4.0, 5.1, 6.2]}
assert DF.to_columns(df[0..-2//1]) == %{"a" => [1, 2, 3], "b" => ["a", "b", "c"]}
assert DF.to_columns(df[-3..-1]) == DF.to_columns(df)
assert DF.to_columns(df[..]) == DF.to_columns(df)

assert %Series{} = s1 = df[0]
Expand All @@ -2715,7 +2714,24 @@ defmodule Explorer.DataFrameTest do
~r"could not find column name \"class\"",
fn -> df[:class] end

assert DF.to_columns(df[0..100]) == DF.to_columns(df)
assert_raise ArgumentError,
"range 0..3 is out of bounds for a dataframe with 3 column(s)",
fn -> DF.to_columns(df[0..3]) end

assert_raise ArgumentError,
"range 0..-4//1 is out of bounds for a dataframe with 3 column(s)",
fn -> DF.to_columns(df[0..-4//1]) end

assert_raise ArgumentError,
"range -3..-1 is out of bounds for a dataframe with 3 column(s)",
fn -> DF.to_columns(df[-3..-1]) end

# Note: Even though `Enum.to_list(1..3//3) == [1]`, we still classify this
# range as out of bounds because we define in bounds as both limits of the
# range being in bounds.
assert_raise ArgumentError,
"range 1..3//3 is out of bounds for a dataframe with 3 column(s)",
fn -> DF.to_columns(df[1..3//3]) end
end

test "pop/2" do
Expand Down Expand Up @@ -2961,7 +2977,9 @@ defmodule Explorer.DataFrameTest do
df3 = DF.distinct(df, ..)
assert DF.names(df3) == DF.names(df)

assert df == DF.distinct(df, 100..200)
assert_raise ArgumentError,
"range 100..200 is out of bounds for a dataframe with 10 column(s)",
fn -> DF.drop_nil(df, 100..200) end
end
end

Expand All @@ -2983,8 +3001,9 @@ defmodule Explorer.DataFrameTest do
fn -> DF.drop_nil(df, [3, 4, 5]) end

# It takes the slice of columns in the range
df4 = DF.drop_nil(df, 0..200)
assert DF.to_columns(df4) == %{"a" => [1], "b" => [1]}
assert_raise ArgumentError,
"range 0..200 is out of bounds for a dataframe with 2 column(s)",
fn -> DF.drop_nil(df, 0..200) end
end

describe "relocate/3" do
Expand Down

0 comments on commit 0d59ddc

Please sign in to comment.