Skip to content

Commit

Permalink
Arrow2 now supports non-ordered projections in IPC. Fixes pola-rs#1761.
Browse files: browse the repository at this point in the history
  • Loading branch information
ghuls committed May 10, 2022
1 parent 575bc4c commit 23eca3f
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 14 deletions.
12 changes: 2 additions & 10 deletions polars/polars-io/src/ipc.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -119,11 +119,7 @@ impl<R: Read + Seek> IpcReader<R> {
) -> Result<DataFrame> {
let rechunk = self.rechunk;
let metadata = read::read_file_metadata(&mut self.reader)?;
let projection = projection.map(|x| {
let mut x = x.to_vec();
x.sort_unstable();
x
});
let projection = projection.map(|x| x.to_vec());

let schema = if let Some(projection) = &projection {
apply_projection(&metadata.schema, projection)
Expand Down Expand Up @@ -180,11 +176,7 @@ where
let schema = &metadata.schema;

if let Some(columns) = self.columns {
let mut prj = columns_to_projection(columns, schema)?;

// Ipc reader panics if the projection is not in increasing order, so sorting is the safer way.
prj.sort_unstable();
self.projection = Some(prj);
self.projection = Some(columns_to_projection(columns, schema)?);
}

let schema = if let Some(projection) = &self.projection {
Expand Down
8 changes: 4 additions & 4 deletions py-polars/tests/io/test_ipc.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -39,24 +39,24 @@ def test_from_to_file(

def test_select_columns() -> None:
df = pl.DataFrame({"a": [1, 2, 3], "b": [True, False, True], "c": ["a", "b", "c"]})
expected = pl.DataFrame({"b": [True, False, True], "c": ["a", "b", "c"]})
expected = pl.DataFrame({"c": ["a", "b", "c"], "b": [True, False, True]})

f = io.BytesIO()
df.write_ipc(f)
f.seek(0)

read_df = pl.read_ipc(f, columns=["b", "c"], use_pyarrow=False)
read_df = pl.read_ipc(f, columns=["c", "b"], use_pyarrow=False)
assert expected.frame_equal(read_df)


def test_select_projection() -> None:
df = pl.DataFrame({"a": [1, 2, 3], "b": [True, False, True], "c": ["a", "b", "c"]})
expected = pl.DataFrame({"b": [True, False, True], "c": ["a", "b", "c"]})
expected = pl.DataFrame({"c": ["a", "b", "c"], "b": [True, False, True]})
f = io.BytesIO()
df.write_ipc(f)
f.seek(0)

read_df = pl.read_ipc(f, columns=[1, 2], use_pyarrow=False)
read_df = pl.read_ipc(f, columns=[2, 1], use_pyarrow=False)
assert expected.frame_equal(read_df)


Expand Down

0 comments on commit 23eca3f

Please sign in to comment.