From 01de452b5261008b5c3aa92f46ca63bd7bcd4fba Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Thu, 19 Jan 2023 11:43:44 +0100 Subject: [PATCH] fix(python): default to pyarrow for writing parquet (#6313) --- py-polars/polars/internals/dataframe/frame.py | 9 +++++++-- py-polars/tests/unit/io/test_lazy_parquet.py | 2 +- py-polars/tests/unit/io/test_parquet.py | 4 ++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/py-polars/polars/internals/dataframe/frame.py b/py-polars/polars/internals/dataframe/frame.py index aeca1bc14481..13f679c73e49 100644 --- a/py-polars/polars/internals/dataframe/frame.py +++ b/py-polars/polars/internals/dataframe/frame.py @@ -51,7 +51,12 @@ get_idx_type, py_type_to_dtype, ) -from polars.dependencies import _check_for_numpy, _check_for_pandas, _check_for_pyarrow +from polars.dependencies import ( + _PYARROW_AVAILABLE, + _check_for_numpy, + _check_for_pandas, + _check_for_pyarrow, +) from polars.dependencies import numpy as np from polars.dependencies import pandas as pd from polars.dependencies import pyarrow as pa @@ -2260,7 +2265,7 @@ def write_parquet( compression_level: int | None = None, statistics: bool = False, row_group_size: int | None = None, - use_pyarrow: bool = False, + use_pyarrow: bool = _PYARROW_AVAILABLE, pyarrow_options: dict[str, object] | None = None, ) -> None: """ diff --git a/py-polars/tests/unit/io/test_lazy_parquet.py b/py-polars/tests/unit/io/test_lazy_parquet.py index a60481bf1444..15bb764460c3 100644 --- a/py-polars/tests/unit/io/test_lazy_parquet.py +++ b/py-polars/tests/unit/io/test_lazy_parquet.py @@ -136,7 +136,7 @@ def test_parquet_statistics(io_test_dir: str, capfd: CaptureFixture[str]) -> Non assert df.n_chunks("all") == [4, 4] if not os.path.exists(fname): - df.write_parquet(fname, statistics=True) + df.write_parquet(fname, statistics=True, use_pyarrow=False) for pred in [ pl.col("idx") < 50, diff --git a/py-polars/tests/unit/io/test_parquet.py b/py-polars/tests/unit/io/test_parquet.py index 8d77ec26b7c5..4be47a0471a3 100644 --- a/py-polars/tests/unit/io/test_parquet.py +++ b/py-polars/tests/unit/io/test_parquet.py @@ -41,7 +41,7 @@ def test_to_from_buffer( buf = io.BytesIO() # Writing lzo compressed parquet files is not supported for now. with pytest.raises(pl.ArrowError): - df.write_parquet(buf, compression=compression) + df.write_parquet(buf, compression=compression, use_pyarrow=False) buf.seek(0) # Invalid parquet file as writing failed. with pytest.raises(pl.ArrowError): @@ -78,7 +78,7 @@ def test_to_from_file( if compression == "lzo": # Writing lzo compressed parquet files is not supported for now. with pytest.raises(pl.ArrowError): - df.write_parquet(f, compression=compression) + df.write_parquet(f, compression=compression, use_pyarrow=False) # Invalid parquet file as writing failed. with pytest.raises(pl.ArrowError): _ = pl.read_parquet(f)