diff --git a/.github/workflows/integration-parquet.yml b/.github/workflows/integration-parquet.yml
index 9e253a7491d..53a76d5b52f 100644
--- a/.github/workflows/integration-parquet.yml
+++ b/.github/workflows/integration-parquet.yml
@@ -46,7 +46,7 @@ jobs:
           python -m venv venv
           source venv/bin/activate
           pip install --upgrade pip
-          pip install pyarrow==6 pyspark==3
+          pip install pyarrow==8 pyspark==3
           python main.py
           # test against spark
           python main_spark.py
diff --git a/arrow-parquet-integration-testing/main.py b/arrow-parquet-integration-testing/main.py
index 545a6e08b51..42e19e81b8f 100644
--- a/arrow-parquet-integration-testing/main.py
+++ b/arrow-parquet-integration-testing/main.py
@@ -49,11 +49,8 @@ def _expected(file: str):
         pyarrow.date64(),
         pyarrow.time32("s"),
         pyarrow.timestamp("s"),
+        # the issue here is the second resolution, not the tz
         pyarrow.timestamp("s", tz="UTC"),
-        pyarrow.duration("s"),
-        pyarrow.duration("ms"),
-        pyarrow.duration("us"),
-        pyarrow.duration("ns"),
     ]
 
 
diff --git a/arrow-parquet-integration-testing/src/main.rs b/arrow-parquet-integration-testing/src/main.rs
index c7e9a03431d..f62fba5e099 100644
--- a/arrow-parquet-integration-testing/src/main.rs
+++ b/arrow-parquet-integration-testing/src/main.rs
@@ -11,8 +11,8 @@ use arrow2::{
         json_integration::read,
         json_integration::ArrowJson,
         parquet::write::{
-            CompressionOptions as ParquetCompression, Encoding, FileWriter, RowGroupIterator,
-            Version as ParquetVersion, WriteOptions,
+            transverse, CompressionOptions as ParquetCompression, Encoding, FileWriter,
+            RowGroupIterator, Version as ParquetVersion, WriteOptions,
         },
     },
 };
@@ -174,16 +174,18 @@ fn main() -> Result<()> {
     let encodings = schema
         .fields
         .iter()
-        .map(|x| match x.data_type() {
-            DataType::Dictionary(..) => vec![Encoding::RleDictionary],
-            DataType::Utf8 | DataType::LargeUtf8 => {
-                vec![if args.encoding_utf8 == EncodingScheme::Delta {
-                    Encoding::DeltaLengthByteArray
-                } else {
-                    Encoding::Plain
-                }]
-            }
-            _ => vec![Encoding::Plain],
+        .map(|f| {
+            transverse(&f.data_type, |dt| match dt {
+                DataType::Dictionary(..) => Encoding::RleDictionary,
+                DataType::Utf8 | DataType::LargeUtf8 => {
+                    if args.encoding_utf8 == EncodingScheme::Delta {
+                        Encoding::DeltaLengthByteArray
+                    } else {
+                        Encoding::Plain
+                    }
+                }
+                _ => Encoding::Plain,
+            })
         })
         .collect();
 
diff --git a/src/io/parquet/write/pages.rs b/src/io/parquet/write/pages.rs
index 4c0c0c23420..28f45a06fd1 100644
--- a/src/io/parquet/write/pages.rs
+++ b/src/io/parquet/write/pages.rs
@@ -172,7 +172,7 @@ fn to_leafs_recursive<'a>(array: &'a dyn Array, leafs: &mut Vec<&'a dyn Array>)
         }
         Null | Boolean | Primitive(_) | Binary | FixedSizeBinary | LargeBinary | Utf8
         | LargeUtf8 | Dictionary(_) => leafs.push(array),
-        _ => todo!(),
+        other => todo!("Writing {:?} to parquet not yet implemented", other),
     }
 }
 