From 2d3f0c56b6a62ceadebbd73f4919ae29bb45a9de Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Sat, 25 Jun 2022 07:55:34 +0000 Subject: [PATCH] Added more tests --- arrow-parquet-integration-testing/main.py | 5 +--- arrow-parquet-integration-testing/src/main.rs | 26 ++++++++++--------- src/io/parquet/write/pages.rs | 2 +- 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/arrow-parquet-integration-testing/main.py b/arrow-parquet-integration-testing/main.py index 545a6e08b51..42e19e81b8f 100644 --- a/arrow-parquet-integration-testing/main.py +++ b/arrow-parquet-integration-testing/main.py @@ -49,11 +49,8 @@ def _expected(file: str): pyarrow.date64(), pyarrow.time32("s"), pyarrow.timestamp("s"), + # the issue here is the second, not the tz pyarrow.timestamp("s", tz="UTC"), - pyarrow.duration("s"), - pyarrow.duration("ms"), - pyarrow.duration("us"), - pyarrow.duration("ns"), ] diff --git a/arrow-parquet-integration-testing/src/main.rs b/arrow-parquet-integration-testing/src/main.rs index c7e9a03431d..f62fba5e099 100644 --- a/arrow-parquet-integration-testing/src/main.rs +++ b/arrow-parquet-integration-testing/src/main.rs @@ -11,8 +11,8 @@ use arrow2::{ json_integration::read, json_integration::ArrowJson, parquet::write::{ - CompressionOptions as ParquetCompression, Encoding, FileWriter, RowGroupIterator, - Version as ParquetVersion, WriteOptions, + transverse, CompressionOptions as ParquetCompression, Encoding, FileWriter, + RowGroupIterator, Version as ParquetVersion, WriteOptions, }, }, }; @@ -174,16 +174,18 @@ fn main() -> Result<()> { let encodings = schema .fields .iter() - .map(|x| match x.data_type() { - DataType::Dictionary(..) => vec![Encoding::RleDictionary], - DataType::Utf8 | DataType::LargeUtf8 => { - vec![if args.encoding_utf8 == EncodingScheme::Delta { - Encoding::DeltaLengthByteArray - } else { - Encoding::Plain - }] - } - _ => vec![Encoding::Plain], + .map(|f| { + transverse(&f.data_type, |dt| match dt { + DataType::Dictionary(..) => Encoding::RleDictionary, + DataType::Utf8 | DataType::LargeUtf8 => { + if args.encoding_utf8 == EncodingScheme::Delta { + Encoding::DeltaLengthByteArray + } else { + Encoding::Plain + } + } + _ => Encoding::Plain, + }) }) .collect(); diff --git a/src/io/parquet/write/pages.rs b/src/io/parquet/write/pages.rs index 4c0c0c23420..28f45a06fd1 100644 --- a/src/io/parquet/write/pages.rs +++ b/src/io/parquet/write/pages.rs @@ -172,7 +172,7 @@ fn to_leafs_recursive<'a>(array: &'a dyn Array, leafs: &mut Vec<&'a dyn Array>) } Null | Boolean | Primitive(_) | Binary | FixedSizeBinary | LargeBinary | Utf8 | LargeUtf8 | Dictionary(_) => leafs.push(array), - _ => todo!(), + other => todo!("Writing {:?} to parquet not yet implemented", other), } }