From 95ecff8074701c4daffb986278e5f2853069deb2 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Sun, 10 Oct 2021 11:12:37 +0000 Subject: [PATCH] Fixed error in writing dict-encoded and compressed pages --- src/io/parquet/write/dictionary.rs | 23 ++++++++++++++--------- tests/it/io/parquet/write.rs | 12 ++++++++++++ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/io/parquet/write/dictionary.rs b/src/io/parquet/write/dictionary.rs index 240109e6372..8a221099f5f 100644 --- a/src/io/parquet/write/dictionary.rs +++ b/src/io/parquet/write/dictionary.rs @@ -114,11 +114,12 @@ fn encode_keys( } macro_rules! dyn_prim { - ($from:ty, $to:ty, $array:expr) => {{ + ($from:ty, $to:ty, $array:expr, $options:expr) => {{ let values = $array.values().as_any().downcast_ref().unwrap(); let mut buffer = vec![]; primitive_encode_plain::<$from, $to>(values, false, &mut buffer); + let buffer = utils::compress(buffer, $options, 0)?; CompressedPage::Dict(CompressedDictPage::new(buffer, values.len())) }}; @@ -137,25 +138,26 @@ where Encoding::PlainDictionary | Encoding::RleDictionary => { // write DictPage let dict_page = match array.values().data_type().to_logical_type() { - DataType::Int8 => dyn_prim!(i8, i32, array), - DataType::Int16 => dyn_prim!(i16, i32, array), + DataType::Int8 => dyn_prim!(i8, i32, array, options), + DataType::Int16 => dyn_prim!(i16, i32, array, options), DataType::Int32 | DataType::Date32 | DataType::Time32(_) => { - dyn_prim!(i32, i32, array) + dyn_prim!(i32, i32, array, options) } DataType::Int64 | DataType::Date64 | DataType::Time64(_) | DataType::Timestamp(_, _) - | DataType::Duration(_) => dyn_prim!(i64, i64, array), - DataType::UInt8 => dyn_prim!(u8, i32, array), - DataType::UInt16 => dyn_prim!(u16, i32, array), - DataType::UInt32 => dyn_prim!(u32, i32, array), - DataType::UInt64 => dyn_prim!(i64, i64, array), + | DataType::Duration(_) => dyn_prim!(i64, i64, array, options), + DataType::UInt8 => dyn_prim!(u8, i32, array, options), + DataType::UInt16 => dyn_prim!(u16, i32, array, options), + DataType::UInt32 => dyn_prim!(u32, i32, array, options), + DataType::UInt64 => dyn_prim!(i64, i64, array, options), DataType::Utf8 => { let values = array.values().as_any().downcast_ref().unwrap(); let mut buffer = vec![]; utf8_encode_plain::(values, false, &mut buffer); + let buffer = utils::compress(buffer, options, 0)?; CompressedPage::Dict(CompressedDictPage::new(buffer, values.len())) } DataType::LargeUtf8 => { @@ -163,6 +165,7 @@ where let mut buffer = vec![]; utf8_encode_plain::(values, false, &mut buffer); + let buffer = utils::compress(buffer, options, 0)?; CompressedPage::Dict(CompressedDictPage::new(buffer, values.len())) } DataType::Binary => { @@ -170,6 +173,7 @@ where let mut buffer = vec![]; binary_encode_plain::(values, false, &mut buffer); + let buffer = utils::compress(buffer, options, 0)?; CompressedPage::Dict(CompressedDictPage::new(buffer, values.len())) } DataType::LargeBinary => { @@ -177,6 +181,7 @@ where let mut buffer = vec![]; binary_encode_plain::(values, false, &mut buffer); + let buffer = utils::compress(buffer, options, 0)?; CompressedPage::Dict(CompressedDictPage::new(buffer, values.len())) } other => { diff --git a/tests/it/io/parquet/write.rs b/tests/it/io/parquet/write.rs index 5b1ceb40c34..4feb8405d52 100644 --- a/tests/it/io/parquet/write.rs +++ b/tests/it/io/parquet/write.rs @@ -367,6 +367,18 @@ fn i32_optional_v2_dict() -> Result<()> { ) } +#[test] +fn i32_optional_v2_dict_compressed() -> Result<()> { + round_trip( + 6, + true, + false, + Version::V2, + Compression::Snappy, + Encoding::RleDictionary, + ) +} + // Decimal Testing #[test] fn decimal_9_optional_v1() -> Result<()> {