From 8749215b187cc8347ffc0711cf8c3bcf1d10a18c Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Sun, 24 Oct 2021 05:36:19 +0000 Subject: [PATCH] Fixed error in reading fixed_len from parquet --- src/io/parquet/read/fixed_size_binary.rs | 12 ++++++------ tests/it/io/parquet/read.rs | 5 +++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/io/parquet/read/fixed_size_binary.rs b/src/io/parquet/read/fixed_size_binary.rs index 9552b2b1082..be5b0a988c8 100644 --- a/src/io/parquet/read/fixed_size_binary.rs +++ b/src/io/parquet/read/fixed_size_binary.rs @@ -27,7 +27,7 @@ pub(crate) fn read_dict_buffer( values: &mut MutableBuffer, validity: &mut MutableBitmap, ) { - let length = values.len() * size + additional; + let length = values.len() + additional * size; let dict_values = dict.values(); // SPEC: Data page format: the bit width used to encode the entry ids stored as 1 byte (max bit width = 32), @@ -42,13 +42,13 @@ pub(crate) fn read_dict_buffer( for run in validity_iterator { match run { hybrid_rle::HybridEncoded::Bitpacked(packed) => { - let remaining = length - values.len() * size; + let remaining = (length - values.len()) / size; let len = std::cmp::min(packed.len() * 8, remaining); for is_valid in BitmapIter::new(packed, 0, len) { validity.push(is_valid); if is_valid { let index = indices.next().unwrap() as usize; - values.extend_from_slice(&dict_values[index..(index + 1) * size]); + values.extend_from_slice(&dict_values[index * size..(index + 1) * size]); } else { values.extend_constant(size, 0); } @@ -60,7 +60,7 @@ pub(crate) fn read_dict_buffer( if is_set { (0..additional).for_each(|_| { let index = indices.next().unwrap() as usize; - values.extend_from_slice(&dict_values[index..(index + 1) * size]); + values.extend_from_slice(&dict_values[index * size..(index + 1) * size]); }) } else { values.extend_constant(additional * size, 0) @@ -78,7 +78,7 @@ pub(crate) fn read_optional( values: &mut MutableBuffer, validity: &mut MutableBitmap, ) { - let length = values.len() * size + additional; + let length = values.len() + additional * size; assert_eq!(values_buffer.len() % size, 0); let mut values_iterator = values_buffer.chunks_exact(size); @@ -89,7 +89,7 @@ pub(crate) fn read_optional( match run { hybrid_rle::HybridEncoded::Bitpacked(packed) => { // the pack may contain more items than needed. - let remaining = length - values.len() * size; + let remaining = (length - values.len()) / size; let len = std::cmp::min(packed.len() * 8, remaining); for is_valid in BitmapIter::new(packed, 0, len) { validity.push(is_valid); diff --git a/tests/it/io/parquet/read.rs b/tests/it/io/parquet/read.rs index bfad685eb49..72c60021950 100644 --- a/tests/it/io/parquet/read.rs +++ b/tests/it/io/parquet/read.rs @@ -242,6 +242,11 @@ fn v1_decimal_9_required() -> Result<()> { test_pyarrow_integration(6, 1, "basic", false, true) } +#[test] +fn v1_decimal_9_nullable_dict() -> Result<()> { + test_pyarrow_integration(7, 1, "basic", true, false) +} + #[test] fn v1_decimal_18_nullable() -> Result<()> { test_pyarrow_integration(8, 1, "basic", false, false)