From a11e2583d2541548b2053f7185e7efd0d1adface Mon Sep 17 00:00:00 2001 From: Jorge Leitao Date: Mon, 27 Jun 2022 02:00:42 -0700 Subject: [PATCH] Fixed error in reading chunked parquet (#1108) --- src/io/parquet/read/deserialize/utils.rs | 12 ++++++------ src/io/parquet/read/row_group.rs | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/io/parquet/read/deserialize/utils.rs b/src/io/parquet/read/deserialize/utils.rs index 9e560e43014..dd2174c4180 100644 --- a/src/io/parquet/read/deserialize/utils.rs +++ b/src/io/parquet/read/deserialize/utils.rs @@ -230,6 +230,7 @@ impl<'a> OptionalPageValidity<'a> { } } + /// Number of items remaining pub fn len(&self) -> usize { self.iter.len() + self @@ -300,10 +301,9 @@ pub(super) fn extend_from_decoder<'a, T: Default, P: Pushable, I: Iterator 0 { + let run = page_validity.next_limited(remaining); let run = if let Some(run) = run { run } else { break }; match run { @@ -325,7 +325,7 @@ pub(super) fn extend_from_decoder<'a, T: Default, P: Pushable, I: Iterator { validity.extend_constant(length, is_set); @@ -335,7 +335,7 @@ pub(super) fn extend_from_decoder<'a, T: Default, P: Pushable, I: Iterator for _ in values_iter.by_ref().take(valids) {}, }; diff --git a/src/io/parquet/read/row_group.rs b/src/io/parquet/read/row_group.rs index 25c21658190..0c7222343c8 100644 --- a/src/io/parquet/read/row_group.rs +++ b/src/io/parquet/read/row_group.rs @@ -74,7 +74,7 @@ impl Iterator for RowGroupDeserializer { }) }) .collect::>>() - .map(Chunk::new); + .and_then(Chunk::try_new); self.remaining_rows = self.remaining_rows.saturating_sub( chunk .as_ref()