diff --git a/src/io/parquet/read/deserialize/binary/utils.rs b/src/io/parquet/read/deserialize/binary/utils.rs index 1f30d045a37..a47cb967a55 100644 --- a/src/io/parquet/read/deserialize/binary/utils.rs +++ b/src/io/parquet/read/deserialize/binary/utils.rs @@ -91,6 +91,7 @@ impl Binary { } impl<'a, O: Offset> Pushable<&'a [u8]> for Binary { + #[inline] fn reserve(&mut self, additional: usize) { let avg_len = self.values.len() / std::cmp::max(self.last_offset.to_usize(), 1); self.values.reserve(additional * avg_len); diff --git a/src/io/parquet/read/deserialize/fixed_size_binary/utils.rs b/src/io/parquet/read/deserialize/fixed_size_binary/utils.rs index 88fd06ee641..f718ce1bdc2 100644 --- a/src/io/parquet/read/deserialize/fixed_size_binary/utils.rs +++ b/src/io/parquet/read/deserialize/fixed_size_binary/utils.rs @@ -30,6 +30,7 @@ impl FixedSizeBinary { } impl<'a> Pushable<&'a [u8]> for FixedSizeBinary { + #[inline] fn reserve(&mut self, additional: usize) { self.values.reserve(additional * self.size); } diff --git a/src/io/parquet/read/deserialize/utils.rs b/src/io/parquet/read/deserialize/utils.rs index d3b1e8c351f..71bce24c586 100644 --- a/src/io/parquet/read/deserialize/utils.rs +++ b/src/io/parquet/read/deserialize/utils.rs @@ -38,6 +38,7 @@ pub(super) trait Pushable: Sized { } impl Pushable for MutableBitmap { + #[inline] fn reserve(&mut self, additional: usize) { MutableBitmap::reserve(self, additional) } @@ -63,6 +64,7 @@ impl Pushable for MutableBitmap { } impl Pushable for Vec { + #[inline] fn reserve(&mut self, additional: usize) { Vec::reserve(self, additional) } @@ -296,11 +298,33 @@ pub(super) fn extend_from_decoder<'a, T: Default, P: Pushable, I: Iterator 0 { let run = page_validity.next_limited(remaining); let run = if let Some(run) = run { run } else { break }; + match run { + FilteredHybridEncoded::Bitmap { length, .. } => { + reserve_pushable += length; + remaining -= length; + } + FilteredHybridEncoded::Repeated { length, .. } => { + reserve_pushable += length; + remaining -= length; + } + _ => {} + }; + runs.push(run) + } + pushable.reserve(reserve_pushable); + validity.reserve(reserve_pushable); + + // then a second loop to really fill the buffers + for run in runs { match run { FilteredHybridEncoded::Bitmap { values, @@ -310,7 +334,6 @@ pub(super) fn extend_from_decoder<'a, T: Default, P: Pushable, I: Iterator, I: Iterator { validity.extend_constant(length, is_set); if is_set { - pushable.reserve(length); - (0..length).for_each(|_| pushable.push(values_iter.next().unwrap())); + for v in (&mut values_iter).take(length) { + pushable.push(v) + } } else { pushable.extend_constant(length, T::default()); } - - remaining -= length; } FilteredHybridEncoded::Skipped(valids) => for _ in values_iter.by_ref().take(valids) {}, };