diff --git a/src/io/parquet/read/deserialize/binary/basic.rs b/src/io/parquet/read/deserialize/binary/basic.rs index 28cbca8c99d..1f2dc7896dc 100644 --- a/src/io/parquet/read/deserialize/binary/basic.rs +++ b/src/io/parquet/read/deserialize/binary/basic.rs @@ -311,12 +311,12 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { // this is an upper bound - we may not consume the whole page. // we do not know how many nulls are there, so we do not know how many // valid items are there to discount over the sequence. - Binary::::with_capacity(capacity, values.num_bytes()), + Binary::::with_capacity(capacity, Some(values.num_bytes())), MutableBitmap::with_capacity(capacity), ), State::Required(values) => ( // this is an upper bound - we may not consume the whole page. - Binary::::with_capacity(capacity, values.values.num_bytes()), + Binary::::with_capacity(capacity, Some(values.values.num_bytes())), MutableBitmap::new(), ), State::FilteredRequiredDictionary(_) @@ -324,7 +324,7 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { | State::RequiredDictionary(_) | State::Delta(_) | State::FilteredDelta(_) => ( - Binary::::with_capacity(capacity, 0), + Binary::::with_capacity(capacity, None), MutableBitmap::new(), ), State::FilteredOptionalDictionary(_, _) @@ -332,7 +332,7 @@ impl<'a, O: Offset> utils::Decoder<'a> for BinaryDecoder { | State::OptionalDictionary(_, _) | State::OptionalDelta(_, _) | State::FilteredOptionalDelta(_, _) => ( - Binary::::with_capacity(capacity, 0), + Binary::::with_capacity(capacity, None), MutableBitmap::with_capacity(capacity), ), } diff --git a/src/io/parquet/read/deserialize/binary/dictionary.rs b/src/io/parquet/read/deserialize/binary/dictionary.rs index 96344265899..95c783d67ac 100644 --- a/src/io/parquet/read/deserialize/binary/dictionary.rs +++ b/src/io/parquet/read/deserialize/binary/dictionary.rs @@ -59,7 +59,7 @@ fn read_dict(data_type: DataType, dict: &DictPage) -> Box let values = SizedBinaryIter::new(&dict.buffer, dict.num_values); - let mut data = Binary::::with_capacity(dict.num_values, 0); + let mut data = Binary::::with_capacity(dict.num_values, None); data.values = Vec::with_capacity(dict.buffer.len() - 4 * dict.num_values); for item in values { data.push(item) diff --git a/src/io/parquet/read/deserialize/binary/nested.rs b/src/io/parquet/read/deserialize/binary/nested.rs index 5da4120d85a..e3d3f84cb7d 100644 --- a/src/io/parquet/read/deserialize/binary/nested.rs +++ b/src/io/parquet/read/deserialize/binary/nested.rs @@ -85,7 +85,7 @@ impl<'a, O: Offset> NestedDecoder<'a> for BinaryDecoder { fn with_capacity(&self, capacity: usize) -> Self::DecodedState { ( - Binary::::with_capacity(capacity, 0), + Binary::::with_capacity(capacity, None), MutableBitmap::with_capacity(capacity), ) } diff --git a/src/io/parquet/read/deserialize/binary/utils.rs b/src/io/parquet/read/deserialize/binary/utils.rs index 37e6e695139..6648eb261e4 100644 --- a/src/io/parquet/read/deserialize/binary/utils.rs +++ b/src/io/parquet/read/deserialize/binary/utils.rs @@ -36,7 +36,8 @@ impl Pushable for Offsets { impl Binary { #[inline] - pub fn with_capacity(capacity: usize, values_capacity: usize) -> Self { + pub fn with_capacity(capacity: usize, values_capacity: Option) -> Self { + let values_capacity = values_capacity.unwrap_or(capacity.min(100) * 24); Self { offsets: Offsets::with_capacity(capacity), values: Vec::with_capacity(values_capacity),