From 03e981e156c578e1394c35be03dacd5cf714e542 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sat, 29 Oct 2022 11:35:09 +0200 Subject: [PATCH 1/2] Improved ZipValidity iterators --- src/array/binary/mod.rs | 7 ++++++- src/array/boolean/iterator.rs | 8 +++++++- src/array/boolean/mod.rs | 3 +++ src/array/dictionary/mod.rs | 4 ++++ src/array/fixed_size_binary/iterator.rs | 9 ++++++++- src/array/fixed_size_list/iterator.rs | 3 +++ src/array/list/iterator.rs | 3 +++ src/array/map/iterator.rs | 1 + src/array/primitive/iterator.rs | 6 +++++- src/array/primitive/mod.rs | 1 + src/array/struct_/iterator.rs | 3 +++ src/array/utf8/mod.rs | 6 +++++- src/array/utf8/mutable.rs | 6 +++++- src/bitmap/utils/zip_validity.rs | 9 ++++++--- src/io/avro/write/serialize.rs | 12 ++++++++++-- src/io/json/write/serialize.rs | 7 ++++++- tests/it/bitmap/utils/zip_validity.rs | 16 +++++++++------- 17 files changed, 85 insertions(+), 19 deletions(-) diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index 17c29f1b30d..5a84241359b 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -117,7 +117,12 @@ impl BinaryArray { /// Returns an iterator of `Option<&[u8]>` over every element of this array. pub fn iter(&self) -> ZipValidity<&[u8], BinaryValueIter, BitmapIter> { - ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter())) + let null_count = self.validity.as_ref().map(|validity| validity.unset_bits()); + ZipValidity::new( + self.values_iter(), + self.validity.as_ref().map(|x| x.iter()), + null_count, + ) } /// Returns an iterator of `&[u8]` over every element of this array, ignoring the validity diff --git a/src/array/boolean/iterator.rs b/src/array/boolean/iterator.rs index 019ba5b388f..9beb53b1dbb 100644 --- a/src/array/boolean/iterator.rs +++ b/src/array/boolean/iterator.rs @@ -22,8 +22,9 @@ impl IntoIterator for BooleanArray { fn into_iter(self) -> Self::IntoIter { let (_, values, validity) = self.into_inner(); let values = values.into_iter(); + let null_count = validity.as_ref().map(|validity| validity.unset_bits()); let validity = validity.map(|x| x.into_iter()); - ZipValidity::new(values, validity) + ZipValidity::new(values, validity, null_count) } } @@ -41,9 +42,14 @@ impl<'a> MutableBooleanArray { /// Returns an iterator over the optional values of this [`MutableBooleanArray`]. #[inline] pub fn iter(&'a self) -> ZipValidity, BitmapIter<'a>> { + let null_count = self + .validity() + .as_ref() + .map(|validity| validity.unset_bits()); ZipValidity::new( self.values().iter(), self.validity().as_ref().map(|x| x.iter()), + null_count, ) } diff --git a/src/array/boolean/mod.rs b/src/array/boolean/mod.rs index 2c33ef155ab..816e901144e 100644 --- a/src/array/boolean/mod.rs +++ b/src/array/boolean/mod.rs @@ -91,6 +91,9 @@ impl BooleanArray { ZipValidity::new( self.values().iter(), self.validity.as_ref().map(|x| x.iter()), + self.validity() + .as_ref() + .map(|validity| validity.unset_bits()), ) } diff --git a/src/array/dictionary/mod.rs b/src/array/dictionary/mod.rs index 02d72ccd260..81d2eec354d 100644 --- a/src/array/dictionary/mod.rs +++ b/src/array/dictionary/mod.rs @@ -194,6 +194,10 @@ impl DictionaryArray { ZipValidity::new( DictionaryValuesIter::new(self), self.keys.validity().as_ref().map(|x| x.iter()), + self.keys + .validity() + .as_ref() + .map(|validity| validity.unset_bits()), ) } diff --git a/src/array/fixed_size_binary/iterator.rs b/src/array/fixed_size_binary/iterator.rs index ad3c36968fa..2e4acaf60f2 100644 --- a/src/array/fixed_size_binary/iterator.rs +++ b/src/array/fixed_size_binary/iterator.rs @@ -19,7 +19,11 @@ impl<'a> FixedSizeBinaryArray { pub fn iter( &'a self, ) -> ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>> { - ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter())) + ZipValidity::new( + self.values_iter(), + self.validity.as_ref().map(|x| x.iter()), + self.validity.as_ref().map(|validity| validity.unset_bits()), + ) } /// Returns iterator over the values of [`FixedSizeBinaryArray`] @@ -45,6 +49,9 @@ impl<'a> MutableFixedSizeBinaryArray { ZipValidity::new( self.iter_values(), self.validity().as_ref().map(|x| x.iter()), + self.validity() + .as_ref() + .map(|validity| validity.unset_bits()), ) } diff --git a/src/array/fixed_size_list/iterator.rs b/src/array/fixed_size_list/iterator.rs index 4ee178da5a7..046050d5671 100644 --- a/src/array/fixed_size_list/iterator.rs +++ b/src/array/fixed_size_list/iterator.rs @@ -39,6 +39,9 @@ impl<'a> FixedSizeListArray { ZipValidity::new( FixedSizeListValuesIter::new(self), self.validity.as_ref().map(|x| x.iter()), + self.validity() + .as_ref() + .map(|validity| validity.unset_bits()), ) } diff --git a/src/array/list/iterator.rs b/src/array/list/iterator.rs index 90ec9b4cb58..8fff5fa8204 100644 --- a/src/array/list/iterator.rs +++ b/src/array/list/iterator.rs @@ -38,6 +38,9 @@ impl<'a, O: Offset> ListArray { ZipValidity::new( ListValuesIter::new(self), self.validity.as_ref().map(|x| x.iter()), + self.validity() + .as_ref() + .map(|validity| validity.unset_bits()), ) } diff --git a/src/array/map/iterator.rs b/src/array/map/iterator.rs index 8bd8ce820ed..c20324a5d1e 100644 --- a/src/array/map/iterator.rs +++ b/src/array/map/iterator.rs @@ -75,6 +75,7 @@ impl<'a> MapArray { ZipValidity::new( MapValuesIter::new(self), self.validity.as_ref().map(|x| x.iter()), + self.validity.as_ref().map(|x| x.unset_bits()), ) } diff --git a/src/array/primitive/iterator.rs b/src/array/primitive/iterator.rs index 89a8842e8fd..7b4a40a0355 100644 --- a/src/array/primitive/iterator.rs +++ b/src/array/primitive/iterator.rs @@ -16,8 +16,9 @@ impl IntoIterator for PrimitiveArray { fn into_iter(self) -> Self::IntoIter { let (_, values, validity) = self.into_inner(); let values = values.into_iter(); + let null_count = validity.as_ref().map(|x| x.unset_bits()); let validity = validity.map(|x| x.into_iter()); - ZipValidity::new(values, validity) + ZipValidity::new(values, validity, null_count) } } @@ -38,6 +39,9 @@ impl<'a, T: NativeType> MutablePrimitiveArray { ZipValidity::new( self.values().iter(), self.validity().as_ref().map(|x| x.iter()), + self.validity() + .as_ref() + .map(|validity| validity.unset_bits()), ) } diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index 2bbdbd05239..c27e98395fb 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -144,6 +144,7 @@ impl PrimitiveArray { ZipValidity::new( self.values().iter(), self.validity().as_ref().map(|x| x.iter()), + self.validity.as_ref().map(|validity| validity.unset_bits()), ) } diff --git a/src/array/struct_/iterator.rs b/src/array/struct_/iterator.rs index c5c75a6b925..884cc300126 100644 --- a/src/array/struct_/iterator.rs +++ b/src/array/struct_/iterator.rs @@ -92,6 +92,9 @@ impl<'a> StructArray { ZipValidity::new( StructValueIter::new(self), self.validity.as_ref().map(|x| x.iter()), + self.validity() + .as_ref() + .map(|validity| validity.unset_bits()), ) } diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs index dc453a5c37b..84ab1b941f0 100644 --- a/src/array/utf8/mod.rs +++ b/src/array/utf8/mod.rs @@ -133,7 +133,11 @@ impl Utf8Array { /// Returns an iterator of `Option<&str>` pub fn iter(&self) -> ZipValidity<&str, Utf8ValuesIter, BitmapIter> { - ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter())) + ZipValidity::new( + self.values_iter(), + self.validity.as_ref().map(|x| x.iter()), + self.validity.as_ref().map(|validity| validity.unset_bits()), + ) } /// Returns an iterator of `&str` diff --git a/src/array/utf8/mutable.rs b/src/array/utf8/mutable.rs index 4cc83da9011..c993ad18f8b 100644 --- a/src/array/utf8/mutable.rs +++ b/src/array/utf8/mutable.rs @@ -206,7 +206,11 @@ impl MutableUtf8Array { /// Returns an iterator of `Option<&str>` pub fn iter(&self) -> ZipValidity<&str, MutableUtf8ValuesIter, BitmapIter> { - ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter())) + ZipValidity::new( + self.values_iter(), + self.validity.as_ref().map(|x| x.iter()), + self.validity.as_ref().map(|validity| validity.unset_bits()), + ) } /// Converts itself into an [`Array`]. diff --git a/src/bitmap/utils/zip_validity.rs b/src/bitmap/utils/zip_validity.rs index 88ad018db33..201cb071ae2 100644 --- a/src/bitmap/utils/zip_validity.rs +++ b/src/bitmap/utils/zip_validity.rs @@ -101,10 +101,13 @@ where V: Iterator, { /// Returns a new [`ZipValidity`] - pub fn new(values: I, validity: Option) -> Self { + pub fn new(values: I, validity: Option, null_count: Option) -> Self { match validity { - Some(validity) => Self::Optional(ZipValidityIter::new(values, validity)), - None => Self::Required(values), + // only if we have a validity and there are nulls we will iterate them + Some(validity) if null_count != Some(0) => { + Self::Optional(ZipValidityIter::new(values, validity)) + } + _ => Self::Required(values), } } } diff --git a/src/io/avro/write/serialize.rs b/src/io/avro/write/serialize.rs index f3a56a22501..30e31adaed1 100644 --- a/src/io/avro/write/serialize.rs +++ b/src/io/avro/write/serialize.rs @@ -126,7 +126,11 @@ fn list_optional<'a, O: Offset>(array: &'a ListArray, schema: &AvroSchema) -> .offsets() .windows(2) .map(|w| (w[1] - w[0]).to_usize() as i64); - let lengths = ZipValidity::new(lengths, array.validity().as_ref().map(|x| x.iter())); + let lengths = ZipValidity::new( + lengths, + array.validity().as_ref().map(|x| x.iter()), + array.validity().as_ref().map(|x| x.unset_bits()), + ); Box::new(BufStreamingIterator::new( lengths, @@ -180,7 +184,11 @@ fn struct_optional<'a>(array: &'a StructArray, schema: &Record) -> BoxSerializer .map(|(x, schema)| new_serializer(x.as_ref(), schema)) .collect::>(); - let iterator = ZipValidity::new(0..array.len(), array.validity().as_ref().map(|x| x.iter())); + let iterator = ZipValidity::new( + 0..array.len(), + array.validity().as_ref().map(|x| x.iter()), + array.validity().as_ref().map(|x| x.unset_bits()), + ); Box::new(BufStreamingIterator::new( iterator, diff --git a/src/io/json/write/serialize.rs b/src/io/json/write/serialize.rs index 1e2c8445eb2..288c23f235b 100644 --- a/src/io/json/write/serialize.rs +++ b/src/io/json/write/serialize.rs @@ -103,7 +103,11 @@ fn struct_serializer<'a>( let names = array.fields().iter().map(|f| f.name.as_str()); Box::new(BufStreamingIterator::new( - ZipValidity::new(0..array.len(), array.validity().map(|x| x.iter())), + ZipValidity::new( + 0..array.len(), + array.validity().map(|x| x.iter()), + array.validity().map(|x| x.unset_bits()), + ), move |maybe, buf| { if maybe.is_some() { let names = names.clone(); @@ -143,6 +147,7 @@ fn list_serializer<'a, O: Offset>( ZipValidity::new( array.offsets().windows(2), array.validity().map(|x| x.iter()), + array.validity().map(|x| x.unset_bits()), ), move |offset, buf| { if let Some(offset) = offset { diff --git a/tests/it/bitmap/utils/zip_validity.rs b/tests/it/bitmap/utils/zip_validity.rs index 9c6187f60a6..c86240eb0c2 100644 --- a/tests/it/bitmap/utils/zip_validity.rs +++ b/tests/it/bitmap/utils/zip_validity.rs @@ -8,7 +8,7 @@ fn basic() { let a = Bitmap::from([true, false]); let a = Some(a.iter()); let values = vec![0, 1]; - let zip = ZipValidity::new(values.into_iter(), a); + let zip = ZipValidity::new(values.into_iter(), a, None); let a = zip.collect::>(); assert_eq!(a, vec![Some(0), None]); @@ -19,7 +19,7 @@ fn complete() { let a = Bitmap::from([true, false, true, false, true, false, true, false]); let a = Some(a.iter()); let values = vec![0, 1, 2, 3, 4, 5, 6, 7]; - let zip = ZipValidity::new(values.into_iter(), a); + let zip = ZipValidity::new(values.into_iter(), a, None); let a = zip.collect::>(); assert_eq!( @@ -31,6 +31,7 @@ fn complete() { #[test] fn slices() { let a = Bitmap::from([true, false]); + let null_count = Some(a.unset_bits()); let a = Some(a.iter()); let offsets = vec![0, 2, 3]; let values = vec![1, 2, 3]; @@ -39,7 +40,7 @@ fn slices() { let end = x[1]; &values[start..end] }); - let zip = ZipValidity::new(iter, a); + let zip = ZipValidity::new(iter, a, null_count); let a = zip.collect::>(); assert_eq!(a, vec![Some([1, 2].as_ref()), None]); @@ -50,7 +51,7 @@ fn byte() { let a = Bitmap::from([true, false, true, false, false, true, true, false, true]); let a = Some(a.iter()); let values = vec![0, 1, 2, 3, 4, 5, 6, 7, 8]; - let zip = ZipValidity::new(values.into_iter(), a); + let zip = ZipValidity::new(values.into_iter(), a, None); let a = zip.collect::>(); assert_eq!( @@ -74,7 +75,7 @@ fn offset() { let a = Bitmap::from([true, false, true, false, false, true, true, false, true]).slice(1, 8); let a = Some(a.iter()); let values = vec![0, 1, 2, 3, 4, 5, 6, 7]; - let zip = ZipValidity::new(values.into_iter(), a); + let zip = ZipValidity::new(values.into_iter(), a, None); let a = zip.collect::>(); assert_eq!( @@ -86,7 +87,7 @@ fn offset() { #[test] fn none() { let values = vec![0, 1, 2]; - let zip = ZipValidity::new(values.into_iter(), None::); + let zip = ZipValidity::new(values.into_iter(), None::, None); let a = zip.collect::>(); assert_eq!(a, vec![Some(0), Some(1), Some(2)]); @@ -95,9 +96,10 @@ fn none() { #[test] fn rev() { let a = Bitmap::from([true, false, true, false, false, true, true, false, true]).slice(1, 8); + let null_count = Some(a.unset_bits()); let a = Some(a.iter()); let values = vec![0, 1, 2, 3, 4, 5, 6, 7]; - let zip = ZipValidity::new(values.into_iter(), a); + let zip = ZipValidity::new(values.into_iter(), a, null_count); let result = zip.rev().collect::>(); let expected = vec![None, Some(1), None, None, Some(4), Some(5), None, Some(7)] From d02e3a9b40525cfb0f33b4707c6b359ff77771f9 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 2 Nov 2022 08:59:12 +0100 Subject: [PATCH 2/2] add 'new_with_validity' --- src/array/binary/mod.rs | 7 +------ src/array/boolean/iterator.rs | 11 +++-------- src/array/boolean/mod.rs | 8 +------- src/array/dictionary/mod.rs | 9 +-------- src/array/fixed_size_binary/iterator.rs | 14 ++------------ src/array/fixed_size_list/iterator.rs | 8 +------- src/array/list/iterator.rs | 8 +------- src/array/map/iterator.rs | 6 +----- src/array/primitive/iterator.rs | 9 +++------ src/array/primitive/mod.rs | 6 +----- src/array/struct_/iterator.rs | 8 +------- src/array/utf8/mod.rs | 6 +----- src/array/utf8/mutable.rs | 6 +----- src/bitmap/utils/zip_validity.rs | 24 +++++++++++++++++++----- src/io/avro/write/serialize.rs | 12 ++---------- src/io/json/write/serialize.rs | 12 ++---------- tests/it/bitmap/utils/zip_validity.rs | 16 +++++++--------- 17 files changed, 48 insertions(+), 122 deletions(-) diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index 5a84241359b..8aae45b2a45 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -117,12 +117,7 @@ impl BinaryArray { /// Returns an iterator of `Option<&[u8]>` over every element of this array. pub fn iter(&self) -> ZipValidity<&[u8], BinaryValueIter, BitmapIter> { - let null_count = self.validity.as_ref().map(|validity| validity.unset_bits()); - ZipValidity::new( - self.values_iter(), - self.validity.as_ref().map(|x| x.iter()), - null_count, - ) + ZipValidity::new_with_validity(self.values_iter(), self.validity.as_ref()) } /// Returns an iterator of `&[u8]` over every element of this array, ignoring the validity diff --git a/src/array/boolean/iterator.rs b/src/array/boolean/iterator.rs index 9beb53b1dbb..8243a8d985f 100644 --- a/src/array/boolean/iterator.rs +++ b/src/array/boolean/iterator.rs @@ -22,9 +22,9 @@ impl IntoIterator for BooleanArray { fn into_iter(self) -> Self::IntoIter { let (_, values, validity) = self.into_inner(); let values = values.into_iter(); - let null_count = validity.as_ref().map(|validity| validity.unset_bits()); - let validity = validity.map(|x| x.into_iter()); - ZipValidity::new(values, validity, null_count) + let validity = + validity.and_then(|validity| (validity.unset_bits() > 0).then(|| validity.into_iter())); + ZipValidity::new(values, validity) } } @@ -42,14 +42,9 @@ impl<'a> MutableBooleanArray { /// Returns an iterator over the optional values of this [`MutableBooleanArray`]. #[inline] pub fn iter(&'a self) -> ZipValidity, BitmapIter<'a>> { - let null_count = self - .validity() - .as_ref() - .map(|validity| validity.unset_bits()); ZipValidity::new( self.values().iter(), self.validity().as_ref().map(|x| x.iter()), - null_count, ) } diff --git a/src/array/boolean/mod.rs b/src/array/boolean/mod.rs index 816e901144e..a3788b9b856 100644 --- a/src/array/boolean/mod.rs +++ b/src/array/boolean/mod.rs @@ -88,13 +88,7 @@ impl BooleanArray { /// Returns an iterator over the optional values of this [`BooleanArray`]. #[inline] pub fn iter(&self) -> ZipValidity { - ZipValidity::new( - self.values().iter(), - self.validity.as_ref().map(|x| x.iter()), - self.validity() - .as_ref() - .map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(self.values().iter(), self.validity()) } /// Returns an iterator over the values of this [`BooleanArray`]. diff --git a/src/array/dictionary/mod.rs b/src/array/dictionary/mod.rs index 81d2eec354d..8b62c91dce9 100644 --- a/src/array/dictionary/mod.rs +++ b/src/array/dictionary/mod.rs @@ -191,14 +191,7 @@ impl DictionaryArray { /// This function will allocate a new [`Scalar`] per item and is usually not performant. /// Consider calling `keys_iter` and `values`, downcasting `values`, and iterating over that. pub fn iter(&self) -> ZipValidity, DictionaryValuesIter, BitmapIter> { - ZipValidity::new( - DictionaryValuesIter::new(self), - self.keys.validity().as_ref().map(|x| x.iter()), - self.keys - .validity() - .as_ref() - .map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(DictionaryValuesIter::new(self), self.keys.validity()) } /// Returns an iterator of [`Box`] diff --git a/src/array/fixed_size_binary/iterator.rs b/src/array/fixed_size_binary/iterator.rs index 2e4acaf60f2..0d456e0ba35 100644 --- a/src/array/fixed_size_binary/iterator.rs +++ b/src/array/fixed_size_binary/iterator.rs @@ -19,11 +19,7 @@ impl<'a> FixedSizeBinaryArray { pub fn iter( &'a self, ) -> ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>> { - ZipValidity::new( - self.values_iter(), - self.validity.as_ref().map(|x| x.iter()), - self.validity.as_ref().map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(self.values_iter(), self.validity()) } /// Returns iterator over the values of [`FixedSizeBinaryArray`] @@ -46,13 +42,7 @@ impl<'a> MutableFixedSizeBinaryArray { pub fn iter( &'a self, ) -> ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>> { - ZipValidity::new( - self.iter_values(), - self.validity().as_ref().map(|x| x.iter()), - self.validity() - .as_ref() - .map(|validity| validity.unset_bits()), - ) + ZipValidity::new(self.iter_values(), self.validity().map(|x| x.iter())) } /// Returns iterator over the values of [`MutableFixedSizeBinaryArray`] diff --git a/src/array/fixed_size_list/iterator.rs b/src/array/fixed_size_list/iterator.rs index 046050d5671..2f2b70b2eff 100644 --- a/src/array/fixed_size_list/iterator.rs +++ b/src/array/fixed_size_list/iterator.rs @@ -36,13 +36,7 @@ impl<'a> IntoIterator for &'a FixedSizeListArray { impl<'a> FixedSizeListArray { /// Returns an iterator of `Option>` pub fn iter(&'a self) -> ZipIter<'a> { - ZipValidity::new( - FixedSizeListValuesIter::new(self), - self.validity.as_ref().map(|x| x.iter()), - self.validity() - .as_ref() - .map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(FixedSizeListValuesIter::new(self), self.validity()) } /// Returns an iterator of `Box` diff --git a/src/array/list/iterator.rs b/src/array/list/iterator.rs index 8fff5fa8204..d0b7d22455b 100644 --- a/src/array/list/iterator.rs +++ b/src/array/list/iterator.rs @@ -35,13 +35,7 @@ impl<'a, O: Offset> IntoIterator for &'a ListArray { impl<'a, O: Offset> ListArray { /// Returns an iterator of `Option>` pub fn iter(&'a self) -> ZipIter<'a, O> { - ZipValidity::new( - ListValuesIter::new(self), - self.validity.as_ref().map(|x| x.iter()), - self.validity() - .as_ref() - .map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(ListValuesIter::new(self), self.validity.as_ref()) } /// Returns an iterator of `Box` diff --git a/src/array/map/iterator.rs b/src/array/map/iterator.rs index c20324a5d1e..5867372c886 100644 --- a/src/array/map/iterator.rs +++ b/src/array/map/iterator.rs @@ -72,11 +72,7 @@ impl<'a> IntoIterator for &'a MapArray { impl<'a> MapArray { /// Returns an iterator of `Option>` pub fn iter(&'a self) -> ZipValidity, MapValuesIter<'a>, BitmapIter<'a>> { - ZipValidity::new( - MapValuesIter::new(self), - self.validity.as_ref().map(|x| x.iter()), - self.validity.as_ref().map(|x| x.unset_bits()), - ) + ZipValidity::new_with_validity(MapValuesIter::new(self), self.validity()) } /// Returns an iterator of `Box` diff --git a/src/array/primitive/iterator.rs b/src/array/primitive/iterator.rs index 7b4a40a0355..18e213b563d 100644 --- a/src/array/primitive/iterator.rs +++ b/src/array/primitive/iterator.rs @@ -16,9 +16,9 @@ impl IntoIterator for PrimitiveArray { fn into_iter(self) -> Self::IntoIter { let (_, values, validity) = self.into_inner(); let values = values.into_iter(); - let null_count = validity.as_ref().map(|x| x.unset_bits()); - let validity = validity.map(|x| x.into_iter()); - ZipValidity::new(values, validity, null_count) + let validity = + validity.and_then(|validity| (validity.unset_bits() > 0).then(|| validity.into_iter())); + ZipValidity::new(values, validity) } } @@ -39,9 +39,6 @@ impl<'a, T: NativeType> MutablePrimitiveArray { ZipValidity::new( self.values().iter(), self.validity().as_ref().map(|x| x.iter()), - self.validity() - .as_ref() - .map(|validity| validity.unset_bits()), ) } diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index c27e98395fb..a64cd90f38a 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -141,11 +141,7 @@ impl PrimitiveArray { /// Returns an iterator over the values and validity, `Option<&T>`. #[inline] pub fn iter(&self) -> ZipValidity<&T, std::slice::Iter, BitmapIter> { - ZipValidity::new( - self.values().iter(), - self.validity().as_ref().map(|x| x.iter()), - self.validity.as_ref().map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(self.values().iter(), self.validity()) } /// Returns an iterator of the values, `&T`, ignoring the arrays' validity. diff --git a/src/array/struct_/iterator.rs b/src/array/struct_/iterator.rs index 884cc300126..2b6849d2e26 100644 --- a/src/array/struct_/iterator.rs +++ b/src/array/struct_/iterator.rs @@ -89,13 +89,7 @@ impl<'a> IntoIterator for &'a StructArray { impl<'a> StructArray { /// Returns an iterator of `Option>` pub fn iter(&'a self) -> ZipIter<'a> { - ZipValidity::new( - StructValueIter::new(self), - self.validity.as_ref().map(|x| x.iter()), - self.validity() - .as_ref() - .map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(StructValueIter::new(self), self.validity()) } /// Returns an iterator of `Box` diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs index 84ab1b941f0..5e057e90a5b 100644 --- a/src/array/utf8/mod.rs +++ b/src/array/utf8/mod.rs @@ -133,11 +133,7 @@ impl Utf8Array { /// Returns an iterator of `Option<&str>` pub fn iter(&self) -> ZipValidity<&str, Utf8ValuesIter, BitmapIter> { - ZipValidity::new( - self.values_iter(), - self.validity.as_ref().map(|x| x.iter()), - self.validity.as_ref().map(|validity| validity.unset_bits()), - ) + ZipValidity::new_with_validity(self.values_iter(), self.validity()) } /// Returns an iterator of `&str` diff --git a/src/array/utf8/mutable.rs b/src/array/utf8/mutable.rs index c993ad18f8b..4cc83da9011 100644 --- a/src/array/utf8/mutable.rs +++ b/src/array/utf8/mutable.rs @@ -206,11 +206,7 @@ impl MutableUtf8Array { /// Returns an iterator of `Option<&str>` pub fn iter(&self) -> ZipValidity<&str, MutableUtf8ValuesIter, BitmapIter> { - ZipValidity::new( - self.values_iter(), - self.validity.as_ref().map(|x| x.iter()), - self.validity.as_ref().map(|validity| validity.unset_bits()), - ) + ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter())) } /// Converts itself into an [`Array`]. diff --git a/src/bitmap/utils/zip_validity.rs b/src/bitmap/utils/zip_validity.rs index 201cb071ae2..d43802333a3 100644 --- a/src/bitmap/utils/zip_validity.rs +++ b/src/bitmap/utils/zip_validity.rs @@ -1,3 +1,5 @@ +use crate::bitmap::utils::BitmapIter; +use crate::bitmap::Bitmap; use crate::trusted_len::TrustedLen; /// An [`Iterator`] over validity and values. @@ -101,12 +103,24 @@ where V: Iterator, { /// Returns a new [`ZipValidity`] - pub fn new(values: I, validity: Option, null_count: Option) -> Self { + pub fn new(values: I, validity: Option) -> Self { match validity { - // only if we have a validity and there are nulls we will iterate them - Some(validity) if null_count != Some(0) => { - Self::Optional(ZipValidityIter::new(values, validity)) - } + Some(validity) => Self::Optional(ZipValidityIter::new(values, validity)), + _ => Self::Required(values), + } + } +} + +impl<'a, T, I> ZipValidity> +where + I: Iterator, +{ + /// Returns a new [`ZipValidity`] and drops the `validity` if all values + /// are valid. + pub fn new_with_validity(values: I, validity: Option<&'a Bitmap>) -> Self { + // only if the validity has nulls we take the optional branch. + match validity.and_then(|validity| (validity.unset_bits() > 0).then(|| validity.iter())) { + Some(validity) => Self::Optional(ZipValidityIter::new(values, validity)), _ => Self::Required(values), } } diff --git a/src/io/avro/write/serialize.rs b/src/io/avro/write/serialize.rs index 30e31adaed1..cb94b78d28b 100644 --- a/src/io/avro/write/serialize.rs +++ b/src/io/avro/write/serialize.rs @@ -126,11 +126,7 @@ fn list_optional<'a, O: Offset>(array: &'a ListArray, schema: &AvroSchema) -> .offsets() .windows(2) .map(|w| (w[1] - w[0]).to_usize() as i64); - let lengths = ZipValidity::new( - lengths, - array.validity().as_ref().map(|x| x.iter()), - array.validity().as_ref().map(|x| x.unset_bits()), - ); + let lengths = ZipValidity::new_with_validity(lengths, array.validity()); Box::new(BufStreamingIterator::new( lengths, @@ -184,11 +180,7 @@ fn struct_optional<'a>(array: &'a StructArray, schema: &Record) -> BoxSerializer .map(|(x, schema)| new_serializer(x.as_ref(), schema)) .collect::>(); - let iterator = ZipValidity::new( - 0..array.len(), - array.validity().as_ref().map(|x| x.iter()), - array.validity().as_ref().map(|x| x.unset_bits()), - ); + let iterator = ZipValidity::new_with_validity(0..array.len(), array.validity()); Box::new(BufStreamingIterator::new( iterator, diff --git a/src/io/json/write/serialize.rs b/src/io/json/write/serialize.rs index 288c23f235b..0afb0732960 100644 --- a/src/io/json/write/serialize.rs +++ b/src/io/json/write/serialize.rs @@ -103,11 +103,7 @@ fn struct_serializer<'a>( let names = array.fields().iter().map(|f| f.name.as_str()); Box::new(BufStreamingIterator::new( - ZipValidity::new( - 0..array.len(), - array.validity().map(|x| x.iter()), - array.validity().map(|x| x.unset_bits()), - ), + ZipValidity::new_with_validity(0..array.len(), array.validity()), move |maybe, buf| { if maybe.is_some() { let names = names.clone(); @@ -144,11 +140,7 @@ fn list_serializer<'a, O: Offset>( let mut serializer = new_serializer(array.values().as_ref()); Box::new(BufStreamingIterator::new( - ZipValidity::new( - array.offsets().windows(2), - array.validity().map(|x| x.iter()), - array.validity().map(|x| x.unset_bits()), - ), + ZipValidity::new_with_validity(array.offsets().windows(2), array.validity()), move |offset, buf| { if let Some(offset) = offset { let length = (offset[1] - offset[0]).to_usize(); diff --git a/tests/it/bitmap/utils/zip_validity.rs b/tests/it/bitmap/utils/zip_validity.rs index c86240eb0c2..9c6187f60a6 100644 --- a/tests/it/bitmap/utils/zip_validity.rs +++ b/tests/it/bitmap/utils/zip_validity.rs @@ -8,7 +8,7 @@ fn basic() { let a = Bitmap::from([true, false]); let a = Some(a.iter()); let values = vec![0, 1]; - let zip = ZipValidity::new(values.into_iter(), a, None); + let zip = ZipValidity::new(values.into_iter(), a); let a = zip.collect::>(); assert_eq!(a, vec![Some(0), None]); @@ -19,7 +19,7 @@ fn complete() { let a = Bitmap::from([true, false, true, false, true, false, true, false]); let a = Some(a.iter()); let values = vec![0, 1, 2, 3, 4, 5, 6, 7]; - let zip = ZipValidity::new(values.into_iter(), a, None); + let zip = ZipValidity::new(values.into_iter(), a); let a = zip.collect::>(); assert_eq!( @@ -31,7 +31,6 @@ fn complete() { #[test] fn slices() { let a = Bitmap::from([true, false]); - let null_count = Some(a.unset_bits()); let a = Some(a.iter()); let offsets = vec![0, 2, 3]; let values = vec![1, 2, 3]; @@ -40,7 +39,7 @@ fn slices() { let end = x[1]; &values[start..end] }); - let zip = ZipValidity::new(iter, a, null_count); + let zip = ZipValidity::new(iter, a); let a = zip.collect::>(); assert_eq!(a, vec![Some([1, 2].as_ref()), None]); @@ -51,7 +50,7 @@ fn byte() { let a = Bitmap::from([true, false, true, false, false, true, true, false, true]); let a = Some(a.iter()); let values = vec![0, 1, 2, 3, 4, 5, 6, 7, 8]; - let zip = ZipValidity::new(values.into_iter(), a, None); + let zip = ZipValidity::new(values.into_iter(), a); let a = zip.collect::>(); assert_eq!( @@ -75,7 +74,7 @@ fn offset() { let a = Bitmap::from([true, false, true, false, false, true, true, false, true]).slice(1, 8); let a = Some(a.iter()); let values = vec![0, 1, 2, 3, 4, 5, 6, 7]; - let zip = ZipValidity::new(values.into_iter(), a, None); + let zip = ZipValidity::new(values.into_iter(), a); let a = zip.collect::>(); assert_eq!( @@ -87,7 +86,7 @@ fn offset() { #[test] fn none() { let values = vec![0, 1, 2]; - let zip = ZipValidity::new(values.into_iter(), None::, None); + let zip = ZipValidity::new(values.into_iter(), None::); let a = zip.collect::>(); assert_eq!(a, vec![Some(0), Some(1), Some(2)]); @@ -96,10 +95,9 @@ fn none() { #[test] fn rev() { let a = Bitmap::from([true, false, true, false, false, true, true, false, true]).slice(1, 8); - let null_count = Some(a.unset_bits()); let a = Some(a.iter()); let values = vec![0, 1, 2, 3, 4, 5, 6, 7]; - let zip = ZipValidity::new(values.into_iter(), a, null_count); + let zip = ZipValidity::new(values.into_iter(), a); let result = zip.rev().collect::>(); let expected = vec![None, Some(1), None, None, Some(4), Some(5), None, Some(7)]