Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Improved ZipValidity iterators #1284

Merged
merged 2 commits into from
Nov 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ impl<O: Offset> BinaryArray<O> {

/// Returns an iterator of `Option<&[u8]>` over every element of this array.
pub fn iter(&self) -> ZipValidity<&[u8], BinaryValueIter<O>, BitmapIter> {
ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
ZipValidity::new_with_validity(self.values_iter(), self.validity.as_ref())
}

/// Returns an iterator of `&[u8]` over every element of this array, ignoring the validity
Expand Down
3 changes: 2 additions & 1 deletion src/array/boolean/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ impl IntoIterator for BooleanArray {
fn into_iter(self) -> Self::IntoIter {
let (_, values, validity) = self.into_inner();
let values = values.into_iter();
let validity = validity.map(|x| x.into_iter());
let validity =
validity.and_then(|validity| (validity.unset_bits() > 0).then(|| validity.into_iter()));
ZipValidity::new(values, validity)
}
}
Expand Down
5 changes: 1 addition & 4 deletions src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,7 @@ impl BooleanArray {
/// Returns an iterator over the optional values of this [`BooleanArray`].
#[inline]
pub fn iter(&self) -> ZipValidity<bool, BitmapIter, BitmapIter> {
ZipValidity::new(
self.values().iter(),
self.validity.as_ref().map(|x| x.iter()),
)
ZipValidity::new_with_validity(self.values().iter(), self.validity())
}

/// Returns an iterator over the values of this [`BooleanArray`].
Expand Down
5 changes: 1 addition & 4 deletions src/array/dictionary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,7 @@ impl<K: DictionaryKey> DictionaryArray<K> {
/// This function will allocate a new [`Scalar`] per item and is usually not performant.
/// Consider calling `keys_iter` and `values`, downcasting `values`, and iterating over that.
pub fn iter(&self) -> ZipValidity<Box<dyn Scalar>, DictionaryValuesIter<K>, BitmapIter> {
ZipValidity::new(
DictionaryValuesIter::new(self),
self.keys.validity().as_ref().map(|x| x.iter()),
)
ZipValidity::new_with_validity(DictionaryValuesIter::new(self), self.keys.validity())
}

/// Returns an iterator of [`Box<dyn Scalar>`]
Expand Down
7 changes: 2 additions & 5 deletions src/array/fixed_size_binary/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ impl<'a> FixedSizeBinaryArray {
pub fn iter(
&'a self,
) -> ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>> {
ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
ZipValidity::new_with_validity(self.values_iter(), self.validity())
}

/// Returns iterator over the values of [`FixedSizeBinaryArray`]
Expand All @@ -42,10 +42,7 @@ impl<'a> MutableFixedSizeBinaryArray {
pub fn iter(
&'a self,
) -> ZipValidity<&'a [u8], std::slice::ChunksExact<'a, u8>, BitmapIter<'a>> {
ZipValidity::new(
self.iter_values(),
self.validity().as_ref().map(|x| x.iter()),
)
ZipValidity::new(self.iter_values(), self.validity().map(|x| x.iter()))
}

/// Returns iterator over the values of [`MutableFixedSizeBinaryArray`]
Expand Down
5 changes: 1 addition & 4 deletions src/array/fixed_size_list/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,7 @@ impl<'a> IntoIterator for &'a FixedSizeListArray {
impl<'a> FixedSizeListArray {
/// Returns an iterator of `Option<Box<dyn Array>>`
pub fn iter(&'a self) -> ZipIter<'a> {
ZipValidity::new(
FixedSizeListValuesIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
)
ZipValidity::new_with_validity(FixedSizeListValuesIter::new(self), self.validity())
}

/// Returns an iterator of `Box<dyn Array>`
Expand Down
5 changes: 1 addition & 4 deletions src/array/list/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,7 @@ impl<'a, O: Offset> IntoIterator for &'a ListArray<O> {
impl<'a, O: Offset> ListArray<O> {
/// Returns an iterator of `Option<Box<dyn Array>>`
pub fn iter(&'a self) -> ZipIter<'a, O> {
ZipValidity::new(
ListValuesIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
)
ZipValidity::new_with_validity(ListValuesIter::new(self), self.validity.as_ref())
}

/// Returns an iterator of `Box<dyn Array>`
Expand Down
5 changes: 1 addition & 4 deletions src/array/map/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,7 @@ impl<'a> IntoIterator for &'a MapArray {
impl<'a> MapArray {
/// Returns an iterator of `Option<Box<dyn Array>>`
pub fn iter(&'a self) -> ZipValidity<Box<dyn Array>, MapValuesIter<'a>, BitmapIter<'a>> {
ZipValidity::new(
MapValuesIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
)
ZipValidity::new_with_validity(MapValuesIter::new(self), self.validity())
}

/// Returns an iterator of `Box<dyn Array>`
Expand Down
3 changes: 2 additions & 1 deletion src/array/primitive/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ impl<T: NativeType> IntoIterator for PrimitiveArray<T> {
fn into_iter(self) -> Self::IntoIter {
let (_, values, validity) = self.into_inner();
let values = values.into_iter();
let validity = validity.map(|x| x.into_iter());
let validity =
validity.and_then(|validity| (validity.unset_bits() > 0).then(|| validity.into_iter()));
ZipValidity::new(values, validity)
}
}
Expand Down
5 changes: 1 addition & 4 deletions src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,10 +141,7 @@ impl<T: NativeType> PrimitiveArray<T> {
/// Returns an iterator over the values and validity, `Option<&T>`.
#[inline]
pub fn iter(&self) -> ZipValidity<&T, std::slice::Iter<T>, BitmapIter> {
ZipValidity::new(
self.values().iter(),
self.validity().as_ref().map(|x| x.iter()),
)
ZipValidity::new_with_validity(self.values().iter(), self.validity())
}

/// Returns an iterator of the values, `&T`, ignoring the arrays' validity.
Expand Down
5 changes: 1 addition & 4 deletions src/array/struct_/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,7 @@ impl<'a> IntoIterator for &'a StructArray {
impl<'a> StructArray {
/// Returns an iterator of `Option<Box<dyn Array>>`
pub fn iter(&'a self) -> ZipIter<'a> {
ZipValidity::new(
StructValueIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
)
ZipValidity::new_with_validity(StructValueIter::new(self), self.validity())
}

/// Returns an iterator of `Box<dyn Array>`
Expand Down
2 changes: 1 addition & 1 deletion src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ impl<O: Offset> Utf8Array<O> {

/// Returns an iterator of `Option<&str>`
pub fn iter(&self) -> ZipValidity<&str, Utf8ValuesIter<O>, BitmapIter> {
ZipValidity::new(self.values_iter(), self.validity.as_ref().map(|x| x.iter()))
ZipValidity::new_with_validity(self.values_iter(), self.validity())
}

/// Returns an iterator of `&str`
Expand Down
19 changes: 18 additions & 1 deletion src/bitmap/utils/zip_validity.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use crate::bitmap::utils::BitmapIter;
use crate::bitmap::Bitmap;
use crate::trusted_len::TrustedLen;

/// An [`Iterator`] over validity and values.
Expand Down Expand Up @@ -104,7 +106,22 @@ where
pub fn new(values: I, validity: Option<V>) -> Self {
match validity {
Some(validity) => Self::Optional(ZipValidityIter::new(values, validity)),
None => Self::Required(values),
_ => Self::Required(values),
}
}
}

impl<'a, T, I> ZipValidity<T, I, BitmapIter<'a>>
where
    I: Iterator<Item = T>,
{
    /// Builds a [`ZipValidity`] from `values` and an optional validity [`Bitmap`].
    ///
    /// The `Optional` variant (values zipped with the bitmap's iterator) is
    /// produced only when a bitmap is supplied *and* it reports at least one
    /// unset bit. In every other case — no bitmap, or a bitmap whose
    /// `unset_bits()` is zero — the bitmap is not iterated and the `Required`
    /// variant wrapping `values` alone is returned.
    pub fn new_with_validity(values: I, validity: Option<&'a Bitmap>) -> Self {
        match validity {
            // At least one unset bit: the validity must be consulted per item.
            Some(bitmap) if bitmap.unset_bits() > 0 => {
                Self::Optional(ZipValidityIter::new(values, bitmap.iter()))
            }
            // No bitmap, or nothing unset in it: iterate the values directly.
            _ => Self::Required(values),
        }
    }
}
Expand Down
4 changes: 2 additions & 2 deletions src/io/avro/write/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ fn list_optional<'a, O: Offset>(array: &'a ListArray<O>, schema: &AvroSchema) ->
.offsets()
.windows(2)
.map(|w| (w[1] - w[0]).to_usize() as i64);
let lengths = ZipValidity::new(lengths, array.validity().as_ref().map(|x| x.iter()));
let lengths = ZipValidity::new_with_validity(lengths, array.validity());

Box::new(BufStreamingIterator::new(
lengths,
Expand Down Expand Up @@ -180,7 +180,7 @@ fn struct_optional<'a>(array: &'a StructArray, schema: &Record) -> BoxSerializer
.map(|(x, schema)| new_serializer(x.as_ref(), schema))
.collect::<Vec<_>>();

let iterator = ZipValidity::new(0..array.len(), array.validity().as_ref().map(|x| x.iter()));
let iterator = ZipValidity::new_with_validity(0..array.len(), array.validity());

Box::new(BufStreamingIterator::new(
iterator,
Expand Down
7 changes: 2 additions & 5 deletions src/io/json/write/serialize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ fn struct_serializer<'a>(
let names = array.fields().iter().map(|f| f.name.as_str());

Box::new(BufStreamingIterator::new(
ZipValidity::new(0..array.len(), array.validity().map(|x| x.iter())),
ZipValidity::new_with_validity(0..array.len(), array.validity()),
move |maybe, buf| {
if maybe.is_some() {
let names = names.clone();
Expand Down Expand Up @@ -140,10 +140,7 @@ fn list_serializer<'a, O: Offset>(
let mut serializer = new_serializer(array.values().as_ref());

Box::new(BufStreamingIterator::new(
ZipValidity::new(
array.offsets().windows(2),
array.validity().map(|x| x.iter()),
),
ZipValidity::new_with_validity(array.offsets().windows(2), array.validity()),
move |offset, buf| {
if let Some(offset) = offset {
let length = (offset[1] - offset[0]).to_usize();
Expand Down