diff --git a/src/array/binary/iterator.rs b/src/array/binary/iterator.rs index 4c80a780423..43f85176197 100644 --- a/src/array/binary/iterator.rs +++ b/src/array/binary/iterator.rs @@ -1,68 +1,26 @@ -use crate::{array::Offset, bitmap::utils::ZipValidity, trusted_len::TrustedLen}; +use crate::{ + array::{ArrayAccessor, ArrayValuesIter, Offset}, + bitmap::utils::ZipValidity, +}; use super::BinaryArray; -/// Iterator over slices of `&[u8]`. -#[derive(Debug, Clone)] -pub struct BinaryValueIter<'a, O: Offset> { - array: &'a BinaryArray, - index: usize, - end: usize, -} - -impl<'a, O: Offset> BinaryValueIter<'a, O> { - /// Creates a new [`BinaryValueIter`] - pub fn new(array: &'a BinaryArray) -> Self { - Self { - array, - index: 0, - end: array.len(), - } - } -} - -impl<'a, O: Offset> Iterator for BinaryValueIter<'a, O> { +unsafe impl<'a, O: Offset> ArrayAccessor<'a> for BinaryArray { type Item = &'a [u8]; #[inline] - fn next(&mut self) -> Option { - if self.index == self.end { - return None; - } - let old = self.index; - self.index += 1; - Some(unsafe { self.array.value_unchecked(old) }) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - (self.end - self.index, Some(self.end - self.index)) + unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { + self.value_unchecked(index) } #[inline] - fn nth(&mut self, n: usize) -> Option { - let new_index = self.index + n; - if new_index > self.end { - self.index = self.end; - None - } else { - self.index = new_index; - self.next() - } + fn len(&self) -> usize { + self.len() } } -impl<'a, O: Offset> DoubleEndedIterator for BinaryValueIter<'a, O> { - #[inline] - fn next_back(&mut self) -> Option { - if self.index == self.end { - None - } else { - self.end -= 1; - Some(unsafe { self.array.value_unchecked(self.end) }) - } - } -} +/// Iterator of values of an [`BinaryArray`]. 
+pub type BinaryValueIter<'a, O> = ArrayValuesIter<'a, BinaryArray>; impl<'a, O: Offset> IntoIterator for &'a BinaryArray { type Item = Option<&'a [u8]>; @@ -72,5 +30,3 @@ impl<'a, O: Offset> IntoIterator for &'a BinaryArray { self.iter() } } - -unsafe impl TrustedLen for BinaryValueIter<'_, O> {} diff --git a/src/array/iterator.rs b/src/array/iterator.rs new file mode 100644 index 00000000000..a6cc51281e2 --- /dev/null +++ b/src/array/iterator.rs @@ -0,0 +1,83 @@ +use crate::trusted_len::TrustedLen; + +mod private { + pub trait Sealed {} + + impl<'a, T: super::ArrayAccessor<'a>> Sealed for T {} +} + +/// Trait giving unchecked, indexed access to the values of an array. +/// # Safety +/// Implementers of this trait guarantee that +/// `value_unchecked` is safe when called with any index smaller than `len`; +/// `ArrayValuesIter` relies on this when it reads the indices `0..len`. +pub unsafe trait ArrayAccessor<'a>: private::Sealed { + type Item: 'a; + unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item; + fn len(&self) -> usize; +} + +/// Iterator of values of an `ArrayAccessor`. +#[derive(Debug, Clone)] +pub struct ArrayValuesIter<'a, A: ArrayAccessor<'a>> { + array: &'a A, + index: usize, + end: usize, +} + +impl<'a, A: ArrayAccessor<'a>> ArrayValuesIter<'a, A> { + /// Creates a new [`ArrayValuesIter`] + #[inline] + pub fn new(array: &'a A) -> Self { + Self { + array, + index: 0, + end: array.len(), + } + } +} + +impl<'a, A: ArrayAccessor<'a>> Iterator for ArrayValuesIter<'a, A> { + type Item = A::Item; + + #[inline] + fn next(&mut self) -> Option { + if self.index == self.end { + return None; + } + let old = self.index; + self.index += 1; + Some(unsafe { self.array.value_unchecked(old) }) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.end - self.index, Some(self.end - self.index)) + } + + #[inline] + fn nth(&mut self, n: usize) -> Option { + let new_index = self.index.saturating_add(n); // saturate: a huge `n` must not overflow + if new_index > self.end { + self.index = self.end; + None + } else { + self.index = new_index; + self.next() + } + } +} + +impl<'a, A: ArrayAccessor<'a>> DoubleEndedIterator 
for ArrayValuesIter<'a, A> { + #[inline] + fn next_back(&mut self) -> Option { + if self.index == self.end { + None + } else { + self.end -= 1; + Some(unsafe { self.array.value_unchecked(self.end) }) + } + } +} + +unsafe impl<'a, A: ArrayAccessor<'a>> TrustedLen for ArrayValuesIter<'a, A> {} diff --git a/src/array/mod.rs b/src/array/mod.rs index 86b645b8412..b78bb3f3b81 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -377,8 +377,12 @@ mod equal; mod ffi; mod fmt; pub mod growable; +mod iterator; pub mod ord; +pub(crate) use iterator::ArrayAccessor; +pub use iterator::ArrayValuesIter; + pub use equal::equal; pub use fmt::{get_display, get_value_display}; diff --git a/src/array/utf8/iterator.rs b/src/array/utf8/iterator.rs index 6a725c6254b..1953d4352fc 100644 --- a/src/array/utf8/iterator.rs +++ b/src/array/utf8/iterator.rs @@ -1,77 +1,79 @@ +use crate::array::{ArrayAccessor, ArrayValuesIter, Offset}; use crate::bitmap::utils::ZipValidity; -use crate::{array::Offset, trusted_len::TrustedLen}; -use super::Utf8Array; +use super::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array}; -/// Iterator of values of an `Utf8Array`. -#[derive(Debug, Clone)] -pub struct Utf8ValuesIter<'a, O: Offset> { - array: &'a Utf8Array, - index: usize, - end: usize, +unsafe impl<'a, O: Offset> ArrayAccessor<'a> for Utf8Array { + type Item = &'a str; + + #[inline] + unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { + self.value_unchecked(index) + } + + #[inline] + fn len(&self) -> usize { + self.len() + } } -impl<'a, O: Offset> Utf8ValuesIter<'a, O> { - /// Creates a new [`Utf8ValuesIter`] - pub fn new(array: &'a Utf8Array) -> Self { - Self { - array, - index: 0, - end: array.len(), - } +/// Iterator of values of an [`Utf8Array`]. 
+pub type Utf8ValuesIter<'a, O> = ArrayValuesIter<'a, Utf8Array>; + +impl<'a, O: Offset> IntoIterator for &'a Utf8Array { + type Item = Option<&'a str>; + type IntoIter = ZipValidity<'a, &'a str, Utf8ValuesIter<'a, O>>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() } } -impl<'a, O: Offset> Iterator for Utf8ValuesIter<'a, O> { +unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableUtf8Array { type Item = &'a str; #[inline] - fn next(&mut self) -> Option { - if self.index == self.end { - return None; - } - let old = self.index; - self.index += 1; - Some(unsafe { self.array.value_unchecked(old) }) + unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { + self.value_unchecked(index) } #[inline] - fn size_hint(&self) -> (usize, Option) { - (self.end - self.index, Some(self.end - self.index)) + fn len(&self) -> usize { + self.len() } +} - #[inline] - fn nth(&mut self, n: usize) -> Option { - let new_index = self.index + n; - if new_index > self.end { - self.index = self.end; - None - } else { - self.index = new_index; - self.next() - } +/// Iterator of values of an [`MutableUtf8ValuesArray`]. 
+pub type MutableUtf8ValuesIter<'a, O> = ArrayValuesIter<'a, MutableUtf8ValuesArray>; + +impl<'a, O: Offset> IntoIterator for &'a MutableUtf8Array { + type Item = Option<&'a str>; + type IntoIter = ZipValidity<'a, &'a str, MutableUtf8ValuesIter<'a, O>>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() } } -impl<'a, O: Offset> DoubleEndedIterator for Utf8ValuesIter<'a, O> { +unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableUtf8ValuesArray { + type Item = &'a str; + #[inline] - fn next_back(&mut self) -> Option { - if self.index == self.end { - None - } else { - self.end -= 1; - Some(unsafe { self.array.value_unchecked(self.end) }) - } + unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item { + self.value_unchecked(index) + } + + #[inline] + fn len(&self) -> usize { + self.len() } } -impl<'a, O: Offset> IntoIterator for &'a Utf8Array { - type Item = Option<&'a str>; - type IntoIter = ZipValidity<'a, &'a str, Utf8ValuesIter<'a, O>>; +impl<'a, O: Offset> IntoIterator for &'a MutableUtf8ValuesArray { + type Item = &'a str; + type IntoIter = ArrayValuesIter<'a, MutableUtf8ValuesArray>; fn into_iter(self) -> Self::IntoIter { self.iter() } } - -unsafe impl TrustedLen for Utf8ValuesIter<'_, O> {} diff --git a/src/array/utf8/mutable.rs b/src/array/utf8/mutable.rs index 61a7b65a0f5..0cc1f8d1c5f 100644 --- a/src/array/utf8/mutable.rs +++ b/src/array/utf8/mutable.rs @@ -3,13 +3,16 @@ use std::{iter::FromIterator, sync::Arc}; use crate::array::physical_binary::*; use crate::{ array::{Array, MutableArray, Offset, TryExtend, TryPush}, - bitmap::{Bitmap, MutableBitmap}, + bitmap::{ + utils::{zip_validity, ZipValidity}, + Bitmap, MutableBitmap, + }, datatypes::DataType, error::{Error, Result}, trusted_len::TrustedLen, }; -use super::{MutableUtf8ValuesArray, StrAsBytes, Utf8Array}; +use super::{MutableUtf8ValuesArray, MutableUtf8ValuesIter, StrAsBytes, Utf8Array}; /// A [`MutableArray`] that builds a [`Utf8Array`]. 
It differs /// from [`MutableUtf8ValuesArray`] in that it can build nullable [`Utf8Array`]s. @@ -153,6 +156,12 @@ impl MutableUtf8Array { self.values.capacity() } + /// Returns the length of this array + #[inline] + pub fn len(&self) -> usize { + self.values.len() + } + /// Pushes a new element to the array. /// # Panic /// This operation panics iff the length of all values (in bytes) exceeds `O` maximum value. @@ -161,6 +170,22 @@ impl MutableUtf8Array { self.try_push(value).unwrap() } + /// Returns the value of the element at index `i`, ignoring the array's validity. + /// # Panic + /// This function panics iff `i >= self.len`. + #[inline] + pub fn value(&self, i: usize) -> &str { + self.values.value(i) + } + + /// Returns the value of the element at index `i`, ignoring the array's validity. + /// # Safety + /// This function is safe iff `i < self.len`. + #[inline] + pub unsafe fn value_unchecked(&self, i: usize) -> &str { + self.values.value_unchecked(i) + } + /// Pop the last entry from [`MutableUtf8Array`]. /// This function returns `None` iff this array is empty. pub fn pop(&mut self) -> Option { @@ -179,6 +204,11 @@ impl MutableUtf8Array { self.validity = Some(validity); } + /// Returns an iterator of `Option<&str>` + pub fn iter(&self) -> ZipValidity<&str, MutableUtf8ValuesIter> { + zip_validity(self.values_iter(), self.validity.as_ref().map(|x| x.iter())) + } + /// Converts itself into an [`Array`]. 
pub fn into_arc(self) -> Arc { let a: Utf8Array = self.into(); @@ -198,6 +228,11 @@ impl MutableUtf8Array { let (data_type, offsets, values) = self.values.into_inner(); (data_type, offsets, values, self.validity) } + + /// Returns an iterator of `&str` + pub fn values_iter(&self) -> MutableUtf8ValuesIter { + self.values.iter() + } } impl MutableUtf8Array { @@ -214,7 +249,7 @@ impl MutableUtf8Array { impl MutableArray for MutableUtf8Array { fn len(&self) -> usize { - self.values.len() + self.len() } fn validity(&self) -> Option<&MutableBitmap> { diff --git a/src/array/utf8/mutable_values.rs b/src/array/utf8/mutable_values.rs index 65c9d1231a0..bdeacf658d4 100644 --- a/src/array/utf8/mutable_values.rs +++ b/src/array/utf8/mutable_values.rs @@ -3,7 +3,7 @@ use std::{iter::FromIterator, sync::Arc}; use crate::{ array::{ specification::{check_offsets_minimal, try_check_offsets_and_utf8}, - Array, MutableArray, Offset, TryExtend, TryPush, + Array, ArrayValuesIter, MutableArray, Offset, TryExtend, TryPush, }, bitmap::MutableBitmap, datatypes::DataType, @@ -168,6 +168,12 @@ impl MutableUtf8ValuesArray { self.offsets.capacity() - 1 } + /// Returns the length of this array + #[inline] + pub fn len(&self) -> usize { + self.offsets.len() - 1 + } + /// Pushes a new item to the array. /// # Panic /// This operation panics iff the length of all values (in bytes) exceeds `O` maximum value. @@ -189,6 +195,36 @@ impl MutableUtf8ValuesArray { Some(unsafe { String::from_utf8_unchecked(value) }) } + /// Returns the value of the element at index `i`. + /// # Panic + /// This function panics iff `i >= self.len`. + #[inline] + pub fn value(&self, i: usize) -> &str { + assert!(i < self.len()); + unsafe { self.value_unchecked(i) } + } + + /// Returns the value of the element at index `i`. + /// # Safety + /// This function is safe iff `i < self.len`. 
+ #[inline] + pub unsafe fn value_unchecked(&self, i: usize) -> &str { + // soundness: the invariant of the function + let start = self.offsets.get_unchecked(i).to_usize(); + let end = self.offsets.get_unchecked(i + 1).to_usize(); + + // soundness: the invariant of the struct + let slice = self.values.get_unchecked(start..end); + + // soundness: the invariant of the struct + std::str::from_utf8_unchecked(slice) + } + + /// Returns an iterator of `&str` + pub fn iter(&self) -> ArrayValuesIter { + ArrayValuesIter::new(self) + } + /// Shrinks the capacity of the [`MutableUtf8ValuesArray`] to fit its current length. pub fn shrink_to_fit(&mut self) { self.values.shrink_to_fit(); @@ -203,7 +239,7 @@ impl MutableUtf8ValuesArray { impl MutableArray for MutableUtf8ValuesArray { fn len(&self) -> usize { - self.offsets.len() - 1 + self.len() } fn validity(&self) -> Option<&MutableBitmap> { diff --git a/tests/it/array/utf8/mutable.rs b/tests/it/array/utf8/mutable.rs index 80cc24ca3c8..377d8b26de2 100644 --- a/tests/it/array/utf8/mutable.rs +++ b/tests/it/array/utf8/mutable.rs @@ -167,3 +167,24 @@ fn as_arc() { array.as_arc().as_ref() ); } + +#[test] +fn test_iter() { + let mut array = MutableUtf8Array::::new(); + + array.extend_trusted_len(vec![Some("hi"), Some("there")].into_iter()); + array.extend_trusted_len(vec![None, Some("hello")].into_iter()); + array.extend_trusted_len_values(["again"].iter()); + + let result = array.iter().collect::>(); + assert_eq!( + result, + vec![ + Some("hi"), + Some("there"), + None, + Some("hello"), + Some("again"), + ] + ); +}