Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added iterator
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Sep 29, 2022
1 parent a41d266 commit 9510c77
Show file tree
Hide file tree
Showing 8 changed files with 267 additions and 110 deletions.
66 changes: 11 additions & 55 deletions src/array/binary/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,68 +1,26 @@
use crate::{array::Offset, bitmap::utils::ZipValidity, trusted_len::TrustedLen};
use crate::{
array::{ArrayAccessor, ArrayValuesIter, Offset},
bitmap::utils::ZipValidity,
};

use super::BinaryArray;

/// Iterator over slices of `&[u8]`.
#[derive(Debug, Clone)]
pub struct BinaryValueIter<'a, O: Offset> {
array: &'a BinaryArray<O>,
index: usize,
end: usize,
}

impl<'a, O: Offset> BinaryValueIter<'a, O> {
/// Creates a new [`BinaryValueIter`]
pub fn new(array: &'a BinaryArray<O>) -> Self {
Self {
array,
index: 0,
end: array.len(),
}
}
}

impl<'a, O: Offset> Iterator for BinaryValueIter<'a, O> {
unsafe impl<'a, O: Offset> ArrayAccessor<'a> for BinaryArray<O> {
type Item = &'a [u8];

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.index == self.end {
return None;
}
let old = self.index;
self.index += 1;
Some(unsafe { self.array.value_unchecked(old) })
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.end - self.index, Some(self.end - self.index))
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
self.value_unchecked(index)
}

#[inline]
fn nth(&mut self, n: usize) -> Option<Self::Item> {
let new_index = self.index + n;
if new_index > self.end {
self.index = self.end;
None
} else {
self.index = new_index;
self.next()
}
fn len(&self) -> usize {
self.len()
}
}

impl<'a, O: Offset> DoubleEndedIterator for BinaryValueIter<'a, O> {
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
if self.index == self.end {
None
} else {
self.end -= 1;
Some(unsafe { self.array.value_unchecked(self.end) })
}
}
}
/// Iterator of values of an [`BinaryArray`].
pub type BinaryValueIter<'a, O> = ArrayValuesIter<'a, BinaryArray<O>>;

impl<'a, O: Offset> IntoIterator for &'a BinaryArray<O> {
type Item = Option<&'a [u8]>;
Expand All @@ -72,5 +30,3 @@ impl<'a, O: Offset> IntoIterator for &'a BinaryArray<O> {
self.iter()
}
}

unsafe impl<O: Offset> TrustedLen for BinaryValueIter<'_, O> {}
83 changes: 83 additions & 0 deletions src/array/iterator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use crate::trusted_len::TrustedLen;

// Private module holding the `Sealed` marker trait that `ArrayAccessor` names as its
// supertrait.
// NOTE(review): this looks like the sealed-trait pattern, but the blanket impl below grants
// `Sealed` to every `ArrayAccessor` implementor - confirm this is the intended restriction.
mod private {
/// Marker trait without methods; required by `ArrayAccessor` as a supertrait.
pub trait Sealed {}

// Blanket impl: every `T` implementing `ArrayAccessor<'a>` is automatically `Sealed`.
impl<'a, T: super::ArrayAccessor<'a>> Sealed for T {}
}

/// Index-based, read-only access to the values of an array-like container.
///
/// [`ArrayValuesIter`] is built on this trait: it reads `len` once on construction and
/// then calls `value_unchecked` with indices in `0..len`.
///
/// # Safety
/// Implementers of this trait guarantee that `value_unchecked` is safe to call
/// with any `index` strictly smaller than the value returned by `len`.
pub unsafe trait ArrayAccessor<'a>: private::Sealed {
/// The type of the values handed out by this accessor.
type Item: 'a;
/// Returns the value at position `index`.
/// # Safety
/// The caller must ensure that `index < self.len()`.
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item;
/// Returns the number of values that can be accessed.
fn len(&self) -> usize;
}

/// Iterator of values of an `ArrayAccessor`.
///
/// The iterator walks the half-open index range `index..end` of `array` and can be
/// consumed from both ends.
#[derive(Debug, Clone)]
pub struct ArrayValuesIter<'a, A: ArrayAccessor<'a>> {
// The accessor whose values are yielded.
array: &'a A,
// Position of the next value yielded from the front.
index: usize,
// One past the position of the next value yielded from the back.
end: usize,
}

impl<'a, A: ArrayAccessor<'a>> ArrayValuesIter<'a, A> {
/// Creates a new [`ArrayValuesIter`]
#[inline]
pub fn new(array: &'a A) -> Self {
Self {
array,
index: 0,
end: array.len(),
}
}
}

impl<'a, A: ArrayAccessor<'a>> Iterator for ArrayValuesIter<'a, A> {
    type Item = A::Item;

    /// Yields the value at the front position and advances it, or `None` once the
    /// front position has met the back position.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.index == self.end {
            return None;
        }
        let old = self.index;
        self.index += 1;
        // SAFETY: `old < self.end`, and `self.end` never exceeds the length the accessor
        // reported at construction, which `ArrayAccessor` guarantees to be a valid bound.
        Some(unsafe { self.array.value_unchecked(old) })
    }

    /// Returns the exact number of remaining values as both lower and upper bound.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.end - self.index;
        (remaining, Some(remaining))
    }

    /// Skips `n` values and yields the following one; exhausts the iterator and returns
    /// `None` when fewer than `n + 1` values remain.
    #[inline]
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
        // Compare against the remaining length instead of computing `self.index + n`:
        // the sum can overflow `usize` for large `n` (panic in debug builds, wrap-around
        // to an earlier position in release builds).
        let remaining = self.end - self.index;
        if n >= remaining {
            self.index = self.end;
            None
        } else {
            self.index += n;
            self.next()
        }
    }
}

impl<'a, A: ArrayAccessor<'a>> DoubleEndedIterator for ArrayValuesIter<'a, A> {
    /// Yields the value just before the back position and moves the back position onto
    /// it, or `None` once the back position has met the front position.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.end == self.index {
            return None;
        }
        self.end -= 1;
        let last = self.end;
        // SAFETY: `last` is below the length the accessor reported at construction,
        // which `ArrayAccessor` guarantees to be a valid bound.
        Some(unsafe { self.array.value_unchecked(last) })
    }
}

// SAFETY: `size_hint` returns `self.end - self.index` as both the lower and the upper bound,
// which is the number of values `next`/`next_back` still yield.
// NOTE(review): presumably `TrustedLen` requires exactly this exact-size guarantee - confirm
// against the trait's definition in `crate::trusted_len`.
unsafe impl<'a, A: ArrayAccessor<'a>> TrustedLen for ArrayValuesIter<'a, A> {}
4 changes: 4 additions & 0 deletions src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,8 +377,12 @@ mod equal;
mod ffi;
mod fmt;
pub mod growable;
mod iterator;
pub mod ord;

pub(crate) use iterator::ArrayAccessor;
pub use iterator::ArrayValuesIter;

pub use equal::equal;
pub use fmt::{get_display, get_value_display};

Expand Down
100 changes: 51 additions & 49 deletions src/array/utf8/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,77 +1,79 @@
use crate::array::{ArrayAccessor, ArrayValuesIter, Offset};
use crate::bitmap::utils::ZipValidity;
use crate::{array::Offset, trusted_len::TrustedLen};

use super::Utf8Array;
use super::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array};

/// Iterator of values of an `Utf8Array`.
#[derive(Debug, Clone)]
pub struct Utf8ValuesIter<'a, O: Offset> {
array: &'a Utf8Array<O>,
index: usize,
end: usize,
unsafe impl<'a, O: Offset> ArrayAccessor<'a> for Utf8Array<O> {
type Item = &'a str;

#[inline]
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
self.value_unchecked(index)
}

#[inline]
fn len(&self) -> usize {
self.len()
}
}

impl<'a, O: Offset> Utf8ValuesIter<'a, O> {
/// Creates a new [`Utf8ValuesIter`]
pub fn new(array: &'a Utf8Array<O>) -> Self {
Self {
array,
index: 0,
end: array.len(),
}
/// Iterator of values of an [`Utf8Array`].
pub type Utf8ValuesIter<'a, O> = ArrayValuesIter<'a, Utf8Array<O>>;

impl<'a, O: Offset> IntoIterator for &'a Utf8Array<O> {
type Item = Option<&'a str>;
type IntoIter = ZipValidity<'a, &'a str, Utf8ValuesIter<'a, O>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

impl<'a, O: Offset> Iterator for Utf8ValuesIter<'a, O> {
unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableUtf8Array<O> {
type Item = &'a str;

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.index == self.end {
return None;
}
let old = self.index;
self.index += 1;
Some(unsafe { self.array.value_unchecked(old) })
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
self.value_unchecked(index)
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.end - self.index, Some(self.end - self.index))
fn len(&self) -> usize {
self.len()
}
}

#[inline]
fn nth(&mut self, n: usize) -> Option<Self::Item> {
let new_index = self.index + n;
if new_index > self.end {
self.index = self.end;
None
} else {
self.index = new_index;
self.next()
}
/// Iterator of values of an [`MutableUtf8ValuesArray`].
pub type MutableUtf8ValuesIter<'a, O> = ArrayValuesIter<'a, MutableUtf8ValuesArray<O>>;

impl<'a, O: Offset> IntoIterator for &'a MutableUtf8Array<O> {
type Item = Option<&'a str>;
type IntoIter = ZipValidity<'a, &'a str, MutableUtf8ValuesIter<'a, O>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

impl<'a, O: Offset> DoubleEndedIterator for Utf8ValuesIter<'a, O> {
unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableUtf8ValuesArray<O> {
type Item = &'a str;

#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
if self.index == self.end {
None
} else {
self.end -= 1;
Some(unsafe { self.array.value_unchecked(self.end) })
}
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
self.value_unchecked(index)
}

#[inline]
fn len(&self) -> usize {
self.len()
}
}

impl<'a, O: Offset> IntoIterator for &'a Utf8Array<O> {
type Item = Option<&'a str>;
type IntoIter = ZipValidity<'a, &'a str, Utf8ValuesIter<'a, O>>;
impl<'a, O: Offset> IntoIterator for &'a MutableUtf8ValuesArray<O> {
type Item = &'a str;
type IntoIter = ArrayValuesIter<'a, MutableUtf8ValuesArray<O>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

unsafe impl<O: Offset> TrustedLen for Utf8ValuesIter<'_, O> {}
41 changes: 38 additions & 3 deletions src/array/utf8/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@ use std::{iter::FromIterator, sync::Arc};
use crate::array::physical_binary::*;
use crate::{
array::{Array, MutableArray, Offset, TryExtend, TryPush},
bitmap::{Bitmap, MutableBitmap},
bitmap::{
utils::{zip_validity, ZipValidity},
Bitmap, MutableBitmap,
},
datatypes::DataType,
error::{Error, Result},
trusted_len::TrustedLen,
};

use super::{MutableUtf8ValuesArray, StrAsBytes, Utf8Array};
use super::{MutableUtf8ValuesArray, MutableUtf8ValuesIter, StrAsBytes, Utf8Array};

/// A [`MutableArray`] that builds a [`Utf8Array`]. It differs
/// from [`MutableUtf8ValuesArray`] in that it can build nullable [`Utf8Array`]s.
Expand Down Expand Up @@ -153,6 +156,12 @@ impl<O: Offset> MutableUtf8Array<O> {
self.values.capacity()
}

/// Returns the length of this array, as reported by the inner values array.
#[inline]
pub fn len(&self) -> usize {
self.values.len()
}

/// Pushes a new element to the array.
/// # Panic
/// This operation panics iff the length of all values (in bytes) exceeds `O` maximum value.
Expand All @@ -161,6 +170,22 @@ impl<O: Offset> MutableUtf8Array<O> {
self.try_push(value).unwrap()
}

/// Returns the value of the element at index `i`, ignoring the array's validity.
///
/// This is a safe function that forwards to the inner values array's `value`.
/// # Panics
/// NOTE(review): presumably panics when `i >= self.len()` - confirm against
/// `MutableUtf8ValuesArray::value`.
#[inline]
pub fn value(&self, i: usize) -> &str {
self.values.value(i)
}

/// Returns the value of the element at index `i`, ignoring the array's validity.
///
/// Forwards to the inner values array's `value_unchecked`.
/// # Safety
/// This function is safe iff `i < self.len()`.
#[inline]
pub unsafe fn value_unchecked(&self, i: usize) -> &str {
self.values.value_unchecked(i)
}

/// Pop the last entry from [`MutableUtf8Array`].
/// This function returns `None` iff this array is empty.
pub fn pop(&mut self) -> Option<String> {
Expand All @@ -179,6 +204,11 @@ impl<O: Offset> MutableUtf8Array<O> {
self.validity = Some(validity);
}

/// Returns an iterator of `Option<&str>`, built by zipping the values iterator with
/// the validity bitmap's iterator when a validity bitmap is set.
pub fn iter(&self) -> ZipValidity<&str, MutableUtf8ValuesIter<O>> {
    let values = self.values_iter();
    let validity = self.validity.as_ref().map(|bitmap| bitmap.iter());
    zip_validity(values, validity)
}

/// Converts itself into an [`Array`].
pub fn into_arc(self) -> Arc<dyn Array> {
let a: Utf8Array<O> = self.into();
Expand All @@ -198,6 +228,11 @@ impl<O: Offset> MutableUtf8Array<O> {
let (data_type, offsets, values) = self.values.into_inner();
(data_type, offsets, values, self.validity)
}

/// Returns an iterator of `&str` over the inner values array; the array's validity
/// is not consulted here.
pub fn values_iter(&self) -> MutableUtf8ValuesIter<O> {
self.values.iter()
}
}

impl<O: Offset> MutableUtf8Array<O> {
Expand All @@ -214,7 +249,7 @@ impl<O: Offset> MutableUtf8Array<O> {

impl<O: Offset> MutableArray for MutableUtf8Array<O> {
fn len(&self) -> usize {
self.values.len()
self.len()
}

fn validity(&self) -> Option<&MutableBitmap> {
Expand Down
Loading

0 comments on commit 9510c77

Please sign in to comment.