Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added MutableUtf8ValuesArray (#1260)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Oct 4, 2022
1 parent 828d976 commit 9574d7f
Show file tree
Hide file tree
Showing 10 changed files with 804 additions and 242 deletions.
66 changes: 11 additions & 55 deletions src/array/binary/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,68 +1,26 @@
use crate::{array::Offset, bitmap::utils::ZipValidity, trusted_len::TrustedLen};
use crate::{
array::{ArrayAccessor, ArrayValuesIter, Offset},
bitmap::utils::ZipValidity,
};

use super::BinaryArray;

/// Iterator over slices of `&[u8]`.
#[derive(Debug, Clone)]
pub struct BinaryValueIter<'a, O: Offset> {
array: &'a BinaryArray<O>,
index: usize,
end: usize,
}

impl<'a, O: Offset> BinaryValueIter<'a, O> {
/// Creates a new [`BinaryValueIter`]
pub fn new(array: &'a BinaryArray<O>) -> Self {
Self {
array,
index: 0,
end: array.len(),
}
}
}

impl<'a, O: Offset> Iterator for BinaryValueIter<'a, O> {
unsafe impl<'a, O: Offset> ArrayAccessor<'a> for BinaryArray<O> {
type Item = &'a [u8];

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.index == self.end {
return None;
}
let old = self.index;
self.index += 1;
Some(unsafe { self.array.value_unchecked(old) })
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.end - self.index, Some(self.end - self.index))
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
self.value_unchecked(index)
}

#[inline]
fn nth(&mut self, n: usize) -> Option<Self::Item> {
let new_index = self.index + n;
if new_index > self.end {
self.index = self.end;
None
} else {
self.index = new_index;
self.next()
}
fn len(&self) -> usize {
self.len()
}
}

impl<'a, O: Offset> DoubleEndedIterator for BinaryValueIter<'a, O> {
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
if self.index == self.end {
None
} else {
self.end -= 1;
Some(unsafe { self.array.value_unchecked(self.end) })
}
}
}
/// Iterator of values of a [`BinaryArray`].
pub type BinaryValueIter<'a, O> = ArrayValuesIter<'a, BinaryArray<O>>;

impl<'a, O: Offset> IntoIterator for &'a BinaryArray<O> {
type Item = Option<&'a [u8]>;
Expand All @@ -72,5 +30,3 @@ impl<'a, O: Offset> IntoIterator for &'a BinaryArray<O> {
self.iter()
}
}

unsafe impl<O: Offset> TrustedLen for BinaryValueIter<'_, O> {}
83 changes: 83 additions & 0 deletions src/array/iterator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use crate::trusted_len::TrustedLen;

// Private module hosting the `Sealed` supertrait of `ArrayAccessor`.
mod private {
    /// Marker supertrait required by `ArrayAccessor`.
    pub trait Sealed {}

    // Blanket impl: every `ArrayAccessor` implementer is `Sealed`
    // automatically, so implementers never write this impl by hand.
    impl<'a, T> Sealed for T where T: super::ArrayAccessor<'a> {}
}

/// Index-based access to the values of a container, as consumed by
/// [`ArrayValuesIter`].
///
/// # Safety
/// Implementers of this trait guarantee that
/// `value_unchecked` is safe when called up to `len`, i.e. for every
/// `index` smaller than the value returned by `len`.
/// [`ArrayValuesIter`] relies on this: it reads `len` once when it is
/// created and afterwards calls `value_unchecked` for indices below that
/// length without any further check.
pub unsafe trait ArrayAccessor<'a>: private::Sealed {
/// Type of a single value; it may borrow from the container for `'a`.
type Item: 'a;
/// Returns the value stored at `index` without bounds checking.
///
/// # Safety
/// NOTE(review): presumably the caller must pass `index < self.len()`,
/// mirroring the trait-level guarantee — confirm against implementers.
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item;
/// Returns the number of values that can be accessed.
fn len(&self) -> usize;
}

/// Iterator of values of an `ArrayAccessor`.
///
/// Yields the values at positions `index..end` of `array`; the range shrinks
/// from the front on forward iteration and from the back on reverse iteration.
#[derive(Debug, Clone)]
pub struct ArrayValuesIter<'a, A: ArrayAccessor<'a>> {
// Container the values are read from.
array: &'a A,
// Position of the next value yielded from the front.
index: usize,
// One past the position of the next value yielded from the back.
end: usize,
}

impl<'a, A> ArrayValuesIter<'a, A>
where
    A: ArrayAccessor<'a>,
{
    /// Creates a new [`ArrayValuesIter`]
    ///
    /// The iterator starts at position `0` and stops at the length `array`
    /// reports at the time of this call.
    #[inline]
    pub fn new(array: &'a A) -> Self {
        let end = array.len();
        Self {
            array,
            index: 0,
            end,
        }
    }
}

impl<'a, A: ArrayAccessor<'a>> Iterator for ArrayValuesIter<'a, A> {
    type Item = A::Item;

    /// Yields the value at the front position and advances past it, or
    /// returns `None` once the front position has reached `end`.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.index == self.end {
            return None;
        }
        let current = self.index;
        self.index += 1;
        // SAFETY: `current < self.end`, and `end` starts at `array.len()` and
        // is only ever decremented, so `current` is below the length covered
        // by the `ArrayAccessor` contract.
        Some(unsafe { self.array.value_unchecked(current) })
    }

    /// Reports the exact number of values left, as both lower and upper bound.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.end - self.index;
        (remaining, Some(remaining))
    }

    /// Skips `n` values and yields the one after them.
    ///
    /// When fewer than `n + 1` values remain the iterator is exhausted and
    /// `None` is returned.
    #[inline]
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
        // Compare against the remaining length instead of computing
        // `self.index + n`: that sum can overflow for large `n`, which panics
        // in debug builds and, in release builds, wraps around and may move
        // the cursor backwards.
        let remaining = self.end - self.index;
        if n >= remaining {
            self.index = self.end;
            return None;
        }
        // `n < remaining`, so this cannot overflow or pass `end`.
        self.index += n;
        self.next()
    }
}

impl<'a, A> DoubleEndedIterator for ArrayValuesIter<'a, A>
where
    A: ArrayAccessor<'a>,
{
    /// Yields the value just before `end` and shrinks the range from the
    /// back, or returns `None` once the range is empty.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.index == self.end {
            return None;
        }
        self.end -= 1;
        let last = self.end;
        Some(unsafe { self.array.value_unchecked(last) })
    }
}

// SAFETY: `size_hint` reports `end - index` as both its lower and upper bound,
// and `next` / `next_back` each consume exactly one position of that range per
// value yielded, so the reported length is exact.
// NOTE(review): assumes `TrustedLen` only requires an exact `size_hint`, like
// the standard library trait of the same name — confirm against its definition.
unsafe impl<'a, A: ArrayAccessor<'a>> TrustedLen for ArrayValuesIter<'a, A> {}
6 changes: 5 additions & 1 deletion src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,8 +377,12 @@ mod equal;
mod ffi;
mod fmt;
pub mod growable;
mod iterator;
pub mod ord;

pub(crate) use iterator::ArrayAccessor;
pub use iterator::ArrayValuesIter;

pub use equal::equal;
pub use fmt::{get_display, get_value_display};

Expand All @@ -394,7 +398,7 @@ pub use null::NullArray;
pub use primitive::*;
pub use struct_::{MutableStructArray, StructArray};
pub use union::UnionArray;
pub use utf8::{MutableUtf8Array, Utf8Array, Utf8ValuesIter};
pub use utf8::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array, Utf8ValuesIter};

pub(crate) use self::ffi::offset_buffers_children_dictionary;
pub(crate) use self::ffi::FromFfi;
Expand Down
100 changes: 51 additions & 49 deletions src/array/utf8/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,77 +1,79 @@
use crate::array::{ArrayAccessor, ArrayValuesIter, Offset};
use crate::bitmap::utils::ZipValidity;
use crate::{array::Offset, trusted_len::TrustedLen};

use super::Utf8Array;
use super::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array};

/// Iterator of values of an `Utf8Array`.
#[derive(Debug, Clone)]
pub struct Utf8ValuesIter<'a, O: Offset> {
array: &'a Utf8Array<O>,
index: usize,
end: usize,
unsafe impl<'a, O: Offset> ArrayAccessor<'a> for Utf8Array<O> {
type Item = &'a str;

#[inline]
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
self.value_unchecked(index)
}

#[inline]
fn len(&self) -> usize {
self.len()
}
}

impl<'a, O: Offset> Utf8ValuesIter<'a, O> {
/// Creates a new [`Utf8ValuesIter`]
pub fn new(array: &'a Utf8Array<O>) -> Self {
Self {
array,
index: 0,
end: array.len(),
}
/// Iterator of values of a [`Utf8Array`].
pub type Utf8ValuesIter<'a, O> = ArrayValuesIter<'a, Utf8Array<O>>;

impl<'a, O: Offset> IntoIterator for &'a Utf8Array<O> {
type Item = Option<&'a str>;
type IntoIter = ZipValidity<'a, &'a str, Utf8ValuesIter<'a, O>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

impl<'a, O: Offset> Iterator for Utf8ValuesIter<'a, O> {
unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableUtf8Array<O> {
type Item = &'a str;

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.index == self.end {
return None;
}
let old = self.index;
self.index += 1;
Some(unsafe { self.array.value_unchecked(old) })
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
self.value_unchecked(index)
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.end - self.index, Some(self.end - self.index))
fn len(&self) -> usize {
self.len()
}
}

#[inline]
fn nth(&mut self, n: usize) -> Option<Self::Item> {
let new_index = self.index + n;
if new_index > self.end {
self.index = self.end;
None
} else {
self.index = new_index;
self.next()
}
/// Iterator of values of a [`MutableUtf8ValuesArray`].
pub type MutableUtf8ValuesIter<'a, O> = ArrayValuesIter<'a, MutableUtf8ValuesArray<O>>;

impl<'a, O: Offset> IntoIterator for &'a MutableUtf8Array<O> {
type Item = Option<&'a str>;
type IntoIter = ZipValidity<'a, &'a str, MutableUtf8ValuesIter<'a, O>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

impl<'a, O: Offset> DoubleEndedIterator for Utf8ValuesIter<'a, O> {
unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableUtf8ValuesArray<O> {
type Item = &'a str;

#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
if self.index == self.end {
None
} else {
self.end -= 1;
Some(unsafe { self.array.value_unchecked(self.end) })
}
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
self.value_unchecked(index)
}

#[inline]
fn len(&self) -> usize {
self.len()
}
}

impl<'a, O: Offset> IntoIterator for &'a Utf8Array<O> {
type Item = Option<&'a str>;
type IntoIter = ZipValidity<'a, &'a str, Utf8ValuesIter<'a, O>>;
impl<'a, O: Offset> IntoIterator for &'a MutableUtf8ValuesArray<O> {
type Item = &'a str;
type IntoIter = ArrayValuesIter<'a, MutableUtf8ValuesArray<O>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

unsafe impl<O: Offset> TrustedLen for Utf8ValuesIter<'_, O> {}
11 changes: 11 additions & 0 deletions src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,19 @@ pub(super) mod fmt;
mod from;
mod iterator;
mod mutable;
mod mutable_values;
pub use iterator::*;
pub use mutable::*;
pub use mutable_values::MutableUtf8ValuesArray;

// Auxiliary struct to allow presenting &str as [u8] to a generic function:
// it wraps a value `P` and, when `P: AsRef<str>`, exposes the string's bytes
// through `AsRef<[u8]>`.
pub(super) struct StrAsBytes<P>(P);
impl<T> AsRef<[u8]> for StrAsBytes<T>
where
    T: AsRef<str>,
{
    /// Borrows the wrapped string and returns its bytes.
    #[inline(always)]
    fn as_ref(&self) -> &[u8] {
        let text: &str = self.0.as_ref();
        text.as_bytes()
    }
}

/// A [`Utf8Array`] is arrow's semantic equivalent of an immutable `Vec<Option<String>>`.
/// Cloning and slicing this struct is `O(1)`.
Expand Down
Loading

0 comments on commit 9574d7f

Please sign in to comment.