Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added MutableUtf8ValuesArray #1260

Merged
merged 3 commits into from
Oct 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 11 additions & 55 deletions src/array/binary/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,68 +1,26 @@
use crate::{array::Offset, bitmap::utils::ZipValidity, trusted_len::TrustedLen};
use crate::{
array::{ArrayAccessor, ArrayValuesIter, Offset},
bitmap::utils::ZipValidity,
};

use super::BinaryArray;

/// Iterator over slices of `&[u8]`.
///
/// Tracks a front cursor (`index`) and a back cursor (`end`) into the
/// borrowed array; iteration is finished once both cursors are equal.
#[derive(Debug, Clone)]
pub struct BinaryValueIter<'a, O: Offset> {
// Array whose values are yielded.
array: &'a BinaryArray<O>,
// Position of the next value yielded from the front.
index: usize,
// One past the position of the next value yielded from the back.
end: usize,
}

impl<'a, O: Offset> BinaryValueIter<'a, O> {
    /// Builds a [`BinaryValueIter`] covering every value of `array`,
    /// starting at position `0` and ending at `array.len()`.
    pub fn new(array: &'a BinaryArray<O>) -> Self {
        let end = array.len();
        Self {
            array,
            index: 0,
            end,
        }
    }
}

impl<'a, O: Offset> Iterator for BinaryValueIter<'a, O> {
unsafe impl<'a, O: Offset> ArrayAccessor<'a> for BinaryArray<O> {
type Item = &'a [u8];

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.index == self.end {
return None;
}
let old = self.index;
self.index += 1;
Some(unsafe { self.array.value_unchecked(old) })
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.end - self.index, Some(self.end - self.index))
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
self.value_unchecked(index)
}

#[inline]
fn nth(&mut self, n: usize) -> Option<Self::Item> {
let new_index = self.index + n;
if new_index > self.end {
self.index = self.end;
None
} else {
self.index = new_index;
self.next()
}
fn len(&self) -> usize {
self.len()
}
}

impl<'a, O: Offset> DoubleEndedIterator for BinaryValueIter<'a, O> {
    /// Yields the last value that has not been produced yet, or `None` once
    /// the front and back cursors have met.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.index == self.end {
            return None;
        }
        // Move the back cursor onto the element that is about to be yielded.
        self.end -= 1;
        let last = self.end;
        // SAFETY: `last` is below the previous value of `end`, which starts at
        // `array.len()` in `new` and is only ever decremented.
        Some(unsafe { self.array.value_unchecked(last) })
    }
}
/// Iterator of values of a [`BinaryArray`].
pub type BinaryValueIter<'a, O> = ArrayValuesIter<'a, BinaryArray<O>>;

impl<'a, O: Offset> IntoIterator for &'a BinaryArray<O> {
type Item = Option<&'a [u8]>;
Expand All @@ -72,5 +30,3 @@ impl<'a, O: Offset> IntoIterator for &'a BinaryArray<O> {
self.iter()
}
}

// NOTE(review): presumably sound because `size_hint` reports `end - index`
// as both its lower and upper bound — confirm against the `TrustedLen` contract.
unsafe impl<O: Offset> TrustedLen for BinaryValueIter<'_, O> {}
83 changes: 83 additions & 0 deletions src/array/iterator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use crate::trusted_len::TrustedLen;

// Private module holding the `Sealed` marker trait that `ArrayAccessor`
// names as its supertrait.
mod private {
/// Marker supertrait required by `ArrayAccessor`.
pub trait Sealed {}

// Blanket implementation: every type implementing `ArrayAccessor` is `Sealed`.
impl<'a, T: super::ArrayAccessor<'a>> Sealed for T {}
}

/// Index-based, unchecked access to the values of an array-like container.
///
/// # Safety
/// Implementers of this trait guarantee that `value_unchecked` is safe when
/// called with any `index` smaller than the value returned by `len`.
pub unsafe trait ArrayAccessor<'a>: private::Sealed {
/// Type of the value returned for each position.
type Item: 'a;
/// Returns the value at position `index`.
///
/// # Safety
/// `index` must be smaller than `self.len()`.
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item;
/// Returns the number of positions that may be passed to `value_unchecked`.
fn len(&self) -> usize;
}

/// Iterator of values of an `ArrayAccessor`.
///
/// Tracks a front cursor (`index`) and a back cursor (`end`) into the
/// borrowed accessor; iteration is finished once both cursors are equal.
#[derive(Debug, Clone)]
pub struct ArrayValuesIter<'a, A: ArrayAccessor<'a>> {
// Accessor whose values are yielded.
array: &'a A,
// Position of the next value yielded from the front.
index: usize,
// One past the position of the next value yielded from the back.
end: usize,
}

impl<'a, A: ArrayAccessor<'a>> ArrayValuesIter<'a, A> {
    /// Builds an [`ArrayValuesIter`] covering every value of `array`,
    /// starting at position `0` and ending at `array.len()`.
    #[inline]
    pub fn new(array: &'a A) -> Self {
        let end = array.len();
        Self {
            array,
            index: 0,
            end,
        }
    }
}

impl<'a, A: ArrayAccessor<'a>> Iterator for ArrayValuesIter<'a, A> {
    type Item = A::Item;

    /// Yields the value at the front cursor and advances it, or returns
    /// `None` once the front and back cursors have met.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.index == self.end {
            return None;
        }
        let old = self.index;
        self.index += 1;
        // SAFETY: `old < self.end`, and `end` starts at `array.len()` in `new`
        // and is only ever decremented, so `old` is a valid position.
        Some(unsafe { self.array.value_unchecked(old) })
    }

    /// Reports the exact number of values left as both bounds.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.end - self.index;
        (remaining, Some(remaining))
    }

    /// Skips `n` values and yields the following one.
    ///
    /// When fewer than `n + 1` values remain the iterator is exhausted and
    /// `None` is returned. The jump is computed with `checked_add`, so a very
    /// large `n` (e.g. `usize::MAX`) exhausts the iterator instead of
    /// overflowing the cursor.
    #[inline]
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
        match self.index.checked_add(n) {
            Some(new_index) if new_index <= self.end => {
                self.index = new_index;
                self.next()
            }
            // Either the addition overflowed or it jumped past the back cursor.
            _ => {
                self.index = self.end;
                None
            }
        }
    }
}

impl<'a, A: ArrayAccessor<'a>> DoubleEndedIterator for ArrayValuesIter<'a, A> {
    /// Yields the last value that has not been produced yet, or `None` once
    /// the front and back cursors have met.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.index == self.end {
            return None;
        }
        // Move the back cursor onto the element that is about to be yielded.
        self.end -= 1;
        let last = self.end;
        // SAFETY: `last` is below the previous value of `end`, which starts at
        // `array.len()` in `new` and is only ever decremented.
        Some(unsafe { self.array.value_unchecked(last) })
    }
}

// NOTE(review): presumably sound because `size_hint` reports `end - index`
// as both its lower and upper bound — confirm against the `TrustedLen` contract.
unsafe impl<'a, A: ArrayAccessor<'a>> TrustedLen for ArrayValuesIter<'a, A> {}
6 changes: 5 additions & 1 deletion src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,8 +377,12 @@ mod equal;
mod ffi;
mod fmt;
pub mod growable;
mod iterator;
pub mod ord;

pub(crate) use iterator::ArrayAccessor;
pub use iterator::ArrayValuesIter;

pub use equal::equal;
pub use fmt::{get_display, get_value_display};

Expand All @@ -394,7 +398,7 @@ pub use null::NullArray;
pub use primitive::*;
pub use struct_::{MutableStructArray, StructArray};
pub use union::UnionArray;
pub use utf8::{MutableUtf8Array, Utf8Array, Utf8ValuesIter};
pub use utf8::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array, Utf8ValuesIter};

pub(crate) use self::ffi::offset_buffers_children_dictionary;
pub(crate) use self::ffi::FromFfi;
Expand Down
100 changes: 51 additions & 49 deletions src/array/utf8/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,77 +1,79 @@
use crate::array::{ArrayAccessor, ArrayValuesIter, Offset};
use crate::bitmap::utils::ZipValidity;
use crate::{array::Offset, trusted_len::TrustedLen};

use super::Utf8Array;
use super::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array};

/// Iterator of values of an `Utf8Array`.
#[derive(Debug, Clone)]
pub struct Utf8ValuesIter<'a, O: Offset> {
array: &'a Utf8Array<O>,
index: usize,
end: usize,
// Exposes the values of a `Utf8Array` as `&str` through `ArrayAccessor`.
// NOTE(review): both bodies presumably resolve to the inherent `Utf8Array`
// methods of the same name (defined outside this view) — confirm, since
// resolving to the trait methods instead would recurse.
unsafe impl<'a, O: Offset> ArrayAccessor<'a> for Utf8Array<O> {
type Item = &'a str;

#[inline]
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
self.value_unchecked(index)
}

#[inline]
fn len(&self) -> usize {
self.len()
}
}

impl<'a, O: Offset> Utf8ValuesIter<'a, O> {
/// Creates a new [`Utf8ValuesIter`]
pub fn new(array: &'a Utf8Array<O>) -> Self {
Self {
array,
index: 0,
end: array.len(),
}
/// Iterator of values of a [`Utf8Array`].
pub type Utf8ValuesIter<'a, O> = ArrayValuesIter<'a, Utf8Array<O>>;

// Lets `&Utf8Array` be used directly in `for` loops, yielding `Option<&str>`.
impl<'a, O: Offset> IntoIterator for &'a Utf8Array<O> {
type Item = Option<&'a str>;
type IntoIter = ZipValidity<'a, &'a str, Utf8ValuesIter<'a, O>>;

// Delegates to `iter`, which is defined outside this view.
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

impl<'a, O: Offset> Iterator for Utf8ValuesIter<'a, O> {
unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableUtf8Array<O> {
type Item = &'a str;

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.index == self.end {
return None;
}
let old = self.index;
self.index += 1;
Some(unsafe { self.array.value_unchecked(old) })
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
self.value_unchecked(index)
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(self.end - self.index, Some(self.end - self.index))
fn len(&self) -> usize {
self.len()
}
}

#[inline]
fn nth(&mut self, n: usize) -> Option<Self::Item> {
let new_index = self.index + n;
if new_index > self.end {
self.index = self.end;
None
} else {
self.index = new_index;
self.next()
}
/// Iterator of values of a [`MutableUtf8ValuesArray`].
pub type MutableUtf8ValuesIter<'a, O> = ArrayValuesIter<'a, MutableUtf8ValuesArray<O>>;

// Lets `&MutableUtf8Array` be used directly in `for` loops, yielding `Option<&str>`.
impl<'a, O: Offset> IntoIterator for &'a MutableUtf8Array<O> {
type Item = Option<&'a str>;
type IntoIter = ZipValidity<'a, &'a str, MutableUtf8ValuesIter<'a, O>>;

// Delegates to `iter`, which is defined outside this view.
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

impl<'a, O: Offset> DoubleEndedIterator for Utf8ValuesIter<'a, O> {
unsafe impl<'a, O: Offset> ArrayAccessor<'a> for MutableUtf8ValuesArray<O> {
type Item = &'a str;

#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
if self.index == self.end {
None
} else {
self.end -= 1;
Some(unsafe { self.array.value_unchecked(self.end) })
}
unsafe fn value_unchecked(&'a self, index: usize) -> Self::Item {
self.value_unchecked(index)
}

#[inline]
fn len(&self) -> usize {
self.len()
}
}

impl<'a, O: Offset> IntoIterator for &'a Utf8Array<O> {
type Item = Option<&'a str>;
type IntoIter = ZipValidity<'a, &'a str, Utf8ValuesIter<'a, O>>;
// Lets `&MutableUtf8ValuesArray` be used directly in `for` loops, yielding `&str`.
impl<'a, O: Offset> IntoIterator for &'a MutableUtf8ValuesArray<O> {
type Item = &'a str;
type IntoIter = ArrayValuesIter<'a, MutableUtf8ValuesArray<O>>;

// Delegates to `iter`, which is defined outside this view.
fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

// NOTE(review): presumably sound because `size_hint` reports `end - index`
// as both its lower and upper bound — confirm against the `TrustedLen` contract.
unsafe impl<O: Offset> TrustedLen for Utf8ValuesIter<'_, O> {}
11 changes: 11 additions & 0 deletions src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,19 @@ pub(super) mod fmt;
mod from;
mod iterator;
mod mutable;
mod mutable_values;
pub use iterator::*;
pub use mutable::*;
pub use mutable_values::MutableUtf8ValuesArray;

// Auxiliary newtype that lets a string-like value (`AsRef<str>`) be passed to
// a generic function expecting `AsRef<[u8]>`.
pub(super) struct StrAsBytes<P>(P);
impl<T: AsRef<str>> AsRef<[u8]> for StrAsBytes<T> {
    /// Returns the UTF-8 bytes of the wrapped string-like value.
    #[inline(always)]
    fn as_ref(&self) -> &[u8] {
        let text: &str = self.0.as_ref();
        text.as_bytes()
    }
}

/// A [`Utf8Array`] is arrow's semantic equivalent of an immutable `Vec<Option<String>>`.
/// Cloning and slicing this struct is `O(1)`.
Expand Down
Loading