Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added Utf8Sequence
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Jan 19, 2022
1 parent 9f25174 commit 982d259
Show file tree
Hide file tree
Showing 22 changed files with 393 additions and 7 deletions.
2 changes: 2 additions & 0 deletions src/array/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ pub fn get_value_display<'a>(array: &'a dyn Array) -> Box<dyn Fn(usize) -> Strin
}),
Utf8 => dyn_display!(array, Utf8Array<i32>, |x| x),
LargeUtf8 => dyn_display!(array, Utf8Array<i64>, |x| x),
Utf8Sequence => dyn_display!(array, StringSequenceArray<i32>, |x| x),
LargeUtf8Sequence => dyn_display!(array, StringSequenceArray<i64>, |x| x),
Decimal(_, scale) => {
// The number 999.99 has a precision of 5 and scale of 2
let scale = *scale as u32;
Expand Down
11 changes: 11 additions & 0 deletions src/array/equal/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ mod primitive;
mod struct_;
mod union;
mod utf8;
mod utf8_sequence;

impl PartialEq for dyn Array + '_ {
fn eq(&self, that: &dyn Array) -> bool {
Expand Down Expand Up @@ -201,6 +202,16 @@ pub fn equal(lhs: &dyn Array, rhs: &dyn Array) -> bool {
let rhs = rhs.as_any().downcast_ref().unwrap();
utf8::equal::<i64>(lhs, rhs)
}
Utf8Sequence => {
let lhs = lhs.as_any().downcast_ref().unwrap();
let rhs = rhs.as_any().downcast_ref().unwrap();
utf8_sequence::equal::<i32>(lhs, rhs)
}
LargeUtf8Sequence => {
let lhs = lhs.as_any().downcast_ref().unwrap();
let rhs = rhs.as_any().downcast_ref().unwrap();
utf8_sequence::equal::<i64>(lhs, rhs)
}
Binary => {
let lhs = lhs.as_any().downcast_ref().unwrap();
let rhs = rhs.as_any().downcast_ref().unwrap();
Expand Down
5 changes: 5 additions & 0 deletions src/array/equal/utf8_sequence.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
use crate::array::{Offset, StringSequenceArray};

/// Compares two [`StringSequenceArray`]s for equality.
///
/// The arrays are equal when their data types match, their lengths match and the
/// items yielded by their `iter()` compare equal pairwise. Checks run in that
/// order and stop at the first mismatch.
pub(super) fn equal<O: Offset>(lhs: &StringSequenceArray<O>, rhs: &StringSequenceArray<O>) -> bool {
    if !(lhs.data_type() == rhs.data_type()) {
        return false;
    }
    if !(lhs.len() == rhs.len()) {
        return false;
    }
    lhs.iter().eq(rhs.iter())
}
2 changes: 2 additions & 0 deletions src/array/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ pub fn offset_buffers_children_dictionary(array: &dyn Array) -> BuffersChildren
FixedSizeBinary => ffi_dyn!(array, FixedSizeBinaryArray),
Utf8 => ffi_dyn!(array, Utf8Array::<i32>),
LargeUtf8 => ffi_dyn!(array, Utf8Array::<i64>),
Utf8Sequence => todo!("Arrow does not yet support exporting sequence views via FFI"),
LargeUtf8Sequence => todo!("Arrow does not yet support exporting sequence views via FFI"),
List => ffi_dyn!(array, ListArray::<i32>),
LargeList => ffi_dyn!(array, ListArray::<i64>),
FixedSizeList => ffi_dyn!(array, FixedSizeListArray),
Expand Down
1 change: 1 addition & 0 deletions src/array/growable/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,5 +130,6 @@ pub fn make_growable<'a>(
))
})
}
_ => todo!("Sequence views"),
}
}
10 changes: 10 additions & 0 deletions src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,8 @@ impl std::fmt::Debug for dyn Array + '_ {
FixedSizeBinary => fmt_dyn!(self, FixedSizeBinaryArray, f),
Utf8 => fmt_dyn!(self, Utf8Array::<i32>, f),
LargeUtf8 => fmt_dyn!(self, Utf8Array::<i64>, f),
Utf8Sequence => fmt_dyn!(self, StringSequenceArray::<i32>, f),
LargeUtf8Sequence => fmt_dyn!(self, StringSequenceArray::<i64>, f),
List => fmt_dyn!(self, ListArray::<i32>, f),
LargeList => fmt_dyn!(self, ListArray::<i64>, f),
FixedSizeList => fmt_dyn!(self, FixedSizeListArray, f),
Expand Down Expand Up @@ -258,6 +260,8 @@ pub fn new_empty_array(data_type: DataType) -> Box<dyn Array> {
FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_empty(data_type)),
Utf8 => Box::new(Utf8Array::<i32>::new_empty(data_type)),
LargeUtf8 => Box::new(Utf8Array::<i64>::new_empty(data_type)),
Utf8Sequence => Box::new(StringSequenceArray::<i32>::new_empty(data_type)),
LargeUtf8Sequence => Box::new(StringSequenceArray::<i64>::new_empty(data_type)),
List => Box::new(ListArray::<i32>::new_empty(data_type)),
LargeList => Box::new(ListArray::<i64>::new_empty(data_type)),
FixedSizeList => Box::new(FixedSizeListArray::new_empty(data_type)),
Expand Down Expand Up @@ -288,6 +292,8 @@ pub fn new_null_array(data_type: DataType, length: usize) -> Box<dyn Array> {
FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_null(data_type, length)),
Utf8 => Box::new(Utf8Array::<i32>::new_null(data_type, length)),
LargeUtf8 => Box::new(Utf8Array::<i64>::new_null(data_type, length)),
Utf8Sequence => Box::new(StringSequenceArray::<i32>::new_null(data_type, length)),
LargeUtf8Sequence => Box::new(StringSequenceArray::<i64>::new_null(data_type, length)),
List => Box::new(ListArray::<i32>::new_null(data_type, length)),
LargeList => Box::new(ListArray::<i64>::new_null(data_type, length)),
FixedSizeList => Box::new(FixedSizeListArray::new_null(data_type, length)),
Expand Down Expand Up @@ -326,6 +332,8 @@ pub fn clone(array: &dyn Array) -> Box<dyn Array> {
FixedSizeBinary => clone_dyn!(array, FixedSizeBinaryArray),
Utf8 => clone_dyn!(array, Utf8Array::<i32>),
LargeUtf8 => clone_dyn!(array, Utf8Array::<i64>),
Utf8Sequence => clone_dyn!(array, StringSequenceArray::<i32>),
LargeUtf8Sequence => clone_dyn!(array, StringSequenceArray::<i64>),
List => clone_dyn!(array, ListArray::<i32>),
LargeList => clone_dyn!(array, ListArray::<i64>),
FixedSizeList => clone_dyn!(array, FixedSizeListArray),
Expand Down Expand Up @@ -359,6 +367,7 @@ mod map;
mod null;
mod primitive;
mod specification;
mod string_sequence;
mod struct_;
mod union;
mod utf8;
Expand All @@ -381,6 +390,7 @@ pub use list::{ListArray, MutableListArray};
pub use map::MapArray;
pub use null::NullArray;
pub use primitive::*;
pub use string_sequence::StringSequenceArray;
pub use struct_::StructArray;
pub use union::UnionArray;
pub use utf8::{MutableUtf8Array, Utf8Array, Utf8ValuesIter};
Expand Down
80 changes: 80 additions & 0 deletions src/array/string_sequence/iterator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
use crate::bitmap::utils::{zip_validity, ZipValidity};
use crate::{array::Offset, trusted_len::TrustedLen};

use super::StringSequenceArray;

/// Iterator over the `&str` values of a [`StringSequenceArray`].
///
/// Yields the values at positions `index..end`, consuming from the front via
/// `next` and from the back via `next_back`. Validity is not consulted here;
/// `StringSequenceArray::iter` pairs this iterator with the validity bitmap.
#[derive(Debug, Clone)]
pub struct StringSequenceValuesIter<'a, O: Offset> {
/// Array whose values are yielded.
array: &'a StringSequenceArray<O>,
/// Position of the next value yielded from the front.
index: usize,
/// One past the position of the next value yielded from the back.
end: usize,
}

impl<'a, O: Offset> StringSequenceValuesIter<'a, O> {
    /// Builds a [`StringSequenceValuesIter`] over `array`, starting at position `0`
    /// and ending (exclusively) at `array.len()`.
    pub fn new(array: &'a StringSequenceArray<O>) -> Self {
        let end = array.len();
        Self {
            array,
            index: 0,
            end,
        }
    }
}

impl<'a, O: Offset> Iterator for StringSequenceValuesIter<'a, O> {
    type Item = &'a str;

    /// Yields the value at the front position and advances it, or `None` once the
    /// front position has met the back position.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.index != self.end {
            let current = self.index;
            self.index = current + 1;
            // SAFETY: `current < self.end`, and `end` starts at `array.len()` and only
            // ever decreases. NOTE(review): assumes `value_unchecked` only requires an
            // in-bounds index — confirm against its `# Safety` contract.
            Some(unsafe { self.array.value_unchecked(current) })
        } else {
            None
        }
    }

    /// Reports the exact number of values left as both lower and upper bound.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.end - self.index;
        (remaining, Some(remaining))
    }
}

impl<'a, O: Offset> DoubleEndedIterator for StringSequenceValuesIter<'a, O> {
    /// Yields the value just before the back position and moves the back position
    /// onto it, or `None` once front and back have met.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.index == self.end {
            return None;
        }
        self.end -= 1;
        let last = self.end;
        // SAFETY: `last` is below the previous `end`, which never exceeds the
        // `array.len()` it was initialized from. NOTE(review): assumes
        // `value_unchecked` only requires an in-bounds index — confirm.
        Some(unsafe { self.array.value_unchecked(last) })
    }
}

impl<'a, O: Offset> IntoIterator for &'a StringSequenceArray<O> {
type Item = Option<&'a str>;
type IntoIter = ZipValidity<'a, &'a str, StringSequenceValuesIter<'a, O>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

impl<'a, O: Offset> StringSequenceArray<O> {
    /// Returns an iterator of `Option<&str>`, combining the values with the
    /// array's validity (when it has one) through `zip_validity`.
    pub fn iter(&'a self) -> ZipValidity<'a, &'a str, StringSequenceValuesIter<'a, O>> {
        let values = self.values_iter();
        let validity = self.validity.as_ref().map(|bitmap| bitmap.iter());
        zip_validity(values, validity)
    }

    /// Returns an iterator of `&str` over every slot, without consulting validity.
    pub fn values_iter(&'a self) -> StringSequenceValuesIter<'a, O> {
        StringSequenceValuesIter::new(self)
    }
}

// SAFETY: `size_hint` returns `end - index` as both its lower and upper bound, i.e. the
// exact number of values left. NOTE(review): presumably this exactness is what
// `TrustedLen` requires — confirm against the trait's documented contract.
unsafe impl<O: Offset> TrustedLen for StringSequenceValuesIter<'_, O> {}
Loading

0 comments on commit 982d259

Please sign in to comment.