Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
[WIP] Add extend methods for MutableUtf8Array
Browse files Browse the repository at this point in the history
  • Loading branch information
VasanthakumarV committed Sep 16, 2021
1 parent ee23796 commit 2d61d67
Showing 1 changed file with 138 additions and 0 deletions.
138 changes: 138 additions & 0 deletions src/array/utf8/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,145 @@ impl<O: Offset, P: AsRef<str>> FromIterator<Option<P>> for MutableUtf8Array<O> {
}
}

// TODO Move to tests/
#[cfg(test)]
mod tests {
    use super::*;
    use crate::bitmap::Bitmap;

    /// Two batches of non-null values are concatenated into the values buffer,
    /// one offset is appended per value, and no validity bitmap is created.
    #[test]
    fn test_extend_trusted_len_values() {
        let first_batch = ["hi", "there"];
        let second_batch = ["hello"];

        let mut builder = MutableUtf8Array::<i32>::new();
        builder.extend_trusted_len_values(first_batch.iter());
        builder.extend_trusted_len_values(second_batch.iter());

        assert_eq!(builder.values().as_slice(), b"hitherehello");
        assert_eq!(builder.offsets().as_slice(), &[0, 2, 7, 12]);
        assert!(builder.validity().is_none());
    }

    /// Two batches of optional values are appended; the single `None` repeats
    /// the previous offset and clears only its own validity bit.
    #[test]
    fn test_extend_trusted_len() {
        let mut builder = MutableUtf8Array::<i32>::new();

        // TODO: passing an array literal's `into_iter()` directly does not
        // compile here. Presumably the array iterator yields references and/or
        // lacks a `TrustedLen` impl -- confirm. `Vec` is used in the meantime.
        builder.extend_trusted_len(vec![Some("hi"), Some("there")].into_iter());
        builder.extend_trusted_len(vec![None, Some("hello")].into_iter());

        let frozen: Utf8Array<i32> = builder.into();

        // Slots 0, 1 and 3 are valid; slot 2 is the null.
        let expected_validity = Some(Bitmap::from_u8_slice(&[0b00001011], 4));

        assert_eq!(frozen.values().as_slice(), b"hitherehello");
        assert_eq!(frozen.offsets().as_slice(), &[0, 2, 7, 7, 12]);
        assert_eq!(frozen.validity(), &expected_validity);
    }
}

impl<O: Offset> MutableUtf8Array<O> {
/// Appends every value yielded by a [`TrustedLen`] iterator to this
/// [`MutableUtf8Array`].
///
/// All appended slots are non-null. Use `extend_trusted_len` instead when the
/// iterator yields optional values.
#[inline]
pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
where
    I: TrustedLen<Item = P>,
    P: AsRef<str>,
{
    // SAFETY: the unchecked variant requires a trusted-len iterator, which is
    // exactly what the `TrustedLen` bound on `I` provides.
    unsafe {
        self.extend_trusted_len_values_unchecked(iterator);
    }
}

/// Extends the [`MutableUtf8Array`] from an iterator of non-null values whose
/// reported upper bound is trusted.
///
/// This differs from `extend_trusted_len_unchecked`, which accepts an iterator
/// of optional values: every slot appended here is valid.
///
/// The string bytes and the offsets are written directly into the array's own
/// buffers; no intermediate collections are allocated.
///
/// # Panics
/// Panics if the iterator reports no upper bound, or if `O::from_usize` fails
/// for the byte length of an item.
///
/// # Safety
/// The iterator must be trusted len.
#[inline]
pub unsafe fn extend_trusted_len_values_unchecked<I, P>(&mut self, iterator: I)
where
    P: AsRef<str>,
    I: Iterator<Item = P>,
{
    let (_, upper) = iterator.size_hint();
    let additional = upper.expect("extend_trusted_len_values requires an upper limit");

    // Exactly one offset is appended per item, so a single reservation up
    // front covers every `push_unchecked` in the loop below.
    self.offsets.reserve(additional);

    // Running end offset of the values buffer, continued from the array's
    // current last offset.
    // NOTE(review): this running sum is not overflow-checked for `O`; confirm
    // whether callers bound the total size.
    let mut length = self.last_offset();

    for item in iterator {
        let bytes = item.as_ref().as_bytes();

        length += O::from_usize(bytes.len()).unwrap();

        self.values.extend_from_slice(bytes);
        // Relies on the reservation above together with the trusted-len
        // contract (the iterator yields no more than `additional` items).
        self.offsets.push_unchecked(length);
    }

    // Only an already-existing validity bitmap needs to grow; all new slots
    // are valid.
    if let Some(validity) = self.validity.as_mut() {
        validity.extend_constant(additional, true);
    }
}

/// Appends every optional value yielded by a [`TrustedLen`] iterator to this
/// [`MutableUtf8Array`]; a `None` item becomes a null slot.
#[inline]
pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
where
    I: TrustedLen<Item = Option<P>>,
    P: AsRef<str>,
{
    // SAFETY: the unchecked variant requires a trusted-len iterator, which is
    // exactly what the `TrustedLen` bound on `I` provides.
    unsafe {
        self.extend_trusted_len_unchecked(iterator);
    }
}

/// Extends the [`MutableUtf8Array`] from an iterator of optional values whose
/// reported upper bound is trusted.
///
/// A `Some` item appends its bytes and a new end offset; a `None` item repeats
/// the previous offset and is recorded as null, creating the validity bitmap
/// on the first null if the array did not have one yet.
///
/// # Panics
/// Panics if the iterator reports no upper bound, or if `O::from_usize` fails
/// for the new length of the values buffer.
///
/// # Safety
/// The iterator must be trusted len.
#[inline]
pub unsafe fn extend_trusted_len_unchecked<I, P>(&mut self, iterator: I)
where
    P: AsRef<str>,
    I: Iterator<Item = Option<P>>,
{
    let (_, upper) = iterator.size_hint();
    let additional = upper.expect("extend_trusted_len requires an upper limit");

    // TODO How to reserve space for `values` and `validity` and take advantage
    // of `push_unchecked`
    // Exactly one offset is appended per item, so a single reservation up
    // front covers every `push_unchecked` in the loop below.
    self.offsets.reserve(additional);

    for item in iterator {
        match item {
            Some(item) => {
                self.values.extend_from_slice(item.as_ref().as_bytes());

                // The new end offset is the total number of bytes stored.
                let size = O::from_usize(self.values.len()).unwrap();
                self.offsets.push_unchecked(size);

                // Without a bitmap every slot is implicitly valid, so there
                // is nothing to record.
                if let Some(validity) = self.validity.as_mut() {
                    validity.push(true);
                }
            }
            None => {
                // A null occupies no bytes: repeat the previous offset.
                self.offsets.push_unchecked(self.last_offset());

                match &mut self.validity {
                    Some(validity) => validity.push(false),
                    // First null seen. `init_validity` (defined elsewhere) is
                    // expected to mark all earlier slots valid and this one
                    // null -- confirm against its definition.
                    None => self.init_validity(),
                }
            }
        }
    }
}

/// Creates a [`MutableUtf8Array`] from an iterator of trusted length.
/// # Safety
/// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
Expand Down

0 comments on commit 2d61d67

Please sign in to comment.