Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added BinaryArray::into_mut and double-ended support for its iterator #1255

Merged
merged 5 commits into from
Sep 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 36 additions & 9 deletions src/array/binary/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,17 @@ use super::BinaryArray;
pub struct BinaryValueIter<'a, O: Offset> {
/// The array whose values are yielded.
array: &'a BinaryArray<O>,
/// Position of the next value to yield from the front.
index: usize,
/// One past the position of the next value to yield from the back;
/// the iterator is exhausted once `index == end`.
end: usize,
}

impl<'a, O: Offset> BinaryValueIter<'a, O> {
    /// Creates a new [`BinaryValueIter`] covering every slot of `array`.
    ///
    /// The front cursor starts at `0` and the back cursor at `array.len()`,
    /// so the iterator initially spans the whole array.
    pub fn new(array: &'a BinaryArray<O>) -> Self {
        Self {
            array,
            index: 0,
            end: array.len(),
        }
    }
}

Expand All @@ -21,19 +26,41 @@ impl<'a, O: Offset> Iterator for BinaryValueIter<'a, O> {

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.index >= self.array.len() {
if self.index == self.end {
return None;
} else {
self.index += 1;
}
Some(unsafe { self.array.value_unchecked(self.index - 1) })
let old = self.index;
self.index += 1;
Some(unsafe { self.array.value_unchecked(old) })
}

/// Reports the exact number of values left between the front and back cursors.
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
    // Measured against `end` rather than `array.len()` so the hint stays
    // exact after values have been taken from the back.
    let remaining = self.end - self.index;
    (remaining, Some(remaining))
}

/// Skips `n` values and yields the one after them.
///
/// When fewer than `n + 1` values remain, the iterator is exhausted and
/// `None` is returned.
#[inline]
fn nth(&mut self, n: usize) -> Option<Self::Item> {
    // Compare `n` with the remaining length instead of computing
    // `self.index + n`: that sum overflows for very large `n`, panicking in
    // debug builds and wrapping around to an earlier position in release builds.
    let remaining = self.end - self.index;
    if n >= remaining {
        self.index = self.end;
        None
    } else {
        // Cannot overflow: `index + n < end`.
        self.index += n;
        self.next()
    }
}
}

impl<'a, O: Offset> DoubleEndedIterator for BinaryValueIter<'a, O> {
    /// Yields the value just before the back cursor and moves the cursor
    /// towards the front, or `None` once both cursors have met.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.index == self.end {
            return None;
        }
        self.end -= 1;
        let position = self.end;
        Some(unsafe { self.array.value_unchecked(position) })
    }
}

Expand Down
84 changes: 84 additions & 0 deletions src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use crate::{
trusted_len::TrustedLen,
};

use either::Either;

use super::{
specification::{try_check_offsets, try_check_offsets_bounds},
Array, GenericBinaryArray, Offset,
Expand Down Expand Up @@ -238,6 +240,88 @@ impl<O: Offset> BinaryArray<O> {
self.validity = validity;
}

/// Tries to convert this [`BinaryArray`] into a [`MutableBinaryArray`].
///
/// Returns `Right` with the mutable array only when the validity bitmap (if
/// present), the values and the offsets can all be obtained mutably.
/// In every other case returns `Left` with a [`BinaryArray`] rebuilt from
/// the same data type, offsets, values and validity.
pub fn into_mut(mut self) -> Either<Self, MutableBinaryArray<O>> {
    use Either::*;

    // The validity is resolved first: a bitmap that cannot be made mutable
    // settles the outcome before the values and offsets are inspected.
    let validity = match self.validity {
        None => None,
        Some(bitmap) => match bitmap.into_mut() {
            Left(bitmap) => {
                // Safety: invariants are preserved
                return Left(unsafe {
                    BinaryArray::new_unchecked(
                        self.data_type,
                        self.offsets,
                        self.values,
                        Some(bitmap),
                    )
                });
            }
            Right(mutable_bitmap) => Some(mutable_bitmap),
        },
    };

    // Values are probed before offsets; a buffer that yields mutable access
    // is moved out, leaving a default in its place.
    let maybe_values = self.values.get_mut().map(std::mem::take);
    let maybe_offsets = self.offsets.get_mut().map(std::mem::take);

    match (maybe_values, maybe_offsets) {
        (Some(values), Some(offsets)) => Right(unsafe {
            MutableBinaryArray::from_data(self.data_type, offsets, values, validity)
        }),
        (Some(values), None) => {
            // Safety: invariants are preserved
            Left(unsafe {
                BinaryArray::new_unchecked(
                    self.data_type,
                    self.offsets,
                    values.into(),
                    validity.map(|bitmap| bitmap.into()),
                )
            })
        }
        (None, Some(offsets)) => {
            // Safety: invariants are preserved
            Left(unsafe {
                BinaryArray::new_unchecked(
                    self.data_type,
                    offsets.into(),
                    self.values,
                    validity.map(|bitmap| bitmap.into()),
                )
            })
        }
        (None, None) => {
            // Safety: invariants are preserved
            Left(unsafe {
                BinaryArray::new_unchecked(
                    self.data_type,
                    self.offsets,
                    self.values,
                    validity.map(|bitmap| bitmap.into()),
                )
            })
        }
    }
}

/// Creates an empty [`BinaryArray`], i.e. whose `.len` is zero.
pub fn new_empty(data_type: DataType) -> Self {
Self::new(
Expand Down
15 changes: 15 additions & 0 deletions src/array/binary/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,21 @@ impl<O: Offset> MutableBinaryArray<O> {
unsafe { self.extend_trusted_len_values_unchecked(iterator) }
}

/// Extends the [`MutableBinaryArray`] from an iterator of values.
/// This differs from `extend_trusted_len`, which accepts an iterator of optional values.
///
/// When a validity bitmap is present, every appended value is marked as valid.
#[inline]
pub fn extend_values<I: Iterator<Item = P>, P: AsRef<[u8]>>(&mut self, iterator: I) {
    let appended = extend_from_values_iter(&mut self.offsets, &mut self.values, iterator);

    if let Some(bitmap) = &mut self.validity {
        bitmap.extend_constant(appended, true);
    }
}

/// Extends the [`MutableBinaryArray`] from an `iterator` of values of trusted length.
/// This differs from `extend_trusted_len_unchecked` which accepts iterator of optional
/// values.
Expand Down
7 changes: 0 additions & 7 deletions src/io/parquet/write/binary/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,13 +138,6 @@ pub(crate) fn encode_delta<O: Offset>(

delta_bitpacked::encode(lengths, buffer);
} else {
println!(
"{:?}",
offsets
.windows(2)
.map(|w| (w[1] - w[0]).to_usize() as i64)
.collect::<Vec<_>>()
);
let lengths = offsets.windows(2).map(|w| (w[1] - w[0]).to_usize() as i64);
delta_bitpacked::encode(lengths, buffer);
}
Expand Down
59 changes: 59 additions & 0 deletions tests/it/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use arrow2::{
};

mod mutable;
mod to_mutable;

#[test]
fn basics() {
Expand Down Expand Up @@ -152,3 +153,61 @@ fn debug() {

assert_eq!(format!("{:?}", array), "BinaryArray[[1, 2], [], None]");
}

/// `into_mut` must yield `Left` while a clone of the values buffer is alive.
#[test]
fn into_mut_1() {
    let values = Buffer::from(b"a".to_vec());
    let offsets = Buffer::from(vec![0, 1]);
    // Second handle to the values; it stays alive until the end of the test.
    let values_handle = values.clone();
    assert_eq!(values_handle, values);
    let array = BinaryArray::<i32>::from_data(DataType::Binary, offsets, values, None);
    let converted = array.into_mut();
    assert!(converted.is_left());
}

/// `into_mut` must yield `Left` while a clone of the offsets buffer is alive.
#[test]
fn into_mut_2() {
    let values = Buffer::from(b"a".to_vec());
    let offsets = Buffer::from(vec![0, 1]);
    // Second handle to the offsets; it stays alive until the end of the test.
    let offsets_handle = offsets.clone();
    assert_eq!(offsets_handle, offsets);
    let array = BinaryArray::<i32>::from_data(DataType::Binary, offsets, values, None);
    let converted = array.into_mut();
    assert!(converted.is_left());
}

/// `into_mut` must yield `Left` while a clone of the validity is alive.
#[test]
fn into_mut_3() {
    let values = Buffer::from(b"a".to_vec());
    let offsets = Buffer::from(vec![0, 1]);
    let validity = Some([true].into());
    // Second handle to the validity; it stays alive until the end of the test.
    let validity_handle = validity.clone();
    assert_eq!(validity_handle, validity);
    let array = BinaryArray::<i32>::new(DataType::Binary, offsets, values, validity);
    let converted = array.into_mut();
    assert!(converted.is_left());
}

/// With no extra handles to offsets, values or validity, `into_mut` must yield `Right`.
#[test]
fn into_mut_4() {
    let values = Buffer::from(b"a".to_vec());
    let offsets = Buffer::from(vec![0, 1]);
    let validity = Some([true].into());
    let array = BinaryArray::<i32>::new(DataType::Binary, offsets, values, validity);
    let converted = array.into_mut();
    assert!(converted.is_right());
}

/// Iterating in reverse yields the slots from last to first.
#[test]
fn rev_iter() {
    let array = BinaryArray::<i32>::from(&[Some("hello".as_bytes()), Some(" ".as_bytes()), None]);

    let reversed = array.into_iter().rev().collect::<Vec<_>>();
    let expected = vec![None, Some(" ".as_bytes()), Some("hello".as_bytes())];

    assert_eq!(reversed, expected);
}

/// `nth` returns the requested slot, and `None` when skipping past the end.
#[test]
fn iter_nth() {
    let array = BinaryArray::<i32>::from(&[Some("hello"), Some(" "), None]);

    let second = array.iter().nth(1);
    assert_eq!(second, Some(Some(" ".as_bytes())));

    let past_the_end = array.iter().nth(10);
    assert_eq!(past_the_end, None);
}
67 changes: 67 additions & 0 deletions tests/it/array/binary/to_mutable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
use arrow2::{array::BinaryArray, bitmap::Bitmap, buffer::Buffer, datatypes::DataType};

/// An array with no other handles to its buffers converts to the mutable variant.
#[test]
fn not_shared() {
    let array = BinaryArray::<i32>::from(&[Some("hello"), Some(" "), None]);
    let converted = array.into_mut();
    assert!(converted.is_right());
}

/// A validity bitmap that is still held elsewhere keeps the array immutable.
#[test]
// The clone is the point of the test: the original handle must outlive the conversion.
#[allow(clippy::redundant_clone)]
fn shared_validity() {
    let validity = Bitmap::from([true]);
    let offsets: Buffer<i32> = vec![0, 1].into();
    let values: Buffer<u8> = b"a".to_vec().into();
    let array = BinaryArray::<i32>::new(DataType::Binary, offsets, values, Some(validity.clone()));
    assert!(array.into_mut().is_left());
}

/// A values buffer that is still held elsewhere keeps the array immutable.
#[test]
// The clone is the point of the test: the original handle must outlive the conversion.
#[allow(clippy::redundant_clone)]
fn shared_values() {
    let values: Buffer<u8> = b"a".to_vec().into();
    let offsets: Buffer<i32> = vec![0, 1].into();
    let validity = Some(Bitmap::from([true]));
    let array = BinaryArray::<i32>::new(DataType::Binary, offsets, values.clone(), validity);
    assert!(array.into_mut().is_left());
}

/// Offsets and values that are both still held elsewhere keep the array immutable.
#[test]
// The clones are the point of the test: the original handles must outlive the conversion.
#[allow(clippy::redundant_clone)]
fn shared_offsets_values() {
    let offsets: Buffer<i32> = vec![0, 1].into();
    let values: Buffer<u8> = b"a".to_vec().into();
    let validity = Some(Bitmap::from([true]));
    let array =
        BinaryArray::<i32>::new(DataType::Binary, offsets.clone(), values.clone(), validity);
    assert!(array.into_mut().is_left());
}

/// An offsets buffer that is still held elsewhere keeps the array immutable.
#[test]
// The clone is the point of the test: the original handle must outlive the conversion.
#[allow(clippy::redundant_clone)]
fn shared_offsets() {
    let offsets: Buffer<i32> = vec![0, 1].into();
    let values: Buffer<u8> = b"a".to_vec().into();
    let validity = Some(Bitmap::from([true]));
    let array = BinaryArray::<i32>::new(DataType::Binary, offsets.clone(), values, validity);
    assert!(array.into_mut().is_left());
}

/// A clone of a still-living array cannot be converted to the mutable variant.
#[test]
// The clone is the point of the test: the original array must outlive the conversion.
#[allow(clippy::redundant_clone)]
fn shared_all() {
    let array = BinaryArray::<i32>::from(&[Some("hello"), Some(" "), None]);
    let duplicate = array.clone();
    assert!(duplicate.into_mut().is_left());
}