Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved MutableFixedSizeBinaryArray
Browse files: browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Aug 20, 2021
1 parent bbe607c commit 4238570
Show file tree
Hide file tree
Showing 5 changed files with 188 additions and 35 deletions.
68 changes: 50 additions & 18 deletions src/array/fixed_size_binary/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,46 +1,54 @@
use crate::array::Array;
use crate::bitmap::utils::{zip_validity, ZipValidity};

use super::FixedSizeBinaryArray;
use super::super::MutableArray;
use super::{FixedSizeBinaryArray, FixedSizeBinaryValues, MutableFixedSizeBinaryArray};

/// # Safety
/// This iterator is `TrustedLen`
pub struct FixedSizeBinaryValuesIter<'a> {
array: &'a FixedSizeBinaryArray,
pub struct FixedSizeBinaryValuesIter<'a, T: FixedSizeBinaryValues> {
array: &'a T,
len: usize,
index: usize,
}

impl<'a> FixedSizeBinaryValuesIter<'a> {
impl<'a, T: FixedSizeBinaryValues> FixedSizeBinaryValuesIter<'a, T> {
#[inline]
pub fn new(array: &'a FixedSizeBinaryArray) -> Self {
Self { array, index: 0 }
pub fn new(array: &'a T) -> Self {
Self {
array,
len: array.values().len() / array.size(),
index: 0,
}
}
}

impl<'a> Iterator for FixedSizeBinaryValuesIter<'a> {
impl<'a, T: FixedSizeBinaryValues> Iterator for FixedSizeBinaryValuesIter<'a, T> {
type Item = &'a [u8];

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.index >= self.array.len() {
if self.index >= self.len {
return None;
} else {
self.index += 1;
}
Some(unsafe { self.array.value_unchecked(self.index - 1) })
let index = self.index;
let r = Some(unsafe {
std::slice::from_raw_parts(
self.array.values().as_ptr().add(index * self.array.size()),
self.array.size(),
)
});
self.index += 1;
r
}

fn size_hint(&self) -> (usize, Option<usize>) {
(
self.array.len() - self.index,
Some(self.array.len() - self.index),
)
(self.len - self.index, Some(self.len - self.index))
}
}

impl<'a> IntoIterator for &'a FixedSizeBinaryArray {
type Item = Option<&'a [u8]>;
type IntoIter = ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a>>;
type IntoIter = ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, FixedSizeBinaryArray>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
Expand All @@ -49,10 +57,34 @@ impl<'a> IntoIterator for &'a FixedSizeBinaryArray {

impl<'a> FixedSizeBinaryArray {
/// constructs a new iterator
pub fn iter(&'a self) -> ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a>> {
pub fn iter(
&'a self,
) -> ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, FixedSizeBinaryArray>> {
zip_validity(
FixedSizeBinaryValuesIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
)
}
}

impl<'a> IntoIterator for &'a MutableFixedSizeBinaryArray {
type Item = Option<&'a [u8]>;
type IntoIter =
ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, MutableFixedSizeBinaryArray>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

impl<'a> MutableFixedSizeBinaryArray {
    /// Returns an iterator that pairs the values of this array with its
    /// validity bitmap, when one is present.
    pub fn iter(
        &'a self,
    ) -> ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, MutableFixedSizeBinaryArray>> {
        let values = FixedSizeBinaryValuesIter::new(self);
        let validity = self.validity().as_ref().map(|bitmap| bitmap.iter());
        zip_validity(values, validity)
    }
}
17 changes: 17 additions & 0 deletions src/array/fixed_size_binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,20 @@ impl FixedSizeBinaryArray {
.into()
}
}

/// Raw, byte-level access to a fixed-size binary container.
///
/// Implemented by both the immutable and the mutable array so that
/// `FixedSizeBinaryValuesIter` can iterate over either of them.
pub trait FixedSizeBinaryValues {
/// Returns the bytes of the container as a single slice.
fn values(&self) -> &[u8];
/// Returns the width, in bytes, of a single element.
fn size(&self) -> usize;
}

/// Exposes the values buffer and the element width of [`FixedSizeBinaryArray`].
impl FixedSizeBinaryValues for FixedSizeBinaryArray {
#[inline]
fn values(&self) -> &[u8] {
&self.values
}

#[inline]
fn size(&self) -> usize {
// NOTE(review): the cast suggests `self.size` is stored as a signed integer;
// a negative value would wrap here - confirm it is validated on construction.
self.size as usize
}
}
91 changes: 80 additions & 11 deletions src/array/fixed_size_binary/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::{
error::{ArrowError, Result},
};

use super::FixedSizeBinaryArray;
use super::{FixedSizeBinaryArray, FixedSizeBinaryValues};

/// Mutable version of [`FixedSizeBinaryArray`].
#[derive(Debug)]
Expand All @@ -30,19 +30,44 @@ impl From<MutableFixedSizeBinaryArray> for FixedSizeBinaryArray {
}

impl MutableFixedSizeBinaryArray {
/// Creates a new [`MutableFixedSizeBinaryArray`] from its raw parts.
///
/// * `size` - width, in bytes, of every element.
/// * `values` - the bytes of all elements; its length must be a multiple of `size`.
/// * `validity` - optional bitmap with exactly one entry per element.
///
/// # Panics
/// Panics iff:
/// * `size` is zero,
/// * the length of `values` is not a multiple of `size`, or
/// * `validity` is `Some` and its length differs from `values.len() / size`.
pub fn from_data(
    size: usize,
    values: MutableBuffer<u8>,
    validity: Option<MutableBitmap>,
) -> Self {
    // Checked first: the `%` and `/` below would otherwise abort with an
    // opaque "divisor of zero" panic instead of a message naming the cause.
    assert_ne!(size, 0, "The size must be larger than zero");
    assert_eq!(
        values.len() % size,
        0,
        "The len of values must be a multiple of size"
    );
    if let Some(validity) = &validity {
        assert_eq!(
            validity.len(),
            values.len() / size,
            "The len of the validity must be equal to values / size"
        );
    }
    Self {
        // NOTE(review): `size as i32` silently truncates for sizes above
        // `i32::MAX` - confirm callers never pass such a value.
        data_type: DataType::FixedSizeBinary(size as i32),
        size,
        values,
        validity,
    }
}

pub fn new(size: usize) -> Self {
Self::with_capacity(size, 0)
}

pub fn with_capacity(size: usize, capacity: usize) -> Self {
Self {
data_type: DataType::FixedSizeBinary(size as i32),
Self::from_data(
size,
values: MutableBuffer::<u8>::with_capacity(capacity * size),
validity: None,
}
MutableBuffer::<u8>::with_capacity(capacity * size),
None,
)
}

#[inline]
pub fn try_push<P: AsRef<[u8]>>(&mut self, value: Option<P>) -> Result<()> {
match value {
Some(bytes) => {
Expand Down Expand Up @@ -89,11 +114,37 @@ impl MutableFixedSizeBinaryArray {
}

fn init_validity(&mut self) {
self.validity = Some(MutableBitmap::from_trusted_len_iter(
std::iter::repeat(true)
.take(self.len() - 1)
.chain(std::iter::once(false)),
))
let mut validity = MutableBitmap::new();
validity.extend_constant(self.len(), true);
validity.set(self.len() - 1, false);
self.validity = Some(validity)
}

/// Returns the bytes of the element stored at position `i`.
///
/// # Panics
/// Panics if the byte range of element `i` falls outside of the values buffer.
#[inline]
pub fn value(&self, i: usize) -> &[u8] {
    let width = self.size;
    let bytes = i * width..(i + 1) * width;
    &self.values[bytes]
}

/// Returns the bytes of the element stored at position `i`, without bounds checks.
/// # Safety
/// The caller must guarantee that `i` is smaller than the number of elements
/// of this array; otherwise the returned slice points outside of the values buffer.
#[inline]
pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
    let width = self.size;
    // Start of element `i` inside the contiguous values buffer.
    let start = self.values.as_ptr().add(i * width);
    std::slice::from_raw_parts(start, width)
}
}

/// Accessors
impl MutableFixedSizeBinaryArray {
/// Returns a reference to the buffer holding the bytes of all values.
pub fn values(&self) -> &MutableBuffer<u8> {
&self.values
}

/// Returns the bytes of all values as a mutable slice.
///
/// Bytes can be overwritten in place through the returned slice, but a slice
/// cannot grow or shrink, so the number of stored bytes stays the same.
pub fn values_mut_slice(&mut self) -> &mut [u8] {
self.values.as_mut_slice()
}
}

Expand Down Expand Up @@ -130,3 +181,21 @@ impl MutableArray for MutableFixedSizeBinaryArray {
self.values.extend_constant(self.size, 0);
}
}

/// Exposes the values buffer and the element width of [`MutableFixedSizeBinaryArray`].
impl FixedSizeBinaryValues for MutableFixedSizeBinaryArray {
// Borrows the values buffer as a plain byte slice.
#[inline]
fn values(&self) -> &[u8] {
&self.values
}

// The width is already stored as `usize`, so no conversion is needed.
#[inline]
fn size(&self) -> usize {
self.size
}
}

/// Two arrays are equal when they have the same element width and yield the
/// same sequence of optional values.
impl PartialEq for MutableFixedSizeBinaryArray {
    fn eq(&self, other: &Self) -> bool {
        // An empty array yields no elements whatever its width, so comparing
        // the iterators alone would report arrays of different widths (and
        // therefore different data types) as equal.
        self.size == other.size && self.iter().eq(other.iter())
    }
}
9 changes: 4 additions & 5 deletions src/array/primitive/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,10 @@ impl<T: NativeType> MutablePrimitiveArray<T> {
}

fn init_validity(&mut self) {
self.validity = Some(MutableBitmap::from_trusted_len_iter(
std::iter::repeat(true)
.take(self.len() - 1)
.chain(std::iter::once(false)),
))
let mut validity = MutableBitmap::new();
validity.extend_constant(self.len(), true);
validity.set(self.len() - 1, false);
self.validity = Some(validity)
}

/// Changes the arrays' [`DataType`], returning a new [`MutablePrimitiveArray`].
Expand Down
38 changes: 37 additions & 1 deletion tests/it/array/fixed_size_binary/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,44 @@
use arrow2::array::*;
use arrow2::bitmap::Bitmap;
use arrow2::bitmap::{Bitmap, MutableBitmap};
use arrow2::buffer::MutableBuffer;
use arrow2::datatypes::DataType;

/// Builds an array from raw parts and checks every accessor against the inputs.
#[test]
fn basic() {
    let bytes = [1u8, 2, 3, 4];
    let array = MutableFixedSizeBinaryArray::from_data(2, MutableBuffer::from(bytes), None);

    // four bytes split into elements of width two
    assert_eq!(array.len(), 2);
    assert_eq!(array.data_type(), &DataType::FixedSizeBinary(2));
    assert_eq!(array.values(), &MutableBuffer::from(bytes));
    assert_eq!(array.validity(), &None);

    // checked and unchecked access must agree on the second element
    assert_eq!(array.value(1), &[3, 4]);
    assert_eq!(unsafe { array.value_unchecked(1) }, &[3, 4]);
}

// `eq_op` is allowed on purpose: comparing a value with itself checks reflexivity.
#[allow(clippy::eq_op)]
#[test]
fn equal() {
    // same width, different number of elements
    let two_items =
        MutableFixedSizeBinaryArray::from_data(2, MutableBuffer::from([1, 2, 3, 4]), None);
    assert_eq!(two_items, two_items);
    let one_item = MutableFixedSizeBinaryArray::from_data(2, MutableBuffer::from([1, 2]), None);
    assert_eq!(one_item, one_item);
    assert_ne!(two_items, one_item);

    // same bytes, different validity
    let first_valid = MutableFixedSizeBinaryArray::from_data(
        2,
        MutableBuffer::from([1, 2, 3, 4]),
        Some(MutableBitmap::from([true, false])),
    );
    let second_valid = MutableFixedSizeBinaryArray::from_data(
        2,
        MutableBuffer::from([1, 2, 3, 4]),
        Some(MutableBitmap::from([false, true])),
    );
    assert_eq!(first_valid, first_valid);
    assert_eq!(second_valid, second_valid);
    assert_ne!(first_valid, second_valid);
}

#[test]
fn try_from_iter() {
let array = MutableFixedSizeBinaryArray::try_from_iter(
vec![Some(b"ab"), Some(b"bc"), None, Some(b"fh")],
2,
Expand Down

0 comments on commit 4238570

Please sign in to comment.