From 4238570daf0c5583702fec386bce8c4f66a3f228 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Fri, 20 Aug 2021 06:26:37 +0000 Subject: [PATCH] Improved MutableFixedSizeBinaryArray --- src/array/fixed_size_binary/iterator.rs | 68 +++++++++++---- src/array/fixed_size_binary/mod.rs | 17 ++++ src/array/fixed_size_binary/mutable.rs | 91 ++++++++++++++++++--- src/array/primitive/mutable.rs | 9 +- tests/it/array/fixed_size_binary/mutable.rs | 38 ++++++++- 5 files changed, 188 insertions(+), 35 deletions(-) diff --git a/src/array/fixed_size_binary/iterator.rs b/src/array/fixed_size_binary/iterator.rs index b24117fea75..9eb77b43fd8 100644 --- a/src/array/fixed_size_binary/iterator.rs +++ b/src/array/fixed_size_binary/iterator.rs @@ -1,46 +1,54 @@ -use crate::array::Array; use crate::bitmap::utils::{zip_validity, ZipValidity}; -use super::FixedSizeBinaryArray; +use super::super::MutableArray; +use super::{FixedSizeBinaryArray, FixedSizeBinaryValues, MutableFixedSizeBinaryArray}; /// # Safety /// This iterator is `TrustedLen` -pub struct FixedSizeBinaryValuesIter<'a> { - array: &'a FixedSizeBinaryArray, +pub struct FixedSizeBinaryValuesIter<'a, T: FixedSizeBinaryValues> { + array: &'a T, + len: usize, index: usize, } -impl<'a> FixedSizeBinaryValuesIter<'a> { +impl<'a, T: FixedSizeBinaryValues> FixedSizeBinaryValuesIter<'a, T> { #[inline] - pub fn new(array: &'a FixedSizeBinaryArray) -> Self { - Self { array, index: 0 } + pub fn new(array: &'a T) -> Self { + Self { + array, + len: array.values().len() / array.size(), + index: 0, + } } } -impl<'a> Iterator for FixedSizeBinaryValuesIter<'a> { +impl<'a, T: FixedSizeBinaryValues> Iterator for FixedSizeBinaryValuesIter<'a, T> { type Item = &'a [u8]; #[inline] fn next(&mut self) -> Option { - if self.index >= self.array.len() { + if self.index >= self.len { return None; - } else { - self.index += 1; } - Some(unsafe { self.array.value_unchecked(self.index - 1) }) + let index = self.index; + let r = Some(unsafe { + std::slice::from_raw_parts( + self.array.values().as_ptr().add(index * self.array.size()), + self.array.size(), + ) + }); + self.index += 1; + r } fn size_hint(&self) -> (usize, Option) { - ( - self.array.len() - self.index, - Some(self.array.len() - self.index), - ) + (self.len - self.index, Some(self.len - self.index)) } } impl<'a> IntoIterator for &'a FixedSizeBinaryArray { type Item = Option<&'a [u8]>; - type IntoIter = ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a>>; + type IntoIter = ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, FixedSizeBinaryArray>>; fn into_iter(self) -> Self::IntoIter { self.iter() @@ -49,10 +57,34 @@ impl<'a> IntoIterator for &'a FixedSizeBinaryArray { impl<'a> FixedSizeBinaryArray { /// constructs a new iterator - pub fn iter(&'a self) -> ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a>> { + pub fn iter( + &'a self, + ) -> ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, FixedSizeBinaryArray>> { zip_validity( FixedSizeBinaryValuesIter::new(self), self.validity.as_ref().map(|x| x.iter()), ) } } + +impl<'a> IntoIterator for &'a MutableFixedSizeBinaryArray { + type Item = Option<&'a [u8]>; + type IntoIter = + ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, MutableFixedSizeBinaryArray>>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a> MutableFixedSizeBinaryArray { + /// constructs a new iterator + pub fn iter( + &'a self, + ) -> ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, MutableFixedSizeBinaryArray>> { + zip_validity( + FixedSizeBinaryValuesIter::new(self), + self.validity().as_ref().map(|x| x.iter()), + ) + } +} diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index ee4405cfd2f..714dccc8cc5 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -161,3 +161,20 @@ impl FixedSizeBinaryArray { .into() } } + +pub trait FixedSizeBinaryValues { + fn values(&self) -> &[u8]; + fn size(&self) -> usize; +} + +impl FixedSizeBinaryValues for FixedSizeBinaryArray { + #[inline] + fn values(&self) -> &[u8] { + &self.values + } + + #[inline] + fn size(&self) -> usize { + self.size as usize + } +} diff --git a/src/array/fixed_size_binary/mutable.rs b/src/array/fixed_size_binary/mutable.rs index 395ecfca3cd..4f06eae3473 100644 --- a/src/array/fixed_size_binary/mutable.rs +++ b/src/array/fixed_size_binary/mutable.rs @@ -8,7 +8,7 @@ use crate::{ error::{ArrowError, Result}, }; -use super::FixedSizeBinaryArray; +use super::{FixedSizeBinaryArray, FixedSizeBinaryValues}; /// Mutable version of [`FixedSizeBinaryArray`]. #[derive(Debug)] @@ -30,19 +30,44 @@ impl From for FixedSizeBinaryArray { } impl MutableFixedSizeBinaryArray { + pub fn from_data( + size: usize, + values: MutableBuffer, + validity: Option, + ) -> Self { + assert_eq!( + values.len() % size, + 0, + "The len of values must be a multiple of size" + ); + if let Some(validity) = &validity { + assert_eq!( + validity.len(), + values.len() / size, + "The len of the validity must be equal to values / size" + ); + } + Self { + data_type: DataType::FixedSizeBinary(size as i32), + size, + values, + validity, + } + } + pub fn new(size: usize) -> Self { Self::with_capacity(size, 0) } pub fn with_capacity(size: usize, capacity: usize) -> Self { - Self { - data_type: DataType::FixedSizeBinary(size as i32), + Self::from_data( size, - values: MutableBuffer::::with_capacity(capacity * size), - validity: None, - } + MutableBuffer::::with_capacity(capacity * size), + None, + ) } + #[inline] pub fn try_push>(&mut self, value: Option

) -> Result<()> { match value { Some(bytes) => { @@ -89,11 +114,37 @@ impl MutableFixedSizeBinaryArray { } fn init_validity(&mut self) { - self.validity = Some(MutableBitmap::from_trusted_len_iter( - std::iter::repeat(true) - .take(self.len() - 1) - .chain(std::iter::once(false)), - )) + let mut validity = MutableBitmap::new(); + validity.extend_constant(self.len(), true); + validity.set(self.len() - 1, false); + self.validity = Some(validity) + } + + /// Returns the element at index `i` as `&[u8]` + #[inline] + pub fn value(&self, i: usize) -> &[u8] { + &self.values[i * self.size..(i + 1) * self.size] + } + + /// Returns the element at index `i` as `&[u8]` + /// # Safety + /// Assumes that the `i < self.len`. + #[inline] + pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] { + std::slice::from_raw_parts(self.values.as_ptr().add(i * self.size), self.size) + } +} + +/// Accessors +impl MutableFixedSizeBinaryArray { + /// Returns its values. + pub fn values(&self) -> &MutableBuffer { + &self.values + } + + /// Returns a mutable slice of values. + pub fn values_mut_slice(&mut self) -> &mut [u8] { + self.values.as_mut_slice() } } @@ -130,3 +181,21 @@ impl MutableArray for MutableFixedSizeBinaryArray { self.values.extend_constant(self.size, 0); } } + +impl FixedSizeBinaryValues for MutableFixedSizeBinaryArray { + #[inline] + fn values(&self) -> &[u8] { + &self.values + } + + #[inline] + fn size(&self) -> usize { + self.size + } +} + +impl PartialEq for MutableFixedSizeBinaryArray { + fn eq(&self, other: &Self) -> bool { + self.iter().eq(other.iter()) + } +} diff --git a/src/array/primitive/mutable.rs b/src/array/primitive/mutable.rs index a4f771ebe39..9e126ee4857 100644 --- a/src/array/primitive/mutable.rs +++ b/src/array/primitive/mutable.rs @@ -173,11 +173,10 @@ impl MutablePrimitiveArray { } fn init_validity(&mut self) { - self.validity = Some(MutableBitmap::from_trusted_len_iter( - std::iter::repeat(true) - .take(self.len() - 1) - .chain(std::iter::once(false)), - )) + let mut validity = MutableBitmap::new(); + validity.extend_constant(self.len(), true); + validity.set(self.len() - 1, false); + self.validity = Some(validity) } /// Changes the arrays' [`DataType`], returning a new [`MutablePrimitiveArray`]. diff --git a/tests/it/array/fixed_size_binary/mutable.rs b/tests/it/array/fixed_size_binary/mutable.rs index 064fcadddac..d8c27ccee53 100644 --- a/tests/it/array/fixed_size_binary/mutable.rs +++ b/tests/it/array/fixed_size_binary/mutable.rs @@ -1,8 +1,44 @@ use arrow2::array::*; -use arrow2::bitmap::Bitmap; +use arrow2::bitmap::{Bitmap, MutableBitmap}; +use arrow2::buffer::MutableBuffer; +use arrow2::datatypes::DataType; #[test] fn basic() { + let a = MutableFixedSizeBinaryArray::from_data(2, MutableBuffer::from([1, 2, 3, 4]), None); + assert_eq!(a.len(), 2); + assert_eq!(a.data_type(), &DataType::FixedSizeBinary(2)); + assert_eq!(a.values(), &MutableBuffer::from([1, 2, 3, 4])); + assert_eq!(a.validity(), &None); + assert_eq!(a.value(1), &[3, 4]); + assert_eq!(unsafe { a.value_unchecked(1) }, &[3, 4]); +} + +#[allow(clippy::eq_op)] +#[test] +fn equal() { + let a = MutableFixedSizeBinaryArray::from_data(2, MutableBuffer::from([1, 2, 3, 4]), None); + assert_eq!(a, a); + let b = MutableFixedSizeBinaryArray::from_data(2, MutableBuffer::from([1, 2]), None); + assert_eq!(b, b); + assert!(a != b); + let a = MutableFixedSizeBinaryArray::from_data( + 2, + MutableBuffer::from([1, 2, 3, 4]), + Some(MutableBitmap::from([true, false])), + ); + let b = MutableFixedSizeBinaryArray::from_data( + 2, + MutableBuffer::from([1, 2, 3, 4]), + Some(MutableBitmap::from([false, true])), + ); + assert_eq!(a, a); + assert_eq!(b, b); + assert!(a != b); +} + +#[test] +fn try_from_iter() { let array = MutableFixedSizeBinaryArray::try_from_iter( vec![Some(b"ab"), Some(b"bc"), None, Some(b"fh")], 2,