Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Improved MutableFixedSizeBinaryArray #303

Merged
merged 1 commit into from
Aug 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 50 additions & 18 deletions src/array/fixed_size_binary/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,46 +1,54 @@
use crate::array::Array;
use crate::bitmap::utils::{zip_validity, ZipValidity};

use super::FixedSizeBinaryArray;
use super::super::MutableArray;
use super::{FixedSizeBinaryArray, FixedSizeBinaryValues, MutableFixedSizeBinaryArray};

/// # Safety
/// This iterator is `TrustedLen`
pub struct FixedSizeBinaryValuesIter<'a> {
array: &'a FixedSizeBinaryArray,
pub struct FixedSizeBinaryValuesIter<'a, T: FixedSizeBinaryValues> {
array: &'a T,
len: usize,
index: usize,
}

impl<'a> FixedSizeBinaryValuesIter<'a> {
impl<'a, T: FixedSizeBinaryValues> FixedSizeBinaryValuesIter<'a, T> {
#[inline]
pub fn new(array: &'a FixedSizeBinaryArray) -> Self {
Self { array, index: 0 }
pub fn new(array: &'a T) -> Self {
Self {
array,
len: array.values().len() / array.size(),
index: 0,
}
}
}

impl<'a> Iterator for FixedSizeBinaryValuesIter<'a> {
impl<'a, T: FixedSizeBinaryValues> Iterator for FixedSizeBinaryValuesIter<'a, T> {
type Item = &'a [u8];

#[inline]
fn next(&mut self) -> Option<Self::Item> {
if self.index >= self.array.len() {
if self.index >= self.len {
return None;
} else {
self.index += 1;
}
Some(unsafe { self.array.value_unchecked(self.index - 1) })
let index = self.index;
let r = Some(unsafe {
std::slice::from_raw_parts(
self.array.values().as_ptr().add(index * self.array.size()),
self.array.size(),
)
});
self.index += 1;
r
}

fn size_hint(&self) -> (usize, Option<usize>) {
(
self.array.len() - self.index,
Some(self.array.len() - self.index),
)
(self.len - self.index, Some(self.len - self.index))
}
}

impl<'a> IntoIterator for &'a FixedSizeBinaryArray {
type Item = Option<&'a [u8]>;
type IntoIter = ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a>>;
type IntoIter = ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, FixedSizeBinaryArray>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
Expand All @@ -49,10 +57,34 @@ impl<'a> IntoIterator for &'a FixedSizeBinaryArray {

impl<'a> FixedSizeBinaryArray {
/// constructs a new iterator
pub fn iter(&'a self) -> ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a>> {
pub fn iter(
&'a self,
) -> ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, FixedSizeBinaryArray>> {
zip_validity(
FixedSizeBinaryValuesIter::new(self),
self.validity.as_ref().map(|x| x.iter()),
)
}
}

impl<'a> IntoIterator for &'a MutableFixedSizeBinaryArray {
type Item = Option<&'a [u8]>;
type IntoIter =
ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, MutableFixedSizeBinaryArray>>;

fn into_iter(self) -> Self::IntoIter {
self.iter()
}
}

impl<'a> MutableFixedSizeBinaryArray {
/// constructs a new iterator
pub fn iter(
&'a self,
) -> ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, MutableFixedSizeBinaryArray>> {
zip_validity(
FixedSizeBinaryValuesIter::new(self),
self.validity().as_ref().map(|x| x.iter()),
)
}
}
17 changes: 17 additions & 0 deletions src/array/fixed_size_binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,20 @@ impl FixedSizeBinaryArray {
.into()
}
}

pub trait FixedSizeBinaryValues {
fn values(&self) -> &[u8];
fn size(&self) -> usize;
}

impl FixedSizeBinaryValues for FixedSizeBinaryArray {
#[inline]
fn values(&self) -> &[u8] {
&self.values
}

#[inline]
fn size(&self) -> usize {
self.size as usize
}
}
91 changes: 80 additions & 11 deletions src/array/fixed_size_binary/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use crate::{
error::{ArrowError, Result},
};

use super::FixedSizeBinaryArray;
use super::{FixedSizeBinaryArray, FixedSizeBinaryValues};

/// Mutable version of [`FixedSizeBinaryArray`].
#[derive(Debug)]
Expand All @@ -30,19 +30,44 @@ impl From<MutableFixedSizeBinaryArray> for FixedSizeBinaryArray {
}

impl MutableFixedSizeBinaryArray {
pub fn from_data(
size: usize,
values: MutableBuffer<u8>,
validity: Option<MutableBitmap>,
) -> Self {
assert_eq!(
values.len() % size,
0,
"The len of values must be a multiple of size"
);
if let Some(validity) = &validity {
assert_eq!(
validity.len(),
values.len() / size,
"The len of the validity must be equal to values / size"
);
}
Self {
data_type: DataType::FixedSizeBinary(size as i32),
size,
values,
validity,
}
}

pub fn new(size: usize) -> Self {
Self::with_capacity(size, 0)
}

pub fn with_capacity(size: usize, capacity: usize) -> Self {
Self {
data_type: DataType::FixedSizeBinary(size as i32),
Self::from_data(
size,
values: MutableBuffer::<u8>::with_capacity(capacity * size),
validity: None,
}
MutableBuffer::<u8>::with_capacity(capacity * size),
None,
)
}

#[inline]
pub fn try_push<P: AsRef<[u8]>>(&mut self, value: Option<P>) -> Result<()> {
match value {
Some(bytes) => {
Expand Down Expand Up @@ -89,11 +114,37 @@ impl MutableFixedSizeBinaryArray {
}

fn init_validity(&mut self) {
self.validity = Some(MutableBitmap::from_trusted_len_iter(
std::iter::repeat(true)
.take(self.len() - 1)
.chain(std::iter::once(false)),
))
let mut validity = MutableBitmap::new();
validity.extend_constant(self.len(), true);
validity.set(self.len() - 1, false);
self.validity = Some(validity)
}

/// Returns the element at index `i` as `&[u8]`
#[inline]
pub fn value(&self, i: usize) -> &[u8] {
&self.values[i * self.size..(i + 1) * self.size]
}

/// Returns the element at index `i` as `&[u8]`
/// # Safety
/// Assumes that the `i < self.len`.
#[inline]
pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
std::slice::from_raw_parts(self.values.as_ptr().add(i * self.size), self.size)
}
}

/// Accessors
impl MutableFixedSizeBinaryArray {
/// Returns its values.
pub fn values(&self) -> &MutableBuffer<u8> {
&self.values
}

/// Returns a mutable slice of values.
pub fn values_mut_slice(&mut self) -> &mut [u8] {
self.values.as_mut_slice()
}
}

Expand Down Expand Up @@ -130,3 +181,21 @@ impl MutableArray for MutableFixedSizeBinaryArray {
self.values.extend_constant(self.size, 0);
}
}

impl FixedSizeBinaryValues for MutableFixedSizeBinaryArray {
#[inline]
fn values(&self) -> &[u8] {
&self.values
}

#[inline]
fn size(&self) -> usize {
self.size
}
}

impl PartialEq for MutableFixedSizeBinaryArray {
fn eq(&self, other: &Self) -> bool {
self.iter().eq(other.iter())
}
}
9 changes: 4 additions & 5 deletions src/array/primitive/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,10 @@ impl<T: NativeType> MutablePrimitiveArray<T> {
}

fn init_validity(&mut self) {
self.validity = Some(MutableBitmap::from_trusted_len_iter(
std::iter::repeat(true)
.take(self.len() - 1)
.chain(std::iter::once(false)),
))
let mut validity = MutableBitmap::new();
validity.extend_constant(self.len(), true);
validity.set(self.len() - 1, false);
self.validity = Some(validity)
}

/// Changes the arrays' [`DataType`], returning a new [`MutablePrimitiveArray`].
Expand Down
38 changes: 37 additions & 1 deletion tests/it/array/fixed_size_binary/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,44 @@
use arrow2::array::*;
use arrow2::bitmap::Bitmap;
use arrow2::bitmap::{Bitmap, MutableBitmap};
use arrow2::buffer::MutableBuffer;
use arrow2::datatypes::DataType;

#[test]
fn basic() {
let a = MutableFixedSizeBinaryArray::from_data(2, MutableBuffer::from([1, 2, 3, 4]), None);
assert_eq!(a.len(), 2);
assert_eq!(a.data_type(), &DataType::FixedSizeBinary(2));
assert_eq!(a.values(), &MutableBuffer::from([1, 2, 3, 4]));
assert_eq!(a.validity(), &None);
assert_eq!(a.value(1), &[3, 4]);
assert_eq!(unsafe { a.value_unchecked(1) }, &[3, 4]);
}

#[allow(clippy::eq_op)]
#[test]
fn equal() {
let a = MutableFixedSizeBinaryArray::from_data(2, MutableBuffer::from([1, 2, 3, 4]), None);
assert_eq!(a, a);
let b = MutableFixedSizeBinaryArray::from_data(2, MutableBuffer::from([1, 2]), None);
assert_eq!(b, b);
assert!(a != b);
let a = MutableFixedSizeBinaryArray::from_data(
2,
MutableBuffer::from([1, 2, 3, 4]),
Some(MutableBitmap::from([true, false])),
);
let b = MutableFixedSizeBinaryArray::from_data(
2,
MutableBuffer::from([1, 2, 3, 4]),
Some(MutableBitmap::from([false, true])),
);
assert_eq!(a, a);
assert_eq!(b, b);
assert!(a != b);
}

#[test]
fn try_from_iter() {
let array = MutableFixedSizeBinaryArray::try_from_iter(
vec![Some(b"ab"), Some(b"bc"), None, Some(b"fh")],
2,
Expand Down