This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Added Offsets and OffsetsBuffer
jorgecarleitao committed Dec 4, 2022
1 parent 9ea25f0 commit ab38982
Showing 40 changed files with 842 additions and 926 deletions.
13 changes: 7 additions & 6 deletions src/array/binary/ffi.rs
@@ -2,7 +2,7 @@ use crate::{
array::{FromFfi, ToFfi},
bitmap::align,
ffi,
offset::Offset,
offset::{Offset, OffsetsBuffer},
};

use crate::error::Result;
@@ -19,7 +19,7 @@ unsafe impl<O: Offset> ToFfi for BinaryArray<O> {
}

fn offset(&self) -> Option<usize> {
let offset = self.offsets.offset();
let offset = self.offsets.buffer().offset();
if let Some(bitmap) = self.validity.as_ref() {
if bitmap.offset() == offset {
Some(offset)
@@ -32,7 +32,7 @@ unsafe impl<O: Offset> ToFfi for BinaryArray<O> {
}

fn to_ffi_aligned(&self) -> Self {
let offset = self.offsets.offset();
let offset = self.offsets.buffer().offset();

let validity = self.validity.as_ref().map(|bitmap| {
if bitmap.offset() == offset {
@@ -59,8 +59,9 @@ impl<O: Offset, A: ffi::ArrowArrayRef> FromFfi<A> for BinaryArray<O> {
let offsets = unsafe { array.buffer::<O>(1) }?;
let values = unsafe { array.buffer::<u8>(2) }?;

Ok(Self::from_data_unchecked(
data_type, offsets, values, validity,
))
// assumption that data from FFI is well constructed
let offsets = unsafe { OffsetsBuffer::new_unchecked(offsets) };

Ok(Self::new(data_type, offsets, values, validity))
}
}
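In the FFI import above, the raw offsets buffer read over the C data interface is wrapped with `OffsetsBuffer::new_unchecked`, on the stated assumption that data arriving through FFI is well constructed, and the array is then assembled with the safe `new`. Below is a minimal sketch of the two construction paths outside FFI. The `OffsetsBuffer` definition is not part of this page; the checked `TryFrom<Vec<O>>` path is inferred from the `new_null` change further down and should be treated as an assumption.

use arrow2::{buffer::Buffer, offset::OffsetsBuffer};

fn offsets_example() {
    // Checked path (assumed `TryFrom<Vec<i32>>`): validates that the offsets
    // are monotonically increasing and returns an error otherwise.
    let checked: OffsetsBuffer<i32> = vec![0, 2, 5]
        .try_into()
        .expect("offsets must be monotonically increasing");

    // Unchecked path, mirroring the FFI import above: the caller vouches for
    // the invariants and skips the O(N) monotonicity check.
    let raw: Buffer<i32> = vec![0, 2, 5].into();
    let unchecked = unsafe { OffsetsBuffer::new_unchecked(raw) };

    assert_eq!(checked.len(), unchecked.len());
}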
123 changes: 20 additions & 103 deletions src/array/binary/mod.rs
@@ -6,16 +6,13 @@ use crate::{
buffer::Buffer,
datatypes::DataType,
error::Error,
offset::Offset,
offset::{Offset, OffsetsBuffer},
trusted_len::TrustedLen,
};

use either::Either;

use super::{
specification::{try_check_offsets, try_check_offsets_bounds},
Array, GenericBinaryArray,
};
use super::{specification::try_check_offsets_bounds, Array, GenericBinaryArray};

mod ffi;
pub(super) mod fmt;
@@ -60,7 +57,7 @@ pub use mutable::*;
#[derive(Clone)]
pub struct BinaryArray<O: Offset> {
data_type: DataType,
offsets: Buffer<O>,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
}
@@ -70,19 +67,18 @@ impl<O: Offset> BinaryArray<O> {
///
/// # Errors
/// This function returns an error iff:
/// * the offsets are not monotonically increasing
/// * The last offset is not equal to the values' length.
/// * the validity's length is not equal to `offsets.len() - 1`.
/// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`.
/// # Implementation
/// This function is `O(N)` - checking monotonicity is `O(N)`
pub fn try_new(
data_type: DataType,
offsets: Buffer<O>,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Result<Self, Error> {
try_check_offsets(&offsets, values.len())?;
try_check_offsets_bounds(&offsets, values.len())?;

if validity
.as_ref()
@@ -131,7 +127,7 @@ impl<O: Offset> BinaryArray<O> {
/// Returns the length of this array
#[inline]
pub fn len(&self) -> usize {
self.offsets.len() - 1
self.offsets.len()
}

/// Returns the element at index `i`
@@ -170,7 +166,7 @@ impl<O: Offset> BinaryArray<O> {

/// Returns the offsets of this [`BinaryArray`].
#[inline]
pub fn offsets(&self) -> &Buffer<O> {
pub fn offsets(&self) -> &OffsetsBuffer<O> {
&self.offsets
}

@@ -251,21 +247,16 @@ impl<O: Offset> BinaryArray<O> {
match bitmap.into_mut() {
// Safety: invariants are preserved
Left(bitmap) => Left(unsafe {
BinaryArray::new_unchecked(
self.data_type,
self.offsets,
self.values,
Some(bitmap),
)
BinaryArray::new(self.data_type, self.offsets, self.values, Some(bitmap))
}),
Right(mutable_bitmap) => match (
self.values.get_mut().map(std::mem::take),
self.offsets.get_mut().map(std::mem::take),
self.offsets.get_mut(),
) {
(None, None) => {
// Safety: invariants are preserved
Left(unsafe {
BinaryArray::new_unchecked(
BinaryArray::new(
self.data_type,
self.offsets,
self.values,
@@ -276,7 +267,7 @@ impl<O: Offset> BinaryArray<O> {
(None, Some(offsets)) => {
// Safety: invariants are preserved
Left(unsafe {
BinaryArray::new_unchecked(
BinaryArray::new(
self.data_type,
offsets.into(),
self.values,
@@ -287,7 +278,7 @@ impl<O: Offset> BinaryArray<O> {
(Some(mutable_values), None) => {
// Safety: invariants are preserved
Left(unsafe {
BinaryArray::new_unchecked(
BinaryArray::new(
self.data_type,
self.offsets,
mutable_values.into(),
@@ -308,16 +299,16 @@ impl<O: Offset> BinaryArray<O> {
} else {
match (
self.values.get_mut().map(std::mem::take),
self.offsets.get_mut().map(std::mem::take),
self.offsets.get_mut(),
) {
(None, None) => Left(unsafe {
BinaryArray::new_unchecked(self.data_type, self.offsets, self.values, None)
BinaryArray::new(self.data_type, self.offsets, self.values, None)
}),
(None, Some(offsets)) => Left(unsafe {
BinaryArray::new_unchecked(self.data_type, offsets.into(), self.values, None)
BinaryArray::new(self.data_type, offsets.into(), self.values, None)
}),
(Some(values), None) => Left(unsafe {
BinaryArray::new_unchecked(self.data_type, self.offsets, values.into(), None)
BinaryArray::new(self.data_type, self.offsets, values.into(), None)
}),
(Some(values), Some(offsets)) => Right(unsafe {
MutableBinaryArray::from_data(self.data_type, offsets, values, None)
@@ -328,20 +319,15 @@ impl<O: Offset> BinaryArray<O> {

/// Creates an empty [`BinaryArray`], i.e. whose `.len` is zero.
pub fn new_empty(data_type: DataType) -> Self {
Self::new(
data_type,
Buffer::from(vec![O::zero()]),
Buffer::new(),
None,
)
Self::new(data_type, OffsetsBuffer::new(), Buffer::new(), None)
}

/// Creates a null [`BinaryArray`], i.e. whose `.null_count() == .len()`.
#[inline]
pub fn new_null(data_type: DataType, length: usize) -> Self {
Self::new(
data_type,
vec![O::default(); 1 + length].into(),
vec![O::default(); 1 + length].try_into().unwrap(),
Buffer::new(),
Some(Bitmap::new_zeroed(length)),
)
@@ -356,72 +342,16 @@ impl<O: Offset> BinaryArray<O> {
}
}

/// Creates a new [`BinaryArray`] without checking for offsets monotonicity.
///
/// # Errors
/// This function returns an error iff:
/// * The last offset is not equal to the values' length.
/// * the validity's length is not equal to `offsets.len() - 1`.
/// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`.
/// # Safety
/// This function is unsafe iff:
/// * the offsets are not monotonically increasing
/// # Implementation
/// This function is `O(1)`
pub unsafe fn try_new_unchecked(
data_type: DataType,
offsets: Buffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Result<Self, Error> {
try_check_offsets_bounds(&offsets, values.len())?;

if validity
.as_ref()
.map_or(false, |validity| validity.len() != offsets.len() - 1)
{
return Err(Error::oos(
"validity mask length must match the number of values",
));
}

if data_type.to_physical_type() != Self::default_data_type().to_physical_type() {
return Err(Error::oos(
"BinaryArray can only be initialized with DataType::Binary or DataType::LargeBinary",
));
}

Ok(Self {
data_type,
offsets,
values,
validity,
})
}

/// Alias for unwrapping [`Self::try_new`]
pub fn new(
data_type: DataType,
offsets: Buffer<O>,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Self {
Self::try_new(data_type, offsets, values, validity).unwrap()
}

/// Alias for unwrapping [`Self::try_new_unchecked`]
/// # Safety
/// This function is unsafe iff:
/// * the offsets are not monotonically increasing
pub unsafe fn new_unchecked(
data_type: DataType,
offsets: Buffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Self {
Self::try_new_unchecked(data_type, offsets, values, validity).unwrap()
}

/// Returns a [`BinaryArray`] from an iterator of trusted length.
///
/// The [`BinaryArray`] is guaranteed to not have a validity
@@ -487,23 +417,10 @@ impl<O: Offset> BinaryArray<O> {
unsafe { Self::try_from_trusted_len_iter_unchecked(iter) }
}

/// Alias for [`Self::new_unchecked`]
/// # Safety
/// This function is unsafe iff:
/// * the offsets are not monotonically increasing
pub unsafe fn from_data_unchecked(
data_type: DataType,
offsets: Buffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Self {
Self::new_unchecked(data_type, offsets, values, validity)
}

/// Alias for `new`
pub fn from_data(
data_type: DataType,
offsets: Buffer<O>,
offsets: OffsetsBuffer<O>,
values: Buffer<u8>,
validity: Option<Bitmap>,
) -> Self {
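The net effect on `BinaryArray` is that `try_new` now receives offsets whose monotonicity was already established when the `OffsetsBuffer` was built, so only the bounds, the validity length, and the physical type remain to be checked. The following construction sketch is hedged: the `TryFrom<Vec<i32>>` conversion is inferred from the `new_null` hunk above, not shown explicitly in this commit.

use arrow2::{array::BinaryArray, buffer::Buffer, datatypes::DataType, offset::OffsetsBuffer};

fn build_binary() -> BinaryArray<i32> {
    // Two values: b"ab" (offsets 0..2) and b"cde" (offsets 2..5).
    let offsets: OffsetsBuffer<i32> = vec![0, 2, 5].try_into().unwrap();
    let values: Buffer<u8> = b"abcde".to_vec().into();

    // With monotonicity guaranteed by `OffsetsBuffer`, `try_new` only checks
    // the last offset against `values.len()`, the validity length, and the
    // physical type.
    let array = BinaryArray::try_new(DataType::Binary, offsets, values, None).unwrap();

    // `len()` now delegates to the offsets instead of computing
    // `offsets.len() - 1` on a raw buffer.
    assert_eq!(array.len(), 2);
    array
}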
45 changes: 11 additions & 34 deletions src/array/binary/mutable.rs
@@ -8,7 +8,7 @@ use crate::{
},
datatypes::DataType,
error::{Error, Result},
offset::Offset,
offset::{Offset, Offsets},
trusted_len::TrustedLen,
};

@@ -54,15 +54,14 @@ impl<O: Offset> MutableBinaryArray<O> {
///
/// # Errors
/// This function returns an error iff:
/// * the offsets are not monotonically increasing
/// * The last offset is not equal to the values' length.
/// * the validity's length is not equal to `offsets.len() - 1`.
/// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either `Binary` or `LargeBinary`.
/// # Implementation
/// This function is `O(N)` - checking monotonicity is `O(N)`
pub fn try_new(
data_type: DataType,
offsets: Vec<O>,
offsets: Offsets<O>,
values: Vec<u8>,
validity: Option<MutableBitmap>,
) -> Result<Self> {
@@ -80,26 +79,6 @@ impl<O: Offset> MutableBinaryArray<O> {
Ok(Self { values, validity })
}

/// Create a [`MutableBinaryArray`] out of its inner attributes.
/// # Safety
/// The caller must ensure that every value between offsets is a valid utf8.
/// # Panics
/// This function panics iff:
/// * The `offsets` and `values` are inconsistent
/// * The validity is not `None` and its length is different from `offsets`'s length minus one.
pub unsafe fn new_unchecked(
data_type: DataType,
offsets: Vec<O>,
values: Vec<u8>,
validity: Option<MutableBitmap>,
) -> Self {
let values = MutableBinaryValuesArray::new_unchecked(data_type, offsets, values);
if let Some(ref validity) = validity {
assert_eq!(values.len(), validity.len());
}
Self { values, validity }
}

/// Creates a new [`MutableBinaryArray`] from a slice of optional `&[u8]`.
// Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
pub fn from<T: AsRef<[u8]>, P: AsRef<[Option<T>]>>(slice: P) -> Self {
@@ -185,7 +164,7 @@ impl<O: Offset> MutableBinaryArray<O> {
/// Equivalent to `Self::try_new(...).unwrap()`
pub fn from_data(
data_type: DataType,
offsets: Vec<O>,
offsets: Offsets<O>,
values: Vec<u8>,
validity: Option<MutableBitmap>,
) -> Self {
Expand All @@ -200,7 +179,7 @@ impl<O: Offset> MutableBinaryArray<O> {
}

/// returns its offsets.
pub fn offsets(&self) -> &Vec<O> {
pub fn offsets(&self) -> &Offsets<O> {
self.values.offsets()
}

@@ -229,14 +208,12 @@ impl<O: Offset> MutableArray for MutableBinaryArray<O> {
// `MutableBinaryArray` has the same invariants as `BinaryArray` and thus
// `BinaryArray` can be safely created from `MutableBinaryArray` without checks.
let (data_type, offsets, values) = std::mem::take(&mut self.values).into_inner();
unsafe {
BinaryArray::new_unchecked(
data_type,
offsets.into(),
values.into(),
std::mem::take(&mut self.validity).map(|x| x.into()),
)
}
BinaryArray::new(
data_type,
offsets.into(),
values.into(),
std::mem::take(&mut self.validity).map(|x| x.into()),
)
.boxed()
}

@@ -246,7 +223,7 @@ impl<O: Offset> MutableArray for MutableBinaryArray<O> {
// `BinaryArray` can be safely created from `MutableBinaryArray` without checks.
let (data_type, offsets, values) = std::mem::take(&mut self.values).into_inner();
unsafe {
BinaryArray::new_unchecked(
BinaryArray::new(
data_type,
offsets.into(),
values.into(),
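On the mutable side, `Offsets<O>` plays the growable role and is converted into an `OffsetsBuffer<O>` when the array is frozen (the `offsets.into()` calls in the hunks above), which is why the conversion can now go through the safe `BinaryArray::new`. A small round-trip sketch follows; it assumes the crate-level re-exports and the `as_box`/`as_any` methods of the `MutableArray`/`Array` traits, which are not shown on this page.

use arrow2::array::{Array, BinaryArray, MutableArray, MutableBinaryArray};

fn freeze_round_trip() {
    // Grow the array on the mutable side; `Offsets<i32>` keeps the
    // monotonicity invariant as values are appended.
    let mut mutable =
        MutableBinaryArray::<i32>::from([Some(b"ab".as_ref()), None, Some(b"cde".as_ref())]);

    // Freezing converts `Offsets<i32>` into an `OffsetsBuffer<i32>`, so the
    // safe `BinaryArray::new` is enough and no unchecked constructor is needed.
    let frozen: Box<dyn Array> = mutable.as_box();
    let binary = frozen.as_any().downcast_ref::<BinaryArray<i32>>().unwrap();
    assert_eq!(binary.len(), 3);
    assert_eq!(binary.null_count(), 1);
}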
(Diffs for the remaining 37 changed files are not shown.)
