Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Remove from_trusted_len_* from Buffer #1020

Merged
merged 3 commits into from
May 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 4 additions & 10 deletions guide/src/high_level.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,12 +180,7 @@ What this means is that certain operations can be performed irrespectively of wh
is "null" or not (e.g. `PrimitiveArray<i32> + i32` can be applied to _all_ values
via SIMD and only copy the validity bitmap independently).

When an operation benefits from such arrangement, it is advantageous to use

* `Buffer::from_iter`
* `Buffer::from_trusted_len_iter`
* `Buffer::try_from_trusted_len_iter`

When an operation benefits from such arrangement, it is advantageous to use `Vec` and `Into<Buffer>`
If not, then use the `MutableArray` API, such as
`MutablePrimitiveArray<T>`, `MutableUtf8Array<O>` or `MutableListArray`.

Expand Down Expand Up @@ -254,12 +249,11 @@ where
O: NativeType,
F: Fn(I) -> O,
{
// create the iterator over _all_ values
let values = array.values().iter().map(|v| op(*v));
let values = Buffer::from_trusted_len_iter(values);
// apply F over _all_ values
let values = array.values().iter().map(|v| op(*v)).collect::<Vec<_>>();

// create the new array, cloning its validity
PrimitiveArray::<O>::from_data(data_type.clone(), values, array.validity().cloned())
PrimitiveArray::<O>::from_data(data_type.clone(), values.into(), array.validity().cloned())
}
```

Expand Down
2 changes: 1 addition & 1 deletion src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ impl<O: Offset> BinaryArray<O> {
pub fn new_null(data_type: DataType, length: usize) -> Self {
Self::new(
data_type,
Buffer::new_zeroed(length + 1),
vec![O::default(); 1 + length].into(),
Buffer::new(),
Some(Bitmap::new_zeroed(length)),
)
Expand Down
2 changes: 1 addition & 1 deletion src/array/fixed_size_binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ impl FixedSizeBinaryArray {
let size = Self::maybe_get_size(&data_type).unwrap();
Self::new(
data_type,
Buffer::new_zeroed(length * size),
vec![0u8; length * size].into(),
Some(Bitmap::new_zeroed(length)),
)
}
Expand Down
2 changes: 1 addition & 1 deletion src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ impl<O: Offset> ListArray<O> {
let child = Self::get_child_type(&data_type).clone();
Self::new(
data_type,
Buffer::new_zeroed(length + 1),
vec![O::default(); 1 + length].into(),
new_empty_array(child).into(),
Some(Bitmap::new_zeroed(length)),
)
Expand Down
2 changes: 1 addition & 1 deletion src/array/map/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ impl MapArray {
let field = new_empty_array(Self::get_field(&data_type).data_type().clone()).into();
Self::new(
data_type,
Buffer::new_zeroed(length + 1),
vec![0i32; 1 + length].into(),
field,
Some(Bitmap::new_zeroed(length)),
)
Expand Down
2 changes: 1 addition & 1 deletion src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ pub trait Array: Send + Sync {

/// The number of null slots on this [`Array`].
/// # Implementation
/// This is `O(1)`.
/// This is `O(1)` since the number of null elements is pre-computed.
#[inline]
fn null_count(&self) -> usize {
if self.data_type() == &DataType::Null {
Expand Down
2 changes: 1 addition & 1 deletion src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ impl<T: NativeType> PrimitiveArray<T> {
pub fn new_null(data_type: DataType, length: usize) -> Self {
Self::new(
data_type,
Buffer::new_zeroed(length),
vec![T::default(); length].into(),
Some(Bitmap::new_zeroed(length)),
)
}
Expand Down
2 changes: 1 addition & 1 deletion src/array/union/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ impl UnionArray {
};

// all from the same field
let types = Buffer::new_zeroed(length);
let types = vec![0i8; length].into();

Self::new(data_type, types, fields, offsets)
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ impl<O: Offset> Utf8Array<O> {
pub fn new_null(data_type: DataType, length: usize) -> Self {
Self::new(
data_type,
Buffer::new_zeroed(length + 1),
vec![O::default(); 1 + length].into(),
Buffer::new(),
Some(Bitmap::new_zeroed(length)),
)
Expand Down
95 changes: 27 additions & 68 deletions src/buffer/immutable.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,38 @@
use either::Either;
use std::{iter::FromIterator, sync::Arc, usize};

use crate::{trusted_len::TrustedLen, types::NativeType};
use crate::types::NativeType;

use super::bytes::Bytes;

/// [`Buffer`] is a contiguous memory region that can
/// be shared across thread boundaries.
/// [`Buffer`] is a contiguous memory region that can be shared across thread boundaries.
///
/// The easiest way to think about `Buffer<T>` is being equivalent to
/// an immutable `Vec<T>`, with the following differences:
/// a `Arc<Vec<T>>`, with the following differences:
/// * `T` must be [`NativeType`]
/// * clone is `O(1)`
/// * memory is sharable across thread boundaries (it is under an `Arc`)
/// * it supports external allocated memory (FFI)
/// * slicing the buffer is `O(1)`
/// * it supports external allocated memory (via FFI)
///
/// The easiest way to create one is to use its implementation of `From` a `Vec`.
///
/// # Examples
/// ```
/// use arrow2::buffer::Buffer;
///
/// let buffer: Buffer<u32> = vec![1, 2, 3].into();
/// assert_eq!(buffer.as_ref(), [1, 2, 3].as_ref());
///
/// // it supports copy-on-write semantics (i.e. back to a `Vec`)
/// let vec: Vec<u32> = buffer.into_mut().right().unwrap();
/// assert_eq!(vec, vec![1, 2, 3]);
///
/// // cloning and slicing is `O(1)` (data is shared)
/// let buffer: Buffer<u32> = vec![1, 2, 3].into();
/// let slice = buffer.clone().slice(1, 1);
/// assert_eq!(slice.as_ref(), [2].as_ref());
/// // no longer possible to get a vec since `slice` and `buffer` share data
/// let same: Buffer<u32> = buffer.into_mut().left().unwrap();
/// ```
#[derive(Clone, PartialEq)]
pub struct Buffer<T: NativeType> {
/// the internal byte buffer.
Expand Down Expand Up @@ -46,20 +66,6 @@ impl<T: NativeType> Buffer<T> {
Self::default()
}

/// Creates a new [`Buffer`] filled with zeros.
#[inline]
pub fn new_zeroed(length: usize) -> Self {
vec![T::default(); length].into()
}

/// Takes ownership of [`Vec`].
/// # Implementation
/// This function is `O(1)`
#[inline]
pub fn from_slice<R: AsRef<[T]>>(data: R) -> Self {
data.as_ref().to_vec().into()
}

/// Auxiliary method to create a new Buffer
pub(crate) fn from_bytes(bytes: Bytes<T>) -> Self {
let length = bytes.len();
Expand Down Expand Up @@ -153,53 +159,6 @@ impl<T: NativeType> Buffer<T> {
}
}

impl<T: NativeType> Buffer<T> {
/// Creates a [`Buffer`] from an [`Iterator`] with a trusted length.
/// Prefer this to `collect` whenever possible, as it often enables auto-vectorization.
/// # Example
/// ```
/// # use arrow2::buffer::Buffer;
/// let v = vec![1u32];
/// let iter = v.iter().map(|x| x * 2);
/// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) };
/// assert_eq!(buffer.len(), 1)
/// ```
#[inline]
pub fn from_trusted_len_iter<I: TrustedLen<Item = T>>(iterator: I) -> Self {
iterator.collect::<Vec<_>>().into()
}

/// Creates a [`Buffer`] from an fallible [`Iterator`] with a trusted length.
#[inline]
pub fn try_from_trusted_len_iter<E, I: TrustedLen<Item = std::result::Result<T, E>>>(
iterator: I,
) -> std::result::Result<Self, E> {
Ok(iterator.collect::<std::result::Result<Vec<_>, E>>()?.into())
}

/// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length.
/// # Safety
/// This method assumes that the iterator's size is correct and is undefined behavior
/// to use it on an iterator that reports an incorrect length.
#[inline]
pub unsafe fn from_trusted_len_iter_unchecked<I: Iterator<Item = T>>(iterator: I) -> Self {
iterator.collect::<Vec<_>>().into()
}

/// # Safety
/// This method assumes that the iterator's size is correct and is undefined behavior
/// to use it on an iterator that reports an incorrect length.
#[inline]
pub unsafe fn try_from_trusted_len_iter_unchecked<
E,
I: Iterator<Item = std::result::Result<T, E>>,
>(
iterator: I,
) -> std::result::Result<Self, E> {
Ok(iterator.collect::<std::result::Result<Vec<_>, E>>()?.into())
}
}

impl<T: NativeType> From<Vec<T>> for Buffer<T> {
#[inline]
fn from(p: Vec<T>) -> Self {
Expand Down
20 changes: 10 additions & 10 deletions tests/it/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,24 +86,24 @@ fn with_validity() {
#[test]
#[should_panic]
fn wrong_offsets() {
let offsets = Buffer::from_slice([0, 5, 4]); // invalid offsets
let values = Buffer::from_slice(b"abbbbb");
let offsets = Buffer::from(vec![0, 5, 4]); // invalid offsets
let values = Buffer::from(b"abbbbb".to_vec());
BinaryArray::<i32>::from_data(DataType::Binary, offsets, values, None);
}

#[test]
#[should_panic]
fn wrong_data_type() {
let offsets = Buffer::from_slice([0, 4]);
let values = Buffer::from_slice(b"abbb");
let offsets = Buffer::from(vec![0, 4]);
let values = Buffer::from(b"abbb".to_vec());
BinaryArray::<i32>::from_data(DataType::Int8, offsets, values, None);
}

#[test]
#[should_panic]
fn value_with_wrong_offsets_panics() {
let offsets = Buffer::from_slice([0, 10, 11, 4]);
let values = Buffer::from_slice(b"abbb");
let offsets = Buffer::from(vec![0, 10, 11, 4]);
let values = Buffer::from(b"abbb".to_vec());
// the 10-11 is not checked
let array = BinaryArray::<i32>::from_data(DataType::Binary, offsets, values, None);

Expand All @@ -115,8 +115,8 @@ fn value_with_wrong_offsets_panics() {
#[test]
#[should_panic]
fn index_out_of_bounds_panics() {
let offsets = Buffer::from_slice([0, 1, 2, 4]);
let values = Buffer::from_slice(b"abbb");
let offsets = Buffer::from(vec![0, 1, 2, 4]);
let values = Buffer::from(b"abbb".to_vec());
let array = BinaryArray::<i32>::from_data(DataType::Utf8, offsets, values, None);

array.value(3);
Expand All @@ -125,8 +125,8 @@ fn index_out_of_bounds_panics() {
#[test]
#[should_panic]
fn value_unchecked_with_wrong_offsets_panics() {
let offsets = Buffer::from_slice([0, 10, 11, 4]);
let values = Buffer::from_slice(b"abbb");
let offsets = Buffer::from(vec![0, 10, 11, 4]);
let values = Buffer::from(b"abbb".to_vec());
// the 10-11 is not checked
let array = BinaryArray::<i32>::from_data(DataType::Binary, offsets, values, None);

Expand Down
4 changes: 2 additions & 2 deletions tests/it/array/equal/list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ fn test_list_offsets() {

#[test]
fn test_bla() {
let offsets = Buffer::from_slice([0, 3, 3, 6]);
let offsets = Buffer::from(vec![0, 3, 3, 6]);
let data_type = ListArray::<i32>::default_datatype(DataType::Int32);
let values = Arc::new(Int32Array::from([
Some(1),
Expand All @@ -83,7 +83,7 @@ fn test_bla() {
let lhs = ListArray::<i32>::from_data(data_type, offsets, values, Some(validity));
let lhs = lhs.slice(1, 2);

let offsets = Buffer::from_slice([0, 0, 3]);
let offsets = Buffer::from(vec![0, 0, 3]);
let data_type = ListArray::<i32>::default_datatype(DataType::Int32);
let values = Arc::new(Int32Array::from([Some(4), None, Some(6)]));
let validity = Bitmap::from([false, true]);
Expand Down
14 changes: 7 additions & 7 deletions tests/it/array/fixed_size_binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ mod mutable;
fn basics() {
let array = FixedSizeBinaryArray::from_data(
DataType::FixedSizeBinary(2),
Buffer::from_slice([1, 2, 3, 4, 5, 6]),
Buffer::from(vec![1, 2, 3, 4, 5, 6]),
Some(Bitmap::from([true, false, true])),
);
assert_eq!(array.size(), 2);
Expand All @@ -23,15 +23,15 @@ fn basics() {

#[test]
fn with_validity() {
let values = Buffer::from_slice([1, 2, 3, 4, 5, 6]);
let values = Buffer::from(vec![1, 2, 3, 4, 5, 6]);
let a = FixedSizeBinaryArray::new(DataType::FixedSizeBinary(2), values, None);
let a = a.with_validity(Some(Bitmap::from([true, false, true])));
assert!(a.validity().is_some());
}

#[test]
fn debug() {
let values = Buffer::from_slice([1, 2, 3, 4, 5, 6]);
let values = Buffer::from(vec![1, 2, 3, 4, 5, 6]);
let a = FixedSizeBinaryArray::from_data(
DataType::FixedSizeBinary(2),
values,
Expand Down Expand Up @@ -66,26 +66,26 @@ fn from_iter() {

#[test]
fn wrong_size() {
let values = Buffer::from_slice(b"abb");
let values = Buffer::from(b"abb".to_vec());
assert!(FixedSizeBinaryArray::try_new(DataType::FixedSizeBinary(2), values, None).is_err());
}

#[test]
fn wrong_len() {
let values = Buffer::from_slice(b"abba");
let values = Buffer::from(b"abba".to_vec());
let validity = Some([true, false, false].into()); // it should be 2
assert!(FixedSizeBinaryArray::try_new(DataType::FixedSizeBinary(2), values, validity).is_err());
}

#[test]
fn wrong_data_type() {
let values = Buffer::from_slice(b"abba");
let values = Buffer::from(b"abba".to_vec());
assert!(FixedSizeBinaryArray::try_new(DataType::Binary, values, None).is_err());
}

#[test]
fn to() {
let values = Buffer::from_slice(b"abba");
let values = Buffer::from(b"abba".to_vec());
let a = FixedSizeBinaryArray::new(DataType::FixedSizeBinary(2), values, None);

let extension = DataType::Extension(
Expand Down
Loading