diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index ba7f635de9f..1805e5e33f3 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -116,7 +116,7 @@ impl BinaryArray { pub fn new_null(data_type: DataType, length: usize) -> Self { Self::new( data_type, - Buffer::new_zeroed(length + 1), + vec![O::default(); 1 + length].into(), Buffer::new(), Some(Bitmap::new_zeroed(length)), ) diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index 2a843842dc0..4209998ec1f 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -89,7 +89,7 @@ impl FixedSizeBinaryArray { let size = Self::maybe_get_size(&data_type).unwrap(); Self::new( data_type, - Buffer::new_zeroed(length * size), + vec![0u8; length * size].into(), Some(Bitmap::new_zeroed(length)), ) } diff --git a/src/array/list/mod.rs b/src/array/list/mod.rs index 1a3de45a342..c5ab495559f 100644 --- a/src/array/list/mod.rs +++ b/src/array/list/mod.rs @@ -116,7 +116,7 @@ impl ListArray { let child = Self::get_child_type(&data_type).clone(); Self::new( data_type, - Buffer::new_zeroed(length + 1), + vec![O::default(); 1 + length].into(), new_empty_array(child).into(), Some(Bitmap::new_zeroed(length)), ) diff --git a/src/array/map/mod.rs b/src/array/map/mod.rs index b69ac7e2ac7..2c667b29583 100644 --- a/src/array/map/mod.rs +++ b/src/array/map/mod.rs @@ -106,7 +106,7 @@ impl MapArray { let field = new_empty_array(Self::get_field(&data_type).data_type().clone()).into(); Self::new( data_type, - Buffer::new_zeroed(length + 1), + vec![0i32; 1 + length].into(), field, Some(Bitmap::new_zeroed(length)), ) diff --git a/src/array/mod.rs b/src/array/mod.rs index 5a0b84b224e..0be802df2f1 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -52,7 +52,7 @@ pub trait Array: Send + Sync { /// The number of null slots on this [`Array`]. /// # Implementation - /// This is `O(1)`. 
+ /// This is `O(1)` since the number of null elements is pre-computed. #[inline] fn null_count(&self) -> usize { if self.data_type() == &DataType::Null { diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index ae753549f75..7ba44345cee 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -305,7 +305,7 @@ impl PrimitiveArray { pub fn new_null(data_type: DataType, length: usize) -> Self { Self::new( data_type, - Buffer::new_zeroed(length), + vec![T::default(); length].into(), Some(Bitmap::new_zeroed(length)), ) } diff --git a/src/array/union/mod.rs b/src/array/union/mod.rs index 925a156c11d..dfd87ef6af0 100644 --- a/src/array/union/mod.rs +++ b/src/array/union/mod.rs @@ -142,7 +142,7 @@ impl UnionArray { }; // all from the same field - let types = Buffer::new_zeroed(length); + let types = vec![0i8; length].into(); Self::new(data_type, types, fields, offsets) } else { diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs index c216ae0ab59..8c8813c14b5 100644 --- a/src/array/utf8/mod.rs +++ b/src/array/utf8/mod.rs @@ -344,7 +344,7 @@ impl Utf8Array { pub fn new_null(data_type: DataType, length: usize) -> Self { Self::new( data_type, - Buffer::new_zeroed(length + 1), + vec![O::default(); 1 + length].into(), Buffer::new(), Some(Bitmap::new_zeroed(length)), ) diff --git a/src/buffer/immutable.rs b/src/buffer/immutable.rs index 1f6774f0a81..758de74f29e 100644 --- a/src/buffer/immutable.rs +++ b/src/buffer/immutable.rs @@ -1,18 +1,38 @@ use either::Either; use std::{iter::FromIterator, sync::Arc, usize}; -use crate::{trusted_len::TrustedLen, types::NativeType}; +use crate::types::NativeType; use super::bytes::Bytes; -/// [`Buffer`] is a contiguous memory region that can -/// be shared across thread boundaries. +/// [`Buffer`] is a contiguous memory region that can be shared across thread boundaries. 
+/// The easiest way to think about `Buffer` is being equivalent to -/// an immutable `Vec`, with the following differences: +/// an `Arc<Vec<T>>`, with the following differences: /// * `T` must be [`NativeType`] -/// * clone is `O(1)` -/// * memory is sharable across thread boundaries (it is under an `Arc`) -/// * it supports external allocated memory (FFI) +/// * slicing the buffer is `O(1)` +/// * it supports externally allocated memory (via FFI) +/// +/// The easiest way to create one is to use its implementation of `From` a `Vec`. +/// +/// # Examples +/// ``` +/// use arrow2::buffer::Buffer; +/// +/// let buffer: Buffer = vec![1, 2, 3].into(); +/// assert_eq!(buffer.as_ref(), [1, 2, 3].as_ref()); +/// +/// // it supports copy-on-write semantics (i.e. back to a `Vec`) +/// let vec: Vec = buffer.into_mut().right().unwrap(); +/// assert_eq!(vec, vec![1, 2, 3]); +/// +/// // cloning and slicing are `O(1)` (data is shared) +/// let buffer: Buffer = vec![1, 2, 3].into(); +/// let slice = buffer.clone().slice(1, 1); +/// assert_eq!(slice.as_ref(), [2].as_ref()); +/// // no longer possible to get a vec since `slice` and `buffer` share data +/// let same: Buffer = buffer.into_mut().left().unwrap(); +/// ``` #[derive(Clone, PartialEq)] pub struct Buffer { /// the internal byte buffer. @@ -46,20 +66,6 @@ impl Buffer { Self::default() } - /// Creates a new [`Buffer`] filled with zeros. - #[inline] - pub fn new_zeroed(length: usize) -> Self { - vec![T::default(); length].into() - } - - /// Takes ownership of [`Vec`]. - /// # Implementation - /// This function is `O(1)` - #[inline] - pub fn from_slice>(data: R) -> Self { - data.as_ref().to_vec().into() - } - /// Auxiliary method to create a new Buffer pub(crate) fn from_bytes(bytes: Bytes) -> Self { let length = bytes.len(); @@ -153,53 +159,6 @@ impl Buffer { } } -impl Buffer { - /// Creates a [`Buffer`] from an [`Iterator`] with a trusted length. 
- /// Prefer this to `collect` whenever possible, as it often enables auto-vectorization. - /// # Example - /// ``` - /// # use arrow2::buffer::Buffer; - /// let v = vec![1u32]; - /// let iter = v.iter().map(|x| x * 2); - /// let buffer = unsafe { Buffer::from_trusted_len_iter(iter) }; - /// assert_eq!(buffer.len(), 1) - /// ``` - #[inline] - pub fn from_trusted_len_iter>(iterator: I) -> Self { - iterator.collect::>().into() - } - - /// Creates a [`Buffer`] from an fallible [`Iterator`] with a trusted length. - #[inline] - pub fn try_from_trusted_len_iter>>( - iterator: I, - ) -> std::result::Result { - Ok(iterator.collect::, E>>()?.into()) - } - - /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length. - /// # Safety - /// This method assumes that the iterator's size is correct and is undefined behavior - /// to use it on an iterator that reports an incorrect length. - #[inline] - pub unsafe fn from_trusted_len_iter_unchecked>(iterator: I) -> Self { - iterator.collect::>().into() - } - - /// # Safety - /// This method assumes that the iterator's size is correct and is undefined behavior - /// to use it on an iterator that reports an incorrect length. - #[inline] - pub unsafe fn try_from_trusted_len_iter_unchecked< - E, - I: Iterator>, - >( - iterator: I, - ) -> std::result::Result { - Ok(iterator.collect::, E>>()?.into()) - } -} - impl From> for Buffer { #[inline] fn from(p: Vec) -> Self {