diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e98f656d3fe..e91b7f2f83a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -88,27 +88,6 @@ jobs: # --skip io: miri can't handle opening of files, so we skip those run: cargo miri test --features full -- --skip io::parquet --skip io::ipc - miri-checks-custom-allocator: - name: MIRI with custom allocator - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions-rs/toolchain@v1 - with: - toolchain: nightly-2021-12-10 - override: true - - uses: Swatinem/rust-cache@v1 - with: - key: key1 - - name: Install Miri - run: | - rustup component add miri - cargo miri setup - - - name: Run - # --skip io: miri can't handle opening of files, so we skip those - run: cargo miri test --features full,cache_aligned -- --skip io::parquet --skip io::ipc - coverage: name: Coverage runs-on: ubuntu-latest @@ -130,7 +109,6 @@ jobs: run: cargo install cargo-tarpaulin - name: Run coverage run: | - cargo tarpaulin --features cache_aligned --ignore-tests --out Xml cargo tarpaulin --features full --ignore-tests --out Xml - name: Report coverage continue-on-error: true diff --git a/Cargo.toml b/Cargo.toml index 49ad379efbf..98254ca0df5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -207,9 +207,6 @@ compute = [ io_parquet = ["parquet2", "io_ipc", "base64", "futures"] benchmarks = ["rand"] simd = ["packed_simd"] -# uses a custom allocator whose pointers are aligned along cache lines. -# Using this features makes `Buffer` and `MutableBuffer` incompatible with `Vec`. 
-cache_aligned = [] [package.metadata.cargo-all-features] allowlist = ["compute", "compute_sort", "compute_hash", "compute_nullif"] @@ -238,10 +235,6 @@ harness = false name = "count_zeros" harness = false -[[bench]] -name = "from_trusted_len_iter" -harness = false - [[bench]] name = "growable" harness = false diff --git a/benches/from_trusted_len_iter.rs b/benches/from_trusted_len_iter.rs deleted file mode 100644 index 6cefd9b05dc..00000000000 --- a/benches/from_trusted_len_iter.rs +++ /dev/null @@ -1,27 +0,0 @@ -use criterion::{criterion_group, criterion_main, Criterion}; - -use arrow2::{array::PrimitiveArray, bitmap::*, buffer::*}; - -fn add_benchmark(c: &mut Criterion) { - let values = 0..1026; - - let values = values.collect::>(); - c.bench_function("buffer", |b| { - b.iter(|| MutableBuffer::from_trusted_len_iter(values.clone().into_iter())) - }); - - let bools = values.clone().into_iter().map(|x| x % 5 == 0); - c.bench_function("bitmap", |b| { - b.iter(|| MutableBitmap::from_trusted_len_iter(bools.clone())) - }); - - let maybe_values = values - .into_iter() - .map(|x| if x % 5 == 0 { Some(x) } else { None }); - c.bench_function("primitive", |b| { - b.iter(|| PrimitiveArray::from_trusted_len_iter(maybe_values.clone())) - }); -} - -criterion_group!(benches, add_benchmark); -criterion_main!(benches); diff --git a/benches/iter_list.rs b/benches/iter_list.rs index c629cb4ba4c..24af30591c7 100644 --- a/benches/iter_list.rs +++ b/benches/iter_list.rs @@ -6,7 +6,7 @@ use criterion::{criterion_group, criterion_main, Criterion}; use arrow2::{ array::{ListArray, PrimitiveArray}, bitmap::Bitmap, - buffer::{Buffer, MutableBuffer}, + buffer::Buffer, datatypes::DataType, }; @@ -17,7 +17,7 @@ fn add_benchmark(c: &mut Criterion) { let values = Buffer::from_iter(0..size as i32); let values = PrimitiveArray::::from_data(DataType::Int32, values, None); - let mut offsets = MutableBuffer::from_iter((0..size as i32).step_by(2)); + let mut offsets = (0..size as 
i32).step_by(2).collect::>(); offsets.push(size as i32); let validity = (0..(offsets.len() - 1)) diff --git a/guide/src/low_level.md b/guide/src/low_level.md index 9f1bc332e1c..aa638a704b6 100644 --- a/guide/src/low_level.md +++ b/guide/src/low_level.md @@ -20,7 +20,7 @@ These hold _all_ data-related memory in this crate. Due to their intrinsic immutability, each container has a corresponding mutable (and non-shareable) variant: -* `MutableBuffer` +* `Vec` * `MutableBitmap` Let's see how these structures are used. @@ -30,43 +30,16 @@ Create a new `Buffer`: ```rust # use arrow2::buffer::Buffer; # fn main() { -let x = Buffer::from(&[1u32, 2, 3]); +let x = vec![1u32, 2, 3]; +let x: Buffer = x.into(); assert_eq!(x.as_slice(), &[1u32, 2, 3]); -let x = x.slice(1, 2); +let x = x.slice(1, 2); // O(1) assert_eq!(x.as_slice(), &[2, 3]); # } ``` -Using a `MutableBuffer`: - -```rust -# use arrow2::buffer::MutableBuffer; -# fn main() { -let mut x: MutableBuffer = (0..3).collect(); -x[1] = 5; -x.push(10); -assert_eq!(x.as_slice(), &[0, 5, 2, 10]) -# } -``` - -The following demonstrates how to efficiently -perform an operation from an iterator of -[TrustedLen](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html): - -```rust -# use arrow2::buffer::MutableBuffer; -# fn main() { -let x = (0..1000).collect::>(); -let y = MutableBuffer::from_trusted_len_iter(x.iter().map(|x| x * 2)); -assert_eq!(y[50], 100); -# } -``` - -Using `from_trusted_len_iter` often causes the compiler to auto-vectorize. - -In this context, `MutableBuffer` has an almost identical API to Rust's `Vec`. 
-However, contrarily to `Vec`, `Buffer` and `MutableBuffer` only supports +Contrarily to `Vec`, `Buffer` (and all structs in this crate) only supports the following physical types: * `i8-i128` diff --git a/src/alloc/alignment.rs b/src/alloc/alignment.rs deleted file mode 100644 index dbf4602f83a..00000000000 --- a/src/alloc/alignment.rs +++ /dev/null @@ -1,119 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// NOTE: Below code is written for spatial/temporal prefetcher optimizations. Memory allocation -// should align well with usage pattern of cache access and block sizes on layers of storage levels from -// registers to non-volatile memory. These alignments are all cache aware alignments incorporated -// from [cuneiform](https://crates.io/crates/cuneiform) crate. This approach mimicks Intel TBB's -// cache_aligned_allocator which exploits cache locality and minimizes prefetch signals -// resulting in less round trip time between the layers of storage. -// For further info: https://software.intel.com/en-us/node/506094 - -// 32-bit architecture and things other than netburst microarchitecture are using 64 bytes. 
-/// Cache and allocation multiple alignment size -#[cfg(target_arch = "x86")] -pub const ALIGNMENT: usize = 1 << 6; - -// Intel x86_64: -// L2D streamer from L1: -// Loads data or instructions from memory to the second-level cache. To use the streamer, -// organize the data or instructions in blocks of 128 bytes, aligned on 128 bytes. -// - https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-optimization-manual.pdf -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "x86_64")] -pub const ALIGNMENT: usize = 1 << 7; - -// 24Kc: -// Data Line Size -// - https://s3-eu-west-1.amazonaws.com/downloads-mips/documents/MD00346-2B-24K-DTS-04.00.pdf -// - https://gitlab.e.foundation/e/devices/samsung/n7100/stable_android_kernel_samsung_smdk4412/commit/2dbac10263b2f3c561de68b4c369bc679352ccee -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "mips")] -pub const ALIGNMENT: usize = 1 << 5; -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "mips64")] -pub const ALIGNMENT: usize = 1 << 5; - -// Defaults for powerpc -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "powerpc")] -pub const ALIGNMENT: usize = 1 << 5; - -// Defaults for the ppc 64 -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "powerpc64")] -pub const ALIGNMENT: usize = 1 << 6; - -// e.g.: sifive -// - https://github.com/torvalds/linux/blob/master/Documentation/devicetree/bindings/riscv/sifive-l2-cache.txt#L41 -// in general all of them are the same. -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "riscv")] -pub const ALIGNMENT: usize = 1 << 6; - -// This size is same across all hardware for this architecture. 
-// - https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2s390_2include_2asm_2cache_8h.html -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "s390x")] -pub const ALIGNMENT: usize = 1 << 8; - -// This size is same across all hardware for this architecture. -// - https://docs.huihoo.com/doxygen/linux/kernel/3.7/arch_2sparc_2include_2asm_2cache_8h.html#a9400cc2ba37e33279bdbc510a6311fb4 -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "sparc")] -pub const ALIGNMENT: usize = 1 << 5; -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "sparc64")] -pub const ALIGNMENT: usize = 1 << 6; - -// On ARM cache line sizes are fixed. both v6 and v7. -// Need to add board specific or platform specific things later. -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "thumbv6")] -pub const ALIGNMENT: usize = 1 << 5; -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "thumbv7")] -pub const ALIGNMENT: usize = 1 << 5; - -// Operating Systems cache size determines this. -// Currently no way to determine this without runtime inference. -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "wasm32")] -pub const ALIGNMENT: usize = 1 << 6; - -// Same as v6 and v7. -// List goes like that: -// Cortex A, M, R, ARM v7, v7-M, Krait and NeoverseN uses this size. -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "arm")] -pub const ALIGNMENT: usize = 1 << 5; - -// Combined from 4 sectors. Volta says 128. -// Prevent chunk optimizations better to go to the default size. -// If you have smaller data with less padded functionality then use 32 with force option. 
-// - https://devtalk.nvidia.com/default/topic/803600/variable-cache-line-width-/ -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "nvptx")] -pub const ALIGNMENT: usize = 1 << 7; -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "nvptx64")] -pub const ALIGNMENT: usize = 1 << 7; - -// This size is same across all hardware for this architecture. -/// Cache and allocation multiple alignment size -#[cfg(target_arch = "aarch64")] -pub const ALIGNMENT: usize = 1 << 6; diff --git a/src/alloc/mod.rs b/src/alloc/mod.rs deleted file mode 100644 index 36b9f721583..00000000000 --- a/src/alloc/mod.rs +++ /dev/null @@ -1,128 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Defines memory-related functions, such as allocate/deallocate/reallocate memory -//! regions, cache and allocation alignments. 
- -use std::mem::size_of; -use std::ptr::NonNull; -use std::{ - alloc::{handle_alloc_error, Layout}, - sync::atomic::AtomicIsize, -}; - -use crate::types::NativeType; - -mod alignment; - -pub use alignment::ALIGNMENT; - -// If this number is not zero after all objects have been `drop`, there is a memory leak -static mut ALLOCATIONS: AtomicIsize = AtomicIsize::new(0); - -/// # Safety -/// This pointer may only be used to check if memory is allocated. -#[inline] -pub unsafe fn dangling() -> NonNull { - NonNull::new_unchecked(ALIGNMENT as *mut T) -} - -/// Allocates a cache-aligned memory region of `size` bytes with uninitialized values. -/// This is more performant than using [allocate_aligned_zeroed] when all bytes will have -/// an unknown or non-zero value and is semantically similar to `malloc`. -pub fn allocate_aligned(size: usize) -> NonNull { - unsafe { - if size == 0 { - dangling() - } else { - let size = size * size_of::(); - ALLOCATIONS.fetch_add(size as isize, std::sync::atomic::Ordering::SeqCst); - - let layout = Layout::from_size_align_unchecked(size, ALIGNMENT); - let raw_ptr = std::alloc::alloc(layout) as *mut T; - NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout)) - } - } -} - -/// Allocates a cache-aligned memory region of `size` bytes with `0` on all of them. -/// This is more performant than using [allocate_aligned] and setting all bytes to zero -/// and is semantically similar to `calloc`. -pub fn allocate_aligned_zeroed(size: usize) -> NonNull { - unsafe { - if size == 0 { - dangling() - } else { - let size = size * size_of::(); - ALLOCATIONS.fetch_add(size as isize, std::sync::atomic::Ordering::SeqCst); - - let layout = Layout::from_size_align_unchecked(size, ALIGNMENT); - let raw_ptr = std::alloc::alloc_zeroed(layout) as *mut T; - NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout)) - } - } -} - -/// Frees memory previously allocated by [`allocate_aligned_zeroed`] or [`allocate_aligned`]. 
-/// # Safety -/// This function is sound iff: -/// -/// * `ptr` was allocated by [`allocate_aligned_zeroed`] or [`allocate_aligned`] -/// * `size` must be the same size that was used to allocate that block of memory. -pub unsafe fn free_aligned(ptr: NonNull, size: usize) { - if size != 0 { - let size = size * size_of::(); - ALLOCATIONS.fetch_sub(size as isize, std::sync::atomic::Ordering::SeqCst); - std::alloc::dealloc( - ptr.as_ptr() as *mut u8, - Layout::from_size_align_unchecked(size, ALIGNMENT), - ); - } -} - -/// Reallocates memory previously allocated by [`allocate_aligned_zeroed`] or [`allocate_aligned`]. -/// # Safety -/// This function is sound iff `ptr` was previously allocated by `allocate_aligned` or `allocate_aligned_zeroed` for `old_size` items. -pub unsafe fn reallocate( - ptr: NonNull, - old_size: usize, - new_size: usize, -) -> NonNull { - if old_size == 0 { - return allocate_aligned(new_size); - } - - if new_size == 0 { - free_aligned(ptr, old_size); - return dangling(); - } - let old_size = old_size * size_of::(); - let new_size = new_size * size_of::(); - - ALLOCATIONS.fetch_add( - new_size as isize - old_size as isize, - std::sync::atomic::Ordering::SeqCst, - ); - let raw_ptr = std::alloc::realloc( - ptr.as_ptr() as *mut u8, - Layout::from_size_align_unchecked(old_size, ALIGNMENT), - new_size, - ) as *mut T; - NonNull::new(raw_ptr).unwrap_or_else(|| { - handle_alloc_error(Layout::from_size_align_unchecked(new_size, ALIGNMENT)) - }) -} diff --git a/src/array/binary/from.rs b/src/array/binary/from.rs index 2154a22c16d..c4993a95828 100644 --- a/src/array/binary/from.rs +++ b/src/array/binary/from.rs @@ -33,7 +33,7 @@ impl BinaryArray { impl> FromIterator> for BinaryArray { #[inline] fn from_iter>>(iter: I) -> Self { - MutableBinaryArray::from_iter(iter).into() + MutableBinaryArray::::from_iter(iter).into() } } diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index 6a1e8ad3aff..f50141bbd8c 100644 --- a/src/array/binary/mod.rs +++ 
b/src/array/binary/mod.rs @@ -29,7 +29,12 @@ pub struct BinaryArray { impl BinaryArray { /// Creates an empty [`BinaryArray`], i.e. whose `.len` is zero. pub fn new_empty(data_type: DataType) -> Self { - Self::from_data(data_type, Buffer::from(&[O::zero()]), Buffer::new(), None) + Self::from_data( + data_type, + Buffer::from(vec![O::zero()]), + Buffer::new(), + None, + ) } /// Creates an null [`BinaryArray`], i.e. whose `.null_count() == .len()`. diff --git a/src/array/binary/mutable.rs b/src/array/binary/mutable.rs index 5e9a97f28cd..2660fe4f0ab 100644 --- a/src/array/binary/mutable.rs +++ b/src/array/binary/mutable.rs @@ -3,7 +3,6 @@ use std::{iter::FromIterator, sync::Arc}; use crate::{ array::{specification::check_offsets, Array, MutableArray, Offset, TryExtend, TryPush}, bitmap::MutableBitmap, - buffer::MutableBuffer, datatypes::DataType, error::{ArrowError, Result}, trusted_len::TrustedLen, @@ -18,8 +17,8 @@ use super::BinaryArray; #[derive(Debug)] pub struct MutableBinaryArray { data_type: DataType, - offsets: MutableBuffer, - values: MutableBuffer, + offsets: Vec, + values: Vec, validity: Option, } @@ -43,7 +42,7 @@ impl Default for MutableBinaryArray { impl MutableBinaryArray { /// Creates a new empty [`MutableBinaryArray`]. /// # Implementation - /// This allocates a [`MutableBuffer`] of one element + /// This allocates a [`Vec`] of one element pub fn new() -> Self { Self::with_capacity(0) } @@ -55,8 +54,8 @@ impl MutableBinaryArray { /// * The validity is not `None` and its length is different from `offsets`'s length minus one. pub fn from_data( data_type: DataType, - offsets: MutableBuffer, - values: MutableBuffer, + offsets: Vec, + values: Vec, validity: Option, ) -> Self { check_offsets(&offsets, values.len()); @@ -82,25 +81,25 @@ impl MutableBinaryArray { /// # Implementation /// This does not allocate the validity. 
pub fn with_capacity(capacity: usize) -> Self { - let mut offsets = MutableBuffer::::with_capacity(capacity + 1); + let mut offsets = Vec::::with_capacity(capacity + 1); offsets.push(O::default()); Self { data_type: BinaryArray::::default_data_type(), offsets, - values: MutableBuffer::::new(), + values: Vec::::new(), validity: None, } } /// Initializes a new [`MutableBinaryArray`] with a pre-allocated capacity of slots and values. pub fn with_capacities(capacity: usize, values: usize) -> Self { - let mut offsets = MutableBuffer::::with_capacity(capacity + 1); + let mut offsets = Vec::::with_capacity(capacity + 1); offsets.push(O::default()); Self { data_type: Self::default_data_type(), offsets, - values: MutableBuffer::::with_capacity(values), + values: Vec::::with_capacity(values), validity: None, } } @@ -159,12 +158,12 @@ impl MutableBinaryArray { impl MutableBinaryArray { /// returns its values. - pub fn values(&self) -> &MutableBuffer { + pub fn values(&self) -> &Vec { &self.values } /// returns its offsets. - pub fn offsets(&self) -> &MutableBuffer { + pub fn offsets(&self) -> &Vec { &self.offsets } } @@ -434,15 +433,13 @@ impl> TryPush> for MutableBinaryArray { } } -/// Creates [`MutableBitmap`] and two [`MutableBuffer`]s from an iterator of `Option`. +/// Creates [`MutableBitmap`] and two [`Vec`]s from an iterator of `Option`. /// The first buffer corresponds to a offset buffer, the second one /// corresponds to a values buffer. /// # Safety /// The caller must ensure that `iterator` is `TrustedLen`. 
#[inline] -unsafe fn trusted_len_unzip( - iterator: I, -) -> (Option, MutableBuffer, MutableBuffer) +unsafe fn trusted_len_unzip(iterator: I) -> (Option, Vec, Vec) where O: Offset, P: AsRef<[u8]>, @@ -451,11 +448,11 @@ where let (_, upper) = iterator.size_hint(); let len = upper.expect("trusted_len_unzip requires an upper limit"); - let mut offsets = MutableBuffer::::with_capacity(len + 1); - let mut values = MutableBuffer::::new(); + let mut offsets = Vec::::with_capacity(len + 1); + let mut values = Vec::::new(); let mut validity = MutableBitmap::new(); - offsets.push_unchecked(O::default()); + offsets.push(O::default()); extend_from_trusted_len_iter(&mut offsets, &mut values, &mut validity, iterator); @@ -474,7 +471,7 @@ where #[allow(clippy::type_complexity)] pub(crate) unsafe fn try_trusted_len_unzip( iterator: I, -) -> std::result::Result<(Option, MutableBuffer, MutableBuffer), E> +) -> std::result::Result<(Option, Vec, Vec), E> where O: Offset, P: AsRef<[u8]>, @@ -484,8 +481,8 @@ where let len = upper.expect("trusted_len_unzip requires an upper limit"); let mut null = MutableBitmap::with_capacity(len); - let mut offsets = MutableBuffer::::with_capacity(len + 1); - let mut values = MutableBuffer::::new(); + let mut offsets = Vec::::with_capacity(len + 1); + let mut values = Vec::::new(); let mut length = O::default(); let mut dst = offsets.as_mut_ptr(); @@ -519,9 +516,7 @@ where /// # Safety /// The caller must ensure that `iterator` is [`TrustedLen`]. 
#[inline] -pub(crate) unsafe fn trusted_len_values_iter( - iterator: I, -) -> (MutableBuffer, MutableBuffer) +pub(crate) unsafe fn trusted_len_values_iter(iterator: I) -> (Vec, Vec) where O: Offset, P: AsRef<[u8]>, @@ -530,24 +525,24 @@ where let (_, upper) = iterator.size_hint(); let len = upper.expect("trusted_len_unzip requires an upper limit"); - let mut offsets = MutableBuffer::::with_capacity(len + 1); - let mut values = MutableBuffer::::new(); + let mut offsets = Vec::::with_capacity(len + 1); + let mut values = Vec::::new(); - offsets.push_unchecked(O::default()); + offsets.push(O::default()); extend_from_trusted_len_values_iter(&mut offsets, &mut values, iterator); (offsets, values) } -// Populates `offsets` and `values` [`MutableBuffer`]s with information extracted +// Populates `offsets` and `values` [`Vec`]s with information extracted // from the incoming `iterator`. // # Safety // The caller must ensure the `iterator` is [`TrustedLen`] #[inline] unsafe fn extend_from_trusted_len_values_iter( - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + offsets: &mut Vec, + values: &mut Vec, iterator: I, ) where O: Offset, @@ -592,15 +587,15 @@ unsafe fn extend_from_trusted_len_values_iter( offsets.set_len(offsets.len() + additional); } -// Populates `offsets`, `values`, and `validity` [`MutableBuffer`]s with +// Populates `offsets`, `values`, and `validity` [`Vec`]s with // information extracted from the incoming `iterator`. // // # Safety // The caller must ensure that `iterator` is [`TrustedLen`] #[inline] unsafe fn extend_from_trusted_len_iter( - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + offsets: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, iterator: I, ) where @@ -655,10 +650,10 @@ unsafe fn extend_from_trusted_len_iter( offsets.set_len(offsets.len() + additional); } -/// Creates two [`MutableBuffer`]s from an iterator of `&[u8]`. +/// Creates two [`Vec`]s from an iterator of `&[u8]`. 
/// The first buffer corresponds to a offset buffer, the second to a values buffer. #[inline] -fn values_iter(iterator: I) -> (MutableBuffer, MutableBuffer) +fn values_iter(iterator: I) -> (Vec, Vec) where O: Offset, P: AsRef<[u8]>, @@ -666,8 +661,8 @@ where { let (lower, _) = iterator.size_hint(); - let mut offsets = MutableBuffer::::with_capacity(lower + 1); - let mut values = MutableBuffer::::new(); + let mut offsets = Vec::::with_capacity(lower + 1); + let mut values = Vec::::new(); let mut length = O::default(); offsets.push(length); diff --git a/src/array/fixed_size_binary/mutable.rs b/src/array/fixed_size_binary/mutable.rs index 46a4bcc143a..d7b28883c85 100644 --- a/src/array/fixed_size_binary/mutable.rs +++ b/src/array/fixed_size_binary/mutable.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use crate::{ array::{Array, MutableArray}, bitmap::MutableBitmap, - buffer::MutableBuffer, datatypes::DataType, error::{ArrowError, Result}, }; @@ -18,7 +17,7 @@ use super::{FixedSizeBinaryArray, FixedSizeBinaryValues}; pub struct MutableFixedSizeBinaryArray { data_type: DataType, size: usize, - values: MutableBuffer, + values: Vec, validity: Option, } @@ -36,7 +35,7 @@ impl MutableFixedSizeBinaryArray { /// Canonical method to create a new [`MutableFixedSizeBinaryArray`]. 
pub fn from_data( data_type: DataType, - values: MutableBuffer, + values: Vec, validity: Option, ) -> Self { let size = FixedSizeBinaryArray::get_size(&data_type); @@ -69,7 +68,7 @@ impl MutableFixedSizeBinaryArray { pub fn with_capacity(size: usize, capacity: usize) -> Self { Self::from_data( DataType::FixedSizeBinary(size), - MutableBuffer::::with_capacity(capacity * size), + Vec::::with_capacity(capacity * size), None, ) } @@ -95,7 +94,7 @@ impl MutableFixedSizeBinaryArray { } } None => { - self.values.extend_constant(self.size, 0); + self.values.resize(self.values.len() + self.size, 0); match &mut self.validity { Some(validity) => validity.push(false), None => self.init_validity(), @@ -168,7 +167,7 @@ impl MutableFixedSizeBinaryArray { /// Accessors impl MutableFixedSizeBinaryArray { /// Returns its values. - pub fn values(&self) -> &MutableBuffer { + pub fn values(&self) -> &Vec { &self.values } @@ -216,7 +215,7 @@ impl MutableArray for MutableFixedSizeBinaryArray { } fn push_null(&mut self) { - self.values.extend_constant(self.size, 0); + self.values.resize(self.values.len() + self.size, 0); } fn shrink_to_fit(&mut self) { diff --git a/src/array/growable/binary.rs b/src/array/growable/binary.rs index 3a2b36fb11f..e4cb2f32a2a 100644 --- a/src/array/growable/binary.rs +++ b/src/array/growable/binary.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use crate::{ array::{Array, BinaryArray, Offset}, bitmap::MutableBitmap, - buffer::MutableBuffer, datatypes::DataType, }; @@ -17,8 +16,8 @@ pub struct GrowableBinary<'a, O: Offset> { arrays: Vec<&'a BinaryArray>, data_type: DataType, validity: MutableBitmap, - values: MutableBuffer, - offsets: MutableBuffer, + values: Vec, + offsets: Vec, length: O, // always equal to the last offset at `offsets`. 
extend_null_bits: Vec>, } @@ -41,14 +40,14 @@ impl<'a, O: Offset> GrowableBinary<'a, O> { .map(|array| build_extend_null_bits(*array, use_validity)) .collect(); - let mut offsets = MutableBuffer::with_capacity(capacity + 1); + let mut offsets = Vec::with_capacity(capacity + 1); let length = O::default(); - unsafe { offsets.push_unchecked(length) }; + offsets.push(length); Self { arrays, data_type, - values: MutableBuffer::with_capacity(0), + values: Vec::with_capacity(0), offsets, length, validity: MutableBitmap::with_capacity(capacity), @@ -84,7 +83,8 @@ impl<'a, O: Offset> Growable<'a> for GrowableBinary<'a, O> { } fn extend_validity(&mut self, additional: usize) { - self.offsets.extend_constant(additional, self.length); + self.offsets + .resize(self.offsets.len() + additional, self.length); self.validity.extend_constant(additional, false); } diff --git a/src/array/growable/dictionary.rs b/src/array/growable/dictionary.rs index 7fe1040cea4..8702e56243c 100644 --- a/src/array/growable/dictionary.rs +++ b/src/array/growable/dictionary.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use crate::{ array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray}, bitmap::MutableBitmap, - buffer::MutableBuffer, }; use super::{ @@ -18,7 +17,7 @@ use super::{ /// the values of each [`DictionaryArray`] one after the other. 
pub struct GrowableDictionary<'a, K: DictionaryKey> { keys_values: Vec<&'a [K]>, - key_values: MutableBuffer, + key_values: Vec, key_validity: MutableBitmap, offsets: Vec, values: Arc, @@ -73,7 +72,7 @@ impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> { offsets, values, keys_values, - key_values: MutableBuffer::with_capacity(capacity), + key_values: Vec::with_capacity(capacity), key_validity: MutableBitmap::with_capacity(capacity), extend_null_bits, } @@ -107,7 +106,8 @@ impl<'a, T: DictionaryKey> Growable<'a> for GrowableDictionary<'a, T> { #[inline] fn extend_validity(&mut self, additional: usize) { - self.key_values.extend_constant(additional, T::default()); + self.key_values + .resize(self.key_values.len() + additional, T::default()); self.key_validity.extend_constant(additional, false); } diff --git a/src/array/growable/fixed_binary.rs b/src/array/growable/fixed_binary.rs index 3abdc67fdbd..fc0d958af13 100644 --- a/src/array/growable/fixed_binary.rs +++ b/src/array/growable/fixed_binary.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use crate::{ array::{Array, FixedSizeBinaryArray}, bitmap::MutableBitmap, - buffer::MutableBuffer, }; use super::{ @@ -15,7 +14,7 @@ use super::{ pub struct GrowableFixedSizeBinary<'a> { arrays: Vec<&'a FixedSizeBinaryArray>, validity: MutableBitmap, - values: MutableBuffer, + values: Vec, extend_null_bits: Vec>, size: usize, // just a cache } @@ -43,7 +42,7 @@ impl<'a> GrowableFixedSizeBinary<'a> { let size = FixedSizeBinaryArray::get_size(arrays[0].data_type()); Self { arrays, - values: MutableBuffer::with_capacity(0), + values: Vec::with_capacity(0), validity: MutableBitmap::with_capacity(capacity), extend_null_bits, size, diff --git a/src/array/growable/list.rs b/src/array/growable/list.rs index 676b7cc07d4..63d8f22d576 100644 --- a/src/array/growable/list.rs +++ b/src/array/growable/list.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use crate::{ array::{Array, ListArray, Offset}, bitmap::MutableBitmap, - buffer::MutableBuffer, }; use 
super::{ @@ -59,7 +58,7 @@ pub struct GrowableList<'a, O: Offset> { arrays: Vec<&'a ListArray>, validity: MutableBitmap, values: Box + 'a>, - offsets: MutableBuffer, + offsets: Vec, last_offset: O, // always equal to the last offset at `offsets`. extend_null_bits: Vec>, } @@ -86,9 +85,9 @@ impl<'a, O: Offset> GrowableList<'a, O> { .collect::>(); let values = make_growable(&inner, use_validity, 0); - let mut offsets = MutableBuffer::with_capacity(capacity + 1); + let mut offsets = Vec::with_capacity(capacity + 1); let length = O::default(); - unsafe { offsets.push_unchecked(length) }; + offsets.push(length); Self { arrays, @@ -121,7 +120,8 @@ impl<'a, O: Offset> Growable<'a> for GrowableList<'a, O> { } fn extend_validity(&mut self, additional: usize) { - self.offsets.extend_constant(additional, self.last_offset); + self.offsets + .resize(self.offsets.len() + additional, self.last_offset); self.validity.extend_constant(additional, false); } diff --git a/src/array/growable/primitive.rs b/src/array/growable/primitive.rs index 1485937a31d..d64bab94443 100644 --- a/src/array/growable/primitive.rs +++ b/src/array/growable/primitive.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use crate::{ array::{Array, PrimitiveArray}, bitmap::MutableBitmap, - buffer::MutableBuffer, datatypes::DataType, types::NativeType, }; @@ -18,7 +17,7 @@ pub struct GrowablePrimitive<'a, T: NativeType> { data_type: DataType, arrays: Vec<&'a [T]>, validity: MutableBitmap, - values: MutableBuffer, + values: Vec, extend_null_bits: Vec>, } @@ -52,7 +51,7 @@ impl<'a, T: NativeType> GrowablePrimitive<'a, T> { Self { data_type, arrays, - values: MutableBuffer::with_capacity(capacity), + values: Vec::with_capacity(capacity), validity: MutableBitmap::with_capacity(capacity), extend_null_bits, } diff --git a/src/array/growable/utf8.rs b/src/array/growable/utf8.rs index f17095dab5c..c50c6fd4dbc 100644 --- a/src/array/growable/utf8.rs +++ b/src/array/growable/utf8.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use crate::{ 
array::{Array, Offset, Utf8Array}, bitmap::MutableBitmap, - buffer::MutableBuffer, }; use super::{ @@ -15,8 +14,8 @@ use super::{ pub struct GrowableUtf8<'a, O: Offset> { arrays: Vec<&'a Utf8Array>, validity: MutableBitmap, - values: MutableBuffer, - offsets: MutableBuffer, + values: Vec, + offsets: Vec, length: O, // always equal to the last offset at `offsets`. extend_null_bits: Vec>, } @@ -37,13 +36,13 @@ impl<'a, O: Offset> GrowableUtf8<'a, O> { .map(|array| build_extend_null_bits(*array, use_validity)) .collect(); - let mut offsets = MutableBuffer::with_capacity(capacity + 1); + let mut offsets = Vec::with_capacity(capacity + 1); let length = O::default(); - unsafe { offsets.push_unchecked(length) }; + offsets.push(length); Self { arrays: arrays.to_vec(), - values: MutableBuffer::with_capacity(0), + values: Vec::with_capacity(0), offsets, length, validity: MutableBitmap::with_capacity(capacity), @@ -85,7 +84,8 @@ impl<'a, O: Offset> Growable<'a> for GrowableUtf8<'a, O> { } fn extend_validity(&mut self, additional: usize) { - self.offsets.extend_constant(additional, self.length); + self.offsets + .resize(self.offsets.len() + additional, self.length); self.validity.extend_constant(additional, false); } diff --git a/src/array/growable/utils.rs b/src/array/growable/utils.rs index 597585244d8..7e39df295de 100644 --- a/src/array/growable/utils.rs +++ b/src/array/growable/utils.rs @@ -1,14 +1,9 @@ use crate::{ array::{Array, Offset}, bitmap::MutableBitmap, - buffer::MutableBuffer, }; -pub(super) fn extend_offsets( - buffer: &mut MutableBuffer, - last_offset: &mut T, - offsets: &[T], -) { +pub(super) fn extend_offsets(buffer: &mut Vec, last_offset: &mut T, offsets: &[T]) { buffer.reserve(offsets.len() - 1); offsets.windows(2).for_each(|offsets| { // compute the new offset @@ -40,7 +35,7 @@ pub(super) fn build_extend_null_bits(array: &dyn Array, use_validity: bool) -> E #[inline] pub(super) fn extend_offset_values( - buffer: &mut MutableBuffer, + buffer: &mut Vec, 
offsets: &[O], values: &[u8], start: usize, diff --git a/src/array/list/mod.rs b/src/array/list/mod.rs index 11e6fd7592b..0ced998a027 100644 --- a/src/array/list/mod.rs +++ b/src/array/list/mod.rs @@ -31,7 +31,7 @@ impl ListArray { /// Returns a new empty [`ListArray`]. pub fn new_empty(data_type: DataType) -> Self { let values = new_empty_array(Self::get_child_type(&data_type).clone()).into(); - Self::from_data(data_type, Buffer::from(&[O::zero()]), values, None) + Self::from_data(data_type, Buffer::from(vec![O::zero()]), values, None) } /// Returns a new null [`ListArray`]. diff --git a/src/array/list/mutable.rs b/src/array/list/mutable.rs index 64b0de8ce30..52bb9ec5704 100644 --- a/src/array/list/mutable.rs +++ b/src/array/list/mutable.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use crate::{ array::{Array, MutableArray, Offset, TryExtend, TryPush}, bitmap::MutableBitmap, - buffer::MutableBuffer, datatypes::{DataType, Field}, error::{ArrowError, Result}, }; @@ -14,7 +13,7 @@ use super::ListArray; #[derive(Debug)] pub struct MutableListArray { data_type: DataType, - offsets: MutableBuffer, + offsets: Vec, values: M, validity: Option, } @@ -32,7 +31,7 @@ impl MutableListArray { let values = M::default(); let data_type = ListArray::::default_datatype(values.data_type().clone()); - let mut offsets = MutableBuffer::::with_capacity(capacity + 1); + let mut offsets = Vec::::with_capacity(capacity + 1); offsets.push(O::default()); Self { data_type, @@ -96,7 +95,7 @@ where impl MutableListArray { /// Creates a new [`MutableListArray`] from a [`MutableArray`] and capacity. 
pub fn new_from(values: M, data_type: DataType, capacity: usize) -> Self { - let mut offsets = MutableBuffer::::with_capacity(capacity + 1); + let mut offsets = Vec::::with_capacity(capacity + 1); offsets.push(O::default()); assert_eq!(values.len(), 0); ListArray::::get_child_field(&data_type); @@ -155,7 +154,7 @@ impl MutableListArray { } /// The offseta - pub fn offsets(&self) -> &MutableBuffer { + pub fn offsets(&self) -> &Vec { &self.offsets } diff --git a/src/array/map/mod.rs b/src/array/map/mod.rs index 9bd05e1f5a5..b1ad5f7e8df 100644 --- a/src/array/map/mod.rs +++ b/src/array/map/mod.rs @@ -47,7 +47,7 @@ impl MapArray { /// Returns a new empty [`MapArray`]. pub fn new_empty(data_type: DataType) -> Self { let field = new_empty_array(Self::get_field(&data_type).data_type().clone()).into(); - Self::from_data(data_type, Buffer::from(&[0i32]), field, None) + Self::from_data(data_type, Buffer::from(vec![0i32]), field, None) } /// Returns a new [`MapArray`]. diff --git a/src/array/primitive/from_natural.rs b/src/array/primitive/from_natural.rs index 93717f4a223..bdb5dafcc7d 100644 --- a/src/array/primitive/from_natural.rs +++ b/src/array/primitive/from_natural.rs @@ -1,7 +1,6 @@ use std::iter::FromIterator; use crate::{ - buffer::{Buffer, MutableBuffer}, trusted_len::TrustedLen, types::{NativeType, NaturalDataType}, }; @@ -27,18 +26,14 @@ impl PrimitiveArray { /// # Implementation /// This does not assume that the iterator has a known length. pub fn from_values>(iter: I) -> Self { - Self::from_data( - T::DATA_TYPE, - MutableBuffer::::from_iter(iter).into(), - None, - ) + Self::from_data(T::DATA_TYPE, Vec::::from_iter(iter).into(), None) } /// Creates a (non-null) [`PrimitiveArray`] from a slice of values. /// # Implementation /// This is essentially a memcopy and is the fastest way to create a [`PrimitiveArray`]. 
pub fn from_slice>(slice: P) -> Self { - Self::from_data(T::DATA_TYPE, Buffer::::from(slice), None) + Self::from_data(T::DATA_TYPE, Vec::::from(slice.as_ref()).into(), None) } } diff --git a/src/array/primitive/mutable.rs b/src/array/primitive/mutable.rs index ac5613b754d..ee82188ab3c 100644 --- a/src/array/primitive/mutable.rs +++ b/src/array/primitive/mutable.rs @@ -3,7 +3,6 @@ use std::{iter::FromIterator, sync::Arc}; use crate::{ array::{Array, MutableArray, TryExtend, TryPush}, bitmap::MutableBitmap, - buffer::MutableBuffer, datatypes::DataType, error::{ArrowError, Result}, trusted_len::TrustedLen, @@ -17,7 +16,7 @@ use super::PrimitiveArray; #[derive(Debug)] pub struct MutablePrimitiveArray { data_type: DataType, - values: MutableBuffer, + values: Vec, validity: Option, } @@ -55,11 +54,7 @@ impl MutablePrimitiveArray { /// This function panics iff: /// * `data_type` is not supported by the physical type /// * The validity is not `None` and its length is different from the `values`'s length - pub fn from_data( - data_type: DataType, - values: MutableBuffer, - validity: Option, - ) -> Self { + pub fn from_data(data_type: DataType, values: Vec, validity: Option) -> Self { if !T::is_valid(&data_type) { Err(ArrowError::InvalidArgumentError(format!( "Type {} does not support logical type {}", @@ -79,7 +74,7 @@ impl MutablePrimitiveArray { } /// Extract the low-end APIs from the [`MutablePrimitiveArray`]. 
- pub fn into_data(self) -> (DataType, MutableBuffer, Option) { + pub fn into_data(self) -> (DataType, Vec, Option) { (self.data_type, self.values, self.validity) } } @@ -95,7 +90,7 @@ impl From for MutablePrimitiveArray { assert!(T::is_valid(&data_type)); Self { data_type, - values: MutableBuffer::::new(), + values: Vec::::new(), validity: None, } } @@ -107,7 +102,7 @@ impl MutablePrimitiveArray { assert!(T::is_valid(&data_type)); Self { data_type, - values: MutableBuffer::::with_capacity(capacity), + values: Vec::::with_capacity(capacity), validity: None, } } @@ -146,7 +141,7 @@ impl MutablePrimitiveArray { #[inline] pub fn extend_constant(&mut self, additional: usize, value: Option) { if let Some(value) = value { - self.values.extend_constant(additional, value); + self.values.resize(self.values.len() + additional, value); if let Some(validity) = &mut self.validity { validity.extend_constant(additional, true) } @@ -159,7 +154,8 @@ impl MutablePrimitiveArray { validity.extend_constant(additional, false); self.validity = Some(validity) } - self.values.extend_constant(additional, T::default()); + self.values + .resize(self.values.len() + additional, T::default()); } } @@ -212,7 +208,7 @@ impl MutablePrimitiveArray { where I: Iterator, { - self.values.extend_from_trusted_len_iter_unchecked(iterator); + self.values.extend(iterator); self.update_all_valid(); } @@ -271,7 +267,7 @@ impl MutablePrimitiveArray { /// Accessors impl MutablePrimitiveArray { /// Returns its values. - pub fn values(&self) -> &MutableBuffer { + pub fn values(&self) -> &Vec { &self.values } @@ -316,7 +312,7 @@ impl MutablePrimitiveArray { /// Sets values. /// # Panic /// Panics iff the values' length is not equal to the existing validity's len. 
- pub fn set_values(&mut self, values: MutableBuffer) { + pub fn set_values(&mut self, values: Vec) { assert_eq!(values.len(), self.values.len()); self.values = values; } @@ -464,7 +460,7 @@ impl MutablePrimitiveArray { pub fn from_trusted_len_values_iter>(iter: I) -> Self { Self { data_type: T::DATA_TYPE, - values: MutableBuffer::::from_trusted_len_iter(iter), + values: iter.collect(), validity: None, } } @@ -476,7 +472,7 @@ impl MutablePrimitiveArray { pub unsafe fn from_trusted_len_values_iter_unchecked>(iter: I) -> Self { Self { data_type: T::DATA_TYPE, - values: MutableBuffer::::from_trusted_len_iter_unchecked(iter), + values: iter.collect(), validity: None, } } @@ -491,7 +487,7 @@ impl>> FromI let mut validity = MutableBitmap::with_capacity(lower); - let values: MutableBuffer = iter + let values: Vec = iter .map(|item| { if let Some(a) = item.borrow() { validity.push(true); @@ -517,7 +513,7 @@ impl>> FromI } } -/// Extends a [`MutableBitmap`] and a [`MutableBuffer`] from an iterator of `Option`. +/// Extends a [`MutableBitmap`] and a [`Vec`] from an iterator of `Option`. /// The first buffer corresponds to a bitmap buffer, the second one /// corresponds to a values buffer. 
/// # Safety @@ -526,7 +522,7 @@ impl>> FromI pub(crate) unsafe fn extend_trusted_len_unzip( iterator: I, validity: &mut MutableBitmap, - buffer: &mut MutableBuffer, + buffer: &mut Vec, ) where T: NativeType, P: std::borrow::Borrow, @@ -536,36 +532,32 @@ pub(crate) unsafe fn extend_trusted_len_unzip( let additional = upper.expect("trusted_len_unzip requires an upper limit"); validity.reserve(additional); - buffer.reserve(additional); - - for item in iterator { - let item = if let Some(item) = item { + let values = iterator.map(|item| { + if let Some(item) = item { validity.push_unchecked(true); *item.borrow() } else { validity.push_unchecked(false); T::default() - }; - buffer.push_unchecked(item); - } + } + }); + buffer.extend(values); } -/// Creates a [`MutableBitmap`] and a [`MutableBuffer`] from an iterator of `Option`. +/// Creates a [`MutableBitmap`] and a [`Vec`] from an iterator of `Option`. /// The first buffer corresponds to a bitmap buffer, the second one /// corresponds to a values buffer. /// # Safety /// The caller must ensure that `iterator` is `TrustedLen`. 
#[inline] -pub(crate) unsafe fn trusted_len_unzip( - iterator: I, -) -> (Option, MutableBuffer) +pub(crate) unsafe fn trusted_len_unzip(iterator: I) -> (Option, Vec) where T: NativeType, P: std::borrow::Borrow, I: Iterator>, { let mut validity = MutableBitmap::new(); - let mut buffer = MutableBuffer::::new(); + let mut buffer = Vec::::new(); extend_trusted_len_unzip(iterator, &mut validity, &mut buffer); @@ -583,7 +575,7 @@ where #[inline] pub(crate) unsafe fn try_trusted_len_unzip( iterator: I, -) -> std::result::Result<(Option, MutableBuffer), E> +) -> std::result::Result<(Option, Vec), E> where T: NativeType, P: std::borrow::Borrow, @@ -593,7 +585,7 @@ where let len = upper.expect("trusted_len_unzip requires an upper limit"); let mut null = MutableBitmap::with_capacity(len); - let mut buffer = MutableBuffer::::with_capacity(len); + let mut buffer = Vec::::with_capacity(len); let mut dst = buffer.as_mut_ptr(); for item in iterator { diff --git a/src/array/utf8/from.rs b/src/array/utf8/from.rs index 8c8f75468ba..9463f7ee365 100644 --- a/src/array/utf8/from.rs +++ b/src/array/utf8/from.rs @@ -86,6 +86,6 @@ impl Utf8Array { impl> FromIterator> for Utf8Array { #[inline] fn from_iter>>(iter: I) -> Self { - MutableUtf8Array::from_iter(iter).into() + MutableUtf8Array::::from_iter(iter).into() } } diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs index 2a49f51e7ed..c320793ffd2 100644 --- a/src/array/utf8/mod.rs +++ b/src/array/utf8/mod.rs @@ -43,7 +43,12 @@ impl Utf8Array { #[inline] pub fn new_empty(data_type: DataType) -> Self { unsafe { - Self::from_data_unchecked(data_type, Buffer::from(&[O::zero()]), Buffer::new(), None) + Self::from_data_unchecked( + data_type, + Buffer::from(vec![O::zero()]), + Buffer::new(), + None, + ) } } diff --git a/src/array/utf8/mutable.rs b/src/array/utf8/mutable.rs index 6a269a88761..c4eb85527d6 100644 --- a/src/array/utf8/mutable.rs +++ b/src/array/utf8/mutable.rs @@ -6,7 +6,6 @@ use crate::{ Array, MutableArray, Offset, 
TryExtend, TryPush, }, bitmap::MutableBitmap, - buffer::MutableBuffer, datatypes::DataType, error::{ArrowError, Result}, trusted_len::TrustedLen, @@ -18,8 +17,8 @@ use super::Utf8Array; #[derive(Debug)] pub struct MutableUtf8Array { data_type: DataType, - offsets: MutableBuffer, - values: MutableBuffer, + offsets: Vec, + values: Vec, validity: Option, } @@ -48,12 +47,11 @@ impl Default for MutableUtf8Array { impl MutableUtf8Array { /// Initializes a new empty [`MutableUtf8Array`]. pub fn new() -> Self { - let mut offsets = MutableBuffer::::new(); - offsets.push(O::default()); + let offsets = vec![O::default()]; Self { data_type: Self::default_data_type(), offsets, - values: MutableBuffer::::new(), + values: Vec::::new(), validity: None, } } @@ -66,8 +64,8 @@ impl MutableUtf8Array { /// * The validity is not `None` and its length is different from `offsets`'s length minus one. pub fn from_data( data_type: DataType, - offsets: MutableBuffer, - values: MutableBuffer, + offsets: Vec, + values: Vec, validity: Option, ) -> Self { check_offsets_and_utf8(&offsets, &values); @@ -94,8 +92,8 @@ impl MutableUtf8Array { /// * The validity is not `None` and its length is different from `offsets`'s length minus one. pub unsafe fn from_data_unchecked( data_type: DataType, - offsets: MutableBuffer, - values: MutableBuffer, + offsets: Vec, + values: Vec, validity: Option, ) -> Self { check_offsets_minimal(&offsets, values.len()); @@ -124,13 +122,13 @@ impl MutableUtf8Array { /// Initializes a new [`MutableUtf8Array`] with a pre-allocated capacity of slots and values. 
pub fn with_capacities(capacity: usize, values: usize) -> Self { - let mut offsets = MutableBuffer::::with_capacity(capacity + 1); + let mut offsets = Vec::::with_capacity(capacity + 1); offsets.push(O::default()); Self { data_type: Self::default_data_type(), offsets, - values: MutableBuffer::::with_capacity(values), + values: Vec::::with_capacity(values), validity: None, } } @@ -181,12 +179,12 @@ impl MutableUtf8Array { impl MutableUtf8Array { /// returns its values. - pub fn values(&self) -> &MutableBuffer { + pub fn values(&self) -> &Vec { &self.values } /// returns its offsets. - pub fn offsets(&self) -> &MutableBuffer { + pub fn offsets(&self) -> &Vec { &self.offsets } } @@ -479,25 +477,23 @@ impl> TryPush> for MutableUtf8Array { } } -/// Creates [`MutableBitmap`] and two [`MutableBuffer`]s from an iterator of `Option`. +/// Creates [`MutableBitmap`] and two [`Vec`]s from an iterator of `Option`. /// The first buffer corresponds to a offset buffer, the second one /// corresponds to a values buffer. /// # Safety /// The caller must ensure that `iterator` is `TrustedLen`. 
#[inline] -unsafe fn trusted_len_unzip( - iterator: I, -) -> (Option, MutableBuffer, MutableBuffer) +unsafe fn trusted_len_unzip(iterator: I) -> (Option, Vec, Vec) where O: Offset, P: AsRef, I: Iterator>, { - let mut offsets = MutableBuffer::::with_capacity(1); - let mut values = MutableBuffer::::new(); + let mut offsets = Vec::::with_capacity(1); + let mut values = Vec::::new(); let mut validity = MutableBitmap::new(); - offsets.push_unchecked(O::default()); + offsets.push(O::default()); extend_from_trusted_len_iter(&mut offsets, &mut values, &mut validity, iterator); @@ -516,7 +512,7 @@ where #[allow(clippy::type_complexity)] pub(crate) unsafe fn try_trusted_len_unzip( iterator: I, -) -> std::result::Result<(Option, MutableBuffer, MutableBuffer), E> +) -> std::result::Result<(Option, Vec, Vec), E> where O: Offset, P: AsRef, @@ -526,8 +522,8 @@ where let len = upper.expect("trusted_len_unzip requires an upper limit"); let mut validity = MutableBitmap::with_capacity(len); - let mut offsets = MutableBuffer::::with_capacity(len + 1); - let mut values = MutableBuffer::::new(); + let mut offsets = Vec::::with_capacity(len + 1); + let mut values = Vec::::new(); let mut length = O::default(); let mut dst = offsets.as_mut_ptr(); @@ -566,18 +562,16 @@ where /// # Safety /// The caller must ensure that `iterator` is [`TrustedLen`]. 
#[inline] -pub(crate) unsafe fn trusted_len_values_iter( - iterator: I, -) -> (MutableBuffer, MutableBuffer) +pub(crate) unsafe fn trusted_len_values_iter(iterator: I) -> (Vec, Vec) where O: Offset, P: AsRef, I: Iterator, { - let mut offsets = MutableBuffer::::with_capacity(1 + iterator.size_hint().1.unwrap()); - let mut values = MutableBuffer::::new(); + let mut offsets = Vec::::with_capacity(1 + iterator.size_hint().1.unwrap()); + let mut values = Vec::::new(); - offsets.push_unchecked(O::default()); + offsets.push(O::default()); extend_from_trusted_len_values_iter(&mut offsets, &mut values, iterator); @@ -590,8 +584,8 @@ where /// The caller must ensure that `iterator` is [`TrustedLen`] #[inline] unsafe fn extend_from_trusted_len_values_iter( - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + offsets: &mut Vec, + values: &mut Vec, iterator: I, ) where O: Offset, @@ -634,8 +628,8 @@ unsafe fn extend_from_trusted_len_values_iter( /// The caller must ensure that `iterator` is [`TrustedLen`] #[inline] unsafe fn extend_from_trusted_len_iter( - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + offsets: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, iterator: I, ) where @@ -680,10 +674,10 @@ unsafe fn extend_from_trusted_len_iter( offsets.set_len(offsets.len() + additional); } -/// Creates two [`MutableBuffer`]s from an iterator of `&str`. +/// Creates two [`Vec`]s from an iterator of `&str`. /// The first buffer corresponds to a offset buffer, the second to a values buffer. 
#[inline] -fn values_iter(iterator: I) -> (MutableBuffer, MutableBuffer) +fn values_iter(iterator: I) -> (Vec, Vec) where O: Offset, P: AsRef, @@ -691,8 +685,8 @@ where { let (lower, _) = iterator.size_hint(); - let mut offsets = MutableBuffer::::with_capacity(lower + 1); - let mut values = MutableBuffer::::new(); + let mut offsets = Vec::::with_capacity(lower + 1); + let mut values = Vec::::new(); let mut length = O::default(); offsets.push(length); diff --git a/src/bitmap/bitmap_ops.rs b/src/bitmap/bitmap_ops.rs index 28584d3c6fa..bcc4e433ee8 100644 --- a/src/bitmap/bitmap_ops.rs +++ b/src/bitmap/bitmap_ops.rs @@ -1,12 +1,45 @@ use std::ops::{BitAnd, BitOr, BitXor, Not}; -use crate::buffer::MutableBuffer; +use crate::trusted_len::TrustedLen; use super::{ - utils::{BitChunkIterExact, BitChunksExact}, + utils::{BitChunk, BitChunkIterExact, BitChunksExact}, Bitmap, }; +/// # Safety +/// The iterator must be [`TrustedLen`]. +pub unsafe fn from_chunk_iter_unchecked>( + iterator: I, +) -> Vec { + let (_, upper) = iterator.size_hint(); + let upper = upper.expect("try_from_trusted_len_iter requires an upper limit"); + let len = upper * std::mem::size_of::(); + + let mut buffer = Vec::with_capacity(len); + + let mut dst = buffer.as_mut_ptr(); + for item in iterator { + let bytes = item.to_ne_bytes(); + for i in 0..std::mem::size_of::() { + std::ptr::write(dst, bytes[i]); + dst = dst.add(1); + } + } + assert_eq!( + dst.offset_from(buffer.as_ptr()) as usize, + len, + "Trusted iterator length was not accurately reported" + ); + buffer.set_len(len); + buffer +} + +/// Creates a Vec from a [`TrustedLen`] of [`BitChunk`], +pub fn chunk_iter_to_vec>(iter: I) -> Vec { + unsafe { from_chunk_iter_unchecked(iter) } +} + /// Apply a bitwise operation `op` to four inputs and return the result as a [`Bitmap`]. 
pub fn quaternary(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, a4: &Bitmap, op: F) -> Bitmap where @@ -30,13 +63,12 @@ where .zip(a3_chunks) .zip(a4_chunks) .map(|(((a1, a2), a3), a4)| op(a1, a2, a3, a4)); - let buffer = MutableBuffer::from_chunk_iter( - chunks.chain(std::iter::once(op(rem_a1, rem_a2, rem_a3, rem_a4))), - ); + let buffer = + chunk_iter_to_vec(chunks.chain(std::iter::once(op(rem_a1, rem_a2, rem_a3, rem_a4)))); let length = a1.len(); - Bitmap::from_u8_buffer(buffer, length) + Bitmap::from_u8_vec(buffer, length) } /// Apply a bitwise operation `op` to three inputs and return the result as a [`Bitmap`]. @@ -59,12 +91,11 @@ where .zip(a3_chunks) .map(|((a1, a2), a3)| op(a1, a2, a3)); - let buffer = - MutableBuffer::from_chunk_iter(chunks.chain(std::iter::once(op(rem_a1, rem_a2, rem_a3)))); + let buffer = chunk_iter_to_vec(chunks.chain(std::iter::once(op(rem_a1, rem_a2, rem_a3)))); let length = a1.len(); - Bitmap::from_u8_buffer(buffer, length) + Bitmap::from_u8_vec(buffer, length) } /// Apply a bitwise operation `op` to two inputs and return the result as a [`Bitmap`]. @@ -82,12 +113,11 @@ where .zip(rhs_chunks) .map(|(left, right)| op(left, right)); - let buffer = - MutableBuffer::from_chunk_iter(chunks.chain(std::iter::once(op(rem_lhs, rem_rhs)))); + let buffer = chunk_iter_to_vec(chunks.chain(std::iter::once(op(rem_lhs, rem_rhs)))); let length = lhs.len(); - Bitmap::from_u8_buffer(buffer, length) + Bitmap::from_u8_vec(buffer, length) } fn unary_impl(iter: I, op: F, length: usize) -> Bitmap @@ -99,9 +129,9 @@ where let iterator = iter.map(op).chain(std::iter::once(rem)); - let buffer = MutableBuffer::from_chunk_iter(iterator); + let buffer = chunk_iter_to_vec(iterator); - Bitmap::from_u8_buffer(buffer, length) + Bitmap::from_u8_vec(buffer, length) } /// Apply a bitwise operation `op` to one input and return the result as a [`Bitmap`]. 
diff --git a/src/bitmap/immutable.rs b/src/bitmap/immutable.rs index 31a9bef7a04..eea39d78480 100644 --- a/src/bitmap/immutable.rs +++ b/src/bitmap/immutable.rs @@ -1,7 +1,7 @@ use std::iter::FromIterator; use std::sync::Arc; -use crate::{buffer::bytes::Bytes, buffer::MutableBuffer, trusted_len::TrustedLen}; +use crate::{buffer::bytes::Bytes, trusted_len::TrustedLen}; use super::{ utils::{count_zeros, fmt, get_bit, get_bit_unchecked, BitChunk, BitChunks, BitmapIter}, @@ -77,12 +77,13 @@ impl Bitmap { } } - /// Creates a new [`Bitmap`] from [`MutableBuffer`] and a length. + /// Creates a new [`Bitmap`] from [`Vec`] and a length. + /// This function is `O(1)` /// # Panic /// Panics iff `length <= buffer.len() * 8` #[inline] - pub fn from_u8_buffer(buffer: MutableBuffer, length: usize) -> Self { - Bitmap::from_bytes(buffer.into(), length) + pub fn from_u8_vec(vec: Vec, length: usize) -> Self { + Bitmap::from_bytes(vec.into(), length) } /// Creates a new [`Bitmap`] from a slice and length. @@ -90,8 +91,8 @@ impl Bitmap { /// Panics iff `length <= bytes.len() * 8` #[inline] pub fn from_u8_slice>(buffer: T, length: usize) -> Self { - let buffer = MutableBuffer::::from(buffer.as_ref()); - Bitmap::from_u8_buffer(buffer, length) + let buffer = Vec::::from(buffer.as_ref()); + Bitmap::from_u8_vec(buffer, length) } /// Counts the nulls (unset bits) starting from `offset` bits and for `length` bits. diff --git a/src/bitmap/mutable.rs b/src/bitmap/mutable.rs index 6638a5fbb72..960a0016e4b 100644 --- a/src/bitmap/mutable.rs +++ b/src/bitmap/mutable.rs @@ -2,20 +2,20 @@ use std::hint::unreachable_unchecked; use std::iter::FromIterator; use crate::bitmap::utils::merge_reversed; -use crate::{buffer::MutableBuffer, trusted_len::TrustedLen}; +use crate::trusted_len::TrustedLen; use super::utils::{count_zeros, fmt, get_bit, set, set_bit, BitmapIter}; use super::Bitmap; /// A container to store booleans. 
[`MutableBitmap`] is semantically equivalent /// to [`Vec`], but each value is stored as a single bit, thereby achieving a compression of 8x. -/// This container is the counterpart of [`MutableBuffer`] for boolean values. +/// This container is the counterpart of [`Vec`] for boolean values. /// [`MutableBitmap`] can be converted to a [`Bitmap`] at `O(1)`. /// The main difference against [`Vec`] is that a bitmap cannot be represented as `&[bool]`. /// # Implementation -/// This container is backed by [`MutableBuffer`]. +/// This container is backed by [`Vec`]. pub struct MutableBitmap { - buffer: MutableBuffer, + buffer: Vec, // invariant: length.saturating_add(7) / 8 == buffer.len(); length: usize, } @@ -37,7 +37,7 @@ impl MutableBitmap { #[inline] pub fn new() -> Self { Self { - buffer: MutableBuffer::new(), + buffer: Vec::new(), length: 0, } } @@ -53,7 +53,7 @@ impl MutableBitmap { #[inline] pub fn from_len_zeroed(length: usize) -> Self { Self { - buffer: MutableBuffer::from_len_zeroed(length.saturating_add(7) / 8), + buffer: vec![0; length.saturating_add(7) / 8], length, } } @@ -62,7 +62,7 @@ impl MutableBitmap { #[inline] pub fn with_capacity(capacity: usize) -> Self { Self { - buffer: MutableBuffer::with_capacity(capacity.saturating_add(7) / 8), + buffer: Vec::with_capacity(capacity.saturating_add(7) / 8), length: 0, } } @@ -97,7 +97,7 @@ impl MutableBitmap { #[inline] pub unsafe fn push_unchecked(&mut self, value: bool) { if self.length % 8 == 0 { - self.buffer.push_unchecked(0); + self.buffer.push(0); } let byte = self.buffer.as_mut_slice().last_mut().unwrap(); *byte = set(*byte, self.length % 8, value); @@ -152,9 +152,8 @@ impl MutableBitmap { let existing = self.length.saturating_add(7) / 8; let required = (self.length + additional).saturating_add(7) / 8; // add remaining as full bytes - self.buffer.extend_from_trusted_len_iter( - std::iter::repeat(0b11111111u8).take(required - existing), - ); + self.buffer + 
.extend(std::iter::repeat(0b11111111u8).take(required - existing)); self.length += additional; } } @@ -211,18 +210,18 @@ impl MutableBitmap { } /// Shrinks the capacity of the [`MutableBitmap`] to fit its current length. - /// When the feature `cache_aligned`, the new capacity will be a multiple of 64 bytes. pub fn shrink_to_fit(&mut self) { self.buffer.shrink_to_fit(); } } impl MutableBitmap { - /// Initializes a [`MutableBitmap`] from a [`MutableBuffer`] and a length. + /// Initializes a [`MutableBitmap`] from a [`Vec`] and a length. + /// This function is `O(1)`. /// # Panic /// Panics iff the length is larger than the length of the buffer times 8. #[inline] - pub fn from_buffer(buffer: MutableBuffer, length: usize) -> Self { + pub fn from_vec(buffer: Vec, length: usize) -> Self { assert!(length <= buffer.len() * 8); Self { buffer, length } } @@ -261,7 +260,7 @@ impl FromIterator for MutableBitmap { let mut iterator = iter.into_iter(); let mut buffer = { let byte_capacity: usize = iterator.size_hint().0.saturating_add(7) / 8; - MutableBuffer::with_capacity(byte_capacity) + Vec::with_capacity(byte_capacity) }; let mut length = 0; @@ -301,7 +300,7 @@ impl FromIterator for MutableBitmap { } // Soundness: capacity was allocated above - unsafe { buffer.push_unchecked(byte_accum) }; + buffer.push(byte_accum); if exhausted { break; } @@ -357,12 +356,12 @@ unsafe fn get_byte_unchecked(len: usize, iterator: &mut impl Iterator`] from `iterator` /// # Safety /// The iterator MUST be [`TrustedLen`]. 
#[inline] unsafe fn extend_aligned_trusted_iter_unchecked( - buffer: &mut MutableBuffer, + buffer: &mut Vec, mut iterator: impl Iterator, ) -> usize { let additional_bits = iterator.size_hint().1.unwrap(); @@ -386,14 +385,14 @@ unsafe fn extend_aligned_trusted_iter_unchecked( // remaining complete bytes for _ in 0..(remainder / 8) { let byte = unsafe { get_byte_unchecked(8, &mut iterator) }; - buffer.push_unchecked(byte) + buffer.push(byte) } // remaining bits let remainder = remainder % 8; if remainder > 0 { let byte = unsafe { get_byte_unchecked(remainder, &mut iterator) }; - buffer.push_unchecked(byte) + buffer.push(byte) } additional_bits } @@ -462,7 +461,7 @@ impl MutableBitmap { where I: Iterator, { - let mut buffer = MutableBuffer::::new(); + let mut buffer = Vec::::new(); let length = extend_aligned_trusted_iter_unchecked(&mut buffer, iterator); @@ -498,7 +497,7 @@ impl MutableBitmap { { let length = iterator.size_hint().1.unwrap(); - let mut buffer = MutableBuffer::::from_len_zeroed((length + 7) / 8); + let mut buffer = vec![0u8; (length + 7) / 8]; let chunks = length / 8; let reminder = length % 8; @@ -557,7 +556,7 @@ impl MutableBitmap { .chain(std::iter::once(remaining.as_ref())) .map(|w| merge_reversed(w[0], w[1], 8 - own_offset)) .take(additional.saturating_add(7) / 8); - self.buffer.extend_from_trusted_len_iter(bytes); + self.buffer.extend(bytes); self.length += length; } diff --git a/src/buffer/bytes.rs b/src/buffer/bytes.rs index e057f0fd942..66d8839992d 100644 --- a/src/buffer/bytes.rs +++ b/src/buffer/bytes.rs @@ -7,8 +7,6 @@ use std::{ptr::NonNull, sync::Arc}; use crate::ffi; use crate::types::NativeType; -#[cfg(feature = "cache_aligned")] -use crate::vec::AlignedVec as Vec; /// Mode of deallocating memory regions pub enum Deallocation { @@ -91,9 +89,6 @@ impl Drop for Bytes { fn drop(&mut self) { match &self.deallocation { Deallocation::Native(capacity) => unsafe { - #[cfg(feature = "cache_aligned")] - let _ = Vec::from_raw_parts(self.ptr, 
self.len, *capacity); - #[cfg(not(feature = "cache_aligned"))] let _ = Vec::from_raw_parts(self.ptr.as_ptr(), self.len, *capacity); }, // foreign interface knows how to deallocate itself. @@ -126,6 +121,20 @@ impl Debug for Bytes { } } +impl From> for Bytes { + #[inline] + fn from(mut data: Vec) -> Self { + let ptr = NonNull::new(data.as_mut_ptr()).unwrap(); + let len = data.len(); + let capacity = data.capacity(); + + let result = unsafe { Bytes::new(ptr, len, Deallocation::Native(capacity)) }; + // so that the memory region is not deallocated. + std::mem::forget(data); + result + } +} + // This is sound because `Bytes` is an immutable container unsafe impl Send for Bytes {} unsafe impl Sync for Bytes {} diff --git a/src/buffer/immutable.rs b/src/buffer/immutable.rs index b162afaef0c..a890699afd3 100644 --- a/src/buffer/immutable.rs +++ b/src/buffer/immutable.rs @@ -1,9 +1,8 @@ -use std::{convert::AsRef, iter::FromIterator, sync::Arc, usize}; +use std::{iter::FromIterator, sync::Arc, usize}; use crate::{trusted_len::TrustedLen, types::NativeType}; use super::bytes::Bytes; -use super::mutable::MutableBuffer; /// [`Buffer`] is a contiguous memory region that can /// be shared across thread boundaries. @@ -35,7 +34,7 @@ impl std::fmt::Debug for Buffer { impl Default for Buffer { #[inline] fn default() -> Self { - MutableBuffer::new().into() + Vec::new().into() } } @@ -49,17 +48,15 @@ impl Buffer { /// Creates a new [`Buffer`] filled with zeros. #[inline] pub fn new_zeroed(length: usize) -> Self { - MutableBuffer::from_len_zeroed(length).into() + vec![T::default(); length].into() } /// Takes ownership of [`Vec`]. 
/// # Implementation /// This function is `O(1)` - #[cfg(not(feature = "cache_aligned"))] - #[cfg_attr(docsrs, doc(cfg(not(feature = "cache_aligned"))))] #[inline] - pub fn from_vec(data: Vec) -> Self { - MutableBuffer::from_vec(data).into() + pub fn from_slice>(data: R) -> Self { + data.as_ref().to_vec().into() } /// Auxiliary method to create a new Buffer @@ -141,7 +138,7 @@ impl Buffer { /// ``` #[inline] pub fn from_trusted_len_iter>(iterator: I) -> Self { - MutableBuffer::from_trusted_len_iter(iterator).into() + iterator.collect::>().into() } /// # Safety @@ -151,7 +148,7 @@ impl Buffer { pub fn try_from_trusted_len_iter>>( iterator: I, ) -> std::result::Result { - Ok(MutableBuffer::try_from_trusted_len_iter(iterator)?.into()) + Ok(iterator.collect::, E>>()?.into()) } /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length. @@ -160,7 +157,7 @@ impl Buffer { /// to use it on an iterator that reports an incorrect length. #[inline] pub unsafe fn from_trusted_len_iter_unchecked>(iterator: I) -> Self { - MutableBuffer::from_trusted_len_iter_unchecked(iterator).into() + iterator.collect::>().into() } /// # Safety @@ -173,14 +170,19 @@ impl Buffer { >( iterator: I, ) -> std::result::Result { - Ok(MutableBuffer::try_from_trusted_len_iter_unchecked(iterator)?.into()) + Ok(iterator.collect::, E>>()?.into()) } } -impl> From for Buffer { +impl From> for Buffer { #[inline] - fn from(p: U) -> Self { - MutableBuffer::from(p).into() + fn from(p: Vec) -> Self { + let bytes: Bytes = p.into(); + Self { + offset: 0, + length: bytes.len(), + data: Arc::new(bytes), + } } } @@ -196,6 +198,6 @@ impl std::ops::Deref for Buffer { impl FromIterator for Buffer { #[inline] fn from_iter>(iter: I) -> Self { - MutableBuffer::from_iter(iter).into() + Vec::from_iter(iter).into() } } diff --git a/src/buffer/mod.rs b/src/buffer/mod.rs index f64aad44fd7..dc055f8ce04 100644 --- a/src/buffer/mod.rs +++ b/src/buffer/mod.rs @@ -1,11 +1,8 @@ #![deny(missing_docs)] -//! 
Contains [`Buffer`] and [`MutableBuffer`], containers for all Arrow -//! physical types (e.g. i32, f64). +//! Contains [`Buffer`], an immutable container for all Arrow physical types (e.g. i32, f64). mod immutable; -mod mutable; pub(crate) mod bytes; pub use immutable::Buffer; -pub use mutable::MutableBuffer; diff --git a/src/buffer/mutable.rs b/src/buffer/mutable.rs deleted file mode 100644 index 70c4e4415d0..00000000000 --- a/src/buffer/mutable.rs +++ /dev/null @@ -1,495 +0,0 @@ -use std::iter::FromIterator; -use std::ptr::NonNull; -use std::usize; - -use crate::trusted_len::TrustedLen; -use crate::types::{BitChunk, NativeType}; - -use super::bytes::{Bytes, Deallocation}; -#[cfg(feature = "cache_aligned")] -use crate::vec::AlignedVec as Vec; - -use super::immutable::Buffer; - -/// A [`MutableBuffer`] is this crates' interface to store types that are byte-like such as `i32`. -/// It behaves like a [`Vec`] but can only hold types supported by the arrow format -/// (`u8-u64`, `i8-i128`, `f32,f64`, [`crate::types::days_ms`] and [`crate::types::months_days_ns`]). -/// When the feature `cache_aligned` is active, memory is allocated along cache lines and in multiple of 64 bytes. -/// A [`MutableBuffer`] can be converted to a [`Buffer`] via `.into`. 
-/// # Example -/// ``` -/// # use arrow2::buffer::{Buffer, MutableBuffer}; -/// let mut buffer = MutableBuffer::::new(); -/// buffer.push(256); -/// buffer.extend_from_slice(&[1]); -/// assert_eq!(buffer.as_slice(), &[256, 1]); -/// let buffer: Buffer = buffer.into(); -/// assert_eq!(buffer.as_slice(), &[256, 1]) -/// ``` -pub struct MutableBuffer { - data: Vec, -} - -#[cfg(not(feature = "cache_aligned"))] -#[cfg_attr(docsrs, doc(cfg(not(feature = "cache_aligned"))))] -impl From> for Vec { - fn from(data: MutableBuffer) -> Self { - data.data - } -} - -impl std::fmt::Debug for MutableBuffer { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::Debug::fmt(&**self, f) - } -} - -impl PartialEq for MutableBuffer { - fn eq(&self, other: &Self) -> bool { - self.as_slice() == other.as_slice() - } -} - -impl MutableBuffer { - /// Creates an empty [`MutableBuffer`]. This does not allocate in the heap. - #[inline] - pub fn new() -> Self { - Self { data: Vec::new() } - } - - /// Allocate a new [`MutableBuffer`] with initial capacity to be at least `capacity`. - #[inline] - pub fn with_capacity(capacity: usize) -> Self { - Self { - data: Vec::with_capacity(capacity), - } - } - - /// Takes ownership of [`Vec`]. - #[cfg(not(feature = "cache_aligned"))] - #[cfg_attr(docsrs, doc(cfg(not(feature = "cache_aligned"))))] - #[inline] - pub fn from_vec(data: Vec) -> Self { - Self { data } - } - - /// Allocates a new [MutableBuffer] with `len` and capacity to be at least `len` - /// where data is zeroed. 
- /// # Example - /// ``` - /// # use arrow2::buffer::{Buffer, MutableBuffer}; - /// let mut buffer = MutableBuffer::::from_len_zeroed(127); - /// assert_eq!(buffer.len(), 127); - /// assert!(buffer.capacity() >= 127); - /// let data = buffer.as_mut_slice(); - /// assert_eq!(data[126], 0u8); - /// ``` - #[inline] - pub fn from_len_zeroed(len: usize) -> Self { - #[cfg(not(feature = "cache_aligned"))] - let data = vec![T::default(); len]; - #[cfg(feature = "cache_aligned")] - let data = Vec::from_len_zeroed(len); - Self { data } - } - - /// Ensures that this buffer has at least `self.len + additional` bytes. This re-allocates iff - /// `self.len + additional > capacity`. - /// # Example - /// ``` - /// # use arrow2::buffer::{Buffer, MutableBuffer}; - /// let mut buffer = MutableBuffer::::new(); - /// buffer.reserve(253); // allocates for the first time - /// (0..253u8).for_each(|i| buffer.push(i)); // no reallocation - /// let buffer: Buffer = buffer.into(); - /// assert_eq!(buffer.len(), 253); - /// ``` - // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just - // exits. - #[inline(always)] - pub fn reserve(&mut self, additional: usize) { - self.data.reserve(additional) - } - - /// Resizes the buffer, either truncating its contents (with no change in capacity), or - /// growing it (potentially reallocating it) and writing `value` in the newly available bytes. - /// # Example - /// ``` - /// # use arrow2::buffer::{Buffer, MutableBuffer}; - /// let mut buffer = MutableBuffer::::new(); - /// buffer.resize(253, 2); // allocates for the first time - /// assert_eq!(buffer.as_slice()[252], 2u8); - /// ``` - // For performance reasons, this must be inlined so that the `if` is executed inside the caller, and not as an extra call that just - // exits. 
- #[inline(always)] - pub fn resize(&mut self, new_len: usize, value: T) { - self.data.resize(new_len, value) - } - - /// Returns whether this buffer is empty. - #[inline] - pub fn is_empty(&self) -> bool { - self.data.is_empty() - } - - /// Returns the length (the number of items) in this buffer. - /// The invariant `buffer.len() <= buffer.capacity()` is always upheld. - #[inline] - pub fn len(&self) -> usize { - self.data.len() - } - - /// Returns the total capacity in this buffer. - /// The invariant `buffer.len() <= buffer.capacity()` is always upheld. - #[inline] - pub fn capacity(&self) -> usize { - self.data.capacity() - } - - /// Clear all existing data from this buffer. - #[inline] - pub fn clear(&mut self) { - self.data.clear() - } - - /// Shortens the buffer. - /// If `len` is greater or equal to the buffers' current length, this has no effect. - #[inline] - pub fn truncate(&mut self, len: usize) { - self.data.truncate(len) - } - - /// Returns the data stored in this buffer as a slice. - #[inline] - pub fn as_slice(&self) -> &[T] { - self.data.as_slice() - } - - /// Returns the data stored in this buffer as a mutable slice. - #[inline] - pub fn as_mut_slice(&mut self) -> &mut [T] { - self.data.as_mut_slice() - } - - /// Returns a raw pointer to this buffer's internal memory - /// This pointer is guaranteed to be aligned along cache-lines. - #[inline] - pub fn as_ptr(&self) -> *const T { - self.data.as_ptr() - } - - /// Returns a mutable raw pointer to this buffer's internal memory - /// This pointer is guaranteed to be aligned along cache-lines. - #[inline] - pub fn as_mut_ptr(&mut self) -> *mut T { - self.data.as_mut_ptr() - } - - /// Extends this buffer from a slice of items, increasing its capacity if needed. 
- /// # Example - /// ``` - /// # use arrow2::buffer::MutableBuffer; - /// let mut buffer = MutableBuffer::new(); - /// buffer.extend_from_slice(&[2u32, 0]); - /// assert_eq!(buffer.len(), 2) - /// ``` - #[inline] - pub fn extend_from_slice(&mut self, items: &[T]) { - self.data.extend_from_slice(items) - } - - /// Pushes a new item to the buffer, increasing its capacity if needed. - /// # Example - /// ``` - /// # use arrow2::buffer::MutableBuffer; - /// let mut buffer = MutableBuffer::new(); - /// buffer.push(256u32); - /// assert_eq!(buffer.len(), 1) - /// ``` - #[inline] - pub fn push(&mut self, item: T) { - self.data.push(item) - } - - /// Extends the buffer with a new item without checking for sufficient capacity - /// Safety - /// Caller must ensure that `self.capacity() - self.len() >= 1` - #[inline] - pub(crate) unsafe fn push_unchecked(&mut self, item: T) { - let dst = self.as_mut_ptr().add(self.len()); - std::ptr::write(dst, item); - self.data.set_len(self.data.len() + 1); - } - - /// Sets the length of this buffer. - /// # Safety - /// The caller must uphold the following invariants: - /// * ensure no reads are performed on any - /// item within `[len, capacity - len]` - /// * ensure `len <= self.capacity()` - #[inline] - pub unsafe fn set_len(&mut self, len: usize) { - debug_assert!(len <= self.capacity()); - self.data.set_len(len); - } - - /// Extends this buffer by `additional` items of value `value`. - #[inline] - pub fn extend_constant(&mut self, additional: usize, value: T) { - self.resize(self.len() + additional, value) - } - - /// Shrinks the capacity of the [`MutableBuffer`] to fit its current length. - /// When the feature `cache_aligned`, the new capacity will be a multiple of 64 bytes. 
- /// - /// # Example - /// ``` - /// # use arrow2::buffer::MutableBuffer; - /// - /// let mut buffer = MutableBuffer::::with_capacity(16); - /// assert_eq!(buffer.capacity(), 16); - /// buffer.push(1); - /// buffer.push(2); - /// - /// buffer.shrink_to_fit(); - /// assert!(buffer.capacity() < 16); // 2 or 8 depending on feature `cache_aligned` - /// ``` - pub fn shrink_to_fit(&mut self) { - self.data.shrink_to_fit(); - } -} - -impl Extend for MutableBuffer { - fn extend>(&mut self, iter: T) { - self.data.extend(iter) - } -} - -impl MutableBuffer { - /// Extends `self` from a [`TrustedLen`] iterator. - #[inline] - pub fn extend_from_trusted_len_iter>(&mut self, iterator: I) { - unsafe { self.extend_from_trusted_len_iter_unchecked(iterator) } - } - - /// Extends `self` from an iterator. - /// # Safety - /// This method assumes that the iterator's size is correct and is undefined behavior - /// to use it on an iterator that reports an incorrect length. - // This inline has been validated to offer 50% improvement in operations like `take`. - #[inline] - pub unsafe fn extend_from_trusted_len_iter_unchecked>( - &mut self, - iterator: I, - ) { - let (_, upper) = iterator.size_hint(); - let upper = upper.expect("trusted_len_iter requires an upper limit"); - let len = upper; - - let self_len = self.len(); - - self.reserve(len); - let mut dst = self.as_mut_ptr().add(self_len); - for item in iterator { - // note how there is no reserve here (compared with `extend_from_iter`) - std::ptr::write(dst, item); - dst = dst.add(1); - } - assert_eq!( - dst.offset_from(self.as_ptr().add(self_len)) as usize, - upper, - "Trusted iterator length was not accurately reported" - ); - self.set_len(self_len + len); - } - - /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length. - /// Prefer this to `collect` whenever possible, as it is faster ~60% faster. 
- /// # Example - /// ``` - /// # use arrow2::buffer::MutableBuffer; - /// let v = vec![1u32]; - /// let iter = v.iter().map(|x| x * 2); - /// let buffer = MutableBuffer::from_trusted_len_iter(iter); - /// assert_eq!(buffer.len(), 1) - /// ``` - /// # Safety - /// This method assumes that the iterator's size is correct and is undefined behavior - /// to use it on an iterator that reports an incorrect length. - // This implementation is required for two reasons: - // 1. there is no trait `TrustedLen` in stable rust and therefore - // we can't specialize `extend` for `TrustedLen` like `Vec` does. - // 2. `from_trusted_len_iter` is faster. - #[inline] - pub fn from_trusted_len_iter + TrustedLen>(iterator: I) -> Self { - let mut buffer = MutableBuffer::new(); - buffer.extend_from_trusted_len_iter(iterator); - buffer - } - - /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length. - /// Prefer this to `collect` whenever possible, as it is faster ~60% faster. - /// # Safety - /// This method assumes that the iterator's size is correct and is undefined behavior - /// to use it on an iterator that reports an incorrect length. - // This implementation is required for two reasons: - // 1. there is no trait `TrustedLen` in stable rust and therefore - // we can't specialize `extend` for `TrustedLen` like `Vec` does. - // 2. `from_trusted_len_iter` is faster. - #[inline] - pub unsafe fn from_trusted_len_iter_unchecked>(iterator: I) -> Self { - let mut buffer = MutableBuffer::new(); - buffer.extend_from_trusted_len_iter_unchecked(iterator); - buffer - } - - /// Creates a [`MutableBuffer`] from a fallible [`TrustedLen`] iterator. - #[inline] - pub fn try_from_trusted_len_iter>>( - iterator: I, - ) -> std::result::Result { - unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) } - } - - /// Creates a [`MutableBuffer`] from an [`Iterator`] with a trusted (upper) length or errors - /// if any of the items of the iterator is an error. 
- /// Prefer this to `collect` whenever possible, as it is faster ~60% faster. - /// The only difference between this and [`Self::try_from_trusted_len_iter`] is that this works - /// on any iterator, while `try_from_trusted_len_iter` requires the iterator to implement the trait - /// [`TrustedLen`], which not every iterator currently implements due to limitations of the Rust compiler. - /// # Safety - /// This method assumes that the iterator's size is correct and is undefined behavior - /// to use it on an iterator that reports an incorrect length. - // This inline has been validated to offer 50% improvement in operations like `take`. - #[inline] - pub unsafe fn try_from_trusted_len_iter_unchecked< - E, - I: Iterator>, - >( - iterator: I, - ) -> std::result::Result { - let (_, upper) = iterator.size_hint(); - let upper = upper.expect("try_from_trusted_len_iter requires an upper limit"); - let len = upper; - - let mut buffer = MutableBuffer::with_capacity(len); - - let mut dst = buffer.as_mut_ptr(); - for item in iterator { - std::ptr::write(dst, item?); - dst = dst.add(1); - } - assert_eq!( - dst.offset_from(buffer.as_ptr()) as usize, - upper, - "Trusted iterator length was not accurately reported" - ); - buffer.set_len(len); - Ok(buffer) - } -} - -impl FromIterator for MutableBuffer { - fn from_iter>(iter: I) -> Self { - let data = Vec::from_iter(iter); - Self { data } - } -} - -impl Default for MutableBuffer { - fn default() -> Self { - Self::new() - } -} - -impl std::ops::Deref for MutableBuffer { - type Target = [T]; - - #[inline] - fn deref(&self) -> &[T] { - &self.data - } -} - -impl std::ops::DerefMut for MutableBuffer { - #[inline] - fn deref_mut(&mut self) -> &mut [T] { - &mut self.data - } -} - -impl> From

for MutableBuffer { - #[inline] - fn from(slice: P) -> Self { - let mut buffer = MutableBuffer::new(); - buffer.extend_from_slice(slice.as_ref()); - buffer - } -} - -impl From> for Buffer { - #[inline] - fn from(buffer: MutableBuffer) -> Self { - Self::from_bytes(buffer.into()) - } -} - -impl From> for Bytes { - #[inline] - fn from(buffer: MutableBuffer) -> Self { - let mut data = buffer.data; - let ptr = NonNull::new(data.as_mut_ptr()).unwrap(); - let len = data.len(); - let capacity = data.capacity(); - - let result = unsafe { Bytes::new(ptr, len, Deallocation::Native(capacity)) }; - // so that the memory region is not deallocated. - std::mem::forget(data); - result - } -} - -impl MutableBuffer { - /// Creates a [`MutableBuffer`] from an iterator of `u64`. - #[inline] - pub fn from_chunk_iter>(iter: I) -> Self { - // TrustedLen - unsafe { Self::from_chunk_iter_unchecked(iter) } - } - - /// # Safety - /// This method assumes that the iterator's size is correct and is undefined behavior - /// to use it on an iterator that reports an incorrect length. 
- #[inline] - pub unsafe fn from_chunk_iter_unchecked>( - iterator: I, - ) -> Self { - let (_, upper) = iterator.size_hint(); - let upper = upper.expect("try_from_trusted_len_iter requires an upper limit"); - let len = upper * std::mem::size_of::(); - - let mut buffer = MutableBuffer::with_capacity(len); - - let mut dst = buffer.as_mut_ptr(); - for item in iterator { - let bytes = item.to_ne_bytes(); - for i in 0..std::mem::size_of::() { - std::ptr::write(dst, bytes[i]); - dst = dst.add(1); - } - } - assert_eq!( - dst.offset_from(buffer.as_ptr()) as usize, - len, - "Trusted iterator length was not accurately reported" - ); - buffer.set_len(len); - buffer - } -} - -// This is sound because `NativeType` is `Send+Sync`, and -// `MutableBuffer` has the invariants of `Vec` (which is `Send+Sync`) -unsafe impl Send for MutableBuffer {} -unsafe impl Sync for MutableBuffer {} diff --git a/src/compute/README.md b/src/compute/README.md index 8db46ce67f3..247a0780012 100644 --- a/src/compute/README.md +++ b/src/compute/README.md @@ -19,7 +19,7 @@ This module is composed by independent operations common in analytics. Below are * Kernels SHOULD use the arrays' logical type to decide whether kernels can be applied on an array. For example, `Date32 + Date32` is meaningless and SHOULD NOT be implemented. -* Kernels SHOULD be implemented via `clone`, `slice` or the `iterator` API provided by `Buffer`, `Bitmap`, `MutableBuffer` or `MutableBitmap`. +* Kernels SHOULD be implemented via `clone`, `slice` or the `iterator` API provided by `Buffer`, `Bitmap`, `Vec` or `MutableBitmap`. * Kernels MUST NOT use any API to read bits other than the ones provided by `Bitmap`. 
diff --git a/src/compute/cast/primitive_to.rs b/src/compute/cast/primitive_to.rs index bbffdbdb2fe..5b0f5fa4f0e 100644 --- a/src/compute/cast/primitive_to.rs +++ b/src/compute/cast/primitive_to.rs @@ -1,6 +1,5 @@ use std::hash::Hash; -use crate::buffer::MutableBuffer; use crate::error::Result; use crate::{ array::*, @@ -17,8 +16,8 @@ use super::CastOptions; pub fn primitive_to_binary( from: &PrimitiveArray, ) -> BinaryArray { - let mut values: MutableBuffer = MutableBuffer::with_capacity(from.len()); - let mut offsets: MutableBuffer = MutableBuffer::with_capacity(from.len() + 1); + let mut values: Vec = Vec::with_capacity(from.len()); + let mut offsets: Vec = Vec::with_capacity(from.len() + 1); offsets.push(O::default()); let mut offset: usize = 0; @@ -83,8 +82,8 @@ where pub fn primitive_to_utf8( from: &PrimitiveArray, ) -> Utf8Array { - let mut values: MutableBuffer = MutableBuffer::with_capacity(from.len()); - let mut offsets: MutableBuffer = MutableBuffer::with_capacity(from.len() + 1); + let mut values: Vec = Vec::with_capacity(from.len()); + let mut offsets: Vec = Vec::with_capacity(from.len() + 1); offsets.push(O::default()); let mut offset: usize = 0; diff --git a/src/compute/comparison/primitive.rs b/src/compute/comparison/primitive.rs index 5a427ecfd6a..257db4ba4c7 100644 --- a/src/compute/comparison/primitive.rs +++ b/src/compute/comparison/primitive.rs @@ -2,7 +2,6 @@ use crate::{ array::{BooleanArray, PrimitiveArray}, bitmap::{Bitmap, MutableBitmap}, - buffer::MutableBuffer, datatypes::DataType, types::NativeType, }; @@ -22,20 +21,20 @@ where let rhs_chunks_iter = rhs.chunks_exact(8); let rhs_remainder = rhs_chunks_iter.remainder(); - let mut values = MutableBuffer::with_capacity((lhs.len() + 7) / 8); + let mut values = Vec::with_capacity((lhs.len() + 7) / 8); let iterator = lhs_chunks_iter.zip(rhs_chunks_iter).map(|(lhs, rhs)| { let lhs = T::Simd::from_chunk(lhs); let rhs = T::Simd::from_chunk(rhs); op(lhs, rhs) }); - 
values.extend_from_trusted_len_iter(iterator); + values.extend(iterator); if !lhs_remainder.is_empty() { let lhs = T::Simd::from_incomplete_chunk(lhs_remainder, T::default()); let rhs = T::Simd::from_incomplete_chunk(rhs_remainder, T::default()); values.push(op(lhs, rhs)) }; - MutableBitmap::from_buffer(values, lhs.len()) + MutableBitmap::from_vec(values, lhs.len()) } /// Evaluate `op(lhs, rhs)` for [`PrimitiveArray`]s using a specified @@ -65,12 +64,12 @@ where let lhs_chunks_iter = lhs.values().chunks_exact(8); let lhs_remainder = lhs_chunks_iter.remainder(); - let mut values = MutableBuffer::with_capacity((lhs.len() + 7) / 8); + let mut values = Vec::with_capacity((lhs.len() + 7) / 8); let iterator = lhs_chunks_iter.map(|lhs| { let lhs = T::Simd::from_chunk(lhs); op(lhs, rhs) }); - values.extend_from_trusted_len_iter(iterator); + values.extend(iterator); if !lhs_remainder.is_empty() { let lhs = T::Simd::from_incomplete_chunk(lhs_remainder, T::default()); @@ -79,7 +78,7 @@ where BooleanArray::from_data( DataType::Boolean, - Bitmap::from_u8_buffer(values, lhs.len()), + Bitmap::from_u8_vec(values, lhs.len()), validity, ) } diff --git a/src/compute/filter.rs b/src/compute/filter.rs index 403df3e0f73..af0a6ef8e04 100644 --- a/src/compute/filter.rs +++ b/src/compute/filter.rs @@ -2,9 +2,9 @@ use crate::array::growable::{make_growable, Growable}; use crate::bitmap::{utils::SlicesIterator, Bitmap, MutableBitmap}; use crate::datatypes::DataType; +use crate::error::Result; use crate::record_batch::RecordBatch; use crate::{array::*, types::NativeType}; -use crate::{buffer::MutableBuffer, error::Result}; /// Function that can filter arbitrary arrays pub type Filter<'a> = Box Box + 'a + Send + Sync>; @@ -16,7 +16,7 @@ fn filter_nonnull_primitive( assert_eq!(array.len(), mask.len()); let filter_count = mask.len() - mask.null_count(); - let mut buffer = MutableBuffer::::with_capacity(filter_count); + let mut buffer = Vec::::with_capacity(filter_count); if let Some(validity) = 
array.validity() { let mut new_validity = MutableBitmap::with_capacity(filter_count); @@ -28,7 +28,7 @@ fn filter_nonnull_primitive( .filter(|x| x.1) .map(|x| x.0) .for_each(|(item, is_valid)| unsafe { - buffer.push_unchecked(*item); + buffer.push(*item); new_validity.push_unchecked(is_valid); }); @@ -44,7 +44,7 @@ fn filter_nonnull_primitive( .zip(mask.iter()) .filter(|x| x.1) .map(|x| x.0) - .for_each(|item| unsafe { buffer.push_unchecked(*item) }); + .for_each(|item| buffer.push(*item)); PrimitiveArray::::from_data(array.data_type().clone(), buffer.into(), None) } diff --git a/src/compute/sort/boolean.rs b/src/compute/sort/boolean.rs index 1bf2b6f4a16..d5dd5df191d 100644 --- a/src/compute/sort/boolean.rs +++ b/src/compute/sort/boolean.rs @@ -1,6 +1,5 @@ use crate::{ array::{BooleanArray, PrimitiveArray}, - buffer::MutableBuffer, types::Index, }; @@ -32,14 +31,14 @@ pub fn sort_boolean( nulls.reverse(); } - let mut values = MutableBuffer::::with_capacity(values.len()); + let mut values = Vec::::with_capacity(values.len()); if options.nulls_first { values.extend_from_slice(nulls.as_slice()); - valids.iter().for_each(|x| values.push(x.0)); + values.extend(valids.iter().map(|x| x.0)); } else { // nulls last - valids.iter().for_each(|x| values.push(x.0)); + values.extend(valids.iter().map(|x| x.0)); values.extend_from_slice(nulls.as_slice()); } diff --git a/src/compute/sort/common.rs b/src/compute/sort/common.rs index 04a6c635078..8153ffbce82 100644 --- a/src/compute/sort/common.rs +++ b/src/compute/sort/common.rs @@ -1,4 +1,4 @@ -use crate::{array::PrimitiveArray, bitmap::Bitmap, buffer::MutableBuffer, types::Index}; +use crate::{array::PrimitiveArray, bitmap::Bitmap, types::Index}; use super::SortOptions; @@ -96,7 +96,7 @@ where let limit = limit.min(length); let indices = if let Some(validity) = validity { - let mut indices = MutableBuffer::::from_len_zeroed(length); + let mut indices = vec![I::default(); length]; if options.nulls_first { let mut nulls = 0; let 
mut valids = 0; @@ -153,11 +153,8 @@ where indices } else { - let mut indices = MutableBuffer::from_trusted_len_iter(I::range(0, length).unwrap()); + let mut indices = I::range(0, length).unwrap().collect::>(); - // Soundness: - // indices are by construction `< values.len()` - // limit is by construction `< values.len()` sort_unstable_by(&mut indices, get, cmp, descending, limit); indices.truncate(limit); indices.shrink_to_fit(); diff --git a/src/compute/sort/lex_sort.rs b/src/compute/sort/lex_sort.rs index 77856902c4c..97f1e1edd94 100644 --- a/src/compute/sort/lex_sort.rs +++ b/src/compute/sort/lex_sort.rs @@ -4,7 +4,6 @@ use crate::compute::take; use crate::error::{ArrowError, Result}; use crate::{ array::{ord, Array, PrimitiveArray}, - buffer::MutableBuffer, types::Index, }; @@ -168,12 +167,7 @@ pub fn lexsort_to_indices( Ordering::Equal }; - // Safety: `0..row_count` is TrustedLen - let mut values = unsafe { - MutableBuffer::from_trusted_len_iter_unchecked( - (0..row_count).map(|x| I::from_usize(x).unwrap()), - ) - }; + let mut values = I::range(0, row_count).unwrap().collect::>(); if let Some(limit) = limit { let limit = limit.min(row_count); diff --git a/src/compute/sort/mod.rs b/src/compute/sort/mod.rs index 43fd2070cb5..adcec045c4d 100644 --- a/src/compute/sort/mod.rs +++ b/src/compute/sort/mod.rs @@ -2,7 +2,6 @@ use std::cmp::Ordering; use crate::array::ord; -use crate::buffer::MutableBuffer; use crate::compute::take; use crate::datatypes::*; use crate::error::{ArrowError, Result}; @@ -351,13 +350,9 @@ where let values = valids.iter().map(|tuple| tuple.0); let mut values = if options.nulls_first { - let mut buffer = MutableBuffer::::from_trusted_len_iter(null_indices.into_iter()); - buffer.extend(values); - buffer + null_indices.into_iter().chain(values).collect::>() } else { - let mut buffer = MutableBuffer::::from_trusted_len_iter(values); - buffer.extend(null_indices); - buffer + values.chain(null_indices.into_iter()).collect::>() }; 
values.truncate(limit.unwrap_or_else(|| values.len())); diff --git a/src/compute/sort/primitive/sort.rs b/src/compute/sort/primitive/sort.rs index 144b96cc19a..6d854ead200 100644 --- a/src/compute/sort/primitive/sort.rs +++ b/src/compute/sort/primitive/sort.rs @@ -16,7 +16,7 @@ // under the License. use crate::bitmap::Bitmap; -use crate::buffer::{Buffer, MutableBuffer}; +use crate::buffer::Buffer; use crate::{ array::{Array, PrimitiveArray}, bitmap::{utils::SlicesIterator, MutableBitmap}, @@ -71,8 +71,7 @@ where { assert!(limit <= values.len()); if options.nulls_first && limit < validity.null_count() { - let mut buffer = MutableBuffer::::with_capacity(limit); - buffer.extend_constant(limit, T::default()); + let buffer = vec![T::default(); limit]; let bitmap = MutableBitmap::from_trusted_len_iter(std::iter::repeat(false).take(limit)); return (buffer.into(), bitmap.into()); } @@ -80,7 +79,7 @@ where let nulls = std::iter::repeat(false).take(validity.null_count()); let valids = std::iter::repeat(true).take(values.len() - validity.null_count()); - let mut buffer = MutableBuffer::::with_capacity(values.len()); + let mut buffer = Vec::::with_capacity(values.len()); let mut new_validity = MutableBitmap::with_capacity(values.len()); let slices = SlicesIterator::new(validity); @@ -89,7 +88,7 @@ where new_validity.extend_from_trusted_len_iter(nulls.chain(valids).take(limit)); // extend buffer with constants followed by non-null values - buffer.extend_constant(validity.null_count(), T::default()); + buffer.resize(validity.null_count(), T::default()); for (start, len) in slices { buffer.extend_from_slice(&values[start..start + len]) } @@ -120,7 +119,7 @@ where if limit > values.len() - validity.null_count() { // extend remaining with nulls - buffer.extend_constant(validity.null_count(), T::default()); + buffer.resize(buffer.len() + validity.null_count(), T::default()); } }; // values are sorted, we can now truncate the remaining. 
@@ -150,7 +149,7 @@ where let (buffer, validity) = if let Some(validity) = validity { sort_nullable(values, validity, cmp, options, limit) } else { - let mut buffer = MutableBuffer::::new(); + let mut buffer = Vec::::new(); buffer.extend_from_slice(values); sort_values(buffer.as_mut_slice(), cmp, options.descending, limit); diff --git a/src/compute/substring.rs b/src/compute/substring.rs index 56ad9239775..2502eb3d9ba 100644 --- a/src/compute/substring.rs +++ b/src/compute/substring.rs @@ -17,8 +17,8 @@ //! Defines kernel to extract a substring of a \[Large\]StringArray -use crate::{array::*, buffer::MutableBuffer}; use crate::{ + array::*, datatypes::DataType, error::{ArrowError, Result}, }; @@ -76,8 +76,8 @@ fn binary_substring( let offsets = array.offsets(); let values = array.values(); - let mut new_offsets = MutableBuffer::::with_capacity(array.len() + 1); - let mut new_values = MutableBuffer::::new(); // we have no way to estimate how much this will be. + let mut new_offsets = Vec::::with_capacity(array.len() + 1); + let mut new_values = Vec::::new(); // we have no way to estimate how much this will be. 
let mut length_so_far = O::zero(); new_offsets.push(length_so_far); diff --git a/src/compute/take/generic_binary.rs b/src/compute/take/generic_binary.rs index 733cb56c55a..87b8f01fdfe 100644 --- a/src/compute/take/generic_binary.rs +++ b/src/compute/take/generic_binary.rs @@ -1,14 +1,14 @@ use crate::{ array::{GenericBinaryArray, Offset, PrimitiveArray}, bitmap::{Bitmap, MutableBitmap}, - buffer::{Buffer, MutableBuffer}, + buffer::Buffer, }; use super::Index; pub fn take_values(length: O, starts: &[O], offsets: &[O], values: &[u8]) -> Buffer { let new_len = length.to_usize(); - let mut buffer = MutableBuffer::with_capacity(new_len); + let mut buffer = Vec::with_capacity(new_len); starts .iter() .zip(offsets.windows(2)) @@ -27,7 +27,7 @@ pub fn take_no_validity( indices: &[I], ) -> (Buffer, Buffer, Option) { let mut length = O::default(); - let mut buffer = MutableBuffer::::new(); + let mut buffer = Vec::::new(); let offsets = indices.iter().map(|index| { let index = index.to_usize(); let start = offsets[index]; @@ -61,7 +61,7 @@ pub fn take_values_validity>( let offsets = values.offsets(); let values_values = values.values(); - let mut starts = MutableBuffer::::with_capacity(indices.len()); + let mut starts = Vec::::with_capacity(indices.len()); let offsets = indices.iter().map(|index| { let index = index.to_usize(); let start = offsets[index]; @@ -85,7 +85,7 @@ pub fn take_indices_validity( ) -> (Buffer, Buffer, Option) { let mut length = O::default(); - let mut starts = MutableBuffer::::with_capacity(indices.len()); + let mut starts = Vec::::with_capacity(indices.len()); let offsets = indices.values().iter().map(|index| { let index = index.to_usize(); match offsets.get(index + 1) { @@ -119,7 +119,7 @@ pub fn take_values_indices_validity::with_capacity(indices.len()); + let mut starts = Vec::::with_capacity(indices.len()); let offsets = indices.iter().map(|index| { match index { Some(index) => { diff --git a/src/compute/take/list.rs b/src/compute/take/list.rs 
index 6e877e55b88..5e8b1d10e7c 100644 --- a/src/compute/take/list.rs +++ b/src/compute/take/list.rs @@ -62,156 +62,3 @@ pub fn take( growable.into() } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::{ - array::{Array, MutableListArray, MutablePrimitiveArray, PrimitiveArray, TryExtend}, - bitmap::Bitmap, - buffer::Buffer, - datatypes::DataType, - }; - use std::sync::Arc; - - #[test] - fn list_with_no_none() { - let values = Buffer::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); - let values = PrimitiveArray::::from_data(DataType::Int32, values, None); - - let data_type = ListArray::::default_datatype(DataType::Int32); - let array = ListArray::::from_data( - data_type, - Buffer::from([0, 2, 2, 6, 9, 10]), - Arc::new(values), - None, - ); - - let indices = PrimitiveArray::from([Some(4i32), Some(1), Some(3)]); - let result = take(&array, &indices); - - let expected_values = Buffer::from([9, 6, 7, 8]); - let expected_values = - PrimitiveArray::::from_data(DataType::Int32, expected_values, None); - let expected_type = ListArray::::default_datatype(DataType::Int32); - let expected = ListArray::::from_data( - expected_type, - Buffer::from([0, 1, 1, 4]), - Arc::new(expected_values), - None, - ); - - assert_eq!(result, expected) - } - - #[test] - fn list_with_none() { - let values = Buffer::from([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); - let values = PrimitiveArray::::from_data(DataType::Int32, values, None); - - let validity_values = vec![true, false, true, true, true]; - let validity = Bitmap::from_trusted_len_iter(validity_values.into_iter()); - - let data_type = ListArray::::default_datatype(DataType::Int32); - let array = ListArray::::from_data( - data_type, - Buffer::from([0, 2, 2, 6, 9, 10]), - Arc::new(values), - Some(validity), - ); - - let indices = PrimitiveArray::from([Some(4i32), None, Some(2), Some(3)]); - let result = take(&array, &indices); - - let data_expected = vec![ - Some(vec![Some(9i32)]), - None, - Some(vec![Some(2i32), Some(3), Some(4), Some(5)]), - 
Some(vec![Some(6i32), Some(7), Some(8)]), - ]; - - let mut expected = MutableListArray::>::new(); - expected.try_extend(data_expected).unwrap(); - let expected: ListArray = expected.into(); - - assert_eq!(result, expected) - } - - #[test] - fn list_both_validity() { - let values = vec![ - Some(vec![Some(2i32), Some(3), Some(4), Some(5)]), - None, - Some(vec![Some(9i32)]), - Some(vec![Some(6i32), Some(7), Some(8)]), - ]; - - let mut array = MutableListArray::>::new(); - array.try_extend(values).unwrap(); - let array: ListArray = array.into(); - - let indices = PrimitiveArray::from([Some(3i32), None, Some(1), Some(0)]); - let result = take(&array, &indices); - - let data_expected = vec![ - Some(vec![Some(6i32), Some(7), Some(8)]), - None, - None, - Some(vec![Some(2i32), Some(3), Some(4), Some(5)]), - ]; - let mut expected = MutableListArray::>::new(); - expected.try_extend(data_expected).unwrap(); - let expected: ListArray = expected.into(); - - assert_eq!(result, expected) - } - - #[test] - fn test_nested() { - let values = Buffer::from([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); - let values = PrimitiveArray::::from_data(DataType::Int32, values, None); - - let data_type = ListArray::::default_datatype(DataType::Int32); - let array = ListArray::::from_data( - data_type, - Buffer::from([0, 2, 4, 7, 7, 8, 10]), - Arc::new(values), - None, - ); - - let data_type = ListArray::::default_datatype(array.data_type().clone()); - let nested = ListArray::::from_data( - data_type, - Buffer::from([0, 2, 5, 6]), - Arc::new(array), - None, - ); - - let indices = PrimitiveArray::from([Some(0i32), Some(1)]); - let result = take(&nested, &indices); - - // expected data - let expected_values = Buffer::from([1, 2, 3, 4, 5, 6, 7, 8]); - let expected_values = - PrimitiveArray::::from_data(DataType::Int32, expected_values, None); - - let expected_data_type = ListArray::::default_datatype(DataType::Int32); - let expected_array = ListArray::::from_data( - expected_data_type, - Buffer::from([0, 2, 4, 
7, 7, 8]), - Arc::new(expected_values), - None, - ); - - let expected_data_type = - ListArray::::default_datatype(expected_array.data_type().clone()); - let expected_nested = ListArray::::from_data( - expected_data_type, - Buffer::from([0, 2, 5]), - Arc::new(expected_array), - None, - ); - - assert_eq!(result, expected_nested); - } -} diff --git a/src/compute/take/primitive.rs b/src/compute/take/primitive.rs index 5f88643a112..70b358a5582 100644 --- a/src/compute/take/primitive.rs +++ b/src/compute/take/primitive.rs @@ -1,7 +1,7 @@ use crate::{ array::{Array, PrimitiveArray}, bitmap::{Bitmap, MutableBitmap}, - buffer::{Buffer, MutableBuffer}, + buffer::Buffer, types::NativeType, }; @@ -12,10 +12,12 @@ fn take_no_validity( values: &[T], indices: &[I], ) -> (Buffer, Option) { - let values = indices.iter().map(|index| values[index.to_usize()]); - let buffer = MutableBuffer::from_trusted_len_iter(values); + let values = indices + .iter() + .map(|index| values[index.to_usize()]) + .collect::>(); - (buffer.into(), None) + (values.into(), None) } // take implementation when only values contain nulls @@ -31,10 +33,13 @@ fn take_values_validity( let validity = MutableBitmap::from_trusted_len_iter(validity); let values_values = values.values(); - let values = indices.iter().map(|index| values_values[index.to_usize()]); - let buffer = MutableBuffer::from_trusted_len_iter(values); - (buffer.into(), validity.into()) + let values = indices + .iter() + .map(|index| values_values[index.to_usize()]) + .collect::>(); + + (values.into(), validity.into()) } // take implementation when only indices contain nulls @@ -43,23 +48,26 @@ fn take_indices_validity( indices: &PrimitiveArray, ) -> (Buffer, Option) { let validity = indices.validity().unwrap(); - let values = indices.values().iter().enumerate().map(|(i, index)| { - let index = index.to_usize(); - match values.get(index) { - Some(value) => *value, - None => { - if !validity.get_bit(i) { - T::default() - } else { - 
panic!("Out-of-bounds index {}", index) + let values = indices + .values() + .iter() + .enumerate() + .map(|(i, index)| { + let index = index.to_usize(); + match values.get(index) { + Some(value) => *value, + None => { + if !validity.get_bit(i) { + T::default() + } else { + panic!("Out-of-bounds index {}", index) + } } } - } - }); + }) + .collect::>(); - let buffer = MutableBuffer::from_trusted_len_iter(values); - - (buffer.into(), indices.validity().cloned()) + (values.into(), indices.validity().cloned()) } // take implementation when both values and indices contain nulls @@ -72,19 +80,21 @@ fn take_values_indices_validity( let values_validity = values.validity().unwrap(); let values_values = values.values(); - let values = indices.iter().map(|index| match index { - Some(index) => { - let index = index.to_usize(); - bitmap.push(values_validity.get_bit(index)); - values_values[index] - } - None => { - bitmap.push(false); - T::default() - } - }); - let buffer = MutableBuffer::from_trusted_len_iter(values); - (buffer.into(), bitmap.into()) + let values = indices + .iter() + .map(|index| match index { + Some(index) => { + let index = index.to_usize(); + bitmap.push(values_validity.get_bit(index)); + values_values[index] + } + None => { + bitmap.push(false); + T::default() + } + }) + .collect::>(); + (values.into(), bitmap.into()) } /// `take` implementation for primitive arrays diff --git a/src/doc/lib.md b/src/doc/lib.md index 270fe26810e..b9879f60879 100644 --- a/src/doc/lib.md +++ b/src/doc/lib.md @@ -86,6 +86,3 @@ functionality, such as: The feature `simd` (not part of `full`) produces more explicit SIMD instructions via [`packed_simd`](https://github.com/rust-lang/packed_simd), but requires the nightly channel. - -The feature `cache_aligned` uses a custom allocator instead of `Vec`, which may be -more performant but is not interoperable with `Vec`. 
diff --git a/src/io/avro/read/nested.rs b/src/io/avro/read/nested.rs index f60281c72b2..a72ddb08486 100644 --- a/src/io/avro/read/nested.rs +++ b/src/io/avro/read/nested.rs @@ -2,7 +2,6 @@ use std::sync::Arc; use crate::array::*; use crate::bitmap::*; -use crate::buffer::*; use crate::datatypes::*; use crate::error::*; @@ -10,14 +9,14 @@ use crate::error::*; #[derive(Debug)] pub struct DynMutableListArray { data_type: DataType, - offsets: MutableBuffer, + offsets: Vec, values: Box, validity: Option, } impl DynMutableListArray { pub fn new_from(values: Box, data_type: DataType, capacity: usize) -> Self { - let mut offsets = MutableBuffer::::with_capacity(capacity + 1); + let mut offsets = Vec::::with_capacity(capacity + 1); offsets.push(O::default()); assert_eq!(values.len(), 0); ListArray::::get_child_field(&data_type); diff --git a/src/io/ipc/read/array/binary.rs b/src/io/ipc/read/array/binary.rs index 485e2451637..ec286b4cc17 100644 --- a/src/io/ipc/read/array/binary.rs +++ b/src/io/ipc/read/array/binary.rs @@ -45,7 +45,7 @@ where compression, ) // Older versions of the IPC format sometimes do not report an offset - .or_else(|_| Result::Ok(Buffer::::from(&[O::default()])))?; + .or_else(|_| Result::Ok(Buffer::::from(vec![O::default()])))?; let last_offset = offsets.as_slice()[offsets.len() - 1].to_usize(); let values = read_buffer( diff --git a/src/io/ipc/read/array/list.rs b/src/io/ipc/read/array/list.rs index 65495cd3aa3..d01f8a1ce2e 100644 --- a/src/io/ipc/read/array/list.rs +++ b/src/io/ipc/read/array/list.rs @@ -48,7 +48,7 @@ where compression, ) // Older versions of the IPC format sometimes do not report an offset - .or_else(|_| Result::Ok(Buffer::::from(&[O::default()])))?; + .or_else(|_| Result::Ok(Buffer::::from(vec![O::default()])))?; let field = ListArray::::get_child_field(&data_type); diff --git a/src/io/ipc/read/array/map.rs b/src/io/ipc/read/array/map.rs index bbd518e8d4b..c1cd0670bfc 100644 --- a/src/io/ipc/read/array/map.rs +++ 
b/src/io/ipc/read/array/map.rs @@ -44,7 +44,7 @@ pub fn read_map( compression, ) // Older versions of the IPC format sometimes do not report an offset - .or_else(|_| Result::Ok(Buffer::::from(&[0i32])))?; + .or_else(|_| Result::Ok(Buffer::::from(vec![0i32])))?; let field = MapArray::get_field(&data_type); diff --git a/src/io/ipc/read/array/utf8.rs b/src/io/ipc/read/array/utf8.rs index 93d024b9f13..d2851a1c4d3 100644 --- a/src/io/ipc/read/array/utf8.rs +++ b/src/io/ipc/read/array/utf8.rs @@ -45,7 +45,7 @@ where compression, ) // Older versions of the IPC format sometimes do not report an offset - .or_else(|_| Result::Ok(Buffer::::from(&[O::default()])))?; + .or_else(|_| Result::Ok(Buffer::::from(vec![O::default()])))?; let last_offset = offsets.as_slice()[offsets.len() - 1].to_usize(); let values = read_buffer( diff --git a/src/io/ipc/read/read_basic.rs b/src/io/ipc/read/read_basic.rs index 356938ec4f8..840bab44bee 100644 --- a/src/io/ipc/read/read_basic.rs +++ b/src/io/ipc/read/read_basic.rs @@ -6,7 +6,7 @@ use arrow_format::ipc::Message::{BodyCompression, CompressionType}; use crate::buffer::Buffer; use crate::error::{ArrowError, Result}; -use crate::{bitmap::Bitmap, buffer::MutableBuffer, types::NativeType}; +use crate::{bitmap::Bitmap, types::NativeType}; use super::super::compression; use super::super::endianess::is_native_little_endian; @@ -14,7 +14,7 @@ use super::super::endianess::is_native_little_endian; fn read_swapped( reader: &mut R, length: usize, - buffer: &mut MutableBuffer, + buffer: &mut Vec, is_little_endian: bool, ) -> Result<()> { // slow case where we must reverse bits @@ -49,7 +49,7 @@ fn read_uncompressed_buffer( buffer_length: usize, length: usize, is_little_endian: bool, -) -> Result> { +) -> Result> { let bytes = length * std::mem::size_of::(); if bytes > buffer_length { return Err(ArrowError::OutOfSpec( @@ -67,7 +67,7 @@ fn read_uncompressed_buffer( // it is undefined behavior to call read_exact on un-initialized, 
https://doc.rust-lang.org/std/io/trait.Read.html#tymethod.read // see also https://github.com/MaikKlein/ash/issues/354#issue-781730580 - let mut buffer = MutableBuffer::::from_len_zeroed(length); + let mut buffer = vec![T::default(); length]; if is_native_little_endian() == is_little_endian { // fast case where we can just copy the contents as is @@ -91,7 +91,7 @@ fn read_compressed_buffer( length: usize, is_little_endian: bool, compression: BodyCompression, -) -> Result> { +) -> Result> { if is_little_endian != is_native_little_endian() { return Err(ArrowError::NotYetImplemented( "Reading compressed and big endian IPC".to_string(), @@ -100,7 +100,7 @@ fn read_compressed_buffer( // it is undefined behavior to call read_exact on un-initialized, https://doc.rust-lang.org/std/io/trait.Read.html#tymethod.read // see also https://github.com/MaikKlein/ash/issues/354#issue-781730580 - let mut buffer = MutableBuffer::::from_len_zeroed(length); + let mut buffer = vec![T::default(); length]; // decompress first // todo: move this allocation to an external buffer for re-use @@ -159,12 +159,12 @@ fn read_uncompressed_bitmap( length: usize, bytes: usize, reader: &mut R, -) -> Result> { +) -> Result> { // something is wrong if we can't `length` assert!(length <= bytes * 8); // it is undefined behavior to call read_exact on un-initialized, https://doc.rust-lang.org/std/io/trait.Read.html#tymethod.read // see also https://github.com/MaikKlein/ash/issues/354#issue-781730580 - let mut buffer = MutableBuffer::::from_len_zeroed(bytes); + let mut buffer = vec![0; bytes]; reader.read_exact(buffer.as_mut_slice())?; Ok(buffer) @@ -175,8 +175,8 @@ fn read_compressed_bitmap( bytes: usize, compression: BodyCompression, reader: &mut R, -) -> Result> { - let mut buffer = MutableBuffer::::from_len_zeroed((length + 7) / 8); +) -> Result> { + let mut buffer = vec![0; (length + 7) / 8]; // read all first // todo: move this allocation to an external buffer for re-use diff --git 
a/src/io/json/read/deserialize.rs b/src/io/json/read/deserialize.rs index 64da0d775fc..4c71ec199c5 100644 --- a/src/io/json/read/deserialize.rs +++ b/src/io/json/read/deserialize.rs @@ -27,7 +27,6 @@ use crate::types::NaturalDataType; use crate::{ array::*, bitmap::MutableBitmap, - buffer::MutableBuffer, datatypes::{DataType, IntervalUnit}, types::NativeType, }; @@ -130,7 +129,7 @@ fn read_list(rows: &[&Value], data_type: DataType) -> ListArray { let mut validity = MutableBitmap::with_capacity(rows.len()); let mut inner = Vec::<&Value>::with_capacity(rows.len()); - let mut offsets = MutableBuffer::::with_capacity(rows.len() + 1); + let mut offsets = Vec::::with_capacity(rows.len() + 1); offsets.push(O::zero()); rows.iter().fold(O::zero(), |mut length, row| { match row { diff --git a/src/io/parquet/read/binary/basic.rs b/src/io/parquet/read/binary/basic.rs index 96b5b21adb0..492a5e5643c 100644 --- a/src/io/parquet/read/binary/basic.rs +++ b/src/io/parquet/read/binary/basic.rs @@ -7,7 +7,6 @@ use parquet2::{ use crate::{ array::Offset, bitmap::{utils::BitmapIter, MutableBitmap}, - buffer::MutableBuffer, error::Result, }; @@ -20,8 +19,8 @@ fn read_dict_buffer( indices_buffer: &[u8], additional: usize, dict: &BinaryPageDict, - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + offsets: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, ) { let length = (offsets.len() - 1) + additional; @@ -70,7 +69,7 @@ fn read_dict_buffer( values.extend_from_slice(&dict_values[dict_offset_i..dict_offset_ip1]); }) } else { - offsets.extend_constant(additional, last_offset) + offsets.resize(offsets.len() + additional, last_offset); } } } @@ -82,8 +81,8 @@ fn read_dict_required( indices_buffer: &[u8], additional: usize, dict: &BinaryPageDict, - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + offsets: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, ) { let dict_values = dict.values(); @@ -113,8 +112,8 @@ fn read_delta_optional( validity_buffer:
&[u8], values_buffer: &[u8], additional: usize, - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + offsets: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, ) { let length = (offsets.len() - 1) + additional; @@ -151,7 +150,7 @@ fn read_delta_optional( offsets.push(last_offset); }) } else { - offsets.extend_constant(additional, last_offset) + offsets.resize(offsets.len() + additional, last_offset); } } } @@ -166,8 +165,8 @@ fn read_plain_optional( validity_buffer: &[u8], values_buffer: &[u8], additional: usize, - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + offsets: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, ) { let length = (offsets.len() - 1) + additional; @@ -205,7 +204,7 @@ fn read_plain_optional( values.extend_from_slice(value) }) } else { - offsets.extend_constant(additional, last_offset) + offsets.resize(offsets.len() + additional, last_offset); } } } @@ -215,8 +214,8 @@ fn read_plain_optional( pub(super) fn read_plain_required( buffer: &[u8], additional: usize, - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + offsets: &mut Vec, + values: &mut Vec, ) { let mut last_offset = *offsets.as_mut_slice().last().unwrap(); @@ -236,8 +235,8 @@ pub(super) fn read_plain_required( pub(super) fn extend_from_page( page: &DataPage, descriptor: &ColumnDescriptor, - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + offsets: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, ) -> Result<()> { let additional = page.num_values(); diff --git a/src/io/parquet/read/binary/dictionary.rs b/src/io/parquet/read/binary/dictionary.rs index 6e650974f42..7ab4ff076c2 100644 --- a/src/io/parquet/read/binary/dictionary.rs +++ b/src/io/parquet/read/binary/dictionary.rs @@ -11,7 +11,6 @@ use super::super::utils as other_utils; use crate::{ array::{Array, DictionaryArray, DictionaryKey, Offset, PrimitiveArray, Utf8Array}, bitmap::{utils::BitmapIter, MutableBitmap}, - buffer::MutableBuffer, datatypes::DataType,
error::{ArrowError, Result}, }; @@ -22,9 +21,9 @@ fn read_dict_optional( indices_buffer: &[u8], additional: usize, dict: &BinaryPageDict, - indices: &mut MutableBuffer, - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + indices: &mut Vec, + offsets: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, ) where K: DictionaryKey, @@ -32,7 +31,7 @@ fn read_dict_optional( { let length = indices.len() + additional; values.extend_from_slice(dict.values()); - offsets.extend_from_trusted_len_iter( + offsets.extend( dict.offsets() .iter() .map(|x| O::from_usize(*x as usize).unwrap()), @@ -72,7 +71,7 @@ fn read_dict_optional( indices.push(index) }) } else { - indices.extend_constant(additional, *indices.last().unwrap()) + indices.resize(indices.len() + additional, *indices.last().unwrap()); } } } @@ -82,9 +81,9 @@ fn read_dict_optional( fn extend_from_page( page: &DataPage, descriptor: &ColumnDescriptor, - indices: &mut MutableBuffer, - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + indices: &mut Vec, + offsets: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, ) -> Result<()> where @@ -136,9 +135,9 @@ where I: FallibleStreamingIterator, { let capacity = metadata.num_values() as usize; - let mut indices = MutableBuffer::::with_capacity(capacity); - let mut values = MutableBuffer::::with_capacity(0); - let mut offsets = MutableBuffer::::with_capacity(1 + capacity); + let mut indices = Vec::::with_capacity(capacity); + let mut values = Vec::::with_capacity(0); + let mut offsets = Vec::::with_capacity(1 + capacity); let mut validity = MutableBitmap::with_capacity(capacity); while let Some(page) = iter.next()? 
{ extend_from_page( diff --git a/src/io/parquet/read/binary/mod.rs b/src/io/parquet/read/binary/mod.rs index fad5c0dcd89..179c831aa29 100644 --- a/src/io/parquet/read/binary/mod.rs +++ b/src/io/parquet/read/binary/mod.rs @@ -4,7 +4,6 @@ use parquet2::{metadata::ColumnChunkMetaData, page::DataPage, FallibleStreamingI use crate::{ array::{Array, Offset}, bitmap::MutableBitmap, - buffer::MutableBuffer, datatypes::DataType, error::{ArrowError, Result}, io::parquet::read::binary::utils::finish_array, @@ -31,8 +30,8 @@ where I: FallibleStreamingIterator, { let capacity = metadata.num_values() as usize; - let mut values = MutableBuffer::::with_capacity(0); - let mut offsets = MutableBuffer::::with_capacity(1 + capacity); + let mut values = Vec::::with_capacity(0); + let mut offsets = Vec::::with_capacity(1 + capacity); offsets.push(O::default()); let mut validity = MutableBitmap::with_capacity(capacity); @@ -76,8 +75,8 @@ where I: Stream>, { let capacity = metadata.num_values() as usize; - let mut values = MutableBuffer::::with_capacity(0); - let mut offsets = MutableBuffer::::with_capacity(1 + capacity); + let mut values = Vec::::with_capacity(0); + let mut offsets = Vec::::with_capacity(1 + capacity); offsets.push(O::default()); let mut validity = MutableBitmap::with_capacity(capacity); diff --git a/src/io/parquet/read/binary/nested.rs b/src/io/parquet/read/binary/nested.rs index 3c57aab9a5f..3cb9d1eeb58 100644 --- a/src/io/parquet/read/binary/nested.rs +++ b/src/io/parquet/read/binary/nested.rs @@ -9,14 +9,14 @@ use super::super::nested_utils::*; use super::super::utils; use super::basic::read_plain_required; -use crate::{array::Offset, bitmap::MutableBitmap, buffer::MutableBuffer, error::Result}; +use crate::{array::Offset, bitmap::MutableBitmap, error::Result}; fn read_values<'a, O, D, G>( def_levels: D, max_def: u32, mut new_values: G, - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + offsets: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, 
) where O: Offset, @@ -46,8 +46,8 @@ fn read( def_level_encoding: (&Encoding, i16), is_nullable: bool, nested: &mut Vec>, - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + offsets: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, ) { let max_rep_level = rep_level_encoding.1 as u32; @@ -97,8 +97,8 @@ pub(super) fn extend_from_page( descriptor: &ColumnDescriptor, is_nullable: bool, nested: &mut Vec>, - offsets: &mut MutableBuffer, - values: &mut MutableBuffer, + offsets: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, ) -> Result<()> { let additional = page.num_values(); diff --git a/src/io/parquet/read/binary/utils.rs b/src/io/parquet/read/binary/utils.rs index 3b72782b4e7..4b1cea1f3ab 100644 --- a/src/io/parquet/read/binary/utils.rs +++ b/src/io/parquet/read/binary/utils.rs @@ -1,14 +1,13 @@ use crate::{ array::{Array, BinaryArray, Offset, Utf8Array}, bitmap::MutableBitmap, - buffer::MutableBuffer, datatypes::DataType, }; pub(super) fn finish_array( data_type: DataType, - offsets: MutableBuffer, - values: MutableBuffer, + offsets: Vec, + values: Vec, validity: MutableBitmap, ) -> Box { match data_type { diff --git a/src/io/parquet/read/fixed_size_binary.rs b/src/io/parquet/read/fixed_size_binary.rs index 01973e8db7e..39c850e8d49 100644 --- a/src/io/parquet/read/fixed_size_binary.rs +++ b/src/io/parquet/read/fixed_size_binary.rs @@ -9,7 +9,6 @@ use super::{ColumnChunkMetaData, ColumnDescriptor}; use crate::{ array::FixedSizeBinaryArray, bitmap::{utils::BitmapIter, MutableBitmap}, - buffer::MutableBuffer, datatypes::DataType, error::{ArrowError, Result}, }; @@ -24,7 +23,7 @@ pub(crate) fn read_dict_buffer( additional: usize, size: usize, dict: &FixedLenByteArrayPageDict, - values: &mut MutableBuffer, + values: &mut Vec, validity: &mut MutableBitmap, ) { let length = values.len() + additional * size; @@ -50,7 +49,7 @@ pub(crate) fn read_dict_buffer( let index = indices.next().unwrap() as usize; 
values.extend_from_slice(&dict_values[index * size..(index + 1) * size]); } else { - values.extend_constant(size, 0); + values.resize(values.len() + size, 0); } } } @@ -63,7 +62,7 @@ pub(crate) fn read_dict_buffer( values.extend_from_slice(&dict_values[index * size..(index + 1) * size]); }) } else { - values.extend_constant(additional * size, 0) + values.resize(values.len() + additional * size, 0); } } } @@ -76,7 +75,7 @@ pub(crate) fn read_dict_required( additional: usize, size: usize, dict: &FixedLenByteArrayPageDict, - values: &mut MutableBuffer, + values: &mut Vec, validity: &mut MutableBitmap, ) { let dict_values = dict.values(); @@ -100,7 +99,7 @@ pub(crate) fn read_optional( values_buffer: &[u8], additional: usize, size: usize, - values: &mut MutableBuffer, + values: &mut Vec, validity: &mut MutableBitmap, ) { let length = values.len() + additional * size; @@ -122,7 +121,7 @@ pub(crate) fn read_optional( let value = values_iterator.next().unwrap(); values.extend_from_slice(value); } else { - values.extend_constant(size, 0) + values.resize(values.len() + size, 0); } } } @@ -135,19 +134,14 @@ pub(crate) fn read_optional( values.extend_from_slice(value) }) } else { - values.extend_constant(additional * size, 0) + values.resize(values.len() + additional * size, 0); } } } } } -pub(crate) fn read_required( - buffer: &[u8], - additional: usize, - size: usize, - values: &mut MutableBuffer, -) { +pub(crate) fn read_required(buffer: &[u8], additional: usize, size: usize, values: &mut Vec) { assert_eq!(buffer.len(), additional * size); values.extend_from_slice(buffer); } @@ -164,7 +158,7 @@ where let size = FixedSizeBinaryArray::get_size(&data_type); let capacity = metadata.num_values() as usize; - let mut values = MutableBuffer::::with_capacity(capacity * size); + let mut values = Vec::::with_capacity(capacity * size); let mut validity = MutableBitmap::with_capacity(capacity); while let Some(page) = iter.next()? 
{ extend_from_page( @@ -196,7 +190,7 @@ where let size = FixedSizeBinaryArray::get_size(&data_type); let capacity = metadata.num_values() as usize; - let mut values = MutableBuffer::::with_capacity(capacity * size); + let mut values = Vec::::with_capacity(capacity * size); let mut validity = MutableBitmap::with_capacity(capacity); pin_mut!(pages); // needed for iteration @@ -222,7 +216,7 @@ pub(crate) fn extend_from_page( page: &DataPage, size: usize, descriptor: &ColumnDescriptor, - values: &mut MutableBuffer, + values: &mut Vec, validity: &mut MutableBitmap, ) -> Result<()> { let additional = page.num_values(); diff --git a/src/io/parquet/read/nested_utils.rs b/src/io/parquet/read/nested_utils.rs index bad1dcbdcf0..f8dc6c04d6c 100644 --- a/src/io/parquet/read/nested_utils.rs +++ b/src/io/parquet/read/nested_utils.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use crate::{ array::{Array, ListArray}, bitmap::{Bitmap, MutableBitmap}, - buffer::{Buffer, MutableBuffer}, + buffer::Buffer, datatypes::{DataType, Field}, error::{ArrowError, Result}, }; @@ -60,7 +60,7 @@ impl Nested for NestedPrimitive { #[derive(Debug, Default)] pub struct NestedOptional { pub validity: MutableBitmap, - pub offsets: MutableBuffer, + pub offsets: Vec, } impl Nested for NestedOptional { @@ -95,7 +95,7 @@ impl Nested for NestedOptional { impl NestedOptional { pub fn with_capacity(capacity: usize) -> Self { - let offsets = MutableBuffer::::with_capacity(capacity + 1); + let offsets = Vec::::with_capacity(capacity + 1); let validity = MutableBitmap::with_capacity(capacity); Self { validity, offsets } } @@ -103,7 +103,7 @@ impl NestedOptional { #[derive(Debug, Default)] pub struct NestedValid { - pub offsets: MutableBuffer, + pub offsets: Vec, } impl Nested for NestedValid { @@ -136,7 +136,7 @@ impl Nested for NestedValid { impl NestedValid { pub fn with_capacity(capacity: usize) -> Self { - let offsets = MutableBuffer::::with_capacity(capacity + 1); + let offsets = Vec::::with_capacity(capacity + 1); 
Self { offsets } } } diff --git a/src/io/parquet/read/primitive/basic.rs b/src/io/parquet/read/primitive/basic.rs index 6662b247304..883ca314908 100644 --- a/src/io/parquet/read/primitive/basic.rs +++ b/src/io/parquet/read/primitive/basic.rs @@ -9,7 +9,6 @@ use super::utils::ExactChunksIter; use super::ColumnDescriptor; use crate::{ bitmap::{utils::BitmapIter, MutableBitmap}, - buffer::MutableBuffer, error::Result, types::NativeType as ArrowNativeType, }; @@ -19,7 +18,7 @@ fn read_dict_buffer_optional( indices_buffer: &[u8], additional: usize, dict: &PrimitivePageDict, - values: &mut MutableBuffer, + values: &mut Vec, validity: &mut MutableBitmap, op: F, ) where @@ -65,7 +64,7 @@ fn read_dict_buffer_optional( values.push(value) }) } else { - values.extend_constant(additional, A::default()) + values.resize(values.len() + additional, A::default()); } } } @@ -76,7 +75,7 @@ fn read_dict_buffer_required( indices_buffer: &[u8], additional: usize, dict: &PrimitivePageDict, - values: &mut MutableBuffer, + values: &mut Vec, validity: &mut MutableBitmap, op: F, ) where @@ -102,7 +101,7 @@ fn read_nullable( validity_buffer: &[u8], values_buffer: &[u8], additional: usize, - values: &mut MutableBuffer, + values: &mut Vec, validity: &mut MutableBitmap, op: F, ) where @@ -140,19 +139,15 @@ fn read_nullable( values.push(value) }) } else { - values.extend_constant(additional, A::default()) + values.resize(values.len() + additional, A::default()); } } } } } -fn read_required( - values_buffer: &[u8], - additional: usize, - values: &mut MutableBuffer, - op: F, -) where +fn read_required(values_buffer: &[u8], additional: usize, values: &mut Vec, op: F) +where T: NativeType, A: ArrowNativeType, F: Fn(T) -> A, @@ -162,13 +157,13 @@ fn read_required( let iterator = iterator.map(op); - values.extend_from_trusted_len_iter(iterator); + values.extend(iterator); } pub fn extend_from_page( page: &DataPage, descriptor: &ColumnDescriptor, - values: &mut MutableBuffer, + values: &mut Vec, 
validity: &mut MutableBitmap, op: F, ) -> Result<()> diff --git a/src/io/parquet/read/primitive/dictionary.rs b/src/io/parquet/read/primitive/dictionary.rs index 6a8caeb17d2..01e67b66702 100644 --- a/src/io/parquet/read/primitive/dictionary.rs +++ b/src/io/parquet/read/primitive/dictionary.rs @@ -12,7 +12,6 @@ use super::{ColumnChunkMetaData, ColumnDescriptor}; use crate::{ array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray}, bitmap::{utils::BitmapIter, MutableBitmap}, - buffer::MutableBuffer, datatypes::DataType, error::{ArrowError, Result}, types::NativeType as ArrowNativeType, @@ -24,8 +23,8 @@ fn read_dict_optional( indices_buffer: &[u8], additional: usize, dict: &PrimitivePageDict, - indices: &mut MutableBuffer, - values: &mut MutableBuffer, + indices: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, op: F, ) where @@ -35,7 +34,7 @@ fn read_dict_optional( F: Fn(T) -> A, { let dict_values = dict.values(); - values.extend_from_trusted_len_iter(dict_values.iter().map(|x| op(*x))); + values.extend(dict_values.iter().map(|x| op(*x))); // SPEC: Data page format: the bit width used to encode the entry ids stored as 1 byte (max bit width = 32), // SPEC: followed by the values encoded using RLE/Bit packed described above (with the given bit width). 
@@ -71,7 +70,7 @@ fn read_dict_optional( indices.push(index) }) } else { - values.extend_constant(additional, A::default()) + values.resize(values.len() + additional, A::default()); } } } @@ -81,8 +80,8 @@ fn read_dict_optional( fn extend_from_page( page: &DataPage, descriptor: &ColumnDescriptor, - indices: &mut MutableBuffer, - values: &mut MutableBuffer, + indices: &mut Vec, + values: &mut Vec, validity: &mut MutableBitmap, op: F, ) -> Result<()> @@ -140,8 +139,8 @@ where I: FallibleStreamingIterator, { let capacity = metadata.num_values() as usize; - let mut indices = MutableBuffer::::with_capacity(capacity); - let mut values = MutableBuffer::::with_capacity(capacity); + let mut indices = Vec::::with_capacity(capacity); + let mut values = Vec::::with_capacity(capacity); let mut validity = MutableBitmap::with_capacity(capacity); while let Some(page) = iter.next()? { extend_from_page( diff --git a/src/io/parquet/read/primitive/mod.rs b/src/io/parquet/read/primitive/mod.rs index bb7b5dc325f..357f6440a55 100644 --- a/src/io/parquet/read/primitive/mod.rs +++ b/src/io/parquet/read/primitive/mod.rs @@ -11,7 +11,6 @@ use super::{ColumnChunkMetaData, ColumnDescriptor}; use crate::{ array::{Array, PrimitiveArray}, bitmap::MutableBitmap, - buffer::MutableBuffer, datatypes::DataType, error::{ArrowError, Result}, types::NativeType as ArrowNativeType, @@ -34,7 +33,7 @@ where I: Stream>, { let capacity = metadata.num_values() as usize; - let mut values = MutableBuffer::::with_capacity(capacity); + let mut values = Vec::::with_capacity(capacity); let mut validity = MutableBitmap::with_capacity(capacity); pin_mut!(pages); // needed for iteration @@ -77,7 +76,7 @@ where I: FallibleStreamingIterator, { let capacity = metadata.num_values() as usize; - let mut values = MutableBuffer::::with_capacity(capacity); + let mut values = Vec::::with_capacity(capacity); let mut validity = MutableBitmap::with_capacity(capacity); let is_nullable = nested.pop().unwrap().is_nullable(); diff --git 
a/src/io/parquet/read/primitive/nested.rs b/src/io/parquet/read/primitive/nested.rs index 237f23ee83e..84303cf2056 100644 --- a/src/io/parquet/read/primitive/nested.rs +++ b/src/io/parquet/read/primitive/nested.rs @@ -9,7 +9,7 @@ use super::super::nested_utils::extend_offsets; use super::ColumnDescriptor; use super::{super::utils, utils::ExactChunksIter, Nested}; use crate::{ - bitmap::MutableBitmap, buffer::MutableBuffer, error::Result, trusted_len::TrustedLen, + bitmap::MutableBitmap, error::Result, trusted_len::TrustedLen, types::NativeType as ArrowNativeType, }; @@ -18,7 +18,7 @@ fn read_values( max_def: u32, mut new_values: G, op: F, - values: &mut MutableBuffer, + values: &mut Vec, validity: &mut MutableBitmap, ) where T: NativeType, @@ -38,15 +38,14 @@ fn read_values( }); } -fn read_values_required(new_values: G, op: F, values: &mut MutableBuffer) +fn read_values_required(new_values: G, op: F, values: &mut Vec) where T: NativeType, G: TrustedLen, A: ArrowNativeType, F: Fn(T) -> A, { - let iterator = new_values.map(op); - values.extend_from_trusted_len_iter(iterator); + values.extend(new_values.map(op)); } #[allow(clippy::too_many_arguments)] @@ -59,7 +58,7 @@ fn read( def_level_encoding: (&Encoding, i16), is_nullable: bool, nested: &mut Vec>, - values: &mut MutableBuffer, + values: &mut Vec, validity: &mut MutableBitmap, op: F, ) where @@ -108,7 +107,7 @@ pub fn extend_from_page( descriptor: &ColumnDescriptor, is_nullable: bool, nested: &mut Vec>, - values: &mut MutableBuffer, + values: &mut Vec, validity: &mut MutableBitmap, op: F, ) -> Result<()> diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index 2f1b6f52dfe..8d0b5cc1215 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -14,7 +14,7 @@ pub mod stream; use crate::array::*; use crate::bitmap::Bitmap; -use crate::buffer::{Buffer, MutableBuffer}; +use crate::buffer::Buffer; use crate::datatypes::*; use crate::error::{ArrowError, Result}; use 
crate::io::parquet::read::is_type_nullable; @@ -240,11 +240,11 @@ pub fn array_to_page( .as_any() .downcast_ref::>() .unwrap(); - let mut values = MutableBuffer::::with_capacity(12 * array.len()); + let mut values = Vec::::with_capacity(12 * array.len()); array.values().iter().for_each(|x| { let bytes = &x.to_le_bytes(); values.extend_from_slice(bytes); - values.extend_constant(8, 0); + values.resize(values.len() + 8, 0); }); let array = FixedSizeBinaryArray::from_data( DataType::FixedSizeBinary(12), @@ -258,10 +258,10 @@ pub fn array_to_page( .as_any() .downcast_ref::>() .unwrap(); - let mut values = MutableBuffer::::with_capacity(12 * array.len()); + let mut values = Vec::::with_capacity(12 * array.len()); array.values().iter().for_each(|x| { let bytes = &x.to_le_bytes(); - values.extend_constant(4, 0); // months + values.resize(values.len() + 4, 0); // months values.extend_from_slice(bytes); // days and seconds }); let array = FixedSizeBinaryArray::from_data( @@ -302,7 +302,7 @@ pub fn array_to_page( primitive::array_to_page::(&array, options, descriptor) } else { let size = decimal_length_from_precision(precision); - let mut values = MutableBuffer::::with_capacity(size * array.len()); + let mut values = Vec::::with_capacity(size * array.len()); array.values().iter().for_each(|x| { let bytes = &x.to_be_bytes()[16 - size..]; values.extend_from_slice(bytes) diff --git a/src/lib.rs b/src/lib.rs index 4b71b9991e1..fe3d42c4e40 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,16 +7,12 @@ #[macro_use] pub mod array; -#[cfg(feature = "cache_aligned")] -mod alloc; pub mod bitmap; pub mod buffer; pub mod error; pub mod scalar; pub mod trusted_len; pub mod types; -#[cfg(feature = "cache_aligned")] -mod vec; pub mod compute; pub mod io; diff --git a/src/vec.rs b/src/vec.rs deleted file mode 100644 index 48966c0c616..00000000000 --- a/src/vec.rs +++ /dev/null @@ -1,336 +0,0 @@ -use std::iter::FromIterator; -use std::ptr::NonNull; - -use crate::alloc; -use 
crate::types::NativeType; - -/// Returns the nearest number that is `>=` than `num` and is a multiple of 64 -#[inline] -fn round_upto_multiple_of_64(num: usize) -> usize { - round_upto_power_of_2(num, 64) -} - -/// Returns the nearest multiple of `factor` that is `>=` than `num`. Here `factor` must -/// be a power of 2. -fn round_upto_power_of_2(num: usize, factor: usize) -> usize { - debug_assert!(factor > 0 && (factor & (factor - 1)) == 0); - (num + (factor - 1)) & !(factor - 1) -} - -#[inline] -fn capacity_multiple_of_64(capacity: usize) -> usize { - round_upto_multiple_of_64(capacity * std::mem::size_of::()) / std::mem::size_of::() -} - -/// # Safety -/// `ptr` must be allocated for `old_capacity`. -#[inline] -unsafe fn reallocate( - ptr: NonNull, - old_capacity: usize, - new_capacity: usize, -) -> (NonNull, usize) { - let new_capacity = capacity_multiple_of_64::(new_capacity); - let new_capacity = std::cmp::max(new_capacity, old_capacity * 2); - let ptr = alloc::reallocate(ptr, old_capacity, new_capacity); - (ptr, new_capacity) -} - -/// An interface equivalent to `std::vec::Vec` with an allocator aligned along cache lines. -pub(crate) struct AlignedVec { - // dangling iff capacity = 0 - ptr: NonNull, - // invariant: len <= capacity - len: usize, - capacity: usize, -} - -impl Drop for AlignedVec { - fn drop(&mut self) { - unsafe { alloc::free_aligned(self.ptr, self.capacity) } - } -} - -impl AlignedVec { - #[inline] - pub fn new() -> Self { - let ptr = alloc::allocate_aligned(0); - Self { - ptr, - len: 0, - capacity: 0, - } - } - - #[inline] - pub fn clear(&mut self) { - self.len = 0 - } - - #[inline] - pub fn capacity(&self) -> usize { - self.capacity - } - - #[inline] - pub fn truncate(&mut self, len: usize) { - if len < self.len { - self.len = len; - } - } - - /// Sets the length of this buffer. 
- /// # Safety: - /// The caller must uphold the following invariants: - /// * ensure no reads are performed on any - /// item within `[len, capacity - len]` - /// * ensure `len <= self.capacity()` - #[inline] - pub unsafe fn set_len(&mut self, len: usize) { - debug_assert!(len <= self.capacity()); - self.len = len; - } - - /// Returns the data stored in this buffer as a slice. - #[inline] - pub fn as_slice(&self) -> &[T] { - self - } - - /// Returns the data stored in this buffer as a mutable slice. - #[inline] - pub fn as_mut_slice(&mut self) -> &mut [T] { - self - } - - /// Returns a raw pointer to this buffer's internal memory - /// This pointer is guaranteed to be aligned along cache-lines. - #[inline] - pub fn as_ptr(&self) -> *const T { - self.ptr.as_ptr() - } - - /// Returns a mutable raw pointer to this buffer's internal memory - /// This pointer is guaranteed to be aligned along cache-lines. - #[inline] - pub fn as_mut_ptr(&mut self) -> *mut T { - self.ptr.as_ptr() - } - - #[inline] - pub fn with_capacity(capacity: usize) -> Self { - let capacity = capacity_multiple_of_64::(capacity); - let ptr = alloc::allocate_aligned(capacity); - Self { - ptr, - len: 0, - capacity, - } - } - - #[inline] - pub fn len(&self) -> usize { - self.len - } - - #[inline] - pub fn is_empty(&self) -> bool { - self.len == 0 - } - - #[inline(always)] - pub fn reserve(&mut self, additional: usize) { - let required_cap = self.len + additional; - if required_cap > self.capacity { - // JUSTIFICATION - // Benefit - // necessity - // Soundness - // `self.data` is valid for `self.capacity`. 
- let (ptr, new_capacity) = unsafe { reallocate(self.ptr, self.capacity, required_cap) }; - self.ptr = ptr; - self.capacity = new_capacity; - } - } - - #[inline(always)] - pub fn resize(&mut self, new_len: usize, value: T) { - if new_len > self.len { - if self.capacity == 0 && value == T::default() { - // edge case where the allocate - let required_cap = capacity_multiple_of_64::(new_len); - let ptr = alloc::allocate_aligned_zeroed(required_cap); - self.ptr = ptr; - self.capacity = required_cap; - self.len = new_len; - return; - } - - let diff = new_len - self.len; - self.reserve(diff); - unsafe { - // write the value - let mut ptr = self.ptr.as_ptr().add(self.len); - (0..diff).for_each(|_| { - std::ptr::write(ptr, value); - ptr = ptr.add(1); - }) - } - } - // this truncates the buffer when new_len < self.len - self.len = new_len; - } - - #[inline] - pub fn extend_from_slice(&mut self, items: &[T]) { - let additional = items.len(); - self.reserve(additional); - unsafe { - let dst = self.ptr.as_ptr().add(self.len); - let src = items.as_ptr(); - std::ptr::copy_nonoverlapping(src, dst, additional) - } - self.len += additional; - } - - #[inline] - pub fn push(&mut self, item: T) { - self.reserve(1); - unsafe { - let dst = self.ptr.as_ptr().add(self.len) as *mut T; - std::ptr::write(dst, item); - } - self.len += 1; - } - - pub fn shrink_to_fit(&mut self) { - let new_capacity = capacity_multiple_of_64::(self.len); - if new_capacity < self.capacity { - // JUSTIFICATION - // Benefit - // necessity - // Soundness - // `self.ptr` is valid for `self.capacity`. 
- let ptr = unsafe { alloc::reallocate(self.ptr, self.capacity, new_capacity) }; - - self.ptr = ptr; - self.capacity = new_capacity; - } - } - - #[inline] - pub fn from_len_zeroed(len: usize) -> Self { - let new_capacity = capacity_multiple_of_64::(len); - let ptr = alloc::allocate_aligned_zeroed(new_capacity); - Self { - ptr, - len, - capacity: new_capacity, - } - } - - #[inline] - pub unsafe fn from_raw_parts(ptr: NonNull, length: usize, capacity: usize) -> Self { - Self { - ptr, - capacity, - len: length, - } - } -} - -impl Default for AlignedVec { - fn default() -> Self { - Self::new() - } -} - -impl std::ops::Deref for AlignedVec { - type Target = [T]; - - #[inline] - fn deref(&self) -> &[T] { - unsafe { std::slice::from_raw_parts(self.as_ptr(), self.len) } - } -} - -impl std::ops::DerefMut for AlignedVec { - #[inline] - fn deref_mut(&mut self) -> &mut [T] { - unsafe { std::slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) } - } -} - -impl Extend for AlignedVec { - fn extend>(&mut self, iter: T) { - let mut iterator = iter.into_iter(); - let (lower, _) = iterator.size_hint(); - let additional = lower; - self.reserve(additional); - - // this is necessary because of https://github.com/rust-lang/rust/issues/32155 - let mut len = SetLenOnDrop::new(&mut self.len); - let mut dst = unsafe { self.ptr.as_ptr().add(len.local_len) as *mut A }; - let capacity = self.capacity; - - while len.local_len < capacity { - if let Some(item) = iterator.next() { - unsafe { - std::ptr::write(dst, item); - dst = dst.add(1); - } - len.local_len += 1; - } else { - break; - } - } - drop(len); - - iterator.for_each(|item| self.push(item)); - } -} - -struct SetLenOnDrop<'a> { - len: &'a mut usize, - local_len: usize, -} - -impl<'a> SetLenOnDrop<'a> { - #[inline] - fn new(len: &'a mut usize) -> Self { - SetLenOnDrop { - local_len: *len, - len, - } - } -} - -impl Drop for SetLenOnDrop<'_> { - #[inline] - fn drop(&mut self) { - *self.len = self.local_len; - } -} - -impl FromIterator for 
AlignedVec { - fn from_iter>(iter: I) -> Self { - let mut iterator = iter.into_iter(); - - // first iteration, which will likely reserve sufficient space for the buffer. - let mut buffer = match iterator.next() { - None => AlignedVec::new(), - Some(element) => { - let (lower, _) = iterator.size_hint(); - let mut buffer = AlignedVec::with_capacity(lower.saturating_add(1)); - unsafe { - std::ptr::write(buffer.as_mut_ptr(), element); - buffer.len = 1; - } - buffer - } - }; - - buffer.extend(iterator); - buffer - } -} diff --git a/tests/it/array/binary/mod.rs b/tests/it/array/binary/mod.rs index 69905af8adb..8d2b9086014 100644 --- a/tests/it/array/binary/mod.rs +++ b/tests/it/array/binary/mod.rs @@ -86,24 +86,24 @@ fn with_validity() { #[test] #[should_panic] fn wrong_offsets() { - let offsets = Buffer::from(&[0, 5, 4]); // invalid offsets - let values = Buffer::from(b"abbbbb"); + let offsets = Buffer::from_slice([0, 5, 4]); // invalid offsets + let values = Buffer::from_slice(b"abbbbb"); BinaryArray::::from_data(DataType::Binary, offsets, values, None); } #[test] #[should_panic] fn wrong_data_type() { - let offsets = Buffer::from(&[0, 4]); - let values = Buffer::from(b"abbb"); + let offsets = Buffer::from_slice([0, 4]); + let values = Buffer::from_slice(b"abbb"); BinaryArray::::from_data(DataType::Int8, offsets, values, None); } #[test] #[should_panic] fn value_with_wrong_offsets_panics() { - let offsets = Buffer::from(&[0, 10, 11, 4]); - let values = Buffer::from(b"abbb"); + let offsets = Buffer::from_slice([0, 10, 11, 4]); + let values = Buffer::from_slice(b"abbb"); // the 10-11 is not checked let array = BinaryArray::::from_data(DataType::Binary, offsets, values, None); @@ -115,8 +115,8 @@ fn value_with_wrong_offsets_panics() { #[test] #[should_panic] fn index_out_of_bounds_panics() { - let offsets = Buffer::from(&[0, 1, 2, 4]); - let values = Buffer::from(b"abbb"); + let offsets = Buffer::from_slice([0, 1, 2, 4]); + let values = Buffer::from_slice(b"abbb"); let 
array = BinaryArray::::from_data(DataType::Utf8, offsets, values, None); array.value(3); @@ -125,8 +125,8 @@ fn index_out_of_bounds_panics() { #[test] #[should_panic] fn value_unchecked_with_wrong_offsets_panics() { - let offsets = Buffer::from(&[0, 10, 11, 4]); - let values = Buffer::from(b"abbb"); + let offsets = Buffer::from_slice([0, 10, 11, 4]); + let values = Buffer::from_slice(b"abbb"); // the 10-11 is not checked let array = BinaryArray::::from_data(DataType::Binary, offsets, values, None); diff --git a/tests/it/array/equal/list.rs b/tests/it/array/equal/list.rs index d0fa8098b74..8c0ff89b6e6 100644 --- a/tests/it/array/equal/list.rs +++ b/tests/it/array/equal/list.rs @@ -69,7 +69,7 @@ fn test_list_offsets() { #[test] fn test_bla() { - let offsets = Buffer::from([0, 3, 3, 6]); + let offsets = Buffer::from_slice([0, 3, 3, 6]); let data_type = ListArray::::default_datatype(DataType::Int32); let values = Arc::new(Int32Array::from([ Some(1), @@ -83,7 +83,7 @@ fn test_bla() { let lhs = ListArray::::from_data(data_type, offsets, values, Some(validity)); let lhs = lhs.slice(1, 2); - let offsets = Buffer::from([0, 0, 3]); + let offsets = Buffer::from_slice([0, 0, 3]); let data_type = ListArray::::default_datatype(DataType::Int32); let values = Arc::new(Int32Array::from([Some(4), None, Some(6)])); let validity = Bitmap::from([false, true]); diff --git a/tests/it/array/fixed_size_binary/mod.rs b/tests/it/array/fixed_size_binary/mod.rs index 79a1bf21311..ea5dcd52cc7 100644 --- a/tests/it/array/fixed_size_binary/mod.rs +++ b/tests/it/array/fixed_size_binary/mod.rs @@ -6,7 +6,7 @@ mod mutable; fn basics() { let array = FixedSizeBinaryArray::from_data( DataType::FixedSizeBinary(2), - Buffer::from([1, 2, 3, 4, 5, 6]), + Buffer::from_slice([1, 2, 3, 4, 5, 6]), Some(Bitmap::from([true, false, true])), ); assert_eq!(array.size(), 2); @@ -23,7 +23,7 @@ fn basics() { #[test] fn with_validity() { - let values = Buffer::from([1, 2, 3, 4, 5, 6]); + let values = 
Buffer::from_slice([1, 2, 3, 4, 5, 6]); let a = FixedSizeBinaryArray::from_data(DataType::FixedSizeBinary(2), values, None); let a = a.with_validity(Some(Bitmap::from([true, false, true]))); assert!(a.validity().is_some()); @@ -31,7 +31,7 @@ fn with_validity() { #[test] fn display() { - let values = Buffer::from([1, 2, 3, 4, 5, 6]); + let values = Buffer::from_slice([1, 2, 3, 4, 5, 6]); let a = FixedSizeBinaryArray::from_data( DataType::FixedSizeBinary(2), values, diff --git a/tests/it/array/fixed_size_binary/mutable.rs b/tests/it/array/fixed_size_binary/mutable.rs index 65779e1e136..4739baeb239 100644 --- a/tests/it/array/fixed_size_binary/mutable.rs +++ b/tests/it/array/fixed_size_binary/mutable.rs @@ -1,18 +1,17 @@ use arrow2::array::*; use arrow2::bitmap::{Bitmap, MutableBitmap}; -use arrow2::buffer::MutableBuffer; use arrow2::datatypes::DataType; #[test] fn basic() { let a = MutableFixedSizeBinaryArray::from_data( DataType::FixedSizeBinary(2), - MutableBuffer::from([1, 2, 3, 4]), + Vec::from([1, 2, 3, 4]), None, ); assert_eq!(a.len(), 2); assert_eq!(a.data_type(), &DataType::FixedSizeBinary(2)); - assert_eq!(a.values(), &MutableBuffer::from([1, 2, 3, 4])); + assert_eq!(a.values(), &Vec::from([1, 2, 3, 4])); assert_eq!(a.validity(), None); assert_eq!(a.value(1), &[3, 4]); assert_eq!(unsafe { a.value_unchecked(1) }, &[3, 4]); @@ -23,25 +22,25 @@ fn basic() { fn equal() { let a = MutableFixedSizeBinaryArray::from_data( DataType::FixedSizeBinary(2), - MutableBuffer::from([1, 2, 3, 4]), + Vec::from([1, 2, 3, 4]), None, ); assert_eq!(a, a); let b = MutableFixedSizeBinaryArray::from_data( DataType::FixedSizeBinary(2), - MutableBuffer::from([1, 2]), + Vec::from([1, 2]), None, ); assert_eq!(b, b); assert!(a != b); let a = MutableFixedSizeBinaryArray::from_data( DataType::FixedSizeBinary(2), - MutableBuffer::from([1, 2, 3, 4]), + Vec::from([1, 2, 3, 4]), Some(MutableBitmap::from([true, false])), ); let b = MutableFixedSizeBinaryArray::from_data( 
DataType::FixedSizeBinary(2), - MutableBuffer::from([1, 2, 3, 4]), + Vec::from([1, 2, 3, 4]), Some(MutableBitmap::from([false, true])), ); assert_eq!(a, a); diff --git a/tests/it/array/growable/utils.rs b/tests/it/array/growable/utils.rs deleted file mode 100644 index 981e14ca767..00000000000 --- a/tests/it/array/growable/utils.rs +++ /dev/null @@ -1,75 +0,0 @@ -use crate::{ - array::{Array, Offset}, - bitmap::{Bitmap, MutableBitmap}, - buffer::MutableBuffer, -}; - -pub(super) fn extend_offsets( - buffer: &mut MutableBuffer, - last_offset: &mut T, - offsets: &[T], -) { - buffer.reserve(offsets.len() - 1); - offsets.windows(2).for_each(|offsets| { - // compute the new offset - let length = offsets[1] - offsets[0]; - *last_offset += length; - buffer.push(*last_offset); - }); -} - -pub(super) type ExtendNullBits<'a> = Box; - -pub(super) fn build_extend_null_bits(array: &dyn Array, use_validity: bool) -> ExtendNullBits { - if let Some(bitmap) = array.validity() { - Box::new(move |validity, start, len| { - assert!(start + len <= bitmap.len()); - unsafe { - let iter = (start..start + len).map(|i| bitmap.get_bit_unchecked(i)); - validity.extend_from_trusted_len_iter_unchecked(iter); - }; - }) - } else if use_validity { - Box::new(|validity, _, len| { - let iter = (0..len).map(|_| true); - unsafe { - validity.extend_from_trusted_len_iter_unchecked(iter); - }; - }) - } else { - Box::new(|_, _, _| {}) - } -} - -#[inline] -pub(super) fn extend_validity( - mutable_validity: &mut MutableBitmap, - validity: Option<&Bitmap>, - start: usize, - len: usize, - use_validity: bool, -) { - if let Some(bitmap) = validity { - assert!(start + len <= bitmap.len()); - unsafe { - let iter = (start..start + len).map(|i| bitmap.get_bit_unchecked(i)); - mutable_validity.extend_from_trusted_len_iter_unchecked(iter); - }; - } else if use_validity { - mutable_validity.extend_constant(len, true); - }; -} - -#[inline] -pub(super) fn extend_offset_values( - buffer: &mut MutableBuffer, - offsets: &[O], 
- values: &[u8], - start: usize, - len: usize, -) { - let start_values = offsets[start].to_usize(); - let end_values = offsets[start + len].to_usize(); - let new_values = &values[start_values..end_values]; - buffer.extend_from_slice(new_values); -} diff --git a/tests/it/array/list/mod.rs b/tests/it/array/list/mod.rs index b3717465d95..28c47daf0ed 100644 --- a/tests/it/array/list/mod.rs +++ b/tests/it/array/list/mod.rs @@ -8,13 +8,13 @@ mod mutable; #[test] fn display() { - let values = Buffer::from([1, 2, 3, 4, 5]); + let values = Buffer::from_slice([1, 2, 3, 4, 5]); let values = PrimitiveArray::::from_data(DataType::Int32, values, None); let data_type = ListArray::::default_datatype(DataType::Int32); let array = ListArray::::from_data( data_type, - Buffer::from([0, 2, 2, 3, 5]), + Buffer::from_slice([0, 2, 2, 3, 5]), Arc::new(values), None, ); @@ -28,38 +28,47 @@ fn display() { #[test] #[should_panic(expected = "The child's datatype must match the inner type of the \'data_type\'")] fn test_nested_panic() { - let values = Buffer::from([1, 2, 3, 4, 5]); + let values = Buffer::from_slice([1, 2, 3, 4, 5]); let values = PrimitiveArray::::from_data(DataType::Int32, values, None); let data_type = ListArray::::default_datatype(DataType::Int32); let array = ListArray::::from_data( data_type.clone(), - Buffer::from([0, 2, 2, 3, 5]), + Buffer::from_slice([0, 2, 2, 3, 5]), Arc::new(values), None, ); // The datatype for the nested array has to be created considering // the nested structure of the child data - let _ = ListArray::::from_data(data_type, Buffer::from([0, 2, 4]), Arc::new(array), None); + let _ = ListArray::::from_data( + data_type, + Buffer::from_slice([0, 2, 4]), + Arc::new(array), + None, + ); } #[test] fn test_nested_display() { - let values = Buffer::from([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + let values = Buffer::from_slice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); let values = PrimitiveArray::::from_data(DataType::Int32, values, None); let data_type = 
ListArray::::default_datatype(DataType::Int32); let array = ListArray::::from_data( data_type, - Buffer::from([0, 2, 4, 7, 7, 8, 10]), + Buffer::from_slice([0, 2, 4, 7, 7, 8, 10]), Arc::new(values), None, ); let data_type = ListArray::::default_datatype(array.data_type().clone()); - let nested = - ListArray::::from_data(data_type, Buffer::from([0, 2, 5, 6]), Arc::new(array), None); + let nested = ListArray::::from_data( + data_type, + Buffer::from_slice([0, 2, 5, 6]), + Arc::new(array), + None, + ); let expected = "ListArray[\nListArray[\nInt32[1, 2],\nInt32[3, 4]\n],\nListArray[\nInt32[5, 6, 7],\nInt32[],\nInt32[8]\n],\nListArray[\nInt32[9, 10]\n]\n]"; assert_eq!(format!("{}", nested), expected); diff --git a/tests/it/array/list/mutable.rs b/tests/it/array/list/mutable.rs index 649663bb8b2..e87d5354f22 100644 --- a/tests/it/array/list/mutable.rs +++ b/tests/it/array/list/mutable.rs @@ -16,14 +16,14 @@ fn basics() { let values = PrimitiveArray::::from_data( DataType::Int32, - Buffer::from([1, 2, 3, 4, 0, 6]), + Buffer::from_slice([1, 2, 3, 4, 0, 6]), Some(Bitmap::from([true, true, true, true, false, true])), ); let data_type = ListArray::::default_datatype(DataType::Int32); let expected = ListArray::::from_data( data_type, - Buffer::from([0, 3, 3, 6]), + Buffer::from_slice([0, 3, 3, 6]), Arc::new(values), Some(Bitmap::from([true, false, true])), ); diff --git a/tests/it/array/primitive/mod.rs b/tests/it/array/primitive/mod.rs index 7e3a0532c3f..8be3db908dc 100644 --- a/tests/it/array/primitive/mod.rs +++ b/tests/it/array/primitive/mod.rs @@ -273,6 +273,6 @@ fn months_days_ns() { #[test] #[should_panic] fn wrong_data_type() { - let values = Buffer::from(b"abbb"); + let values = Buffer::from_slice(b"abbb"); PrimitiveArray::from_data(DataType::Utf8, values, None); } diff --git a/tests/it/array/primitive/mutable.rs b/tests/it/array/primitive/mutable.rs index 9615aa022a5..33d73241ddf 100644 --- a/tests/it/array/primitive/mutable.rs +++ 
b/tests/it/array/primitive/mutable.rs @@ -1,7 +1,6 @@ use arrow2::{ array::*, bitmap::{Bitmap, MutableBitmap}, - buffer::MutableBuffer, datatypes::DataType, error::Result, }; @@ -11,13 +10,13 @@ use std::iter::FromIterator; fn from_and_into_data() { let a = MutablePrimitiveArray::from_data( DataType::Int32, - MutableBuffer::from([1i32, 0]), + Vec::from([1i32, 0]), Some(MutableBitmap::from([true, false])), ); assert_eq!(a.len(), 2); let (a, b, c) = a.into_data(); assert_eq!(a, DataType::Int32); - assert_eq!(b, MutableBuffer::from([1i32, 0])); + assert_eq!(b, Vec::from([1i32, 0])); assert_eq!(c, Some(MutableBitmap::from([true, false]))); } @@ -25,7 +24,7 @@ fn from_and_into_data() { fn to() { let a = MutablePrimitiveArray::from_data( DataType::Int32, - MutableBuffer::from([1i32, 0]), + Vec::from([1i32, 0]), Some(MutableBitmap::from([true, false])), ); let a = a.to(DataType::Date32); @@ -36,7 +35,7 @@ fn to() { fn values_mut_slice() { let mut a = MutablePrimitiveArray::from_data( DataType::Int32, - MutableBuffer::from([1i32, 0]), + Vec::from([1i32, 0]), Some(MutableBitmap::from([true, false])), ); let values = a.values_mut_slice(); @@ -56,7 +55,7 @@ fn push() { assert!(!a.is_valid(1)); assert!(!a.is_valid(2)); - assert_eq!(a.values(), &MutableBuffer::from([1, 0, 0])); + assert_eq!(a.values(), &Vec::from([1, 0, 0])); } #[test] @@ -70,7 +69,7 @@ fn set() { assert!(a.is_valid(0)); assert!(a.is_valid(1)); - assert_eq!(a.values(), &MutableBuffer::from([2, 1])); + assert_eq!(a.values(), &Vec::from([2, 1])); let mut a = MutablePrimitiveArray::::from_slice([1, 2]); @@ -81,7 +80,7 @@ fn set() { assert!(a.is_valid(0)); assert!(!a.is_valid(1)); - assert_eq!(a.values(), &MutableBuffer::from([2, 0])); + assert_eq!(a.values(), &Vec::from([2, 0])); } #[test] @@ -123,7 +122,7 @@ fn extend_trusted_len() { a.validity(), Some(&MutableBitmap::from([true, true, false, true])) ); - assert_eq!(a.values(), &MutableBuffer::::from([1, 2, 0, 4])); + assert_eq!(a.values(), &Vec::::from([1, 2, 0, 
4])); } #[test] @@ -132,7 +131,7 @@ fn extend_constant_no_validity() { a.push(Some(1)); a.extend_constant(2, Some(3)); assert_eq!(a.validity(), None); - assert_eq!(a.values(), &MutableBuffer::::from([1, 3, 3])); + assert_eq!(a.values(), &Vec::::from([1, 3, 3])); } #[test] @@ -144,7 +143,7 @@ fn extend_constant_validity() { a.validity(), Some(&MutableBitmap::from([true, false, false])) ); - assert_eq!(a.values(), &MutableBuffer::::from([1, 0, 0])); + assert_eq!(a.values(), &Vec::::from([1, 0, 0])); } #[test] @@ -156,7 +155,7 @@ fn extend_constant_validity_inverse() { a.validity(), Some(&MutableBitmap::from([false, true, true])) ); - assert_eq!(a.values(), &MutableBuffer::::from([0, 1, 1])); + assert_eq!(a.values(), &Vec::::from([0, 1, 1])); } #[test] @@ -168,7 +167,7 @@ fn extend_constant_validity_none() { a.validity(), Some(&MutableBitmap::from([false, false, false])) ); - assert_eq!(a.values(), &MutableBuffer::::from([0, 0, 0])); + assert_eq!(a.values(), &Vec::::from([0, 0, 0])); } #[test] @@ -176,7 +175,7 @@ fn extend_trusted_len_values() { let mut a = MutablePrimitiveArray::::new(); a.extend_trusted_len_values(vec![1, 2, 3].into_iter()); assert_eq!(a.validity(), None); - assert_eq!(a.values(), &MutableBuffer::::from([1, 2, 3])); + assert_eq!(a.values(), &Vec::::from([1, 2, 3])); let mut a = MutablePrimitiveArray::::new(); a.push(None); @@ -192,7 +191,7 @@ fn extend_from_slice() { let mut a = MutablePrimitiveArray::::new(); a.extend_from_slice(&[1, 2, 3]); assert_eq!(a.validity(), None); - assert_eq!(a.values(), &MutableBuffer::::from([1, 2, 3])); + assert_eq!(a.values(), &Vec::::from([1, 2, 3])); let mut a = MutablePrimitiveArray::::new(); a.push(None); @@ -215,7 +214,7 @@ fn set_validity() { #[test] fn set_values() { let mut a = MutablePrimitiveArray::::from_slice([1, 2]); - a.set_values(MutableBuffer::from([1, 3])); + a.set_values(Vec::from([1, 3])); assert_eq!(a.values().as_slice(), [1, 3]); } @@ -229,6 +228,6 @@ fn try_from_trusted_len_iter() { #[test] 
#[should_panic] fn wrong_data_type() { - let values = MutableBuffer::from(b"abbb"); + let values = vec![1u8]; MutablePrimitiveArray::from_data(DataType::Utf8, values, None); } diff --git a/tests/it/array/union.rs b/tests/it/array/union.rs index 9ed49a272b6..cbb4b20a667 100644 --- a/tests/it/array/union.rs +++ b/tests/it/array/union.rs @@ -9,7 +9,7 @@ fn display() -> Result<()> { Field::new("b", DataType::Utf8, true), ]; let data_type = DataType::Union(fields, None, UnionMode::Sparse); - let types = Buffer::from(&[0, 0, 1]); + let types = Buffer::from_slice([0, 0, 1]); let fields = vec![ Arc::new(Int32Array::from(&[Some(1), None, Some(2)])) as Arc, Arc::new(Utf8Array::::from(&[Some("a"), Some("b"), Some("c")])) as Arc, @@ -29,7 +29,7 @@ fn slice() -> Result<()> { Field::new("b", DataType::Utf8, true), ]; let data_type = DataType::Union(fields, None, UnionMode::Sparse); - let types = Buffer::from(&[0, 0, 1]); + let types = Buffer::from_slice([0, 0, 1]); let fields = vec![ Arc::new(Int32Array::from(&[Some(1), None, Some(2)])) as Arc, Arc::new(Utf8Array::::from(&[Some("a"), Some("b"), Some("c")])) as Arc, @@ -39,7 +39,7 @@ fn slice() -> Result<()> { let result = array.slice(1, 2); - let types = Buffer::from(&[0, 1]); + let types = Buffer::from_slice([0, 1]); let expected = UnionArray::from_data(data_type, types, fields, None); assert_eq!(expected, result); diff --git a/tests/it/array/utf8/mod.rs b/tests/it/array/utf8/mod.rs index d95df91a776..ac007507330 100644 --- a/tests/it/array/utf8/mod.rs +++ b/tests/it/array/utf8/mod.rs @@ -56,10 +56,10 @@ fn from() { #[test] fn from_slice() { - let b = Utf8Array::::from_slice(&["a", "b", "cc"]); + let b = Utf8Array::::from_slice(["a", "b", "cc"]); - let offsets = Buffer::from(&[0, 1, 2, 4]); - let values = Buffer::from("abcc".as_bytes()); + let offsets = Buffer::from_slice([0, 1, 2, 4]); + let values = Buffer::from_slice("abcc".as_bytes()); assert_eq!( b, Utf8Array::::from_data(DataType::Utf8, offsets, values, None) @@ -70,8 
+70,8 @@ fn from_slice() { fn from_iter_values() { let b = Utf8Array::::from_iter_values(["a", "b", "cc"].iter()); - let offsets = Buffer::from(&[0, 1, 2, 4]); - let values = Buffer::from("abcc".as_bytes()); + let offsets = Buffer::from_slice([0, 1, 2, 4]); + let values = Buffer::from_slice(b"abcc"); assert_eq!( b, Utf8Array::::from_data(DataType::Utf8, offsets, values, None) @@ -83,8 +83,8 @@ fn from_trusted_len_iter() { let b = Utf8Array::::from_trusted_len_iter(vec![Some("a"), Some("b"), Some("cc")].into_iter()); - let offsets = Buffer::from(&[0, 1, 2, 4]); - let values = Buffer::from("abcc".as_bytes()); + let offsets = Buffer::from_slice([0, 1, 2, 4]); + let values = Buffer::from_slice(b"abcc"); assert_eq!( b, Utf8Array::::from_data(DataType::Utf8, offsets, values, None) @@ -100,8 +100,8 @@ fn try_from_trusted_len_iter() { ) .unwrap(); - let offsets = Buffer::from(&[0, 1, 2, 4]); - let values = Buffer::from("abcc".as_bytes()); + let offsets = Buffer::from_slice([0, 1, 2, 4]); + let values = Buffer::from_slice("abcc".as_bytes()); assert_eq!( b, Utf8Array::::from_data(DataType::Utf8, offsets, values, None) @@ -111,32 +111,32 @@ fn try_from_trusted_len_iter() { #[test] #[should_panic] fn not_utf8() { - let offsets = Buffer::from(&[0, 4]); - let values = Buffer::from([0, 159, 146, 150]); // invalid utf8 + let offsets = Buffer::from_slice([0, 4]); + let values = Buffer::from_slice([0, 159, 146, 150]); // invalid utf8 Utf8Array::::from_data(DataType::Utf8, offsets, values, None); } #[test] #[should_panic] fn not_utf8_individually() { - let offsets = Buffer::from(&[0, 1, 2]); - let values = Buffer::from([207, 128]); // each is invalid utf8, but together is valid + let offsets = Buffer::from_slice([0, 1, 2]); + let values = Buffer::from_slice([207, 128]); // each is invalid utf8, but together is valid Utf8Array::::from_data(DataType::Utf8, offsets, values, None); } #[test] #[should_panic] fn wrong_offsets() { - let offsets = Buffer::from(&[0, 5, 4]); // invalid offsets 
- let values = Buffer::from(b"abbbbb"); + let offsets = Buffer::from_slice([0, 5, 4]); // invalid offsets + let values = Buffer::from_slice(b"abbbbb"); Utf8Array::::from_data(DataType::Utf8, offsets, values, None); } #[test] #[should_panic] fn wrong_data_type() { - let offsets = Buffer::from(&[0, 4]); - let values = Buffer::from(b"abbb"); + let offsets = Buffer::from_slice([0, 4]); + let values = Buffer::from_slice(b"abbb"); Utf8Array::::from_data(DataType::Int8, offsets, values, None); } @@ -144,32 +144,32 @@ fn wrong_data_type() { #[should_panic] fn out_of_bounds_offsets_panics() { // the 10 is out of bounds - let offsets = Buffer::from(&[0, 10, 11]); - let values = Buffer::from(b"abbb"); + let offsets = Buffer::from_slice([0, 10, 11]); + let values = Buffer::from_slice(b"abbb"); let _ = Utf8Array::::from_data(DataType::Utf8, offsets, values, None); } #[test] #[should_panic] fn decreasing_offset_and_ascii_panics() { - let offsets = Buffer::from(&[0, 2, 1]); - let values = Buffer::from(b"abbb"); + let offsets = Buffer::from_slice([0, 2, 1]); + let values = Buffer::from_slice(b"abbb"); let _ = Utf8Array::::from_data(DataType::Utf8, offsets, values, None); } #[test] #[should_panic] fn decreasing_offset_and_utf8_panics() { - let offsets = Buffer::from(&[0, 2, 4, 2]); // not increasing - let values = Buffer::from([207, 128, 207, 128, 207, 128]); // valid utf8 + let offsets = Buffer::from_slice([0, 2, 4, 2]); // not increasing + let values = Buffer::from_slice([207, 128, 207, 128, 207, 128]); // valid utf8 let _ = Utf8Array::::from_data(DataType::Utf8, offsets, values, None); } #[test] #[should_panic] fn index_out_of_bounds_panics() { - let offsets = Buffer::from(&[0, 1, 2, 4]); - let values = Buffer::from(b"abbb"); + let offsets = Buffer::from_slice([0, 1, 2, 4]); + let values = Buffer::from_slice(b"abbb"); let array = Utf8Array::::from_data(DataType::Utf8, offsets, values, None); array.value(3); diff --git a/tests/it/array/utf8/mutable.rs 
b/tests/it/array/utf8/mutable.rs index cde2a9eeafe..893b92c6806 100644 --- a/tests/it/array/utf8/mutable.rs +++ b/tests/it/array/utf8/mutable.rs @@ -1,6 +1,5 @@ use arrow2::array::{MutableUtf8Array, Utf8Array}; use arrow2::bitmap::Bitmap; -use arrow2::buffer::MutableBuffer; use arrow2::datatypes::DataType; #[test] @@ -24,8 +23,8 @@ fn push_null() { #[test] #[should_panic] fn not_utf8() { - let offsets = MutableBuffer::from(&[0, 4]); - let values = MutableBuffer::from([0, 159, 146, 150]); // invalid utf8 + let offsets = vec![0, 4]; + let values = vec![0, 159, 146, 150]; // invalid utf8 MutableUtf8Array::::from_data(DataType::Utf8, offsets, values, None); } @@ -33,16 +32,16 @@ fn not_utf8() { #[test] #[should_panic] fn wrong_offsets() { - let offsets = MutableBuffer::from(&[0, 5, 4]); // invalid offsets - let values = MutableBuffer::from(b"abbbbb"); + let offsets = vec![0, 5, 4]; // invalid offsets + let values = vec![0, 1, 2, 3, 4, 5]; MutableUtf8Array::::from_data(DataType::Utf8, offsets, values, None); } #[test] #[should_panic] fn wrong_data_type() { - let offsets = MutableBuffer::from(&[0, 4]); // invalid offsets - let values = MutableBuffer::from(b"abbb"); + let offsets = vec![0, 4]; // invalid offsets + let values = vec![1, 2, 3, 4]; MutableUtf8Array::::from_data(DataType::Int8, offsets, values, None); } diff --git a/tests/it/bitmap/mod.rs b/tests/it/bitmap/mod.rs index 3b415d0a6c2..2151c7b3bb0 100644 --- a/tests/it/bitmap/mod.rs +++ b/tests/it/bitmap/mod.rs @@ -5,7 +5,7 @@ mod utils; use proptest::prelude::*; -use arrow2::{bitmap::Bitmap, buffer::MutableBuffer}; +use arrow2::bitmap::Bitmap; /// Returns a strategy of an arbitrary sliced [`Bitmap`] of size up to 1000 pub(crate) fn bitmap_strategy() -> impl Strategy { @@ -26,8 +26,8 @@ pub(crate) fn bitmap_strategy() -> impl Strategy { } fn create_bitmap>(bytes: P, len: usize) -> Bitmap { - let buffer = MutableBuffer::::from(bytes.as_ref()); - Bitmap::from_u8_buffer(buffer, len) + let buffer = 
Vec::::from(bytes.as_ref()); + Bitmap::from_u8_vec(buffer, len) } #[test] diff --git a/tests/it/bitmap/mutable.rs b/tests/it/bitmap/mutable.rs index 432a59083fc..bde1fdd5399 100644 --- a/tests/it/bitmap/mutable.rs +++ b/tests/it/bitmap/mutable.rs @@ -1,7 +1,4 @@ -use arrow2::{ - bitmap::{Bitmap, MutableBitmap}, - buffer::MutableBuffer, -}; +use arrow2::bitmap::{Bitmap, MutableBitmap}; #[test] fn from_slice() { @@ -162,7 +159,7 @@ fn extend_from_bitmap() { #[test] fn extend_from_bitmap_offset() { let other = Bitmap::from_u8_slice(&[0b00111111], 8); - let mut bitmap = MutableBitmap::from_buffer(MutableBuffer::from(&[1, 0, 0b00101010]), 22); + let mut bitmap = MutableBitmap::from_vec(vec![1, 0, 0b00101010], 22); // call is optimized to perform a memcopy bitmap.extend_from_bitmap(&other); @@ -172,7 +169,7 @@ fn extend_from_bitmap_offset() { // more than one byte let other = Bitmap::from_u8_slice(&[0b00111111, 0b00001111, 0b0001100], 20); - let mut bitmap = MutableBitmap::from_buffer(MutableBuffer::from(&[1, 0, 0b00101010]), 22); + let mut bitmap = MutableBitmap::from_vec(vec![1, 0, 0b00101010], 22); // call is optimized to perform a memcopy bitmap.extend_from_bitmap(&other); diff --git a/tests/it/buffer/immutable.rs b/tests/it/buffer/immutable.rs index 5fbf711ec5f..48c4e24cb66 100644 --- a/tests/it/buffer/immutable.rs +++ b/tests/it/buffer/immutable.rs @@ -17,14 +17,14 @@ fn new_zeroed() { #[test] fn from_slice() { - let buffer = Buffer::::from(&[0, 1, 2]); + let buffer = Buffer::::from_slice([0, 1, 2]); assert_eq!(buffer.len(), 3); assert_eq!(buffer.as_slice(), &[0, 1, 2]); } #[test] fn slice() { - let buffer = Buffer::::from(&[0, 1, 2, 3]); + let buffer = Buffer::::from_slice([0, 1, 2, 3]); let buffer = buffer.slice(1, 2); assert_eq!(buffer.len(), 2); assert_eq!(buffer.as_slice(), &[1, 2]); @@ -54,7 +54,7 @@ fn try_from_trusted_len_iter() { #[test] fn as_ptr() { - let buffer = Buffer::::from(&[0, 1, 2, 3]); + let buffer = Buffer::::from_slice([0, 1, 2, 3]); let 
buffer = buffer.slice(1, 2); let ptr = buffer.as_ptr(); assert_eq!(unsafe { *ptr }, 1); @@ -62,16 +62,15 @@ fn as_ptr() { #[test] fn debug() { - let buffer = Buffer::::from(&[0, 1, 2, 3]); + let buffer = Buffer::::from_slice([0, 1, 2, 3]); let buffer = buffer.slice(1, 2); let a = format!("{:?}", buffer); assert_eq!(a, "[1, 2]") } -#[cfg(not(feature = "cache_aligned"))] #[test] fn from_vec() { - let buffer = Buffer::::from_vec(vec![0, 1, 2]); + let buffer = Buffer::::from(vec![0, 1, 2]); assert_eq!(buffer.len(), 3); assert_eq!(buffer.as_slice(), &[0, 1, 2]); } diff --git a/tests/it/buffer/mod.rs b/tests/it/buffer/mod.rs index 2ad875845b5..723312cd1a8 100644 --- a/tests/it/buffer/mod.rs +++ b/tests/it/buffer/mod.rs @@ -1,2 +1 @@ mod immutable; -mod mutable; diff --git a/tests/it/buffer/mutable.rs b/tests/it/buffer/mutable.rs deleted file mode 100644 index ba26f4a6ebc..00000000000 --- a/tests/it/buffer/mutable.rs +++ /dev/null @@ -1,137 +0,0 @@ -use arrow2::buffer::{Buffer, MutableBuffer}; - -#[test] -fn default() { - let b = MutableBuffer::::default(); - assert_eq!(b.len(), 0); - assert!(b.is_empty()); -} - -#[test] -fn with_capacity() { - let b = MutableBuffer::::with_capacity(6); - assert!(b.capacity() >= 6); - assert!(b.is_empty()); -} - -#[test] -fn from_len_zeroed() { - let b = MutableBuffer::::from_len_zeroed(3); - assert_eq!(b.len(), 3); - assert!(!b.is_empty()); - assert_eq!(b.as_slice(), &[0, 0, 0]); -} - -#[test] -fn resize() { - let mut b = MutableBuffer::::new(); - b.resize(3, 1); - assert_eq!(b.len(), 3); - assert_eq!(b.as_slice(), &[1, 1, 1]); - assert_eq!(b.as_mut_slice(), &[1, 1, 1]); -} - -// branch that uses alloc_zeroed -#[test] -fn resize_from_zero() { - let mut b = MutableBuffer::::new(); - b.resize(3, 0); - assert_eq!(b.len(), 3); - assert_eq!(b.as_slice(), &[0, 0, 0]); -} - -#[test] -fn resize_smaller() { - let mut b = MutableBuffer::::from_len_zeroed(3); - b.resize(2, 1); - assert_eq!(b.len(), 2); - assert_eq!(b.as_slice(), &[0, 0]); -} - 
-#[test] -fn extend_from_slice() { - let mut b = MutableBuffer::::from_len_zeroed(1); - b.extend_from_slice(&[1, 2]); - assert_eq!(b.len(), 3); - assert_eq!(b.as_slice(), &[0, 1, 2]); - - assert_eq!(unsafe { *b.as_ptr() }, 0); - assert_eq!(unsafe { *b.as_mut_ptr() }, 0); -} - -#[test] -fn push() { - let mut b = MutableBuffer::::new(); - for _ in 0..17 { - b.push(1); - } - assert_eq!(b.len(), 17); -} - -#[test] -fn capacity() { - let b = MutableBuffer::::with_capacity(10); - assert!(b.capacity() >= 10); - - let mut b = MutableBuffer::::with_capacity(16); - b.reserve(4); - assert_eq!(b.capacity(), 16); - b.extend_from_slice(&[0.1; 16]); - b.reserve(4); - assert_eq!(b.capacity(), 32); -} - -#[test] -fn extend() { - let mut b = MutableBuffer::::new(); - b.extend(0..3); - assert_eq!(b.as_slice(), &[0, 1, 2]); -} - -#[test] -fn extend_constant() { - let mut b = MutableBuffer::::new(); - b.extend_constant(3, 1); - assert_eq!(b.as_slice(), &[1, 1, 1]); -} - -#[test] -fn from_iter() { - let b = (0..3).collect::>(); - assert_eq!(b.as_slice(), &[0, 1, 2]); -} - -#[test] -fn from_as_ref() { - let b = MutableBuffer::::from(&[0, 1, 2]); - assert_eq!(b.as_slice(), &[0, 1, 2]); -} - -#[test] -fn from_trusted_len_iter() { - let b = unsafe { MutableBuffer::::from_trusted_len_iter_unchecked(0..3) }; - assert_eq!(b.as_slice(), &[0, 1, 2]); -} - -#[test] -fn try_from_trusted_len_iter() { - let iter = (0..3).map(Result::<_, String>::Ok); - let buffer = - unsafe { MutableBuffer::::try_from_trusted_len_iter_unchecked(iter) }.unwrap(); - assert_eq!(buffer.len(), 3); - assert_eq!(buffer.as_slice(), &[0, 1, 2]); -} - -#[test] -fn to_buffer() { - let b = (0..3).collect::>(); - let b: Buffer = b.into(); - assert_eq!(b.as_slice(), &[0, 1, 2]); -} - -#[test] -fn debug() { - let buffer = MutableBuffer::::from(&[0, 1, 2, 3]); - let a = format!("{:?}", buffer); - assert_eq!(a, "[0, 1, 2, 3]") -} diff --git a/tests/it/compute/take.rs b/tests/it/compute/take.rs index c5dfbc88493..676152b727f 100644 
--- a/tests/it/compute/take.rs +++ b/tests/it/compute/take.rs @@ -4,6 +4,7 @@ use arrow2::compute::take::{can_take, take}; use arrow2::datatypes::{DataType, Field, IntervalUnit}; use arrow2::error::Result; use arrow2::{array::*, bitmap::MutableBitmap, types::NativeType}; +use arrow2::{bitmap::Bitmap, buffer::Buffer}; fn test_take_primitive( data: &[Option], @@ -174,3 +175,141 @@ fn unsigned_take() { let a = take(&values, &indices).unwrap(); assert_eq!(a.len(), 0) } + +#[test] +fn list_with_no_none() { + let values = Buffer::from_slice([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + let values = PrimitiveArray::::from_data(DataType::Int32, values, None); + + let data_type = ListArray::::default_datatype(DataType::Int32); + let array = ListArray::::from_data( + data_type, + Buffer::from_slice([0, 2, 2, 6, 9, 10]), + Arc::new(values), + None, + ); + + let indices = PrimitiveArray::from([Some(4i32), Some(1), Some(3)]); + let result = take(&array, &indices).unwrap(); + + let expected_values = Buffer::from_slice([9, 6, 7, 8]); + let expected_values = PrimitiveArray::::from_data(DataType::Int32, expected_values, None); + let expected_type = ListArray::::default_datatype(DataType::Int32); + let expected = ListArray::::from_data( + expected_type, + Buffer::from_slice([0, 1, 1, 4]), + Arc::new(expected_values), + None, + ); + + assert_eq!(expected, result.as_ref()); +} + +#[test] +fn list_with_none() { + let values = Buffer::from_slice([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + let values = PrimitiveArray::::from_data(DataType::Int32, values, None); + + let validity_values = vec![true, false, true, true, true]; + let validity = Bitmap::from_trusted_len_iter(validity_values.into_iter()); + + let data_type = ListArray::::default_datatype(DataType::Int32); + let array = ListArray::::from_data( + data_type, + Buffer::from_slice([0, 2, 2, 6, 9, 10]), + Arc::new(values), + Some(validity), + ); + + let indices = PrimitiveArray::from([Some(4i32), None, Some(2), Some(3)]); + let result = take(&array, 
&indices).unwrap(); + + let data_expected = vec![ + Some(vec![Some(9i32)]), + None, + Some(vec![Some(2i32), Some(3), Some(4), Some(5)]), + Some(vec![Some(6i32), Some(7), Some(8)]), + ]; + + let mut expected = MutableListArray::>::new(); + expected.try_extend(data_expected).unwrap(); + let expected: ListArray = expected.into(); + + assert_eq!(expected, result.as_ref()); +} + +#[test] +fn list_both_validity() { + let values = vec![ + Some(vec![Some(2i32), Some(3), Some(4), Some(5)]), + None, + Some(vec![Some(9i32)]), + Some(vec![Some(6i32), Some(7), Some(8)]), + ]; + + let mut array = MutableListArray::>::new(); + array.try_extend(values).unwrap(); + let array: ListArray = array.into(); + + let indices = PrimitiveArray::from([Some(3i32), None, Some(1), Some(0)]); + let result = take(&array, &indices).unwrap(); + + let data_expected = vec![ + Some(vec![Some(6i32), Some(7), Some(8)]), + None, + None, + Some(vec![Some(2i32), Some(3), Some(4), Some(5)]), + ]; + let mut expected = MutableListArray::>::new(); + expected.try_extend(data_expected).unwrap(); + let expected: ListArray = expected.into(); + + assert_eq!(expected, result.as_ref()); +} + +#[test] +fn test_nested() { + let values = Buffer::from_slice([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + let values = PrimitiveArray::::from_data(DataType::Int32, values, None); + + let data_type = ListArray::::default_datatype(DataType::Int32); + let array = ListArray::::from_data( + data_type, + Buffer::from_slice([0, 2, 4, 7, 7, 8, 10]), + Arc::new(values), + None, + ); + + let data_type = ListArray::::default_datatype(array.data_type().clone()); + let nested = ListArray::::from_data( + data_type, + Buffer::from_slice([0, 2, 5, 6]), + Arc::new(array), + None, + ); + + let indices = PrimitiveArray::from([Some(0i32), Some(1)]); + let result = take(&nested, &indices).unwrap(); + + // expected data + let expected_values = Buffer::from_slice([1, 2, 3, 4, 5, 6, 7, 8]); + let expected_values = PrimitiveArray::::from_data(DataType::Int32, 
expected_values, None); + + let expected_data_type = ListArray::::default_datatype(DataType::Int32); + let expected_array = ListArray::::from_data( + expected_data_type, + Buffer::from_slice([0, 2, 4, 7, 7, 8]), + Arc::new(expected_values), + None, + ); + + let expected_data_type = ListArray::::default_datatype(expected_array.data_type().clone()); + let expected = ListArray::::from_data( + expected_data_type, + Buffer::from_slice([0, 2, 5]), + Arc::new(expected_array), + None, + ); + + assert_eq!(expected, result.as_ref()); +} diff --git a/tests/it/io/json/read.rs b/tests/it/io/json/read.rs index 0ba1e8528e3..824ae1806d6 100644 --- a/tests/it/io/json/read.rs +++ b/tests/it/io/json/read.rs @@ -171,7 +171,7 @@ fn nested_list_arrays() { ); let expected = ListArray::from_data( a_list_data_type, - Buffer::from([0i32, 2, 3, 6, 6, 6]), + Buffer::from_slice([0i32, 2, 3, 6, 6, 6]), Arc::new(a_struct) as Arc, Some(Bitmap::from_u8_slice([0b00010111], 5)), ); diff --git a/tests/it/io/json/write.rs b/tests/it/io/json/write.rs index 3d514f333cd..6baeee60f54 100644 --- a/tests/it/io/json/write.rs +++ b/tests/it/io/json/write.rs @@ -263,7 +263,7 @@ fn write_list_of_struct() { // [{"c11": 5, "c12": {"c121": "g"}}] let c1 = ListArray::::from_data( c1_datatype, - Buffer::from(&[0, 2, 2, 3]), + Buffer::from_slice([0, 2, 2, 3]), Arc::new(s), Some(Bitmap::from_u8_slice([0b00000101], 3)), ); diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs index 70771c3ad88..dec3adc7209 100644 --- a/tests/it/io/parquet/mod.rs +++ b/tests/it/io/parquet/mod.rs @@ -34,7 +34,7 @@ pub fn read_column( } pub fn pyarrow_nested_nullable(column: usize) -> Box { - let offsets = Buffer::::from([0, 2, 2, 5, 8, 8, 11, 11, 12]); + let offsets = Buffer::from_slice([0, 2, 2, 5, 8, 8, 11, 11, 12]); let values = match column { 0 => { diff --git a/tests/it/io/print.rs b/tests/it/io/print.rs index 07a294942e1..1af0b471d48 100644 --- a/tests/it/io/print.rs +++ b/tests/it/io/print.rs @@ -392,7 +392,7 @@ fn 
write_union() -> Result<()> { Field::new("b", DataType::Utf8, true), ]; let data_type = DataType::Union(fields, None, UnionMode::Sparse); - let types = Buffer::from(&[0, 0, 1]); + let types = Buffer::from_slice([0, 0, 1]); let fields = vec![ Arc::new(Int32Array::from(&[Some(1), None, Some(2)])) as Arc, Arc::new(Utf8Array::::from(&[Some("a"), Some("b"), Some("c")])) as Arc,