diff --git a/src/ffi/mmap.rs b/src/ffi/mmap.rs index 98143610da9..2d7afcb1c29 100644 --- a/src/ffi/mmap.rs +++ b/src/ffi/mmap.rs @@ -1,6 +1,9 @@ //! Functionality to mmap in-memory data regions. use std::sync::Arc; +use crate::array::BooleanArray; +use crate::datatypes::DataType; +use crate::error::Error; use crate::{ array::{FromFfi, PrimitiveArray}, types::NativeType, @@ -28,6 +31,7 @@ pub(crate) unsafe fn create_array< buffers: I, children: II, dictionary: Option, + offset: Option, ) -> ArrowArray { let buffers_ptr = buffers .map(|maybe_buffer| match maybe_buffer { @@ -54,7 +58,7 @@ pub(crate) unsafe fn create_array< ArrowArray { length: num_rows as i64, null_count: null_count as i64, - offset: 0, // IPC files are by definition not offset + offset: offset.unwrap_or(0) as i64, // defaults to 0: IPC files are by definition not offset n_buffers, n_children, buffers: private_data.buffers_ptr.as_mut_ptr(), @@ -113,9 +117,52 @@ pub unsafe fn slice(slice: &[T]) -> PrimitiveArray { [validity, Some(ptr)].into_iter(), [].into_iter(), None, + None, ); let array = InternalArrowArray::new(array, T::PRIMITIVE.into()); // safety: we just created a valid array unsafe { PrimitiveArray::::try_from_ffi(array) }.unwrap() } + +/// Creates a (non-null) [`BooleanArray`] from a slice of bits. +/// This does not perform a memcopy and is the fastest way to create a [`BooleanArray`]. +/// +/// This can be useful if you want to apply arrow kernels on slices without incurring +/// a memcopy cost. +/// +/// The `offset` indicates where the first bit starts in the first byte. +/// +/// # Safety +/// +/// This function does not copy the slice, so the returned BooleanArray's lifetime is bound to the lifetime +/// of the slice. The returned [`BooleanArray`] _must not_ outlive the passed slice.
+pub unsafe fn bitmap(data: &[u8], offset: usize, length: usize) -> Result<BooleanArray, Error> {
+    if offset >= 8 {
+        return Err(Error::InvalidArgumentError("offset should be < 8".into()));
+    }
+    // `length` bits must fit after `offset`; saturating math also covers an empty `data`.
+    if length > data.len().saturating_mul(8).saturating_sub(offset) {
+        return Err(Error::InvalidArgumentError("given length is oob".into()));
+    }
+    let null_count = 0;
+    let validity = None;
+
+    let ptr = data.as_ptr();
+    let data = Arc::new(data);
+    // safety: the underlying assumption of this function: the array will not be used
+    // beyond the lifetime of the slice.
+    let array = create_array(
+        data,
+        length,
+        null_count,
+        [validity, Some(ptr)].into_iter(),
+        [].into_iter(),
+        None,
+        Some(offset),
+    );
+    let array = InternalArrowArray::new(array, DataType::Boolean);
+
+    // safety: we just created a valid array
+    Ok(unsafe { BooleanArray::try_from_ffi(array) }.unwrap())
+}
diff --git a/src/mmap/array.rs b/src/mmap/array.rs
index 15ba3ac8823..2167113e790 100644
--- a/src/mmap/array.rs
+++ b/src/mmap/array.rs
@@ -110,6 +110,7 @@ fn mmap_binary>(
             [validity, Some(offsets), Some(values)].into_iter(),
             [].into_iter(),
             None,
+            None,
         )
     })
 }
@@ -144,6 +145,7 @@ fn mmap_fixed_size_binary>(
             [validity, Some(values)].into_iter(),
             [].into_iter(),
             None,
+            None,
         )
     })
 }
@@ -172,6 +174,7 @@ fn mmap_null>(
             [].into_iter(),
             [].into_iter(),
             None,
+            None,
         )
     })
 }
@@ -210,6 +213,7 @@ fn mmap_boolean>(
             [validity, Some(values)].into_iter(),
             [].into_iter(),
             None,
+            None,
         )
     })
 }
@@ -244,6 +248,7 @@ fn mmap_primitive>(
             [validity, Some(values)].into_iter(),
             [].into_iter(),
             None,
+            None,
         )
     })
 }
@@ -296,6 +301,7 @@ fn mmap_list>(
             [validity, Some(offsets)].into_iter(),
             [values].into_iter(),
             None,
+            None,
         )
     })
 }
@@ -347,6 +353,7 @@ fn mmap_fixed_size_list>(
             [validity].into_iter(),
             [values].into_iter(),
             None,
+            None,
         )
     })
 }
@@ -403,6 +410,7 @@ fn mmap_struct>(
             [validity].into_iter(),
             values.into_iter(),
             None,
+            None,
         )
     })
 }
@@ -447,6 +455,7 @@ fn mmap_dict>(
             [validity, Some(values)].into_iter(),
             [].into_iter(),
             Some(export_array_to_c(dictionary)),
+            None,
         )
     })
 }
diff --git a/tests/it/ffi/mod.rs b/tests/it/ffi/mod.rs
index 0ae97a86627..5a06722bb83 100644
--- a/tests/it/ffi/mod.rs
+++ b/tests/it/ffi/mod.rs
@@ -8,3 +8,31 @@ fn mmap_slice() {
     assert_eq!(array.values().as_ref(), &[1, 2, 3]);
     // note: when `slice` is dropped, array must be dropped as-well since by construction of `slice` they share their lifetimes.
 }
+
+#[test]
+fn mmap_bitmap() {
+    let slice = &[123u8, 255];
+    let array = unsafe { arrow2::ffi::mmap::bitmap(slice, 2, 14) }.unwrap();
+    assert_eq!(
+        array.values_iter().collect::<Vec<_>>(),
+        &[false, true, true, true, true, false, true, true, true, true, true, true, true, true]
+    );
+    // note: when `slice` is dropped, array must be dropped as-well since by construction of `slice` they share their lifetimes.
+}
+
+#[test]
+fn mmap_bitmap_bounds() {
+    let slice = &[123u8, 255];
+    // a length shorter than the bits available after the offset is valid
+    let array = unsafe { arrow2::ffi::mmap::bitmap(slice, 2, 5) }.unwrap();
+    assert_eq!(
+        array.values_iter().collect::<Vec<_>>(),
+        &[false, true, true, true, true]
+    );
+    // asking for more bits than the slice holds past the offset is rejected
+    assert!(unsafe { arrow2::ffi::mmap::bitmap(slice, 2, 15) }.is_err());
+    // the offset must address a bit inside the first byte
+    assert!(unsafe { arrow2::ffi::mmap::bitmap(slice, 8, 1) }.is_err());
+    // an empty slice with a non-zero offset is rejected instead of underflowing
+    assert!(unsafe { arrow2::ffi::mmap::bitmap(&[], 1, 1) }.is_err());
+}