Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added memmap &[u8] as BooleanArray #1436

Merged
merged 2 commits into from
Mar 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 48 additions & 1 deletion src/ffi/mmap.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
//! Functionality to mmap in-memory data regions.
use std::sync::Arc;

use crate::array::BooleanArray;
use crate::datatypes::DataType;
use crate::error::Error;
use crate::{
array::{FromFfi, PrimitiveArray},
types::NativeType,
Expand Down Expand Up @@ -28,6 +31,7 @@ pub(crate) unsafe fn create_array<
buffers: I,
children: II,
dictionary: Option<ArrowArray>,
offset: Option<usize>,
) -> ArrowArray {
let buffers_ptr = buffers
.map(|maybe_buffer| match maybe_buffer {
Expand All @@ -54,7 +58,7 @@ pub(crate) unsafe fn create_array<
ArrowArray {
length: num_rows as i64,
null_count: null_count as i64,
offset: 0, // IPC files are by definition not offset
offset: offset.unwrap_or(0) as i64, // Unwrap: IPC files are by definition not offset
n_buffers,
n_children,
buffers: private_data.buffers_ptr.as_mut_ptr(),
Expand Down Expand Up @@ -113,9 +117,52 @@ pub unsafe fn slice<T: NativeType>(slice: &[T]) -> PrimitiveArray<T> {
[validity, Some(ptr)].into_iter(),
[].into_iter(),
None,
None,
);
let array = InternalArrowArray::new(array, T::PRIMITIVE.into());

// safety: we just created a valid array
unsafe { PrimitiveArray::<T>::try_from_ffi(array) }.unwrap()
}

/// Creates a (non-null) [`BooleanArray`] from a slice of bits without copying them.
///
/// The returned array points directly at the memory of `data`, which makes this useful
/// for applying arrow kernels on existing bitmaps without incurring a memcopy cost.
///
/// * `data` - the bytes holding the bits.
/// * `offset` - where the first bit starts in the first byte; must be `< 8`.
/// * `length` - the number of bits (rows) of the resulting array.
///
/// # Errors
///
/// Returns [`Error::InvalidArgumentError`] when `offset >= 8` or when `length` exceeds the
/// number of bits available in `data` after skipping `offset` bits.
///
/// # Safety
///
/// The returned [`BooleanArray`]'s lifetime is bound to the lifetime of the slice, but this
/// is not tracked by the type system. The returned [`BooleanArray`] _must not_ outlive the
/// passed slice.
pub unsafe fn bitmap(data: &[u8], offset: usize, length: usize) -> Result<BooleanArray, Error> {
    if offset >= 8 {
        return Err(Error::InvalidArgumentError("offset should be < 8".into()));
    }
    // Bits available once `offset` bits are skipped. `checked_sub` yields `None` for an
    // empty slice combined with a non-zero offset instead of underflowing, and
    // `saturating_mul` keeps the bit count from wrapping on very large slices.
    let available_bits = data.len().saturating_mul(8).checked_sub(offset);
    if available_bits.map_or(true, |bits| length > bits) {
        return Err(Error::InvalidArgumentError("given length is oob".into()));
    }
    let null_count = 0;
    let validity = None;

    let ptr = data.as_ptr();
    let data = Arc::new(data);

    // safety: the underlying assumption of this function: the array will not be used
    // beyond the lifetime of the slice.
    let array = create_array(
        data,
        length,
        null_count,
        [validity, Some(ptr)].into_iter(),
        [].into_iter(),
        None,
        Some(offset),
    );
    let array = InternalArrowArray::new(array, DataType::Boolean);

    // safety: we just created a valid array
    Ok(unsafe { BooleanArray::try_from_ffi(array) }.unwrap())
}
9 changes: 9 additions & 0 deletions src/mmap/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ fn mmap_binary<O: Offset, T: AsRef<[u8]>>(
[validity, Some(offsets), Some(values)].into_iter(),
[].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -144,6 +145,7 @@ fn mmap_fixed_size_binary<T: AsRef<[u8]>>(
[validity, Some(values)].into_iter(),
[].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -172,6 +174,7 @@ fn mmap_null<T: AsRef<[u8]>>(
[].into_iter(),
[].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -210,6 +213,7 @@ fn mmap_boolean<T: AsRef<[u8]>>(
[validity, Some(values)].into_iter(),
[].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -244,6 +248,7 @@ fn mmap_primitive<P: NativeType, T: AsRef<[u8]>>(
[validity, Some(values)].into_iter(),
[].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -296,6 +301,7 @@ fn mmap_list<O: Offset, T: AsRef<[u8]>>(
[validity, Some(offsets)].into_iter(),
[values].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -347,6 +353,7 @@ fn mmap_fixed_size_list<T: AsRef<[u8]>>(
[validity].into_iter(),
[values].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -403,6 +410,7 @@ fn mmap_struct<T: AsRef<[u8]>>(
[validity].into_iter(),
values.into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -447,6 +455,7 @@ fn mmap_dict<K: DictionaryKey, T: AsRef<[u8]>>(
[validity, Some(values)].into_iter(),
[].into_iter(),
Some(export_array_to_c(dictionary)),
None,
)
})
}
Expand Down
11 changes: 11 additions & 0 deletions tests/it/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,14 @@ fn mmap_slice() {
assert_eq!(array.values().as_ref(), &[1, 2, 3]);
// note: when `slice` is dropped, array must be dropped as-well since by construction of `slice` they share their lifetimes.
}

#[test]
fn mmap_bitmap() {
    let bytes = &[123u8, 255];
    // View 14 bits of `bytes`, starting at bit 2 of the first byte.
    let mapped = unsafe { arrow2::ffi::mmap::bitmap(bytes, 2, 14) }.unwrap();
    let bits: Vec<bool> = mapped.values_iter().collect();

    // The six remaining bits of `123`, followed by the eight set bits of `255`.
    let mut expected = vec![false, true, true, true, true, false];
    expected.extend([true; 8]);
    assert_eq!(bits, expected);
    // note: `mapped` must be dropped no later than `bytes`, since by construction the
    // array shares the lifetime of the slice it was created from.
}