Skip to content

Commit

Permalink
Added memmap &[u8] as BooleanArray (jorgecarleitao#1436)
Browse files Browse the repository at this point in the history
* feat: memmap bits slice as BooleanArray

* swap cmp
  • Loading branch information
ritchie46 committed Apr 5, 2023
1 parent dbd0228 commit 20b6d07
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 1 deletion.
49 changes: 48 additions & 1 deletion src/ffi/mmap.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
//! Functionality to mmap in-memory data regions.
use std::sync::Arc;

use crate::array::BooleanArray;
use crate::datatypes::DataType;
use crate::error::Error;
use crate::{
array::{FromFfi, PrimitiveArray},
types::NativeType,
Expand Down Expand Up @@ -28,6 +31,7 @@ pub(crate) unsafe fn create_array<
buffers: I,
children: II,
dictionary: Option<ArrowArray>,
offset: Option<usize>,
) -> ArrowArray {
let buffers_ptr = buffers
.map(|maybe_buffer| match maybe_buffer {
Expand All @@ -54,7 +58,7 @@ pub(crate) unsafe fn create_array<
ArrowArray {
length: num_rows as i64,
null_count: null_count as i64,
offset: 0, // IPC files are by definition not offset
offset: offset.unwrap_or(0) as i64, // Unwrap: IPC files are by definition not offset
n_buffers,
n_children,
buffers: private_data.buffers_ptr.as_mut_ptr(),
Expand Down Expand Up @@ -113,9 +117,52 @@ pub unsafe fn slice<T: NativeType>(slice: &[T]) -> PrimitiveArray<T> {
[validity, Some(ptr)].into_iter(),
[].into_iter(),
None,
None,
);
let array = InternalArrowArray::new(array, T::PRIMITIVE.into());

// safety: we just created a valid array
unsafe { PrimitiveArray::<T>::try_from_ffi(array) }.unwrap()
}

/// Creates a (non-null) [`BooleanArray`] from a slice of bits.
/// This does not have memcopy and is the fastest way to create a [`BooleanArray`].
///
/// This can be useful if you want to apply arrow kernels on slices without incurring
/// a memcopy cost.
///
/// The `offset` indicates where the first bit starts in the first byte.
///
/// # Safety
///
/// Using this function is not unsafe, but the returned BooleanArrays's lifetime is bound to the lifetime
/// of the slice. The returned [`BooleanArray`] _must not_ outlive the passed slice.
pub unsafe fn bitmap(data: &[u8], offset: usize, length: usize) -> Result<BooleanArray, Error> {
if offset >= 8 {
return Err(Error::InvalidArgumentError("offset should be < 8".into()));
};
if length > data.len() * 8 - offset {
return Err(Error::InvalidArgumentError("given length is oob".into()));
}
let null_count = 0;
let validity = None;

let ptr = data.as_ptr() as *const u8;
let data = Arc::new(data);

// safety: the underlying assumption of this function: the array will not be used
// beyond the
let array = create_array(
data,
length,
null_count,
[validity, Some(ptr)].into_iter(),
[].into_iter(),
None,
Some(offset),
);
let array = InternalArrowArray::new(array, DataType::Boolean);

// safety: we just created a valid array
Ok(unsafe { BooleanArray::try_from_ffi(array) }.unwrap())
}
9 changes: 9 additions & 0 deletions src/mmap/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ fn mmap_binary<O: Offset, T: AsRef<[u8]>>(
[validity, Some(offsets), Some(values)].into_iter(),
[].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -144,6 +145,7 @@ fn mmap_fixed_size_binary<T: AsRef<[u8]>>(
[validity, Some(values)].into_iter(),
[].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -172,6 +174,7 @@ fn mmap_null<T: AsRef<[u8]>>(
[].into_iter(),
[].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -210,6 +213,7 @@ fn mmap_boolean<T: AsRef<[u8]>>(
[validity, Some(values)].into_iter(),
[].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -244,6 +248,7 @@ fn mmap_primitive<P: NativeType, T: AsRef<[u8]>>(
[validity, Some(values)].into_iter(),
[].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -296,6 +301,7 @@ fn mmap_list<O: Offset, T: AsRef<[u8]>>(
[validity, Some(offsets)].into_iter(),
[values].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -347,6 +353,7 @@ fn mmap_fixed_size_list<T: AsRef<[u8]>>(
[validity].into_iter(),
[values].into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -403,6 +410,7 @@ fn mmap_struct<T: AsRef<[u8]>>(
[validity].into_iter(),
values.into_iter(),
None,
None,
)
})
}
Expand Down Expand Up @@ -447,6 +455,7 @@ fn mmap_dict<K: DictionaryKey, T: AsRef<[u8]>>(
[validity, Some(values)].into_iter(),
[].into_iter(),
Some(export_array_to_c(dictionary)),
None,
)
})
}
Expand Down
11 changes: 11 additions & 0 deletions tests/it/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,14 @@ fn mmap_slice() {
assert_eq!(array.values().as_ref(), &[1, 2, 3]);
// note: when `slice` is dropped, `array` must be dropped as well, since by construction of `slice` they share their lifetimes.
}

#[test]
fn mmap_bitmap() {
    // 123 == 0b0111_1011 and 255 == 0b1111_1111; the expected values below read the
    // bits of each byte starting from the least significant one.
    let bytes = &[123u8, 255];

    // Start at bit 2 of the first byte and expose the 14 bits that follow.
    let array = unsafe { arrow2::ffi::mmap::bitmap(bytes, 2, 14) }.unwrap();

    let expected = [
        false, true, true, true, true, false, // bits 2..8 of the first byte
        true, true, true, true, true, true, true, true, // all 8 bits of the second byte
    ];
    let actual: Vec<bool> = array.values_iter().collect();
    assert_eq!(actual, expected);
    // note: `array` must be dropped no later than `bytes`, since it points into that slice.
}

0 comments on commit 20b6d07

Please sign in to comment.