From 0d83e51eead91b57e8d020f2144587dc58599ca3 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Wed, 20 Oct 2021 03:57:36 +0000 Subject: [PATCH] More tests. --- src/array/binary/ffi.rs | 9 +++++---- src/array/boolean/mod.rs | 6 ++++++ src/array/fixed_size_binary/ffi.rs | 23 +++++++++++++++++++++++ src/array/fixed_size_binary/mod.rs | 23 ++--------------------- src/array/fixed_size_list/ffi.rs | 16 ++++++++++++++++ src/array/fixed_size_list/mod.rs | 13 ++----------- src/array/list/ffi.rs | 12 ++++++------ src/array/map/ffi.rs | 15 +++++++++++---- src/array/specification.rs | 5 ++--- src/array/utf8/ffi.rs | 9 +++++---- tests/it/ffi.rs | 25 ++++++++++++++++++++++--- 11 files changed, 100 insertions(+), 56 deletions(-) create mode 100644 src/array/fixed_size_binary/ffi.rs create mode 100644 src/array/fixed_size_list/ffi.rs diff --git a/src/array/binary/ffi.rs b/src/array/binary/ffi.rs index c4c0f698543..5f1ff1a3030 100644 --- a/src/array/binary/ffi.rs +++ b/src/array/binary/ffi.rs @@ -16,12 +16,13 @@ unsafe impl ToFfi for BinaryArray { .map(|x| x.offset()) .unwrap_or_default(); - assert!(self.offsets.offset() >= offset); + let offsets = std::ptr::NonNull::new(unsafe { + self.offsets.as_ptr().offset(-(offset as isize)) as *mut u8 + }); + vec![ self.validity.as_ref().map(|x| x.as_ptr()), - std::ptr::NonNull::new(unsafe { - self.offsets.as_ptr().offset(-(offset as isize)) as *mut u8 - }), + offsets, std::ptr::NonNull::new(self.values.as_ptr() as *mut u8), ] } diff --git a/src/array/boolean/mod.rs b/src/array/boolean/mod.rs index 06935af24ee..15618f18f40 100644 --- a/src/array/boolean/mod.rs +++ b/src/array/boolean/mod.rs @@ -45,6 +45,9 @@ impl BooleanArray { if data_type.to_physical_type() != PhysicalType::Boolean { panic!("BooleanArray can only be initialized with DataType::Boolean") } + if matches!(&validity, Some(bitmap) if bitmap.offset() != values.offset()) { + panic!("validity's bitmap offset must equal values offset") + } Self { data_type, values, @@ -91,6 +94,9 @@ impl BooleanArray { if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) { panic!("validity should be as least as large as the array") } + if matches!(&validity, Some(bitmap) if bitmap.offset() != self.values.offset()) { + panic!("validity's bitmap offset must equal values offset") + } let mut arr = self.clone(); arr.validity = validity; arr diff --git a/src/array/fixed_size_binary/ffi.rs b/src/array/fixed_size_binary/ffi.rs new file mode 100644 index 00000000000..ff8a89d763d --- /dev/null +++ b/src/array/fixed_size_binary/ffi.rs @@ -0,0 +1,23 @@ +use crate::array::ffi::ToFfi; + +use super::FixedSizeBinaryArray; + +unsafe impl ToFfi for FixedSizeBinaryArray { + fn buffers(&self) -> Vec>> { + let offset = self + .validity + .as_ref() + .map(|x| x.offset()) + .unwrap_or_default(); + let offset = offset * self.size(); + + // note that this may point to _before_ the pointer. This is fine because the C data interface + // requires users to only access past the offset + let values = + std::ptr::NonNull::new( + unsafe { self.values.as_ptr().offset(-(offset as isize)) } as *mut u8 + ); + + vec![self.validity.as_ref().map(|x| x.as_ptr()), values] + } +} diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index a6a02488136..faf3ec7ff72 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -1,7 +1,8 @@ use crate::{bitmap::Bitmap, buffer::Buffer, datatypes::DataType, error::Result}; -use super::{display_fmt, display_helper, ffi::ToFfi, Array}; +use super::{display_fmt, display_helper, Array}; +mod ffi; mod iterator; mod mutable; pub use mutable::*; @@ -179,26 +180,6 @@ impl std::fmt::Display for FixedSizeBinaryArray { } } -unsafe impl ToFfi for FixedSizeBinaryArray { - fn buffers(&self) -> Vec>> { - let offset = self - .validity - .as_ref() - .map(|x| x.offset()) - .unwrap_or_default(); - let offset = offset * self.size(); - - assert!(self.values.offset() >= offset); - - vec![ - self.validity.as_ref().map(|x| x.as_ptr()), - std::ptr::NonNull::new( - unsafe { self.values.as_ptr().offset(-(offset as isize)) } as *mut u8 - ), - ] - } -} - impl FixedSizeBinaryArray { /// Creates a [`FixedSizeBinaryArray`] from an fallible iterator of optional `[u8]`. pub fn try_from_iter, I: IntoIterator>>( diff --git a/src/array/fixed_size_list/ffi.rs b/src/array/fixed_size_list/ffi.rs new file mode 100644 index 00000000000..c803dc6ac25 --- /dev/null +++ b/src/array/fixed_size_list/ffi.rs @@ -0,0 +1,16 @@ +use std::sync::Arc; + +use crate::array::ffi::ToFfi; +use crate::array::Array; + +use super::FixedSizeListArray; + +unsafe impl ToFfi for FixedSizeListArray { + fn buffers(&self) -> Vec>> { + vec![self.validity.as_ref().map(|x| x.as_ptr())] + } + + fn children(&self) -> Vec> { + vec![self.values().clone()] + } +} diff --git a/src/array/fixed_size_list/mod.rs b/src/array/fixed_size_list/mod.rs index 86ecbec3758..6c29c2a6b58 100644 --- a/src/array/fixed_size_list/mod.rs +++ b/src/array/fixed_size_list/mod.rs @@ -5,8 +5,9 @@ use crate::{ datatypes::{DataType, Field}, }; -use super::{display_fmt, ffi::ToFfi, new_empty_array, new_null_array, Array}; +use super::{display_fmt, new_empty_array, new_null_array, Array}; +mod ffi; mod iterator; pub use iterator::*; mod mutable; @@ -191,13 +192,3 @@ impl std::fmt::Display for FixedSizeListArray { display_fmt(self.iter(), "FixedSizeListArray", f, true) } } - -unsafe impl ToFfi for FixedSizeListArray { - fn buffers(&self) -> Vec>> { - vec![self.validity.as_ref().map(|x| x.as_ptr())] - } - - fn children(&self) -> Vec> { - vec![self.values().clone()] - } -} diff --git a/src/array/list/ffi.rs b/src/array/list/ffi.rs index c17a5dbd43c..cb354727f92 100644 --- a/src/array/list/ffi.rs +++ b/src/array/list/ffi.rs @@ -13,14 +13,14 @@ unsafe impl ToFfi for ListArray { .map(|x| x.offset()) .unwrap_or_default(); - assert!(self.offsets.offset() >= offset); - - vec![ - self.validity.as_ref().map(|x| x.as_ptr()), + // note that this may point to _before_ the pointer. This is fine because the C data interface + // requires users to only access past the offset + let offsets = std::ptr::NonNull::new( unsafe { self.offsets.as_ptr().offset(-(offset as isize)) } as *mut u8 - ), - ] + ); + + vec![self.validity.as_ref().map(|x| x.as_ptr()), offsets] } fn children(&self) -> Vec> { diff --git a/src/array/map/ffi.rs b/src/array/map/ffi.rs index 953d5dd0d8e..8161a17d02f 100644 --- a/src/array/map/ffi.rs +++ b/src/array/map/ffi.rs @@ -7,10 +7,17 @@ use super::MapArray; unsafe impl ToFfi for MapArray { fn buffers(&self) -> Vec>> { - vec![ - self.validity.as_ref().map(|x| x.as_ptr()), - std::ptr::NonNull::new(self.offsets.as_ptr() as *mut u8), - ] + let offset = self + .validity + .as_ref() + .map(|x| x.offset()) + .unwrap_or_default(); + + let offsets = std::ptr::NonNull::new(unsafe { + self.offsets.as_ptr().offset(-(offset as isize)) as *mut u8 + }); + + vec![self.validity.as_ref().map(|x| x.as_ptr()), offsets] } fn children(&self) -> Vec> { diff --git a/src/array/specification.rs b/src/array/specification.rs index 1edcb70653f..0b26f4a67fc 100644 --- a/src/array/specification.rs +++ b/src/array/specification.rs @@ -100,12 +100,11 @@ pub fn check_offsets_and_utf8(offsets: &[O], values: &[u8]) { } /// # Panics iff: +/// * the `offsets` is empty, or /// * the `offsets` is not monotonically increasing, or /// * any offset is larger or equal to `values_len`. pub fn check_offsets(offsets: &[O], values_len: usize) { - if offsets.is_empty() { - return; - } + assert!(!offsets.is_empty()); let mut last = offsets[0]; // assert monotonicity diff --git a/src/array/utf8/ffi.rs b/src/array/utf8/ffi.rs index 118417fc29b..0ef904fa09d 100644 --- a/src/array/utf8/ffi.rs +++ b/src/array/utf8/ffi.rs @@ -14,12 +14,13 @@ unsafe impl ToFfi for Utf8Array { .map(|x| x.offset()) .unwrap_or_default(); - assert!(self.offsets.offset() >= offset); + let offsets = std::ptr::NonNull::new(unsafe { + self.offsets.as_ptr().offset(-(offset as isize)) as *mut u8 + }); + vec![ self.validity.as_ref().map(|x| x.as_ptr()), - std::ptr::NonNull::new(unsafe { - self.offsets.as_ptr().offset(-(offset as isize)) as *mut u8 - }), + offsets, std::ptr::NonNull::new(self.values.as_ptr() as *mut u8), ] } diff --git a/tests/it/ffi.rs b/tests/it/ffi.rs index facc143988c..c7912d6cd81 100644 --- a/tests/it/ffi.rs +++ b/tests/it/ffi.rs @@ -1,13 +1,12 @@ use arrow2::array::*; +use arrow2::bitmap::Bitmap; use arrow2::datatypes::{DataType, Field, TimeUnit}; use arrow2::{error::Result, ffi}; use std::collections::BTreeMap; use std::sync::Arc; -fn test_round_trip(expected: impl Array + Clone + 'static) -> Result<()> { - let array: Arc = Arc::new(expected.clone()); +fn _test_round_trip(array: Arc, expected: Box) -> Result<()> { let field = Field::new("a", array.data_type().clone(), true); - let expected = Box::new(expected) as Box; let array_ptr = Box::new(ffi::Ffi_ArrowArray::empty()); let schema_ptr = Box::new(ffi::Ffi_ArrowSchema::empty()); @@ -32,6 +31,15 @@ fn test_round_trip(expected: impl Array + Clone + 'static) -> Result<()> { Ok(()) } +fn test_round_trip(expected: impl Array + Clone + 'static) -> Result<()> { + let array: Arc = Arc::new(expected.clone()); + let expected = Box::new(expected) as Box; + _test_round_trip(array.clone(), clone(expected.as_ref()))?; + + // sliced + _test_round_trip(array.slice(1, 2).into(), expected.slice(1, 2)) +} + fn test_round_trip_schema(field: Field) -> Result<()> { // create a `ArrowArray` from the data. let schema_ptr = Box::new(ffi::Ffi_ArrowSchema::empty()); @@ -138,6 +146,17 @@ fn list_list() -> Result<()> { test_round_trip(array) } +#[test] +fn struct_() -> Result<()> { + let data_type = DataType::Struct(vec![Field::new("a", DataType::Int32, true)]); + let values = vec![Arc::new(Int32Array::from([Some(1), None, Some(3)])) as Arc]; + let validity = Bitmap::from([true, false, true]); + + let array = StructArray::from_data(data_type, values, validity.into()); + + test_round_trip(array) +} + #[test] fn dict() -> Result<()> { let data = vec![Some("a"), Some("a"), None, Some("b")];