From 4d9e1e3549d58094287e4b4cea7c55a9869045d4 Mon Sep 17 00:00:00 2001 From: Jorge Leitao Date: Sat, 19 Feb 2022 14:13:04 +0100 Subject: [PATCH] Simplified API for FFI (#854) --- arrow-pyarrow-integration-testing/src/lib.rs | 3 +- examples/ffi.rs | 2 +- src/array/binary/ffi.rs | 2 +- src/array/boolean/ffi.rs | 2 +- src/array/fixed_size_binary/ffi.rs | 2 +- src/array/fixed_size_list/ffi.rs | 2 +- src/array/list/ffi.rs | 2 +- src/array/map/ffi.rs | 2 +- src/array/null.rs | 2 +- src/array/primitive/ffi.rs | 2 +- src/array/struct_/ffi.rs | 2 +- src/array/union/ffi.rs | 5 +- src/array/utf8/ffi.rs | 2 +- src/ffi/array.rs | 2 +- src/ffi/ffi.rs | 51 +++++++++----------- src/ffi/mod.rs | 6 +-- src/ffi/schema.rs | 16 +++--- tests/it/ffi.rs | 3 +- 18 files changed, 52 insertions(+), 56 deletions(-) diff --git a/arrow-pyarrow-integration-testing/src/lib.rs b/arrow-pyarrow-integration-testing/src/lib.rs index 48d39adcd61..3463cab1bee 100644 --- a/arrow-pyarrow-integration-testing/src/lib.rs +++ b/arrow-pyarrow-integration-testing/src/lib.rs @@ -66,7 +66,8 @@ fn to_rust_array(ob: PyObject, py: Python) -> PyResult> { )?; let field = unsafe { ffi::import_field_from_c(schema.as_ref()).map_err(PyO3ArrowError::from)? }; - let array = unsafe { ffi::import_array_from_c(array, &field).map_err(PyO3ArrowError::from)? }; + let array = + unsafe { ffi::import_array_from_c(array, field.data_type).map_err(PyO3ArrowError::from)? }; Ok(array.into()) } diff --git a/examples/ffi.rs b/examples/ffi.rs index 87933385659..b69689fb867 100644 --- a/examples/ffi.rs +++ b/examples/ffi.rs @@ -19,7 +19,7 @@ unsafe fn import( schema: &ffi::Ffi_ArrowSchema, ) -> Result> { let field = ffi::import_field_from_c(schema)?; - ffi::import_array_from_c(array, &field) + ffi::import_array_from_c(array, field.data_type) } fn main() -> Result<()> { diff --git a/src/array/binary/ffi.rs b/src/array/binary/ffi.rs index 2cd1fb2e628..c8a72615bce 100644 --- a/src/array/binary/ffi.rs +++ b/src/array/binary/ffi.rs @@ -52,7 +52,7 @@ unsafe impl ToFfi for BinaryArray { impl FromFfi for BinaryArray { unsafe fn try_from_ffi(array: A) -> Result { - let data_type = array.field().data_type().clone(); + let data_type = array.data_type().clone(); let validity = unsafe { array.validity() }?; let offsets = unsafe { array.buffer::(1) }?; diff --git a/src/array/boolean/ffi.rs b/src/array/boolean/ffi.rs index f07d2d20263..9998e8f00e1 100644 --- a/src/array/boolean/ffi.rs +++ b/src/array/boolean/ffi.rs @@ -50,7 +50,7 @@ unsafe impl ToFfi for BooleanArray { impl FromFfi for BooleanArray { unsafe fn try_from_ffi(array: A) -> Result { - let data_type = array.field().data_type().clone(); + let data_type = array.data_type().clone(); let validity = unsafe { array.validity() }?; let values = unsafe { array.bitmap(1) }?; Ok(Self::from_data(data_type, values, validity)) diff --git a/src/array/fixed_size_binary/ffi.rs b/src/array/fixed_size_binary/ffi.rs index 4d371c44643..7728045c0f4 100644 --- a/src/array/fixed_size_binary/ffi.rs +++ b/src/array/fixed_size_binary/ffi.rs @@ -50,7 +50,7 @@ unsafe impl ToFfi for FixedSizeBinaryArray { impl FromFfi for FixedSizeBinaryArray { unsafe fn try_from_ffi(array: A) -> Result { - let data_type = array.field().data_type().clone(); + let data_type = array.data_type().clone(); let validity = unsafe { array.validity() }?; let values = unsafe { array.buffer::(1) }?; diff --git a/src/array/fixed_size_list/ffi.rs b/src/array/fixed_size_list/ffi.rs index 1c2a2063e6d..5e862f56b48 100644 --- a/src/array/fixed_size_list/ffi.rs +++ b/src/array/fixed_size_list/ffi.rs @@ -35,7 +35,7 @@ unsafe impl ToFfi for FixedSizeListArray { impl FromFfi for FixedSizeListArray { unsafe fn try_from_ffi(array: A) -> Result { - let data_type = array.field().data_type().clone(); + let data_type = array.data_type().clone(); let validity = unsafe { array.validity() }?; let child = unsafe { array.child(0)? }; let values = ffi::try_from(child)?.into(); diff --git a/src/array/list/ffi.rs b/src/array/list/ffi.rs index 11af11bfd6a..67349792305 100644 --- a/src/array/list/ffi.rs +++ b/src/array/list/ffi.rs @@ -52,7 +52,7 @@ unsafe impl ToFfi for ListArray { impl FromFfi for ListArray { unsafe fn try_from_ffi(array: A) -> Result { - let data_type = array.field().data_type().clone(); + let data_type = array.data_type().clone(); let validity = unsafe { array.validity() }?; let offsets = unsafe { array.buffer::(1) }?; let child = unsafe { array.child(0)? }; diff --git a/src/array/map/ffi.rs b/src/array/map/ffi.rs index 0ab6881a78b..5607110ae48 100644 --- a/src/array/map/ffi.rs +++ b/src/array/map/ffi.rs @@ -52,7 +52,7 @@ unsafe impl ToFfi for MapArray { impl FromFfi for MapArray { unsafe fn try_from_ffi(array: A) -> Result { - let data_type = array.field().data_type().clone(); + let data_type = array.data_type().clone(); let validity = unsafe { array.validity() }?; let offsets = unsafe { array.buffer::(1) }?; let child = array.child(0)?; diff --git a/src/array/null.rs b/src/array/null.rs index 9c09b8ce705..3f7d89dc22a 100644 --- a/src/array/null.rs +++ b/src/array/null.rs @@ -96,7 +96,7 @@ unsafe impl ToFfi for NullArray { impl FromFfi for NullArray { unsafe fn try_from_ffi(array: A) -> Result { - let data_type = array.field().data_type().clone(); + let data_type = array.data_type().clone(); Ok(Self::from_data(data_type, array.array().len())) } } diff --git a/src/array/primitive/ffi.rs b/src/array/primitive/ffi.rs index 1c35009c739..f5b4e5438ec 100644 --- a/src/array/primitive/ffi.rs +++ b/src/array/primitive/ffi.rs @@ -51,7 +51,7 @@ unsafe impl ToFfi for PrimitiveArray { impl FromFfi for PrimitiveArray { unsafe fn try_from_ffi(array: A) -> Result { - let data_type = array.field().data_type().clone(); + let data_type = array.data_type().clone(); let validity = unsafe { array.validity() }?; let values = unsafe { array.buffer::(1) }?; diff --git a/src/array/struct_/ffi.rs b/src/array/struct_/ffi.rs index 0168520cdcb..b25b233e2e8 100644 --- a/src/array/struct_/ffi.rs +++ b/src/array/struct_/ffi.rs @@ -29,7 +29,7 @@ unsafe impl ToFfi for StructArray { impl FromFfi for StructArray { unsafe fn try_from_ffi(array: A) -> Result { - let data_type = array.field().data_type().clone(); + let data_type = array.data_type().clone(); let fields = Self::get_fields(&data_type); let validity = unsafe { array.validity() }?; diff --git a/src/array/union/ffi.rs b/src/array/union/ffi.rs index 5702e4f9d6c..8dd15321ff1 100644 --- a/src/array/union/ffi.rs +++ b/src/array/union/ffi.rs @@ -32,9 +32,8 @@ unsafe impl ToFfi for UnionArray { impl FromFfi for UnionArray { unsafe fn try_from_ffi(array: A) -> Result { - let field = array.field(); - let data_type = field.data_type().clone(); - let fields = Self::get_fields(field.data_type()); + let data_type = array.data_type().clone(); + let fields = Self::get_fields(&data_type); let mut types = unsafe { array.buffer::(0) }?; let offsets = if Self::is_sparse(&data_type) { diff --git a/src/array/utf8/ffi.rs b/src/array/utf8/ffi.rs index 8f5962fa5ea..e8caa53b65e 100644 --- a/src/array/utf8/ffi.rs +++ b/src/array/utf8/ffi.rs @@ -51,7 +51,7 @@ unsafe impl ToFfi for Utf8Array { impl FromFfi for Utf8Array { unsafe fn try_from_ffi(array: A) -> Result { - let data_type = array.field().data_type().clone(); + let data_type = array.data_type().clone(); let validity = unsafe { array.validity() }?; let offsets = unsafe { array.buffer::(1) }?; let values = unsafe { array.buffer::(2)? }; diff --git a/src/ffi/array.rs b/src/ffi/array.rs index 947a3c4e3df..38894cd5970 100644 --- a/src/ffi/array.rs +++ b/src/ffi/array.rs @@ -11,7 +11,7 @@ use crate::{array::*, datatypes::PhysicalType}; /// * the interface is not valid (e.g. a null pointer) pub unsafe fn try_from(array: A) -> Result> { use PhysicalType::*; - Ok(match array.field().data_type().to_physical_type() { + Ok(match array.data_type().to_physical_type() { Null => Box::new(NullArray::try_from_ffi(array)?), Boolean => Box::new(BooleanArray::try_from_ffi(array)?), Primitive(primitive) => with_match_primitive_type!(primitive, |$T| { diff --git a/src/ffi/ffi.rs b/src/ffi/ffi.rs index 528a679dd5b..e6de2d594a1 100644 --- a/src/ffi/ffi.rs +++ b/src/ffi/ffi.rs @@ -24,9 +24,9 @@ use crate::{ bytes::{Bytes, Deallocation}, Buffer, }, - datatypes::{DataType, Field, PhysicalType}, + datatypes::{DataType, PhysicalType}, error::{ArrowError, Result}, - ffi::schema::get_field_child, + ffi::schema::get_child, types::NativeType, }; @@ -317,11 +317,11 @@ fn buffer_len(array: &Ffi_ArrowArray, data_type: &DataType, i: usize) -> Result< fn create_child( array: &Ffi_ArrowArray, - field: &Field, + field: &DataType, parent: Arc, index: usize, ) -> Result> { - let field = get_field_child(field, index)?; + let data_type = get_child(field, index)?; assert!(index < array.n_children as usize); assert!(!array.children.is_null()); unsafe { @@ -329,20 +329,20 @@ fn create_child( assert!(!arr_ptr.is_null()); let arr_ptr = &*arr_ptr; - Ok(ArrowArrayChild::from_raw(arr_ptr, field, parent)) + Ok(ArrowArrayChild::from_raw(arr_ptr, data_type, parent)) } } fn create_dictionary( array: &Ffi_ArrowArray, - field: &Field, + data_type: &DataType, parent: Arc, ) -> Result>> { - if let DataType::Dictionary(_, values, _) = field.data_type() { - let field = Field::new("", values.as_ref().clone(), true); + if let DataType::Dictionary(_, values, _) = data_type { + let data_type = values.as_ref().clone(); assert!(!array.dictionary.is_null()); let array = unsafe { &*array.dictionary }; - Ok(Some(ArrowArrayChild::from_raw(array, field, parent))) + Ok(Some(ArrowArrayChild::from_raw(array, data_type, parent))) } else { Ok(None) } @@ -371,12 +371,7 @@ pub trait ArrowArrayRef: std::fmt::Debug { /// The caller must guarantee that the buffer `index` corresponds to a bitmap. /// This function assumes that the bitmap created from FFI is valid; this is impossible to prove. unsafe fn buffer(&self, index: usize) -> Result> { - create_buffer::( - self.array(), - self.field().data_type(), - self.deallocation(), - index, - ) + create_buffer::(self.array(), self.data_type(), self.deallocation(), index) } /// # Safety @@ -390,18 +385,18 @@ pub trait ArrowArrayRef: std::fmt::Debug { /// # Safety /// The caller must guarantee that the child `index` is valid per c data interface. unsafe fn child(&self, index: usize) -> Result { - create_child(self.array(), self.field(), self.parent().clone(), index) + create_child(self.array(), self.data_type(), self.parent().clone(), index) } fn dictionary(&self) -> Result> { - create_dictionary(self.array(), self.field(), self.parent().clone()) + create_dictionary(self.array(), self.data_type(), self.parent().clone()) } fn n_buffers(&self) -> usize; fn parent(&self) -> &Arc; fn array(&self) -> &Ffi_ArrowArray; - fn field(&self) -> &Field; + fn data_type(&self) -> &DataType; } /// Struct used to move an Array from and to the C Data Interface. @@ -426,19 +421,19 @@ pub trait ArrowArrayRef: std::fmt::Debug { #[derive(Debug)] pub struct ArrowArray { array: Box, - field: Field, + data_type: DataType, } impl ArrowArray { - pub fn new(array: Box, field: Field) -> Self { - Self { array, field } + pub fn new(array: Box, data_type: DataType) -> Self { + Self { array, data_type } } } impl ArrowArrayRef for Arc { /// the data_type as declared in the schema - fn field(&self) -> &Field { - &self.field + fn data_type(&self) -> &DataType { + &self.data_type } fn parent(&self) -> &Arc { @@ -457,14 +452,14 @@ impl ArrowArrayRef for Arc { #[derive(Debug)] pub struct ArrowArrayChild<'a> { array: &'a Ffi_ArrowArray, - field: Field, + data_type: DataType, parent: Arc, } impl<'a> ArrowArrayRef for ArrowArrayChild<'a> { /// the data_type as declared in the schema - fn field(&self) -> &Field { - &self.field + fn data_type(&self) -> &DataType { + &self.data_type } fn parent(&self) -> &Arc { @@ -481,10 +476,10 @@ impl<'a> ArrowArrayRef for ArrowArrayChild<'a> { } impl<'a> ArrowArrayChild<'a> { - fn from_raw(array: &'a Ffi_ArrowArray, field: Field, parent: Arc) -> Self { + fn from_raw(array: &'a Ffi_ArrowArray, data_type: DataType, parent: Arc) -> Self { Self { array, - field, + data_type, parent, } } diff --git a/src/ffi/mod.rs b/src/ffi/mod.rs index bcb20c06331..1b32c0cfa52 100644 --- a/src/ffi/mod.rs +++ b/src/ffi/mod.rs @@ -12,7 +12,7 @@ pub(crate) use ffi::{ArrowArray, ArrowArrayRef}; use std::sync::Arc; use crate::array::Array; -use crate::datatypes::Field; +use crate::datatypes::{DataType, Field}; use crate::error::Result; pub use ffi::Ffi_ArrowArray; @@ -50,7 +50,7 @@ pub unsafe fn import_field_from_c(field: &Ffi_ArrowSchema) -> Result { /// valid according to the [C data interface](https://arrow.apache.org/docs/format/CDataInterface.html) (FFI). pub unsafe fn import_array_from_c( array: Box, - field: &Field, + data_type: DataType, ) -> Result> { - try_from(Arc::new(ArrowArray::new(array, field.clone()))) + try_from(Arc::new(ArrowArray::new(array, data_type))) } diff --git a/src/ffi/schema.rs b/src/ffi/schema.rs index 38214101879..c00a765672a 100644 --- a/src/ffi/schema.rs +++ b/src/ffi/schema.rs @@ -455,14 +455,14 @@ fn to_format(data_type: &DataType) -> String { } } -pub(super) fn get_field_child(field: &Field, index: usize) -> Result { - match (index, field.data_type()) { - (0, DataType::List(field)) => Ok(field.as_ref().clone()), - (0, DataType::FixedSizeList(field, _)) => Ok(field.as_ref().clone()), - (0, DataType::LargeList(field)) => Ok(field.as_ref().clone()), - (0, DataType::Map(field, _)) => Ok(field.as_ref().clone()), - (index, DataType::Struct(fields)) => Ok(fields[index].clone()), - (index, DataType::Union(fields, _, _)) => Ok(fields[index].clone()), +pub(super) fn get_child(data_type: &DataType, index: usize) -> Result { + match (index, data_type) { + (0, DataType::List(field)) => Ok(field.data_type().clone()), + (0, DataType::FixedSizeList(field, _)) => Ok(field.data_type().clone()), + (0, DataType::LargeList(field)) => Ok(field.data_type().clone()), + (0, DataType::Map(field, _)) => Ok(field.data_type().clone()), + (index, DataType::Struct(fields)) => Ok(fields[index].data_type().clone()), + (index, DataType::Union(fields, _, _)) => Ok(fields[index].data_type().clone()), (child, data_type) => Err(ArrowError::OutOfSpec(format!( "Requested child {} to type {:?} that has no such child", child, data_type diff --git a/tests/it/ffi.rs b/tests/it/ffi.rs index dfc5b5102cb..dd4f53b912e 100644 --- a/tests/it/ffi.rs +++ b/tests/it/ffi.rs @@ -24,7 +24,8 @@ fn _test_round_trip(array: Arc, expected: Box) -> Result<( // import references let result_field = unsafe { ffi::import_field_from_c(schema_ptr.as_ref())? }; - let result_array = unsafe { ffi::import_array_from_c(array_ptr, &result_field)? }; + let result_array = + unsafe { ffi::import_array_from_c(array_ptr, result_field.data_type.clone())? }; assert_eq!(&result_array, &expected); assert_eq!(result_field, field);