Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved API for FFI.
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Aug 23, 2021
1 parent 6b9ee21 commit 58eb2db
Show file tree
Hide file tree
Showing 6 changed files with 71 additions and 108 deletions.
25 changes: 18 additions & 7 deletions arrow-pyarrow-integration-testing/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@ use pyo3::{libc::uintptr_t, prelude::*};
use arrow2::array::{Array, Int64Array};
use arrow2::ffi;
use arrow2::{array::PrimitiveArray, compute};
use arrow2::{datatypes::DataType, error::ArrowError};
use arrow2::{
datatypes::{DataType, Field},
error::ArrowError,
};

type ArrayRef = Arc<dyn Array>;

Expand Down Expand Up @@ -72,8 +75,11 @@ impl From<PyO3ArrowError> for PyErr {

fn to_rust(ob: PyObject, py: Python) -> PyResult<ArrayRef> {
// prepare a pointer to receive the Array struct
let array = Arc::new(ffi::create_empty());
let (array_ptr, schema_ptr) = array.references();
let array = Box::new(ffi::Ffi_ArrowArray::empty());
let schema = Box::new(ffi::Ffi_ArrowSchema::empty());

let array_ptr = &*array as *const ffi::Ffi_ArrowArray;
let schema_ptr = &*schema as *const ffi::Ffi_ArrowSchema;

// make the conversion through PyArrow's private API
// this changes the pointer's memory and is thus unsafe. In particular, `_export_to_c` can go out of bounds
Expand All @@ -83,13 +89,18 @@ fn to_rust(ob: PyObject, py: Python) -> PyResult<ArrayRef> {
(array_ptr as uintptr_t, schema_ptr as uintptr_t),
)?;

Ok(ffi::try_from(array).map_err(PyO3ArrowError::from)?.into())
let field = ffi::import_field_from_c(schema.as_ref()).map_err(PyO3ArrowError::from)?;
let array = ffi::import_array_from_c(array, &field).map_err(PyO3ArrowError::from)?;

Ok(array.into())
}

fn to_py(array: ArrayRef, py: Python) -> PyResult<PyObject> {
let array_ptr = ffi::export_to_c(array).map_err(PyO3ArrowError::from)?;
let schema_ptr = ffi::export_field_to_c(&Field::new("", array.data_type().clone(), true));
let array_ptr = ffi::export_array_to_c(array);

let (array_ptr, schema_ptr) = array_ptr.references();
let schema_ptr = &*schema_ptr as *const ffi::Ffi_ArrowSchema;
let array_ptr = &*array_ptr as *const ffi::Ffi_ArrowArray;

let pa = py.import("pyarrow")?;

Expand All @@ -112,7 +123,7 @@ fn double(array: PyObject, py: Python) -> PyResult<PyObject> {
PyO3ArrowError::ArrowError(ArrowError::Ffi("Expects an int64".to_string()))
})?;
let array =
compute::arithmetics::basic::add::add(&array, &array).map_err(PyO3ArrowError::from)?;
compute::arithmetics::basic::add::add(array, array).map_err(PyO3ArrowError::from)?;
let array = Arc::new(array);

// export
Expand Down
55 changes: 2 additions & 53 deletions guide/src/ffi.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,59 +5,8 @@ has a specification, which allows languages to share data
structures via foreign interfaces at zero cost (i.e. via pointers).
This is known as the [C Data interface](https://arrow.apache.org/docs/format/CDataInterface.html).

This crate supports importing from and exporting to most `DataType`s.
Types currently not supported:

* `FixedSizeBinary`
* `Union`
* `Dictionary`
* `FixedSizeList`
* `Null`

## Export

The API to export an `Array` is as follows:

```rust
use std::sync::Arc;
use arrow2::array::{Array, PrimitiveArray};
use arrow2::datatypes::DataType;
use arrow2::ffi::ArrowArray;

# fn main() {
// Example of an array:
let array = [Some(1), None, Some(123)]
.iter()
.collect::<PrimitiveArray<i32>>()
.to(DataType::Int32);

// export the array.
let ffi_array = ffi::export_to_c(Arc::new(array))?;

// these are mutable pointers to `ArrowArray` and `ArrowSchema` of the C data interface
let (array_ptr, schema_ptr) = ffi_array.references();
# }
```

## Import

The API to import works similarly:
This crate supports importing from and exporting to all `DataType`s.

```rust
use arrow2::array::Array;
use arrow2::ffi;

let array = Arc::new(ffi::create_empty());

// non-owned mutable pointers.
let (array_ptr, schema_ptr) = array.references();

// write to the pointers using any C data interface exporter

// consume it to a `Box<dyn Array>`
let array = ffi::try_from(array)?;
{{#include ../../examples/ffi.rs}}
```

This assumes that the exporter writes to `array_ptr` and `schema_ptr`
according to the C data interface. This is an intrinsically `unsafe` operation.
Failing to do so results in undefined behavior (UB).
21 changes: 7 additions & 14 deletions src/ffi/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,7 @@ impl Ffi_ArrowArray {
/// # Safety
/// This method releases `buffers`. Consumers of this struct *must* call `release` before
/// releasing this struct, or contents in `buffers` leak.
pub fn new(array: Arc<dyn Array>) -> Self {
println!("{:?}", array);
pub(crate) fn new(array: Arc<dyn Array>) -> Self {
let (buffers, children, dictionary) = buffers_children_dictionary(array.as_ref());

let buffers_ptr = buffers
Expand All @@ -115,7 +114,6 @@ impl Ffi_ArrowArray {
.collect::<Box<_>>();
let n_children = children_ptr.len() as i64;

println!("{:?}", dictionary);
let dictionary_ptr =
dictionary.map(|array| Box::into_raw(Box::new(Ffi_ArrowArray::new(array))));

Expand Down Expand Up @@ -144,7 +142,7 @@ impl Ffi_ArrowArray {
}

/// Creates an empty `Ffi_ArrowArray`, into which data can be imported.
fn empty() -> Self {
pub fn empty() -> Self {
Self {
length: 0,
null_count: 0,
Expand All @@ -160,22 +158,17 @@ impl Ffi_ArrowArray {
}

/// the length of the array
pub fn len(&self) -> usize {
pub(crate) fn len(&self) -> usize {
self.length as usize
}

/// whether the array is empty
pub fn is_empty(&self) -> bool {
self.length == 0
}

/// the offset of the array
pub fn offset(&self) -> usize {
pub(crate) fn offset(&self) -> usize {
self.offset as usize
}

/// the null count of the array
pub fn null_count(&self) -> usize {
pub(crate) fn null_count(&self) -> usize {
self.null_count as usize
}
}
Expand Down Expand Up @@ -390,12 +383,12 @@ pub trait ArrowArrayRef {
/// Furthermore, this struct assumes that the incoming data agrees with the C data interface.
#[derive(Debug)]
pub struct ArrowArray {
array: Arc<Ffi_ArrowArray>,
array: Box<Ffi_ArrowArray>,
field: Field,
}

impl ArrowArray {
pub fn new(array: Arc<Ffi_ArrowArray>, field: Field) -> Self {
pub fn new(array: Box<Ffi_ArrowArray>, field: Field) -> Self {
Self { array, field }
}
}
Expand Down
22 changes: 13 additions & 9 deletions src/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,32 @@ mod array;
mod ffi;
mod schema;

pub use array::try_from;
pub use ffi::{ArrowArray, ArrowArrayRef};
pub(crate) use array::try_from;
pub(crate) use ffi::{ArrowArray, ArrowArrayRef};

use std::sync::Arc;

use crate::array::Array;
use crate::datatypes::Field;
use crate::error::Result;

use ffi::*;
use schema::Ffi_ArrowSchema;
pub use ffi::Ffi_ArrowArray;
pub use schema::Ffi_ArrowSchema;

use self::schema::to_field;

/// Exports an `Array` to the C data interface.
pub fn export_array_to_c(array: Arc<dyn Array>) -> Arc<Ffi_ArrowArray> {
Arc::new(Ffi_ArrowArray::new(array))
/// # Safety
/// The pointer must be allocated and valid
pub unsafe fn export_array_to_c(array: Arc<dyn Array>, ptr: *mut Ffi_ArrowArray) {
*ptr = Ffi_ArrowArray::new(array);
}

/// Exports a [`Field`] to the C data interface.
pub fn export_field_to_c(field: &Field) -> Arc<Ffi_ArrowSchema> {
Arc::new(Ffi_ArrowSchema::new(field))
/// # Safety
/// The pointer must be allocated and valid
pub unsafe fn export_field_to_c(field: &Field, ptr: *mut Ffi_ArrowSchema) {
*ptr = Ffi_ArrowSchema::new(field)
}

/// Imports a [`Field`] from the C data interface.
Expand All @@ -35,6 +39,6 @@ pub fn import_field_from_c(field: &Ffi_ArrowSchema) -> Result<Field> {
}

/// Imports an [`Array`] from the C data interface.
pub fn import_array_from_c(array: Arc<Ffi_ArrowArray>, field: &Field) -> Result<Box<dyn Array>> {
pub fn import_array_from_c(array: Box<Ffi_ArrowArray>, field: &Field) -> Result<Box<dyn Array>> {
try_from(Arc::new(ArrowArray::new(array, field.clone())))
}
14 changes: 7 additions & 7 deletions src/ffi/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ struct SchemaPrivateData {

/// ABI-compatible struct for `ArrowSchema` from C Data Interface
/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
/// This was created by bindgen
// This was created by bindgen
#[repr(C)]
#[derive(Debug)]
pub struct Ffi_ArrowSchema {
Expand Down Expand Up @@ -52,7 +52,7 @@ unsafe extern "C" fn c_release_schema(schema: *mut Ffi_ArrowSchema) {

impl Ffi_ArrowSchema {
/// creates a new [Ffi_ArrowSchema]
pub fn new(field: &Field) -> Self {
pub(crate) fn new(field: &Field) -> Self {
let format = to_format(field.data_type());
let name = field.name().clone();

Expand Down Expand Up @@ -126,7 +126,7 @@ impl Ffi_ArrowSchema {
}

/// returns the format of this schema.
pub fn format(&self) -> &str {
pub(crate) fn format(&self) -> &str {
assert!(!self.format.is_null());
// safe because the lifetime of `self.format` equals `self`
unsafe { CStr::from_ptr(self.format) }
Expand All @@ -135,26 +135,26 @@ impl Ffi_ArrowSchema {
}

/// returns the name of this schema.
pub fn name(&self) -> &str {
pub(crate) fn name(&self) -> &str {
assert!(!self.name.is_null());
// safe because the lifetime of `self.name` equals `self`
unsafe { CStr::from_ptr(self.name) }.to_str().unwrap()
}

pub fn child(&self, index: usize) -> &'static Self {
pub(crate) fn child(&self, index: usize) -> &'static Self {
assert!(index < self.n_children as usize);
assert!(!self.name.is_null());
unsafe { self.children.add(index).as_ref().unwrap().as_ref().unwrap() }
}

pub fn dictionary(&self) -> Option<&'static Self> {
pub(crate) fn dictionary(&self) -> Option<&'static Self> {
if self.dictionary.is_null() {
return None;
};
Some(unsafe { self.dictionary.as_ref().unwrap() })
}

pub fn nullable(&self) -> bool {
pub(crate) fn nullable(&self) -> bool {
(self.flags / 2) & 1 == 1
}
}
Expand Down
42 changes: 24 additions & 18 deletions tests/it/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,24 @@ use arrow2::datatypes::{DataType, Field, TimeUnit};
use arrow2::{error::Result, ffi};
use std::sync::Arc;

/// Exports `expected` through `ffi::export_array_to_c` and discards the result.
///
/// Always returns `Ok(())`. Presumably this exists to exercise the release path of
/// the exported struct when it is dropped — TODO confirm against its `Drop` impl.
fn test_release(expected: impl Array + 'static) -> Result<()> {
// wrap the data in an `Arc<dyn Array>`, the type the export function takes.
let b: Arc<dyn Array> = Arc::new(expected);

// export the array; the returned value is deliberately unused.
let _ = ffi::export_array_to_c(b);

Ok(())
}

fn test_round_trip(expected: impl Array + Clone + 'static) -> Result<()> {
let array: Arc<dyn Array> = Arc::new(expected.clone());
let field = Field::new("a", array.data_type().clone(), true);
let expected = Box::new(expected) as Box<dyn Array>;

// create references
let array_ptr = ffi::export_array_to_c(array);
let schema_ptr = ffi::export_field_to_c(&field);
let array_ptr = Box::new(ffi::Ffi_ArrowArray::empty());
let schema_ptr = Box::new(ffi::Ffi_ArrowSchema::empty());

let array_ptr = Box::into_raw(array_ptr);
let schema_ptr = Box::into_raw(schema_ptr);

unsafe {
ffi::export_array_to_c(array, array_ptr);
ffi::export_field_to_c(&field, schema_ptr);
}

let array_ptr = unsafe { Box::from_raw(array_ptr) };
let schema_ptr = unsafe { Box::from_raw(schema_ptr) };

// import references
let result_field = ffi::import_field_from_c(schema_ptr.as_ref())?;
Expand All @@ -31,20 +31,26 @@ fn test_round_trip(expected: impl Array + Clone + 'static) -> Result<()> {
Ok(())
}

fn test_round_trip_schema(expected: Field) -> Result<()> {
fn test_round_trip_schema(field: Field) -> Result<()> {
// export the field to an `Ffi_ArrowSchema` of the C data interface.
let schema = ffi::export_field_to_c(&expected);
let schema_ptr = Box::new(ffi::Ffi_ArrowSchema::empty());

let schema_ptr = Box::into_raw(schema_ptr);

let result = ffi::import_field_from_c(&schema)?;
unsafe { ffi::export_field_to_c(&field, schema_ptr) };

assert_eq!(result, expected);
let schema_ptr = unsafe { Box::from_raw(schema_ptr) };

let result = ffi::import_field_from_c(schema_ptr.as_ref())?;

assert_eq!(result, field);
Ok(())
}

#[test]
fn u32() -> Result<()> {
let data = Int32Array::from(&[Some(2), None, Some(1), None]);
test_release(data)
test_round_trip(data)
}

#[test]
Expand Down

0 comments on commit 58eb2db

Please sign in to comment.