diff --git a/arrow-pyarrow-integration-testing/src/lib.rs b/arrow-pyarrow-integration-testing/src/lib.rs index a493c909142..48d39adcd61 100644 --- a/arrow-pyarrow-integration-testing/src/lib.rs +++ b/arrow-pyarrow-integration-testing/src/lib.rs @@ -65,8 +65,8 @@ fn to_rust_array(ob: PyObject, py: Python) -> PyResult> { (array_ptr as Py_uintptr_t, schema_ptr as Py_uintptr_t), )?; - let field = ffi::import_field_from_c(schema.as_ref()).map_err(PyO3ArrowError::from)?; - let array = ffi::import_array_from_c(array, &field).map_err(PyO3ArrowError::from)?; + let field = unsafe { ffi::import_field_from_c(schema.as_ref()).map_err(PyO3ArrowError::from)? }; + let array = unsafe { ffi::import_array_from_c(array, &field).map_err(PyO3ArrowError::from)? }; Ok(array.into()) } @@ -108,7 +108,7 @@ fn to_rust_field(ob: PyObject, py: Python) -> PyResult { // this changes the pointer's memory and is thus unsafe. In particular, `_export_to_c` can go out of bounds ob.call_method1(py, "_export_to_c", (schema_ptr as Py_uintptr_t,))?; - let field = ffi::import_field_from_c(schema.as_ref()).map_err(PyO3ArrowError::from)?; + let field = unsafe { ffi::import_field_from_c(schema.as_ref()).map_err(PyO3ArrowError::from)? }; Ok(field) } diff --git a/examples/ffi.rs b/examples/ffi.rs index 172c3159b4a..87933385659 100644 --- a/examples/ffi.rs +++ b/examples/ffi.rs @@ -14,7 +14,7 @@ unsafe fn export( ffi::export_field_to_c(&field, schema_ptr); } -fn import( +unsafe fn import( array: Box, schema: &ffi::Ffi_ArrowSchema, ) -> Result> { @@ -47,7 +47,8 @@ fn main() -> Result<()> { let schema_ptr = unsafe { Box::from_raw(schema_ptr) }; // and finally interpret the written memory into a new array. - let new_array = import(array_ptr, schema_ptr.as_ref())?; + // Safety: we used `export`, which is a valid exporter to the C data interface + let new_array = unsafe { import(array_ptr, schema_ptr.as_ref())? }; // which is equal to the exported array assert_eq!(array.as_ref(), new_array.as_ref()); diff --git a/src/array/binary/ffi.rs b/src/array/binary/ffi.rs index 9a4257cabf1..3030dedca31 100644 --- a/src/array/binary/ffi.rs +++ b/src/array/binary/ffi.rs @@ -23,8 +23,8 @@ unsafe impl ToFfi for BinaryArray { } } -unsafe impl FromFfi for BinaryArray { - fn try_from_ffi(array: A) -> Result { +impl FromFfi for BinaryArray { + unsafe fn try_from_ffi(array: A) -> Result { let data_type = array.field().data_type().clone(); let expected = if O::is_large() { DataType::LargeBinary diff --git a/src/array/boolean/ffi.rs b/src/array/boolean/ffi.rs index 745c27e54c0..e4264d93829 100644 --- a/src/array/boolean/ffi.rs +++ b/src/array/boolean/ffi.rs @@ -21,8 +21,8 @@ unsafe impl ToFfi for BooleanArray { } } -unsafe impl FromFfi for BooleanArray { - fn try_from_ffi(array: A) -> Result { +impl FromFfi for BooleanArray { + unsafe fn try_from_ffi(array: A) -> Result { let data_type = array.field().data_type().clone(); assert_eq!(data_type, DataType::Boolean); let length = array.array().len(); diff --git a/src/array/dictionary/ffi.rs b/src/array/dictionary/ffi.rs index 8fe0e49917b..c6f8f8fa201 100644 --- a/src/array/dictionary/ffi.rs +++ b/src/array/dictionary/ffi.rs @@ -17,8 +17,8 @@ unsafe impl ToFfi for DictionaryArray { } } -unsafe impl FromFfi for DictionaryArray { - fn try_from_ffi(array: A) -> Result { +impl FromFfi for DictionaryArray { + unsafe fn try_from_ffi(array: A) -> Result { // keys: similar to PrimitiveArray, but the datatype is the inner one let length = array.array().len(); let offset = array.array().offset(); diff --git a/src/array/ffi.rs b/src/array/ffi.rs index 7368f45420b..aeab1469313 100644 --- a/src/array/ffi.rs +++ b/src/array/ffi.rs @@ -22,9 +22,12 @@ pub unsafe trait ToFfi { /// Trait describing how a struct imports into itself from the /// [C data interface](https://arrow.apache.org/docs/format/CDataInterface.html) (FFI). -pub unsafe trait FromFfi: Sized { +pub trait FromFfi: Sized { /// Convert itself from FFI. - fn try_from_ffi(array: T) -> Result; + /// # Safety + /// This function is intrinsically `unsafe` as it requires the FFI to be made according + /// to the [C data interface](https://arrow.apache.org/docs/format/CDataInterface.html) + unsafe fn try_from_ffi(array: T) -> Result; } macro_rules! ffi_dyn { diff --git a/src/array/list/ffi.rs b/src/array/list/ffi.rs index 97bea8cbc32..14235ce1ddd 100644 --- a/src/array/list/ffi.rs +++ b/src/array/list/ffi.rs @@ -22,8 +22,8 @@ unsafe impl ToFfi for ListArray { } } -unsafe impl FromFfi for ListArray { - fn try_from_ffi(array: A) -> Result { +impl FromFfi for ListArray { + unsafe fn try_from_ffi(array: A) -> Result { let data_type = array.field().data_type().clone(); let length = array.array().len(); let offset = array.array().offset(); diff --git a/src/array/primitive/ffi.rs b/src/array/primitive/ffi.rs index 1c3f534970d..3418e2516fc 100644 --- a/src/array/primitive/ffi.rs +++ b/src/array/primitive/ffi.rs @@ -22,8 +22,8 @@ unsafe impl ToFfi for PrimitiveArray { } } -unsafe impl FromFfi for PrimitiveArray { - fn try_from_ffi(array: A) -> Result { +impl FromFfi for PrimitiveArray { + unsafe fn try_from_ffi(array: A) -> Result { let data_type = array.field().data_type().clone(); let length = array.array().len(); let offset = array.array().offset(); diff --git a/src/array/struct_.rs b/src/array/struct_.rs index 8c772b857cf..8dc327a5719 100644 --- a/src/array/struct_.rs +++ b/src/array/struct_.rs @@ -233,8 +233,8 @@ unsafe impl ToFfi for StructArray { } } -unsafe impl FromFfi for StructArray { - fn try_from_ffi(array: A) -> Result { +impl FromFfi for StructArray { + unsafe fn try_from_ffi(array: A) -> Result { let field = array.field(); let fields = Self::get_fields(field.data_type()).to_vec(); diff --git a/src/array/union/ffi.rs b/src/array/union/ffi.rs index f07b7092eab..640a7d6f0ab 100644 --- a/src/array/union/ffi.rs +++ b/src/array/union/ffi.rs @@ -27,8 +27,8 @@ unsafe impl ToFfi for UnionArray { } } -unsafe impl FromFfi for UnionArray { - fn try_from_ffi(array: A) -> Result { +impl FromFfi for UnionArray { + unsafe fn try_from_ffi(array: A) -> Result { let field = array.field(); let data_type = field.data_type().clone(); let fields = Self::get_fields(field.data_type()); diff --git a/src/array/utf8/ffi.rs b/src/array/utf8/ffi.rs index 13631725916..98d47efa9dc 100644 --- a/src/array/utf8/ffi.rs +++ b/src/array/utf8/ffi.rs @@ -20,8 +20,8 @@ unsafe impl ToFfi for Utf8Array { } } -unsafe impl FromFfi for Utf8Array { - fn try_from_ffi(array: A) -> Result { +impl FromFfi for Utf8Array { + unsafe fn try_from_ffi(array: A) -> Result { let length = array.array().len(); let offset = array.array().offset(); let mut validity = unsafe { array.validity() }?; diff --git a/src/ffi/array.rs b/src/ffi/array.rs index 518102ecbed..9bb83908e80 100644 --- a/src/ffi/array.rs +++ b/src/ffi/array.rs @@ -1,20 +1,3 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - //! Contains functionality to load an ArrayData from the C Data Interface use super::ffi::ArrowArrayRef; @@ -27,7 +10,7 @@ use crate::{array::*, datatypes::PhysicalType}; /// If and only if: /// * the data type is not supported /// * the interface is not valid (e.g. a null pointer) -pub fn try_from(array: A) -> Result> { +pub unsafe fn try_from(array: A) -> Result> { use PhysicalType::*; Ok(match array.field().data_type().to_physical_type() { Boolean => Box::new(BooleanArray::try_from_ffi(array)?), diff --git a/src/ffi/mod.rs b/src/ffi/mod.rs index 15504487c94..8c3dac73fc6 100644 --- a/src/ffi/mod.rs +++ b/src/ffi/mod.rs @@ -34,11 +34,20 @@ pub unsafe fn export_field_to_c(field: &Field, ptr: *mut Ffi_ArrowSchema) { } /// Imports a [`Field`] from the C data interface. -pub fn import_field_from_c(field: &Ffi_ArrowSchema) -> Result { +/// # Safety +/// This function is intrinsically `unsafe` and relies on a [`Ffi_ArrowSchema`] +/// valid according to the [C data interface](https://arrow.apache.org/docs/format/CDataInterface.html) (FFI). +pub unsafe fn import_field_from_c(field: &Ffi_ArrowSchema) -> Result { to_field(field) } -/// Imports a [`Field`] from the C data interface. -pub fn import_array_from_c(array: Box, field: &Field) -> Result> { +/// Imports an [`Array`] from the C data interface. +/// # Safety +/// This function is intrinsically `unsafe` and relies on a [`Ffi_ArrowArray`] +/// valid according to the [C data interface](https://arrow.apache.org/docs/format/CDataInterface.html) (FFI). +pub unsafe fn import_array_from_c( + array: Box, + field: &Field, +) -> Result> { try_from(Arc::new(ArrowArray::new(array, field.clone()))) } diff --git a/src/ffi/schema.rs b/src/ffi/schema.rs index a5915ec77aa..41b062d4996 100644 --- a/src/ffi/schema.rs +++ b/src/ffi/schema.rs @@ -199,7 +199,7 @@ impl Drop for Ffi_ArrowSchema { } } -pub fn to_field(schema: &Ffi_ArrowSchema) -> Result { +pub(crate) unsafe fn to_field(schema: &Ffi_ArrowSchema) -> Result { let dictionary = schema.dictionary(); let data_type = if let Some(dictionary) = dictionary { let indices_data_type = to_data_type(schema)?; @@ -224,7 +224,7 @@ pub fn to_field(schema: &Ffi_ArrowSchema) -> Result { Ok(field) } -fn to_data_type(schema: &Ffi_ArrowSchema) -> Result { +unsafe fn to_data_type(schema: &Ffi_ArrowSchema) -> Result { Ok(match schema.format() { "n" => DataType::Null, "b" => DataType::Boolean, diff --git a/src/lib.rs b/src/lib.rs index a6ca6598496..f7bc3d339b6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,7 @@ //! Doc provided by README +// So that we have more control over what is `unsafe` inside an `unsafe` block +#![allow(unused_unsafe)] #![cfg_attr(docsrs, feature(doc_cfg))] #[macro_use] diff --git a/tests/it/ffi.rs b/tests/it/ffi.rs index 1bc86f3a638..facc143988c 100644 --- a/tests/it/ffi.rs +++ b/tests/it/ffi.rs @@ -24,8 +24,8 @@ fn test_round_trip(expected: impl Array + Clone + 'static) -> Result<()> { let schema_ptr = unsafe { Box::from_raw(schema_ptr) }; // import references - let result_field = ffi::import_field_from_c(schema_ptr.as_ref())?; - let result_array = ffi::import_array_from_c(array_ptr, &result_field)?; + let result_field = unsafe { ffi::import_field_from_c(schema_ptr.as_ref())? }; + let result_array = unsafe { ffi::import_array_from_c(array_ptr, &result_field)? }; assert_eq!(&result_array, &expected); assert_eq!(result_field, field); @@ -42,7 +42,7 @@ fn test_round_trip_schema(field: Field) -> Result<()> { let schema_ptr = unsafe { Box::from_raw(schema_ptr) }; - let result = ffi::import_field_from_c(schema_ptr.as_ref())?; + let result = unsafe { ffi::import_field_from_c(schema_ptr.as_ref())? }; assert_eq!(result, field); Ok(())