From 641edd4698c50fe784d5b5b7e29e557ea0a8eb38 Mon Sep 17 00:00:00 2001
From: "Jorge C. Leitao"
Date: Wed, 1 Sep 2021 21:45:58 +0000
Subject: [PATCH] Added support for exporting metadata via FFI.

---
 src/ffi/schema.rs | 77 ++++++++++++++++++++++++++++++++++++++++++-----
 tests/it/ffi.rs   |  7 +++++
 2 files changed, 76 insertions(+), 8 deletions(-)

diff --git a/src/ffi/schema.rs b/src/ffi/schema.rs
index 95078c335a9..a3b12a70efc 100644
--- a/src/ffi/schema.rs
+++ b/src/ffi/schema.rs
@@ -1,4 +1,4 @@
-use std::{ffi::CStr, ffi::CString, ptr};
+use std::{collections::BTreeMap, convert::TryInto, ffi::CStr, ffi::CString, ptr};
 
 use crate::{
     datatypes::{DataType, Field, IntervalUnit, TimeUnit},
@@ -7,6 +7,9 @@ use crate::{
 
 #[allow(dead_code)]
 struct SchemaPrivateData {
+    name: CString,
+    format: CString,
+    metadata: Option<Vec<u8>>,
     children_ptr: Box<[*mut Ffi_ArrowSchema]>,
     dictionary: Option<*mut Ffi_ArrowSchema>,
 }
@@ -35,9 +38,6 @@ unsafe extern "C" fn c_release_schema(schema: *mut Ffi_ArrowSchema) {
     }
     let schema = &mut *schema;
 
-    // take ownership back to release it.
-    CString::from_raw(schema.format as *mut std::os::raw::c_char);
-    CString::from_raw(schema.name as *mut std::os::raw::c_char);
     let private = Box::from_raw(schema.private_data as *mut SchemaPrivateData);
     for child in private.children_ptr.iter() {
         let _ = Box::from_raw(*child);
@@ -91,16 +91,28 @@ impl Ffi_ArrowSchema {
             None
         };
 
+        let metadata = field.metadata().as_ref().map(metadata_to_bytes);
+
+        let name = CString::new(name).unwrap();
+        let format = CString::new(format).unwrap();
+
         let mut private = Box::new(SchemaPrivateData {
+            name,
+            format,
+            metadata,
             children_ptr,
             dictionary: dictionary.map(Box::into_raw),
         });
 
         //
         Self {
-            format: CString::new(format).unwrap().into_raw(),
-            name: CString::new(name).unwrap().into_raw(),
-            metadata: std::ptr::null_mut(),
+            format: private.format.as_ptr(),
+            name: private.name.as_ptr(),
+            metadata: private
+                .metadata
+                .as_ref()
+                .map(|x| x.as_ptr())
+                .unwrap_or(std::ptr::null()) as *const ::std::os::raw::c_char,
             flags,
             n_children,
             children: private.children_ptr.as_mut_ptr(),
@@ -180,7 +192,10 @@ pub fn to_field(schema: &Ffi_ArrowSchema) -> Result<Field> {
     } else {
         to_data_type(schema)?
     };
-    Ok(Field::new(schema.name(), data_type, schema.nullable()))
+    let metadata = unsafe { metadata_from_bytes(schema.metadata) };
+    let mut field = Field::new(schema.name(), data_type, schema.nullable());
+    field.set_metadata(metadata);
+    Ok(field)
 }
 
 fn to_data_type(schema: &Ffi_ArrowSchema) -> Result<DataType> {
@@ -375,3 +390,49 @@ pub(super) fn get_field_child(field: &Field, index: usize) -> Result<Field> {
         ))),
     }
 }
+
+fn metadata_to_bytes(metadata: &BTreeMap<String, String>) -> Vec<u8> {
+    let a = (metadata.len() as i32).to_ne_bytes().to_vec();
+    metadata.iter().fold(a, |mut acc, (key, value)| {
+        acc.extend((key.len() as i32).to_ne_bytes());
+        acc.extend(key.as_bytes());
+        acc.extend((value.len() as i32).to_ne_bytes());
+        acc.extend(value.as_bytes());
+        acc
+    })
+}
+
+unsafe fn read_ne_i32(ptr: *const u8) -> i32 {
+    let slice = std::slice::from_raw_parts(ptr, 4);
+    i32::from_ne_bytes(slice.try_into().unwrap())
+}
+
+unsafe fn read_bytes(ptr: *const u8, len: usize) -> &'static str {
+    let slice = std::slice::from_raw_parts(ptr, len);
+    std::str::from_utf8(slice).unwrap()
+}
+
+unsafe fn metadata_from_bytes(
+    data: *const ::std::os::raw::c_char,
+) -> Option<BTreeMap<String, String>> {
+    let mut data = data as *const u8; // u8 = i8
+    if data.is_null() {
+        return None;
+    };
+    let len = read_ne_i32(data);
+    data = data.add(4);
+
+    let mut result = BTreeMap::new();
+    for _ in 0..len {
+        let key_len = read_ne_i32(data) as usize;
+        data = data.add(4);
+        let key = read_bytes(data, key_len);
+        data = data.add(key_len);
+        let value_len = read_ne_i32(data) as usize;
+        data = data.add(4);
+        let value = read_bytes(data, value_len);
+        data = data.add(value_len);
+        result.insert(key.to_string(), value.to_string());
+    }
+    Some(result)
+}
diff --git a/tests/it/ffi.rs b/tests/it/ffi.rs
index d76cf5b32b3..856034c00f9 100644
--- a/tests/it/ffi.rs
+++ b/tests/it/ffi.rs
@@ -1,6 +1,7 @@
 use arrow2::array::*;
 use arrow2::datatypes::{DataType, Field, TimeUnit};
 use arrow2::{error::Result, ffi};
+use std::collections::BTreeMap;
 use std::sync::Arc;
 
 fn test_round_trip(expected: impl Array + Clone + 'static) -> Result<()> {
@@ -163,5 +164,11 @@ fn schema() -> Result<()> {
         DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)),
         true,
     );
+    test_round_trip_schema(field)?;
+
+    let field = Field::new("a", DataType::Int32, true);
+    let mut metadata = BTreeMap::new();
+    metadata.insert("some".to_string(), "stuff".to_string());
+    let field = field.with_metadata(metadata);
     test_round_trip_schema(field)
 }