Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added support for field's metadata via FFI #362

Merged
merged 1 commit into from
Sep 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 69 additions & 8 deletions src/ffi/schema.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{ffi::CStr, ffi::CString, ptr};
use std::{collections::BTreeMap, convert::TryInto, ffi::CStr, ffi::CString, ptr};

use crate::{
datatypes::{DataType, Field, IntervalUnit, TimeUnit},
Expand All @@ -7,6 +7,9 @@ use crate::{

/// Rust-side state that backs an exported `Ffi_ArrowSchema`.
///
/// It is boxed and reached through the schema's `private_data` pointer; the release
/// callback turns that pointer back into a `Box<SchemaPrivateData>` so the contents
/// are dropped together with the schema.
// NOTE(review): `dead_code` is presumably allowed because some fields are only held
// to keep their allocations alive and are never read by name - confirm.
#[allow(dead_code)]
struct SchemaPrivateData {
// Owns the C string whose pointer is exposed as the schema's `name`.
name: CString,
// Owns the C string whose pointer is exposed as the schema's `format`.
format: CString,
// Serialized key/value metadata (see `metadata_to_bytes`); `None` makes the schema's
// `metadata` pointer null.
metadata: Option<Vec<u8>>,
// Raw pointers to the child schemas; exposed as the schema's `children` and each one
// is reclaimed with `Box::from_raw` in the release callback.
children_ptr: Box<[*mut Ffi_ArrowSchema]>,
// Raw pointer obtained via `Box::into_raw` for the dictionary schema, if there is one.
dictionary: Option<*mut Ffi_ArrowSchema>,
}
Expand Down Expand Up @@ -35,9 +38,6 @@ unsafe extern "C" fn c_release_schema(schema: *mut Ffi_ArrowSchema) {
}
let schema = &mut *schema;

// take ownership back to release it.
CString::from_raw(schema.format as *mut std::os::raw::c_char);
CString::from_raw(schema.name as *mut std::os::raw::c_char);
let private = Box::from_raw(schema.private_data as *mut SchemaPrivateData);
for child in private.children_ptr.iter() {
let _ = Box::from_raw(*child);
Expand Down Expand Up @@ -91,16 +91,28 @@ impl Ffi_ArrowSchema {
None
};

let metadata = field.metadata().as_ref().map(metadata_to_bytes);

let name = CString::new(name).unwrap();
let format = CString::new(format).unwrap();

let mut private = Box::new(SchemaPrivateData {
name,
format,
metadata,
children_ptr,
dictionary: dictionary.map(Box::into_raw),
});

// <https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema>
Self {
format: CString::new(format).unwrap().into_raw(),
name: CString::new(name).unwrap().into_raw(),
metadata: std::ptr::null_mut(),
format: private.format.as_ptr(),
name: private.name.as_ptr(),
metadata: private
.metadata
.as_ref()
.map(|x| x.as_ptr())
.unwrap_or(std::ptr::null()) as *const ::std::os::raw::c_char,
flags,
n_children,
children: private.children_ptr.as_mut_ptr(),
Expand Down Expand Up @@ -180,7 +192,10 @@ pub fn to_field(schema: &Ffi_ArrowSchema) -> Result<Field> {
} else {
to_data_type(schema)?
};
Ok(Field::new(schema.name(), data_type, schema.nullable()))
let metadata = unsafe { metadata_from_bytes(schema.metadata) };
let mut field = Field::new(schema.name(), data_type, schema.nullable());
field.set_metadata(metadata);
Ok(field)
}

fn to_data_type(schema: &Ffi_ArrowSchema) -> Result<DataType> {
Expand Down Expand Up @@ -375,3 +390,49 @@ pub(super) fn get_field_child(field: &Field, index: usize) -> Result<Field> {
))),
}
}

/// Serializes field metadata into the binary layout used for the `metadata` member
/// of an FFI schema.
///
/// Layout (every integer is a native-endian `i32`):
/// the number of key/value pairs, followed, for each pair in the map's iteration
/// order, by the key length in bytes, the key bytes, the value length in bytes and
/// the value bytes. Strings are written without a trailing NUL.
///
/// # Panics
///
/// Panics if the number of pairs, or the byte length of any key or value, does not
/// fit in an `i32`. Silently truncating such a length would produce a buffer that
/// readers mis-parse.
fn metadata_to_bytes(metadata: &BTreeMap<String, String>) -> Vec<u8> {
    /// Appends `len` to `buffer` as a native-endian `i32`, rejecting values that overflow.
    fn push_len(buffer: &mut Vec<u8>, len: usize) {
        let len: i32 = len
            .try_into()
            .expect("metadata counts and lengths must fit in an i32");
        buffer.extend_from_slice(&len.to_ne_bytes());
    }

    // 4 bytes for the pair count plus, per pair, two 4-byte lengths and the payloads.
    let payload: usize = metadata
        .iter()
        .map(|(key, value)| 8 + key.len() + value.len())
        .sum();
    let mut buffer = Vec::with_capacity(4 + payload);

    push_len(&mut buffer, metadata.len());
    for (key, value) in metadata {
        push_len(&mut buffer, key.len());
        buffer.extend_from_slice(key.as_bytes());
        push_len(&mut buffer, value.len());
        buffer.extend_from_slice(value.as_bytes());
    }
    buffer
}

/// Decodes a native-endian `i32` from the four bytes starting at `ptr`.
///
/// # Safety
///
/// `ptr` must be non-null and valid for reads of 4 bytes. No particular alignment
/// is required.
unsafe fn read_ne_i32(ptr: *const u8) -> i32 {
    // SAFETY: the caller guarantees four readable bytes at `ptr`; a `[u8; 4]` has
    // alignment 1, so any byte address is acceptable.
    let raw: [u8; 4] = ptr.cast::<[u8; 4]>().read_unaligned();
    i32::from_ne_bytes(raw)
}

/// Views the `len` bytes starting at `ptr` as a UTF-8 string slice.
///
/// # Safety
///
/// `ptr` must be non-null and valid for reads of `len` bytes. The returned slice is
/// typed `'static`, but it merely borrows the pointed-to memory: the caller must not
/// use it after that memory is freed or mutated.
///
/// # Panics
///
/// Panics if the bytes are not valid UTF-8.
unsafe fn read_bytes(ptr: *const u8, len: usize) -> &'static str {
    // SAFETY: the caller guarantees `len` readable bytes at `ptr`.
    let raw: &'static [u8] = std::slice::from_raw_parts(ptr, len);
    std::str::from_utf8(raw).unwrap()
}

unsafe fn metadata_from_bytes(
data: *const ::std::os::raw::c_char,
) -> Option<BTreeMap<String, String>> {
let mut data = data as *const u8; // u8 = i8
if data.is_null() {
return None;
};
let len = read_ne_i32(data);
data = data.add(4);

let mut result = BTreeMap::new();
for _ in 0..len {
let key_len = read_ne_i32(data) as usize;
data = data.add(4);
let key = read_bytes(data, key_len);
data = data.add(key_len);
let value_len = read_ne_i32(data) as usize;
data = data.add(4);
let value = read_bytes(data, value_len);
data = data.add(value_len);
result.insert(key.to_string(), value.to_string());
}
Some(result)
}
7 changes: 7 additions & 0 deletions tests/it/ffi.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use arrow2::array::*;
use arrow2::datatypes::{DataType, Field, TimeUnit};
use arrow2::{error::Result, ffi};
use std::collections::BTreeMap;
use std::sync::Arc;

fn test_round_trip(expected: impl Array + Clone + 'static) -> Result<()> {
Expand Down Expand Up @@ -163,5 +164,11 @@ fn schema() -> Result<()> {
DataType::Dictionary(Box::new(DataType::UInt32), Box::new(DataType::Utf8)),
true,
);
test_round_trip_schema(field)?;

let field = Field::new("a", DataType::Int32, true);
let mut metadata = BTreeMap::new();
metadata.insert("some".to_string(), "stuff".to_string());
let field = field.with_metadata(metadata);
test_round_trip_schema(field)
}