From b75effcc403165c82bf4423c41fe0e48fd18f16d Mon Sep 17 00:00:00 2001 From: John Hughes Date: Thu, 17 Nov 2022 14:55:47 +0100 Subject: [PATCH 1/2] Support `Extension` type in `ffi::schema::get_child()` --- src/ffi/schema.rs | 57 +++++++++++++++++++++++++------------------- tests/it/ffi/data.rs | 18 ++++++++++++++ 2 files changed, 50 insertions(+), 25 deletions(-) diff --git a/src/ffi/schema.rs b/src/ffi/schema.rs index 5e6cc4180cc..650f51854a4 100644 --- a/src/ffi/schema.rs +++ b/src/ffi/schema.rs @@ -37,6 +37,35 @@ unsafe extern "C" fn c_release_schema(schema: *mut ArrowSchema) { schema.release = None; } +/// allocate (and hold) the children +fn schema_children(data_type: &DataType, flags: &mut i64) -> Vec> { + match data_type { + DataType::List(field) => { + vec![Box::new(ArrowSchema::new(field.as_ref()))] + } + DataType::FixedSizeList(field, _) => { + vec![Box::new(ArrowSchema::new(field.as_ref()))] + } + DataType::LargeList(field) => { + vec![Box::new(ArrowSchema::new(field.as_ref()))] + } + DataType::Map(field, is_sorted) => { + *flags += (*is_sorted as i64) * 4; + vec![Box::new(ArrowSchema::new(field.as_ref()))] + } + DataType::Struct(fields) => fields + .iter() + .map(|field| Box::new(ArrowSchema::new(field))) + .collect::>(), + DataType::Union(fields, _, _) => fields + .iter() + .map(|field| Box::new(ArrowSchema::new(field))) + .collect::>(), + DataType::Extension(_, inner, _) => schema_children(inner, flags), + _ => vec![], + } +} + impl ArrowSchema { /// creates a new [ArrowSchema] pub(crate) fn new(field: &Field) -> Self { @@ -45,31 +74,8 @@ impl ArrowSchema { let mut flags = field.is_nullable as i64 * 2; - // allocate (and hold) the children - let children_vec = match field.data_type() { - DataType::List(field) => { - vec![Box::new(ArrowSchema::new(field.as_ref()))] - } - DataType::FixedSizeList(field, _) => { - vec![Box::new(ArrowSchema::new(field.as_ref()))] - } - DataType::LargeList(field) => { - vec![Box::new(ArrowSchema::new(field.as_ref()))] - } - DataType::Map(field, is_sorted) => { - flags += (*is_sorted as i64) * 4; - vec![Box::new(ArrowSchema::new(field.as_ref()))] - } - DataType::Struct(fields) => fields - .iter() - .map(|field| Box::new(ArrowSchema::new(field))) - .collect::>(), - DataType::Union(fields, _, _) => fields - .iter() - .map(|field| Box::new(ArrowSchema::new(field))) - .collect::>(), - _ => vec![], - }; + let children_vec = schema_children(&field.data_type(), &mut flags); + // note: this cannot be done along with the above because the above is fallible and this op leaks. let children_ptr = children_vec .into_iter() @@ -480,6 +486,7 @@ pub(super) fn get_child(data_type: &DataType, index: usize) -> Result (0, DataType::Map(field, _)) => Ok(field.data_type().clone()), (index, DataType::Struct(fields)) => Ok(fields[index].data_type().clone()), (index, DataType::Union(fields, _, _)) => Ok(fields[index].data_type().clone()), + (index, DataType::Extension(_, subtype, _)) => get_child(subtype, index), (child, data_type) => Err(Error::OutOfSpec(format!( "Requested child {} to type {:?} that has no such child", child, data_type diff --git a/tests/it/ffi/data.rs b/tests/it/ffi/data.rs index 337326b6319..80d87ca38af 100644 --- a/tests/it/ffi/data.rs +++ b/tests/it/ffi/data.rs @@ -346,3 +346,21 @@ fn extension() -> Result<()> { ); test_round_trip_schema(field) } + +#[test] +fn extension_children() -> Result<()> { + let field = Field::new( + "a", + DataType::Extension( + "b".to_string(), + Box::new(DataType::Struct(vec![Field::new( + "c", + DataType::Int32, + true, + )])), + None, + ), + true, + ); + test_round_trip_schema(field) +} From f55295a984db779b4c4f7bd46974f40e20026683 Mon Sep 17 00:00:00 2001 From: John Hughes Date: Fri, 18 Nov 2022 13:43:35 +0100 Subject: [PATCH 2/2] Fix clippy errors --- src/ffi/schema.rs | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/src/ffi/schema.rs b/src/ffi/schema.rs index 650f51854a4..143a2f477ec 100644 --- a/src/ffi/schema.rs +++ b/src/ffi/schema.rs @@ -38,31 +38,21 @@ unsafe extern "C" fn c_release_schema(schema: *mut ArrowSchema) { } /// allocate (and hold) the children -fn schema_children(data_type: &DataType, flags: &mut i64) -> Vec> { +fn schema_children(data_type: &DataType, flags: &mut i64) -> Box<[*mut ArrowSchema]> { match data_type { - DataType::List(field) => { - vec![Box::new(ArrowSchema::new(field.as_ref()))] - } - DataType::FixedSizeList(field, _) => { - vec![Box::new(ArrowSchema::new(field.as_ref()))] - } - DataType::LargeList(field) => { - vec![Box::new(ArrowSchema::new(field.as_ref()))] + DataType::List(field) | DataType::FixedSizeList(field, _) | DataType::LargeList(field) => { + Box::new([Box::into_raw(Box::new(ArrowSchema::new(field.as_ref())))]) } DataType::Map(field, is_sorted) => { *flags += (*is_sorted as i64) * 4; - vec![Box::new(ArrowSchema::new(field.as_ref()))] + Box::new([Box::into_raw(Box::new(ArrowSchema::new(field.as_ref())))]) } - DataType::Struct(fields) => fields + DataType::Struct(fields) | DataType::Union(fields, _, _) => fields .iter() - .map(|field| Box::new(ArrowSchema::new(field))) - .collect::>(), - DataType::Union(fields, _, _) => fields - .iter() - .map(|field| Box::new(ArrowSchema::new(field))) - .collect::>(), + .map(|field| Box::into_raw(Box::new(ArrowSchema::new(field)))) + .collect::>(), DataType::Extension(_, inner, _) => schema_children(inner, flags), - _ => vec![], + _ => Box::new([]), } } @@ -74,13 +64,8 @@ impl ArrowSchema { let mut flags = field.is_nullable as i64 * 2; - let children_vec = schema_children(&field.data_type(), &mut flags); - // note: this cannot be done along with the above because the above is fallible and this op leaks. - let children_ptr = children_vec - .into_iter() - .map(Box::into_raw) - .collect::>(); + let children_ptr = schema_children(field.data_type(), &mut flags); let n_children = children_ptr.len() as i64; let dictionary = if let DataType::Dictionary(_, values, is_ordered) = field.data_type() {