diff --git a/src/ffi/schema.rs b/src/ffi/schema.rs
index 5e6cc4180cc..143a2f477ec 100644
--- a/src/ffi/schema.rs
+++ b/src/ffi/schema.rs
@@ -37,6 +37,25 @@ unsafe extern "C" fn c_release_schema(schema: *mut ArrowSchema) {
     schema.release = None;
 }
 
+/// allocate (and hold) the children
+fn schema_children(data_type: &DataType, flags: &mut i64) -> Box<[*mut ArrowSchema]> {
+    match data_type {
+        DataType::List(field) | DataType::FixedSizeList(field, _) | DataType::LargeList(field) => {
+            Box::new([Box::into_raw(Box::new(ArrowSchema::new(field.as_ref())))])
+        }
+        DataType::Map(field, is_sorted) => {
+            *flags += (*is_sorted as i64) * 4;
+            Box::new([Box::into_raw(Box::new(ArrowSchema::new(field.as_ref())))])
+        }
+        DataType::Struct(fields) | DataType::Union(fields, _, _) => fields
+            .iter()
+            .map(|field| Box::into_raw(Box::new(ArrowSchema::new(field))))
+            .collect::<Box<[_]>>(),
+        DataType::Extension(_, inner, _) => schema_children(inner, flags),
+        _ => Box::new([]),
+    }
+}
+
 impl ArrowSchema {
     /// creates a new [ArrowSchema]
     pub(crate) fn new(field: &Field) -> Self {
@@ -45,36 +64,8 @@ impl ArrowSchema {
 
         let mut flags = field.is_nullable as i64 * 2;
 
-        // allocate (and hold) the children
-        let children_vec = match field.data_type() {
-            DataType::List(field) => {
-                vec![Box::new(ArrowSchema::new(field.as_ref()))]
-            }
-            DataType::FixedSizeList(field, _) => {
-                vec![Box::new(ArrowSchema::new(field.as_ref()))]
-            }
-            DataType::LargeList(field) => {
-                vec![Box::new(ArrowSchema::new(field.as_ref()))]
-            }
-            DataType::Map(field, is_sorted) => {
-                flags += (*is_sorted as i64) * 4;
-                vec![Box::new(ArrowSchema::new(field.as_ref()))]
-            }
-            DataType::Struct(fields) => fields
-                .iter()
-                .map(|field| Box::new(ArrowSchema::new(field)))
-                .collect::<Vec<_>>(),
-            DataType::Union(fields, _, _) => fields
-                .iter()
-                .map(|field| Box::new(ArrowSchema::new(field)))
-                .collect::<Vec<_>>(),
-            _ => vec![],
-        };
         // note: this cannot be done along with the above because the above is fallible and this op leaks.
-        let children_ptr = children_vec
-            .into_iter()
-            .map(Box::into_raw)
-            .collect::<Box<_>>();
+        let children_ptr = schema_children(field.data_type(), &mut flags);
         let n_children = children_ptr.len() as i64;
 
         let dictionary = if let DataType::Dictionary(_, values, is_ordered) = field.data_type() {
@@ -480,6 +471,7 @@ pub(super) fn get_child(data_type: &DataType, index: usize) -> Result<DataType>
         (0, DataType::Map(field, _)) => Ok(field.data_type().clone()),
         (index, DataType::Struct(fields)) => Ok(fields[index].data_type().clone()),
         (index, DataType::Union(fields, _, _)) => Ok(fields[index].data_type().clone()),
+        (index, DataType::Extension(_, subtype, _)) => get_child(subtype, index),
         (child, data_type) => Err(Error::OutOfSpec(format!(
             "Requested child {} to type {:?} that has no such child",
             child, data_type
diff --git a/tests/it/ffi/data.rs b/tests/it/ffi/data.rs
index 1f8606aee8c..bc87eda68c8 100644
--- a/tests/it/ffi/data.rs
+++ b/tests/it/ffi/data.rs
@@ -346,3 +346,21 @@ fn extension() -> Result<()> {
     );
     test_round_trip_schema(field)
 }
+
+#[test]
+fn extension_children() -> Result<()> {
+    let field = Field::new(
+        "a",
+        DataType::Extension(
+            "b".to_string(),
+            Box::new(DataType::Struct(vec![Field::new(
+                "c",
+                DataType::Int32,
+                true,
+            )])),
+            None,
+        ),
+        true,
+    );
+    test_round_trip_schema(field)
+}