Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Added FFI for unionArray.
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Aug 13, 2021
1 parent 11b4356 commit c57d2d5
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 3 deletions.
35 changes: 35 additions & 0 deletions arrow-pyarrow-integration-testing/tests/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,38 @@ def test_dict(self):
b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_sparse_union(self):
"""
Python -> Rust -> Python
"""
a = pyarrow.UnionArray.from_sparse(
pyarrow.array([0, 1, 1, 0, 1], pyarrow.int8()),
[
pyarrow.array(["a", "", "", "", "c"], pyarrow.utf8()),
pyarrow.array([0, 1, 2, None, 0], pyarrow.int64()),
],
)
b = arrow_pyarrow_integration_testing.round_trip(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type

def test_dense_union(self):
"""
Python -> Rust -> Python
"""
a = pyarrow.UnionArray.from_dense(
pyarrow.array([0, 1, 1, 0, 1], pyarrow.int8()),
pyarrow.array([0, 1, 2, 3, 4], type=pyarrow.int32()),
[
pyarrow.array(["a", "", "", "", "c"], pyarrow.utf8()),
pyarrow.array([0, 1, 2, None, 0], pyarrow.int64()),
],
)
b = arrow_pyarrow_integration_testing.round_trip(a)

b.validate(full=True)
assert a.to_pylist() == b.to_pylist()
assert a.type == b.type
2 changes: 1 addition & 1 deletion src/array/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ pub fn buffers_children_dictionary(array: &dyn Array) -> BuffersChildren {
DataType::LargeList(_) => ffi_dyn!(array, ListArray::<i64>),
DataType::FixedSizeList(_, _) => ffi_dyn!(array, FixedSizeListArray),
DataType::Struct(_) => ffi_dyn!(array, StructArray),
DataType::Union(_, _, _) => todo!(),
DataType::Union(_, _, _) => ffi_dyn!(array, UnionArray),
DataType::Dictionary(key_type, _) => match key_type.as_ref() {
DataType::Int8 => ffi_dict_dyn!(array, DictionaryArray::<i8>),
DataType::Int16 => ffi_dict_dyn!(array, DictionaryArray::<i16>),
Expand Down
59 changes: 59 additions & 0 deletions src/array/union/ffi.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
use std::sync::Arc;

use crate::{array::FromFfi, error::Result, ffi};

use super::super::{ffi::ToFfi, Array};
use super::UnionArray;

unsafe impl ToFfi for UnionArray {
fn buffers(&self) -> Vec<Option<std::ptr::NonNull<u8>>> {
if let Some(offsets) = &self.offsets {
vec![
None,
std::ptr::NonNull::new(self.types.as_ptr() as *mut u8),
std::ptr::NonNull::new(offsets.as_ptr() as *mut u8),
]
} else {
vec![None, std::ptr::NonNull::new(self.types.as_ptr() as *mut u8)]
}
}

fn offset(&self) -> usize {
self.offset
}

fn children(&self) -> Vec<Arc<dyn Array>> {
self.fields.clone()
}
}

unsafe impl<A: ffi::ArrowArrayRef> FromFfi<A> for UnionArray {
fn try_from_ffi(array: A) -> Result<Self> {
let field = array.field()?;
let data_type = field.data_type().clone();
let fields = Self::get_fields(field.data_type());

// 0 is null bitmap (which was deprecated for the union type)
let mut types = unsafe { array.buffer::<i8>(0) }?;
let offsets = if Self::is_sparse(&data_type) {
None
} else {
Some(unsafe { array.buffer::<i32>(1) }?)
};

let length = array.array().len();
let offset = array.array().offset();
let fields = (0..fields.len())
.map(|index| {
let child = array.child(index)?;
Ok(ffi::try_from(child)?.into())
})
.collect::<Result<Vec<Arc<dyn Array>>>>()?;

if offset > 0 {
types = types.slice(offset, length);
};

Ok(Self::from_data(data_type, types, fields, offsets))
}
}
11 changes: 10 additions & 1 deletion src/array/union/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use crate::{

use super::{new_empty_array, Array};

mod ffi;
mod iterator;

/// A union
Expand Down Expand Up @@ -168,7 +169,15 @@ impl UnionArray {
if let DataType::Union(fields, _, _) = data_type {
fields
} else {
panic!("Wrong datatype passed to Struct.")
panic!("Wrong datatype passed to UnionArray.")
}
}

pub fn is_sparse(data_type: &DataType) -> bool {
if let DataType::Union(_, _, is_sparse) = data_type {
*is_sparse
} else {
panic!("Wrong datatype passed to UnionArray.")
}
}
}
1 change: 1 addition & 0 deletions src/ffi/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ pub fn try_from<A: ArrowArrayRef>(array: A) -> Result<Box<dyn Array>> {
DataType::UInt64 => Box::new(DictionaryArray::<u64>::try_from_ffi(array)?),
_ => unreachable!(),
},
DataType::Union(_, _, _) => Box::new(UnionArray::try_from_ffi(array)?),
data_type => {
return Err(ArrowError::NotYetImplemented(format!(
"Reading DataType \"{}\" is not yet supported.",
Expand Down
33 changes: 32 additions & 1 deletion src/ffi/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ impl Ffi_ArrowSchema {
.iter()
.map(|field| Ok(Box::new(Ffi_ArrowSchema::try_new(field.clone())?)))
.collect::<Result<Vec<_>>>()?,
DataType::Union(fields, _, _) => fields
.iter()
.map(|field| Ok(Box::new(Ffi_ArrowSchema::try_new(field.clone())?)))
.collect::<Result<Vec<_>>>()?,
_ => vec![],
};
// note: this cannot be done along with the above because the above is fallible and this op leaks.
Expand Down Expand Up @@ -255,6 +259,21 @@ fn to_data_type(schema: &Ffi_ArrowSchema) -> Result<DataType> {
ArrowError::Ffi("Decimal scale is not a valid integer".to_string())
})?;
DataType::Decimal(precision, scale)
} else if !parts.is_empty() && ((parts[0] == "+us") || (parts[0] == "+ud")) {
// union
let is_sparse = parts[0] == "+us";
let type_ids = parts[1]
.split(',')
.map(|x| {
x.parse::<i32>().map_err(|_| {
ArrowError::Ffi("Union type id is not a valid integer".to_string())
})
})
.collect::<Result<Vec<_>>>()?;
let fields = (0..schema.n_children as usize)
.map(|x| to_field(schema.child(x)))
.collect::<Result<Vec<_>>>()?;
DataType::Union(fields, Some(type_ids), is_sparse)
} else {
return Err(ArrowError::Ffi(format!(
"The datatype \"{}\" is still not supported in Rust implementation",
Expand Down Expand Up @@ -316,7 +335,19 @@ fn to_format(data_type: &DataType) -> Result<String> {
DataType::Struct(_) => "+s",
DataType::FixedSizeBinary(size) => return Ok(format!("w{}", size)),
DataType::FixedSizeList(_, size) => return Ok(format!("+w:{}", size)),
DataType::Union(_, _, _) => todo!(),
DataType::Union(f, ids, is_sparse) => {
let sparsness = if *is_sparse { 's' } else { 'd' };
let mut r = format!("+u{}:", sparsness);
let ids = if let Some(ids) = ids {
ids.iter()
.fold(String::new(), |a, b| a + &b.to_string() + ",")
} else {
(0..f.len()).fold(String::new(), |a, b| a + &b.to_string() + ",")
};
let ids = &ids[..ids.len() - 1]; // take away last ","
r.push_str(ids);
return Ok(r);
}
DataType::Dictionary(index, _) => return to_format(index.as_ref()),
_ => todo!(),
}
Expand Down

0 comments on commit c57d2d5

Please sign in to comment.