Skip to content

Commit

Permalink
More conversions to and from arrow-rs (#15)
Browse files Browse the repository at this point in the history
This implements conversions between `arrow2` and `arrow-rs` for:

* `ListArray`
* `Bitmap`/`NullBuffer` (previously only one direction was supported)
* `OffsetsBuffer`
  • Loading branch information
emilk authored Jan 9, 2025
1 parent d1fe42d commit c762f39
Show file tree
Hide file tree
Showing 3 changed files with 222 additions and 1 deletion.
139 changes: 138 additions & 1 deletion src/array/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::{
offset::{Offset, Offsets, OffsetsBuffer},
};

use super::{new_empty_array, specification::try_check_offsets_bounds, Array};
use super::{new_empty_array, specification::try_check_offsets_bounds, Array, PrimitiveArray};

#[cfg(feature = "arrow")]
mod data;
Expand Down Expand Up @@ -242,3 +242,140 @@ impl<O: Offset> Array for ListArray<O> {
Box::new(self.clone().with_validity(validity))
}
}

/// `arrow2` -> `arrow-rs` conversion.
///
/// Builds an `arrow_array::GenericListArray` from the offsets, child values and
/// validity of an `arrow2` [`ListArray`]. The child field keeps its name,
/// data type and nullability; only those three properties are passed on to
/// the `arrow_schema::Field`.
#[cfg(feature = "arrow")]
impl<O: Offset + arrow_array::OffsetSizeTrait> From<ListArray<O>>
    for arrow_array::GenericListArray<O>
{
    fn from(value: ListArray<O>) -> Self {
        let field = ListArray::<O>::get_child_field(value.data_type());
        // Use the real child field name instead of assuming "item", so that
        // lists with a custom child name convert without losing it.
        let field = Arc::new(arrow_schema::Field::new(
            field.name.as_str(),
            field.data_type.clone().into(),
            field.is_nullable,
        ));
        let offsets = value.offsets().clone().into();
        let values = value.values().clone().into();
        let nulls = value.validity().map(|x| x.clone().into());
        Self::new(field, offsets, values, nulls)
    }
}

/// `arrow-rs` -> `arrow2` conversion.
///
/// Takes the `arrow-rs` list array apart with `into_parts` and rebuilds an
/// `arrow2` [`ListArray`] from the pieces. The child field's name and
/// nullability are taken from the source array rather than being replaced by
/// the defaults of `default_datatype`.
#[cfg(feature = "arrow")]
impl<O: Offset + arrow_array::OffsetSizeTrait> From<arrow_array::GenericListArray<O>>
    for ListArray<O>
{
    fn from(array1: arrow_array::GenericListArray<O>) -> Self {
        let (field1, offset_buffer1, values1, nulls1) = array1.into_parts();

        // Mirror the source child field (name, type, nullability).
        let child_field = Arc::new(Field::new(
            field1.name().as_str(),
            field1.data_type().clone().into(),
            field1.is_nullable(),
        ));

        // Both `Offset` and `OffsetSizeTrait` declare `IS_LARGE`, so the
        // constant has to be named through one specific trait.
        let data_type = if <O as Offset>::IS_LARGE {
            DataType::LargeList(child_field)
        } else {
            DataType::List(child_field)
        };

        Self::new(
            data_type,
            offset_buffer1.into(),
            values1.into(),
            nulls1.map(Bitmap::from_arrow),
        )
    }
}

/// Round-trips a sliced `ListArray<i32>` without a validity bitmap through
/// `arrow_array::ListArray` and checks that nothing changes.
#[cfg(feature = "arrow")]
#[test]
fn test_arrow_list_array_conversion_non_null() {
    #![allow(clippy::zero_prefixed_literal)]
    // Rows of the full (unsliced) list array:
    //   [0_001, 0_002],
    //   [1_001, 1_002, 1_003],
    //   [],
    //   [3_001, 3_002],
    //   [4_001],
    let child_values = PrimitiveArray::<i16>::from_vec(vec![
        0_001_i16, 0_002, // row 0
        1_001, 1_002, 1_003, // row 1
        // row 2 is empty
        3_001, 3_002, // row 3
        4_001, // row 4
    ]);
    let row_offsets =
        OffsetsBuffer::<i32>::from(Offsets::try_from(vec![0, 2, 5, 5, 7, 8]).unwrap());
    let item_field = Field::new("item", DataType::Int16, true);

    let full_array = ListArray::new(
        DataType::List(Arc::new(item_field)),
        row_offsets,
        child_values.boxed(),
        None,
    );

    // Drop the first and the last row so that the conversion sees a slice.
    let arrow2_list = full_array.sliced(1, 3);

    assert_eq!(arrow2_list.len(), 3);
    for (row, &expected_len) in [3_usize, 0, 2].iter().enumerate() {
        assert_eq!(arrow2_list.value(row).len(), expected_len);
    }

    let arrow1_list = arrow_array::ListArray::from(arrow2_list.clone());
    for (row, &expected_len) in [3, 0, 2].iter().enumerate() {
        assert_eq!(arrow1_list.value_length(row), expected_len);
    }

    let roundtripped = ListArray::from(arrow1_list);

    assert_eq!(arrow2_list, roundtripped);
}

/// Round-trips a sliced `ListArray<i32>` that contains a null row through
/// `arrow_array::ListArray` and checks that nothing changes.
#[cfg(feature = "arrow")]
#[test]
fn test_arrow_list_array_conversion_nullable() {
    #![allow(clippy::zero_prefixed_literal)]
    // Rows of the full (unsliced) list array:
    //   [0_001, 0_002],
    //   [1_001, 1_002, 1_003],
    //   [],
    //   [3_001, 3_002],
    //   null,
    //   [4_001],
    let child_values = PrimitiveArray::<i16>::from_vec(vec![
        0_001_i16, 0_002, // row 0
        1_001, 1_002, 1_003, // row 1
        // row 2: []
        3_001, 3_002, // row 3
        // row 4: null
        4_001, // row 5
    ]);
    let row_offsets =
        OffsetsBuffer::<i32>::from(Offsets::try_from(vec![0, 2, 5, 5, 7, 7, 8]).unwrap());
    let validity = Bitmap::from([true, true, true, true, false, true]);
    let item_field = Field::new("item", DataType::Int16, true);

    let full_array = ListArray::new(
        DataType::List(Arc::new(item_field)),
        row_offsets,
        child_values.boxed(),
        Some(validity),
    );

    // Drop the first and the last row so that the conversion sees a slice.
    let arrow2_list = full_array.sliced(1, 4);

    assert_eq!(arrow2_list.len(), 4);
    // The last entry is the null row, which has length zero.
    for (row, &expected_len) in [3_usize, 0, 2, 0].iter().enumerate() {
        assert_eq!(arrow2_list.value(row).len(), expected_len);
    }

    let arrow1_list = arrow_array::ListArray::from(arrow2_list.clone());
    // Again, the last entry is the null row.
    for (row, &expected_len) in [3, 0, 2, 0].iter().enumerate() {
        assert_eq!(arrow1_list.value_length(row), expected_len);
    }

    let roundtripped = ListArray::from(arrow1_list);

    assert_eq!(arrow2_list, roundtripped);
}
40 changes: 40 additions & 0 deletions src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,17 @@ impl Bitmap {
})
}

/// Builds a [`Bitmap`] from an `arrow-rs` `NullBuffer`.
///
/// The offset, length and null count are read from `nulls` before it is
/// consumed, and its underlying buffer is handed to `crate::buffer::to_bytes`.
#[cfg(feature = "arrow")]
pub fn from_arrow(nulls: arrow_buffer::buffer::NullBuffer) -> Self {
    // Read the metadata first: unwrapping the buffer below consumes `nulls`.
    let (bit_offset, bit_len, unset_bits) = (nulls.offset(), nulls.len(), nulls.null_count());

    let boolean_buffer = nulls.into_inner();
    let raw_buffer = boolean_buffer.into_inner();
    let bytes = crate::buffer::to_bytes(raw_buffer);

    // SAFETY: offset, length and null count all come from the `NullBuffer`
    // that owned these bytes, so they are taken to be consistent with them.
    unsafe { Self::from_inner_unchecked(bytes.into(), bit_offset, bit_len, unset_bits) }
}

/// Returns the length of the [`Bitmap`].
#[inline]
pub fn len(&self) -> usize {
Expand Down Expand Up @@ -491,3 +502,32 @@ impl From<Bitmap> for arrow_buffer::buffer::NullBuffer {
unsafe { arrow_buffer::buffer::NullBuffer::new_unchecked(buffer, null_count) }
}
}

// // Can't implement this because of `impl<P: AsRef<[bool]>> From<P> for Bitmap`
// #[cfg(feature = "arrow")]
// impl From<arrow_buffer::buffer::NullBuffer> for Bitmap {
// fn from(value: arrow_buffer::buffer::NullBuffer) -> Self {
// let buffer = value.buffer.into();
// let null_count = value.null_count;
// // Safety: null count is accurate
// unsafe { Self::from_unchecked(buffer, null_count) }
// }
// }

/// Round-trips a sliced [`Bitmap`] through an `arrow-rs` `NullBuffer`.
#[cfg(feature = "arrow")]
#[test]
fn test_arrow_nullbuffer_conversion() {
    let all_bits = [false, true, false, false, true, false, false, false, true];
    let expected_bits = [true, false, false, true, false, false];

    // Slice so that the bitmap starts at a non-zero bit offset.
    let mut arrow2_bitmap = Bitmap::from(all_bits);
    arrow2_bitmap.slice(1, 6);
    assert_eq!(arrow2_bitmap, Bitmap::from(expected_bits));

    let arrow1_nulls: arrow_buffer::buffer::NullBuffer = arrow2_bitmap.clone().into();
    assert_eq!(arrow1_nulls.null_count(), arrow2_bitmap.null_count());

    let roundtripped = Bitmap::from_arrow(arrow1_nulls);
    assert_eq!(roundtripped, arrow2_bitmap);
}
44 changes: 44 additions & 0 deletions src/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -541,3 +541,47 @@ impl<O: Offset> std::ops::Deref for OffsetsBuffer<O> {
self.0.as_slice()
}
}

/// `arrow-rs` -> `arrow2` conversion of an offsets buffer.
#[cfg(feature = "arrow")]
impl<O: Offset + arrow_buffer::ArrowNativeType> From<arrow_buffer::OffsetBuffer<O>>
    for OffsetsBuffer<O>
{
    fn from(arrow1_offsets: arrow_buffer::OffsetBuffer<O>) -> Self {
        // Peel off the two wrappers to reach the untyped `arrow-rs` buffer.
        let scalar_buffer = arrow1_offsets.into_inner();
        let raw_buffer: arrow_buffer::Buffer = scalar_buffer.into_inner();

        // SAFETY: the offsets come from an existing `arrow-rs` `OffsetBuffer`,
        // which is relied upon to already hold valid offsets.
        unsafe { Self::new_unchecked(raw_buffer.into()) }
    }
}

/// `arrow2` -> `arrow-rs` conversion of an offsets buffer.
#[cfg(feature = "arrow")]
impl<O: Offset + arrow_buffer::ArrowNativeType> From<OffsetsBuffer<O>>
    for arrow_buffer::OffsetBuffer<O>
{
    fn from(offsets_buffer: OffsetsBuffer<O>) -> Self {
        // Must be read before `into_inner` consumes the offsets.
        let num_elements = offsets_buffer.len();

        let raw_buffer: arrow_buffer::Buffer = offsets_buffer.into_inner().into();
        let scalar_buffer = arrow_buffer::ScalarBuffer::new(raw_buffer, 0, num_elements);

        Self::new(scalar_buffer)
    }
}

/// Round-trips a sliced [`OffsetsBuffer`] through an `arrow-rs` `OffsetBuffer`.
#[cfg(feature = "arrow")]
#[test]
fn test_arrow_offsets_buffer_conversion() {
    let expected = [1, 3, 3, 12];

    // Slice so that the conversion has to honour a non-zero start.
    let all_offsets = Offsets::try_from(vec![0, 1, 3, 3, 12, 42]).unwrap();
    let mut arrow2_offsets = OffsetsBuffer::<i32>::from(all_offsets);
    arrow2_offsets.slice(1, 4);
    assert_eq!(arrow2_offsets.as_slice(), expected);

    let arrow1_offsets = arrow_buffer::OffsetBuffer::<i32>::from(arrow2_offsets.clone());
    assert_eq!(arrow1_offsets.as_ref(), expected);

    let roundtripped = OffsetsBuffer::from(arrow1_offsets);
    assert_eq!(roundtripped, arrow2_offsets);
    assert_eq!(roundtripped.as_slice(), expected);
}

0 comments on commit c762f39

Please sign in to comment.