Skip to content

Commit

Permalink
feat: LogicalArray to support logical types (#124)
Browse files Browse the repository at this point in the history
  • Loading branch information
mbrobbel authored Jan 29, 2024
1 parent 8be7a6c commit e576ed3
Show file tree
Hide file tree
Showing 24 changed files with 387 additions and 21 deletions.
7 changes: 7 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,22 @@ rustdoc-args = ["--cfg", "docsrs"]
default = []
arrow-rs = ["dep:arrow-array", "dep:arrow-buffer", "dep:arrow-schema", "narrow-derive?/arrow-rs"]
derive = ["dep:narrow-derive"]
uuid = ["dep:uuid"]

[dependencies]
arrow-array = { version = "50.0.0", optional = true }
arrow-buffer = { version = "50.0.0", optional = true }
arrow-schema = { version = "50.0.0", optional = true }
narrow-derive = { path = "narrow-derive", version = "^0.4.0", optional = true }
uuid = { version = "1.7.0", optional = true }

[dev-dependencies]
arrow-cast = { version = "50.0.0", default-features = false, features = ["prettyprint"] }
bytes = "1.5.0"
criterion = { version = "0.5.1", default-features = false }
rand = { version = "0.8.5", default-features = false, features = ["small_rng"] }
parquet = { version = "50.0.0", default-features = false, features = ["arrow"] }
uuid = "1.7.0"

[profile.bench]
lto = true
Expand Down
4 changes: 4 additions & 0 deletions examples/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ fn main() {
ArrayType,
};
use parquet::arrow::{arrow_reader::ParquetRecordBatchReader, ArrowWriter};
use uuid::Uuid;

#[derive(ArrayType, Default)]
struct Bar(Option<bool>);
Expand All @@ -21,6 +22,7 @@ fn main() {
e: Option<Vec<Option<bool>>>,
f: Bar,
g: [u8; 8],
h: Uuid,
}
let input = [
Foo {
Expand All @@ -31,6 +33,7 @@ fn main() {
e: Some(vec![Some(true), None]),
f: Bar(Some(true)),
g: [1, 2, 3, 4, 5, 6, 7, 8],
h: Uuid::from_u128(1234),
},
Foo {
a: 42,
Expand All @@ -40,6 +43,7 @@ fn main() {
e: None,
f: Bar(None),
g: [9, 10, 11, 12, 13, 14, 15, 16],
h: Uuid::from_u128(42),
},
];

Expand Down
9 changes: 6 additions & 3 deletions src/array/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap},
buffer::{BufferRef, BufferRefMut, BufferType, VecBuffer},
nullable::Nullable,
validity::Validity,
validity::{Nullability, Validity},
Index, Length,
};

Expand All @@ -32,9 +32,12 @@ where
}
}

impl<const NULLABLE: bool, Buffer: BufferType> Array for BooleanArray<NULLABLE, Buffer> where
Bitmap<Buffer>: Validity<NULLABLE>
impl<const NULLABLE: bool, Buffer: BufferType> Array for BooleanArray<NULLABLE, Buffer>
where
Bitmap<Buffer>: Validity<NULLABLE>,
bool: Nullability<NULLABLE>,
{
type Item = <bool as Nullability<NULLABLE>>::Item;
}

impl<const NULLABLE: bool, Buffer: BufferType> BufferRef<u8> for BooleanArray<NULLABLE, Buffer>
Expand Down
4 changes: 3 additions & 1 deletion src/array/fixed_size_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::{
bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap},
buffer::{BufferMut, BufferType, VecBuffer},
nullable::Nullable,
validity::Validity,
validity::{Nullability, Validity},
Index, Length,
};

Expand Down Expand Up @@ -41,7 +41,9 @@ impl<const N: usize, T: Array, const NULLABLE: bool, Buffer: BufferType> Array
for FixedSizeListArray<N, T, NULLABLE, Buffer>
where
T: Validity<NULLABLE>,
[<T as Array>::Item; N]: Nullability<NULLABLE>,
{
type Item = <[<T as Array>::Item; N] as Nullability<NULLABLE>>::Item;
}

impl<const N: usize, T: Array, Buffer: BufferType> BitmapRef
Expand Down
4 changes: 3 additions & 1 deletion src/array/fixed_size_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap},
buffer::{Buffer, BufferType, VecBuffer},
nullable::Nullable,
validity::Validity,
validity::{Nullability, Validity},
FixedSize, Index, Length,
};
use std::{ops, slice::SliceIndex};
Expand Down Expand Up @@ -66,7 +66,9 @@ impl<T: FixedSize, const NULLABLE: bool, Buffer: BufferType> Array
for FixedSizePrimitiveArray<T, NULLABLE, Buffer>
where
<Buffer as BufferType>::Buffer<T>: Validity<NULLABLE>,
T: Nullability<NULLABLE>,
{
type Item = <T as Nullability<NULLABLE>>::Item;
}

// todo(mbrobbel): buffer_ref traits?
Expand Down
5 changes: 4 additions & 1 deletion src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ mod variable_size_list;
pub use variable_size_list::*;

/// Types that store their data in Arrow arrays.
pub trait Array {}
pub trait Array {
/// The items stored in this array.
type Item;
}

/// Types that can be stored in Arrow arrays.
// Note: the generic `T` is required to allow impls on foreign wrappers e.g.
Expand Down
9 changes: 6 additions & 3 deletions src/array/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap},
buffer::{BufferType, VecBuffer},
nullable::Nullable,
validity::Validity,
validity::{Nullability, Validity},
Index, Length,
};
use std::{
Expand Down Expand Up @@ -44,9 +44,12 @@ pub struct NullArray<T: Unit = (), const NULLABLE: bool = false, Buffer: BufferT
where
Nulls<T>: Validity<NULLABLE>;

impl<T: Unit, const NULLABLE: bool, Buffer: BufferType> Array for NullArray<T, NULLABLE, Buffer> where
Nulls<T>: Validity<NULLABLE>
impl<T: Unit, const NULLABLE: bool, Buffer: BufferType> Array for NullArray<T, NULLABLE, Buffer>
where
Nulls<T>: Validity<NULLABLE>,
T: Nullability<NULLABLE>,
{
type Item = <T as Nullability<NULLABLE>>::Item;
}

impl<T: Unit, const NULLABLE: bool, Buffer: BufferType> Default for NullArray<T, NULLABLE, Buffer>
Expand Down
4 changes: 3 additions & 1 deletion src/array/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::{
bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap},
buffer::{BufferType, VecBuffer},
offset::OffsetElement,
validity::Validity,
validity::{Nullability, Validity},
Index, Length,
};

Expand Down Expand Up @@ -44,7 +44,9 @@ impl<const NULLABLE: bool, OffsetItem: OffsetElement, Buffer: BufferType> Array
for StringArray<NULLABLE, OffsetItem, Buffer>
where
<Buffer as BufferType>::Buffer<OffsetItem>: Validity<NULLABLE>,
String: Nullability<NULLABLE>,
{
type Item = <String as Nullability<NULLABLE>>::Item;
}

impl<const NULLABLE: bool, OffsetItem: OffsetElement, Buffer: BufferType> Default
Expand Down
4 changes: 3 additions & 1 deletion src/array/struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap},
buffer::{BufferType, VecBuffer},
nullable::Nullable,
validity::Validity,
validity::{Nullability, Validity},
Length,
};

Expand All @@ -30,7 +30,9 @@ impl<T: StructArrayType, const NULLABLE: bool, Buffer: BufferType> Array
for StructArray<T, NULLABLE, Buffer>
where
<T as StructArrayType>::Array<Buffer>: Validity<NULLABLE>,
T: Nullability<NULLABLE>,
{
type Item = <T as Nullability<NULLABLE>>::Item;
}

impl<T: StructArrayType, const NULLABLE: bool, Buffer: BufferType> Default
Expand Down
4 changes: 3 additions & 1 deletion src/array/variable_size_binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap},
buffer::{Buffer, BufferType, VecBuffer},
offset::{Offset, OffsetElement},
validity::Validity,
validity::{Nullability, Validity},
Index, Length,
};

Expand All @@ -30,7 +30,9 @@ impl<const NULLABLE: bool, OffsetItem: OffsetElement, Buffer: BufferType> Array
for VariableSizeBinaryArray<NULLABLE, OffsetItem, Buffer>
where
<Buffer as BufferType>::Buffer<OffsetItem>: Validity<NULLABLE>,
Vec<u8>: Nullability<NULLABLE>,
{
type Item = <Vec<u8> as Nullability<NULLABLE>>::Item;
}

impl<const NULLABLE: bool, OffsetItem: OffsetElement, Buffer: BufferType> Default
Expand Down
4 changes: 3 additions & 1 deletion src/array/variable_size_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap},
buffer::{BufferType, VecBuffer},
offset::{Offset, OffsetElement},
validity::Validity,
validity::{Nullability, Validity},
Index, Length,
};
use std::fmt::{Debug, Formatter, Result};
Expand All @@ -24,7 +24,9 @@ impl<T: Array, const NULLABLE: bool, OffsetItem: OffsetElement, Buffer: BufferTy
for VariableSizeListArray<T, NULLABLE, OffsetItem, Buffer>
where
<Buffer as BufferType>::Buffer<OffsetItem>: Validity<NULLABLE>,
Vec<T>: Nullability<NULLABLE>,
{
type Item = <Vec<T> as Nullability<NULLABLE>>::Item;
}

impl<T: Array, const NULLABLE: bool, OffsetItem: OffsetElement, Buffer: BufferType> Debug
Expand Down
9 changes: 7 additions & 2 deletions src/arrow/array/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,20 @@
use std::sync::Arc;

use crate::{
array::BooleanArray, arrow::ArrowArray, bitmap::Bitmap, buffer::BufferType, nullable::Nullable,
validity::Validity,
array::BooleanArray,
arrow::ArrowArray,
bitmap::Bitmap,
buffer::BufferType,
nullable::Nullable,
validity::{Nullability, Validity},
};
use arrow_buffer::{BooleanBuffer, NullBuffer};
use arrow_schema::{DataType, Field};

impl<const NULLABLE: bool, Buffer: BufferType> ArrowArray for BooleanArray<NULLABLE, Buffer>
where
Bitmap<Buffer>: Validity<NULLABLE>,
bool: Nullability<NULLABLE>,
{
type Array = arrow_array::BooleanArray;

Expand Down
3 changes: 2 additions & 1 deletion src/arrow/array/fixed_size_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@ use crate::{
bitmap::Bitmap,
buffer::BufferType,
nullable::Nullable,
validity::Validity,
validity::{Nullability, Validity},
};

impl<const N: usize, T: ArrowArray, const NULLABLE: bool, Buffer: BufferType> ArrowArray
for FixedSizeListArray<N, T, NULLABLE, Buffer>
where
T: Validity<NULLABLE>,
[<T as Array>::Item; N]: Nullability<NULLABLE>,
{
type Array = arrow_array::FixedSizeListArray;

Expand Down
10 changes: 8 additions & 2 deletions src/arrow/array/fixed_size_primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,13 @@ use arrow_buffer::{NullBuffer, ScalarBuffer};
use arrow_schema::{DataType, Field};

use crate::{
array::FixedSizePrimitiveArray, arrow::ArrowArray, bitmap::Bitmap, buffer::BufferType,
nullable::Nullable, validity::Validity, FixedSize,
array::FixedSizePrimitiveArray,
arrow::ArrowArray,
bitmap::Bitmap,
buffer::BufferType,
nullable::Nullable,
validity::{Nullability, Validity},
FixedSize,
};

/// Create the `ArrowArray` impl and required conversions.
Expand All @@ -21,6 +26,7 @@ macro_rules! arrow_array_convert {
for FixedSizePrimitiveArray<$ty, NULLABLE, Buffer>
where
<Buffer as BufferType>::Buffer<$ty>: Validity<NULLABLE>,
$ty: Nullability<NULLABLE>,
{
type Array = arrow_array::PrimitiveArray<$primitive_type>;

Expand Down
73 changes: 73 additions & 0 deletions src/arrow/array/logical.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
//! Interop with [`arrow-rs`] arrays for logical arrays.
use std::sync::Arc;

use crate::{
array::UnionType,
arrow::ArrowArray,
buffer::BufferType,
logical::{LogicalArray, LogicalArrayType},
offset::OffsetElement,
validity::{Nullability, Validity},
};

/// Arrow interop for [`LogicalArray`].
///
/// Nothing is defined here from scratch: the associated arrow array type and
/// the field construction are both taken from the [`ArrowArray`] implementation
/// of `<T as LogicalArrayType>::Array<Buffer, OffsetItem, UnionLayout>`.
impl<T, const NULLABLE: bool, Buffer, OffsetItem, UnionLayout> ArrowArray
    for LogicalArray<T, NULLABLE, Buffer, OffsetItem, UnionLayout>
where
    T: LogicalArrayType,
    Buffer: BufferType,
    OffsetItem: OffsetElement,
    UnionLayout: UnionType,
    <T as LogicalArrayType>::Array<Buffer, OffsetItem, UnionLayout>:
        Validity<NULLABLE> + ArrowArray,
    T: Nullability<NULLABLE>,
{
    /// Same arrow array type as the one used by the array `T` maps to.
    type Array =
        <<T as LogicalArrayType>::Array<Buffer, OffsetItem, UnionLayout> as ArrowArray>::Array;

    /// Returns the field for `name` exactly as produced by the `as_field` of
    /// the array `T` maps to.
    fn as_field(name: &str) -> arrow_schema::Field {
        <<T as LogicalArrayType>::Array<Buffer, OffsetItem, UnionLayout> as ArrowArray>::as_field(
            name,
        )
    }
}

/// Builds a [`LogicalArray`] from a dynamically typed arrow array.
///
/// Available whenever the wrapped storage type can itself be created from an
/// `Arc<dyn arrow_array::Array>`; that conversion does all the work.
impl<T, const NULLABLE: bool, Buffer, OffsetItem, UnionLayout> From<Arc<dyn arrow_array::Array>>
    for LogicalArray<T, NULLABLE, Buffer, OffsetItem, UnionLayout>
where
    T: LogicalArrayType,
    Buffer: BufferType,
    OffsetItem: OffsetElement,
    UnionLayout: UnionType,
    <T as LogicalArrayType>::Array<Buffer, OffsetItem, UnionLayout>: Validity<NULLABLE>,
    <<T as LogicalArrayType>::Array<Buffer, OffsetItem, UnionLayout> as Validity<NULLABLE>>::Storage<Buffer>:
        From<Arc<dyn arrow_array::Array>>,
{
    fn from(value: Arc<dyn arrow_array::Array>) -> Self {
        // Convert into the wrapped storage first, then wrap it.
        let storage = Into::into(value);
        Self(storage)
    }
}

/// Converts a [`LogicalArray`] into an `arrow_array::FixedSizeListArray`.
///
/// Available whenever the wrapped storage type converts into
/// `arrow_array::FixedSizeListArray`; the wrapper is dropped and the storage
/// conversion is used as is.
impl<T, const NULLABLE: bool, Buffer, OffsetItem, UnionLayout>
    From<LogicalArray<T, NULLABLE, Buffer, OffsetItem, UnionLayout>>
    for arrow_array::FixedSizeListArray
where
    T: LogicalArrayType,
    Buffer: BufferType,
    OffsetItem: OffsetElement,
    UnionLayout: UnionType,
    <T as LogicalArrayType>::Array<Buffer, OffsetItem, UnionLayout>: Validity<NULLABLE>,
    arrow_array::FixedSizeListArray: From<
        <<T as LogicalArrayType>::Array<Buffer, OffsetItem, UnionLayout> as Validity<
            NULLABLE,
        >>::Storage<Buffer>,
    >,
{
    fn from(value: LogicalArray<T, NULLABLE, Buffer, OffsetItem, UnionLayout>) -> Self {
        // Take the wrapped storage out of the logical array and convert it.
        let storage = value.0;
        storage.into()
    }
}
1 change: 1 addition & 0 deletions src/arrow/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ mod fixed_size_primitive;
mod string;
mod r#struct;
pub use r#struct::StructArrayTypeFields;
mod logical;
mod variable_size_list;
Loading

0 comments on commit e576ed3

Please sign in to comment.