From 4d0d33390521c369f26f9aa2940f13c6266d0ad7 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Wed, 2 Aug 2023 00:13:36 +0200 Subject: [PATCH 01/27] Setup `arrow-buffer` interop --- .github/workflows/bench.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/test.yml | 16 ++++---- Cargo.toml | 4 +- src/bitmap/mod.rs | 15 +++++++ src/buffer.rs | 75 +++++++++++++++++++++++++++++++++++ 6 files changed, 103 insertions(+), 11 deletions(-) diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 69644dcc..e2ffe50e 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -31,7 +31,7 @@ jobs: ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-bench- ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}- ${{ runner.os }}-cargo- - - run: cargo bench --bench narrow -- --output-format=bencher | tee output.txt + - run: cargo bench --bench narrow --all-features -- --output-format=bencher | tee output.txt - uses: actions/upload-artifact@v3 with: name: benchmark-results diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 2a29c530..67ffa1ae 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -32,7 +32,7 @@ jobs: ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}- ${{ runner.os }}-cargo- - run: cargo install cargo-smart-release@0.20.0 --locked || true - - run: cargo check --all + - run: cargo check --all --all-features - id: changelog run: cargo changelog --no-preview narrow narrow-derive || echo "skip=true" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c19bf575..32f301cc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,7 +27,7 @@ jobs: ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-msrv- ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}- ${{ runner.os }}-cargo- - - run: cargo check --all + - run: cargo check --all --all-features check: name: Check @@ -49,7 +49,7 @@ jobs: ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-check- ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}- ${{ runner.os }}-cargo- - - run: cargo check --all + - run: cargo check --all --all-features test: name: Test @@ -75,8 +75,8 @@ jobs: - uses: dtolnay/install@master with: crate: cargo-expand - - run: cargo test --all - - run: cargo test --benches + - run: cargo test --all --all-features + - run: cargo test --benches --all-features rustfmt: name: Rustfmt @@ -110,7 +110,7 @@ jobs: ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-clippy- ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}- ${{ runner.os }}-cargo- - - run: cargo clippy --all -- -Dwarnings + - run: cargo clippy --all --all-features -- -Dwarnings miri: name: Miri @@ -133,7 +133,7 @@ jobs: ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}- ${{ runner.os }}-cargo- - run: cargo miri setup - - run: cargo miri test + - run: cargo miri test --all-features coverage: name: Coverage @@ -163,8 +163,8 @@ jobs: - uses: dtolnay/install@master with: crate: cargo-expand - - run: cargo build --all - - run: cargo test --all + - run: cargo build --all --all-features + - run: cargo test --all --all-features env: LLVM_PROFILE_FILE: "narrow-%p-%m.profraw" - name: Install grcov diff --git a/Cargo.toml b/Cargo.toml index 1e8095fd..0c5d5b57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,11 +28,13 @@ keywords.workspace = true categories.workspace = true [features] -default = ["derive", "unsafe"] +default = ["arrow-buffer", "derive", "unsafe"] +arrow-buffer = ["dep:arrow-buffer"] derive = ["dep:narrow-derive"] unsafe = [] [dependencies] +arrow-buffer = { version = "44.0.0", default-features = false, optional = true } narrow-derive = { path = "narrow-derive", version = "^0.3.4", optional = true } [dev-dependencies] diff --git a/src/bitmap/mod.rs b/src/bitmap/mod.rs index 83b26995..1c947969 100644 --- a/src/bitmap/mod.rs +++ b/src/bitmap/mod.rs @@ -471,4 +471,19 @@ mod tests { mem::size_of::>() + 2 * mem::size_of::() ); } + + #[test] + #[cfg(feature = "arrow-buffer")] + fn arrow_buffer() { + use crate::buffer::{ArrowBuffer, ArrowMutableBuffer}; + + let input = vec![true, false, true]; + let bitmap = input.into_iter().collect::>(); + assert_eq!(bitmap.len(), 3); + + let input = vec![true, false, true]; + let bitmap = input.into_iter().collect::>(); + assert_eq!(bitmap.len(), 3); + assert_eq!(bitmap.into_iter().collect::>(), [true, false, true]); + } } diff --git a/src/buffer.rs b/src/buffer.rs index b01f603b..e87a34a9 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -358,6 +358,66 @@ impl BufferMut for Rc<[T]> { } } +#[cfg(feature = "arrow-buffer")] +/// A [BufferType] implementation for [arrow_buffer::Buffer]. +pub struct ArrowBuffer; + +#[cfg(feature = "arrow-buffer")] +impl BufferType for ArrowBuffer { + type Buffer = arrow_buffer::Buffer; +} + +#[cfg(feature = "arrow-buffer")] +impl Buffer for arrow_buffer::Buffer { + fn as_slice(&self) -> &[T] { + // This only works if FixedSize: arrow_buffer::ArrowNativeType, however that forces us to remove some FixedSize impls. + // self.typed_data() + + // Instead we copy the logic here and rely on our trait for safety. + // https://github.com/apache/arrow-rs/blob/5724cf21c23aa9d5a3ef06b6381cf267903746ee/arrow-buffer/src/buffer/immutable.rs#L228-L235 + let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::() }; + assert!(prefix.is_empty() && suffix.is_empty()); + offsets + } +} + +#[cfg(feature = "arrow-buffer")] +/// A [BufferType] implementation for [arrow_buffer::Buffer]. +pub struct ArrowMutableBuffer; + +#[cfg(feature = "arrow-buffer")] +impl BufferType for ArrowMutableBuffer { + type Buffer = arrow_buffer::Buffer; +} + +#[cfg(feature = "arrow-buffer")] +impl Buffer for arrow_buffer::MutableBuffer { + fn as_slice(&self) -> &[T] { + // This only works if FixedSize: arrow_buffer::ArrowNativeType, however that forces us to remove some FixedSize impls. + // self.typed_data() + + // Instead we copy the logic here and rely on our trait for safety. + // https://github.com/apache/arrow-rs/blob/5724cf21c23aa9d5a3ef06b6381cf267903746ee/arrow-buffer/src/buffer/immutable.rs#L228-L235 + let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::() }; + assert!(prefix.is_empty() && suffix.is_empty()); + offsets + } +} + +#[cfg(feature = "arrow-buffer")] +impl BufferMut for arrow_buffer::MutableBuffer { + fn as_mut_slice(&mut self) -> &mut [T] { + // This only works if FixedSize: arrow_buffer::ArrowNativeType, however that forces us to remove some FixedSize impls. + // self.typed_data() + + // Instead we copy the logic here and rely on our trait for safety. + // https://github.com/apache/arrow-rs/blob/5724cf21c23aa9d5a3ef06b6381cf267903746ee/arrow-buffer/src/buffer/mutable.rs#L351-L359 + let (prefix, offsets, suffix) = unsafe { self.as_slice_mut().align_to_mut::() }; + assert!(prefix.is_empty() && suffix.is_empty()); + offsets + } +} + #[cfg(test)] mod tests { use super::*; @@ -422,4 +482,19 @@ mod tests { &[0, 2, 3, 4, 5, 6] ); } + + #[test] + #[cfg(feature = "arrow-buffer")] + fn arrow() { + let buffer = arrow_buffer::Buffer::from_vec(vec![1, 2, 3, 4]); + assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); + + let mut buffer = arrow_buffer::MutableBuffer::from_vec(vec![1u64, 2, 3, 4]); + assert_eq!( + <_ as BufferMut>::as_mut_slice(&mut buffer), + &[1, 2, 3, 4] + ); + <_ as BufferMut>::as_mut_slice(&mut buffer)[3] = 42; + assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 42]); + } } From 3cfe877c2db7c29a9ec91f02e34102f12c1e1588 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Wed, 2 Aug 2023 00:30:41 +0200 Subject: [PATCH 02/27] Some fixes and tests --- src/array/fixed_size_primitive.rs | 16 ++++++++++++++++ src/buffer.rs | 2 +- src/length.rs | 14 ++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/array/fixed_size_primitive.rs b/src/array/fixed_size_primitive.rs index 33329acd..f6c42b4f 100644 --- a/src/array/fixed_size_primitive.rs +++ b/src/array/fixed_size_primitive.rs @@ -205,4 +205,20 @@ mod tests { mem::size_of::() + mem::size_of::() ); } + + #[test] + #[cfg(feature = "arrow-buffer")] + fn arrow_buffer() { + use crate::buffer::{ArrowBuffer, ArrowMutableBuffer}; + + let input = [1, 2, 3, 4]; + let array = input.into_iter().collect::>(); + // TODO(mbrobbel): nr of bytes + assert_eq!(array.len(), 4); + + let input = [Some(1), None, Some(3), Some(4)]; + let array = input + .into_iter() + .collect::>(); + } } diff --git a/src/buffer.rs b/src/buffer.rs index e87a34a9..6d850a9d 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -387,7 +387,7 @@ pub struct ArrowMutableBuffer; #[cfg(feature = "arrow-buffer")] impl BufferType for ArrowMutableBuffer { - type Buffer = arrow_buffer::Buffer; + type Buffer = arrow_buffer::MutableBuffer; } #[cfg(feature = "arrow-buffer")] diff --git a/src/length.rs b/src/length.rs index f685fd7d..d4540c70 100644 --- a/src/length.rs +++ b/src/length.rs @@ -90,3 +90,17 @@ impl Length for Option { } } } + +#[cfg(feature = "arrow-buffer")] +impl Length for arrow_buffer::Buffer { + fn len(&self) -> usize { + arrow_buffer::Buffer::len(self) + } +} + +#[cfg(feature = "arrow-buffer")] +impl Length for arrow_buffer::MutableBuffer { + fn len(&self) -> usize { + arrow_buffer::MutableBuffer::len(self) + } +} From 7b5dab923acb8ed8656bc6567a6a0615bbedcf43 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Fri, 4 Aug 2023 22:43:42 +0200 Subject: [PATCH 03/27] Use `BufferBuilder` abstraction --- Cargo.toml | 2 +- src/array/fixed_size_primitive.rs | 13 +++--- src/bitmap/mod.rs | 4 +- src/buffer.rs | 71 +++++++++---------------------- src/fixed_size.rs | 11 ++++- src/length.rs | 16 +++---- 6 files changed, 47 insertions(+), 70 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 0c5d5b57..9ee539e4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ derive = ["dep:narrow-derive"] unsafe = [] [dependencies] -arrow-buffer = { version = "44.0.0", default-features = false, optional = true } +arrow-buffer = { git = "https://github.com/mbrobbel/arrow-rs.git", branch = "arrow-native-type", optional = true } narrow-derive = { path = "narrow-derive", version = "^0.3.4", optional = true } [dev-dependencies] diff --git a/src/array/fixed_size_primitive.rs b/src/array/fixed_size_primitive.rs index f6c42b4f..1751be74 100644 --- a/src/array/fixed_size_primitive.rs +++ b/src/array/fixed_size_primitive.rs @@ -209,16 +209,17 @@ mod tests { #[test] #[cfg(feature = "arrow-buffer")] fn arrow_buffer() { - use crate::buffer::{ArrowBuffer, ArrowMutableBuffer}; + use crate::buffer::ArrowBuffer; let input = [1, 2, 3, 4]; - let array = input.into_iter().collect::>(); - // TODO(mbrobbel): nr of bytes + let mut array = input.into_iter().collect::>(); assert_eq!(array.len(), 4); + // Use arrow_buffer + array.0.append_n(5, 5); + assert_eq!(array.0.as_slice(), &[1, 2, 3, 4, 5, 5, 5, 5, 5]); let input = [Some(1), None, Some(3), Some(4)]; - let array = input - .into_iter() - .collect::>(); + let array = input.into_iter().collect::>(); + assert_eq!(array.len(), 4); } } diff --git a/src/bitmap/mod.rs b/src/bitmap/mod.rs index 1c947969..dabacf5d 100644 --- a/src/bitmap/mod.rs +++ b/src/bitmap/mod.rs @@ -475,14 +475,14 @@ mod tests { #[test] #[cfg(feature = "arrow-buffer")] fn arrow_buffer() { - use crate::buffer::{ArrowBuffer, ArrowMutableBuffer}; + use crate::buffer::ArrowBuffer; let input = vec![true, false, true]; let bitmap = input.into_iter().collect::>(); assert_eq!(bitmap.len(), 3); let input = vec![true, false, true]; - let bitmap = input.into_iter().collect::>(); + let bitmap = input.into_iter().collect::>(); assert_eq!(bitmap.len(), 3); assert_eq!(bitmap.into_iter().collect::>(), [true, false, true]); } diff --git a/src/buffer.rs b/src/buffer.rs index 6d850a9d..8141cf3a 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -359,64 +359,33 @@ impl BufferMut for Rc<[T]> { } #[cfg(feature = "arrow-buffer")] -/// A [BufferType] implementation for [arrow_buffer::Buffer]. -pub struct ArrowBuffer; +mod arrow { + use super::{Buffer, BufferMut, BufferType}; + use crate::FixedSize; + use arrow_buffer::{ArrowNativeType, BufferBuilder}; -#[cfg(feature = "arrow-buffer")] -impl BufferType for ArrowBuffer { - type Buffer = arrow_buffer::Buffer; -} - -#[cfg(feature = "arrow-buffer")] -impl Buffer for arrow_buffer::Buffer { - fn as_slice(&self) -> &[T] { - // This only works if FixedSize: arrow_buffer::ArrowNativeType, however that forces us to remove some FixedSize impls. - // self.typed_data() + /// A [BufferType] implementation for [arrow_buffer::BufferBuilder]. + pub struct ArrowBuffer; - // Instead we copy the logic here and rely on our trait for safety. - // https://github.com/apache/arrow-rs/blob/5724cf21c23aa9d5a3ef06b6381cf267903746ee/arrow-buffer/src/buffer/immutable.rs#L228-L235 - let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::() }; - assert!(prefix.is_empty() && suffix.is_empty()); - offsets + impl BufferType for ArrowBuffer { + type Buffer = BufferBuilder; } -} - -#[cfg(feature = "arrow-buffer")] -/// A [BufferType] implementation for [arrow_buffer::Buffer]. -pub struct ArrowMutableBuffer; - -#[cfg(feature = "arrow-buffer")] -impl BufferType for ArrowMutableBuffer { - type Buffer = arrow_buffer::MutableBuffer; -} -#[cfg(feature = "arrow-buffer")] -impl Buffer for arrow_buffer::MutableBuffer { - fn as_slice(&self) -> &[T] { - // This only works if FixedSize: arrow_buffer::ArrowNativeType, however that forces us to remove some FixedSize impls. - // self.typed_data() + impl Buffer for BufferBuilder { + fn as_slice(&self) -> &[T] { + BufferBuilder::as_slice(self) + } + } - // Instead we copy the logic here and rely on our trait for safety. - // https://github.com/apache/arrow-rs/blob/5724cf21c23aa9d5a3ef06b6381cf267903746ee/arrow-buffer/src/buffer/immutable.rs#L228-L235 - let (prefix, offsets, suffix) = unsafe { self.as_slice().align_to::() }; - assert!(prefix.is_empty() && suffix.is_empty()); - offsets + impl BufferMut for BufferBuilder { + fn as_mut_slice(&mut self) -> &mut [T] { + BufferBuilder::as_slice_mut(self) + } } } #[cfg(feature = "arrow-buffer")] -impl BufferMut for arrow_buffer::MutableBuffer { - fn as_mut_slice(&mut self) -> &mut [T] { - // This only works if FixedSize: arrow_buffer::ArrowNativeType, however that forces us to remove some FixedSize impls. - // self.typed_data() - - // Instead we copy the logic here and rely on our trait for safety. - // https://github.com/apache/arrow-rs/blob/5724cf21c23aa9d5a3ef06b6381cf267903746ee/arrow-buffer/src/buffer/mutable.rs#L351-L359 - let (prefix, offsets, suffix) = unsafe { self.as_slice_mut().align_to_mut::() }; - assert!(prefix.is_empty() && suffix.is_empty()); - offsets - } -} +pub use arrow::*; #[cfg(test)] mod tests { @@ -486,10 +455,10 @@ mod tests { #[test] #[cfg(feature = "arrow-buffer")] fn arrow() { - let buffer = arrow_buffer::Buffer::from_vec(vec![1, 2, 3, 4]); + let buffer = arrow_buffer::BufferBuilder::from_iter([1, 2, 3, 4]); assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); - let mut buffer = arrow_buffer::MutableBuffer::from_vec(vec![1u64, 2, 3, 4]); + let mut buffer = arrow_buffer::BufferBuilder::from_iter([1u64, 2, 3, 4]); assert_eq!( <_ as BufferMut>::as_mut_slice(&mut buffer), &[1, 2, 3, 4] diff --git a/src/fixed_size.rs b/src/fixed_size.rs index 67c20b5f..17460314 100644 --- a/src/fixed_size.rs +++ b/src/fixed_size.rs @@ -10,11 +10,20 @@ use std::fmt::Debug; /// fixed-size types. /// /// This trait is sealed to prevent downstream implementations. +#[cfg(not(feature = "arrow-buffer"))] pub trait FixedSize: ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static { /// The fixed-size of this type in bytes. const SIZE: usize = std::mem::size_of::(); } +#[cfg(feature = "arrow-buffer")] +pub trait FixedSize: + arrow_buffer::ArrowNativeType + ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static +{ + /// The fixed-size of this type in bytes. + const SIZE: usize = std::mem::size_of::(); +} + impl FixedSize for i8 {} impl FixedSize for i16 {} impl FixedSize for i32 {} @@ -47,7 +56,7 @@ mod sealed { #[cfg(test)] mod tests { - use super::*; + use crate::FixedSize; #[test] fn size() { diff --git a/src/length.rs b/src/length.rs index d4540c70..6b6f904b 100644 --- a/src/length.rs +++ b/src/length.rs @@ -92,15 +92,13 @@ impl Length for Option { } #[cfg(feature = "arrow-buffer")] -impl Length for arrow_buffer::Buffer { - fn len(&self) -> usize { - arrow_buffer::Buffer::len(self) - } -} +mod arrow { + use crate::Length; + use arrow_buffer::{ArrowNativeType, BufferBuilder}; -#[cfg(feature = "arrow-buffer")] -impl Length for arrow_buffer::MutableBuffer { - fn len(&self) -> usize { - arrow_buffer::MutableBuffer::len(self) + impl Length for BufferBuilder { + fn len(&self) -> usize { + BufferBuilder::len(self) + } } } From 26e746cb40da0a90cb8311a3e4108256f57859b9 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Fri, 4 Aug 2023 23:32:44 +0200 Subject: [PATCH 04/27] Add `arrow-array` feature for zero-copy array interop --- Cargo.toml | 4 ++- src/array/fixed_size_primitive.rs | 56 +++++++++++++++++++++++++++++++ src/bitmap/mod.rs | 15 +++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 9ee539e4..97b8b05a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,12 +28,14 @@ keywords.workspace = true categories.workspace = true [features] -default = ["arrow-buffer", "derive", "unsafe"] +default = ["arrow-array", "arrow-buffer", "derive", "unsafe"] +arrow-array = ["dep:arrow-array", "arrow-buffer"] arrow-buffer = ["dep:arrow-buffer"] derive = ["dep:narrow-derive"] unsafe = [] [dependencies] +arrow-array = { git = "https://github.com/mbrobbel/arrow-rs.git", branch = "arrow-native-type", optional = true } arrow-buffer = { git = "https://github.com/mbrobbel/arrow-rs.git", branch = "arrow-native-type", optional = true } narrow-derive = { path = "narrow-derive", version = "^0.3.4", optional = true } diff --git a/src/array/fixed_size_primitive.rs b/src/array/fixed_size_primitive.rs index 1751be74..27559f5d 100644 --- a/src/array/fixed_size_primitive.rs +++ b/src/array/fixed_size_primitive.rs @@ -136,6 +136,62 @@ impl BitmapRefMut for FixedSizePrimitiveArray< impl ValidityBitmap for FixedSizePrimitiveArray {} +#[cfg(feature = "arrow-array")] +mod arrow { + use super::FixedSizePrimitiveArray; + use crate::{buffer::ArrowBuffer, FixedSize, Length}; + use arrow_array::{types::ArrowPrimitiveType, PrimitiveArray}; + use arrow_buffer::{NullBuffer, ScalarBuffer}; + + impl> + From> for PrimitiveArray + { + fn from(mut value: FixedSizePrimitiveArray) -> Self { + let len = value.len(); + Self::new(ScalarBuffer::new(value.0.finish(), 0, len), None) + } + } + + impl> + From> for PrimitiveArray + { + fn from(mut value: FixedSizePrimitiveArray) -> Self { + let len = value.len(); + Self::new( + ScalarBuffer::new(value.0.data.finish(), 0, len), + Some(NullBuffer::new(value.0.validity.into())), + ) + } + } + + #[cfg(test)] + mod test { + + #[test] + #[cfg(feature = "arrow-array")] + fn arrow_array() { + use crate::{array::Int8Array, buffer::ArrowBuffer}; + use arrow_array::{types::Int8Type, Array, PrimitiveArray}; + + let input = [1, 2, 3, 4]; + let array = input.into_iter().collect::>(); + let array = PrimitiveArray::::from(array); + assert_eq!(array.len(), 4); + + let input = [Some(1), None, Some(3), Some(4)]; + let array = input.into_iter().collect::>(); + let array = PrimitiveArray::::from(array); + assert_eq!(array.len(), 4); + assert_eq!(array.null_count(), 1); + } + + #[test] + fn convert() {} + } +} + +pub use arrow::*; + #[cfg(test)] mod tests { use super::*; diff --git a/src/bitmap/mod.rs b/src/bitmap/mod.rs index dabacf5d..6dc4947c 100644 --- a/src/bitmap/mod.rs +++ b/src/bitmap/mod.rs @@ -299,6 +299,21 @@ impl Length for Bitmap { impl ValidityBitmap for Bitmap {} +#[cfg(feature = "arrow-buffer")] +mod arrow { + use super::Bitmap; + use crate::buffer::ArrowBuffer; + use arrow_buffer::BooleanBuffer; + + impl From> for BooleanBuffer { + fn from(mut value: Bitmap) -> Self { + BooleanBuffer::new(value.buffer.finish(), 0, value.bits) + } + } +} + +pub use arrow::*; + #[cfg(test)] mod tests { use crate::buffer::{ArrayBuffer, BoxBuffer, BufferRefMut, SliceBuffer}; From e2b40f1c10c41132a764d3a67342d0afa9abf44f Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Sat, 5 Aug 2023 13:52:48 +0200 Subject: [PATCH 05/27] Make conversion generic over buffer type --- src/array/fixed_size_primitive.rs | 37 ++++++++++++++++++++++--------- src/bitmap/mod.rs | 11 +++++---- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/src/array/fixed_size_primitive.rs b/src/array/fixed_size_primitive.rs index 27559f5d..d5e9c38a 100644 --- a/src/array/fixed_size_primitive.rs +++ b/src/array/fixed_size_primitive.rs @@ -139,26 +139,35 @@ impl ValidityBitmap for FixedSizePrimitiveArra #[cfg(feature = "arrow-array")] mod arrow { use super::FixedSizePrimitiveArray; - use crate::{buffer::ArrowBuffer, FixedSize, Length}; + use crate::{ + bitmap::Bitmap, + buffer::{ArrowBuffer, BufferType}, + FixedSize, Length, + }; use arrow_array::{types::ArrowPrimitiveType, PrimitiveArray}; - use arrow_buffer::{NullBuffer, ScalarBuffer}; + use arrow_buffer::{BooleanBuffer, NullBuffer, ScalarBuffer}; - impl> - From> for PrimitiveArray + impl, Buffer: BufferType> + From> for PrimitiveArray + where + ::Buffer: Length + Into<::Buffer>, { - fn from(mut value: FixedSizePrimitiveArray) -> Self { + fn from(value: FixedSizePrimitiveArray) -> Self { let len = value.len(); - Self::new(ScalarBuffer::new(value.0.finish(), 0, len), None) + Self::new(ScalarBuffer::new(value.0.into().finish(), 0, len), None) } } - impl> - From> for PrimitiveArray + impl, Buffer: BufferType> + From> for PrimitiveArray + where + ::Buffer: Length + Into<::Buffer>, + Bitmap: Into, { - fn from(mut value: FixedSizePrimitiveArray) -> Self { + fn from(value: FixedSizePrimitiveArray) -> Self { let len = value.len(); Self::new( - ScalarBuffer::new(value.0.data.finish(), 0, len), + ScalarBuffer::new(value.0.data.into().finish(), 0, len), Some(NullBuffer::new(value.0.validity.into())), ) } @@ -170,7 +179,7 @@ mod arrow { #[test] #[cfg(feature = "arrow-array")] fn arrow_array() { - use crate::{array::Int8Array, buffer::ArrowBuffer}; + use crate::{array::Int8Array, bitmap::ValidityBitmap, buffer::ArrowBuffer}; use arrow_array::{types::Int8Type, Array, PrimitiveArray}; let input = [1, 2, 3, 4]; @@ -178,8 +187,14 @@ mod arrow { let array = PrimitiveArray::::from(array); assert_eq!(array.len(), 4); + // let input = [1, 2, 3, 4]; + // let array = input.into_iter().collect::>(); + // let array = PrimitiveArray::::from(array); + // assert_eq!(array.len(), 4); + let input = [Some(1), None, Some(3), Some(4)]; let array = input.into_iter().collect::>(); + assert_eq!(array.null_count(), 1); let array = PrimitiveArray::::from(array); assert_eq!(array.len(), 4); assert_eq!(array.null_count(), 1); diff --git a/src/bitmap/mod.rs b/src/bitmap/mod.rs index 6dc4947c..ae44cdf3 100644 --- a/src/bitmap/mod.rs +++ b/src/bitmap/mod.rs @@ -302,12 +302,15 @@ impl ValidityBitmap for Bitmap {} #[cfg(feature = "arrow-buffer")] mod arrow { use super::Bitmap; - use crate::buffer::ArrowBuffer; + use crate::buffer::{ArrowBuffer, BufferType}; use arrow_buffer::BooleanBuffer; - impl From> for BooleanBuffer { - fn from(mut value: Bitmap) -> Self { - BooleanBuffer::new(value.buffer.finish(), 0, value.bits) + impl From> for BooleanBuffer + where + ::Buffer: Into<::Buffer>, + { + fn from(value: Bitmap) -> Self { + BooleanBuffer::new(value.buffer.into().finish(), 0, value.bits) } } } From 44e35671a0935ad82c10b93960c8667af09d5dc8 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Sat, 5 Aug 2023 14:21:41 +0200 Subject: [PATCH 06/27] Add `BufferType` implementation for `arrow_buffer::ScalarBuffer` --- src/buffer.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/buffer.rs b/src/buffer.rs index 8141cf3a..fbca21b9 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -362,7 +362,7 @@ impl BufferMut for Rc<[T]> { mod arrow { use super::{Buffer, BufferMut, BufferType}; use crate::FixedSize; - use arrow_buffer::{ArrowNativeType, BufferBuilder}; + use arrow_buffer::{ArrowNativeType, BufferBuilder, ScalarBuffer}; /// A [BufferType] implementation for [arrow_buffer::BufferBuilder]. pub struct ArrowBuffer; @@ -382,6 +382,19 @@ mod arrow { BufferBuilder::as_slice_mut(self) } } + + /// A [BufferType] implementation for [arrow_buffer::ScalarBuffer]. + pub struct ArrowScalarBuffer; + + impl BufferType for ArrowScalarBuffer { + type Buffer = ScalarBuffer; + } + + impl Buffer for ScalarBuffer { + fn as_slice(&self) -> &[T] { + self + } + } } #[cfg(feature = "arrow-buffer")] @@ -465,5 +478,8 @@ mod tests { ); <_ as BufferMut>::as_mut_slice(&mut buffer)[3] = 42; assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 42]); + + let buffer = arrow_buffer::ScalarBuffer::from_iter([1, 2, 3, 4]); + assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); } } From 47de9f053512ea99e1ed628208d2c9b3b4832bc5 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Sat, 5 Aug 2023 20:44:10 +0200 Subject: [PATCH 07/27] Bump MSRV to `1.70.0` --- .github/workflows/test.yml | 2 +- Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 32f301cc..4b500567 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - uses: dtolnay/rust-toolchain@1.65.0 + - uses: dtolnay/rust-toolchain@1.70.0 id: rust-toolchain - uses: actions/cache@v3 with: diff --git a/Cargo.toml b/Cargo.toml index 97b8b05a..d2af3cd1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [workspace.package] authors = ["Matthijs Brobbel "] edition = "2021" -rust-version = "1.65.0" +rust-version = "1.70.0" description = "An implementation of Apache Arrow" readme = "README.md" repository = "https://github.com/mbrobbel/narrow" From 9494ce2f8bf9058e9981f1c1bda0561f93e4f317 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 7 Aug 2023 13:21:57 +0200 Subject: [PATCH 08/27] Implicit conversion to `ArrowBuffer` from `VecBuffer` is now supported --- src/array/fixed_size_primitive.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/array/fixed_size_primitive.rs b/src/array/fixed_size_primitive.rs index d5e9c38a..2638b0ab 100644 --- a/src/array/fixed_size_primitive.rs +++ b/src/array/fixed_size_primitive.rs @@ -187,10 +187,10 @@ mod arrow { let array = PrimitiveArray::::from(array); assert_eq!(array.len(), 4); - // let input = [1, 2, 3, 4]; - // let array = input.into_iter().collect::>(); - // let array = PrimitiveArray::::from(array); - // assert_eq!(array.len(), 4); + let input = [1, 2, 3, 4]; + let array = input.into_iter().collect::>(); + let array = PrimitiveArray::::from(array); + assert_eq!(array.len(), 4); let input = [Some(1), None, Some(3), Some(4)]; let array = input.into_iter().collect::>(); From c5c09f0b311f054a3f604664ac3539db70f40351 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 7 Aug 2023 14:59:00 +0200 Subject: [PATCH 09/27] Fix warning --- src/fixed_size.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fixed_size.rs b/src/fixed_size.rs index d8d800cc..015f123e 100644 --- a/src/fixed_size.rs +++ b/src/fixed_size.rs @@ -21,7 +21,7 @@ pub trait FixedSize: arrow_buffer::ArrowNativeType + ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static { /// The fixed-size of this type in bytes. - const SIZE: usize = std::mem::size_of::(); + const SIZE: usize = mem::size_of::(); } impl FixedSize for i8 {} From e39ad3b169c620524a31a8d652203065e2bddfad Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 7 Aug 2023 15:43:37 +0200 Subject: [PATCH 10/27] Move `arrow-rs` interop to `arrow` module --- Cargo.toml | 3 +- src/array/fixed_size_primitive.rs | 88 ------------------------- src/arrow/array/fixed_size_primitive.rs | 76 +++++++++++++++++++++ src/arrow/array/mod.rs | 1 + src/arrow/bitmap.rs | 30 +++++++++ src/arrow/buffer.rs | 57 ++++++++++++++++ src/arrow/length.rs | 14 ++++ src/arrow/mod.rs | 11 ++++ src/bitmap/mod.rs | 39 +---------- src/buffer.rs | 60 ----------------- src/fixed_size.rs | 31 +++++---- src/length.rs | 12 ---- src/lib.rs | 3 + 13 files changed, 216 insertions(+), 209 deletions(-) create mode 100644 src/arrow/array/fixed_size_primitive.rs create mode 100644 src/arrow/array/mod.rs create mode 100644 src/arrow/bitmap.rs create mode 100644 src/arrow/buffer.rs create mode 100644 src/arrow/length.rs create mode 100644 src/arrow/mod.rs diff --git a/Cargo.toml b/Cargo.toml index d2af3cd1..0ac30eff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,7 +28,8 @@ keywords.workspace = true categories.workspace = true [features] -default = ["arrow-array", "arrow-buffer", "derive", "unsafe"] +default = ["arrow", "derive", "unsafe"] +arrow = ["arrow-array", "arrow-buffer"] arrow-array = ["dep:arrow-array", "arrow-buffer"] arrow-buffer = ["dep:arrow-buffer"] derive = ["dep:narrow-derive"] diff --git a/src/array/fixed_size_primitive.rs b/src/array/fixed_size_primitive.rs index 2638b0ab..33329acd 100644 --- a/src/array/fixed_size_primitive.rs +++ b/src/array/fixed_size_primitive.rs @@ -136,77 +136,6 @@ impl BitmapRefMut for FixedSizePrimitiveArray< impl ValidityBitmap for FixedSizePrimitiveArray {} -#[cfg(feature = "arrow-array")] -mod arrow { - use super::FixedSizePrimitiveArray; - use crate::{ - bitmap::Bitmap, - buffer::{ArrowBuffer, BufferType}, - FixedSize, Length, - }; - use arrow_array::{types::ArrowPrimitiveType, PrimitiveArray}; - use arrow_buffer::{BooleanBuffer, NullBuffer, ScalarBuffer}; - - impl, Buffer: BufferType> - From> for PrimitiveArray - where - ::Buffer: Length + Into<::Buffer>, - { - fn from(value: FixedSizePrimitiveArray) -> Self { - let len = value.len(); - Self::new(ScalarBuffer::new(value.0.into().finish(), 0, len), None) - } - } - - impl, Buffer: BufferType> - From> for PrimitiveArray - where - ::Buffer: Length + Into<::Buffer>, - Bitmap: Into, - { - fn from(value: FixedSizePrimitiveArray) -> Self { - let len = value.len(); - Self::new( - ScalarBuffer::new(value.0.data.into().finish(), 0, len), - Some(NullBuffer::new(value.0.validity.into())), - ) - } - } - - #[cfg(test)] - mod test { - - #[test] - #[cfg(feature = "arrow-array")] - fn arrow_array() { - use crate::{array::Int8Array, bitmap::ValidityBitmap, buffer::ArrowBuffer}; - use arrow_array::{types::Int8Type, Array, PrimitiveArray}; - - let input = [1, 2, 3, 4]; - let array = input.into_iter().collect::>(); - let array = PrimitiveArray::::from(array); - assert_eq!(array.len(), 4); - - let input = [1, 2, 3, 4]; - let array = input.into_iter().collect::>(); - let array = PrimitiveArray::::from(array); - assert_eq!(array.len(), 4); - - let input = [Some(1), None, Some(3), Some(4)]; - let array = input.into_iter().collect::>(); - assert_eq!(array.null_count(), 1); - let array = PrimitiveArray::::from(array); - assert_eq!(array.len(), 4); - assert_eq!(array.null_count(), 1); - } - - #[test] - fn convert() {} - } -} - -pub use arrow::*; - #[cfg(test)] mod tests { use super::*; @@ -276,21 +205,4 @@ mod tests { mem::size_of::() + mem::size_of::() ); } - - #[test] - #[cfg(feature = "arrow-buffer")] - fn arrow_buffer() { - use crate::buffer::ArrowBuffer; - - let input = [1, 2, 3, 4]; - let mut array = input.into_iter().collect::>(); - assert_eq!(array.len(), 4); - // Use arrow_buffer - array.0.append_n(5, 5); - assert_eq!(array.0.as_slice(), &[1, 2, 3, 4, 5, 5, 5, 5, 5]); - - let input = [Some(1), None, Some(3), Some(4)]; - let array = input.into_iter().collect::>(); - assert_eq!(array.len(), 4); - } } diff --git a/src/arrow/array/fixed_size_primitive.rs b/src/arrow/array/fixed_size_primitive.rs new file mode 100644 index 00000000..b8e909b0 --- /dev/null +++ b/src/arrow/array/fixed_size_primitive.rs @@ -0,0 +1,76 @@ +use crate::{ + array::FixedSizePrimitiveArray, arrow::buffer::ArrowBuffer, bitmap::Bitmap, buffer::BufferType, + FixedSize, Length, +}; +use arrow_array::{types::ArrowPrimitiveType, PrimitiveArray}; +use arrow_buffer::{BooleanBuffer, NullBuffer, ScalarBuffer}; + +impl, Buffer: BufferType> + From> for PrimitiveArray +where + ::Buffer: Length + Into<::Buffer>, +{ + fn from(value: FixedSizePrimitiveArray) -> Self { + let len = value.len(); + Self::new(ScalarBuffer::new(value.0.into().finish(), 0, len), None) + } +} + +impl, Buffer: BufferType> + From> for PrimitiveArray +where + ::Buffer: Length + Into<::Buffer>, + Bitmap: Into, +{ + fn from(value: FixedSizePrimitiveArray) -> Self { + let len = value.len(); + Self::new( + ScalarBuffer::new(value.0.data.into().finish(), 0, len), + Some(NullBuffer::new(value.0.validity.into())), + ) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::array::Int8Array; + + #[test] + #[cfg(feature = "arrow-array")] + fn arrow_array() { + use crate::{array::Int8Array, bitmap::ValidityBitmap}; + use arrow_array::{types::Int8Type, Array, PrimitiveArray}; + + let input = [1, 2, 3, 4]; + let array = input.into_iter().collect::>(); + let array = PrimitiveArray::::from(array); + assert_eq!(array.len(), 4); + + let input = [1, 2, 3, 4]; + let array = input.into_iter().collect::>(); + let array = PrimitiveArray::::from(array); + assert_eq!(array.len(), 4); + + let input = [Some(1), None, Some(3), Some(4)]; + let array = input.into_iter().collect::>(); + assert_eq!(array.null_count(), 1); + let array = PrimitiveArray::::from(array); + assert_eq!(array.len(), 4); + assert_eq!(array.null_count(), 1); + } + + #[test] + fn arrow_buffer() { + let input = [1, 2, 3, 4]; + let mut array = input.into_iter().collect::>(); + assert_eq!(array.len(), 4); + // Use arrow_buffer + array.0.append_n(5, 5); + assert_eq!(array.0.as_slice(), &[1, 2, 3, 4, 5, 5, 5, 5, 5]); + + let input = [Some(1), None, Some(3), Some(4)]; + let array = input.into_iter().collect::>(); + assert_eq!(array.len(), 4); + } +} diff --git a/src/arrow/array/mod.rs b/src/arrow/array/mod.rs new file mode 100644 index 00000000..8c4f437f --- /dev/null +++ b/src/arrow/array/mod.rs @@ -0,0 +1 @@ +pub mod fixed_size_primitive; diff --git a/src/arrow/bitmap.rs b/src/arrow/bitmap.rs new file mode 100644 index 00000000..7f7fe51d --- /dev/null +++ b/src/arrow/bitmap.rs @@ -0,0 +1,30 @@ +use super::buffer::ArrowBuffer; +use crate::{bitmap::Bitmap, buffer::BufferType}; +use arrow_buffer::BooleanBuffer; + +impl From> for BooleanBuffer +where + ::Buffer: Into<::Buffer>, +{ + fn from(value: Bitmap) -> Self { + BooleanBuffer::new(value.buffer.into().finish(), value.offset, value.bits) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::Length; + + #[test] + fn arrow_buffer() { + let input = vec![true, false, true]; + let bitmap = input.into_iter().collect::>(); + assert_eq!(bitmap.len(), 3); + + let input = vec![true, false, true]; + let bitmap = input.into_iter().collect::>(); + assert_eq!(bitmap.len(), 3); + assert_eq!(bitmap.into_iter().collect::>(), [true, false, true]); + } +} diff --git a/src/arrow/buffer.rs b/src/arrow/buffer.rs new file mode 100644 index 00000000..9eb4296e --- /dev/null +++ b/src/arrow/buffer.rs @@ -0,0 +1,57 @@ +use crate::buffer::{Buffer, BufferMut, BufferType}; +use crate::FixedSize; +use arrow_buffer::{ArrowNativeType, BufferBuilder, ScalarBuffer}; + +/// A [BufferType] implementation for [arrow_buffer::BufferBuilder]. +pub struct ArrowBuffer; + +impl BufferType for ArrowBuffer { + type Buffer = BufferBuilder; +} + +impl Buffer for BufferBuilder { + fn as_slice(&self) -> &[T] { + BufferBuilder::as_slice(self) + } +} + +impl BufferMut for BufferBuilder { + fn as_mut_slice(&mut self) -> &mut [T] { + BufferBuilder::as_slice_mut(self) + } +} + +/// A [BufferType] implementation for [arrow_buffer::ScalarBuffer]. +pub struct ArrowScalarBuffer; + +impl BufferType for ArrowScalarBuffer { + type Buffer = ScalarBuffer; +} + +impl Buffer for ScalarBuffer { + fn as_slice(&self) -> &[T] { + self + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn arrow() { + let buffer = arrow_buffer::BufferBuilder::from_iter([1, 2, 3, 4]); + assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); + + let mut buffer = arrow_buffer::BufferBuilder::from_iter([1u64, 2, 3, 4]); + assert_eq!( + <_ as BufferMut>::as_mut_slice(&mut buffer), + &[1, 2, 3, 4] + ); + <_ as BufferMut>::as_mut_slice(&mut buffer)[3] = 42; + assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 42]); + + let buffer = arrow_buffer::ScalarBuffer::from_iter([1, 2, 3, 4]); + assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); + } +} diff --git a/src/arrow/length.rs b/src/arrow/length.rs new file mode 100644 index 00000000..bed0ca59 --- /dev/null +++ b/src/arrow/length.rs @@ -0,0 +1,14 @@ +use crate::Length; +use arrow_buffer::{ArrowNativeType, BufferBuilder, ScalarBuffer}; + +impl Length for BufferBuilder { + fn len(&self) -> usize { + BufferBuilder::len(self) + } +} + +impl Length for ScalarBuffer { + fn len(&self) -> usize { + self.as_ref().len() + } +} diff --git a/src/arrow/mod.rs b/src/arrow/mod.rs new file mode 100644 index 00000000..89d442ea --- /dev/null +++ b/src/arrow/mod.rs @@ -0,0 +1,11 @@ +#[cfg(feature = "arrow-array")] +pub mod array; + +#[cfg(feature = "arrow-buffer")] +pub mod bitmap; + +#[cfg(feature = "arrow-buffer")] +pub mod buffer; + +#[cfg(feature = "arrow-buffer")] +pub mod length; diff --git a/src/bitmap/mod.rs b/src/bitmap/mod.rs index ae44cdf3..36aa5e11 100644 --- a/src/bitmap/mod.rs +++ b/src/bitmap/mod.rs @@ -42,14 +42,14 @@ pub trait BitmapRefMut: BitmapRef { // todo(mb): implement ops pub struct Bitmap { /// The bits are stored in this buffer of bytes. - buffer: ::Buffer, + pub(crate) buffer: ::Buffer, /// The number of bits stored in the bitmap. - bits: usize, + pub(crate) bits: usize, /// An offset (in number of bits) in the buffer. This enables zero-copy /// slicing of the bitmap on non-byte boundaries. - offset: usize, + pub(crate) offset: usize, } impl BitmapRef for Bitmap { @@ -299,24 +299,6 @@ impl Length for Bitmap { impl ValidityBitmap for Bitmap {} -#[cfg(feature = "arrow-buffer")] -mod arrow { - use super::Bitmap; - use crate::buffer::{ArrowBuffer, BufferType}; - use arrow_buffer::BooleanBuffer; - - impl From> for BooleanBuffer - where - ::Buffer: Into<::Buffer>, - { - fn from(value: Bitmap) -> Self { - BooleanBuffer::new(value.buffer.into().finish(), 0, value.bits) - } - } -} - -pub use arrow::*; - #[cfg(test)] mod tests { use crate::buffer::{ArrayBuffer, BoxBuffer, BufferRefMut, SliceBuffer}; @@ -489,19 +471,4 @@ mod tests { mem::size_of::>() + 2 * mem::size_of::() ); } - - #[test] - #[cfg(feature = "arrow-buffer")] - fn arrow_buffer() { - use crate::buffer::ArrowBuffer; - - let input = vec![true, false, true]; - let bitmap = input.into_iter().collect::>(); - assert_eq!(bitmap.len(), 3); - - let input = vec![true, false, true]; - let bitmap = input.into_iter().collect::>(); - assert_eq!(bitmap.len(), 3); - assert_eq!(bitmap.into_iter().collect::>(), [true, false, true]); - } } diff --git a/src/buffer.rs b/src/buffer.rs index fbca21b9..b01f603b 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -358,48 +358,6 @@ impl BufferMut for Rc<[T]> { } } -#[cfg(feature = "arrow-buffer")] -mod arrow { - use super::{Buffer, BufferMut, BufferType}; - use crate::FixedSize; - use arrow_buffer::{ArrowNativeType, BufferBuilder, ScalarBuffer}; - - /// A [BufferType] implementation for [arrow_buffer::BufferBuilder]. - pub struct ArrowBuffer; - - impl BufferType for ArrowBuffer { - type Buffer = BufferBuilder; - } - - impl Buffer for BufferBuilder { - fn as_slice(&self) -> &[T] { - BufferBuilder::as_slice(self) - } - } - - impl BufferMut for BufferBuilder { - fn as_mut_slice(&mut self) -> &mut [T] { - BufferBuilder::as_slice_mut(self) - } - } - - /// A [BufferType] implementation for [arrow_buffer::ScalarBuffer]. - pub struct ArrowScalarBuffer; - - impl BufferType for ArrowScalarBuffer { - type Buffer = ScalarBuffer; - } - - impl Buffer for ScalarBuffer { - fn as_slice(&self) -> &[T] { - self - } - } -} - -#[cfg(feature = "arrow-buffer")] -pub use arrow::*; - #[cfg(test)] mod tests { use super::*; @@ -464,22 +422,4 @@ mod tests { &[0, 2, 3, 4, 5, 6] ); } - - #[test] - #[cfg(feature = "arrow-buffer")] - fn arrow() { - let buffer = arrow_buffer::BufferBuilder::from_iter([1, 2, 3, 4]); - assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); - - let mut buffer = arrow_buffer::BufferBuilder::from_iter([1u64, 2, 3, 4]); - assert_eq!( - <_ as BufferMut>::as_mut_slice(&mut buffer), - &[1, 2, 3, 4] - ); - <_ as BufferMut>::as_mut_slice(&mut buffer)[3] = 42; - assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 42]); - - let buffer = arrow_buffer::ScalarBuffer::from_iter([1, 2, 3, 4]); - assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); - } } diff --git a/src/fixed_size.rs b/src/fixed_size.rs index 015f123e..129b216c 100644 --- a/src/fixed_size.rs +++ b/src/fixed_size.rs @@ -3,6 +3,7 @@ use crate::array::ArrayType; use std::{fmt::Debug, mem}; +#[cfg(not(feature = "arrow-buffer"))] /// Subtrait for fixed-size types. /// /// This exists to be used as trait bound where one or more of the supertraits @@ -10,20 +11,34 @@ use std::{fmt::Debug, mem}; /// fixed-size types. /// /// This trait is sealed to prevent downstream implementations. -#[cfg(not(feature = "arrow-buffer"))] pub trait FixedSize: ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static { /// The fixed-size of this type in bytes. const SIZE: usize = mem::size_of::(); } #[cfg(feature = "arrow-buffer")] +/// Subtrait for fixed-size types. +/// +/// This exists to be used as trait bound where one or more of the supertraits +/// of this trait are required, and to restrict certain implementations to +/// fixed-size types. +/// +/// This trait is sealed to prevent downstream implementations. pub trait FixedSize: - arrow_buffer::ArrowNativeType + ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static + ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static + arrow_buffer::ArrowNativeType { /// The fixed-size of this type in bytes. const SIZE: usize = mem::size_of::(); } +mod sealed { + /// Used to seal [super::FixedSize]. + pub trait Sealed {} + + // Prevent downstream implementation of [super::FixedSize]. + impl Sealed for T where T: super::FixedSize {} +} + impl FixedSize for i8 {} impl FixedSize for i16 {} impl FixedSize for i32 {} @@ -43,19 +58,11 @@ impl FixedSize for f64 {} impl FixedSize for () {} -impl FixedSize for [T; N] {} - -mod sealed { - /// Used to seal [super::FixedSize]. - pub trait Sealed {} - - // Prevent downstream implementation of [super::FixedSize]. - impl Sealed for T where T: super::FixedSize {} -} +impl FixedSize for [T; N] {} #[cfg(test)] mod tests { - use crate::FixedSize; + use super::FixedSize; #[test] fn size() { diff --git a/src/length.rs b/src/length.rs index 6b6f904b..f685fd7d 100644 --- a/src/length.rs +++ b/src/length.rs @@ -90,15 +90,3 @@ impl Length for Option { } } } - -#[cfg(feature = "arrow-buffer")] -mod arrow { - use crate::Length; - use arrow_buffer::{ArrowNativeType, BufferBuilder}; - - impl Length for BufferBuilder { - fn len(&self) -> usize { - BufferBuilder::len(self) - } - } -} diff --git a/src/lib.rs b/src/lib.rs index ab19d8a5..c9caa723 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,6 +29,9 @@ pub(crate) mod validity; pub mod array; +#[cfg(any(feature = "arrow-array", feature = "arrow-buffer"))] +pub mod arrow; + // Re-export `narrow_derive` macros when the `derive` feature is enabled. #[cfg(feature = "derive")] pub use narrow_derive::ArrayType; From afbb9624ba0307c60f0e8ec828e2efe5c37967ea Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 7 Aug 2023 21:19:30 +0200 Subject: [PATCH 11/27] Rename feature to `arrow-rs` --- Cargo.toml | 6 ++--- src/arrow/array/boolean.rs | 45 ++++++++++++++++++++++++++++++++++++++ src/arrow/array/mod.rs | 2 +- src/arrow/bitmap.rs | 18 +++++++++++---- src/arrow/buffer.rs | 4 ++-- src/arrow/mod.rs | 18 +++++++-------- src/fixed_size.rs | 11 +++++++--- src/lib.rs | 3 ++- 8 files changed, 82 insertions(+), 25 deletions(-) create mode 100644 src/arrow/array/boolean.rs diff --git a/Cargo.toml b/Cargo.toml index 0ac30eff..20ce5853 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,10 +28,8 @@ keywords.workspace = true categories.workspace = true [features] -default = ["arrow", "derive", "unsafe"] -arrow = ["arrow-array", "arrow-buffer"] -arrow-array = ["dep:arrow-array", "arrow-buffer"] -arrow-buffer = ["dep:arrow-buffer"] +default = ["arrow-rs", "derive", "unsafe"] +arrow-rs = ["dep:arrow-array", "dep:arrow-buffer"] derive = ["dep:narrow-derive"] unsafe = [] diff --git a/src/arrow/array/boolean.rs b/src/arrow/array/boolean.rs new file mode 100644 index 00000000..32b28f91 --- /dev/null +++ b/src/arrow/array/boolean.rs @@ -0,0 +1,45 @@ +use crate::{array::BooleanArray, bitmap::Bitmap, buffer::BufferType}; +use arrow_buffer::{BooleanBuffer, NullBuffer}; + +impl From> for arrow_array::BooleanArray +where + Bitmap: Into, +{ + fn from(value: BooleanArray) -> Self { + arrow_array::BooleanArray::new(value.0.into(), None) + } +} + +impl From> for arrow_array::BooleanArray +where + Bitmap: Into + Into, +{ + fn from(value: BooleanArray) -> Self { + arrow_array::BooleanArray::new(value.0.data.into(), Some(value.0.validity.into())) + } +} + +#[cfg(test)] +mod tests { + use arrow_array::Array; + + use super::*; + use crate::{bitmap::ValidityBitmap, Length}; + + #[test] + fn convert() { + let input = [true, false, true, true]; + let array = input.into_iter().collect::(); + assert_eq!(array.len(), 4); + let array: arrow_array::BooleanArray = array.into(); + assert_eq!(array.len(), 4); + + let input = [Some(true), None, Some(false)]; + let array = input.into_iter().collect::>(); + assert_eq!(array.len(), 3); + assert_eq!(array.null_count(), 1); + let array: arrow_array::BooleanArray = array.into(); + assert_eq!(array.len(), 3); + assert_eq!(array.null_count(), 1); + } +} diff --git a/src/arrow/array/mod.rs b/src/arrow/array/mod.rs index 8c4f437f..d3e20496 100644 --- a/src/arrow/array/mod.rs +++ b/src/arrow/array/mod.rs @@ -1 +1 @@ -pub mod fixed_size_primitive; +mod fixed_size_primitive; diff --git a/src/arrow/bitmap.rs b/src/arrow/bitmap.rs index 7f7fe51d..f4a82d87 100644 --- a/src/arrow/bitmap.rs +++ b/src/arrow/bitmap.rs @@ -1,6 +1,6 @@ use super::buffer::ArrowBuffer; use crate::{bitmap::Bitmap, buffer::BufferType}; -use arrow_buffer::BooleanBuffer; +use arrow_buffer::{BooleanBuffer, NullBuffer}; impl From> for BooleanBuffer where @@ -11,20 +11,30 @@ where } } +impl From> for NullBuffer +where + ::Buffer: Into<::Buffer>, +{ + fn from(value: Bitmap) -> Self { + Self::new(value.into()) + } +} + #[cfg(test)] mod tests { use super::*; use crate::Length; #[test] - fn arrow_buffer() { + fn convert() { let input = vec![true, false, true]; let bitmap = input.into_iter().collect::>(); assert_eq!(bitmap.len(), 3); + let _: NullBuffer = bitmap.into(); let input = vec![true, false, true]; - let bitmap = input.into_iter().collect::>(); + let bitmap = input.into_iter().collect::(); assert_eq!(bitmap.len(), 3); - assert_eq!(bitmap.into_iter().collect::>(), [true, false, true]); + let _: BooleanBuffer = bitmap.into(); } } diff --git a/src/arrow/buffer.rs b/src/arrow/buffer.rs index 9eb4296e..67f06e3d 100644 --- a/src/arrow/buffer.rs +++ b/src/arrow/buffer.rs @@ -2,7 +2,7 @@ use crate::buffer::{Buffer, BufferMut, BufferType}; use crate::FixedSize; use arrow_buffer::{ArrowNativeType, BufferBuilder, ScalarBuffer}; -/// A [BufferType] implementation for [arrow_buffer::BufferBuilder]. +/// A [BufferType] implementation for [BufferBuilder]. pub struct ArrowBuffer; impl BufferType for ArrowBuffer { @@ -21,7 +21,7 @@ impl BufferMut for BufferBuilder { } } -/// A [BufferType] implementation for [arrow_buffer::ScalarBuffer]. +/// A [BufferType] implementation for [ScalarBuffer]. pub struct ArrowScalarBuffer; impl BufferType for ArrowScalarBuffer { diff --git a/src/arrow/mod.rs b/src/arrow/mod.rs index 89d442ea..8293e572 100644 --- a/src/arrow/mod.rs +++ b/src/arrow/mod.rs @@ -1,11 +1,9 @@ -#[cfg(feature = "arrow-array")] -pub mod array; +//! Interop with the [`arrow-rs`] crate. +//! +//! [`arrow-rs`]: https://crates.io/crates/arrow -#[cfg(feature = "arrow-buffer")] -pub mod bitmap; - -#[cfg(feature = "arrow-buffer")] -pub mod buffer; - -#[cfg(feature = "arrow-buffer")] -pub mod length; +mod array; +mod bitmap; +mod buffer; +pub use buffer::*; +mod length; diff --git a/src/fixed_size.rs b/src/fixed_size.rs index 129b216c..9074a0fe 100644 --- a/src/fixed_size.rs +++ b/src/fixed_size.rs @@ -3,7 +3,8 @@ use crate::array::ArrayType; use std::{fmt::Debug, mem}; -#[cfg(not(feature = "arrow-buffer"))] +#[cfg(not(feature = "arrow-rs"))] +#[cfg_attr(docsrs, doc(cfg(all())))] /// Subtrait for fixed-size types. /// /// This exists to be used as trait bound where one or more of the supertraits @@ -16,7 +17,11 @@ pub trait FixedSize: ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static const SIZE: usize = mem::size_of::(); } -#[cfg(feature = "arrow-buffer")] +#[cfg(feature = "arrow-rs")] +use arrow_buffer::ArrowNativeType as _arrow_rs_trait; + +#[cfg(feature = "arrow-rs")] +#[cfg_attr(docsrs, doc(cfg(all())))] /// Subtrait for fixed-size types. /// /// This exists to be used as trait bound where one or more of the supertraits @@ -25,7 +30,7 @@ pub trait FixedSize: ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static /// /// This trait is sealed to prevent downstream implementations. pub trait FixedSize: - ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static + arrow_buffer::ArrowNativeType + ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static + _arrow_rs_trait { /// The fixed-size of this type in bytes. const SIZE: usize = mem::size_of::(); diff --git a/src/lib.rs b/src/lib.rs index c9caa723..abe77c6e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ //! //! An experimental (work-in-progress) implementation of [Apache Arrow](https://arrow.apache.org). +#![cfg_attr(docsrs, feature(doc_auto_cfg, doc_cfg))] #![doc( html_logo_url = "https://raw.githubusercontent.com/mbrobbel/narrow/main/narrow.svg", html_favicon_url = "https://raw.githubusercontent.com/mbrobbel/narrow/main/narrow.svg" @@ -29,7 +30,7 @@ pub(crate) mod validity; pub mod array; -#[cfg(any(feature = "arrow-array", feature = "arrow-buffer"))] +#[cfg(feature = "arrow-rs")] pub mod arrow; // Re-export `narrow_derive` macros when the `derive` feature is enabled. From 21547b16b5823c67c81471c9cdc4cfda0137963d Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 7 Aug 2023 21:19:53 +0200 Subject: [PATCH 12/27] Add `BooleanArray` conversion --- src/array/boolean.rs | 2 +- src/arrow/array/mod.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/array/boolean.rs b/src/array/boolean.rs index 6a26765a..b876e36d 100644 --- a/src/array/boolean.rs +++ b/src/array/boolean.rs @@ -12,7 +12,7 @@ use crate::{ /// /// Values are stored using single bits in a [Bitmap]. pub struct BooleanArray( - as Validity>::Storage, + pub(crate) as Validity>::Storage, ) where Bitmap: Validity; diff --git a/src/arrow/array/mod.rs b/src/arrow/array/mod.rs index d3e20496..41862ec9 100644 --- a/src/arrow/array/mod.rs +++ b/src/arrow/array/mod.rs @@ -1 +1,2 @@ +mod boolean; mod fixed_size_primitive; From 1c7ee0813238c3a3fa9dce94ed1d05d77a45e159 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Wed, 6 Dec 2023 13:35:49 +0100 Subject: [PATCH 13/27] Update MSRV in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 04086bd4..d8c79f21 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ This crate provides types to support reading and writing instances of abstract d ## Minimum supported Rust version -The minimum supported Rust version for this crate is Rust 1.67.0. +The minimum supported Rust version for this crate is Rust 1.70.0. ## License From 6c4343880fc55c9191fee30c96a3ecf5c513622e Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Thu, 7 Dec 2023 13:22:36 +0100 Subject: [PATCH 14/27] Change interaction with `ArrowNativeType` --- .github/workflows/test.yml | 42 ++++++++++--- Cargo.toml | 4 +- src/array/fixed_size_primitive.rs | 45 +++++++++----- src/array/mod.rs | 6 ++ src/array/struct.rs | 16 ++--- src/arrow/array/boolean.rs | 20 +++--- src/arrow/array/fixed_size_primitive.rs | 68 +++++++++++++++------ src/arrow/array/mod.rs | 2 + src/arrow/bitmap.rs | 27 ++++++--- src/arrow/buffer.rs | 81 ++++++++++++++++++++----- src/arrow/length.rs | 10 ++- src/fixed_size.rs | 39 ++++++------ 12 files changed, 254 insertions(+), 106 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 73e0095b..407a2086 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,6 +7,31 @@ permissions: contents: read jobs: + minimal: + name: Minimal + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@stable + id: rust-toolchain + - uses: actions/cache@v3 + with: + path: | + ~/.cargo/bin/ + ~/.cargo/registry/index/ + ~/.cargo/registry/cache/ + ~/.cargo/git/db/ + target/ + key: ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-minimal-${{ hashFiles('**/Cargo.toml') }} + restore-keys: | + ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-minimal- + ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}- + ${{ runner.os }}-cargo- + - run: cargo check --workspace --all-targets --no-default-features + - run: cargo test --workspace --all-targets --no-default-features + - run: cargo test --workspace --doc --no-default-features + - run: cargo clippy --workspace --all-targets --no-default-features -- -Dwarnings + msrv: name: Minimum supported Rust version runs-on: ubuntu-latest @@ -27,8 +52,9 @@ jobs: ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-msrv- ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}- ${{ runner.os }}-cargo- - - run: cargo check --all --all-features - + - run: cargo check --workspace --all-targets --all-features + - run: cargo test --all --all-targets --all-features + - run: cargo test --all --doc --all-features check: name: Check runs-on: ubuntu-latest @@ -75,8 +101,8 @@ jobs: - uses: dtolnay/install@master with: crate: cargo-expand - - run: cargo test --all --all-targets --all-features - - run: cargo test --all --doc --all-features + - run: cargo test --workspace --all-targets --all-features + - run: cargo test --workspace --doc --all-features rustfmt: name: Rustfmt @@ -110,7 +136,7 @@ jobs: ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}-clippy- ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}- ${{ runner.os }}-cargo- - - run: cargo clippy --all --all-targets --all-features -- -Dwarnings + - run: cargo clippy --workspace --all-targets --all-features -- -Dwarnings miri: name: Miri @@ -133,7 +159,7 @@ jobs: ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}- ${{ runner.os }}-cargo- - run: cargo miri setup - - run: cargo miri test --all-features + - run: cargo miri test --workspace --all-targets --all-features coverage: name: Coverage @@ -163,8 +189,8 @@ jobs: - uses: dtolnay/install@master with: crate: cargo-expand - - run: cargo build --all --all-targets --all-features - - run: cargo test --all --all-targets --all-features + - run: cargo build --workspace --all-targets --all-features + - run: cargo test --workspace --all-targets --all-features env: LLVM_PROFILE_FILE: "narrow-%p-%m.profraw" - name: Install grcov diff --git a/Cargo.toml b/Cargo.toml index df95b1b4..f9bfca17 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,8 +33,8 @@ arrow-rs = ["dep:arrow-array", "dep:arrow-buffer"] derive = ["dep:narrow-derive"] [dependencies] -arrow-array = { git = "https://github.com/mbrobbel/arrow-rs.git", branch = "arrow-native-type", optional = true } -arrow-buffer = { git = "https://github.com/mbrobbel/arrow-rs.git", branch = "arrow-native-type", optional = true } +arrow-array = { version = "49.0.0", optional = true } +arrow-buffer = { version = "49.0.0", optional = true } narrow-derive = { path = "narrow-derive", version = "^0.3.4", optional = true } [dev-dependencies] diff --git a/src/array/fixed_size_primitive.rs b/src/array/fixed_size_primitive.rs index ff560242..eacfeb66 100644 --- a/src/array/fixed_size_primitive.rs +++ b/src/array/fixed_size_primitive.rs @@ -34,10 +34,14 @@ type_def!(Int8Array, i8); type_def!(Int16Array, i16); type_def!(Int32Array, i32); type_def!(Int64Array, i64); +#[cfg(not(feature = "arrow-rs"))] +type_def!(Int128Array, i128); type_def!(Uint8Array, u8); type_def!(Uint16Array, u16); type_def!(Uint32Array, u32); type_def!(Uint64Array, u64); +#[cfg(not(feature = "arrow-rs"))] +type_def!(Uint128Array, u128); type_def!(IsizeArray, isize); type_def!(UsizeArray, usize); @@ -195,12 +199,15 @@ mod tests { assert_eq!(array.0.as_slice(), &[1, 2, 3, 4]); assert_eq!(array.0.as_slice(), array.0.as_bytes()); - let input_array = [[1_u8, 2], [3, 4]]; - let array_array = input_array - .into_iter() - .collect::>(); - assert_eq!(array_array.0.as_slice(), &[[1, 2], [3, 4]]); - assert_eq!(<_ as Buffer>::as_bytes(&array_array.0), &[1, 2, 3, 4]); + #[cfg(not(feature = "arrow-rs"))] + { + let input_array = [[1_u8, 2], [3, 4]]; + let array_array = input_array + .into_iter() + .collect::>(); + assert_eq!(array_array.0.as_slice(), &[[1, 2], [3, 4]]); + assert_eq!(<_ as Buffer>::as_bytes(&array_array.0), &[1, 2, 3, 4]); + } } #[test] @@ -221,11 +228,14 @@ mod tests { let array = input.into_iter().collect::>(); assert_eq!(array.into_iter().collect::>(), input); - let input_array = [[1_u8, 2], [3, 4]]; - let array_array = input_array - .into_iter() - .collect::>(); - assert_eq!(array_array.into_iter().collect::>(), input_array); + #[cfg(not(feature = "arrow-rs"))] + { + let input_array = [[1_u8, 2], [3, 4]]; + let array_array = input_array + .into_iter() + .collect::>(); + assert_eq!(array_array.into_iter().collect::>(), input_array); + } } #[test] @@ -241,11 +251,14 @@ mod tests { let array = input.into_iter().collect::>(); assert_eq!(array.len(), input.as_slice().len()); - let input_array = [[1_u8, 2], [3, 4]]; - let array_array = input_array - .into_iter() - .collect::>(); - assert_eq!(array_array.len(), input_array.as_slice().len()); + #[cfg(not(feature = "arrow-rs"))] + { + let input_array = [[1_u8, 2], [3, 4]]; + let array_array = input_array + .into_iter() + .collect::>(); + assert_eq!(array_array.len(), input_array.as_slice().len()); + } let input_nullable = [Some(1_u64), None, Some(3), Some(4)]; let array_nullable = input_nullable diff --git a/src/array/mod.rs b/src/array/mod.rs index 77eb4ffb..5933813a 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -94,14 +94,20 @@ impl_array_type!(u64, FixedSizePrimitiveArray); impl_array_type!(Option, FixedSizePrimitiveArray); impl_array_type!(i64, FixedSizePrimitiveArray); impl_array_type!(Option, FixedSizePrimitiveArray); +#[cfg(not(feature = "arrow-rs"))] impl_array_type!(u128, FixedSizePrimitiveArray); +#[cfg(not(feature = "arrow-rs"))] impl_array_type!(Option, FixedSizePrimitiveArray); impl_array_type!(i128, FixedSizePrimitiveArray); impl_array_type!(Option, FixedSizePrimitiveArray); +#[cfg(not(feature = "arrow-rs"))] impl_array_type!(usize, FixedSizePrimitiveArray); +#[cfg(not(feature = "arrow-rs"))] impl_array_type!(Option, FixedSizePrimitiveArray); +#[cfg(not(feature = "arrow-rs"))] impl_array_type!(isize, FixedSizePrimitiveArray); +#[cfg(not(feature = "arrow-rs"))] impl_array_type!(Option, FixedSizePrimitiveArray); impl_array_type!(f32, FixedSizePrimitiveArray); diff --git a/src/array/struct.rs b/src/array/struct.rs index 76ffe229..b3c1cc08 100644 --- a/src/array/struct.rs +++ b/src/array/struct.rs @@ -119,7 +119,7 @@ mod tests { a: u32, b: Option<()>, c: (), - d: Option<[u128; 2]>, + d: Option<[u64; 2]>, e: bool, f: &'a [u8], g: String, @@ -138,7 +138,7 @@ mod tests { a: ::Array, b: as ArrayType>::Array, c: <() as ArrayType>::Array, - d: as ArrayType>::Array, + d: as ArrayType>::Array, e: ::Array, f: <&'a [u8] as ArrayType>::Array, g: ::Array, @@ -149,7 +149,7 @@ mod tests { ::Array: Default, as ArrayType>::Array: Default, <() as ArrayType>::Array: Default, - as ArrayType>::Array: Default, + as ArrayType>::Array: Default, ::Array: Default, <&'a [u8] as ArrayType>::Array: Default, ::Array: Default, @@ -159,7 +159,7 @@ mod tests { a: ::Array::::default(), b: as ArrayType>::Array::::default(), c: <() as ArrayType>::Array::::default(), - d: as ArrayType>::Array::::default( + d: as ArrayType>::Array::::default( ), e: ::Array::::default(), f: <&'a [u8] as ArrayType>::Array::::default(), @@ -173,8 +173,8 @@ mod tests { ::Array: Extend, as ArrayType>::Array: Extend>, <() as ArrayType>::Array: Extend<()>, - as ArrayType>::Array: - Extend>, + as ArrayType>::Array: + Extend>, ::Array: Extend, <&'a [u8] as ArrayType>::Array: Extend<&'a [u8]>, ::Array: Extend, @@ -208,8 +208,8 @@ mod tests { as ArrayType>::Array: Default + Extend>, <() as ArrayType>::Array: Default + Extend<()>, - as ArrayType>::Array: - Default + Extend>, + as ArrayType>::Array: + Default + Extend>, ::Array: Default + Extend, <&'a [u8] as ArrayType>::Array: Default + Extend<&'a [u8]>, ::Array: Default + Extend, diff --git a/src/arrow/array/boolean.rs b/src/arrow/array/boolean.rs index 32b28f91..001c645e 100644 --- a/src/arrow/array/boolean.rs +++ b/src/arrow/array/boolean.rs @@ -1,3 +1,5 @@ +//! Interop with `arrow-rs` boolean array. + use crate::{array::BooleanArray, bitmap::Bitmap, buffer::BufferType}; use arrow_buffer::{BooleanBuffer, NullBuffer}; @@ -31,15 +33,15 @@ mod tests { let input = [true, false, true, true]; let array = input.into_iter().collect::(); assert_eq!(array.len(), 4); - let array: arrow_array::BooleanArray = array.into(); - assert_eq!(array.len(), 4); + let array_arrow: arrow_array::BooleanArray = array.into(); + assert_eq!(array_arrow.len(), 4); - let input = [Some(true), None, Some(false)]; - let array = input.into_iter().collect::>(); - assert_eq!(array.len(), 3); - assert_eq!(array.null_count(), 1); - let array: arrow_array::BooleanArray = array.into(); - assert_eq!(array.len(), 3); - assert_eq!(array.null_count(), 1); + let input_nullable = [Some(true), None, Some(false)]; + let array_nullable = input_nullable.into_iter().collect::>(); + assert_eq!(array_nullable.len(), 3); + assert_eq!(array_nullable.null_count(), 1); + let array_arrow_nullable: arrow_array::BooleanArray = array_nullable.into(); + assert_eq!(array_arrow_nullable.len(), 3); + assert_eq!(array_arrow_nullable.null_count(), 1); } } diff --git a/src/arrow/array/fixed_size_primitive.rs b/src/arrow/array/fixed_size_primitive.rs index b8e909b0..60e1b3bf 100644 --- a/src/arrow/array/fixed_size_primitive.rs +++ b/src/arrow/array/fixed_size_primitive.rs @@ -1,5 +1,11 @@ +//! Interop with `arrow-rs` fixed-sized primitive array. + use crate::{ - array::FixedSizePrimitiveArray, arrow::buffer::ArrowBuffer, bitmap::Bitmap, buffer::BufferType, + array::FixedSizePrimitiveArray, + arrow::{buffer::ArrowBufferBuilder, ArrowBuffer}, + bitmap::Bitmap, + buffer::BufferType, + nullable::Nullable, FixedSize, Length, }; use arrow_array::{types::ArrowPrimitiveType, PrimitiveArray}; @@ -8,7 +14,7 @@ use arrow_buffer::{BooleanBuffer, NullBuffer, ScalarBuffer}; impl, Buffer: BufferType> From> for PrimitiveArray where - ::Buffer: Length + Into<::Buffer>, + ::Buffer: Length + Into<::Buffer>, { fn from(value: FixedSizePrimitiveArray) -> Self { let len = value.len(); @@ -19,7 +25,7 @@ where impl, Buffer: BufferType> From> for PrimitiveArray where - ::Buffer: Length + Into<::Buffer>, + ::Buffer: Length + Into<::Buffer>, Bitmap: Into, { fn from(value: FixedSizePrimitiveArray) -> Self { @@ -31,46 +37,68 @@ where } } +impl, U: FixedSize> From> + for FixedSizePrimitiveArray +{ + fn from(value: PrimitiveArray) -> Self { + let (_, scala_buffer, opt_null_buffer) = value.into_parts(); + if let Some(null_buffer) = opt_null_buffer { + Self(Nullable { + data: scala_buffer.into_inner(), + validity: null_buffer.into(), + }) + } else { + Self(Nullable::from(scala_buffer.into_inner())) + } + } +} + #[cfg(test)] mod test { use super::*; use crate::array::Int8Array; #[test] - #[cfg(feature = "arrow-array")] fn arrow_array() { use crate::{array::Int8Array, bitmap::ValidityBitmap}; use arrow_array::{types::Int8Type, Array, PrimitiveArray}; let input = [1, 2, 3, 4]; - let array = input.into_iter().collect::>(); - let array = PrimitiveArray::::from(array); - assert_eq!(array.len(), 4); + let array_arrow_buffer = input + .into_iter() + .collect::>(); + let array_arrow_from = PrimitiveArray::::from(array_arrow_buffer); + assert_eq!(array_arrow_from.len(), 4); - let input = [1, 2, 3, 4]; - let array = input.into_iter().collect::>(); - let array = PrimitiveArray::::from(array); + let array = + PrimitiveArray::::from(input.into_iter().collect::>()); assert_eq!(array.len(), 4); - let input = [Some(1), None, Some(3), Some(4)]; - let array = input.into_iter().collect::>(); - assert_eq!(array.null_count(), 1); - let array = PrimitiveArray::::from(array); - assert_eq!(array.len(), 4); - assert_eq!(array.null_count(), 1); + let input_nullable = [Some(1), None, Some(3), Some(4)]; + let array_nullable = input_nullable + .into_iter() + .collect::>(); + assert_eq!(array_nullable.null_count(), 1); + let array_arrow = PrimitiveArray::::from(array_nullable); + assert_eq!(array_arrow.len(), 4); + assert_eq!(array_arrow.null_count(), 1); } #[test] fn arrow_buffer() { let input = [1, 2, 3, 4]; - let mut array = input.into_iter().collect::>(); + let mut array = input + .into_iter() + .collect::>(); assert_eq!(array.len(), 4); // Use arrow_buffer array.0.append_n(5, 5); assert_eq!(array.0.as_slice(), &[1, 2, 3, 4, 5, 5, 5, 5, 5]); - let input = [Some(1), None, Some(3), Some(4)]; - let array = input.into_iter().collect::>(); - assert_eq!(array.len(), 4); + let input_nullable = [Some(1), None, Some(3), Some(4)]; + let array_nullable = input_nullable + .into_iter() + .collect::>(); + assert_eq!(array_nullable.len(), 4); } } diff --git a/src/arrow/array/mod.rs b/src/arrow/array/mod.rs index 41862ec9..2aa5b571 100644 --- a/src/arrow/array/mod.rs +++ b/src/arrow/array/mod.rs @@ -1,2 +1,4 @@ +//! Interop with [`arrow-array`]. + mod boolean; mod fixed_size_primitive; diff --git a/src/arrow/bitmap.rs b/src/arrow/bitmap.rs index f4a82d87..dc9cdfc1 100644 --- a/src/arrow/bitmap.rs +++ b/src/arrow/bitmap.rs @@ -1,10 +1,12 @@ -use super::buffer::ArrowBuffer; +//! Interop with [`arrow-rs`]'s bitmap. + +use super::{buffer::ArrowBuffer, ArrowBufferBuilder}; use crate::{bitmap::Bitmap, buffer::BufferType}; use arrow_buffer::{BooleanBuffer, NullBuffer}; impl From> for BooleanBuffer where - ::Buffer: Into<::Buffer>, + ::Buffer: Into<::Buffer>, { fn from(value: Bitmap) -> Self { BooleanBuffer::new(value.buffer.into().finish(), value.offset, value.bits) @@ -13,13 +15,25 @@ where impl From> for NullBuffer where - ::Buffer: Into<::Buffer>, + ::Buffer: Into<::Buffer>, { fn from(value: Bitmap) -> Self { Self::new(value.into()) } } +impl From for Bitmap { + fn from(value: NullBuffer) -> Self { + let bits = value.len(); + let offset = value.offset(); + Bitmap { + buffer: value.into_inner().into_inner(), + bits, + offset, + } + } +} + #[cfg(test)] mod tests { use super::*; @@ -28,11 +42,10 @@ mod tests { #[test] fn convert() { let input = vec![true, false, true]; - let bitmap = input.into_iter().collect::>(); - assert_eq!(bitmap.len(), 3); - let _: NullBuffer = bitmap.into(); + let bitmap_arrow = input.iter().collect::>(); + assert_eq!(bitmap_arrow.len(), 3); + let _: NullBuffer = bitmap_arrow.into(); - let input = vec![true, false, true]; let bitmap = input.into_iter().collect::(); assert_eq!(bitmap.len(), 3); let _: BooleanBuffer = bitmap.into(); diff --git a/src/arrow/buffer.rs b/src/arrow/buffer.rs index 67f06e3d..62f4bcb5 100644 --- a/src/arrow/buffer.rs +++ b/src/arrow/buffer.rs @@ -1,39 +1,87 @@ +//! Interop with [`arrow-buffer`]. + use crate::buffer::{Buffer, BufferMut, BufferType}; -use crate::FixedSize; -use arrow_buffer::{ArrowNativeType, BufferBuilder, ScalarBuffer}; +use crate::{FixedSize, Index}; +use arrow_buffer::{BufferBuilder, ScalarBuffer}; -/// A [BufferType] implementation for [BufferBuilder]. -pub struct ArrowBuffer; +/// A [`BufferType`] implementation for [`BufferBuilder`]. +#[derive(Clone, Copy)] +pub struct ArrowBufferBuilder; -impl BufferType for ArrowBuffer { +impl BufferType for ArrowBufferBuilder { type Buffer = BufferBuilder; } -impl Buffer for BufferBuilder { +impl Buffer for BufferBuilder { fn as_slice(&self) -> &[T] { BufferBuilder::as_slice(self) } } -impl BufferMut for BufferBuilder { +impl BufferMut for BufferBuilder { fn as_mut_slice(&mut self) -> &mut [T] { BufferBuilder::as_slice_mut(self) } } -/// A [BufferType] implementation for [ScalarBuffer]. +impl Index for BufferBuilder { + type Item<'a> = &'a T + where + Self: 'a; + + unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { + self.as_slice().get_unchecked(index) + } +} + +/// A [`BufferType`] implementation for [`ScalarBuffer`]. +#[derive(Clone, Copy)] pub struct ArrowScalarBuffer; impl BufferType for ArrowScalarBuffer { type Buffer = ScalarBuffer; } -impl Buffer for ScalarBuffer { +impl Buffer for ScalarBuffer { fn as_slice(&self) -> &[T] { self } } +impl Index for ScalarBuffer { + type Item<'a> = &'a T + where + Self: 'a; + + unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { + self.get_unchecked(index) + } +} + +/// A [`BufferType`] implementation for [`arrow_buffer::Buffer`]. +#[derive(Clone, Copy)] +pub struct ArrowBuffer; + +impl BufferType for ArrowBuffer { + type Buffer = arrow_buffer::Buffer; +} + +impl Buffer for arrow_buffer::Buffer { + fn as_slice(&self) -> &[T] { + arrow_buffer::Buffer::typed_data(self) + } +} + +impl Index for arrow_buffer::Buffer { + type Item<'a> = &'a u8 + where + Self: 'a; + + unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { + self.get_unchecked(index) + } +} + #[cfg(test)] mod tests { use super::*; @@ -43,15 +91,18 @@ mod tests { let buffer = arrow_buffer::BufferBuilder::from_iter([1, 2, 3, 4]); assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); - let mut buffer = arrow_buffer::BufferBuilder::from_iter([1u64, 2, 3, 4]); + let mut buffer_builder = arrow_buffer::BufferBuilder::from_iter([1_u64, 2, 3, 4]); assert_eq!( - <_ as BufferMut>::as_mut_slice(&mut buffer), + <_ as BufferMut>::as_mut_slice(&mut buffer_builder), &[1, 2, 3, 4] ); - <_ as BufferMut>::as_mut_slice(&mut buffer)[3] = 42; - assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 42]); + <_ as BufferMut>::as_mut_slice(&mut buffer_builder)[3] = 42; + assert_eq!( + <_ as Buffer>::as_slice(&buffer_builder), + &[1, 2, 3, 42] + ); - let buffer = arrow_buffer::ScalarBuffer::from_iter([1, 2, 3, 4]); - assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); + let scalar_buffer = arrow_buffer::ScalarBuffer::from_iter([1, 2, 3, 4]); + assert_eq!(<_ as Buffer>::as_slice(&scalar_buffer), &[1, 2, 3, 4]); } } diff --git a/src/arrow/length.rs b/src/arrow/length.rs index bed0ca59..50639134 100644 --- a/src/arrow/length.rs +++ b/src/arrow/length.rs @@ -1,5 +1,7 @@ +//! Length implementations for [`arrow-rs`] items. + use crate::Length; -use arrow_buffer::{ArrowNativeType, BufferBuilder, ScalarBuffer}; +use arrow_buffer::{ArrowNativeType, Buffer, BufferBuilder, ScalarBuffer}; impl Length for BufferBuilder { fn len(&self) -> usize { @@ -12,3 +14,9 @@ impl Length for ScalarBuffer { self.as_ref().len() } } + +impl Length for Buffer { + fn len(&self) -> usize { + Buffer::len(self) + } +} diff --git a/src/fixed_size.rs b/src/fixed_size.rs index fba8f304..b8e5e229 100644 --- a/src/fixed_size.rs +++ b/src/fixed_size.rs @@ -3,25 +3,21 @@ use crate::array::ArrayType; use std::{fmt::Debug, mem}; +#[cfg(feature = "arrow-rs")] +/// Module that re-exports the [`arrow_buffer::ArrowNativeType`] trait. +mod arrow_rs { + pub use arrow_buffer::ArrowNativeType as _arrow_rs_trait; +} #[cfg(not(feature = "arrow-rs"))] -#[cfg_attr(docsrs, doc(cfg(all())))] -/// Subtrait for fixed-size types. -/// -/// This exists to be used as trait bound where one or more of the supertraits -/// of this trait are required, and to restrict certain implementations to -/// fixed-size types. -/// -/// This trait is sealed to prevent downstream implementations. -pub trait FixedSize: ArrayType + Copy + Debug + Sized + sealed::Sealed + 'static { - /// The fixed-size of this type in bytes. - const SIZE: usize = mem::size_of::(); +/// Module with empty trait to work around [RFC-3399](https://rust-lang.github.io/rfcs/3399-cfg-attribute-in-where.html). +mod arrow_rs { + /// Empty trait. + pub trait Type {} + impl Type for T {} + pub use Type as _arrow_rs_trait; } +use arrow_rs::_arrow_rs_trait; -#[cfg(feature = "arrow-rs")] -use arrow_buffer::ArrowNativeType as _arrow_rs_trait; - -#[cfg(feature = "arrow-rs")] -#[cfg_attr(docsrs, doc(cfg(all())))] /// Subtrait for fixed-size types. /// /// This exists to be used as trait bound where one or more of the supertraits @@ -38,7 +34,7 @@ pub trait FixedSize: /// Private module for [`sealed::Sealed`] trait. mod sealed { - /// Used to seal [super::FixedSize]. + /// Used to seal [`super::FixedSize`]. pub trait Sealed {} // Prevent downstream implementation of [super::FixedSize]. @@ -54,16 +50,18 @@ impl FixedSize for u8 {} impl FixedSize for u16 {} impl FixedSize for u32 {} impl FixedSize for u64 {} +#[cfg(not(feature = "arrow-rs"))] impl FixedSize for u128 {} +#[cfg(not(feature = "arrow-rs"))] impl FixedSize for isize {} +#[cfg(not(feature = "arrow-rs"))] impl FixedSize for usize {} impl FixedSize for f32 {} impl FixedSize for f64 {} -impl FixedSize for () {} - +#[cfg(not(feature = "arrow-rs"))] impl FixedSize for [T; N] {} #[cfg(test)] @@ -72,9 +70,10 @@ mod tests { #[test] fn size() { - assert_eq!(<()>::SIZE, 0); assert_eq!(u8::SIZE, 1); + #[cfg(not(feature = "arrow-rs"))] assert_eq!(<[u16; 21]>::SIZE, 42); + #[cfg(not(feature = "arrow-rs"))] assert_eq!(<[u8; 1234]>::SIZE, 1234); } } From ef98d35b4660bcee5b7070218ab0a0ce60778b41 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Thu, 7 Dec 2023 13:25:40 +0100 Subject: [PATCH 15/27] Add `cargo-expand` to added `minimal` job --- .github/workflows/test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 407a2086..d6d60ffe 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -14,6 +14,9 @@ jobs: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable id: rust-toolchain + - uses: dtolnay/install@master + with: + crate: cargo-expand - uses: actions/cache@v3 with: path: | From e8f94b1862cc6cb72b44d5e0c57b14572c647aea Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Thu, 7 Dec 2023 13:26:25 +0100 Subject: [PATCH 16/27] And also to `msrv` job --- .github/workflows/test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d6d60ffe..2d812a46 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -42,6 +42,9 @@ jobs: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@1.70.0 id: rust-toolchain + - uses: dtolnay/install@master + with: + crate: cargo-expand - uses: actions/cache@v3 with: path: | From 25c7367b5eb2ada1a2ae5b3fad4c695e9956fa3e Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Thu, 7 Dec 2023 13:29:32 +0100 Subject: [PATCH 17/27] Fix some clippy warnings --- src/array/fixed_size_primitive.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/array/fixed_size_primitive.rs b/src/array/fixed_size_primitive.rs index eacfeb66..464308e7 100644 --- a/src/array/fixed_size_primitive.rs +++ b/src/array/fixed_size_primitive.rs @@ -207,7 +207,7 @@ mod tests { .collect::>(); assert_eq!(array_array.0.as_slice(), &[[1, 2], [3, 4]]); assert_eq!(<_ as Buffer>::as_bytes(&array_array.0), &[1, 2, 3, 4]); - } + }; } #[test] @@ -235,7 +235,7 @@ mod tests { .into_iter() .collect::>(); assert_eq!(array_array.into_iter().collect::>(), input_array); - } + }; } #[test] @@ -258,7 +258,7 @@ mod tests { .into_iter() .collect::>(); assert_eq!(array_array.len(), input_array.as_slice().len()); - } + }; let input_nullable = [Some(1_u64), None, Some(3), Some(4)]; let array_nullable = input_nullable From 12dd33212dd03723ad694351890af0732cc5a161 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Thu, 7 Dec 2023 13:32:46 +0100 Subject: [PATCH 18/27] Fix miri job --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2d812a46..aeead4d8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -165,7 +165,8 @@ jobs: ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}- ${{ runner.os }}-cargo- - run: cargo miri setup - - run: cargo miri test --workspace --all-targets --all-features + - run: cargo miri test --workspace --no-default-features + - run: cargo miri test --workspace --all-features coverage: name: Coverage From 4b24ad9e56dd3c5f7ec398d09e2bbc9e68433819 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Thu, 7 Dec 2023 13:36:41 +0100 Subject: [PATCH 19/27] Don't run miri on expand tests --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index aeead4d8..59a97649 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -165,8 +165,8 @@ jobs: ${{ runner.os }}-cargo-${{ steps.rust-toolchain.outputs.cachekey }}- ${{ runner.os }}-cargo- - run: cargo miri setup - - run: cargo miri test --workspace --no-default-features - - run: cargo miri test --workspace --all-features + - run: cargo miri test --no-default-features + - run: cargo miri test --all-features coverage: name: Coverage From ea99db71a558f3397a5aa169f95860bfae92d0dd Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 11 Dec 2023 16:14:39 +0100 Subject: [PATCH 20/27] Split out implementations and add more tests --- Cargo.toml | 19 +- narrow-derive/src/lib.rs | 3 +- src/array/fixed_size_primitive.rs | 9 +- src/array/string.rs | 10 + src/array/struct.rs | 2 +- src/arrow/array/boolean.rs | 155 +++++++++++++-- src/arrow/array/fixed_size_primitive.rs | 238 ++++++++++++++++------ src/arrow/array/mod.rs | 3 + src/arrow/array/string.rs | 188 ++++++++++++++++++ src/arrow/array/struct.rs | 1 + src/arrow/array/variable_size_list.rs | 250 ++++++++++++++++++++++++ src/arrow/bitmap.rs | 53 ----- src/arrow/buffer.rs | 108 ---------- src/arrow/buffer/boolean_buffer.rs | 78 ++++++++ src/arrow/buffer/buffer_builder.rs | 110 +++++++++++ src/arrow/buffer/mod.rs | 39 ++++ src/arrow/buffer/null_buffer.rs | 70 +++++++ src/arrow/buffer/offset_buffer.rs | 85 ++++++++ src/arrow/buffer/scalar_buffer.rs | 113 +++++++++++ src/arrow/length.rs | 22 --- src/arrow/mod.rs | 26 ++- 21 files changed, 1309 insertions(+), 273 deletions(-) create mode 100644 src/arrow/array/string.rs create mode 100644 src/arrow/array/struct.rs create mode 100644 src/arrow/array/variable_size_list.rs delete mode 100644 src/arrow/bitmap.rs delete mode 100644 src/arrow/buffer.rs create mode 100644 src/arrow/buffer/boolean_buffer.rs create mode 100644 src/arrow/buffer/buffer_builder.rs create mode 100644 src/arrow/buffer/mod.rs create mode 100644 src/arrow/buffer/null_buffer.rs create mode 100644 src/arrow/buffer/offset_buffer.rs create mode 100644 src/arrow/buffer/scalar_buffer.rs delete mode 100644 src/arrow/length.rs diff --git a/Cargo.toml b/Cargo.toml index f9bfca17..b2652edf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,17 +29,26 @@ categories.workspace = true [features] default = ["arrow-rs", "derive"] -arrow-rs = ["dep:arrow-array", "dep:arrow-buffer"] +arrow-rs = ["dep:arrow-array", "dep:arrow-buffer", "dep:arrow-schema"] derive = ["dep:narrow-derive"] [dependencies] -arrow-array = { version = "49.0.0", optional = true } -arrow-buffer = { version = "49.0.0", optional = true } +# arrow-array = { version = "49.0.0", optional = true } +# arrow-buffer = { version = "49.0.0", optional = true } +# arrow-schema = { version = "49.0.0", optional = true } +arrow-array = { git = "https://github.com/apache/arrow-rs", branch = "master", optional = true } +arrow-buffer = { git = "https://github.com/apache/arrow-rs", branch = "master", optional = true } +arrow-schema = { git = "https://github.com/apache/arrow-rs", branch = "master", optional = true } narrow-derive = { path = "narrow-derive", version = "^0.3.4", optional = true } [dev-dependencies] +# arrow-cast = { version = "49.0.0", default-features = false, features = ["prettyprint"] } +arrow-cast = { git = "https://github.com/apache/arrow-rs", branch = "master", default-features = false, features = ["prettyprint"] } +bytes = "1.5.0" criterion = { version = "0.5.1", default-features = false } rand = { version = "0.8.5", default-features = false, features = ["small_rng"] } +parquet = { git = "https://github.com/apache/arrow-rs", branch = "master", features = ["arrow"] } +# parquet = { version = "49.0.0", default-features = false, features = ["arrow"] } [profile.bench] lto = true @@ -48,3 +57,7 @@ codegen-units = 1 [[bench]] name = "narrow" harness = false + +# [[example]] +# name = "parquet" +# required-features = ["arrow-rs", "derive"] diff --git a/narrow-derive/src/lib.rs b/narrow-derive/src/lib.rs index 94679edc..fbed0840 100644 --- a/narrow-derive/src/lib.rs +++ b/narrow-derive/src/lib.rs @@ -12,7 +12,8 @@ const CRATE: &str = "narrow"; static NARROW: Lazy = Lazy::new(|| match proc_macro_crate::crate_name(CRATE) { Ok(found) => match found { - FoundCrate::Itself => "crate".to_string(), + // Requires `extern crate self as narrow` + FoundCrate::Itself => CRATE.to_string(), FoundCrate::Name(name) => name, }, _ => CRATE.to_string(), diff --git a/src/array/fixed_size_primitive.rs b/src/array/fixed_size_primitive.rs index 464308e7..a2d81eb9 100644 --- a/src/array/fixed_size_primitive.rs +++ b/src/array/fixed_size_primitive.rs @@ -3,7 +3,7 @@ use super::Array; use crate::{ bitmap::{Bitmap, BitmapRef, BitmapRefMut, ValidityBitmap}, - buffer::{BufferType, VecBuffer}, + buffer::{Buffer, BufferType, VecBuffer}, nullable::Nullable, validity::Validity, FixedSize, Index, Length, @@ -56,6 +56,13 @@ where { } +// todo(mbrobbel): buffer_ref traits? +impl AsRef<[T]> for FixedSizePrimitiveArray { + fn as_ref(&self) -> &[T] { + self.0.as_slice() + } +} + impl Default for FixedSizePrimitiveArray where diff --git a/src/array/string.rs b/src/array/string.rs index 7a30ec26..e2fe8d05 100644 --- a/src/array/string.rs +++ b/src/array/string.rs @@ -56,6 +56,16 @@ where } } +impl<'a, OffsetItem: OffsetElement, Buffer: BufferType> Extend<&'a &'a str> + for StringArray +where + VariableSizeBinaryArray: Extend<&'a [u8]>, +{ + fn extend>(&mut self, iter: I) { + self.0.extend(iter.into_iter().map(|str| str.as_bytes())); + } +} + impl<'a, OffsetItem: OffsetElement, Buffer: BufferType> Extend> for StringArray where diff --git a/src/array/struct.rs b/src/array/struct.rs index b3c1cc08..be41ebbf 100644 --- a/src/array/struct.rs +++ b/src/array/struct.rs @@ -14,7 +14,7 @@ pub trait StructArrayType: ArrayType { /// The array type that stores items of this struct. Note this differs from /// the [`ArrayType`] array because that wraps this array. Also note that this /// has no [`Array`] bound. - type Array; + type Array; // into this then requires all arraytype impls to provide a field } /// Array for product types. diff --git a/src/arrow/array/boolean.rs b/src/arrow/array/boolean.rs index 001c645e..67dbe4f8 100644 --- a/src/arrow/array/boolean.rs +++ b/src/arrow/array/boolean.rs @@ -1,7 +1,35 @@ //! Interop with `arrow-rs` boolean array. -use crate::{array::BooleanArray, bitmap::Bitmap, buffer::BufferType}; +use std::sync::Arc; + +use crate::{ + array::BooleanArray, arrow::ArrowArray, bitmap::Bitmap, buffer::BufferType, nullable::Nullable, + validity::Validity, +}; use arrow_buffer::{BooleanBuffer, NullBuffer}; +use arrow_schema::{DataType, Field}; + +impl ArrowArray for BooleanArray +where + Bitmap: Validity, +{ + type Array = arrow_array::BooleanArray; + + fn as_field(&self, name: &str) -> arrow_schema::Field { + Field::new(name, DataType::Boolean, NULLABLE) + } +} + +impl From> + for BooleanArray +where + Bitmap: Validity, + Self: From, +{ + fn from(value: Arc) -> Self { + Self::from(arrow_array::BooleanArray::from(value.to_data())) + } +} impl From> for arrow_array::BooleanArray where @@ -21,27 +49,118 @@ where } } +/// Panics when there are nulls +impl From for BooleanArray +where + Bitmap: From, +{ + fn from(value: arrow_array::BooleanArray) -> Self { + let (boolean_buffer, nulls_opt) = value.into_parts(); + match nulls_opt { + Some(_) => panic!("expected array without a null buffer"), + None => BooleanArray(boolean_buffer.into()), + } + } +} + +/// Panics when there are no nulls +// OR allocate one instead and use `TryFrom` conversion? +impl From for BooleanArray +where + Bitmap: From + From, +{ + fn from(value: arrow_array::BooleanArray) -> Self { + let (boolean_buffer, nulls_opt) = value.into_parts(); + match nulls_opt { + Some(null_buffer) => BooleanArray(Nullable { + data: boolean_buffer.into(), + validity: null_buffer.into(), + }), + None => panic!("expected array with a null buffer"), + } + } +} + #[cfg(test)] mod tests { - use arrow_array::Array; + use crate::{array::BooleanArray, buffer::ArcBuffer}; - use super::*; - use crate::{bitmap::ValidityBitmap, Length}; + const INPUT: [bool; 4] = [true, true, false, true]; + const INPUT_NULLABLE: [Option; 4] = [Some(true), None, Some(false), Some(true)]; #[test] - fn convert() { - let input = [true, false, true, true]; - let array = input.into_iter().collect::(); - assert_eq!(array.len(), 4); - let array_arrow: arrow_array::BooleanArray = array.into(); - assert_eq!(array_arrow.len(), 4); - - let input_nullable = [Some(true), None, Some(false)]; - let array_nullable = input_nullable.into_iter().collect::>(); - assert_eq!(array_nullable.len(), 3); - assert_eq!(array_nullable.null_count(), 1); - let array_arrow_nullable: arrow_array::BooleanArray = array_nullable.into(); - assert_eq!(array_arrow_nullable.len(), 3); - assert_eq!(array_arrow_nullable.null_count(), 1); + fn from() { + let boolean_array = INPUT.into_iter().collect::(); + assert_eq!( + arrow_array::BooleanArray::from(boolean_array) + .into_iter() + .flatten() + .collect::>(), + INPUT + ); + + let boolean_array_arc = INPUT + .into_iter() + .collect::>(); + assert_eq!( + arrow_array::BooleanArray::from(boolean_array_arc) + .into_iter() + .flatten() + .collect::>(), + INPUT + ); + + let boolean_array_nullable = INPUT_NULLABLE.into_iter().collect::>(); + assert_eq!( + arrow_array::BooleanArray::from(boolean_array_nullable) + .into_iter() + .collect::>(), + INPUT_NULLABLE + ); + } + + #[test] + #[should_panic(expected = "expected array with a null buffer")] + fn into_nullable() { + let boolean_array = arrow_array::BooleanArray::from(INPUT.into_iter().collect::>()); + let _ = BooleanArray::::from( + boolean_array, + ); + } + + #[test] + #[should_panic(expected = "expected array without a null buffer")] + fn into_non_nullable() { + let boolean_array_nullable = INPUT_NULLABLE + .into_iter() + .collect::(); + let _ = BooleanArray::::from( + boolean_array_nullable, + ); + } + + #[test] + fn into() { + let boolean_array = arrow_array::BooleanArray::from(INPUT.into_iter().collect::>()); + assert_eq!( + BooleanArray::::from( + boolean_array + ) + .into_iter() + .collect::>(), + INPUT + ); + + let boolean_array_nullable = INPUT_NULLABLE + .into_iter() + .collect::(); + assert_eq!( + BooleanArray::::from( + boolean_array_nullable + ) + .into_iter() + .collect::>(), + INPUT_NULLABLE + ); } } diff --git a/src/arrow/array/fixed_size_primitive.rs b/src/arrow/array/fixed_size_primitive.rs index 60e1b3bf..5d8b18df 100644 --- a/src/arrow/array/fixed_size_primitive.rs +++ b/src/arrow/array/fixed_size_primitive.rs @@ -1,104 +1,214 @@ //! Interop with `arrow-rs` fixed-sized primitive array. +use std::sync::Arc; + +use arrow_array::types::{ + ArrowPrimitiveType, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, + UInt16Type, UInt32Type, UInt64Type, UInt8Type, +}; +use arrow_buffer::{NullBuffer, ScalarBuffer}; +use arrow_schema::{DataType, Field}; + use crate::{ - array::FixedSizePrimitiveArray, - arrow::{buffer::ArrowBufferBuilder, ArrowBuffer}, - bitmap::Bitmap, - buffer::BufferType, - nullable::Nullable, - FixedSize, Length, + array::FixedSizePrimitiveArray, arrow::ArrowArray, bitmap::Bitmap, buffer::BufferType, + nullable::Nullable, validity::Validity, FixedSize, }; -use arrow_array::{types::ArrowPrimitiveType, PrimitiveArray}; -use arrow_buffer::{BooleanBuffer, NullBuffer, ScalarBuffer}; + +/// Create the `ArrowArray` impl and required conversions. +macro_rules! arrow_array_convert { + ($ty:ty, $primitive_type:ident, $data_type:ident) => { + impl ArrowArray + for FixedSizePrimitiveArray<$ty, NULLABLE, Buffer> + where + ::Buffer<$ty>: Validity, + { + type Array = arrow_array::PrimitiveArray<$primitive_type>; + + fn as_field(&self, name: &str) -> arrow_schema::Field { + Field::new(name, DataType::$data_type, NULLABLE) + } + } + + impl From> + for FixedSizePrimitiveArray<$ty, NULLABLE, Buffer> + where + ::Buffer<$ty>: Validity, + Self: From>, + { + fn from(value: Arc) -> Self { + Self::from(arrow_array::PrimitiveArray::<$primitive_type>::from( + value.to_data(), + )) + } + } + }; +} + +arrow_array_convert!(u8, UInt8Type, UInt8); +arrow_array_convert!(u16, UInt16Type, UInt16); +arrow_array_convert!(u32, UInt32Type, UInt32); +arrow_array_convert!(u64, UInt64Type, UInt64); + +arrow_array_convert!(i8, Int8Type, Int8); +arrow_array_convert!(i16, Int16Type, Int16); +arrow_array_convert!(i32, Int32Type, Int32); +arrow_array_convert!(i64, Int64Type, Int64); + +arrow_array_convert!(f32, Float32Type, Float32); +arrow_array_convert!(f64, Float64Type, Float64); impl, Buffer: BufferType> - From> for PrimitiveArray + From> for arrow_array::PrimitiveArray where - ::Buffer: Length + Into<::Buffer>, + ::Buffer: Into>, { fn from(value: FixedSizePrimitiveArray) -> Self { - let len = value.len(); - Self::new(ScalarBuffer::new(value.0.into().finish(), 0, len), None) + arrow_array::PrimitiveArray::new(value.0.into(), None) } } impl, Buffer: BufferType> - From> for PrimitiveArray + From> for arrow_array::PrimitiveArray where - ::Buffer: Length + Into<::Buffer>, - Bitmap: Into, + ::Buffer: Into>, + Bitmap: Into, { fn from(value: FixedSizePrimitiveArray) -> Self { - let len = value.len(); - Self::new( - ScalarBuffer::new(value.0.data.into().finish(), 0, len), - Some(NullBuffer::new(value.0.validity.into())), - ) + arrow_array::PrimitiveArray::new(value.0.data.into(), Some(value.0.validity.into())) + } +} + +/// Panics when there are nulls +impl, Buffer: BufferType> + From> for FixedSizePrimitiveArray +where + ::Buffer: From>, +{ + fn from(value: arrow_array::PrimitiveArray) -> Self { + let (_data_type, values, nulls_opt) = value.into_parts(); + match nulls_opt { + Some(_) => panic!("expected array without a null buffer"), + None => FixedSizePrimitiveArray(values.into()), + } } } -impl, U: FixedSize> From> - for FixedSizePrimitiveArray +/// Panics when there are no nulls +impl, Buffer: BufferType> + From> for FixedSizePrimitiveArray +where + ::Buffer: From>, + Bitmap: From, { - fn from(value: PrimitiveArray) -> Self { - let (_, scala_buffer, opt_null_buffer) = value.into_parts(); - if let Some(null_buffer) = opt_null_buffer { - Self(Nullable { - data: scala_buffer.into_inner(), + fn from(value: arrow_array::PrimitiveArray) -> Self { + let (_data_type, values, nulls_opt) = value.into_parts(); + match nulls_opt { + Some(null_buffer) => FixedSizePrimitiveArray(Nullable { + data: values.into(), validity: null_buffer.into(), - }) - } else { - Self(Nullable::from(scala_buffer.into_inner())) + }), + None => panic!("expected array with a null buffer"), } } } +impl From for FixedSizePrimitiveArray +where + ::Buffer: From, +{ + fn from(value: arrow_buffer::Buffer) -> Self { + Self(value.into()) + } +} + #[cfg(test)] -mod test { - use super::*; - use crate::array::Int8Array; +mod tests { + use arrow_array::types::{UInt16Type, UInt32Type}; + + use crate::array::FixedSizePrimitiveArray; + + const INPUT: [u32; 4] = [1, 2, 3, 4]; + const INPUT_NULLABLE: [Option; 4] = [Some(1), None, Some(3), Some(4)]; #[test] - fn arrow_array() { - use crate::{array::Int8Array, bitmap::ValidityBitmap}; - use arrow_array::{types::Int8Type, Array, PrimitiveArray}; + fn from() { + let fixed_size_primitive_array = INPUT.into_iter().collect::>(); + assert_eq!( + arrow_array::PrimitiveArray::::from(fixed_size_primitive_array) + .into_iter() + .flatten() + .collect::>(), + INPUT + ); - let input = [1, 2, 3, 4]; - let array_arrow_buffer = input + let fixed_size_primitive_array_nullable = INPUT_NULLABLE .into_iter() - .collect::>(); - let array_arrow_from = PrimitiveArray::::from(array_arrow_buffer); - assert_eq!(array_arrow_from.len(), 4); + .collect::>(); + assert_eq!( + arrow_array::PrimitiveArray::::from(fixed_size_primitive_array_nullable) + .into_iter() + .collect::>(), + INPUT_NULLABLE + ); + } - let array = - PrimitiveArray::::from(input.into_iter().collect::>()); - assert_eq!(array.len(), 4); + #[test] + #[should_panic(expected = "expected array with a null buffer")] + fn into_nullable() { + let primitive_array = INPUT + .into_iter() + .collect::>(); + let _ = FixedSizePrimitiveArray::< + u32, + true, + crate::arrow::buffer::scalar_buffer::ArrowScalarBuffer, + >::from(primitive_array); + } - let input_nullable = [Some(1), None, Some(3), Some(4)]; - let array_nullable = input_nullable + #[test] + #[should_panic(expected = "expected array without a null buffer")] + fn into_non_nullable() { + let primitive_array_nullable = INPUT_NULLABLE .into_iter() - .collect::>(); - assert_eq!(array_nullable.null_count(), 1); - let array_arrow = PrimitiveArray::::from(array_nullable); - assert_eq!(array_arrow.len(), 4); - assert_eq!(array_arrow.null_count(), 1); + .collect::>(); + let _ = FixedSizePrimitiveArray::< + u16, + false, + crate::arrow::buffer::scalar_buffer::ArrowScalarBuffer, + >::from(primitive_array_nullable); } #[test] - fn arrow_buffer() { - let input = [1, 2, 3, 4]; - let mut array = input + #[allow(clippy::redundant_closure_for_method_calls)] + fn into() { + let primitive_array = INPUT + .into_iter() + .collect::>(); + assert_eq!( + FixedSizePrimitiveArray::< + u32, + false, + crate::arrow::buffer::scalar_buffer::ArrowScalarBuffer, + >::from(primitive_array) + .into_iter() + .copied() + .collect::>(), + INPUT + ); + + let primitive_array_nullable = INPUT_NULLABLE .into_iter() - .collect::>(); - assert_eq!(array.len(), 4); - // Use arrow_buffer - array.0.append_n(5, 5); - assert_eq!(array.0.as_slice(), &[1, 2, 3, 4, 5, 5, 5, 5, 5]); - - let input_nullable = [Some(1), None, Some(3), Some(4)]; - let array_nullable = input_nullable + .collect::>(); + assert_eq!( + FixedSizePrimitiveArray::< + u16, + true, + crate::arrow::buffer::scalar_buffer::ArrowScalarBuffer, + >::from(primitive_array_nullable) .into_iter() - .collect::>(); - assert_eq!(array_nullable.len(), 4); + .map(|opt| opt.copied()) + .collect::>(), + INPUT_NULLABLE + ); } } diff --git a/src/arrow/array/mod.rs b/src/arrow/array/mod.rs index 2aa5b571..b2a1d970 100644 --- a/src/arrow/array/mod.rs +++ b/src/arrow/array/mod.rs @@ -2,3 +2,6 @@ mod boolean; mod fixed_size_primitive; +mod string; +mod r#struct; +mod variable_size_list; diff --git a/src/arrow/array/string.rs b/src/arrow/array/string.rs new file mode 100644 index 00000000..432f3ae8 --- /dev/null +++ b/src/arrow/array/string.rs @@ -0,0 +1,188 @@ +//! Interop with [`arrow-rs`] string array. + +use std::sync::Arc; + +use arrow_array::OffsetSizeTrait; +use arrow_buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; +use arrow_schema::{DataType, Field}; + +use crate::{ + array::{FixedSizePrimitiveArray, StringArray, VariableSizeBinaryArray}, + arrow::ArrowArray, + bitmap::Bitmap, + buffer::BufferType, + nullable::Nullable, + offset::{Offset, OffsetElement}, + validity::Validity, +}; + +impl + ArrowArray for StringArray +where + ::Buffer: Validity, +{ + type Array = arrow_array::GenericStringArray; + + fn as_field(&self, name: &str) -> arrow_schema::Field { + Field::new(name, DataType::Utf8, NULLABLE) + } +} + +impl + From> for StringArray +where + ::Buffer: Validity, + Self: From>, +{ + fn from(value: Arc) -> Self { + Self::from(arrow_array::GenericStringArray::::from( + value.to_data(), + )) + } +} + +impl + From> for arrow_array::GenericStringArray +where + ::Buffer: Into>, + FixedSizePrimitiveArray: Into, +{ + fn from(value: StringArray) -> Self { + arrow_array::GenericStringArray::new( + // Safety: + // - The narrow offfset buffer contains valid offset data + unsafe { OffsetBuffer::new_unchecked(value.0 .0.offsets.into()) }, + value.0 .0.data.into(), + None, + ) + } +} + +impl + From> for arrow_array::GenericStringArray +where + ::Buffer: Into>, + FixedSizePrimitiveArray: Into, + Bitmap: Into, +{ + fn from(value: StringArray) -> Self { + arrow_array::GenericStringArray::new( + // Safety: + // - The narrow offfset buffer contains valid offset data + unsafe { OffsetBuffer::new_unchecked(value.0 .0.offsets.data.into()) }, + value.0 .0.data.into(), + Some(value.0 .0.offsets.validity.into()), + ) + } +} + +/// Panics when there are nulls +impl + From> for StringArray +where + FixedSizePrimitiveArray: From, + ::Buffer: From>, +{ + fn from(value: arrow_array::GenericStringArray) -> Self { + let (offsets, values, nulls_opt) = value.into_parts(); + match nulls_opt { + Some(_) => panic!("expected array without a null buffer"), + None => StringArray(VariableSizeBinaryArray(Offset { + data: values.into(), + offsets: offsets.into_inner().into(), + })), + } + } +} + +/// Panics when there are no nulls +impl + From> for StringArray +where + FixedSizePrimitiveArray: From, + ::Buffer: From>, + Bitmap: From, +{ + fn from(value: arrow_array::GenericStringArray) -> Self { + let (offsets, values, nulls_opt) = value.into_parts(); + match nulls_opt { + Some(null_buffer) => StringArray(VariableSizeBinaryArray(Offset { + data: values.into(), + offsets: Nullable { + data: offsets.into_inner().into(), + validity: null_buffer.into(), + }, + })), + None => panic!("expected array with a null buffer"), + } + } +} + +#[cfg(test)] +mod tests { + use std::i64; + + use crate::{array::StringArray, arrow::scalar_buffer::ArrowScalarBuffer}; + + const INPUT: [&str; 3] = ["hello", "world", "!"]; + const INPUT_NULLABLE: [Option<&str>; 3] = [Some("hello"), None, Some("!")]; + + #[test] + fn from() { + let string_array = INPUT.into_iter().collect::(); + assert_eq!( + arrow_array::StringArray::from(string_array) + .into_iter() + .flatten() + .collect::>(), + INPUT + ); + + let string_array_nullable = INPUT_NULLABLE + .into_iter() + .collect::>(); + assert_eq!( + arrow_array::GenericStringArray::::from(string_array_nullable) + .into_iter() + .collect::>(), + INPUT_NULLABLE + ); + } + + #[test] + #[should_panic(expected = "expected array with a null buffer")] + fn into_nullable() { + let string_array = INPUT + .into_iter() + .map(ToOwned::to_owned) + .map(Option::Some) + .collect::(); + let _: StringArray = string_array.into(); + } + + #[test] + #[should_panic(expected = "expected array without a null buffer")] + fn into_non_nullable() { + let string_array_nullable = INPUT_NULLABLE + .into_iter() + .collect::(); + let _: StringArray = string_array_nullable.into(); + } + + #[test] + fn into() { + let string_array = INPUT + .into_iter() + .map(ToOwned::to_owned) + .map(Option::Some) + .collect::(); + let _: StringArray = string_array.into(); + // todo(mbrobbel): intoiterator for stringarray + + let string_array_nullable = INPUT_NULLABLE + .into_iter() + .collect::(); + let _: StringArray = string_array_nullable.into(); + // todo(mbrobbel): intoiterator for stringarray + } +} diff --git a/src/arrow/array/struct.rs b/src/arrow/array/struct.rs new file mode 100644 index 00000000..b8492f96 --- /dev/null +++ b/src/arrow/array/struct.rs @@ -0,0 +1 @@ +//! Interop with [`arrow-rs`] struct arrays. diff --git a/src/arrow/array/variable_size_list.rs b/src/arrow/array/variable_size_list.rs new file mode 100644 index 00000000..5e8d1128 --- /dev/null +++ b/src/arrow/array/variable_size_list.rs @@ -0,0 +1,250 @@ +//! Interop with [`arrow-rs`] string array. + +use std::sync::Arc; + +use arrow_array::OffsetSizeTrait; +use arrow_buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; +use arrow_schema::{DataType, Field}; + +use crate::{ + array::{Array, VariableSizeListArray}, + arrow::ArrowArray, + bitmap::Bitmap, + buffer::BufferType, + nullable::Nullable, + offset::{Offset, OffsetElement}, + validity::Validity, +}; + +impl< + T: ArrowArray, + const NULLABLE: bool, + OffsetItem: OffsetElement + OffsetSizeTrait, + Buffer: BufferType, + > ArrowArray for VariableSizeListArray +where + ::Buffer: Validity, +{ + type Array = arrow_array::GenericListArray; + + fn as_field(&self, name: &str) -> arrow_schema::Field { + Field::new( + name, + DataType::List(Arc::new(self.0.data.as_field("item"))), + NULLABLE, + ) + } +} + +impl< + T: Array, + const NULLABLE: bool, + OffsetItem: OffsetElement + OffsetSizeTrait, + Buffer: BufferType, + > From> for VariableSizeListArray +where + ::Buffer: Validity, + Self: From>, +{ + fn from(value: Arc) -> Self { + Self::from(arrow_array::GenericListArray::::from( + value.to_data(), + )) + } +} + +impl + From> + for arrow_array::GenericListArray +where + ::Buffer: Into>, + ::Array: From + 'static, +{ + fn from(value: VariableSizeListArray) -> Self { + arrow_array::GenericListArray::new( + Arc::new(value.0.data.as_field("item")), + // Safety: + // - The narrow offfset buffer contains valid offset data + unsafe { OffsetBuffer::new_unchecked(value.0.offsets.into()) }, + value.0.data.into_array_ref(), + None, + ) + } +} + +impl + From> + for arrow_array::GenericListArray +where + ::Buffer: Into>, + Bitmap: Into, + ::Array: From + 'static, +{ + fn from(value: VariableSizeListArray) -> Self { + arrow_array::GenericListArray::new( + Arc::new(value.0.data.as_field("item")), + // Safety: + // - The narrow offfset buffer contains valid offset data + unsafe { OffsetBuffer::new_unchecked(value.0.offsets.data.into()) }, + value.0.data.into_array_ref(), + Some(value.0.offsets.validity.into()), + ) + } +} + +/// Panics when there are nulls +impl + From> + for VariableSizeListArray +where + T: From>, + ::Buffer: From>, +{ + fn from(value: arrow_array::GenericListArray) -> Self { + let (_field, offsets, values, nulls_opt) = value.into_parts(); + match nulls_opt { + Some(_) => panic!("expected array without a null buffer"), + None => VariableSizeListArray(Offset { + data: values.into(), + offsets: offsets.into_inner().into(), + }), + } + } +} + +/// Panics when there are no nulls +impl + From> + for VariableSizeListArray +where + T: From>, + ::Buffer: From>, + Bitmap: From, +{ + fn from(value: arrow_array::GenericListArray) -> Self { + let (_field, offsets, values, nulls_opt) = value.into_parts(); + match nulls_opt { + Some(null_buffer) => VariableSizeListArray(Offset { + data: values.into(), + offsets: Nullable { + data: offsets.into_inner().into(), + validity: null_buffer.into(), + }, + }), + None => panic!("expected array with a null buffer"), + } + } +} + +#[cfg(test)] +mod tests { + use arrow_array::{ + builder::{ListBuilder, StringBuilder}, + types::UInt16Type, + Array as _, + }; + + use crate::{ + array::{StringArray, Uint16Array, VariableSizeListArray}, + arrow::scalar_buffer::ArrowScalarBuffer, + Length, + }; + + const INPUT: [&[u16]; 3] = [&[1, 2], &[3], &[4]]; + const INPUT_NULLABLE: [Option<&[&str]>; 3] = + [Some(&["hello", " "]), None, Some(&["world", "!"])]; + + #[test] + fn from() { + let variable_size_list_array = INPUT + .into_iter() + .collect::>(); + let list_array = arrow_array::ListArray::from(variable_size_list_array); + assert_eq!(list_array.len(), INPUT.len()); + + let variable_size_list_array_nullable = INPUT_NULLABLE + .into_iter() + .collect::>(); + let list_array_nullable = arrow_array::ListArray::from(variable_size_list_array_nullable); + assert_eq!(list_array_nullable.len(), INPUT_NULLABLE.len()); + } + + #[test] + #[should_panic(expected = "expected array with a null buffer")] + fn into_nullable() { + let list_array = arrow_array::ListArray::from_iter_primitive::( + INPUT + .into_iter() + .map(|opt| opt.iter().copied().map(Option::Some)) + .map(Option::Some), + ); + let _: VariableSizeListArray< + Uint16Array, + true, + i32, + ArrowScalarBuffer, + > = list_array.into(); + } + + #[test] + #[should_panic(expected = "expected array without a null buffer")] + fn into_non_nullable() { + let mut list_builder = + ListBuilder::with_capacity(StringBuilder::new(), INPUT_NULLABLE.len()); + INPUT_NULLABLE.into_iter().for_each(|opt| match opt { + Some(items) => { + for item in items { + list_builder.values().append_value(item); + } + list_builder.append(true); + } + None => { + list_builder.append(false); + } + }); + let list_array_nullable = list_builder.finish(); + let _: VariableSizeListArray< + StringArray, + false, + i32, + ArrowScalarBuffer, + > = list_array_nullable.into(); + } + + #[test] + fn into() { + let list_array = arrow_array::ListArray::from_iter_primitive::( + INPUT + .into_iter() + .map(|opt| opt.iter().copied().map(Option::Some)) + .map(Option::Some), + ); + let _: VariableSizeListArray< + Uint16Array, + false, + i32, + ArrowScalarBuffer, + > = list_array.into(); + + let mut list_builder = + ListBuilder::with_capacity(StringBuilder::new(), INPUT_NULLABLE.len()); + INPUT_NULLABLE.into_iter().for_each(|opt| match opt { + Some(items) => { + for item in items { + list_builder.values().append_value(item); + } + list_builder.append(true); + } + None => { + list_builder.append(false); + } + }); + let list_array_nullable = list_builder.finish(); + let _: VariableSizeListArray< + StringArray, + true, + i32, + ArrowScalarBuffer, + > = list_array_nullable.into(); + } +} diff --git a/src/arrow/bitmap.rs b/src/arrow/bitmap.rs deleted file mode 100644 index dc9cdfc1..00000000 --- a/src/arrow/bitmap.rs +++ /dev/null @@ -1,53 +0,0 @@ -//! Interop with [`arrow-rs`]'s bitmap. - -use super::{buffer::ArrowBuffer, ArrowBufferBuilder}; -use crate::{bitmap::Bitmap, buffer::BufferType}; -use arrow_buffer::{BooleanBuffer, NullBuffer}; - -impl From> for BooleanBuffer -where - ::Buffer: Into<::Buffer>, -{ - fn from(value: Bitmap) -> Self { - BooleanBuffer::new(value.buffer.into().finish(), value.offset, value.bits) - } -} - -impl From> for NullBuffer -where - ::Buffer: Into<::Buffer>, -{ - fn from(value: Bitmap) -> Self { - Self::new(value.into()) - } -} - -impl From for Bitmap { - fn from(value: NullBuffer) -> Self { - let bits = value.len(); - let offset = value.offset(); - Bitmap { - buffer: value.into_inner().into_inner(), - bits, - offset, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::Length; - - #[test] - fn convert() { - let input = vec![true, false, true]; - let bitmap_arrow = input.iter().collect::>(); - assert_eq!(bitmap_arrow.len(), 3); - let _: NullBuffer = bitmap_arrow.into(); - - let bitmap = input.into_iter().collect::(); - assert_eq!(bitmap.len(), 3); - let _: BooleanBuffer = bitmap.into(); - } -} diff --git a/src/arrow/buffer.rs b/src/arrow/buffer.rs deleted file mode 100644 index 62f4bcb5..00000000 --- a/src/arrow/buffer.rs +++ /dev/null @@ -1,108 +0,0 @@ -//! Interop with [`arrow-buffer`]. - -use crate::buffer::{Buffer, BufferMut, BufferType}; -use crate::{FixedSize, Index}; -use arrow_buffer::{BufferBuilder, ScalarBuffer}; - -/// A [`BufferType`] implementation for [`BufferBuilder`]. -#[derive(Clone, Copy)] -pub struct ArrowBufferBuilder; - -impl BufferType for ArrowBufferBuilder { - type Buffer = BufferBuilder; -} - -impl Buffer for BufferBuilder { - fn as_slice(&self) -> &[T] { - BufferBuilder::as_slice(self) - } -} - -impl BufferMut for BufferBuilder { - fn as_mut_slice(&mut self) -> &mut [T] { - BufferBuilder::as_slice_mut(self) - } -} - -impl Index for BufferBuilder { - type Item<'a> = &'a T - where - Self: 'a; - - unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { - self.as_slice().get_unchecked(index) - } -} - -/// A [`BufferType`] implementation for [`ScalarBuffer`]. -#[derive(Clone, Copy)] -pub struct ArrowScalarBuffer; - -impl BufferType for ArrowScalarBuffer { - type Buffer = ScalarBuffer; -} - -impl Buffer for ScalarBuffer { - fn as_slice(&self) -> &[T] { - self - } -} - -impl Index for ScalarBuffer { - type Item<'a> = &'a T - where - Self: 'a; - - unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { - self.get_unchecked(index) - } -} - -/// A [`BufferType`] implementation for [`arrow_buffer::Buffer`]. -#[derive(Clone, Copy)] -pub struct ArrowBuffer; - -impl BufferType for ArrowBuffer { - type Buffer = arrow_buffer::Buffer; -} - -impl Buffer for arrow_buffer::Buffer { - fn as_slice(&self) -> &[T] { - arrow_buffer::Buffer::typed_data(self) - } -} - -impl Index for arrow_buffer::Buffer { - type Item<'a> = &'a u8 - where - Self: 'a; - - unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { - self.get_unchecked(index) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn arrow() { - let buffer = arrow_buffer::BufferBuilder::from_iter([1, 2, 3, 4]); - assert_eq!(<_ as Buffer>::as_slice(&buffer), &[1, 2, 3, 4]); - - let mut buffer_builder = arrow_buffer::BufferBuilder::from_iter([1_u64, 2, 3, 4]); - assert_eq!( - <_ as BufferMut>::as_mut_slice(&mut buffer_builder), - &[1, 2, 3, 4] - ); - <_ as BufferMut>::as_mut_slice(&mut buffer_builder)[3] = 42; - assert_eq!( - <_ as Buffer>::as_slice(&buffer_builder), - &[1, 2, 3, 42] - ); - - let scalar_buffer = arrow_buffer::ScalarBuffer::from_iter([1, 2, 3, 4]); - assert_eq!(<_ as Buffer>::as_slice(&scalar_buffer), &[1, 2, 3, 4]); - } -} diff --git a/src/arrow/buffer/boolean_buffer.rs b/src/arrow/buffer/boolean_buffer.rs new file mode 100644 index 00000000..9ee17bc9 --- /dev/null +++ b/src/arrow/buffer/boolean_buffer.rs @@ -0,0 +1,78 @@ +//! Interop with [`arrow-rs`] boolean buffer. + +use arrow_buffer::BooleanBuffer; + +use crate::{bitmap::Bitmap, buffer::BufferType, Length}; + +impl Length for BooleanBuffer { + fn len(&self) -> usize { + BooleanBuffer::len(self) + } +} + +impl From> for BooleanBuffer +where + ::Buffer: Into, +{ + fn from(value: Bitmap) -> Self { + Self::new(value.buffer.into(), value.offset, value.bits) + } +} + +impl From for Bitmap +where + ::Buffer: From, +{ + fn from(value: BooleanBuffer) -> Self { + let bits = value.len(); + let offset = value.offset(); + Bitmap { + buffer: value.into_inner().into(), + bits, + offset, + } + } +} + +#[cfg(test)] +mod tests { + use crate::{arrow::buffer::scalar_buffer::ArrowScalarBuffer, buffer::ArcBuffer}; + + use super::*; + + const INPUT: [bool; 4] = [true, true, false, true]; + + #[test] + fn length() { + let boolean_buffer = INPUT.into_iter().collect::(); + assert_eq!(Length::len(&boolean_buffer), INPUT.len()); + } + + #[test] + fn from() { + let bitmap = INPUT.into_iter().collect::(); + assert_eq!( + BooleanBuffer::from(bitmap).into_iter().collect::>(), + INPUT + ); + + let bitmap_arc = INPUT.into_iter().collect::>(); + assert_eq!( + BooleanBuffer::from(bitmap_arc) + .into_iter() + .collect::>(), + INPUT + ); + } + + #[test] + fn into() { + let boolean_buffer = INPUT.into_iter().collect::(); + assert_eq!( + Bitmap::::from(boolean_buffer) + .into_iter() + .collect::>(), + INPUT + ); + } +} diff --git a/src/arrow/buffer/buffer_builder.rs b/src/arrow/buffer/buffer_builder.rs new file mode 100644 index 00000000..2270f242 --- /dev/null +++ b/src/arrow/buffer/buffer_builder.rs @@ -0,0 +1,110 @@ +//! Interop with [`arrow-rs`] buffer builder. + +use arrow_buffer::BufferBuilder; + +use crate::{ + array::FixedSizePrimitiveArray, + buffer::{Buffer, BufferMut, BufferType}, + FixedSize, Index, Length, +}; + +/// A [`BufferType`] implementation for [`BufferBuilder`]. +#[derive(Clone, Copy)] +pub struct ArrowBufferBuilder; + +impl BufferType for ArrowBufferBuilder { + type Buffer = BufferBuilder; +} + +impl Buffer for BufferBuilder { + fn as_slice(&self) -> &[T] { + BufferBuilder::as_slice(self) + } +} + +impl BufferMut for BufferBuilder { + fn as_mut_slice(&mut self) -> &mut [T] { + BufferBuilder::as_slice_mut(self) + } +} + +impl Index for BufferBuilder { + type Item<'a> = &'a T + where + Self: 'a; + + unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { + self.as_slice().get_unchecked(index) + } +} + +impl Length for BufferBuilder { + fn len(&self) -> usize { + BufferBuilder::len(self) + } +} + +impl From> + for BufferBuilder +{ + fn from(value: FixedSizePrimitiveArray) -> Self { + // Note: this makes a copy + let buffer = arrow_buffer::MutableBuffer::from(value.0.as_slice().to_vec()); + BufferBuilder::new_from_buffer(buffer) + } +} + +impl From> + for FixedSizePrimitiveArray +where + ::Buffer: From, +{ + fn from(mut value: BufferBuilder) -> Self { + FixedSizePrimitiveArray(value.finish().into()) + } +} + +#[cfg(test)] +mod tests { + use crate::{arrow::scalar_buffer::ArrowScalarBuffer, buffer::ArcBuffer}; + + use super::*; + + const INPUT: [u32; 4] = [1, 2, 3, 4]; + + #[test] + fn length() { + let buffer_builder = INPUT.into_iter().collect::>(); + assert_eq!(Length::len(&buffer_builder), INPUT.len()); + } + + #[test] + fn from() { + let fixed_size_primitive_array = INPUT.into_iter().collect::>(); + assert_eq!( + BufferBuilder::from(fixed_size_primitive_array).as_slice(), + INPUT + ); + + let fixed_size_primitive_array_arc = + INPUT + .into_iter() + .collect::>(); + assert_eq!( + BufferBuilder::from(fixed_size_primitive_array_arc).as_slice(), + INPUT + ); + } + + #[test] + fn into() { + let buffer_builder = INPUT.into_iter().collect::>(); + assert_eq!( + FixedSizePrimitiveArray::<_, false, ArrowScalarBuffer>::from(buffer_builder) + .into_iter() + .copied() + .collect::>(), + INPUT + ); + } +} diff --git a/src/arrow/buffer/mod.rs b/src/arrow/buffer/mod.rs new file mode 100644 index 00000000..d5a03bc6 --- /dev/null +++ b/src/arrow/buffer/mod.rs @@ -0,0 +1,39 @@ +//! Interop with [`arrow-rs`] buffer types. + +pub mod boolean_buffer; +pub mod buffer_builder; +pub mod null_buffer; +pub mod offset_buffer; +pub mod scalar_buffer; + +// /// A [`BufferType`] implementation for &'a arrow Buffer. +// /// +// /// Stores items `T` in an arrow `&Buffer`. +// #[derive(Clone, Copy, Debug)] +// pub struct ArrowRefBuffer<'a>(PhantomData<&'a ()>); + +// impl<'a> BufferType for ArrowRefBuffer<'a> { +// type Buffer = &'a [T]; +// } + +// impl Buffer for &arrow_buffer::Buffer { +// fn as_slice(&self) -> &[T] { +// self.typed_data() +// } +// } + +// impl Length for &arrow_buffer::Buffer { +// fn len(&self) -> usize { +// arrow_buffer::Buffer::len(self) +// } +// } + +// impl Index for &arrow_buffer::Buffer { +// type Item<'a> = &'a u8 +// where +// Self: 'a; + +// unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { +// self.get_unchecked(index) +// } +// } diff --git a/src/arrow/buffer/null_buffer.rs b/src/arrow/buffer/null_buffer.rs new file mode 100644 index 00000000..69a25404 --- /dev/null +++ b/src/arrow/buffer/null_buffer.rs @@ -0,0 +1,70 @@ +//! Interop with [`arrow-rs`] null buffer. + +use arrow_buffer::{BooleanBuffer, NullBuffer}; + +use crate::{bitmap::Bitmap, buffer::BufferType, Length}; + +impl Length for NullBuffer { + fn len(&self) -> usize { + NullBuffer::len(self) + } +} + +impl From> for NullBuffer +where + Bitmap: Into, +{ + fn from(value: Bitmap) -> Self { + Self::new(value.into()) + } +} + +impl From for Bitmap +where + Bitmap: From, +{ + fn from(value: NullBuffer) -> Self { + Bitmap::from(value.into_inner()) + } +} + +#[cfg(test)] +mod tests { + use crate::{arrow::buffer::scalar_buffer::ArrowScalarBuffer, buffer::ArcBuffer}; + + use super::*; + + const INPUT: [bool; 4] = [true, true, false, true]; + + #[test] + fn length() { + let null_buffer = INPUT.into_iter().collect::(); + assert_eq!(Length::len(&null_buffer), INPUT.len()); + } + + #[test] + fn from() { + let bitmap = INPUT.into_iter().collect::(); + assert_eq!( + NullBuffer::from(bitmap).into_iter().collect::>(), + INPUT + ); + + let bitmap_arc = INPUT.into_iter().collect::>(); + assert_eq!( + NullBuffer::from(bitmap_arc).into_iter().collect::>(), + INPUT + ); + } + + #[test] + fn into() { + let null_buffer = INPUT.into_iter().collect::(); + assert_eq!( + Bitmap::::from(null_buffer) + .into_iter() + .collect::>(), + INPUT + ); + } +} diff --git a/src/arrow/buffer/offset_buffer.rs b/src/arrow/buffer/offset_buffer.rs new file mode 100644 index 00000000..7cc80b33 --- /dev/null +++ b/src/arrow/buffer/offset_buffer.rs @@ -0,0 +1,85 @@ +//! Interop with [`arrow-rs`] offset buffer. + +//! Interop with [`arrow-rs`] null buffer. + +use arrow_buffer::{OffsetBuffer, ScalarBuffer}; + +use crate::{array::FixedSizePrimitiveArray, buffer::BufferType, offset::OffsetElement, Length}; + +impl Length for OffsetBuffer { + fn len(&self) -> usize { + self.as_ref().len() + } +} + +impl + From> for OffsetBuffer +where + FixedSizePrimitiveArray: Into>, +{ + fn from(value: FixedSizePrimitiveArray) -> Self { + Self::new(value.into()) + } +} + +impl From> + for FixedSizePrimitiveArray +where + ::Buffer: From>, +{ + fn from(value: OffsetBuffer) -> Self { + Self(value.into_inner().into()) + } +} + +#[cfg(test)] +mod tests { + + use crate::{ + array::FixedSizePrimitiveArray, arrow::buffer::scalar_buffer::ArrowScalarBuffer, + buffer::ArcBuffer, + }; + + use super::*; + + const INPUT: [usize; 4] = [1, 1, 2, 2]; + + #[test] + fn length() { + let offset_buffer = OffsetBuffer::::from_lengths(INPUT); + assert_eq!(Length::len(&offset_buffer), INPUT.len() + 1); + } + + #[test] + fn from() { + let fixed_size_primitive_array = INPUT + .into_iter() + .map(|x| x.try_into().expect("")) + .collect::>(); + assert_eq!( + OffsetBuffer::::from(fixed_size_primitive_array).as_ref(), + [1, 1, 2, 2] + ); + + let fixed_size_primitive_array_arc = INPUT + .into_iter() + .map(|x| x.try_into().expect("")) + .collect::>( + ); + assert_eq!( + OffsetBuffer::::from(fixed_size_primitive_array_arc).as_ref(), + [1, 1, 2, 2] + ); + } + + #[test] + fn into() { + let offset_buffer = OffsetBuffer::::from_lengths(INPUT); + assert_eq!( + FixedSizePrimitiveArray::::from(offset_buffer.clone()) + .into_iter() + .collect::>(), + offset_buffer.iter().collect::>() + ); + } +} diff --git a/src/arrow/buffer/scalar_buffer.rs b/src/arrow/buffer/scalar_buffer.rs new file mode 100644 index 00000000..8ea3e194 --- /dev/null +++ b/src/arrow/buffer/scalar_buffer.rs @@ -0,0 +1,113 @@ +//! Interop with [`arrow-rs`] scalar buffer. + +use arrow_buffer::ScalarBuffer; + +use crate::{ + array::FixedSizePrimitiveArray, + buffer::{Buffer, BufferType}, + FixedSize, Index, Length, +}; + +/// A [`BufferType`] implementation for [`ScalarBuffer`]. +#[derive(Clone, Copy)] +pub struct ArrowScalarBuffer; + +impl BufferType for ArrowScalarBuffer { + type Buffer = ScalarBuffer; +} + +impl Buffer for ScalarBuffer { + fn as_slice(&self) -> &[T] { + self + } +} + +impl Index for ScalarBuffer { + type Item<'a> = &'a T + where + Self: 'a; + + unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { + self.get_unchecked(index) + } +} + +impl Length for ScalarBuffer { + fn len(&self) -> usize { + self.as_ref().len() + } +} + +impl From> + for ScalarBuffer +where + ::Buffer: AsRef<[T]>, +{ + fn from(value: FixedSizePrimitiveArray) -> Self { + let len = value.len(); + // Note: this makes a copy + let buffer = arrow_buffer::Buffer::from_slice_ref(value.0.as_ref()); + ScalarBuffer::new(buffer, 0, len) + } +} + +impl From> + for FixedSizePrimitiveArray +where + ::Buffer: From, +{ + fn from(value: ScalarBuffer) -> Self { + FixedSizePrimitiveArray(value.into_inner().into()) + } +} + +#[cfg(test)] +mod tests { + use crate::buffer::ArcBuffer; + + use super::*; + + const INPUT: [u32; 4] = [1, 2, 3, 4]; + + #[test] + fn length() { + let scalar_buffer = INPUT.into_iter().collect::>(); + assert_eq!(Length::len(&scalar_buffer), INPUT.len()); + } + + #[test] + fn from() { + let fixed_size_primitive_array = INPUT.into_iter().collect::>(); + assert_eq!( + ScalarBuffer::from(fixed_size_primitive_array) + .into_iter() + .copied() + .collect::>(), + INPUT + ); + + let fixed_size_primitive_array_arc = + INPUT + .into_iter() + .collect::>(); + assert_eq!( + ScalarBuffer::from(fixed_size_primitive_array_arc) + .into_iter() + .copied() + .collect::>(), + INPUT + ); + } + + #[test] + fn into() { + let scalar_buffer = INPUT.into_iter().collect::>(); + assert_eq!( + FixedSizePrimitiveArray::<_, false, ArrowScalarBuffer>::from(scalar_buffer) + .into_iter() + .copied() + .collect::>(), + INPUT + ); + } +} diff --git a/src/arrow/length.rs b/src/arrow/length.rs deleted file mode 100644 index 50639134..00000000 --- a/src/arrow/length.rs +++ /dev/null @@ -1,22 +0,0 @@ -//! Length implementations for [`arrow-rs`] items. - -use crate::Length; -use arrow_buffer::{ArrowNativeType, Buffer, BufferBuilder, ScalarBuffer}; - -impl Length for BufferBuilder { - fn len(&self) -> usize { - BufferBuilder::len(self) - } -} - -impl Length for ScalarBuffer { - fn len(&self) -> usize { - self.as_ref().len() - } -} - -impl Length for Buffer { - fn len(&self) -> usize { - Buffer::len(self) - } -} diff --git a/src/arrow/mod.rs b/src/arrow/mod.rs index 8293e572..42c32ccd 100644 --- a/src/arrow/mod.rs +++ b/src/arrow/mod.rs @@ -3,7 +3,29 @@ //! [`arrow-rs`]: https://crates.io/crates/arrow mod array; -mod bitmap; +// mod bitmap; + mod buffer; pub use buffer::*; -mod length; + +use crate::array::Array; +use arrow_array::ArrayRef; +use arrow_schema::Field; +use std::sync::Arc; + +/// Extension trait of [`Array`] for [`arrow-rs`] interop. +pub trait ArrowArray: Array + Sized { + /// The corresponding arrow array + type Array: arrow_array::Array; // + From + 'static; + + /// Returns as array ref + fn into_array_ref(self) -> ArrayRef + where + Self::Array: From + 'static, + { + Arc::::new(self.into()) + } + + /// Returns the field of this array. + fn as_field(&self, name: &str) -> Field; +} From 66b497031f655497b203b06bb8fcf30439a8f71a Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 11 Dec 2023 16:22:48 +0100 Subject: [PATCH 21/27] Generalize stringarray extend impl --- src/array/string.rs | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/array/string.rs b/src/array/string.rs index e2fe8d05..051a43b9 100644 --- a/src/array/string.rs +++ b/src/array/string.rs @@ -46,23 +46,15 @@ where } } -impl<'a, OffsetItem: OffsetElement, Buffer: BufferType> Extend<&'a str> +impl<'a, T: ?Sized, OffsetItem: OffsetElement, Buffer: BufferType> Extend<&'a T> for StringArray where + T: AsRef, VariableSizeBinaryArray: Extend<&'a [u8]>, { - fn extend>(&mut self, iter: I) { - self.0.extend(iter.into_iter().map(str::as_bytes)); - } -} - -impl<'a, OffsetItem: OffsetElement, Buffer: BufferType> Extend<&'a &'a str> - for StringArray -where - VariableSizeBinaryArray: Extend<&'a [u8]>, -{ - fn extend>(&mut self, iter: I) { - self.0.extend(iter.into_iter().map(|str| str.as_bytes())); + fn extend>(&mut self, iter: I) { + self.0 + .extend(iter.into_iter().map(|item| item.as_ref().as_bytes())); } } From bc0f4595a6177ff8a7869bbc77d53cec3a839a12 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 11 Dec 2023 16:40:35 +0100 Subject: [PATCH 22/27] Generalize more string array methods --- src/array/string.rs | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/array/string.rs b/src/array/string.rs index 051a43b9..65ed4207 100644 --- a/src/array/string.rs +++ b/src/array/string.rs @@ -58,14 +58,17 @@ where } } -impl<'a, OffsetItem: OffsetElement, Buffer: BufferType> Extend> +impl<'a, T: ?Sized, OffsetItem: OffsetElement, Buffer: BufferType> Extend> for StringArray where + T: AsRef, VariableSizeBinaryArray: Extend>, { - fn extend>>(&mut self, iter: I) { - self.0 - .extend(iter.into_iter().map(|opt| opt.map(str::as_bytes))); + fn extend>>(&mut self, iter: I) { + self.0.extend( + iter.into_iter() + .map(|opt| opt.map(|item| item.as_ref().as_bytes())), + ); } } @@ -101,23 +104,33 @@ where } } -impl<'a, OffsetItem: OffsetElement, Buffer: BufferType> FromIterator<&'a str> +impl<'a, T: ?Sized, OffsetItem: OffsetElement, Buffer: BufferType> FromIterator<&'a T> for StringArray where + T: AsRef, VariableSizeBinaryArray: FromIterator<&'a [u8]>, { - fn from_iter>(iter: I) -> Self { - Self(iter.into_iter().map(str::as_bytes).collect()) + fn from_iter>(iter: I) -> Self { + Self( + iter.into_iter() + .map(|item| item.as_ref().as_bytes()) + .collect(), + ) } } -impl<'a, OffsetItem: OffsetElement, Buffer: BufferType> FromIterator> +impl<'a, T: ?Sized, OffsetItem: OffsetElement, Buffer: BufferType> FromIterator> for StringArray where + T: AsRef, VariableSizeBinaryArray: FromIterator>, { - fn from_iter>>(iter: I) -> Self { - Self(iter.into_iter().map(|x| x.map(str::as_bytes)).collect()) + fn from_iter>>(iter: I) -> Self { + Self( + iter.into_iter() + .map(|x| x.map(|item| item.as_ref().as_bytes())) + .collect(), + ) } } From 094f3a0cc51916d3c8d7bcec11b778a6ed46769c Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 11 Dec 2023 22:34:46 +0100 Subject: [PATCH 23/27] Add parquet example --- Cargo.toml | 13 +- examples/parquet.rs | 56 +++++ narrow-derive/Cargo.toml | 4 + narrow-derive/src/struct.rs | 132 ++++++++++- .../expand/struct/named/generic.expanded.rs | 60 +++++ .../struct/named/generic_option.expanded.rs | 136 +++++++++++ .../expand/struct/named/simple.expanded.rs | 121 ++++++++++ .../struct/unit/const_generic.expanded.rs | 17 ++ .../unit/const_generic_default.expanded.rs | 17 ++ .../tests/expand/struct/unit/self.expanded.rs | 18 ++ .../expand/struct/unit/simple.expanded.rs | 12 + .../struct/unit/where_clause.expanded.rs | 25 ++ .../expand/struct/unnamed/generic.expanded.rs | 118 ++++++++++ .../struct/unnamed/lifetime.expanded.rs | 58 +++++ .../struct/unnamed/multiple.expanded.rs | 141 ++++++++++++ .../expand/struct/unnamed/nested.expanded.rs | 102 ++++++++ .../struct/unnamed/nested_generic.expanded.rs | 195 ++++++++++++++++ .../expand/struct/unnamed/simple.expanded.rs | 56 +++++ src/arrow/array/boolean.rs | 2 +- src/arrow/array/fixed_size_primitive.rs | 2 +- src/arrow/array/mod.rs | 1 + src/arrow/array/string.rs | 2 +- src/arrow/array/struct.rs | 217 ++++++++++++++++++ src/arrow/array/variable_size_list.rs | 12 +- src/arrow/mod.rs | 16 +- tests/derive.rs | 3 + 26 files changed, 1505 insertions(+), 31 deletions(-) create mode 100644 examples/parquet.rs diff --git a/Cargo.toml b/Cargo.toml index b2652edf..a14f41b1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,25 +29,26 @@ categories.workspace = true [features] default = ["arrow-rs", "derive"] -arrow-rs = ["dep:arrow-array", "dep:arrow-buffer", "dep:arrow-schema"] +arrow-rs = ["dep:arrow-array", "dep:arrow-buffer", "dep:arrow-schema", "narrow-derive/arrow-rs"] derive = ["dep:narrow-derive"] [dependencies] # arrow-array = { version = "49.0.0", optional = true } # arrow-buffer = { version = "49.0.0", optional = true } # arrow-schema = { version = "49.0.0", optional = true } -arrow-array = { git = "https://github.com/apache/arrow-rs", branch = "master", optional = true } -arrow-buffer = { git = "https://github.com/apache/arrow-rs", branch = "master", optional = true } -arrow-schema = { git = "https://github.com/apache/arrow-rs", branch = "master", optional = true } +arrow-array = { git = "https://github.com/mbrobbel/arrow-rs", branch = "buffer-builder-scalar-buffer", optional = true } +# arrow-buffer = { git = "https://github.com/apache/arrow-rs", branch = "master", optional = true } +arrow-buffer = { git = "https://github.com/mbrobbel/arrow-rs", branch = "buffer-builder-scalar-buffer", optional = true } +arrow-schema = { git = "https://github.com/mbrobbel/arrow-rs", branch = "buffer-builder-scalar-buffer", optional = true } narrow-derive = { path = "narrow-derive", version = "^0.3.4", optional = true } [dev-dependencies] # arrow-cast = { version = "49.0.0", default-features = false, features = ["prettyprint"] } -arrow-cast = { git = "https://github.com/apache/arrow-rs", branch = "master", default-features = false, features = ["prettyprint"] } +arrow-cast = { git = "https://github.com/mbrobbel/arrow-rs", branch = "buffer-builder-scalar-buffer", default-features = false, features = ["prettyprint"] } bytes = "1.5.0" criterion = { version = "0.5.1", default-features = false } rand = { version = "0.8.5", default-features = false, features = ["small_rng"] } -parquet = { git = "https://github.com/apache/arrow-rs", branch = "master", features = ["arrow"] } +parquet = { git = "https://github.com/mbrobbel/arrow-rs", branch = "buffer-builder-scalar-buffer", features = ["arrow"] } # parquet = { version = "49.0.0", default-features = false, features = ["arrow"] } [profile.bench] diff --git a/examples/parquet.rs b/examples/parquet.rs new file mode 100644 index 00000000..1bef46a0 --- /dev/null +++ b/examples/parquet.rs @@ -0,0 +1,56 @@ +fn main() { + use arrow_array::RecordBatch; + use arrow_cast::pretty; + use bytes::Bytes; + use narrow::{array::StructArray, arrow::buffer_builder::ArrowBufferBuilder, ArrayType}; + use parquet::arrow::{arrow_reader::ParquetRecordBatchReader, ArrowWriter}; + + #[derive(ArrayType, Default)] + struct Bar(Option); + + #[derive(ArrayType, Default)] + struct Foo { + a: u32, + b: Option, + c: bool, + d: String, + e: Option>>, + f: Bar, + } + let input = [ + Foo { + a: 1, + b: Some(2), + c: true, + d: "hello world!".to_string(), + e: Some(vec![Some(true), None]), + f: Bar(Some(true)), + }, + Foo { + a: 42, + b: None, + c: false, + d: "narrow".to_string(), + e: None, + f: Bar(None), + }, + ]; + + let narrow_array = input + .into_iter() + .collect::>(); + + let arrow_struct_array = arrow_array::StructArray::from(narrow_array); + let record_batch = RecordBatch::from(arrow_struct_array); + pretty::print_batches(&[record_batch.clone()]).unwrap(); + + let mut buffer = Vec::new(); + let mut writer = ArrowWriter::try_new(&mut buffer, record_batch.schema(), None).unwrap(); + writer.write(&record_batch).unwrap(); + writer.close().unwrap(); + + let mut reader = ParquetRecordBatchReader::try_new(Bytes::from(buffer), 1024).unwrap(); + let read = reader.next().unwrap().unwrap(); + pretty::print_batches(&[read.clone()]).unwrap(); + assert_eq!(record_batch, read); +} diff --git a/narrow-derive/Cargo.toml b/narrow-derive/Cargo.toml index 78c2f749..5980e071 100644 --- a/narrow-derive/Cargo.toml +++ b/narrow-derive/Cargo.toml @@ -12,6 +12,10 @@ license.workspace = true keywords.workspace = true categories.workspace = true +[features] +default = ["arrow-rs"] +arrow-rs = [] + [lib] proc-macro = true diff --git a/narrow-derive/src/struct.rs b/narrow-derive/src/struct.rs index c611a5d3..df773e8c 100644 --- a/narrow-derive/src/struct.rs +++ b/narrow-derive/src/struct.rs @@ -4,8 +4,7 @@ use quote::{format_ident, quote, ToTokens, TokenStreamExt}; use std::iter::{Enumerate, Map}; use syn::{ parse2, parse_quote, punctuated, token::Paren, visit_mut::VisitMut, DeriveInput, Field, Fields, - File, Generics, Ident, Index, ItemImpl, ItemStruct, Type, TypeParamBound, Visibility, - WherePredicate, + Generics, Ident, Index, ItemImpl, ItemStruct, Type, TypeParamBound, Visibility, WherePredicate, }; pub(super) fn derive(input: &DeriveInput, fields: &Fields) -> TokenStream { @@ -23,6 +22,12 @@ pub(super) fn derive(input: &DeriveInput, fields: &Fields) -> TokenStream { // Generate the StructArrayType impl. let struct_array_type_impl = input.struct_array_type_impl(); + // Optionally generate the StructArrayTypeFields impl. + let struct_array_type_fields_impl = input.struct_array_type_fields_impl(); + + // Optionally generates the conversion to vec of array refs + let struct_array_into_array_refs = input.struct_array_into_array_refs(); + // Generate the array wrapper struct definition. let array_struct_def = input.array_struct_def(); @@ -45,6 +50,10 @@ pub(super) fn derive(input: &DeriveInput, fields: &Fields) -> TokenStream { #struct_array_type_impl + #struct_array_type_fields_impl + + #struct_array_into_array_refs + #array_struct_def #array_default_impl @@ -164,7 +173,7 @@ impl Struct<'_> { } /// Add an `StructArrayType` implementation for the derive input. - fn struct_array_type_impl(&self) -> File { + fn struct_array_type_impl(&self) -> ItemImpl { let narrow = util::narrow(); // Generics @@ -188,6 +197,111 @@ impl Struct<'_> { parse2(tokens).expect("struct_array_type_impl") } + /// Add an `StructArrayTypeFields` implementation for the derive input. + fn struct_array_type_fields_impl(&self) -> ItemImpl { + let narrow = util::narrow(); + + // Generics + let mut generics = self.generics.clone(); + SelfReplace::new(self.ident, &generics).visit_generics_mut(&mut generics); + AddTypeParamBound(Self::array_type_bound()).visit_generics_mut(&mut generics); + AddTypeParam(parse_quote!(Buffer: #narrow::buffer::BufferType)) + .visit_generics_mut(&mut generics); + generics + .make_where_clause() + .predicates + .extend(self.where_predicate_fields(parse_quote!(#narrow::arrow::ArrowArray))); + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + + // Fields + let field_ident = self.field_idents().map(|ident| ident.to_string()); + let field_ty = self.field_types(); + let fields = quote!( + #( + ::std::sync::Arc::new(<<#field_ty as ::narrow::array::ArrayType>::Array as #narrow::arrow::ArrowArray>::as_field(#field_ident)), + )* + ); + + let ident = self.array_struct_ident(); + let tokens = quote!( + impl #impl_generics #narrow::arrow::StructArrayTypeFields for #ident #ty_generics #where_clause { + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + #fields + ]) + } + } + ); + parse2(tokens).expect("struct_array_type_fields_impl") + } + + /// Add an `Into` implementation for the array to convert to a vec of array refs + fn struct_array_into_array_refs(&self) -> ItemImpl { + let narrow = util::narrow(); + + // Generics + let mut generics = self.generics.clone(); + SelfReplace::new(self.ident, &generics).visit_generics_mut(&mut generics); + AddTypeParamBound(Self::array_type_bound()).visit_generics_mut(&mut generics); + AddTypeParam(parse_quote!(Buffer: #narrow::buffer::BufferType)) + .visit_generics_mut(&mut generics); + generics + .make_where_clause() + .predicates + .extend(self.where_predicate_fields_arrow_array_into()); + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + + // Fields + let field_ty = self.field_types(); + let field_arrays = match self.fields { + Fields::Named(_) => { + let field_ident = self.field_idents(); + quote!( + #( + ::std::sync::Arc::< + <<#field_ty as #narrow::array::ArrayType>::Array as #narrow::arrow::ArrowArray>::Array + >::new(value.#field_ident.into()), + )* + ) + } + Fields::Unnamed(_) => { + let field_idx = self + .fields + .iter() + .enumerate() + .map(|(idx, _)| Index::from(idx)); + quote!( + #( + ::std::sync::Arc::< + <<#field_ty as #narrow::array::ArrayType>::Array as #narrow::arrow::ArrowArray>::Array + >::new(value.#field_idx.into()), + )* + ) + } + Fields::Unit => { + quote!( + #( + ::std::sync::Arc::< + <<#field_ty as #narrow::array::ArrayType>::Array as #narrow::arrow::ArrowArray>::Array + >::new(value.0.into()) + )* + ) + } + }; + + let ident = self.array_struct_ident(); + let tokens = quote!( + impl #impl_generics ::std::convert::From<#ident #ty_generics> for ::std::vec::Vec<::std::sync::Arc> #where_clause { + fn from(value: #ident #ty_generics) -> Self { + vec![ + #field_arrays + ] + } + } + ); + parse2(tokens).expect("struct_array_into_array_refs") + } + /// Returns the struct definition of the Array wrapper struct. fn array_struct_def(&self) -> ItemStruct { let narrow = util::narrow(); @@ -467,6 +581,18 @@ impl Struct<'_> { self.field_types() .map(move |ty| parse_quote!(<#ty as #narrow::array::ArrayType>::Array: #bound)) } + + fn where_predicate_fields_arrow_array_into(&self) -> impl Iterator + '_ { + let narrow = util::narrow(); + self.field_types() + .map(move |ty| parse_quote!( + <#ty as #narrow::array::ArrayType>::Array: + ::std::convert::Into< + <<#ty as #narrow::array::ArrayType>::Array + as #narrow::arrow::ArrowArray>::Array + > + )) + } } #[cfg(test)] diff --git a/narrow-derive/tests/expand/struct/named/generic.expanded.rs b/narrow-derive/tests/expand/struct/named/generic.expanded.rs index ff722530..fdff1caa 100644 --- a/narrow-derive/tests/expand/struct/named/generic.expanded.rs +++ b/narrow-derive/tests/expand/struct/named/generic.expanded.rs @@ -31,6 +31,66 @@ where { type Array = FooArray<'a, T, Buffer>; } +impl< + 'a, + T: narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> narrow::arrow::StructArrayTypeFields for FooArray<'a, T, Buffer> +where + T: Copy, + <&'a T as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + ::std::sync::Arc::new( + <<&'a T as ::narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("a"), + ), + ]) + } +} +impl< + 'a, + T: narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + T: Copy, + <&'a T as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <<&'a T as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, +{ + fn from(value: FooArray<'a, T, Buffer>) -> Self { + <[_]>::into_vec( + #[rustc_box] + ::alloc::boxed::Box::new([ + ::std::sync::Arc::< + <<&'a T as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.a.into()), + ]), + ) + } +} struct FooArray<'a, T: narrow::array::ArrayType, Buffer: narrow::buffer::BufferType> where T: Copy, diff --git a/narrow-derive/tests/expand/struct/named/generic_option.expanded.rs b/narrow-derive/tests/expand/struct/named/generic_option.expanded.rs index b0c10217..d3e81a2b 100644 --- a/narrow-derive/tests/expand/struct/named/generic_option.expanded.rs +++ b/narrow-derive/tests/expand/struct/named/generic_option.expanded.rs @@ -21,6 +21,142 @@ for ::std::option::Option> { impl narrow::array::StructArrayType for Bar { type Array = BarArray; } +impl< + T: narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> narrow::arrow::StructArrayTypeFields for BarArray +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, + as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, + as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + ::std::sync::Arc::new( + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("a"), + ), + ::std::sync::Arc::new( + < as ::narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("b"), + ), + ::std::sync::Arc::new( + < as ::narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("c"), + ), + ]) + } +} +impl< + T: narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, + as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + < as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, + as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + < as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, +{ + fn from(value: BarArray) -> Self { + <[_]>::into_vec( + #[rustc_box] + ::alloc::boxed::Box::new([ + ::std::sync::Arc::< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.a.into()), + ::std::sync::Arc::< + < as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.b.into()), + ::std::sync::Arc::< + < as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.c.into()), + ]), + ) + } +} struct BarArray { a: ::Array< Buffer, diff --git a/narrow-derive/tests/expand/struct/named/simple.expanded.rs b/narrow-derive/tests/expand/struct/named/simple.expanded.rs index 1e5b44d7..1f9768ad 100644 --- a/narrow-derive/tests/expand/struct/named/simple.expanded.rs +++ b/narrow-derive/tests/expand/struct/named/simple.expanded.rs @@ -20,6 +20,127 @@ impl narrow::array::ArrayType for ::std::option::Option { impl narrow::array::StructArrayType for Foo { type Array = FooArray; } +impl narrow::arrow::StructArrayTypeFields +for FooArray +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, + , + > as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + ::std::sync::Arc::new( + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("a"), + ), + ::std::sync::Arc::new( + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("b"), + ), + ::std::sync::Arc::new( + <, + > as ::narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("c"), + ), + ]) + } +} +impl ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, + , + > as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <, + > as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, +{ + fn from(value: FooArray) -> Self { + <[_]>::into_vec( + #[rustc_box] + ::alloc::boxed::Box::new([ + ::std::sync::Arc::< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.a.into()), + ::std::sync::Arc::< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.b.into()), + ::std::sync::Arc::< + <, + > as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.c.into()), + ]), + ) + } +} struct FooArray { a: ::Array< Buffer, diff --git a/narrow-derive/tests/expand/struct/unit/const_generic.expanded.rs b/narrow-derive/tests/expand/struct/unit/const_generic.expanded.rs index e92c129f..d88c6ebb 100644 --- a/narrow-derive/tests/expand/struct/unit/const_generic.expanded.rs +++ b/narrow-derive/tests/expand/struct/unit/const_generic.expanded.rs @@ -21,6 +21,23 @@ impl narrow::array::ArrayType> for ::std::option::Option< impl narrow::array::StructArrayType for Foo { type Array = FooArray; } +impl< + const N: usize, + Buffer: narrow::buffer::BufferType, +> narrow::arrow::StructArrayTypeFields for FooArray { + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([]) + } +} +impl< + const N: usize, + Buffer: narrow::buffer::BufferType, +> ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> { + fn from(value: FooArray) -> Self { + ::alloc::vec::Vec::new() + } +} pub struct FooArray( narrow::array::NullArray, false, Buffer>, ); diff --git a/narrow-derive/tests/expand/struct/unit/const_generic_default.expanded.rs b/narrow-derive/tests/expand/struct/unit/const_generic_default.expanded.rs index 5e1f0e09..1bfdf332 100644 --- a/narrow-derive/tests/expand/struct/unit/const_generic_default.expanded.rs +++ b/narrow-derive/tests/expand/struct/unit/const_generic_default.expanded.rs @@ -21,6 +21,23 @@ impl narrow::array::ArrayType> for ::std::option::Option< impl narrow::array::StructArrayType for Foo { type Array = FooArray; } +impl< + const N: usize, + Buffer: narrow::buffer::BufferType, +> narrow::arrow::StructArrayTypeFields for FooArray { + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([]) + } +} +impl< + const N: usize, + Buffer: narrow::buffer::BufferType, +> ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> { + fn from(value: FooArray) -> Self { + ::alloc::vec::Vec::new() + } +} pub struct FooArray( narrow::array::NullArray, false, Buffer>, ); diff --git a/narrow-derive/tests/expand/struct/unit/self.expanded.rs b/narrow-derive/tests/expand/struct/unit/self.expanded.rs index 25a0b22b..6d744d0c 100644 --- a/narrow-derive/tests/expand/struct/unit/self.expanded.rs +++ b/narrow-derive/tests/expand/struct/unit/self.expanded.rs @@ -35,6 +35,24 @@ where { type Array = FooArray; } +impl narrow::arrow::StructArrayTypeFields +for FooArray +where + Foo: Debug, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([]) + } +} +impl ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + Foo: Debug, +{ + fn from(value: FooArray) -> Self { + ::alloc::vec::Vec::new() + } +} struct FooArray( narrow::array::NullArray, ) diff --git a/narrow-derive/tests/expand/struct/unit/simple.expanded.rs b/narrow-derive/tests/expand/struct/unit/simple.expanded.rs index 7d80703c..0b59b9ab 100644 --- a/narrow-derive/tests/expand/struct/unit/simple.expanded.rs +++ b/narrow-derive/tests/expand/struct/unit/simple.expanded.rs @@ -21,6 +21,18 @@ impl narrow::array::ArrayType for ::std::option::Option { impl narrow::array::StructArrayType for Foo { type Array = FooArray; } +impl narrow::arrow::StructArrayTypeFields +for FooArray { + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([]) + } +} +impl ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> { + fn from(value: FooArray) -> Self { + ::alloc::vec::Vec::new() + } +} struct FooArray( narrow::array::NullArray, ); diff --git a/narrow-derive/tests/expand/struct/unit/where_clause.expanded.rs b/narrow-derive/tests/expand/struct/unit/where_clause.expanded.rs index b41f5f82..9352a1e5 100644 --- a/narrow-derive/tests/expand/struct/unit/where_clause.expanded.rs +++ b/narrow-derive/tests/expand/struct/unit/where_clause.expanded.rs @@ -40,6 +40,31 @@ where { type Array = FooArray; } +impl< + const N: bool, + Buffer: narrow::buffer::BufferType, +> narrow::arrow::StructArrayTypeFields for FooArray +where + Foo: Sized, + (): From>, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([]) + } +} +impl< + const N: bool, + Buffer: narrow::buffer::BufferType, +> ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + Foo: Sized, + (): From>, +{ + fn from(value: FooArray) -> Self { + ::alloc::vec::Vec::new() + } +} pub(super) struct FooArray( narrow::array::NullArray, false, Buffer>, ) diff --git a/narrow-derive/tests/expand/struct/unnamed/generic.expanded.rs b/narrow-derive/tests/expand/struct/unnamed/generic.expanded.rs index 5c0fa736..9c928f86 100644 --- a/narrow-derive/tests/expand/struct/unnamed/generic.expanded.rs +++ b/narrow-derive/tests/expand/struct/unnamed/generic.expanded.rs @@ -35,6 +35,68 @@ where { type Array = FooArray<'a, T, Buffer>; } +impl< + 'a, + T: Add> + narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> narrow::arrow::StructArrayTypeFields for FooArray<'a, T, Buffer> +where + Foo<'a, T>: Sized, + >>::Output: Debug, + <&'a T as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + ::std::sync::Arc::new( + <<&'a T as ::narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("_0"), + ), + ]) + } +} +impl< + 'a, + T: Add> + narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + Foo<'a, T>: Sized, + >>::Output: Debug, + <&'a T as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <<&'a T as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, +{ + fn from(value: FooArray<'a, T, Buffer>) -> Self { + <[_]>::into_vec( + #[rustc_box] + ::alloc::boxed::Box::new([ + ::std::sync::Arc::< + <<&'a T as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.0.into()), + ]), + ) + } +} struct FooArray< 'a, T: Add> + narrow::array::ArrayType, @@ -144,6 +206,62 @@ for ::std::option::Option> { impl narrow::array::StructArrayType for FooBar { type Array = FooBarArray; } +impl< + T: narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> narrow::arrow::StructArrayTypeFields for FooBarArray +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + ::std::sync::Arc::new( + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("_0"), + ), + ]) + } +} +impl< + T: narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, +{ + fn from(value: FooBarArray) -> Self { + <[_]>::into_vec( + #[rustc_box] + ::alloc::boxed::Box::new([ + ::std::sync::Arc::< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.0.into()), + ]), + ) + } +} struct FooBarArray( ::Array< Buffer, diff --git a/narrow-derive/tests/expand/struct/unnamed/lifetime.expanded.rs b/narrow-derive/tests/expand/struct/unnamed/lifetime.expanded.rs index 035b6184..08f5f2d2 100644 --- a/narrow-derive/tests/expand/struct/unnamed/lifetime.expanded.rs +++ b/narrow-derive/tests/expand/struct/unnamed/lifetime.expanded.rs @@ -17,6 +17,64 @@ for ::std::option::Option> { impl<'a, T: narrow::array::ArrayType> narrow::array::StructArrayType for Foo<'a, T> { type Array = FooArray<'a, T, Buffer>; } +impl< + 'a, + T: narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> narrow::arrow::StructArrayTypeFields for FooArray<'a, T, Buffer> +where + <&'a T as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + ::std::sync::Arc::new( + <<&'a T as ::narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("_0"), + ), + ]) + } +} +impl< + 'a, + T: narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + <&'a T as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <<&'a T as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, +{ + fn from(value: FooArray<'a, T, Buffer>) -> Self { + <[_]>::into_vec( + #[rustc_box] + ::alloc::boxed::Box::new([ + ::std::sync::Arc::< + <<&'a T as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.0.into()), + ]), + ) + } +} struct FooArray<'a, T: narrow::array::ArrayType, Buffer: narrow::buffer::BufferType>( <&'a T as narrow::array::ArrayType>::Array< Buffer, diff --git a/narrow-derive/tests/expand/struct/unnamed/multiple.expanded.rs b/narrow-derive/tests/expand/struct/unnamed/multiple.expanded.rs index de40f536..d92a1cc5 100644 --- a/narrow-derive/tests/expand/struct/unnamed/multiple.expanded.rs +++ b/narrow-derive/tests/expand/struct/unnamed/multiple.expanded.rs @@ -16,6 +16,147 @@ impl narrow::array::ArrayType for ::std::option::Option { impl narrow::array::StructArrayType for Bar { type Array = BarArray; } +impl narrow::arrow::StructArrayTypeFields +for BarArray +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + ::std::sync::Arc::new( + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("_0"), + ), + ::std::sync::Arc::new( + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("_1"), + ), + ::std::sync::Arc::new( + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("_2"), + ), + ::std::sync::Arc::new( + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("_3"), + ), + ]) + } +} +impl ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, +{ + fn from(value: BarArray) -> Self { + <[_]>::into_vec( + #[rustc_box] + ::alloc::boxed::Box::new([ + ::std::sync::Arc::< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.0.into()), + ::std::sync::Arc::< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.1.into()), + ::std::sync::Arc::< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.2.into()), + ::std::sync::Arc::< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.3.into()), + ]), + ) + } +} struct BarArray( ::Array< Buffer, diff --git a/narrow-derive/tests/expand/struct/unnamed/nested.expanded.rs b/narrow-derive/tests/expand/struct/unnamed/nested.expanded.rs index 51d000ae..6373ad89 100644 --- a/narrow-derive/tests/expand/struct/unnamed/nested.expanded.rs +++ b/narrow-derive/tests/expand/struct/unnamed/nested.expanded.rs @@ -16,6 +16,57 @@ impl narrow::array::ArrayType for ::std::option::Option { impl narrow::array::StructArrayType for Foo { type Array = FooArray; } +impl narrow::arrow::StructArrayTypeFields +for FooArray +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + ::std::sync::Arc::new( + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("_0"), + ), + ]) + } +} +impl ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, +{ + fn from(value: FooArray) -> Self { + <[_]>::into_vec( + #[rustc_box] + ::alloc::boxed::Box::new([ + ::std::sync::Arc::< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.0.into()), + ]), + ) + } +} struct FooArray( ::Array< Buffer, @@ -94,6 +145,57 @@ impl narrow::array::ArrayType for ::std::option::Option { impl narrow::array::StructArrayType for Bar { type Array = BarArray; } +impl narrow::arrow::StructArrayTypeFields +for BarArray +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + ::std::sync::Arc::new( + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("_0"), + ), + ]) + } +} +impl ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, +{ + fn from(value: BarArray) -> Self { + <[_]>::into_vec( + #[rustc_box] + ::alloc::boxed::Box::new([ + ::std::sync::Arc::< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.0.into()), + ]), + ) + } +} struct BarArray( ::Array< Buffer, diff --git a/narrow-derive/tests/expand/struct/unnamed/nested_generic.expanded.rs b/narrow-derive/tests/expand/struct/unnamed/nested_generic.expanded.rs index a97782f8..5149e6c1 100644 --- a/narrow-derive/tests/expand/struct/unnamed/nested_generic.expanded.rs +++ b/narrow-derive/tests/expand/struct/unnamed/nested_generic.expanded.rs @@ -30,6 +30,64 @@ where { type Array = FooArray; } +impl< + T: narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> narrow::arrow::StructArrayTypeFields for FooArray +where + T: Copy, + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + ::std::sync::Arc::new( + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("_0"), + ), + ]) + } +} +impl< + T: narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + T: Copy, + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, +{ + fn from(value: FooArray) -> Self { + <[_]>::into_vec( + #[rustc_box] + ::alloc::boxed::Box::new([ + ::std::sync::Arc::< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.0.into()), + ]), + ) + } +} struct FooArray( ::Array< Buffer, @@ -124,6 +182,74 @@ for ::std::option::Option> { impl<'a, T: narrow::array::ArrayType> narrow::array::StructArrayType for Bar<'a, T> { type Array = BarArray<'a, T, Buffer>; } +impl< + 'a, + T: narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> narrow::arrow::StructArrayTypeFields for BarArray<'a, T, Buffer> +where + <&'a Foo< + T, + > as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + ::std::sync::Arc::new( + <<&'a Foo< + T, + > as ::narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("_0"), + ), + ]) + } +} +impl< + 'a, + T: narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + <&'a Foo< + T, + > as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <<&'a Foo< + T, + > as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, +{ + fn from(value: BarArray<'a, T, Buffer>) -> Self { + <[_]>::into_vec( + #[rustc_box] + ::alloc::boxed::Box::new([ + ::std::sync::Arc::< + <<&'a Foo< + T, + > as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.0.into()), + ]), + ) + } +} struct BarArray<'a, T: narrow::array::ArrayType, Buffer: narrow::buffer::BufferType>( <&'a Foo< T, @@ -224,6 +350,75 @@ impl<'a> narrow::array::ArrayType> for ::std::option::Option narrow::array::StructArrayType for FooBar<'a> { type Array = FooBarArray<'a, Buffer>; } +impl<'a, Buffer: narrow::buffer::BufferType> narrow::arrow::StructArrayTypeFields +for FooBarArray<'a, Buffer> +where + as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + ::std::sync::Arc::new( + < as ::narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("_0"), + ), + ]) + } +} +impl< + 'a, + Buffer: narrow::buffer::BufferType, +> ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + < as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, +{ + fn from(value: FooBarArray<'a, Buffer>) -> Self { + <[_]>::into_vec( + #[rustc_box] + ::alloc::boxed::Box::new([ + ::std::sync::Arc::< + < as narrow::array::ArrayType>::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.0.into()), + ]), + ) + } +} struct FooBarArray<'a, Buffer: narrow::buffer::BufferType>( > { impl narrow::array::StructArrayType for Foo { type Array = FooArray; } +impl< + T: Sized + narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> narrow::arrow::StructArrayTypeFields for FooArray +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: narrow::arrow::ArrowArray, +{ + fn fields() -> ::arrow_schema::Fields { + ::arrow_schema::Fields::from([ + ::std::sync::Arc::new( + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::as_field("_0"), + ), + ]) + } +} +impl< + T: Sized + narrow::array::ArrayType, + Buffer: narrow::buffer::BufferType, +> ::std::convert::From> +for ::std::vec::Vec<::std::sync::Arc> +where + ::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + >: ::std::convert::Into< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >, +{ + fn from(value: FooArray) -> Self { + <[_]>::into_vec( + #[rustc_box] + ::alloc::boxed::Box::new([ + ::std::sync::Arc::< + <::Array< + Buffer, + narrow::offset::NA, + narrow::array::union::NA, + > as narrow::arrow::ArrowArray>::Array, + >::new(value.0.into()), + ]), + ) + } +} struct FooArray( ::Array< Buffer, diff --git a/src/arrow/array/boolean.rs b/src/arrow/array/boolean.rs index 67dbe4f8..354b06ea 100644 --- a/src/arrow/array/boolean.rs +++ b/src/arrow/array/boolean.rs @@ -15,7 +15,7 @@ where { type Array = arrow_array::BooleanArray; - fn as_field(&self, name: &str) -> arrow_schema::Field { + fn as_field(name: &str) -> arrow_schema::Field { Field::new(name, DataType::Boolean, NULLABLE) } } diff --git a/src/arrow/array/fixed_size_primitive.rs b/src/arrow/array/fixed_size_primitive.rs index 5d8b18df..d26e225e 100644 --- a/src/arrow/array/fixed_size_primitive.rs +++ b/src/arrow/array/fixed_size_primitive.rs @@ -24,7 +24,7 @@ macro_rules! arrow_array_convert { { type Array = arrow_array::PrimitiveArray<$primitive_type>; - fn as_field(&self, name: &str) -> arrow_schema::Field { + fn as_field(name: &str) -> arrow_schema::Field { Field::new(name, DataType::$data_type, NULLABLE) } } diff --git a/src/arrow/array/mod.rs b/src/arrow/array/mod.rs index b2a1d970..419fd90c 100644 --- a/src/arrow/array/mod.rs +++ b/src/arrow/array/mod.rs @@ -4,4 +4,5 @@ mod boolean; mod fixed_size_primitive; mod string; mod r#struct; +pub use r#struct::StructArrayTypeFields; mod variable_size_list; diff --git a/src/arrow/array/string.rs b/src/arrow/array/string.rs index 432f3ae8..7f7cb867 100644 --- a/src/arrow/array/string.rs +++ b/src/arrow/array/string.rs @@ -23,7 +23,7 @@ where { type Array = arrow_array::GenericStringArray; - fn as_field(&self, name: &str) -> arrow_schema::Field { + fn as_field(name: &str) -> arrow_schema::Field { Field::new(name, DataType::Utf8, NULLABLE) } } diff --git a/src/arrow/array/struct.rs b/src/arrow/array/struct.rs index b8492f96..3f8827f0 100644 --- a/src/arrow/array/struct.rs +++ b/src/arrow/array/struct.rs @@ -1 +1,218 @@ //! Interop with [`arrow-rs`] struct arrays. + +use std::sync::Arc; + +use arrow_buffer::NullBuffer; +use arrow_schema::{DataType, Field, Fields}; + +use crate::{ + array::{StructArray, StructArrayType}, + arrow::ArrowArray, + bitmap::Bitmap, + buffer::BufferType, + nullable::Nullable, + validity::Validity, +}; + +/// Arrow schema interop trait for the fields of a struct array type. +pub trait StructArrayTypeFields { + /// Returns the fields of this struct array. + fn fields() -> Fields; +} + +impl ArrowArray + for StructArray +where + ::Array: Validity + StructArrayTypeFields, +{ + type Array = arrow_array::StructArray; + + fn as_field(name: &str) -> arrow_schema::Field { + Field::new( + name, + DataType::Struct( + <::Array as StructArrayTypeFields>::fields(), + ), + NULLABLE, + ) + } +} + +impl From> + for StructArray +where + ::Array: Validity, + Self: From, +{ + fn from(value: Arc) -> Self { + Self::from(arrow_array::StructArray::from(value.to_data())) + } +} + +impl From> + for arrow_array::StructArray +where + ::Array: + StructArrayTypeFields + Into>>, +{ + fn from(value: StructArray) -> Self { + // Safety: + // - struct arrays are valid by construction + unsafe { + arrow_array::StructArray::new_unchecked( + <::Array as StructArrayTypeFields>::fields(), + // value.0.into_arrays(), + value.0.into(), + None, + ) + } + } +} + +impl From> + for arrow_array::StructArray +where + ::Array: + StructArrayTypeFields + Into>>, + Bitmap: Into, +{ + fn from(value: StructArray) -> Self { + // Safety: + // - struct arrays are valid by construction + unsafe { + arrow_array::StructArray::new_unchecked( + <::Array as StructArrayTypeFields>::fields(), + value.0.data.into(), + Some(value.0.validity.into()), + ) + } + } +} + +impl From + for StructArray +where + ::Array: From>>, +{ + fn from(value: arrow_array::StructArray) -> Self { + let (_fields, arrays, nulls_opt) = value.into_parts(); + match nulls_opt { + Some(_) => panic!("expected array without a null buffer"), + None => StructArray(arrays.into()), + } + } +} + +impl From + for StructArray +where + ::Array: From>>, + Bitmap: From, +{ + fn from(value: arrow_array::StructArray) -> Self { + let (_fields, arrays, nulls_opt) = value.into_parts(); + match nulls_opt { + Some(null_buffer) => StructArray(Nullable { + data: arrays.into(), + validity: null_buffer.into(), + }), + None => panic!("expected array with a null buffer"), + } + } +} + +#[cfg(test)] +mod tests { + + use arrow_array::Array as _; + + use crate::{ + array::union::{self, UnionType}, + array::ArrayType, + arrow::buffer_builder::ArrowBufferBuilder, + offset::{self, OffsetElement}, + }; + + use super::*; + + #[derive(Default)] + struct Foo { + a: u32, + } + struct FooArray { + a: ::Array, + } + impl ArrayType for Foo { + type Array = + StructArray; + } + impl ArrayType for Option { + type Array = + StructArray; + } + impl Default for FooArray + where + ::Array: Default, + { + fn default() -> Self { + Self { + a: ::Array::::default(), + } + } + } + impl Extend for FooArray + where + ::Array: Extend, + { + fn extend>(&mut self, iter: I) { + iter.into_iter().for_each(|Foo { a }| { + self.a.extend(std::iter::once(a)); + }); + } + } + impl FromIterator for FooArray + where + ::Array: Default + Extend, + { + fn from_iter>(iter: T) -> Self { + let (a, _): (_, Vec<_>) = iter.into_iter().map(|Foo { a }| (a, ())).unzip(); + Self { a } + } + } + impl StructArrayType for Foo { + type Array = FooArray; + } + impl StructArrayTypeFields for FooArray { + fn fields() -> Fields { + Fields::from(vec![Field::new("a", DataType::UInt32, false)]) + } + } + impl From> for Vec> + where + ::Array: + Into<<::Array as ArrowArray>::Array>, + { + fn from(value: FooArray) -> Self { + vec![Arc::< + <::Array as ArrowArray>::Array, + >::new(value.a.into())] + } + } + + #[test] + fn from() { + let struct_array = [Foo { a: 1 }, Foo { a: 2 }] + .into_iter() + .collect::>(); + let struct_array_arrow = arrow_array::StructArray::from(struct_array); + assert_eq!(struct_array_arrow.len(), 2); + + let struct_array_nullable = [Some(Foo { a: 1234 }), None] + .into_iter() + .collect::>(); + let struct_array_arrow_nullable = arrow_array::StructArray::from(struct_array_nullable); + assert_eq!(struct_array_arrow_nullable.len(), 2); + assert!(struct_array_arrow_nullable.is_valid(0)); + assert!(struct_array_arrow_nullable.is_null(1)); + } +} diff --git a/src/arrow/array/variable_size_list.rs b/src/arrow/array/variable_size_list.rs index 5e8d1128..b4865ec0 100644 --- a/src/arrow/array/variable_size_list.rs +++ b/src/arrow/array/variable_size_list.rs @@ -27,10 +27,10 @@ where { type Array = arrow_array::GenericListArray; - fn as_field(&self, name: &str) -> arrow_schema::Field { + fn as_field(name: &str) -> arrow_schema::Field { Field::new( name, - DataType::List(Arc::new(self.0.data.as_field("item"))), + DataType::List(Arc::new(T::as_field("item"))), NULLABLE, ) } @@ -62,11 +62,11 @@ where { fn from(value: VariableSizeListArray) -> Self { arrow_array::GenericListArray::new( - Arc::new(value.0.data.as_field("item")), + Arc::new(T::as_field("item")), // Safety: // - The narrow offfset buffer contains valid offset data unsafe { OffsetBuffer::new_unchecked(value.0.offsets.into()) }, - value.0.data.into_array_ref(), + Arc::<::Array>::new(value.0.data.into()), None, ) } @@ -82,11 +82,11 @@ where { fn from(value: VariableSizeListArray) -> Self { arrow_array::GenericListArray::new( - Arc::new(value.0.data.as_field("item")), + Arc::new(T::as_field("item")), // Safety: // - The narrow offfset buffer contains valid offset data unsafe { OffsetBuffer::new_unchecked(value.0.offsets.data.into()) }, - value.0.data.into_array_ref(), + Arc::<::Array>::new(value.0.data.into()), Some(value.0.offsets.validity.into()), ) } diff --git a/src/arrow/mod.rs b/src/arrow/mod.rs index 42c32ccd..5919eb65 100644 --- a/src/arrow/mod.rs +++ b/src/arrow/mod.rs @@ -3,29 +3,19 @@ //! [`arrow-rs`]: https://crates.io/crates/arrow mod array; -// mod bitmap; +pub use array::StructArrayTypeFields; mod buffer; pub use buffer::*; use crate::array::Array; -use arrow_array::ArrayRef; use arrow_schema::Field; -use std::sync::Arc; /// Extension trait of [`Array`] for [`arrow-rs`] interop. pub trait ArrowArray: Array + Sized { /// The corresponding arrow array - type Array: arrow_array::Array; // + From + 'static; - - /// Returns as array ref - fn into_array_ref(self) -> ArrayRef - where - Self::Array: From + 'static, - { - Arc::::new(self.into()) - } + type Array: arrow_array::Array; /// Returns the field of this array. - fn as_field(&self, name: &str) -> Field; + fn as_field(name: &str) -> Field; } diff --git a/tests/derive.rs b/tests/derive.rs index d1f49cbe..1653ab5a 100644 --- a/tests/derive.rs +++ b/tests/derive.rs @@ -2,6 +2,7 @@ mod tests { mod derive { mod r#struct { + #[cfg(not(feature = "arrow-rs"))] mod unit { use narrow::{ array::{StructArray, VariableSizeListArray}, @@ -64,6 +65,7 @@ mod tests { } } + #[cfg(not(feature = "arrow-rs"))] mod unnamed { use narrow::{ array::{StructArray, VariableSizeListArray}, @@ -141,6 +143,7 @@ mod tests { } } + #[cfg(not(feature = "arrow-rs"))] mod named { use narrow::{ array::{StructArray, VariableSizeListArray}, From d9c076f727fd2ba3554002cf12d1785654b4f2a8 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 11 Dec 2023 22:36:08 +0100 Subject: [PATCH 24/27] Remove a comment --- src/arrow/buffer/mod.rs | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/src/arrow/buffer/mod.rs b/src/arrow/buffer/mod.rs index d5a03bc6..e532be23 100644 --- a/src/arrow/buffer/mod.rs +++ b/src/arrow/buffer/mod.rs @@ -5,35 +5,3 @@ pub mod buffer_builder; pub mod null_buffer; pub mod offset_buffer; pub mod scalar_buffer; - -// /// A [`BufferType`] implementation for &'a arrow Buffer. -// /// -// /// Stores items `T` in an arrow `&Buffer`. -// #[derive(Clone, Copy, Debug)] -// pub struct ArrowRefBuffer<'a>(PhantomData<&'a ()>); - -// impl<'a> BufferType for ArrowRefBuffer<'a> { -// type Buffer = &'a [T]; -// } - -// impl Buffer for &arrow_buffer::Buffer { -// fn as_slice(&self) -> &[T] { -// self.typed_data() -// } -// } - -// impl Length for &arrow_buffer::Buffer { -// fn len(&self) -> usize { -// arrow_buffer::Buffer::len(self) -// } -// } - -// impl Index for &arrow_buffer::Buffer { -// type Item<'a> = &'a u8 -// where -// Self: 'a; - -// unsafe fn index_unchecked(&self, index: usize) -> Self::Item<'_> { -// self.get_unchecked(index) -// } -// } From fefa1bc666f019071063037055845df456b6967a Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Tue, 12 Dec 2023 13:00:36 +0100 Subject: [PATCH 25/27] Fix build issues --- Cargo.toml | 8 +- narrow-derive/src/struct.rs | 10 +- .../expand/struct/named/generic.expanded.rs | 60 ------ .../struct/named/generic_option.expanded.rs | 136 ------------ .../expand/struct/named/simple.expanded.rs | 121 ----------- .../struct/unit/const_generic.expanded.rs | 17 -- .../unit/const_generic_default.expanded.rs | 17 -- .../tests/expand/struct/unit/self.expanded.rs | 18 -- .../expand/struct/unit/simple.expanded.rs | 12 -- .../struct/unit/where_clause.expanded.rs | 25 --- .../expand/struct/unnamed/generic.expanded.rs | 118 ----------- .../struct/unnamed/lifetime.expanded.rs | 58 ------ .../struct/unnamed/multiple.expanded.rs | 141 ------------- .../expand/struct/unnamed/nested.expanded.rs | 102 --------- .../struct/unnamed/nested_generic.expanded.rs | 195 ------------------ .../expand/struct/unnamed/simple.expanded.rs | 56 ----- 16 files changed, 10 insertions(+), 1084 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a14f41b1..d448daaa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ categories.workspace = true [features] default = ["arrow-rs", "derive"] -arrow-rs = ["dep:arrow-array", "dep:arrow-buffer", "dep:arrow-schema", "narrow-derive/arrow-rs"] +arrow-rs = ["dep:arrow-array", "dep:arrow-buffer", "dep:arrow-schema", "narrow-derive?/arrow-rs"] derive = ["dep:narrow-derive"] [dependencies] @@ -59,6 +59,6 @@ codegen-units = 1 name = "narrow" harness = false -# [[example]] -# name = "parquet" -# required-features = ["arrow-rs", "derive"] +[[example]] +name = "parquet" +required-features = ["arrow-rs", "derive"] diff --git a/narrow-derive/src/struct.rs b/narrow-derive/src/struct.rs index df773e8c..96ea38cc 100644 --- a/narrow-derive/src/struct.rs +++ b/narrow-derive/src/struct.rs @@ -223,7 +223,8 @@ impl Struct<'_> { ); let ident = self.array_struct_ident(); - let tokens = quote!( + let tokens = quote! { + #[cfg(feature = "arrow-rs")] impl #impl_generics #narrow::arrow::StructArrayTypeFields for #ident #ty_generics #where_clause { fn fields() -> ::arrow_schema::Fields { ::arrow_schema::Fields::from([ @@ -231,7 +232,7 @@ impl Struct<'_> { ]) } } - ); + }; parse2(tokens).expect("struct_array_type_fields_impl") } @@ -290,7 +291,8 @@ impl Struct<'_> { }; let ident = self.array_struct_ident(); - let tokens = quote!( + let tokens = quote! { + #[cfg(feature = "arrow-rs")] impl #impl_generics ::std::convert::From<#ident #ty_generics> for ::std::vec::Vec<::std::sync::Arc> #where_clause { fn from(value: #ident #ty_generics) -> Self { vec![ @@ -298,7 +300,7 @@ impl Struct<'_> { ] } } - ); + }; parse2(tokens).expect("struct_array_into_array_refs") } diff --git a/narrow-derive/tests/expand/struct/named/generic.expanded.rs b/narrow-derive/tests/expand/struct/named/generic.expanded.rs index fdff1caa..ff722530 100644 --- a/narrow-derive/tests/expand/struct/named/generic.expanded.rs +++ b/narrow-derive/tests/expand/struct/named/generic.expanded.rs @@ -31,66 +31,6 @@ where { type Array = FooArray<'a, T, Buffer>; } -impl< - 'a, - T: narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> narrow::arrow::StructArrayTypeFields for FooArray<'a, T, Buffer> -where - T: Copy, - <&'a T as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([ - ::std::sync::Arc::new( - <<&'a T as ::narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("a"), - ), - ]) - } -} -impl< - 'a, - T: narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - T: Copy, - <&'a T as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <<&'a T as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, -{ - fn from(value: FooArray<'a, T, Buffer>) -> Self { - <[_]>::into_vec( - #[rustc_box] - ::alloc::boxed::Box::new([ - ::std::sync::Arc::< - <<&'a T as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.a.into()), - ]), - ) - } -} struct FooArray<'a, T: narrow::array::ArrayType, Buffer: narrow::buffer::BufferType> where T: Copy, diff --git a/narrow-derive/tests/expand/struct/named/generic_option.expanded.rs b/narrow-derive/tests/expand/struct/named/generic_option.expanded.rs index d3e81a2b..b0c10217 100644 --- a/narrow-derive/tests/expand/struct/named/generic_option.expanded.rs +++ b/narrow-derive/tests/expand/struct/named/generic_option.expanded.rs @@ -21,142 +21,6 @@ for ::std::option::Option> { impl narrow::array::StructArrayType for Bar { type Array = BarArray; } -impl< - T: narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> narrow::arrow::StructArrayTypeFields for BarArray -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, - as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, - as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([ - ::std::sync::Arc::new( - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("a"), - ), - ::std::sync::Arc::new( - < as ::narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("b"), - ), - ::std::sync::Arc::new( - < as ::narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("c"), - ), - ]) - } -} -impl< - T: narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, - as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - < as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, - as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - < as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, -{ - fn from(value: BarArray) -> Self { - <[_]>::into_vec( - #[rustc_box] - ::alloc::boxed::Box::new([ - ::std::sync::Arc::< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.a.into()), - ::std::sync::Arc::< - < as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.b.into()), - ::std::sync::Arc::< - < as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.c.into()), - ]), - ) - } -} struct BarArray { a: ::Array< Buffer, diff --git a/narrow-derive/tests/expand/struct/named/simple.expanded.rs b/narrow-derive/tests/expand/struct/named/simple.expanded.rs index 1f9768ad..1e5b44d7 100644 --- a/narrow-derive/tests/expand/struct/named/simple.expanded.rs +++ b/narrow-derive/tests/expand/struct/named/simple.expanded.rs @@ -20,127 +20,6 @@ impl narrow::array::ArrayType for ::std::option::Option { impl narrow::array::StructArrayType for Foo { type Array = FooArray; } -impl narrow::arrow::StructArrayTypeFields -for FooArray -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, - , - > as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([ - ::std::sync::Arc::new( - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("a"), - ), - ::std::sync::Arc::new( - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("b"), - ), - ::std::sync::Arc::new( - <, - > as ::narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("c"), - ), - ]) - } -} -impl ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, - , - > as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <, - > as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, -{ - fn from(value: FooArray) -> Self { - <[_]>::into_vec( - #[rustc_box] - ::alloc::boxed::Box::new([ - ::std::sync::Arc::< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.a.into()), - ::std::sync::Arc::< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.b.into()), - ::std::sync::Arc::< - <, - > as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.c.into()), - ]), - ) - } -} struct FooArray { a: ::Array< Buffer, diff --git a/narrow-derive/tests/expand/struct/unit/const_generic.expanded.rs b/narrow-derive/tests/expand/struct/unit/const_generic.expanded.rs index d88c6ebb..e92c129f 100644 --- a/narrow-derive/tests/expand/struct/unit/const_generic.expanded.rs +++ b/narrow-derive/tests/expand/struct/unit/const_generic.expanded.rs @@ -21,23 +21,6 @@ impl narrow::array::ArrayType> for ::std::option::Option< impl narrow::array::StructArrayType for Foo { type Array = FooArray; } -impl< - const N: usize, - Buffer: narrow::buffer::BufferType, -> narrow::arrow::StructArrayTypeFields for FooArray { - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([]) - } -} -impl< - const N: usize, - Buffer: narrow::buffer::BufferType, -> ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> { - fn from(value: FooArray) -> Self { - ::alloc::vec::Vec::new() - } -} pub struct FooArray( narrow::array::NullArray, false, Buffer>, ); diff --git a/narrow-derive/tests/expand/struct/unit/const_generic_default.expanded.rs b/narrow-derive/tests/expand/struct/unit/const_generic_default.expanded.rs index 1bfdf332..5e1f0e09 100644 --- a/narrow-derive/tests/expand/struct/unit/const_generic_default.expanded.rs +++ b/narrow-derive/tests/expand/struct/unit/const_generic_default.expanded.rs @@ -21,23 +21,6 @@ impl narrow::array::ArrayType> for ::std::option::Option< impl narrow::array::StructArrayType for Foo { type Array = FooArray; } -impl< - const N: usize, - Buffer: narrow::buffer::BufferType, -> narrow::arrow::StructArrayTypeFields for FooArray { - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([]) - } -} -impl< - const N: usize, - Buffer: narrow::buffer::BufferType, -> ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> { - fn from(value: FooArray) -> Self { - ::alloc::vec::Vec::new() - } -} pub struct FooArray( narrow::array::NullArray, false, Buffer>, ); diff --git a/narrow-derive/tests/expand/struct/unit/self.expanded.rs b/narrow-derive/tests/expand/struct/unit/self.expanded.rs index 6d744d0c..25a0b22b 100644 --- a/narrow-derive/tests/expand/struct/unit/self.expanded.rs +++ b/narrow-derive/tests/expand/struct/unit/self.expanded.rs @@ -35,24 +35,6 @@ where { type Array = FooArray; } -impl narrow::arrow::StructArrayTypeFields -for FooArray -where - Foo: Debug, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([]) - } -} -impl ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - Foo: Debug, -{ - fn from(value: FooArray) -> Self { - ::alloc::vec::Vec::new() - } -} struct FooArray( narrow::array::NullArray, ) diff --git a/narrow-derive/tests/expand/struct/unit/simple.expanded.rs b/narrow-derive/tests/expand/struct/unit/simple.expanded.rs index 0b59b9ab..7d80703c 100644 --- a/narrow-derive/tests/expand/struct/unit/simple.expanded.rs +++ b/narrow-derive/tests/expand/struct/unit/simple.expanded.rs @@ -21,18 +21,6 @@ impl narrow::array::ArrayType for ::std::option::Option { impl narrow::array::StructArrayType for Foo { type Array = FooArray; } -impl narrow::arrow::StructArrayTypeFields -for FooArray { - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([]) - } -} -impl ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> { - fn from(value: FooArray) -> Self { - ::alloc::vec::Vec::new() - } -} struct FooArray( narrow::array::NullArray, ); diff --git a/narrow-derive/tests/expand/struct/unit/where_clause.expanded.rs b/narrow-derive/tests/expand/struct/unit/where_clause.expanded.rs index 9352a1e5..b41f5f82 100644 --- a/narrow-derive/tests/expand/struct/unit/where_clause.expanded.rs +++ b/narrow-derive/tests/expand/struct/unit/where_clause.expanded.rs @@ -40,31 +40,6 @@ where { type Array = FooArray; } -impl< - const N: bool, - Buffer: narrow::buffer::BufferType, -> narrow::arrow::StructArrayTypeFields for FooArray -where - Foo: Sized, - (): From>, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([]) - } -} -impl< - const N: bool, - Buffer: narrow::buffer::BufferType, -> ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - Foo: Sized, - (): From>, -{ - fn from(value: FooArray) -> Self { - ::alloc::vec::Vec::new() - } -} pub(super) struct FooArray( narrow::array::NullArray, false, Buffer>, ) diff --git a/narrow-derive/tests/expand/struct/unnamed/generic.expanded.rs b/narrow-derive/tests/expand/struct/unnamed/generic.expanded.rs index 9c928f86..5c0fa736 100644 --- a/narrow-derive/tests/expand/struct/unnamed/generic.expanded.rs +++ b/narrow-derive/tests/expand/struct/unnamed/generic.expanded.rs @@ -35,68 +35,6 @@ where { type Array = FooArray<'a, T, Buffer>; } -impl< - 'a, - T: Add> + narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> narrow::arrow::StructArrayTypeFields for FooArray<'a, T, Buffer> -where - Foo<'a, T>: Sized, - >>::Output: Debug, - <&'a T as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([ - ::std::sync::Arc::new( - <<&'a T as ::narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("_0"), - ), - ]) - } -} -impl< - 'a, - T: Add> + narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - Foo<'a, T>: Sized, - >>::Output: Debug, - <&'a T as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <<&'a T as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, -{ - fn from(value: FooArray<'a, T, Buffer>) -> Self { - <[_]>::into_vec( - #[rustc_box] - ::alloc::boxed::Box::new([ - ::std::sync::Arc::< - <<&'a T as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.0.into()), - ]), - ) - } -} struct FooArray< 'a, T: Add> + narrow::array::ArrayType, @@ -206,62 +144,6 @@ for ::std::option::Option> { impl narrow::array::StructArrayType for FooBar { type Array = FooBarArray; } -impl< - T: narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> narrow::arrow::StructArrayTypeFields for FooBarArray -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([ - ::std::sync::Arc::new( - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("_0"), - ), - ]) - } -} -impl< - T: narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, -{ - fn from(value: FooBarArray) -> Self { - <[_]>::into_vec( - #[rustc_box] - ::alloc::boxed::Box::new([ - ::std::sync::Arc::< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.0.into()), - ]), - ) - } -} struct FooBarArray( ::Array< Buffer, diff --git a/narrow-derive/tests/expand/struct/unnamed/lifetime.expanded.rs b/narrow-derive/tests/expand/struct/unnamed/lifetime.expanded.rs index 08f5f2d2..035b6184 100644 --- a/narrow-derive/tests/expand/struct/unnamed/lifetime.expanded.rs +++ b/narrow-derive/tests/expand/struct/unnamed/lifetime.expanded.rs @@ -17,64 +17,6 @@ for ::std::option::Option> { impl<'a, T: narrow::array::ArrayType> narrow::array::StructArrayType for Foo<'a, T> { type Array = FooArray<'a, T, Buffer>; } -impl< - 'a, - T: narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> narrow::arrow::StructArrayTypeFields for FooArray<'a, T, Buffer> -where - <&'a T as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([ - ::std::sync::Arc::new( - <<&'a T as ::narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("_0"), - ), - ]) - } -} -impl< - 'a, - T: narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - <&'a T as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <<&'a T as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, -{ - fn from(value: FooArray<'a, T, Buffer>) -> Self { - <[_]>::into_vec( - #[rustc_box] - ::alloc::boxed::Box::new([ - ::std::sync::Arc::< - <<&'a T as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.0.into()), - ]), - ) - } -} struct FooArray<'a, T: narrow::array::ArrayType, Buffer: narrow::buffer::BufferType>( <&'a T as narrow::array::ArrayType>::Array< Buffer, diff --git a/narrow-derive/tests/expand/struct/unnamed/multiple.expanded.rs b/narrow-derive/tests/expand/struct/unnamed/multiple.expanded.rs index d92a1cc5..de40f536 100644 --- a/narrow-derive/tests/expand/struct/unnamed/multiple.expanded.rs +++ b/narrow-derive/tests/expand/struct/unnamed/multiple.expanded.rs @@ -16,147 +16,6 @@ impl narrow::array::ArrayType for ::std::option::Option { impl narrow::array::StructArrayType for Bar { type Array = BarArray; } -impl narrow::arrow::StructArrayTypeFields -for BarArray -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([ - ::std::sync::Arc::new( - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("_0"), - ), - ::std::sync::Arc::new( - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("_1"), - ), - ::std::sync::Arc::new( - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("_2"), - ), - ::std::sync::Arc::new( - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("_3"), - ), - ]) - } -} -impl ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, -{ - fn from(value: BarArray) -> Self { - <[_]>::into_vec( - #[rustc_box] - ::alloc::boxed::Box::new([ - ::std::sync::Arc::< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.0.into()), - ::std::sync::Arc::< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.1.into()), - ::std::sync::Arc::< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.2.into()), - ::std::sync::Arc::< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.3.into()), - ]), - ) - } -} struct BarArray( ::Array< Buffer, diff --git a/narrow-derive/tests/expand/struct/unnamed/nested.expanded.rs b/narrow-derive/tests/expand/struct/unnamed/nested.expanded.rs index 6373ad89..51d000ae 100644 --- a/narrow-derive/tests/expand/struct/unnamed/nested.expanded.rs +++ b/narrow-derive/tests/expand/struct/unnamed/nested.expanded.rs @@ -16,57 +16,6 @@ impl narrow::array::ArrayType for ::std::option::Option { impl narrow::array::StructArrayType for Foo { type Array = FooArray; } -impl narrow::arrow::StructArrayTypeFields -for FooArray -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([ - ::std::sync::Arc::new( - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("_0"), - ), - ]) - } -} -impl ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, -{ - fn from(value: FooArray) -> Self { - <[_]>::into_vec( - #[rustc_box] - ::alloc::boxed::Box::new([ - ::std::sync::Arc::< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.0.into()), - ]), - ) - } -} struct FooArray( ::Array< Buffer, @@ -145,57 +94,6 @@ impl narrow::array::ArrayType for ::std::option::Option { impl narrow::array::StructArrayType for Bar { type Array = BarArray; } -impl narrow::arrow::StructArrayTypeFields -for BarArray -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([ - ::std::sync::Arc::new( - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("_0"), - ), - ]) - } -} -impl ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, -{ - fn from(value: BarArray) -> Self { - <[_]>::into_vec( - #[rustc_box] - ::alloc::boxed::Box::new([ - ::std::sync::Arc::< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.0.into()), - ]), - ) - } -} struct BarArray( ::Array< Buffer, diff --git a/narrow-derive/tests/expand/struct/unnamed/nested_generic.expanded.rs b/narrow-derive/tests/expand/struct/unnamed/nested_generic.expanded.rs index 5149e6c1..a97782f8 100644 --- a/narrow-derive/tests/expand/struct/unnamed/nested_generic.expanded.rs +++ b/narrow-derive/tests/expand/struct/unnamed/nested_generic.expanded.rs @@ -30,64 +30,6 @@ where { type Array = FooArray; } -impl< - T: narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> narrow::arrow::StructArrayTypeFields for FooArray -where - T: Copy, - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([ - ::std::sync::Arc::new( - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("_0"), - ), - ]) - } -} -impl< - T: narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - T: Copy, - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, -{ - fn from(value: FooArray) -> Self { - <[_]>::into_vec( - #[rustc_box] - ::alloc::boxed::Box::new([ - ::std::sync::Arc::< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.0.into()), - ]), - ) - } -} struct FooArray( ::Array< Buffer, @@ -182,74 +124,6 @@ for ::std::option::Option> { impl<'a, T: narrow::array::ArrayType> narrow::array::StructArrayType for Bar<'a, T> { type Array = BarArray<'a, T, Buffer>; } -impl< - 'a, - T: narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> narrow::arrow::StructArrayTypeFields for BarArray<'a, T, Buffer> -where - <&'a Foo< - T, - > as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([ - ::std::sync::Arc::new( - <<&'a Foo< - T, - > as ::narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("_0"), - ), - ]) - } -} -impl< - 'a, - T: narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - <&'a Foo< - T, - > as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <<&'a Foo< - T, - > as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, -{ - fn from(value: BarArray<'a, T, Buffer>) -> Self { - <[_]>::into_vec( - #[rustc_box] - ::alloc::boxed::Box::new([ - ::std::sync::Arc::< - <<&'a Foo< - T, - > as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.0.into()), - ]), - ) - } -} struct BarArray<'a, T: narrow::array::ArrayType, Buffer: narrow::buffer::BufferType>( <&'a Foo< T, @@ -350,75 +224,6 @@ impl<'a> narrow::array::ArrayType> for ::std::option::Option narrow::array::StructArrayType for FooBar<'a> { type Array = FooBarArray<'a, Buffer>; } -impl<'a, Buffer: narrow::buffer::BufferType> narrow::arrow::StructArrayTypeFields -for FooBarArray<'a, Buffer> -where - as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([ - ::std::sync::Arc::new( - < as ::narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("_0"), - ), - ]) - } -} -impl< - 'a, - Buffer: narrow::buffer::BufferType, -> ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - < as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, -{ - fn from(value: FooBarArray<'a, Buffer>) -> Self { - <[_]>::into_vec( - #[rustc_box] - ::alloc::boxed::Box::new([ - ::std::sync::Arc::< - < as narrow::array::ArrayType>::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.0.into()), - ]), - ) - } -} struct FooBarArray<'a, Buffer: narrow::buffer::BufferType>( > { impl narrow::array::StructArrayType for Foo { type Array = FooArray; } -impl< - T: Sized + narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> narrow::arrow::StructArrayTypeFields for FooArray -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: narrow::arrow::ArrowArray, -{ - fn fields() -> ::arrow_schema::Fields { - ::arrow_schema::Fields::from([ - ::std::sync::Arc::new( - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::as_field("_0"), - ), - ]) - } -} -impl< - T: Sized + narrow::array::ArrayType, - Buffer: narrow::buffer::BufferType, -> ::std::convert::From> -for ::std::vec::Vec<::std::sync::Arc> -where - ::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - >: ::std::convert::Into< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >, -{ - fn from(value: FooArray) -> Self { - <[_]>::into_vec( - #[rustc_box] - ::alloc::boxed::Box::new([ - ::std::sync::Arc::< - <::Array< - Buffer, - narrow::offset::NA, - narrow::array::union::NA, - > as narrow::arrow::ArrowArray>::Array, - >::new(value.0.into()), - ]), - ) - } -} struct FooArray( ::Array< Buffer, From 8f0c5b23ddc769dddb7e3c7b762a06d4528603fa Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Tue, 12 Dec 2023 15:26:30 +0100 Subject: [PATCH 26/27] Add direct `RecordBatch` conversion for `StructArray` --- examples/parquet.rs | 3 +-- src/arrow/array/struct.rs | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/examples/parquet.rs b/examples/parquet.rs index 1bef46a0..4b093026 100644 --- a/examples/parquet.rs +++ b/examples/parquet.rs @@ -40,8 +40,7 @@ fn main() { .into_iter() .collect::>(); - let arrow_struct_array = arrow_array::StructArray::from(narrow_array); - let record_batch = RecordBatch::from(arrow_struct_array); + let record_batch = RecordBatch::from(narrow_array); pretty::print_batches(&[record_batch.clone()]).unwrap(); let mut buffer = Vec::new(); diff --git a/src/arrow/array/struct.rs b/src/arrow/array/struct.rs index 3f8827f0..e3bb2dbb 100644 --- a/src/arrow/array/struct.rs +++ b/src/arrow/array/struct.rs @@ -121,6 +121,28 @@ where } } +impl + From> for arrow_array::RecordBatch +where + ::Array: Validity, + arrow_array::StructArray: From>, +{ + fn from(value: StructArray) -> Self { + Self::from(arrow_array::StructArray::from(value)) + } +} + +impl From + for StructArray +where + ::Array: Validity, + Self: From, +{ + fn from(value: arrow_array::RecordBatch) -> Self { + Self::from(arrow_array::StructArray::from(value)) + } +} + #[cfg(test)] mod tests { From eb7edbb5e5cf9361be5414cf634913c78cf712f5 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Tue, 12 Dec 2023 19:58:39 +0100 Subject: [PATCH 27/27] Use `apache/arrow-rs` instead of fork and update default features --- Cargo.toml | 18 ++++++------------ narrow-derive/Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d448daaa..45ab1b77 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,28 +28,22 @@ keywords.workspace = true categories.workspace = true [features] -default = ["arrow-rs", "derive"] +default = [] arrow-rs = ["dep:arrow-array", "dep:arrow-buffer", "dep:arrow-schema", "narrow-derive?/arrow-rs"] derive = ["dep:narrow-derive"] [dependencies] -# arrow-array = { version = "49.0.0", optional = true } -# arrow-buffer = { version = "49.0.0", optional = true } -# arrow-schema = { version = "49.0.0", optional = true } -arrow-array = { git = "https://github.com/mbrobbel/arrow-rs", branch = "buffer-builder-scalar-buffer", optional = true } -# arrow-buffer = { git = "https://github.com/apache/arrow-rs", branch = "master", optional = true } -arrow-buffer = { git = "https://github.com/mbrobbel/arrow-rs", branch = "buffer-builder-scalar-buffer", optional = true } -arrow-schema = { git = "https://github.com/mbrobbel/arrow-rs", branch = "buffer-builder-scalar-buffer", optional = true } +arrow-array = { git = "https://github.com/apache/arrow-rs", rev = "7fd2d42", optional = true } +arrow-buffer = { git = "https://github.com/apache/arrow-rs", rev = "7fd2d42", optional = true } +arrow-schema = { git = "https://github.com/apache/arrow-rs", rev = "7fd2d42", optional = true } narrow-derive = { path = "narrow-derive", version = "^0.3.4", optional = true } [dev-dependencies] -# arrow-cast = { version = "49.0.0", default-features = false, features = ["prettyprint"] } -arrow-cast = { git = "https://github.com/mbrobbel/arrow-rs", branch = "buffer-builder-scalar-buffer", default-features = false, features = ["prettyprint"] } +arrow-cast = { git = "https://github.com/apache/arrow-rs", rev = "7fd2d42", default-features = false, features = ["prettyprint"] } bytes = "1.5.0" criterion = { version = "0.5.1", default-features = false } rand = { version = "0.8.5", default-features = false, features = ["small_rng"] } -parquet = { git = "https://github.com/mbrobbel/arrow-rs", branch = "buffer-builder-scalar-buffer", features = ["arrow"] } -# parquet = { version = "49.0.0", default-features = false, features = ["arrow"] } +parquet = { git = "https://github.com/apache/arrow-rs", rev = "7fd2d42", features = ["arrow"] } [profile.bench] lto = true diff --git a/narrow-derive/Cargo.toml b/narrow-derive/Cargo.toml index 5980e071..253c9b64 100644 --- a/narrow-derive/Cargo.toml +++ b/narrow-derive/Cargo.toml @@ -13,7 +13,7 @@ keywords.workspace = true categories.workspace = true [features] -default = ["arrow-rs"] +default = [] arrow-rs = [] [lib]