diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 65d9d17..06bec06 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,6 +17,7 @@ jobs: with: toolchain: beta # FIXME: revert to stable when 1.65 lands components: clippy + override: true - name: "clippy --all" run: cargo clippy --all --tests -- -D warnings @@ -28,6 +29,7 @@ jobs: - uses: actions-rs/toolchain@v1 with: toolchain: beta # FIXME: revert to stable when 1.65 lands + override: true components: rustfmt - name: Run run: cargo fmt --all -- --check @@ -40,6 +42,7 @@ jobs: - uses: actions-rs/toolchain@v1 with: toolchain: beta # FIXME: revert to stable when 1.65 lands + override: true - uses: Swatinem/rust-cache@v1 - uses: taiki-e/install-action@nextest - name: Run @@ -54,6 +57,7 @@ jobs: - uses: actions-rs/toolchain@v1 with: toolchain: beta # FIXME: revert to stable when 1.65 lands + override: true components: llvm-tools-preview - name: Install cargo-llvm-cov uses: taiki-e/install-action@cargo-llvm-cov diff --git a/Cargo.toml b/Cargo.toml index e840284..f96c34d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,5 +2,8 @@ members = [ "arrow2_convert", "arrow2_convert_derive", - "examples/simple" + #"examples/simple" ] + +[workspace.dependencies] +arrow2 = { git="https://github.com/jorgecarleitao/arrow2.git", rev="27e109d" } diff --git a/arrow2_convert/Cargo.toml b/arrow2_convert/Cargo.toml index 1eaa3c2..bf3d765 100644 --- a/arrow2_convert/Cargo.toml +++ b/arrow2_convert/Cargo.toml @@ -12,14 +12,13 @@ repository = "https://github.com/DataEngineeringLabs/arrow2-convert" description = "Convert between nested rust types and Arrow with arrow2" [dependencies] -arrow2 = "0.14.1" +arrow2.workspace = true arrow2_convert_derive = { version = "0.3.2", path = "../arrow2_convert_derive", optional = true } chrono = { version = "0.4", default_features = false, features = ["std"] } -err-derive = "0.3" -trybuild = "1.0" [dev-dependencies] arrow2_convert_derive = { version = 
"0.3.2", path = "../arrow2_convert_derive" } +trybuild = "1.0.71" [features] default = [ "derive" ] diff --git a/arrow2_convert/src/Notes.md b/arrow2_convert/src/Notes.md new file mode 100644 index 0000000..012b423 --- /dev/null +++ b/arrow2_convert/src/Notes.md @@ -0,0 +1,96 @@ +deserialize: +- source: a physical arrow type +- target: any rust type + +serialize: + - source: any rust type + - target: a physical arrow type + +These naturally correspond to conversion methods that any type that aspires to be an arrow field can implement. + +For non-list fields: + - deserialize conversion can look like: + ```rust + deserialize(source: ArrowType) -> Type + ``` + - serialize conversion can look like: + ```rust + serialize(source: Type) -> ArrowType + ``` + +lists are tricky: +- lists in arrow are represented as offsets into a physical array. nested lists are simply layers of offsets. +- the physical types of lists are iterators and since we cannot return `impl` types from traits yet, representing the types is cumbersome. Fortunately, now that GATs are nearly stable we can use those for deserialize. We'll need to hack serialize for now (more on this below). + +For list fields: +- deserialize conversion can look like: + ```rust + type Source = Iterator; + type Target = Collection; + deserialize(source: Source) -> Target + ``` + +We need to connect this conversion method to the data structures arrow2 uses. Specifically, we need to get the iterator for an arrow2::array::Array. But which data structures need to be used for a specific field? We need to map these with an associated type. 
+ +This leads to the following deserialize trait: + +```rust +trait ArrowDeserialize { + type Source; + type Target; + type Array: ArrayAdapter + fn deserialize(source: Source) -> Target; +} +``` + +A further simplification can be made since we can infer types if we design the Adapter traits correctly, and also since both of these traits inherit from the `ArrowField` trait which defines the rust type: + +```rust +trait ArrowDeserialize { + // can be inferred from ArrayAdapter + //type Source; + // can be inferred from ArrowField + // type Target; + type Array: ArrayAdapter + fn deserialize(source: Source) -> Target; +} +``` + +Why do we need an explicit `Type` instead of `Self`? Two reasons: + +1. To support field overrides, which in turn support two use cases: + 1. Custom conversion methods for a specific field + 2. Allow using i64 memory offsets for larger data. +2. To use the same set of traits for collections, so that we can support collections without explicit annotations by the user. + +Ideally, serialize would work similarly and would result in a trait similar to `ArrowDeserialize`: + +```rust +trait ArrowSerialize { + // can be inferred from ArrowField + // type Source: Iterator; + // type Target: Iterator; + // can be inferred from MutableArrayAdapter + type Array: MutableArrayAdapter + fn deserialize(source: Source) -> Target; +} + +``` + +However, since `impl` types cannot be used in traits, we need to explicitly provide a type for the iterator that a MutableArray can +consume. 
+ +- serialize conversion: + ```rust + type Source = Collection; + type Target = Iterator + serialize(source: Source) -> Target + +trait ArrowSerialize { + type Source; + type Target; + type Array: MutableArrayAdapter + fn deserialize(source: Source) -> Target; +} + +# Add notes on `Nullable` for `ArrowDeserialize` \ No newline at end of file diff --git a/arrow2_convert/src/deserialize.rs b/arrow2_convert/src/deserialize.rs deleted file mode 100644 index 78de9fc..0000000 --- a/arrow2_convert/src/deserialize.rs +++ /dev/null @@ -1,350 +0,0 @@ -//! Implementation and traits for deserializing from Arrow. - -use arrow2::array::*; -use chrono::{NaiveDate, NaiveDateTime}; - -use crate::field::*; - -#[doc(hidden)] -/// Type whose reference can be used to create an iterator. -pub trait IterRef { - /// Iterator type. - type Iter<'a>: Iterator - where - Self: 'a; - - /// Converts `&self` into an iterator. - fn iter_ref(&self) -> Self::Iter<'_>; -} - -impl IterRef for T -where - for<'a> &'a T: IntoIterator, -{ - type Iter<'a> = <&'a T as IntoIterator>::IntoIter where Self: 'a; - - #[inline] - fn iter_ref(&self) -> Self::Iter<'_> { - self.into_iter() - } -} - -/// Implemented by [`ArrowField`] that can be deserialized from arrow -pub trait ArrowDeserialize: ArrowField + Sized { - /// The `arrow2::Array` type corresponding to this field - type ArrayType: ArrowArray; - - /// Deserialize this field from arrow - fn arrow_deserialize( - v: <::Iter<'_> as Iterator>::Item, - ) -> Option<::Type>; - - #[inline] - #[doc(hidden)] - /// For internal use only - /// - /// This is an ugly hack to allow generating a blanket Option deserialize. - /// Ideally we would be able to capture the optional field of the iterator via - /// something like for<'a> &'a T::ArrayType: IntoIterator>, - /// However, the E parameter seems to confuse the borrow checker if it's a reference. 
- fn arrow_deserialize_internal( - v: <::Iter<'_> as Iterator>::Item, - ) -> ::Type { - Self::arrow_deserialize(v).unwrap() - } -} - -/// Internal trait used to support deserialization and iteration of structs, and nested struct lists -/// -/// Trivial pass-thru implementations are provided for arrow2 arrays that auto-implement IterRef. -/// -/// The derive macro generates implementations for typed struct arrays. -#[doc(hidden)] -pub trait ArrowArray: IterRef { - type BaseArrayType: Array; - - // Returns a typed iterator to the underlying elements of the array from an untyped Array reference. - fn iter_from_array_ref(b: &dyn Array) -> ::Iter<'_>; -} - -// Macro to facilitate implementation for numeric types and numeric arrays. -macro_rules! impl_arrow_deserialize_primitive { - ($physical_type:ty) => { - impl ArrowDeserialize for $physical_type { - type ArrayType = PrimitiveArray<$physical_type>; - - #[inline] - fn arrow_deserialize<'a>(v: Option<&$physical_type>) -> Option { - v.map(|t| *t) - } - } - - impl_arrow_array!(PrimitiveArray<$physical_type>); - }; -} - -macro_rules! 
impl_arrow_array { - ($array:ty) => { - impl ArrowArray for $array { - type BaseArrayType = Self; - - fn iter_from_array_ref(b: &dyn Array) -> ::Iter<'_> { - b.as_any() - .downcast_ref::() - .unwrap() - .iter_ref() - } - } - }; -} - -// blanket implementation for optional fields -impl ArrowDeserialize for Option -where - T: ArrowDeserialize, -{ - type ArrayType = ::ArrayType; - - #[inline] - fn arrow_deserialize( - v: <::Iter<'_> as Iterator>::Item, - ) -> Option<::Type> { - Self::arrow_deserialize_internal(v).map(Some) - } - - #[inline] - fn arrow_deserialize_internal( - v: <::Iter<'_> as Iterator>::Item, - ) -> ::Type { - ::arrow_deserialize(v) - } -} - -impl_arrow_deserialize_primitive!(u8); -impl_arrow_deserialize_primitive!(u16); -impl_arrow_deserialize_primitive!(u32); -impl_arrow_deserialize_primitive!(u64); -impl_arrow_deserialize_primitive!(i8); -impl_arrow_deserialize_primitive!(i16); -impl_arrow_deserialize_primitive!(i32); -impl_arrow_deserialize_primitive!(i64); -impl_arrow_deserialize_primitive!(f32); -impl_arrow_deserialize_primitive!(f64); - -impl ArrowDeserialize for I128 { - type ArrayType = PrimitiveArray; - - #[inline] - fn arrow_deserialize<'a>(v: Option<&i128>) -> Option { - v.copied() - } -} - -impl_arrow_array!(PrimitiveArray); - -impl ArrowDeserialize for String { - type ArrayType = Utf8Array; - - #[inline] - fn arrow_deserialize(v: Option<&str>) -> Option { - v.map(|t| t.to_string()) - } -} - -impl ArrowDeserialize for LargeString { - type ArrayType = Utf8Array; - - #[inline] - fn arrow_deserialize(v: Option<&str>) -> Option { - v.map(|t| t.to_string()) - } -} - -impl ArrowDeserialize for bool { - type ArrayType = BooleanArray; - - #[inline] - fn arrow_deserialize(v: Option) -> Option { - v - } -} - -impl ArrowDeserialize for NaiveDateTime { - type ArrayType = PrimitiveArray; - - #[inline] - fn arrow_deserialize(v: Option<&i64>) -> Option { - v.map(|t| arrow2::temporal_conversions::timestamp_ns_to_datetime(*t)) - } -} - -impl 
ArrowDeserialize for NaiveDate { - type ArrayType = PrimitiveArray; - - #[inline] - fn arrow_deserialize(v: Option<&i32>) -> Option { - v.map(|t| arrow2::temporal_conversions::date32_to_date(*t)) - } -} - -impl ArrowDeserialize for Vec { - type ArrayType = BinaryArray; - - #[inline] - fn arrow_deserialize(v: Option<&[u8]>) -> Option { - v.map(|t| t.to_vec()) - } -} - -impl ArrowDeserialize for LargeBinary { - type ArrayType = BinaryArray; - - #[inline] - fn arrow_deserialize(v: Option<&[u8]>) -> Option> { - v.map(|t| t.to_vec()) - } -} - -impl ArrowDeserialize for FixedSizeBinary { - type ArrayType = FixedSizeBinaryArray; - - #[inline] - fn arrow_deserialize(v: Option<&[u8]>) -> Option> { - v.map(|t| t.to_vec()) - } -} - -fn arrow_deserialize_vec_helper( - v: Option>, -) -> Option< as ArrowField>::Type> -where - T: ArrowDeserialize + ArrowEnableVecForType + 'static, -{ - use std::ops::Deref; - v.map(|t| { - arrow_array_deserialize_iterator_internal::<::Type, T>(t.deref()) - .collect::::Type>>() - }) -} - -// Blanket implementation for Vec -impl ArrowDeserialize for Vec -where - T: ArrowDeserialize + ArrowEnableVecForType + 'static, -{ - type ArrayType = ListArray; - - fn arrow_deserialize(v: Option>) -> Option<::Type> { - arrow_deserialize_vec_helper::(v) - } -} - -impl ArrowDeserialize for LargeVec -where - T: ArrowDeserialize + ArrowEnableVecForType + 'static, -{ - type ArrayType = ListArray; - - fn arrow_deserialize(v: Option>) -> Option<::Type> { - arrow_deserialize_vec_helper::(v) - } -} - -impl ArrowDeserialize for FixedSizeVec -where - T: ArrowDeserialize + ArrowEnableVecForType + 'static, -{ - type ArrayType = FixedSizeListArray; - - fn arrow_deserialize(v: Option>) -> Option<::Type> { - arrow_deserialize_vec_helper::(v) - } -} - -impl_arrow_array!(BooleanArray); -impl_arrow_array!(Utf8Array); -impl_arrow_array!(Utf8Array); -impl_arrow_array!(BinaryArray); -impl_arrow_array!(BinaryArray); -impl_arrow_array!(FixedSizeBinaryArray); 
-impl_arrow_array!(ListArray); -impl_arrow_array!(ListArray); -impl_arrow_array!(FixedSizeListArray); - -/// Top-level API to deserialize from Arrow -pub trait TryIntoCollection -where - Element: ArrowField, - Collection: FromIterator, -{ - /// Convert from a `arrow2::Array` to any collection that implements the `FromIterator` trait - fn try_into_collection(self) -> arrow2::error::Result; - - /// Same as `try_into_collection` except can coerce the conversion to a specific Arrow type. This is - /// useful when the same rust type maps to one or more Arrow types for example `LargeString`. - fn try_into_collection_as_type(self) -> arrow2::error::Result - where - ArrowType: ArrowDeserialize + ArrowField + 'static; -} - -/// Helper to return an iterator for elements from a [`arrow2::array::Array`]. -fn arrow_array_deserialize_iterator_internal<'a, Element, Field>( - b: &'a dyn Array, -) -> impl Iterator + 'a -where - Field: ArrowDeserialize + ArrowField + 'static, -{ - <::ArrayType as ArrowArray>::iter_from_array_ref(b) - .map(::arrow_deserialize_internal) -} - -/// Returns a typed iterator to a target type from an `arrow2::Array` -pub fn arrow_array_deserialize_iterator_as_type<'a, Element, ArrowType>( - arr: &'a dyn Array, -) -> arrow2::error::Result + 'a> -where - Element: 'static, - ArrowType: ArrowDeserialize + ArrowField + 'static, -{ - if &::data_type() != arr.data_type() { - Err(arrow2::error::Error::InvalidArgumentError( - "Data type mismatch".to_string(), - )) - } else { - Ok(arrow_array_deserialize_iterator_internal::< - Element, - ArrowType, - >(arr)) - } -} - -/// Return an iterator that deserializes an [`Array`] to an element of type T -pub fn arrow_array_deserialize_iterator<'a, T>( - arr: &'a dyn Array, -) -> arrow2::error::Result + 'a> -where - T: ArrowDeserialize + ArrowField + 'static, -{ - arrow_array_deserialize_iterator_as_type::(arr) -} - -impl TryIntoCollection for ArrowArray -where - Element: ArrowDeserialize + ArrowField + 'static, - ArrowArray: 
std::borrow::Borrow, - Collection: FromIterator, -{ - fn try_into_collection(self) -> arrow2::error::Result { - Ok(arrow_array_deserialize_iterator::(self.borrow())?.collect()) - } - - fn try_into_collection_as_type(self) -> arrow2::error::Result - where - ArrowType: ArrowDeserialize + ArrowField + 'static, - { - Ok( - arrow_array_deserialize_iterator_as_type::(self.borrow())? - .collect(), - ) - } -} diff --git a/arrow2_convert/src/deserialize/array_adapter_impls.rs b/arrow2_convert/src/deserialize/array_adapter_impls.rs new file mode 100644 index 0000000..e8ce7f1 --- /dev/null +++ b/arrow2_convert/src/deserialize/array_adapter_impls.rs @@ -0,0 +1,82 @@ +use arrow2::{ + array::*, + bitmap::utils::BitmapIter, + types::{NativeType, Offset}, +}; + +use super::{ArrayAdapter, Nullable}; + +impl ArrayAdapter for PrimitiveArray { + type Element<'a> = &'a T; + type Iter<'a> = std::slice::Iter<'a, T>; + + #[inline] + fn into_iter(array: &dyn Array) -> Option> { + Some( + array + .as_any() + .downcast_ref::>()? + .values_iter(), + ) + } +} + +impl ArrayAdapter for Nullable +where + T: ArrayAdapter + arrow2::array::Array, + for<'a> &'a T: IntoIterator::Element<'a>>>, +{ + type Element<'a> = Option<::Element<'a>>; + type Iter<'a> = <&'a T as IntoIterator>::IntoIter; + + #[inline] + fn into_iter(array: &dyn arrow2::array::Array) -> Option> { + let array = array.as_any().downcast_ref::()?; + Some(<&T as IntoIterator>::into_iter(array)) + } +} + +macro_rules! 
impl_into_value_iter { + () => { + #[inline] + fn into_iter(array: &dyn arrow2::array::Array) -> Option> { + Some(array.as_any().downcast_ref::()?.values_iter()) + } + }; +} + +impl ArrayAdapter for Utf8Array { + type Element<'a> = &'a str; + type Iter<'a> = Utf8ValuesIter<'a, O>; + impl_into_value_iter!(); +} + +impl ArrayAdapter for BooleanArray { + type Element<'a> = bool; + type Iter<'a> = BitmapIter<'a>; + impl_into_value_iter!(); +} + +impl ArrayAdapter for BinaryArray { + type Element<'a> = &'a [u8]; + type Iter<'a> = BinaryValueIter<'a, O>; + impl_into_value_iter!(); +} + +impl ArrayAdapter for FixedSizeBinaryArray { + type Element<'a> = &'a [u8]; + type Iter<'a> = std::slice::ChunksExact<'a, u8>; + impl_into_value_iter!(); +} + +impl ArrayAdapter for ListArray { + type Element<'a> = Box; + type Iter<'a> = ArrayValuesIter<'a, ListArray>; + impl_into_value_iter!(); +} + +impl ArrayAdapter for FixedSizeListArray { + type Element<'a> = Box; + type Iter<'a> = ArrayValuesIter<'a, FixedSizeListArray>; + impl_into_value_iter!(); +} diff --git a/arrow2_convert/src/deserialize/field_impls.rs b/arrow2_convert/src/deserialize/field_impls.rs new file mode 100644 index 0000000..e459f87 --- /dev/null +++ b/arrow2_convert/src/deserialize/field_impls.rs @@ -0,0 +1,224 @@ +use std::borrow::Borrow; + +use crate::field::{ + ArrowEnableVecForType, ArrowField, FixedSizeBinary, FixedSizeList, GenericBinary, GenericList, + GenericUtf8, I128, +}; + +use super::{array_to_collection, ArrayAdapter, ArrowDeserialize, Nullable}; +use arrow2::array::*; +use chrono::{NaiveDate, NaiveDateTime}; + +// Macro to facilitate implementation for numeric types and numeric arrays. +macro_rules! 
impl_arrow_deserialize_primitive { + ($t:ty) => { + impl ArrowDeserialize for $t { + type Array = PrimitiveArray<$t>; + + #[inline] + fn arrow_deserialize<'a>(v: &$t) -> Self { + *v + } + } + }; +} + +// blanket implementation for optional fields +impl ArrowDeserialize for Option +where + T: ArrowDeserialize, + for<'a> Nullable<::Array>: ArrayAdapter< + Element<'a> = Option<<::Array as ArrayAdapter>::Element<'a>>, + >, +{ + type Array = Nullable<::Array>; + + #[inline] + fn arrow_deserialize( + v: Option<<::Array as ArrayAdapter>::Element<'_>>, + ) -> Option<::Type> { + v.map(T::arrow_deserialize) + } +} + +impl_arrow_deserialize_primitive!(u8); +impl_arrow_deserialize_primitive!(u16); +impl_arrow_deserialize_primitive!(u32); +impl_arrow_deserialize_primitive!(u64); +impl_arrow_deserialize_primitive!(i8); +impl_arrow_deserialize_primitive!(i16); +impl_arrow_deserialize_primitive!(i32); +impl_arrow_deserialize_primitive!(i64); +impl_arrow_deserialize_primitive!(f32); +impl_arrow_deserialize_primitive!(f64); + +impl ArrowDeserialize for I128 { + type Array = PrimitiveArray; + + #[inline] + fn arrow_deserialize(v: &i128) -> i128 { + *v + } +} + +impl ArrowDeserialize for String { + type Array = Utf8Array; + + #[inline] + fn arrow_deserialize(v: &str) -> String { + v.into() + } +} + +impl ArrowDeserialize for GenericUtf8 +where + for<'a> S: From<&'a str>, +{ + type Array = Utf8Array; + + #[inline] + fn arrow_deserialize(v: &str) -> S { + v.into() + } +} + +impl ArrowDeserialize for GenericUtf8 +where + for<'a> S: From<&'a str>, +{ + type Array = Utf8Array; + + #[inline] + fn arrow_deserialize(v: &str) -> S { + v.into() + } +} + +impl ArrowDeserialize for bool { + type Array = BooleanArray; + + #[inline] + fn arrow_deserialize(v: bool) -> Self { + v + } +} + +impl ArrowDeserialize for NaiveDateTime { + type Array = PrimitiveArray; + + #[inline] + fn arrow_deserialize(v: &i64) -> Self { + arrow2::temporal_conversions::timestamp_ns_to_datetime(*v) + } +} + +impl 
ArrowDeserialize for NaiveDate { + type Array = PrimitiveArray; + + #[inline] + fn arrow_deserialize(v: &i32) -> Self { + arrow2::temporal_conversions::date32_to_date(*v) + } +} + +impl ArrowDeserialize for Vec { + type Array = BinaryArray; + + #[inline] + fn arrow_deserialize(v: &[u8]) -> Vec { + v.iter().map(|v| *v).collect() + } +} + +impl<'a, C> ArrowDeserialize for GenericBinary +where + Self: 'a, + C: FromIterator, + &'a C: IntoIterator, +{ + type Array = BinaryArray; + + #[inline] + fn arrow_deserialize(v: &[u8]) -> C { + v.iter().map(|v| *v).collect() + } +} + +impl<'a, C> ArrowDeserialize for GenericBinary +where + Self: 'a, + C: FromIterator, + &'a C: IntoIterator, +{ + type Array = BinaryArray; + + #[inline] + fn arrow_deserialize(v: &[u8]) -> C { + v.iter().map(|v| *v).collect() + } +} + +impl<'a, const SIZE: usize, C> ArrowDeserialize for FixedSizeBinary +where + Self: 'a, + C: FromIterator, + &'a C: IntoIterator, +{ + type Array = FixedSizeBinaryArray; + + #[inline] + fn arrow_deserialize(v: &[u8]) -> C { + v.iter().map(|v| *v).collect() + } +} + +// Blanket implementation for Vec +impl ArrowDeserialize for Vec +where + T: ArrowField + ArrowDeserialize + ArrowEnableVecForType + 'static, +{ + type Array = ListArray; + + fn arrow_deserialize(array: Box) -> ::Type { + array_to_collection::(array.borrow()) + } +} + +impl<'a, T, C> ArrowDeserialize for GenericList +where + T: ArrowDeserialize + 'static, + &'a C: IntoIterator::Type>, + C: FromIterator<::Type> + 'static + Default, +{ + type Array = ListArray; + + fn arrow_deserialize(array: Box) -> ::Type { + array_to_collection::(array.borrow()) + } +} + +impl<'a, T, C> ArrowDeserialize for GenericList +where + T: ArrowDeserialize + 'static, + &'a C: IntoIterator::Type>, + C: FromIterator<::Type> + 'static + Default, +{ + type Array = ListArray; + + fn arrow_deserialize(array: Box) -> ::Type { + array_to_collection::(array.borrow()) + } +} + +impl<'a, T, C, const SIZE: usize> ArrowDeserialize for 
FixedSizeList +where + T: ArrowDeserialize + 'static, + &'a C: IntoIterator::Type>, + C: FromIterator<::Type> + 'static + Default, +{ + type Array = FixedSizeListArray; + + fn arrow_deserialize(array: Box) -> ::Type { + array_to_collection::(array.borrow()) + } +} diff --git a/arrow2_convert/src/deserialize/mod.rs b/arrow2_convert/src/deserialize/mod.rs new file mode 100644 index 0000000..81081a2 --- /dev/null +++ b/arrow2_convert/src/deserialize/mod.rs @@ -0,0 +1,123 @@ +//! Implementation and traits for deserializing from Arrow. + +mod array_adapter_impls; +mod field_impls; + +use crate::field::ArrowField; +use std::marker::PhantomData; + +/// Implemented by array types to convert a `dyn arrow2::array::Array` to +/// either a value iterator or a default iterator over optional elements. +pub trait ArrayAdapter { + /// The element of the array + type Element<'a>; + /// Iterator over the elements + type Iter<'a>: Iterator>; + + /// Convert to a typed iterator + fn into_iter(array: &dyn arrow2::array::Array) -> Option>; +} + +/// Implemented by fields that can be deserialized from arrow +pub trait ArrowDeserialize: ArrowField + Sized { + /// The `arrow2::Array` type corresponding to this field + type Array: ArrayAdapter; + + /// Deserialize this field from arrow + fn arrow_deserialize( + v: ::Element<'_>, + ) -> ::Type; +} + +/// Wrapper to implement ArrayAdapter for an iterator over optional elements +pub struct Nullable { + _t: PhantomData, +} + +/// Top-level API to deserialize from Arrow, represented by arrow2 data-structures +/// This is implemented by wrappers around arrow2 arrays such as Box +pub trait TryIntoCollection +where + Element: ArrowField, + Collection: FromIterator, +{ + /// Convert from a `arrow2::Array` to any collection that implements the `FromIterator` trait + fn try_into_collection(self) -> arrow2::error::Result; + + /// Same as `try_into_collection` but can coerce the conversion to a specific Arrow type. 
This is + /// useful for using fixed-size and large offset arrow types. + fn try_into_collection_as_type(self) -> arrow2::error::Result + where + ArrowType: ArrowDeserialize + ArrowField + 'static; +} + +/// Returns a typed iterator to a target type from an `arrow2::Array` +pub fn arrow_array_deserialize_iterator_as_type( + arr: &dyn arrow2::array::Array, +) -> arrow2::error::Result + '_> +where + Element: 'static, + ArrowType: ArrowDeserialize + ArrowField + 'static, +{ + if &::data_type() != arr.data_type() { + // TODO: use arrow2_convert error type here and include more detail + Err(arrow2::error::Error::InvalidArgumentError( + "Data type mismatch".to_string(), + )) + } else { + array_to_typed_iter::(arr).ok_or_else(|| + // TODO: use arrow2_convert error type here and include more detail + arrow2::error::Error::InvalidArgumentError("Schema mismatch".to_string())) + } +} + +/// Return an iterator that deserializes an [`Array`] to an element of type T +pub fn arrow_array_deserialize_iterator( + arr: &dyn arrow2::array::Array, +) -> arrow2::error::Result + '_> +where + T: ArrowDeserialize + ArrowField + 'static, +{ + arrow_array_deserialize_iterator_as_type::(arr) +} + +impl TryIntoCollection for ArrowArray +where + Element: ArrowDeserialize + ArrowField + 'static, + ArrowArray: std::borrow::Borrow, + Collection: FromIterator, +{ + fn try_into_collection(self) -> arrow2::error::Result { + Ok(arrow_array_deserialize_iterator::(self.borrow())?.collect()) + } + + fn try_into_collection_as_type(self) -> arrow2::error::Result + where + ArrowType: ArrowDeserialize + ArrowField + 'static, + { + Ok( + arrow_array_deserialize_iterator_as_type::(self.borrow())? 
+ .collect(), + ) + } +} + +fn array_to_typed_iter<'a, T>( + array: &'a dyn arrow2::array::Array, +) -> Option::Type> + 'a> +where + T: ArrowDeserialize + 'a, +{ + let iter = <::Array as ArrayAdapter>::into_iter(array)?; + Some(iter.map(T::arrow_deserialize)) +} + +fn array_to_collection<'a, T, C>(array: &'a dyn arrow2::array::Array) -> C +where + T: ArrowDeserialize + 'a, + C: FromIterator<::Type> + Default, +{ + array_to_typed_iter::(array) + .map(|iter| iter.collect()) + .unwrap_or_default() +} diff --git a/arrow2_convert/src/field.rs b/arrow2_convert/src/field.rs index 3f144cb..feefcf5 100644 --- a/arrow2_convert/src/field.rs +++ b/arrow2_convert/src/field.rs @@ -1,5 +1,7 @@ //! Implementation and traits for mapping rust types to Arrow types +use std::marker::PhantomData; + use arrow2::datatypes::{DataType, Field}; use chrono::{NaiveDate, NaiveDateTime}; @@ -122,7 +124,7 @@ impl ArrowField for I128 arrow2::datatypes::DataType { @@ -130,11 +132,29 @@ impl ArrowField for String { } } -/// Represents the `LargeUtf8` Arrow type -pub struct LargeString {} +/// Utf8 field that can be used by any type that can be converted from a String. +pub struct GenericUtf8 { + _data: PhantomData, + _o: PhantomData, +} + +impl<'a, S> ArrowField for GenericUtf8 +where + S: From<&'a str>, +{ + type Type = S; + + #[inline] + fn data_type() -> arrow2::datatypes::DataType { + arrow2::datatypes::DataType::Utf8 + } +} -impl ArrowField for LargeString { - type Type = String; +impl<'a, S> ArrowField for GenericUtf8 +where + S: From<&'a str>, +{ + type Type = S; #[inline] fn data_type() -> arrow2::datatypes::DataType { @@ -169,7 +189,7 @@ impl ArrowField for NaiveDate { } } -impl ArrowField for Vec { +impl<'a> ArrowField for Vec { type Type = Self; #[inline] @@ -178,11 +198,33 @@ impl ArrowField for Vec { } } -/// Represents the `LargeString` Arrow type. -pub struct LargeBinary {} +/// Binary field that can be used by any type that can be converted from a [u8]. 
+pub struct GenericBinary { + _data: PhantomData, + _o: PhantomData, +} + +impl<'a, C> ArrowField for GenericBinary +where + Self: 'a, + C: FromIterator, + &'a C: IntoIterator, +{ + type Type = C; + + #[inline] + fn data_type() -> arrow2::datatypes::DataType { + arrow2::datatypes::DataType::Binary + } +} -impl ArrowField for LargeBinary { - type Type = Vec; +impl<'a, C> ArrowField for GenericBinary +where + Self: 'a, + C: FromIterator, + &'a C: IntoIterator, +{ + type Type = C; #[inline] fn data_type() -> arrow2::datatypes::DataType { @@ -190,11 +232,18 @@ impl ArrowField for LargeBinary { } } -/// Represents the `FixedSizeBinary` Arrow type. -pub struct FixedSizeBinary {} +/// FixedSizeBinary field that can be used by any type that can be converted from a [u8]. +pub struct FixedSizeBinary { + _data: PhantomData, +} -impl ArrowField for FixedSizeBinary { - type Type = Vec; +impl<'a, C, const SIZE: usize> ArrowField for FixedSizeBinary +where + Self: 'a, + C: FromIterator, + &'a C: IntoIterator, +{ + type Type = C; #[inline] fn data_type() -> arrow2::datatypes::DataType { @@ -205,9 +254,9 @@ impl ArrowField for FixedSizeBinary { // Blanket implementation for Vec. impl ArrowField for Vec where - T: ArrowField + ArrowEnableVecForType, + T: ArrowField + ArrowEnableVecForType + 'static, { - type Type = Vec<::Type>; + type Type = Self; #[inline] fn data_type() -> arrow2::datatypes::DataType { @@ -215,16 +264,37 @@ where } } -/// Represents the `LargeList` Arrow type. -pub struct LargeVec { - d: std::marker::PhantomData, +/// List field that can be used by any type that can be converted from a [T]. +pub struct GenericList { + _d: std::marker::PhantomData, + _t: std::marker::PhantomData, + _o: std::marker::PhantomData, +} + +// Blanket implementation for Vec. 
+impl<'a, C, T> ArrowField for GenericList +where + C: 'a, + T: ArrowField + 'static, + &'a C: IntoIterator::Type>, + C: FromIterator<::Type>, +{ + type Type = C; + + #[inline] + fn data_type() -> arrow2::datatypes::DataType { + arrow2::datatypes::DataType::List(Box::new(::field("item"))) + } } -impl ArrowField for LargeVec +impl<'a, C, T> ArrowField for GenericList where - T: ArrowField + ArrowEnableVecForType, + C: 'a, + T: ArrowField + 'static, + &'a C: IntoIterator::Type>, + C: FromIterator<::Type>, { - type Type = Vec<::Type>; + type Type = C; #[inline] fn data_type() -> arrow2::datatypes::DataType { @@ -232,16 +302,20 @@ where } } -/// Represents the `FixedSizeList` Arrow type. -pub struct FixedSizeVec { - d: std::marker::PhantomData, +/// FixedSizeList field that can be used by any type that can be converted from a [T]. +pub struct FixedSizeList { + _data: PhantomData, + _t: PhantomData, } -impl ArrowField for FixedSizeVec +impl<'a, T, C, const SIZE: usize> ArrowField for FixedSizeList where - T: ArrowField + ArrowEnableVecForType, + Self: 'a, + T: ArrowField, + &'a C: IntoIterator::Type>, + C: FromIterator<::Type>, { - type Type = Vec<::Type>; + type Type = C; #[inline] fn data_type() -> arrow2::datatypes::DataType { @@ -250,22 +324,9 @@ where } arrow_enable_vec_for_type!(String); -arrow_enable_vec_for_type!(LargeString); arrow_enable_vec_for_type!(bool); arrow_enable_vec_for_type!(NaiveDateTime); arrow_enable_vec_for_type!(NaiveDate); -arrow_enable_vec_for_type!(Vec); -arrow_enable_vec_for_type!(LargeBinary); -impl ArrowEnableVecForType for FixedSizeBinary {} -impl ArrowEnableVecForType for I128 {} // Blanket implementation for Vec> if vectors are enabled for T impl ArrowEnableVecForType for Option where T: ArrowField + ArrowEnableVecForType {} - -// Blanket implementation for Vec> if vectors are enabled for T -impl ArrowEnableVecForType for Vec where T: ArrowField + ArrowEnableVecForType {} -impl ArrowEnableVecForType for LargeVec where T: ArrowField + 
ArrowEnableVecForType {} -impl ArrowEnableVecForType for FixedSizeVec where - T: ArrowField + ArrowEnableVecForType -{ -} diff --git a/arrow2_convert/src/lib.rs b/arrow2_convert/src/lib.rs index efca6c5..76ccdb0 100644 --- a/arrow2_convert/src/lib.rs +++ b/arrow2_convert/src/lib.rs @@ -4,6 +4,7 @@ pub mod deserialize; pub mod field; +//pub mod physical_type; pub mod serialize; // The proc macro is implemented in derive_internal, and re-exported by this diff --git a/arrow2_convert/src/physical_type.rs b/arrow2_convert/src/physical_type.rs new file mode 100644 index 0000000..90f16c3 --- /dev/null +++ b/arrow2_convert/src/physical_type.rs @@ -0,0 +1,236 @@ +//! Implementations and traits for types that have unique physical representations in the arrow format. + +use std::marker::PhantomData; + +use arrow2::{ + array::{ + ArrayValuesIter, BinaryArray, BinaryValueIter, BooleanArray, FixedSizeBinaryArray, + FixedSizeListArray, ListArray, MutableBinaryArray, MutableBooleanArray, MutableListArray, + MutablePrimitiveArray, MutableUtf8Array, Utf8Array, Utf8ValuesIter, + }, + bitmap::utils::BitmapIter, +}; + + +/// Implemented by physical types to convert to the corresponding mutable array types. This is +/// used to differentiate between arrow2 value iterators (which are used for required +/// fields), and the default iterators, which are used by optional fields. +pub trait MutableArrayAdapter { + /// The element of the array + type Element<'a>; + /// The MutableArray implementation + type Array; + + /// Create a new mutable array + fn new_array() -> Self::Array; + /// Push an element into the mutable array. + fn try_push(array: &mut Self::Array, element: Self::Element<'_>) -> arrow2::error::Result<()>; +} + +/// A physical type that's nullable +#[derive(Default)] +pub struct NullableArray +where + T: Array, +{ + _data: PhantomData, +} + +macro_rules! 
declare_offset_type { + ($name: ident) => { + #[derive(Default)] + /// Physical type for $name + pub struct $name { + _data: PhantomData, + } + }; +} + +declare_offset_type!(Utf8); + +declare_offset_type!(Binary); +/// Represents the `FixedSizeBinary` Arrow type. +#[derive(Default)] +pub struct FixedSizeBinary {} + +declare_offset_type!(List); +/// Represents the `FixedSizeList` arrow type +#[derive(Default)] +pub struct FixedSizeList {} + +macro_rules! impl_physical_type_generic { + ($t:ty, $element_type:ty, $array_type:ty, $iter_type:ty, $mutable_array_type:ty) => { + impl ArrayAdapter for $t { + type Element<'a> = $element_type; + type Iter<'a> = $iter_type; + + fn into_iter(array: &dyn arrow2::array::Array) -> Option> { + Some(array.as_any().downcast_ref::<$array_type>()?.values_iter()) + } + } + + impl ArrayAdapter for NullableArray<$t> { + type Element<'a> = Option<$element_type>; + type Iter<'a> = <&'a $array_type as IntoIterator>::IntoIter; + + fn into_iter(array: &dyn arrow2::array::Array) -> Option> { + Some(array.as_any().downcast_ref::<$array_type>()?.iter()) + } + } + + impl MutableArrayAdapter for $t { + type Element<'a> = $t; + type Array = $mutable_array_type; + + fn new_array() -> Self::Array { + Self::Array::new() + } + + fn try_push(array: &mut Self::Array, e: $t) -> arrow2::error::Result<()> { + use arrow2::array::TryPush; + array.try_push(Some(e)) + } + } + }; +} + +// TODO: consolidate with above macro with generic type bounds. didn't figure out a way +// to include where bounds in macros by example. +macro_rules! 
impl_physical_type_with_offset { + ($t:ty, $element_type:ty, $array_type:ty, $iter_type:ty, $mutable_array_type:ty) => { + impl $crate::physical_type::PhysicalType for $t {} + + impl ArrayAdapter for $t { + type Element<'a> = $element_type; + type Iter<'a> = $iter_type; + + fn into_iter(array: &dyn arrow2::array::Array) -> Option> { + Some(array.as_any().downcast_ref::<$array_type>()?.values_iter()) + } + } + + impl ArrayAdapter for Nullable<$t> { + type Element<'a> = Option<$element_type>; + type Iter<'a> = <&'a $array_type as IntoIterator>::IntoIter; + + fn into_iter(array: &dyn arrow2::array::Array) -> Option> { + Some(array.as_any().downcast_ref::<$array_type>()?.iter()) + } + } + + impl MutableArrayAdapter for $t { + type Element<'a> = $element_type; + type Array = $mutable_array_type; + + fn new_array() -> Self::Array { + Self::Array::new() + } + + fn try_push<'a>( + array: &mut Self::Array, + e: $element_type, + ) -> arrow2::error::Result<()> { + use arrow2::array::TryPush; + array.try_push(Some(e)) + } + } + }; +} + +// TODO: consolidate with above macro with generic type bounds. didn't figure out a way +// to include where bounds in macros by example. +macro_rules! impl_physical_type_with_size { + ($t:ty, $element_type:ty, $array_type:ty, $iter_type:ty) => { + impl $crate::physical_type::PhysicalType for $t {} + + impl ArrayAdapter for $t { + type Element<'a> = $element_type; + type Iter<'a> = $iter_type; + + fn into_iter(array: &dyn arrow2::array::Array) -> Option> { + Some(array.as_any().downcast_ref::<$array_type>()?.values_iter()) + } + } + + impl ArrayAdapter for Nullable<$t> { + type Element<'a> = Option<$element_type>; + type Iter<'a> = <&'a $array_type as IntoIterator>::IntoIter; + + fn into_iter(array: &dyn arrow2::array::Array) -> Option> { + Some(array.as_any().downcast_ref::<$array_type>()?.iter()) + } + } + }; +} + +macro_rules! 
impl_numeric_type { + ($t:ty) => { + impl_physical_type_generic!( + $t, + &'a $t, + arrow2::array::PrimitiveArray<$t>, + std::slice::Iter<'a, $t>, + MutablePrimitiveArray<$t> + ); + }; +} + +impl PhysicalType for NullableArray {} + +impl_numeric_type!(u8); +impl_numeric_type!(u16); +impl_numeric_type!(u32); +impl_numeric_type!(u64); +impl_numeric_type!(i8); +impl_numeric_type!(i16); +impl_numeric_type!(i32); +impl_numeric_type!(i64); +impl_numeric_type!(i128); +impl_numeric_type!(f32); +impl_numeric_type!(f64); + +impl_physical_type_generic!( + bool, + bool, + BooleanArray, + BitmapIter<'a>, + MutableBooleanArray +); + +impl_physical_type_with_offset!( + Utf8, + &'a str, + Utf8Array, + Utf8ValuesIter<'a, O>, + MutableUtf8Array +); + +impl_physical_type_with_offset!( + Binary, + &'a [u8], + BinaryArray, + BinaryValueIter<'a, O>, + MutableBinaryArray +); + +impl_physical_type_with_size!( + FixedSizeBinary, + &'a [u8], + FixedSizeBinaryArray, + std::slice::ChunksExact<'a, u8> +); + +impl_physical_type_with_offset!( + List, + Box, + ListArray, + ArrayValuesIter<'a, ListArray>, + MutableListArray +); + +impl_physical_type_with_size!( + FixedSizeList, + Box, + FixedSizeListArray, + ArrayValuesIter<'a, FixedSizeListArray> +); diff --git a/arrow2_convert/tests/test_round_trip.rs b/arrow2_convert/tests/test_round_trip.rs index 826af36..497d060 100644 --- a/arrow2_convert/tests/test_round_trip.rs +++ b/arrow2_convert/tests/test_round_trip.rs @@ -11,39 +11,6 @@ use arrow2_convert::{ use std::borrow::Borrow; use std::sync::Arc; -#[test] -fn test_nested_optional_struct_array() { - #[derive(Debug, Clone, ArrowField, PartialEq)] - struct Top { - child_array: Vec>, - } - #[derive(Debug, Clone, ArrowField, PartialEq)] - struct Child { - a1: i64, - } - - let original_array = vec![ - Top { - child_array: vec![ - Some(Child { a1: 10 }), - None, - Some(Child { a1: 12 }), - Some(Child { a1: 14 }), - ], - }, - Top { - child_array: vec![None, None, None, None], - }, - Top { - child_array: 
vec![None, None, Some(Child { a1: 12 }), None], - }, - ]; - - let b: Box = original_array.try_into_arrow().unwrap(); - let round_trip: Vec = b.try_into_collection().unwrap(); - assert_eq!(original_array, round_trip); -} - #[test] fn test_large_string() { let strs = vec!["1".to_string(), "2".to_string()]; diff --git a/arrow2_convert/tests/test_struct.rs b/arrow2_convert/tests/test_struct.rs index c7aa06d..d856369 100644 --- a/arrow2_convert/tests/test_struct.rs +++ b/arrow2_convert/tests/test_struct.rs @@ -3,6 +3,19 @@ use arrow2_convert::deserialize::*; use arrow2_convert::serialize::*; use arrow2_convert::ArrowField; +#[test] +fn test_optional_struct_array() { + #[derive(Debug, Clone, ArrowField, PartialEq)] + struct Foo { + field: i32, + } + + let original_array = vec![Some(Foo { field: 0 }), None, Some(Foo { field: 10 })]; + let b: Box = original_array.try_into_arrow().unwrap(); + let round_trip: Vec> = b.try_into_collection().unwrap(); + assert_eq!(original_array, round_trip); +} + #[test] fn test_nested_optional_struct_array() { #[derive(Debug, Clone, ArrowField, PartialEq)] diff --git a/arrow2_convert_derive/src/derive_enum.rs b/arrow2_convert_derive/src/derive_enum.rs index 3edc459..4639b78 100644 --- a/arrow2_convert_derive/src/derive_enum.rs +++ b/arrow2_convert_derive/src/derive_enum.rs @@ -446,28 +446,6 @@ pub fn expand(input: DeriveEnum) -> TokenStream { {} }; - let array_impl = quote! 
{ - impl arrow2_convert::deserialize::ArrowArray for #array_name - { - type BaseArrayType = arrow2::array::UnionArray; - - #[inline] - fn iter_from_array_ref<'a>(b: &'a dyn arrow2::array::Array) -> <&'a Self as IntoIterator>::IntoIter - { - use core::ops::Deref; - let arr = b.as_any().downcast_ref::().unwrap(); - let fields = arr.fields(); - - #iterator_name { - #( - #variant_names: <<#variant_types as arrow2_convert::deserialize::ArrowDeserialize>::ArrayType as arrow2_convert::deserialize::ArrowArray>::iter_from_array_ref(fields[#variant_indices].deref()), - )* - types_iter: arr.types().iter(), - } - } - } - }; - let array_into_iterator_impl = quote! { impl<'a> IntoIterator for &'a #array_name { @@ -517,12 +495,28 @@ pub fn expand(input: DeriveEnum) -> TokenStream { fn arrow_deserialize<'a>(v: Option) -> Option { v } + + #[inline] + fn arrow_array_ref_into_iter( + array: &dyn arrow2::array::Array + ) -> Option<<#array_name as arrow2_convert::deserialize::RefIntoIterator>::Iterator<'_>> + { + use core::ops::Deref; + let arr = array.as_any().downcast_ref::()?; + let fields = arr.fields(); + + Some(#iterator_name { + #( + #variant_names: <#variant_types as arrow2_convert::deserialize::ArrowDeserialize>::arrow_array_ref_into_iter(fields[#variant_indices].deref())?, + )* + types_iter: arr.types().iter(), + }) + } } }; generated.extend([ array_decl, - array_impl, array_into_iterator_impl, array_iterator_decl, array_iterator_iterator_impl, diff --git a/arrow2_convert_derive/src/derive_struct.rs b/arrow2_convert_derive/src/derive_struct.rs index 7bd96e9..0ddd7d0 100644 --- a/arrow2_convert_derive/src/derive_struct.rs +++ b/arrow2_convert_derive/src/derive_struct.rs @@ -265,30 +265,6 @@ pub fn expand(input: DeriveStruct) -> TokenStream { {} }; - let array_impl = quote! 
{ - impl arrow2_convert::deserialize::ArrowArray for #array_name - { - type BaseArrayType = arrow2::array::StructArray; - - #[inline] - fn iter_from_array_ref<'a>(b: &'a dyn arrow2::array::Array) -> <&'a Self as IntoIterator>::IntoIter - { - use core::ops::Deref; - let arr = b.as_any().downcast_ref::().unwrap(); - let values = arr.values(); - let validity = arr.validity(); - // for now do a straight comp - #iterator_name { - #( - #field_names: <<#field_types as arrow2_convert::deserialize::ArrowDeserialize>::ArrayType as arrow2_convert::deserialize::ArrowArray>::iter_from_array_ref(values[#field_indices].deref()), - )* - has_validity: validity.as_ref().is_some(), - validity_iter: validity.as_ref().map(|x| x.iter()).unwrap_or_else(|| arrow2::bitmap::utils::BitmapIter::new(&[], 0, 0)) - } - } - } - }; - let array_into_iterator_impl = quote! { impl<'a> IntoIterator for &'a #array_name { @@ -296,7 +272,7 @@ pub fn expand(input: DeriveStruct) -> TokenStream { type IntoIter = #iterator_name<'a>; fn into_iter(self) -> Self::IntoIter { - unimplemented!("Use iter_from_array_ref"); + unimplemented!(); } } }; @@ -362,12 +338,36 @@ pub fn expand(input: DeriveStruct) -> TokenStream { fn arrow_deserialize<'a>(v: Option) -> Option { v } + + #[inline] + fn arrow_array_ref_into_iter( + array: &dyn arrow2::array::Array, + ) -> Option<<#array_name as arrow2_convert::deserialize::RefIntoIterator>::Iterator<'_>> + where + Self::ArrayType: 'static, + { + use core::ops::Deref; + + let arr = array + .as_any() + .downcast_ref::()?; + + let values = arr.values(); + let validity = arr.validity(); + // for now do a straight comp + Some(#iterator_name { + #( + #field_names: <#field_types as arrow2_convert::deserialize::ArrowDeserialize>::arrow_array_ref_into_iter(values[#field_indices].deref())?, + )* + has_validity: validity.as_ref().is_some(), + validity_iter: validity.as_ref().map(|x| x.iter()).unwrap_or_else(|| arrow2::bitmap::utils::BitmapIter::new(&[], 0, 0)) + }) + } } }; 
generated.extend([ array_decl, - array_impl, array_into_iterator_impl, iterator_decl, iterator_impl, diff --git a/examples/simple/Cargo.toml b/examples/simple/Cargo.toml index e529d7e..be3da64 100644 --- a/examples/simple/Cargo.toml +++ b/examples/simple/Cargo.toml @@ -6,5 +6,5 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -arrow2 = "0.14" +arrow2.workspace = true arrow2_convert = { version = "0.3", path = "../../arrow2_convert" }