From 0acad8e54f12a82d67e519875f7bab09918de806 Mon Sep 17 00:00:00 2001 From: npenke <16369152+ncpenke@users.noreply.github.com> Date: Sat, 5 Mar 2022 22:41:20 -0800 Subject: [PATCH] Fix #26 Support FixedSizeVec --- README.md | 4 +++- arrow2_convert/Cargo.toml | 3 ++- arrow2_convert/src/serialize.rs | 10 ++++------ arrow2_convert/tests/complex_example.rs | 9 +++++++-- arrow2_convert/tests/test_round_trip.rs | 17 ++++++++++++++++- 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index b2c8ffc..315b6e6 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,12 @@ The following features are supported: - These can be used via the "override" attribute. Please see the [complex_example.rs](./arrow2_convert/tests/complex_example.rs) for usage. - Fixed size types: - [`FixedSizeBinary`] + - [`FixedSizeList`] + - This is supported for a fixed size `Vec` via the `FixedSizeVec` type override. + - Note: nesting of [`FixedSizeList`] is not supported. The following are not yet supported. -- [`FixedSizeList`] - Rust enums, slices, references Note: This is not an exclusive list. Please see the repo issues for current work in progress and add proposals for features that would be useful for your project. diff --git a/arrow2_convert/Cargo.toml b/arrow2_convert/Cargo.toml index 29305ed..4f55f58 100644 --- a/arrow2_convert/Cargo.toml +++ b/arrow2_convert/Cargo.toml @@ -12,7 +12,8 @@ repository = "https://github.com/DataEngineeringLabs/arrow2-convert/arrow2_conve description = "Convert between nested rust types and Arrow with arrow2" [dependencies] -arrow2 = { version = "0.9.1", default-features = false } +# Temporary until next arrow2 release +arrow2 = { git = "https://github.com/jorgecarleitao/arrow2", rev = "81bfad", default_features = false } arrow2_convert_derive = { version = "0.1.0", path = "../arrow2_convert_derive", optional = true } chrono = { version = "0.4", default_features = false, features = ["std"] } err-derive = "0.3" diff --git a/arrow2_convert/src/serialize.rs b/arrow2_convert/src/serialize.rs index 97533dd..4108bd0 100644 --- a/arrow2_convert/src/serialize.rs +++ b/arrow2_convert/src/serialize.rs @@ -279,22 +279,20 @@ where #[inline] fn new_array() -> Self::MutableArrayType { - Self::MutableArrayType::new( + Self::MutableArrayType::new_with_field( ::new_array(), + "item", + ::is_nullable(), SIZE ) } - fn arrow_serialize(_v: &::Type, _array: &mut Self::MutableArrayType) -> arrow2::error::Result<()> { - // TODO: neeed mutable values from arrow2::MutableFixedSizeListArray - /* + fn arrow_serialize(v: &::Type, array: &mut Self::MutableArrayType) -> arrow2::error::Result<()> { let values = array.mut_values(); for i in v.iter() { ::arrow_serialize(i, values)?; } array.try_push_valid() - */ - unimplemented!() } } diff --git a/arrow2_convert/tests/complex_example.rs b/arrow2_convert/tests/complex_example.rs index be441e4..168ffb2 100644 --- a/arrow2_convert/tests/complex_example.rs +++ b/arrow2_convert/tests/complex_example.rs @@ -5,7 +5,7 @@ use arrow2_convert::ArrowField; use arrow2_convert::deserialize::{arrow_array_deserialize_iterator, TryIntoCollection}; -use arrow2_convert::field::{LargeBinary, LargeString, LargeVec, FixedSizeBinary}; +use arrow2_convert::field::{LargeBinary, LargeString, LargeVec, FixedSizeBinary, FixedSizeVec}; use arrow2_convert::serialize::TryIntoArrow; use arrow2::array::*; use std::borrow::Borrow; @@ -52,6 +52,9 @@ pub struct Root { // large vec #[arrow_field(override="LargeVec")] large_vec: Vec, + // fixed size vec + #[arrow_field(override="FixedSizeVec")] + fixed_size_vec: Vec, } #[derive(Debug, Clone, PartialEq, ArrowField)] @@ -158,6 +161,7 @@ fn item1() -> Root { fixed_size_binary: b"aaa".to_vec(), large_string: "abcdefg".to_string(), large_vec: vec![1, 2, 3, 4], + fixed_size_vec: vec![10, 20, 30], } } @@ -203,6 +207,7 @@ fn item2() -> Root { fixed_size_binary: b"bbb".to_vec(), large_string: "abdefag".to_string(), large_vec: vec![5, 4, 3, 2], + fixed_size_vec: vec![11, 21, 32], } } @@ -219,7 +224,7 @@ fn test_round_trip() -> arrow2::error::Result<()> { assert_eq!(struct_array.len(), 2); let values = struct_array.values(); - assert_eq!(values.len(), 20); + assert_eq!(values.len(), 21); assert_eq!(struct_array.len(), 2); // can iterate one struct at a time without collecting diff --git a/arrow2_convert/tests/test_round_trip.rs b/arrow2_convert/tests/test_round_trip.rs index 5fe718f..9edd46c 100644 --- a/arrow2_convert/tests/test_round_trip.rs +++ b/arrow2_convert/tests/test_round_trip.rs @@ -1,4 +1,4 @@ -use arrow2_convert::{ArrowField,field::{LargeString,LargeVec,FixedSizeBinary}}; +use arrow2_convert::{ArrowField,field::{LargeString,LargeVec,FixedSizeBinary,FixedSizeVec}}; use arrow2_convert::deserialize::*; use arrow2_convert::field::LargeBinary; use arrow2_convert::serialize::*; @@ -127,3 +127,18 @@ fn test_large_vec_nested() let round_trip: Vec>> = b.try_into_collection_as_type::>().unwrap(); assert_eq!(round_trip, strs); } + +#[test] +fn test_fixed_size_vec() +{ + let ints = vec![vec![1, 2, 3]]; + let b: Box = ints.try_into_arrow_as_type::>().unwrap(); + assert_eq!(b.data_type(), + &DataType::FixedSizeList(Box::new(Field::new( + "item", + DataType::Int32, + false)), + 3)); + let round_trip: Vec> = b.try_into_collection_as_type::>().unwrap(); + assert_eq!(round_trip, ints); +}