Skip to content

Commit

Permalink
Split ArrowField derive macro (#88)
Browse files Browse the repository at this point in the history
* Split ArrowField derive macro into individual Field/Serialize/Deserialize derives for more composability.

* Update README.md

* fixes

* Fix `struct_incorrect_type` test
  • Loading branch information
jondo2010 authored Jan 3, 2023
1 parent 6c37e29 commit 02db5dd
Show file tree
Hide file tree
Showing 18 changed files with 817 additions and 749 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ Please see the [complex_example.rs](./arrow2_convert/tests/complex_example.rs) f
/// Simple example

use arrow2::array::Array;
use arrow2_convert::{deserialize::TryIntoCollection, serialize::TryIntoArrow, ArrowField};
use arrow2_convert::{deserialize::TryIntoCollection, serialize::TryIntoArrow, ArrowField, ArrowSerialize, ArrowDeserialize};

#[derive(Debug, Clone, PartialEq, ArrowField)]
#[derive(Debug, Clone, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
pub struct Foo {
name: String,
}
Expand Down Expand Up @@ -47,7 +47,7 @@ fn main() {

Types that implement the `ArrowField`, `ArrowSerialize` and `ArrowDeserialize` traits can be converted to/from Arrow via the `try_into_arrow` and the `try_into_collection` methods.

The `ArrowField` derive macro can be used to generate implementations of these traits for structs and enums. Custom implementations can also be defined for any type that needs to convert to/from Arrow by manually implementing the traits.
The `ArrowField`, `ArrowSerialize` and `ArrowDeserialize` derive macros can be used to generate implementations of these traits for structs and enums. Custom implementations can also be defined for any type that needs to convert to/from Arrow by manually implementing the traits.

For serializing to arrow, `TryIntoArrow::try_into_arrow` can be used to serialize any iterable into an `arrow2::Array` or an `arrow2::Chunk`. `arrow2::Array` represents the in-memory Arrow layout. `arrow2::Chunk` represents a column group and can be used with `arrow2` API for other functionality such as converting to parquet and arrow flight RPC.

Expand Down
2 changes: 1 addition & 1 deletion arrow2_convert/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub mod serialize;
// macro_rules macro.
#[cfg(feature = "arrow2_convert_derive")]
#[doc(hidden)]
pub use arrow2_convert_derive::ArrowField;
pub use arrow2_convert_derive::{ArrowDeserialize, ArrowField, ArrowSerialize};

// Test README with doctests
#[doc = include_str!("../README.md")]
Expand Down
26 changes: 13 additions & 13 deletions arrow2_convert/tests/complex_example.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ use arrow2_convert::serialize::TryIntoArrow;
///
/// - Deeply Nested structs and lists
/// - Custom types
use arrow2_convert::ArrowField;
use arrow2_convert::{ArrowDeserialize, ArrowField, ArrowSerialize};
use std::borrow::Borrow;

#[derive(Debug, Clone, PartialEq, ArrowField)]
#[derive(Debug, Clone, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
pub struct Root {
name: Option<String>,
is_deleted: bool,
Expand Down Expand Up @@ -55,15 +55,15 @@ pub struct Root {
fixed_size_vec: Vec<i64>,
}

#[derive(Debug, Clone, PartialEq, Eq, ArrowField)]
#[derive(Debug, Clone, PartialEq, Eq, ArrowField, ArrowSerialize, ArrowDeserialize)]
pub struct Child {
a1: i64,
a2: String,
// nested struct array
child_array: Vec<ChildChild>,
}

#[derive(Debug, Clone, PartialEq, Eq, ArrowField)]
#[derive(Debug, Clone, PartialEq, Eq, ArrowField, ArrowSerialize, ArrowDeserialize)]
pub struct ChildChild {
a1: i32,
bool_array: Vec<bool>,
Expand Down Expand Up @@ -126,12 +126,12 @@ fn item1() -> Root {
a1: Some(0.1),
a2: 1,
a3: Some(b"aa".to_vec()),
a4: NaiveDate::from_ymd(1970, 1, 2),
a5: NaiveDateTime::from_timestamp(10000, 0),
a6: Some(NaiveDateTime::from_timestamp(10001, 0)),
a4: NaiveDate::from_ymd_opt(1970, 1, 2).unwrap(),
a5: NaiveDateTime::from_timestamp_opt(10000, 0).unwrap(),
a6: Some(NaiveDateTime::from_timestamp_opt(10001, 0)).unwrap(),
date_time_list: vec![
NaiveDateTime::from_timestamp(10000, 10),
NaiveDateTime::from_timestamp(10000, 11),
NaiveDateTime::from_timestamp_opt(10000, 10).unwrap(),
NaiveDateTime::from_timestamp_opt(10000, 11).unwrap(),
],
nullable_list: Some(vec![Some("cc".to_string()), Some("dd".to_string())]),
required_list: vec![Some("aa".to_string()), Some("bb".to_string())],
Expand Down Expand Up @@ -172,12 +172,12 @@ fn item2() -> Root {
a1: Some(0.1),
a2: 1,
a3: Some(b"aa".to_vec()),
a4: NaiveDate::from_ymd(1970, 1, 2),
a5: NaiveDateTime::from_timestamp(10000, 0),
a4: NaiveDate::from_ymd_opt(1970, 1, 2).unwrap(),
a5: NaiveDateTime::from_timestamp_opt(10000, 0).unwrap(),
a6: None,
date_time_list: vec![
NaiveDateTime::from_timestamp(10000, 10),
NaiveDateTime::from_timestamp(10000, 11),
NaiveDateTime::from_timestamp_opt(10000, 10).unwrap(),
NaiveDateTime::from_timestamp_opt(10000, 11).unwrap(),
],
nullable_list: None,
required_list: vec![Some("ee".to_string()), Some("ff".to_string())],
Expand Down
7 changes: 5 additions & 2 deletions arrow2_convert/tests/simple_example.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
/// Simple example
use arrow2::array::Array;
use arrow2_convert::{deserialize::TryIntoCollection, serialize::TryIntoArrow, ArrowField};
use arrow2_convert::{
deserialize::TryIntoCollection, serialize::TryIntoArrow, ArrowDeserialize, ArrowField,
ArrowSerialize,
};

#[derive(Debug, Clone, PartialEq, Eq, ArrowField)]
#[derive(Debug, Clone, PartialEq, Eq, ArrowField, ArrowSerialize, ArrowDeserialize)]
pub struct Foo {
name: String,
}
Expand Down
14 changes: 6 additions & 8 deletions arrow2_convert/tests/test_deserialize.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
use arrow2::array::*;
use arrow2::error::Result;
use arrow2_convert::deserialize::*;
use arrow2_convert::serialize::*;
use arrow2_convert::ArrowField;
use arrow2_convert::{deserialize::*, serialize::*, ArrowDeserialize, ArrowField, ArrowSerialize};

#[test]
fn test_deserialize_iterator() {
Expand All @@ -11,7 +9,7 @@ fn test_deserialize_iterator() {
use arrow2_convert::serialize::*;
use std::borrow::Borrow;

#[derive(Debug, Clone, PartialEq, ArrowField)]
#[derive(Debug, Clone, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
struct S {
a1: i64,
}
Expand All @@ -34,11 +32,11 @@ fn test_deserialize_iterator() {

#[test]
fn test_deserialize_schema_mismatch_error() {
#[derive(Debug, Clone, PartialEq, ArrowField)]
#[derive(Debug, Clone, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
struct S1 {
a: i64,
}
#[derive(Debug, Clone, PartialEq, ArrowField)]
#[derive(Debug, Clone, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
struct S2 {
a: String,
}
Expand All @@ -56,11 +54,11 @@ fn test_deserialize_schema_mismatch_error() {

#[test]
fn test_deserialize_large_types_schema_mismatch_error() {
#[derive(Debug, Clone, PartialEq, ArrowField)]
#[derive(Debug, Clone, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
struct S1 {
a: String,
}
#[derive(Debug, Clone, PartialEq, ArrowField)]
#[derive(Debug, Clone, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
struct S2 {
#[arrow_field(type = "arrow2_convert::field::LargeString")]
a: String,
Expand Down
21 changes: 12 additions & 9 deletions arrow2_convert/tests/test_enum.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
use arrow2::array::*;
use arrow2_convert::{deserialize::TryIntoCollection, serialize::TryIntoArrow, ArrowField};
use arrow2_convert::{
deserialize::TryIntoCollection, serialize::TryIntoArrow, ArrowDeserialize, ArrowField,
ArrowSerialize,
};

#[test]
fn test_dense_enum_unit_variant() {
#[derive(Debug, PartialEq, ArrowField)]
#[derive(Debug, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
#[arrow_field(type = "dense")]
enum TestEnum {
VAL1,
Expand All @@ -25,7 +28,7 @@ fn test_dense_enum_unit_variant() {

#[test]
fn test_sparse_enum_unit_variant() {
#[derive(Debug, PartialEq, ArrowField)]
#[derive(Debug, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
#[arrow_field(type = "sparse")]
enum TestEnum {
VAL1,
Expand All @@ -47,12 +50,12 @@ fn test_sparse_enum_unit_variant() {

#[test]
fn test_nested_unit_variant() {
#[derive(Debug, PartialEq, ArrowField)]
#[derive(Debug, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
struct TestStruct {
a1: i64,
}

#[derive(Debug, PartialEq, ArrowField)]
#[derive(Debug, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
#[arrow_field(type = "dense")]
enum TestEnum {
VAL1,
Expand All @@ -62,7 +65,7 @@ fn test_nested_unit_variant() {
VAL5(ChildEnum),
}

#[derive(Debug, PartialEq, ArrowField)]
#[derive(Debug, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
#[arrow_field(type = "sparse")]
enum ChildEnum {
VAL1,
Expand All @@ -87,12 +90,12 @@ fn test_nested_unit_variant() {
//#[test]
#[allow(unused)]
fn test_slice() {
#[derive(Debug, PartialEq, ArrowField)]
#[derive(Debug, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
struct TestStruct {
a1: i64,
}

#[derive(Debug, PartialEq, ArrowField)]
#[derive(Debug, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
#[arrow_field(type = "dense")]
enum TestEnum {
VAL1,
Expand All @@ -102,7 +105,7 @@ fn test_slice() {
VAL5(ChildEnum),
}

#[derive(Debug, PartialEq, ArrowField)]
#[derive(Debug, PartialEq, ArrowField, ArrowSerialize, ArrowDeserialize)]
#[arrow_field(type = "sparse")]
enum ChildEnum {
VAL1,
Expand Down
7 changes: 3 additions & 4 deletions arrow2_convert/tests/test_flatten_chunk.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
use arrow2::array::*;
use arrow2::chunk::Chunk;
use arrow2_convert::serialize::*;
use arrow2_convert::ArrowField;
use arrow2_convert::{serialize::*, ArrowField, ArrowSerialize};
use std::sync::Arc;

#[test]
fn test_flatten_chunk() {
#[derive(Debug, Clone, ArrowField)]
#[derive(Debug, Clone, ArrowField, ArrowSerialize)]
struct Struct {
a: i64,
b: i64,
Expand Down Expand Up @@ -35,7 +34,7 @@ fn test_flatten_chunk_empty_chunk_error() {

#[test]
fn test_flatten_chunk_no_single_struct_array_error() {
#[derive(Debug, Clone, ArrowField)]
#[derive(Debug, Clone, ArrowField, ArrowSerialize)]
struct Struct {
a: i64,
b: String,
Expand Down
1 change: 1 addition & 0 deletions arrow2_convert/tests/test_hygeine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
use arrow2_convert_derive::ArrowField;

#[derive(ArrowField)]
#[allow(dead_code)]
struct S {
int_field: i64,
}
8 changes: 4 additions & 4 deletions arrow2_convert/tests/test_round_trip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@ use arrow2_convert::field::{LargeBinary, I128};
use arrow2_convert::serialize::*;
use arrow2_convert::{
field::{FixedSizeBinary, FixedSizeVec, LargeString, LargeVec},
ArrowField,
ArrowDeserialize, ArrowField, ArrowSerialize,
};
use std::borrow::Borrow;
use std::sync::Arc;

#[test]
fn test_nested_optional_struct_array() {
#[derive(Debug, Clone, ArrowField, PartialEq)]
#[derive(Debug, Clone, ArrowField, ArrowSerialize, ArrowDeserialize, PartialEq)]
struct Top {
child_array: Vec<Option<Child>>,
}
#[derive(Debug, Clone, ArrowField, PartialEq)]
#[derive(Debug, Clone, ArrowField, ArrowSerialize, ArrowDeserialize, PartialEq)]
struct Child {
a1: i64,
}
Expand Down Expand Up @@ -237,7 +237,7 @@ fn test_primitive_type_vec() {

#[test]
fn test_escaped_name() {
#[derive(ArrowField, Debug, Eq, PartialEq)]
#[derive(ArrowField, ArrowSerialize, ArrowDeserialize, Debug, Eq, PartialEq)]
struct EscapedName {
r#type: bool,
}
Expand Down
3 changes: 3 additions & 0 deletions arrow2_convert/tests/test_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use arrow2_convert::ArrowField;
#[test]
fn test_schema_types() {
#[derive(Debug, ArrowField)]
#[allow(dead_code)]
struct Root {
name: Option<String>,
is_deleted: bool,
Expand Down Expand Up @@ -54,6 +55,7 @@ fn test_schema_types() {
}

#[derive(Debug, ArrowField)]
#[allow(dead_code)]
struct Child {
a1: i64,
a2: String,
Expand All @@ -62,6 +64,7 @@ fn test_schema_types() {
}

#[derive(Debug, ArrowField)]
#[allow(dead_code)]
pub struct ChildChild {
a1: i32,
bool_array: Vec<bool>,
Expand Down
14 changes: 6 additions & 8 deletions arrow2_convert/tests/test_struct.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
use arrow2::array::*;
use arrow2_convert::deserialize::*;
use arrow2_convert::serialize::*;
use arrow2_convert::ArrowField;
use arrow2_convert::{deserialize::*, serialize::*, ArrowDeserialize, ArrowField, ArrowSerialize};

#[test]
fn test_nested_optional_struct_array() {
#[derive(Debug, Clone, ArrowField, PartialEq)]
#[derive(Debug, Clone, ArrowField, ArrowSerialize, ArrowDeserialize, PartialEq)]
struct Top {
child_array: Vec<Option<Child>>,
}
#[derive(Debug, Clone, ArrowField, PartialEq)]
#[derive(Debug, Clone, ArrowField, ArrowSerialize, ArrowDeserialize, PartialEq)]
struct Child {
a1: i64,
}
Expand Down Expand Up @@ -38,7 +36,7 @@ fn test_nested_optional_struct_array() {

#[test]
fn test_slice() {
#[derive(Debug, Clone, ArrowField, PartialEq)]
#[derive(Debug, Clone, ArrowField, ArrowSerialize, ArrowDeserialize, PartialEq)]
struct T {
a1: i64,
}
Expand All @@ -57,11 +55,11 @@ fn test_slice() {

#[test]
fn test_nested_slice() {
#[derive(Debug, Clone, ArrowField, PartialEq)]
#[derive(Debug, Clone, ArrowField, ArrowSerialize, ArrowDeserialize, PartialEq)]
struct Top {
child_array: Vec<Option<Child>>,
}
#[derive(Debug, Clone, ArrowField, PartialEq)]
#[derive(Debug, Clone, ArrowField, ArrowSerialize, ArrowDeserialize, PartialEq)]
struct Child {
a1: i64,
}
Expand Down
11 changes: 5 additions & 6 deletions arrow2_convert/tests/ui/struct_incorrect_type.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
use arrow2_convert::ArrowField;
use arrow2_convert::field::LargeBinary;
use arrow2_convert::{ArrowDeserialize, ArrowField, ArrowSerialize};

#[derive(Debug, ArrowField)]
#[derive(Debug, ArrowField, ArrowSerialize, ArrowDeserialize)]
struct Test {
#[arrow_field(type="LargeBinary")]
s: String
#[arrow_field(type = "LargeBinary")]
s: String,
}

fn main()
{}
fn main() {}
Loading

0 comments on commit 02db5dd

Please sign in to comment.