From 40a0745c6cfd067cb4205ab5b3b4562ce791599d Mon Sep 17 00:00:00 2001 From: illumination-k Date: Sun, 23 Jan 2022 04:20:25 +0900 Subject: [PATCH 01/11] implement fixedsizelist --- src/scalar/fixed_size_list.rs | 69 +++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 src/scalar/fixed_size_list.rs diff --git a/src/scalar/fixed_size_list.rs b/src/scalar/fixed_size_list.rs new file mode 100644 index 00000000000..daced880ee5 --- /dev/null +++ b/src/scalar/fixed_size_list.rs @@ -0,0 +1,69 @@ +use std::any::Any; +use std::sync::Arc; + +use crate::{array::*, datatypes::DataType}; + +use super::Scalar; + +/// The scalar equivalent of [`FixedSizeListArray`]. Like [`FixedSizeListArray`], this struct holds a dynamically-typed +/// [`Array`]. The only difference is that this has only one element. +#[derive(Debug, Clone)] +pub struct FixedSizeListScalar { + values: Arc, + is_valid: bool, + data_type: DataType, +} + +impl PartialEq for FixedSizeListScalar { + fn eq(&self, other: &Self) -> bool { + (self.data_type == other.data_type) + && (self.is_valid == other.is_valid) + && ((!self.is_valid) | (self.values.as_ref() == other.values.as_ref())) + } +} + +impl FixedSizeListScalar { + /// returns a new [`FixedSizeListScalar`] + /// # Panics + /// iff + /// * the `data_type` is not `FixedSizeList` + /// * the child of the `data_type` is not equal to the `values` + /// * the size of child array is not equal + #[inline] + pub fn new(data_type: DataType, values: Option>) -> Self { + let (field, size) = FixedSizeListArray::get_child_and_size(&data_type); + let inner_data_type = field.data_type(); + let (is_valid, values) = match values { + Some(values) => { + assert_eq!(inner_data_type, values.data_type()); + assert_eq!(size, values.len()); + (true, values) + } + None => (false, new_empty_array(inner_data_type.clone()).into()), + }; + Self { + values, + is_valid, + data_type, + } + } + + /// The values of the [`FixedSizeListScalar`] + pub fn values(&self) -> &Arc { + &self.values + } +} + +impl Scalar for FixedSizeListScalar { + fn as_any(&self) -> &dyn Any { + self + } + + fn is_valid(&self) -> bool { + self.is_valid + } + + fn data_type(&self) -> &DataType { + &self.data_type + } +} From 12b4eb8f1bd9de1cde558068232d3aac0bb20dfa Mon Sep 17 00:00:00 2001 From: illumination-k Date: Sun, 23 Jan 2022 04:20:40 +0900 Subject: [PATCH 02/11] implement fixedsizebinary --- src/scalar/fixed_size_binary.rs | 52 +++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 src/scalar/fixed_size_binary.rs diff --git a/src/scalar/fixed_size_binary.rs b/src/scalar/fixed_size_binary.rs new file mode 100644 index 00000000000..debfd26263d --- /dev/null +++ b/src/scalar/fixed_size_binary.rs @@ -0,0 +1,52 @@ +use crate::datatypes::DataType; + +use super::Scalar; + +#[derive(Debug, Clone, PartialEq)] +/// The [`Scalar`] implementation of fixed size binary ([`Option>`]). +pub struct FixedSizeBinaryScalar { + value: Option>, + data_type: DataType, +} + +impl FixedSizeBinaryScalar { + /// Returns a new [`FixedSizeBinaryScalar`]. + /// # Panics + /// iff + /// * the `data_type` is not `FixedSizeBinary` + /// * the size of child binary is not equal + #[inline] + pub fn new>>(data_type: DataType, value: Option

) -> Self { + Self { + value: value.map(|x| { + let x = x.into(); + assert_eq!(data_type, DataType::FixedSizeBinary(x.len())); + x + }), + data_type, + } + } + + /// Its value + #[inline] + pub fn value(&self) -> Option<&[u8]> { + self.value.as_ref().map(|x| x.as_ref()) + } +} + +impl Scalar for FixedSizeBinaryScalar { + #[inline] + fn as_any(&self) -> &dyn std::any::Any { + self + } + + #[inline] + fn is_valid(&self) -> bool { + self.value.is_some() + } + + #[inline] + fn data_type(&self) -> &DataType { + &self.data_type + } +} From d7c2a00f79938e22828fc84504fb552f233b70a5 Mon Sep 17 00:00:00 2001 From: illumination-k Date: Sun, 23 Jan 2022 04:20:53 +0900 Subject: [PATCH 03/11] adding test --- tests/it/scalar/fixed_size_binary.rs | 30 +++++++++++++++++++ tests/it/scalar/fixed_size_list.rs | 45 ++++++++++++++++++++++++++++ tests/it/scalar/mod.rs | 2 ++ 3 files changed, 77 insertions(+) create mode 100644 tests/it/scalar/fixed_size_binary.rs create mode 100644 tests/it/scalar/fixed_size_list.rs diff --git a/tests/it/scalar/fixed_size_binary.rs b/tests/it/scalar/fixed_size_binary.rs new file mode 100644 index 00000000000..dfd119cfde4 --- /dev/null +++ b/tests/it/scalar/fixed_size_binary.rs @@ -0,0 +1,30 @@ +use arrow2::{ + datatypes::DataType, + scalar::{FixedSizeBinaryScalar, Scalar}, +}; + +#[allow(clippy::eq_op)] +#[test] +fn equal() { + let a = FixedSizeBinaryScalar::new(DataType::FixedSizeBinary(1), Some("a")); + let b = FixedSizeBinaryScalar::new(DataType::FixedSizeBinary(1), None::<&str>); + assert_eq!(a, a); + assert_eq!(b, b); + assert!(a != b); + let b = FixedSizeBinaryScalar::new(DataType::FixedSizeBinary(1),Some("b")); + assert!(a != b); + assert_eq!(b, b); +} + +#[test] +fn basics() { + let a = FixedSizeBinaryScalar::new(DataType::FixedSizeBinary(1), Some("a")); + + assert_eq!(a.value(), Some(b"a".as_ref())); + assert_eq!(a.data_type(), &DataType::FixedSizeBinary(1)); + assert!(a.is_valid()); + + let _: &dyn std::any::Any = a.as_any(); +} + + diff --git a/tests/it/scalar/fixed_size_list.rs b/tests/it/scalar/fixed_size_list.rs new file mode 100644 index 00000000000..f0d99cf464d --- /dev/null +++ b/tests/it/scalar/fixed_size_list.rs @@ -0,0 +1,45 @@ +use std::sync::Arc; + +use arrow2::{ + array::{Array, BooleanArray}, + datatypes::{DataType, Field}, + scalar::{FixedSizeListScalar, Scalar}, +}; + +#[allow(clippy::eq_op)] +#[test] +fn equal() { + let dt = DataType::FixedSizeList(Box::new(Field::new("a", DataType::Boolean, true)), 2); + let a = FixedSizeListScalar::new( + dt.clone(), + Some(Arc::new(BooleanArray::from_slice([true, false])) as Arc), + ); + + let b = FixedSizeListScalar::new(dt.clone(), None); + + assert_eq!(a, a); + assert_eq!(b, b); + assert!(a != b); + + let b = FixedSizeListScalar::new( + dt, + Some(Arc::new(BooleanArray::from_slice([true, true])) as Arc), + ); + assert!(a != b); + assert_eq!(b, b); +} + +#[test] +fn basics() { + let dt = DataType::FixedSizeList(Box::new(Field::new("a", DataType::Boolean, true)), 2); + let a = FixedSizeListScalar::new( + dt.clone(), + Some(Arc::new(BooleanArray::from_slice([true, false])) as Arc), + ); + + assert_eq!(BooleanArray::from_slice([true, false]), a.values().as_ref()); + assert_eq!(a.data_type(), &dt); + assert!(a.is_valid()); + + let _: &dyn std::any::Any = a.as_any(); +} diff --git a/tests/it/scalar/mod.rs b/tests/it/scalar/mod.rs index 64a35e0edb4..14779a0fbbe 100644 --- a/tests/it/scalar/mod.rs +++ b/tests/it/scalar/mod.rs @@ -1,5 +1,7 @@ mod binary; +mod fixed_size_binary; mod boolean; +mod fixed_size_list; mod list; mod null; mod primitive; From 5e840547f896dbc51d8b9d2e165c630c6050616a Mon Sep 17 00:00:00 2001 From: illumination-k Date: Sun, 23 Jan 2022 18:00:00 +0900 Subject: [PATCH 04/11] use boxed slice --- src/scalar/fixed_size_binary.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/scalar/fixed_size_binary.rs b/src/scalar/fixed_size_binary.rs index debfd26263d..8eafeb31ab8 100644 --- a/src/scalar/fixed_size_binary.rs +++ b/src/scalar/fixed_size_binary.rs @@ -3,9 +3,9 @@ use crate::datatypes::DataType; use super::Scalar; #[derive(Debug, Clone, PartialEq)] -/// The [`Scalar`] implementation of fixed size binary ([`Option>`]). +/// The [`Scalar`] implementation of fixed size binary ([`Option>`]). pub struct FixedSizeBinaryScalar { - value: Option>, + value: Option>, data_type: DataType, } @@ -19,9 +19,9 @@ impl FixedSizeBinaryScalar { pub fn new>>(data_type: DataType, value: Option

) -> Self { Self { value: value.map(|x| { - let x = x.into(); + let x: Vec = x.into(); assert_eq!(data_type, DataType::FixedSizeBinary(x.len())); - x + x.into_boxed_slice() }), data_type, } From b7c4beb7a2aaf95ea732f81050e345e022c7095b Mon Sep 17 00:00:00 2001 From: illumination-k Date: Sun, 23 Jan 2022 18:00:30 +0900 Subject: [PATCH 05/11] new_scalar for fixedsize list and binary --- src/scalar/mod.rs | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/scalar/mod.rs b/src/scalar/mod.rs index 9a2ca3fecf5..189528a39c0 100644 --- a/src/scalar/mod.rs +++ b/src/scalar/mod.rs @@ -22,6 +22,10 @@ mod null; pub use null::*; mod struct_; pub use struct_::*; +mod fixed_size_list; +pub use fixed_size_list::*; +mod fixed_size_binary; +pub use fixed_size_binary::*; /// Trait object declaring an optional value with a [`DataType`]. /// This strait is often used in APIs that accept multiple scalar types. @@ -120,8 +124,24 @@ pub fn new_scalar(array: &dyn Array, index: usize) -> Box { Box::new(StructScalar::new(array.data_type().clone(), None)) } } - FixedSizeBinary => todo!(), - FixedSizeList => todo!(), + FixedSizeBinary => { + let array = array.as_any().downcast_ref::().unwrap(); + let value = if array.is_valid(index) { + Some(array.value(index)) + } else { + None + }; + Box::new(FixedSizeBinaryScalar::new(array.data_type().clone(), value)) + }, + FixedSizeList => { + let array = array.as_any().downcast_ref::().unwrap(); + let value = if array.is_valid(index) { + Some(array.value(index).into()) + } else { + None + }; + Box::new(FixedSizeListScalar::new(array.data_type().clone(), value)) + }, Union | Map => todo!(), Dictionary(key_type) => match_integer_type!(key_type, |$T| { let array = array From 4b5a529d69451f1857cbeace2a88f88ad657c8b2 Mon Sep 17 00:00:00 2001 From: illumination-k Date: Sun, 23 Jan 2022 18:02:25 +0900 Subject: [PATCH 06/11] fmt and merge --- src/scalar/mod.rs | 9 ++++++--- tests/it/scalar/fixed_size_binary.rs | 4 +--- tests/it/scalar/mod.rs | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/scalar/mod.rs b/src/scalar/mod.rs index 189528a39c0..4910bb67c29 100644 --- a/src/scalar/mod.rs +++ b/src/scalar/mod.rs @@ -125,14 +125,17 @@ pub fn new_scalar(array: &dyn Array, index: usize) -> Box { } } FixedSizeBinary => { - let array = array.as_any().downcast_ref::().unwrap(); + let array = array + .as_any() + .downcast_ref::() + .unwrap(); let value = if array.is_valid(index) { Some(array.value(index)) } else { None }; Box::new(FixedSizeBinaryScalar::new(array.data_type().clone(), value)) - }, + } FixedSizeList => { let array = array.as_any().downcast_ref::().unwrap(); let value = if array.is_valid(index) { @@ -141,7 +144,7 @@ pub fn new_scalar(array: &dyn Array, index: usize) -> Box { None }; Box::new(FixedSizeListScalar::new(array.data_type().clone(), value)) - }, + } Union | Map => todo!(), Dictionary(key_type) => match_integer_type!(key_type, |$T| { let array = array diff --git a/tests/it/scalar/fixed_size_binary.rs b/tests/it/scalar/fixed_size_binary.rs index dfd119cfde4..3962c390180 100644 --- a/tests/it/scalar/fixed_size_binary.rs +++ b/tests/it/scalar/fixed_size_binary.rs @@ -11,7 +11,7 @@ fn equal() { assert_eq!(a, a); assert_eq!(b, b); assert!(a != b); - let b = FixedSizeBinaryScalar::new(DataType::FixedSizeBinary(1),Some("b")); + let b = FixedSizeBinaryScalar::new(DataType::FixedSizeBinary(1), Some("b")); assert!(a != b); assert_eq!(b, b); } @@ -26,5 +26,3 @@ fn basics() { let _: &dyn std::any::Any = a.as_any(); } - - diff --git a/tests/it/scalar/mod.rs b/tests/it/scalar/mod.rs index 14779a0fbbe..2e7d105d7fd 100644 --- a/tests/it/scalar/mod.rs +++ b/tests/it/scalar/mod.rs @@ -1,6 +1,6 @@ mod binary; -mod fixed_size_binary; mod boolean; +mod fixed_size_binary; mod fixed_size_list; mod list; mod null; From 9583a5e76cb44753ef05c4cdf6336ca6d9ac49c1 Mon Sep 17 00:00:00 2001 From: illumination-k Date: Mon, 24 Jan 2022 03:47:53 +0900 Subject: [PATCH 07/11] assert physicaltype and logicaltype --- src/scalar/fixed_size_binary.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/scalar/fixed_size_binary.rs b/src/scalar/fixed_size_binary.rs index 8eafeb31ab8..adeb61269d5 100644 --- a/src/scalar/fixed_size_binary.rs +++ b/src/scalar/fixed_size_binary.rs @@ -17,10 +17,11 @@ impl FixedSizeBinaryScalar { /// * the size of child binary is not equal #[inline] pub fn new>>(data_type: DataType, value: Option

) -> Self { + assert_eq!(data_type.to_physical_type(), crate::datatypes::PhysicalType::FixedSizeBinary); Self { value: value.map(|x| { let x: Vec = x.into(); - assert_eq!(data_type, DataType::FixedSizeBinary(x.len())); + assert_eq!(data_type.to_logical_type(), &DataType::FixedSizeBinary(x.len())); x.into_boxed_slice() }), data_type, From 4bef2fbdc6a5f60f65bd2d2d2f8bb53a209d1253 Mon Sep 17 00:00:00 2001 From: illumination-k Date: Mon, 24 Jan 2022 03:48:12 +0900 Subject: [PATCH 08/11] use Option and remove is_valid in fixed_size_list --- src/scalar/fixed_size_list.rs | 20 +++++++++----------- tests/it/scalar/fixed_size_list.rs | 2 +- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/scalar/fixed_size_list.rs b/src/scalar/fixed_size_list.rs index daced880ee5..21861042a5a 100644 --- a/src/scalar/fixed_size_list.rs +++ b/src/scalar/fixed_size_list.rs @@ -9,16 +9,15 @@ use super::Scalar; /// [`Array`]. The only difference is that this has only one element. #[derive(Debug, Clone)] pub struct FixedSizeListScalar { - values: Arc, - is_valid: bool, + values: Option>, data_type: DataType, } impl PartialEq for FixedSizeListScalar { fn eq(&self, other: &Self) -> bool { (self.data_type == other.data_type) - && (self.is_valid == other.is_valid) - && ((!self.is_valid) | (self.values.as_ref() == other.values.as_ref())) + && (self.values.is_some() == other.values.is_some()) + && ((!self.values.is_some()) | (self.values.as_ref() == other.values.as_ref())) } } @@ -33,24 +32,23 @@ impl FixedSizeListScalar { pub fn new(data_type: DataType, values: Option>) -> Self { let (field, size) = FixedSizeListArray::get_child_and_size(&data_type); let inner_data_type = field.data_type(); - let (is_valid, values) = match values { + let values = match values { Some(values) => { assert_eq!(inner_data_type, values.data_type()); assert_eq!(size, values.len()); - (true, values) + Some(values) } - None => (false, new_empty_array(inner_data_type.clone()).into()), + None => None }; Self { values, - is_valid, data_type, } } /// The values of the [`FixedSizeListScalar`] - pub fn values(&self) -> &Arc { - &self.values + pub fn values(&self) -> Option<&Arc> { + self.values.as_ref() } } @@ -60,7 +58,7 @@ impl Scalar for FixedSizeListScalar { } fn is_valid(&self) -> bool { - self.is_valid + self.values.is_some() } fn data_type(&self) -> &DataType { diff --git a/tests/it/scalar/fixed_size_list.rs b/tests/it/scalar/fixed_size_list.rs index f0d99cf464d..003f7389abb 100644 --- a/tests/it/scalar/fixed_size_list.rs +++ b/tests/it/scalar/fixed_size_list.rs @@ -37,7 +37,7 @@ fn basics() { Some(Arc::new(BooleanArray::from_slice([true, false])) as Arc), ); - assert_eq!(BooleanArray::from_slice([true, false]), a.values().as_ref()); + assert_eq!(BooleanArray::from_slice([true, false]), a.values().unwrap().as_ref()); assert_eq!(a.data_type(), &dt); assert!(a.is_valid()); From 8e971f6ca14b6deb761c9038c18733abc4f0af10 Mon Sep 17 00:00:00 2001 From: illumination-k Date: Mon, 24 Jan 2022 03:49:35 +0900 Subject: [PATCH 09/11] fmt --- src/scalar/fixed_size_binary.rs | 10 ++++++++-- src/scalar/fixed_size_list.rs | 7 ++----- tests/it/scalar/fixed_size_list.rs | 5 ++++- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/src/scalar/fixed_size_binary.rs b/src/scalar/fixed_size_binary.rs index adeb61269d5..c09466f9eb3 100644 --- a/src/scalar/fixed_size_binary.rs +++ b/src/scalar/fixed_size_binary.rs @@ -17,11 +17,17 @@ impl FixedSizeBinaryScalar { /// * the size of child binary is not equal #[inline] pub fn new>>(data_type: DataType, value: Option

) -> Self { - assert_eq!(data_type.to_physical_type(), crate::datatypes::PhysicalType::FixedSizeBinary); + assert_eq!( + data_type.to_physical_type(), + crate::datatypes::PhysicalType::FixedSizeBinary + ); Self { value: value.map(|x| { let x: Vec = x.into(); - assert_eq!(data_type.to_logical_type(), &DataType::FixedSizeBinary(x.len())); + assert_eq!( + data_type.to_logical_type(), + &DataType::FixedSizeBinary(x.len()) + ); x.into_boxed_slice() }), data_type, diff --git a/src/scalar/fixed_size_list.rs b/src/scalar/fixed_size_list.rs index 21861042a5a..784dfa20a4c 100644 --- a/src/scalar/fixed_size_list.rs +++ b/src/scalar/fixed_size_list.rs @@ -38,12 +38,9 @@ impl FixedSizeListScalar { assert_eq!(size, values.len()); Some(values) } - None => None + None => None, }; - Self { - values, - data_type, - } + Self { values, data_type } } /// The values of the [`FixedSizeListScalar`] diff --git a/tests/it/scalar/fixed_size_list.rs b/tests/it/scalar/fixed_size_list.rs index 003f7389abb..a76b7aca38d 100644 --- a/tests/it/scalar/fixed_size_list.rs +++ b/tests/it/scalar/fixed_size_list.rs @@ -37,7 +37,10 @@ fn basics() { Some(Arc::new(BooleanArray::from_slice([true, false])) as Arc), ); - assert_eq!(BooleanArray::from_slice([true, false]), a.values().unwrap().as_ref()); + assert_eq!( + BooleanArray::from_slice([true, false]), + a.values().unwrap().as_ref() + ); assert_eq!(a.data_type(), &dt); assert!(a.is_valid()); From 63224cc2bebc133ead81a260bbfd1de407103720 Mon Sep 17 00:00:00 2001 From: illumination-k Date: Mon, 24 Jan 2022 03:52:39 +0900 Subject: [PATCH 10/11] clippy --- src/scalar/fixed_size_list.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scalar/fixed_size_list.rs b/src/scalar/fixed_size_list.rs index 784dfa20a4c..f2522080369 100644 --- a/src/scalar/fixed_size_list.rs +++ b/src/scalar/fixed_size_list.rs @@ -17,7 +17,7 @@ impl PartialEq for FixedSizeListScalar { fn eq(&self, other: &Self) -> bool { (self.data_type == other.data_type) && (self.values.is_some() == other.values.is_some()) - && ((!self.values.is_some()) | (self.values.as_ref() == other.values.as_ref())) + && ((self.values.is_none()) | (self.values.as_ref() == other.values.as_ref())) } } From 45b6e78dcbb59fa63bb7c42b6edfbf0b33feda92 Mon Sep 17 00:00:00 2001 From: illumination-k Date: Mon, 24 Jan 2022 04:15:07 +0900 Subject: [PATCH 11/11] use map for new in fixed_size_list_scalar --- src/scalar/fixed_size_list.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/scalar/fixed_size_list.rs b/src/scalar/fixed_size_list.rs index f2522080369..fd41e2100d5 100644 --- a/src/scalar/fixed_size_list.rs +++ b/src/scalar/fixed_size_list.rs @@ -32,14 +32,11 @@ impl FixedSizeListScalar { pub fn new(data_type: DataType, values: Option>) -> Self { let (field, size) = FixedSizeListArray::get_child_and_size(&data_type); let inner_data_type = field.data_type(); - let values = match values { - Some(values) => { - assert_eq!(inner_data_type, values.data_type()); - assert_eq!(size, values.len()); - Some(values) - } - None => None, - }; + let values = values.map(|x| { + assert_eq!(inner_data_type, x.data_type()); + assert_eq!(size, x.len()); + x + }); Self { values, data_type } }