From 55277dee7cd9aa0430158293e4218036a9cce775 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Mon, 14 Mar 2022 05:19:08 +0000 Subject: [PATCH] Improved APIs --- src/array/fixed_size_binary/iterator.rs | 7 +- src/array/fixed_size_binary/mod.rs | 3 +- src/array/fixed_size_binary/mutable.rs | 5 + tests/it/array/fixed_size_binary/mod.rs | 7 + tests/it/array/fixed_size_binary/mutable.rs | 32 +++++ tests/it/array/growable/fixed_binary.rs | 36 +++++ tests/it/array/growable/mod.rs | 144 ++++---------------- tests/it/ffi/data.rs | 42 ++++-- 8 files changed, 139 insertions(+), 137 deletions(-) diff --git a/src/array/fixed_size_binary/iterator.rs b/src/array/fixed_size_binary/iterator.rs index 445e584be4a..cc88967667c 100644 --- a/src/array/fixed_size_binary/iterator.rs +++ b/src/array/fixed_size_binary/iterator.rs @@ -60,10 +60,7 @@ impl<'a> FixedSizeBinaryArray { pub fn iter( &'a self, ) -> ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, FixedSizeBinaryArray>> { - zip_validity( - FixedSizeBinaryValuesIter::new(self), - self.validity.as_ref().map(|x| x.iter()), - ) + zip_validity(self.iter_values(), self.validity.as_ref().map(|x| x.iter())) } /// Returns iterator over the values of [`FixedSizeBinaryArray`] @@ -88,7 +85,7 @@ impl<'a> MutableFixedSizeBinaryArray { &'a self, ) -> ZipValidity<'a, &'a [u8], FixedSizeBinaryValuesIter<'a, MutableFixedSizeBinaryArray>> { zip_validity( - FixedSizeBinaryValuesIter::new(self), + self.iter_values(), self.validity().as_ref().map(|x| x.iter()), ) } diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index 98dc365ff33..82695c437e7 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -81,9 +81,10 @@ impl FixedSizeBinaryArray { /// Returns a new null [`FixedSizeBinaryArray`]. pub fn new_null(data_type: DataType, length: usize) -> Self { + let size = Self::maybe_get_size(&data_type).unwrap(); Self::new( data_type, - Buffer::new_zeroed(length), + Buffer::new_zeroed(length * size), Some(Bitmap::new_zeroed(length)), ) } diff --git a/src/array/fixed_size_binary/mutable.rs b/src/array/fixed_size_binary/mutable.rs index a5da541cc86..787996267bc 100644 --- a/src/array/fixed_size_binary/mutable.rs +++ b/src/array/fixed_size_binary/mutable.rs @@ -134,6 +134,11 @@ impl MutableFixedSizeBinaryArray { self.size } + /// Returns the capacity of this array + pub fn capacity(&self) -> usize { + self.values.capacity() / self.size + } + fn init_validity(&mut self) { let mut validity = MutableBitmap::new(); validity.extend_constant(self.len(), true); diff --git a/tests/it/array/fixed_size_binary/mod.rs b/tests/it/array/fixed_size_binary/mod.rs index cecaf402aae..f1edb8bf76d 100644 --- a/tests/it/array/fixed_size_binary/mod.rs +++ b/tests/it/array/fixed_size_binary/mod.rs @@ -50,6 +50,13 @@ fn empty() { assert_eq!(array.validity(), None); } +#[test] +fn null() { + let array = FixedSizeBinaryArray::new_null(DataType::FixedSizeBinary(2), 2); + assert_eq!(array.values().len(), 4); + assert_eq!(array.validity().cloned(), Some([false, false].into())); +} + #[test] fn from_iter() { let iter = std::iter::repeat(vec![1u8, 2]).take(2).map(Some); diff --git a/tests/it/array/fixed_size_binary/mutable.rs b/tests/it/array/fixed_size_binary/mutable.rs index 4739baeb239..b5f5babae00 100644 --- a/tests/it/array/fixed_size_binary/mutable.rs +++ b/tests/it/array/fixed_size_binary/mutable.rs @@ -66,3 +66,35 @@ fn push_null() { let array: FixedSizeBinaryArray = array.into(); assert_eq!(array.validity(), Some(&Bitmap::from([false]))); } + +#[test] +fn as_arc() { + let mut array = MutableFixedSizeBinaryArray::try_from_iter( + vec![Some(b"ab"), Some(b"bc"), None, Some(b"fh")], + 2, + ) + .unwrap(); + + let array = array.as_arc(); + assert_eq!(array.len(), 4); +} + +#[test] +fn as_box() { + let mut array = MutableFixedSizeBinaryArray::try_from_iter( + vec![Some(b"ab"), Some(b"bc"), None, Some(b"fh")], + 2, + ) + .unwrap(); + + let array = array.as_box(); + assert_eq!(array.len(), 4); +} + +#[test] +fn shrink_to_fit_and_capacity() { + let mut array = MutableFixedSizeBinaryArray::with_capacity(2, 100); + array.push(Some([1, 2])); + array.shrink_to_fit(); + assert_eq!(array.capacity(), 1); +} diff --git a/tests/it/array/growable/fixed_binary.rs b/tests/it/array/growable/fixed_binary.rs index e5936a1f096..6c213432176 100644 --- a/tests/it/array/growable/fixed_binary.rs +++ b/tests/it/array/growable/fixed_binary.rs @@ -87,3 +87,39 @@ fn sized_offsets() { let expected = FixedSizeBinaryArray::from_iter(vec![Some(&[0, 2]), Some(&[0, 1])], 2); assert_eq!(result, expected); } + +/// to, as_box, as_arc +#[test] +fn as_box() { + let array = + FixedSizeBinaryArray::from_iter(vec![Some(b"ab"), Some(b"bc"), None, Some(b"de")], 2); + let mut a = GrowableFixedSizeBinary::new(vec![&array], false, 0); + a.extend(0, 1, 2); + + let result = a.as_box(); + let result = result + .as_any() + .downcast_ref::() + .unwrap(); + + let expected = FixedSizeBinaryArray::from_iter(vec![Some("bc"), None], 2); + assert_eq!(&expected, result); +} + +/// as_arc +#[test] +fn as_arc() { + let array = + FixedSizeBinaryArray::from_iter(vec![Some(b"ab"), Some(b"bc"), None, Some(b"de")], 2); + let mut a = GrowableFixedSizeBinary::new(vec![&array], false, 0); + a.extend(0, 1, 2); + + let result = a.as_arc(); + let result = result + .as_any() + .downcast_ref::() + .unwrap(); + + let expected = FixedSizeBinaryArray::from_iter(vec![Some("bc"), None], 2); + assert_eq!(&expected, result); +} diff --git a/tests/it/array/growable/mod.rs b/tests/it/array/growable/mod.rs index 578f34485cf..e3de20a424e 100644 --- a/tests/it/array/growable/mod.rs +++ b/tests/it/array/growable/mod.rs @@ -10,131 +10,37 @@ mod struct_; mod union; mod utf8; -/* -#[cfg(test)] -mod tests { - use std::convert::TryFrom; +use arrow2::array::growable::make_growable; +use arrow2::array::*; +use arrow2::datatypes::DataType; - use super::*; +#[test] +fn test_make_growable() { + let array = Int32Array::from_slice([1, 2]); + make_growable(&[&array], false, 2); - use crate::{ - array::{ - Array, ArrayDataRef, ArrayRef, BooleanArray, DictionaryArray, - FixedSizeBinaryArray, Int16Array, Int16Type, Int32Array, Int64Array, - Int64Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray, - StringDictionaryBuilder, StructArray, UInt8Array, - }, - buffer::Buffer, - datatypes::Field, - }; - use crate::{ - array::{ListArray, StringBuilder}, - error::Result, - }; + let array = Utf8Array::::from_slice(["a", "aa"]); + make_growable(&[&array], false, 2); - fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayDataRef { - let values = StringArray::from(values.to_vec()); - let mut builder = StringDictionaryBuilder::new_with_dictionary( - PrimitiveBuilder::::new(3), - &values, - ) - .unwrap(); - for key in keys { - if let Some(v) = key { - builder.append(v).unwrap(); - } else { - builder.append_null().unwrap() - } - } - builder.finish().data() - } + let array = Utf8Array::::from_slice(["a", "aa"]); + make_growable(&[&array], false, 2); - /* - // this is an old test used on a meanwhile removed dead code - // that is still useful when `MutableArrayData` supports fixed-size lists. - #[test] - fn test_fixed_size_list_append() -> Result<()> { - let int_builder = UInt16Builder::new(64); - let mut builder = FixedSizeListBuilder::::new(int_builder, 2); - builder.values().append_slice(&[1, 2])?; - builder.append(true)?; - builder.values().append_slice(&[3, 4])?; - builder.append(false)?; - builder.values().append_slice(&[5, 6])?; - builder.append(true)?; + let array = BinaryArray::::from_slice([b"a".as_ref(), b"aa".as_ref()]); + make_growable(&[&array], false, 2); - let a_builder = UInt16Builder::new(64); - let mut a_builder = FixedSizeListBuilder::::new(a_builder, 2); - a_builder.values().append_slice(&[7, 8])?; - a_builder.append(true)?; - a_builder.values().append_slice(&[9, 10])?; - a_builder.append(true)?; - a_builder.values().append_slice(&[11, 12])?; - a_builder.append(false)?; - a_builder.values().append_slice(&[13, 14])?; - a_builder.append(true)?; - a_builder.values().append_null()?; - a_builder.values().append_null()?; - a_builder.append(true)?; - let a = a_builder.finish(); + let array = BinaryArray::::from_slice([b"a".as_ref(), b"aa".as_ref()]); + make_growable(&[&array], false, 2); - // append array - builder.append_data(&[ - a.data(), - a.slice(1, 3).data(), - a.slice(2, 1).data(), - a.slice(5, 0).data(), - ])?; - let finished = builder.finish(); + let array = BinaryArray::::from_slice([b"a".as_ref(), b"aa".as_ref()]); + make_growable(&[&array], false, 2); - let expected_int_array = UInt16Array::from(vec![ - Some(1), - Some(2), - Some(3), - Some(4), - Some(5), - Some(6), - // append first array - Some(7), - Some(8), - Some(9), - Some(10), - Some(11), - Some(12), - Some(13), - Some(14), - None, - None, - // append slice(1, 3) - Some(9), - Some(10), - Some(11), - Some(12), - Some(13), - Some(14), - // append slice(2, 1) - Some(11), - Some(12), - ]); - let expected_list_data = ArrayData::new( - DataType::FixedSizeList( - Box::new(Field::new("item", DataType::UInt16, true)), - 2, - ), - 12, - None, - None, - 0, - vec![], - vec![expected_int_array.data()], - ); - let expected_list = - FixedSizeListArray::from(Arc::new(expected_list_data) as ArrayDataRef); - assert_eq!(&expected_list.values(), &finished.values()); - assert_eq!(expected_list.len(), finished.len()); + let array = + FixedSizeBinaryArray::new(DataType::FixedSizeBinary(2), b"abcd".to_vec().into(), None); + make_growable(&[&array], false, 2); - Ok(()) - } - */ + let array = DictionaryArray::::from_data( + Int32Array::from_slice([1, 2]), + std::sync::Arc::new(Int32Array::from_slice([1, 2])), + ); + make_growable(&[&array], false, 2); } -*/ diff --git a/tests/it/ffi/data.rs b/tests/it/ffi/data.rs index 45c2f66cc1a..63a4489a0df 100644 --- a/tests/it/ffi/data.rs +++ b/tests/it/ffi/data.rs @@ -58,26 +58,47 @@ fn test_round_trip_schema(field: Field) -> Result<()> { } #[test] -fn u32() -> Result<()> { +fn bool_nullable() -> Result<()> { + let data = BooleanArray::from(&[Some(true), None, Some(false), None]); + test_round_trip(data) +} + +#[test] +fn bool() -> Result<()> { + let data = BooleanArray::from_slice(&[true, true, false]); + test_round_trip(data) +} + +#[test] +fn u32_nullable() -> Result<()> { let data = Int32Array::from(&[Some(2), None, Some(1), None]); test_round_trip(data) } #[test] -fn u64() -> Result<()> { - let data = UInt64Array::from(&[Some(2), None, Some(1), None]); +fn u32() -> Result<()> { + let data = Int32Array::from_slice(&[2, 0, 1, 0]); + test_round_trip(data) +} + +#[test] +fn timestamp_tz() -> Result<()> { + let data = Int64Array::from(&vec![Some(2), None, None]).to(DataType::Timestamp( + TimeUnit::Second, + Some("UTC".to_string()), + )); test_round_trip(data) } #[test] -fn i64() -> Result<()> { - let data = Int64Array::from(&[Some(2), None, Some(1), None]); +fn utf8_nullable() -> Result<()> { + let data = Utf8Array::::from(&vec![Some("a"), None, Some("bb"), None]); test_round_trip(data) } #[test] fn utf8() -> Result<()> { - let data = Utf8Array::::from(&vec![Some("a"), None, Some("bb"), None]); + let data = Utf8Array::::from_slice(&["a", "", "bb", ""]); test_round_trip(data) } @@ -88,18 +109,15 @@ fn large_utf8() -> Result<()> { } #[test] -fn binary() -> Result<()> { +fn binary_nullable() -> Result<()> { let data = BinaryArray::::from(&vec![Some(b"a".as_ref()), None, Some(b"bb".as_ref()), None]); test_round_trip(data) } #[test] -fn timestamp_tz() -> Result<()> { - let data = Int64Array::from(&vec![Some(2), None, None]).to(DataType::Timestamp( - TimeUnit::Second, - Some("UTC".to_string()), - )); +fn binary() -> Result<()> { + let data = BinaryArray::::from_slice(&[b"a".as_ref(), b"", b"bb", b""]); test_round_trip(data) }