From a41d266b4c540e6d4ad215b027bc1da4c140683d Mon Sep 17 00:00:00 2001
From: "Jorge C. Leitao"
Date: Mon, 26 Sep 2022 06:22:42 +0000
Subject: [PATCH] Added tests

---
 src/array/mod.rs                      |  2 +-
 src/array/utf8/mutable_values.rs      | 22 ++------
 tests/it/array/utf8/mod.rs            |  1 +
 tests/it/array/utf8/mutable_values.rs | 81 +++++++++++++++++++++++++++
 4 files changed, 89 insertions(+), 17 deletions(-)
 create mode 100644 tests/it/array/utf8/mutable_values.rs

diff --git a/src/array/mod.rs b/src/array/mod.rs
index 4a5e7fceba2..86b645b8412 100644
--- a/src/array/mod.rs
+++ b/src/array/mod.rs
@@ -394,7 +394,7 @@ pub use null::NullArray;
 pub use primitive::*;
 pub use struct_::{MutableStructArray, StructArray};
 pub use union::UnionArray;
-pub use utf8::{MutableUtf8Array, Utf8Array, Utf8ValuesIter};
+pub use utf8::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array, Utf8ValuesIter};
 
 pub(crate) use self::ffi::offset_buffers_children_dictionary;
 pub(crate) use self::ffi::FromFfi;
diff --git a/src/array/utf8/mutable_values.rs b/src/array/utf8/mutable_values.rs
index a7524a9f3a6..65c9d1231a0 100644
--- a/src/array/utf8/mutable_values.rs
+++ b/src/array/utf8/mutable_values.rs
@@ -214,28 +214,18 @@ impl<O: Offset> MutableArray for MutableUtf8ValuesArray<O> {
         // Safety:
         // `MutableUtf8ValuesArray` has the same invariants as `Utf8Array` and thus
         // `Utf8Array` can be safely created from `MutableUtf8ValuesArray` without checks.
-        Box::new(unsafe {
-            Utf8Array::from_data_unchecked(
-                self.data_type.clone(),
-                std::mem::take(&mut self.offsets).into(),
-                std::mem::take(&mut self.values).into(),
-                None,
-            )
-        })
+        let (data_type, offsets, values) = std::mem::take(self).into_inner();
+        unsafe { Utf8Array::from_data_unchecked(data_type, offsets.into(), values.into(), None) }
+            .boxed()
     }
 
     fn as_arc(&mut self) -> Arc<dyn Array> {
         // Safety:
         // `MutableUtf8ValuesArray` has the same invariants as `Utf8Array` and thus
         // `Utf8Array` can be safely created from `MutableUtf8ValuesArray` without checks.
-        Arc::new(unsafe {
-            Utf8Array::from_data_unchecked(
-                self.data_type.clone(),
-                std::mem::take(&mut self.offsets).into(),
-                std::mem::take(&mut self.values).into(),
-                None,
-            )
-        })
+        let (data_type, offsets, values) = std::mem::take(self).into_inner();
+        unsafe { Utf8Array::from_data_unchecked(data_type, offsets.into(), values.into(), None) }
+            .arced()
     }
 
     fn data_type(&self) -> &DataType {
diff --git a/tests/it/array/utf8/mod.rs b/tests/it/array/utf8/mod.rs
index c6a0acec03f..daa2734faa8 100644
--- a/tests/it/array/utf8/mod.rs
+++ b/tests/it/array/utf8/mod.rs
@@ -1,6 +1,7 @@
 use arrow2::{array::*, bitmap::Bitmap, buffer::Buffer, datatypes::DataType, error::Result};
 
 mod mutable;
+mod mutable_values;
 mod to_mutable;
 
 #[test]
diff --git a/tests/it/array/utf8/mutable_values.rs b/tests/it/array/utf8/mutable_values.rs
new file mode 100644
index 00000000000..3e79fde4055
--- /dev/null
+++ b/tests/it/array/utf8/mutable_values.rs
@@ -0,0 +1,81 @@
+use arrow2::array::MutableArray;
+use arrow2::array::MutableUtf8ValuesArray;
+use arrow2::datatypes::DataType;
+
+#[test]
+fn capacity() {
+    let mut b = MutableUtf8ValuesArray::<i32>::with_capacity(100);
+
+    assert_eq!(b.values().capacity(), 0);
+    assert!(b.offsets().capacity() >= 101);
+    b.shrink_to_fit();
+    assert!(b.offsets().capacity() < 101);
+}
+
+#[test]
+fn offsets_must_be_monotonic_increasing() {
+    let offsets = vec![0, 5, 4];
+    let values = b"abbbbb".to_vec();
+    assert!(MutableUtf8ValuesArray::<i32>::try_new(DataType::Utf8, offsets, values).is_err());
+}
+
+#[test]
+fn data_type_must_be_consistent() {
+    let offsets = vec![0, 4];
+    let values = b"abbb".to_vec();
+    assert!(MutableUtf8ValuesArray::<i32>::try_new(DataType::Int32, offsets, values).is_err());
+}
+
+#[test]
+fn must_be_utf8() {
+    let offsets = vec![0, 2];
+    let values = vec![255, 255]; // 0xFF never occurs in well-formed UTF-8
+    assert!(MutableUtf8ValuesArray::<i32>::try_new(DataType::Utf8, offsets, values).is_err());
+}
+
+#[test]
+fn as_box() {
+    let offsets = vec![0, 2];
+    let values = b"ab".to_vec();
+    let mut b = MutableUtf8ValuesArray::<i32>::try_new(DataType::Utf8, offsets, values).unwrap();
+    let _ = b.as_box();
+}
+
+#[test]
+fn as_arc() {
+    let offsets = vec![0, 2];
+    let values = b"ab".to_vec();
+    let mut b = MutableUtf8ValuesArray::<i32>::try_new(DataType::Utf8, offsets, values).unwrap();
+    let _ = b.as_arc();
+}
+
+#[test]
+fn extend_trusted_len() {
+    let offsets = vec![0, 2];
+    let values = b"ab".to_vec();
+    let mut b = MutableUtf8ValuesArray::<i32>::try_new(DataType::Utf8, offsets, values).unwrap();
+    b.extend_trusted_len(vec!["a", "b"].into_iter());
+
+    let offsets = vec![0, 2, 3, 4];
+    let values = b"abab".to_vec();
+    assert_eq!(
+        b.as_box(),
+        MutableUtf8ValuesArray::<i32>::try_new(DataType::Utf8, offsets, values)
+            .unwrap()
+            .as_box()
+    )
+}
+
+#[test]
+fn from_trusted_len() {
+    let mut b = MutableUtf8ValuesArray::<i32>::from_trusted_len_iter(vec!["a", "b"].into_iter());
+
+    let offsets = vec![0, 1, 2];
+    let values = b"ab".to_vec();
+    assert_eq!(
+        b.as_box(),
+        MutableUtf8ValuesArray::<i32>::try_new(DataType::Utf8, offsets, values)
+            .unwrap()
+            .as_box()
+    )
+}