diff --git a/src/compute/boolean_kleene.rs b/src/compute/boolean_kleene.rs index 4e31f449a55..a75f9f246dc 100644 --- a/src/compute/boolean_kleene.rs +++ b/src/compute/boolean_kleene.rs @@ -183,137 +183,3 @@ pub fn and(lhs: &BooleanArray, rhs: &BooleanArray) -> Result { validity, )) } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn and_generic() { - let lhs = BooleanArray::from(&[ - None, - None, - None, - Some(false), - Some(false), - Some(false), - Some(true), - Some(true), - Some(true), - ]); - let rhs = BooleanArray::from(&[ - None, - Some(false), - Some(true), - None, - Some(false), - Some(true), - None, - Some(false), - Some(true), - ]); - let c = and(&lhs, &rhs).unwrap(); - - let expected = BooleanArray::from(&[ - None, - Some(false), - None, - Some(false), - Some(false), - Some(false), - None, - Some(false), - Some(true), - ]); - - assert_eq!(c, expected); - } - - #[test] - fn or_generic() { - let a = BooleanArray::from(&[ - None, - None, - None, - Some(false), - Some(false), - Some(false), - Some(true), - Some(true), - Some(true), - ]); - let b = BooleanArray::from(&[ - None, - Some(false), - Some(true), - None, - Some(false), - Some(true), - None, - Some(false), - Some(true), - ]); - let c = or(&a, &b).unwrap(); - - let expected = BooleanArray::from(&[ - None, - None, - Some(true), - None, - Some(false), - Some(true), - Some(true), - Some(true), - Some(true), - ]); - - assert_eq!(c, expected); - } - - #[test] - fn or_right_nulls() { - let a = BooleanArray::from_slice(&[false, false, false, true, true, true]); - - let b = BooleanArray::from(&[Some(true), Some(false), None, Some(true), Some(false), None]); - - let c = or(&a, &b).unwrap(); - - let expected = BooleanArray::from(&[ - Some(true), - Some(false), - None, - Some(true), - Some(true), - Some(true), - ]); - - assert_eq!(c, expected); - } - - #[test] - fn or_left_nulls() { - let a = BooleanArray::from(vec![ - Some(true), - Some(false), - None, - Some(true), - Some(false), - None, - ]); - - let b = BooleanArray::from_slice(&[false, false, false, true, true, true]); - - let c = or(&a, &b).unwrap(); - - let expected = BooleanArray::from(vec![ - Some(true), - Some(false), - None, - Some(true), - Some(true), - Some(true), - ]); - - assert_eq!(c, expected); - } -} diff --git a/src/compute/length.rs b/src/compute/length.rs index 9f51e22513b..675cc560bac 100644 --- a/src/compute/length.rs +++ b/src/compute/length.rs @@ -79,95 +79,3 @@ pub fn length(array: &dyn Array) -> Result> { pub fn can_length(data_type: &DataType) -> bool { matches!(data_type, DataType::Utf8 | DataType::LargeUtf8) } - -#[cfg(test)] -mod tests { - use super::*; - - fn length_test_string() { - vec![ - ( - vec![Some("hello"), Some(" "), None], - vec![Some(5usize), Some(1), None], - ), - (vec![Some("💖")], vec![Some(4)]), - ] - .into_iter() - .for_each(|(input, expected)| { - let array = Utf8Array::::from(&input); - let result = length(&array).unwrap(); - - let data_type = if O::is_large() { - DataType::Int64 - } else { - DataType::Int32 - }; - - let expected = expected - .into_iter() - .map(|x| x.map(|x| O::from_usize(x).unwrap())) - .collect::>() - .to(data_type); - assert_eq!(expected, result.as_ref()); - }) - } - - #[test] - fn large_utf8() { - length_test_string::() - } - - #[test] - fn utf8() { - length_test_string::() - } - - #[test] - fn consistency() { - use crate::array::new_null_array; - use crate::datatypes::DataType::*; - use crate::datatypes::TimeUnit; - - let datatypes = vec![ - Null, - Boolean, - UInt8, - UInt16, - UInt32, - UInt64, - Int8, - Int16, - Int32, - Int64, - Float32, - Float64, - Timestamp(TimeUnit::Second, None), - Timestamp(TimeUnit::Millisecond, None), - Timestamp(TimeUnit::Microsecond, None), - Timestamp(TimeUnit::Nanosecond, None), - Time64(TimeUnit::Microsecond), - Time64(TimeUnit::Nanosecond), - Date32, - Time32(TimeUnit::Second), - Time32(TimeUnit::Millisecond), - Date64, - Utf8, - LargeUtf8, - Binary, - LargeBinary, - Duration(TimeUnit::Second), - Duration(TimeUnit::Millisecond), - Duration(TimeUnit::Microsecond), - Duration(TimeUnit::Nanosecond), - ]; - - datatypes.into_iter().for_each(|d1| { - let array = new_null_array(d1.clone(), 10); - if can_length(&d1) { - assert!(length(array.as_ref()).is_ok()); - } else { - assert!(length(array.as_ref()).is_err()); - } - }); - } -} diff --git a/src/compute/like.rs b/src/compute/like.rs index 1160721cef7..2e776267e2b 100644 --- a/src/compute/like.rs +++ b/src/compute/like.rs @@ -293,51 +293,3 @@ pub fn like_binary_scalar(lhs: &BinaryArray, rhs: &[u8]) -> Result pub fn nlike_binary_scalar(lhs: &BinaryArray, rhs: &[u8]) -> Result { a_like_binary_scalar(lhs, rhs, |x| !x) } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_like_binary() -> Result<()> { - let strings = BinaryArray::::from_slice(&["Arrow", "Arrow", "Arrow", "Arrow", "Ar"]); - let patterns = BinaryArray::::from_slice(&["A%", "B%", "%r_ow", "A_", "A_"]); - let result = like_binary(&strings, &patterns).unwrap(); - assert_eq!( - result, - BooleanArray::from_slice(&[true, false, true, false, true]) - ); - Ok(()) - } - - #[test] - fn test_nlike_binary() -> Result<()> { - let strings = BinaryArray::::from_slice(&["Arrow", "Arrow", "Arrow", "Arrow", "Ar"]); - let patterns = BinaryArray::::from_slice(&["A%", "B%", "%r_ow", "A_", "A_"]); - let result = nlike_binary(&strings, &patterns).unwrap(); - assert_eq!( - result, - BooleanArray::from_slice(&[false, true, false, true, false]) - ); - Ok(()) - } - - #[test] - fn test_like_binary_scalar() -> Result<()> { - let array = BinaryArray::::from_slice(&["Arrow", "Arrow", "Arrow", "BA"]); - let result = like_binary_scalar(&array, b"A%").unwrap(); - assert_eq!(result, BooleanArray::from_slice(&[true, true, true, false])); - Ok(()) - } - - #[test] - fn test_nlike_binary_scalar() -> Result<()> { - let array = BinaryArray::::from_slice(&["Arrow", "Arrow", "Arrow", "BA"]); - let result = nlike_binary_scalar(&array, "A%".as_bytes()).unwrap(); - assert_eq!( - result, - BooleanArray::from_slice(&[false, false, false, true]) - ); - Ok(()) - } -} diff --git a/src/compute/limit.rs b/src/compute/limit.rs index eccd145b9c9..d1056bd1e2b 100644 --- a/src/compute/limit.rs +++ b/src/compute/limit.rs @@ -27,18 +27,3 @@ pub fn limit(array: &dyn Array, num_elements: usize) -> Box { let lim = num_elements.min(array.len()); array.slice(0, lim) } - -#[cfg(test)] -mod tests { - use super::*; - use crate::array::*; - - #[test] - fn test_limit_array() { - let a = Int32Array::from_slice(&[5, 6, 7, 8, 9]); - let b = limit(&a, 3); - let c = b.as_ref().as_any().downcast_ref::().unwrap(); - let expected = Int32Array::from_slice(&[5, 6, 7]); - assert_eq!(&expected, c); - } -} diff --git a/src/compute/merge_sort/mod.rs b/src/compute/merge_sort/mod.rs index 66c4afabd95..b1a4e838fe8 100644 --- a/src/compute/merge_sort/mod.rs +++ b/src/compute/merge_sort/mod.rs @@ -529,235 +529,3 @@ pub fn build_comparator<'a>( }; Ok(Box::new(cmp)) } - -#[cfg(test)] -mod tests { - use crate::array::{BinaryArray, Int32Array, Utf8Array}; - use crate::compute::sort::sort; - - use super::*; - - #[test] - fn test_merge_u32() -> Result<()> { - let a0: &dyn Array = &Int32Array::from_slice(&[0, 1, 2, 3]); - let a1: &dyn Array = &Int32Array::from_slice(&[2, 3, 4, 5]); - - let options = SortOptions::default(); - let arrays = vec![a0, a1]; - let pairs = vec![(arrays.as_ref(), &options)]; - let comparator = build_comparator(&pairs)?; - - // (0, 1, 2) corresponds to slice [1, 2] of a0 - // (1, 2, 2) corresponds to slice [4, 5] of a1 - // slices are already sorted => identity - let result = - merge_sort_slices(once(&(0, 1, 2)), once(&(1, 2, 2)), &comparator).collect::>(); - - assert_eq!(result, vec![(0, 1, 2), (1, 2, 2)]); - - // (0, 2, 2) corresponds to slice [2, 3] of a0 - // (1, 0, 3) corresponds to slice [2, 3, 4] of a1 - let result = - merge_sort_slices(once(&(0, 2, 2)), once(&(1, 0, 3)), &comparator).collect::>(); - - // 2 (a0) , [2, 3] (a1) , 3 (a0) , 4 (a1) - // (0, 2, 1), (1, 0, 2) , (0, 3, 1), (1, 2, 1) - assert_eq!(result, vec![(0, 2, 1), (1, 0, 2), (0, 3, 1), (1, 2, 1)]); - Ok(()) - } - - #[test] - fn test_merge_with_limit() -> Result<()> { - let a0: &dyn Array = &Int32Array::from_slice(&[0, 2, 4, 6, 8]); - let a1: &dyn Array = &Int32Array::from_slice(&[1, 3, 5, 7, 9]); - - let options = SortOptions::default(); - let arrays = vec![a0, a1]; - let pairs = vec![(arrays.as_ref(), &options)]; - let comparator = build_comparator(&pairs)?; - - let slices = merge_sort_slices(once(&(0, 0, 5)), once(&(1, 0, 5)), &comparator); - // thus, they can be used to take from the arrays - let array = take_arrays(&arrays, slices, Some(5)); - - let expected = Int32Array::from_slice(&[0, 1, 2, 3, 4]); - // values are right - assert_eq!(expected, array.as_ref()); - Ok(()) - } - - #[test] - fn test_merge_slices_to_vec() -> Result<()> { - let a0: &dyn Array = &Int32Array::from_slice(&[0, 2, 4, 6, 8]); - let a1: &dyn Array = &Int32Array::from_slice(&[1, 3, 5, 7, 9]); - - let options = SortOptions::default(); - let arrays = vec![a0, a1]; - let pairs = vec![(arrays.as_ref(), &options)]; - let comparator = build_comparator(&pairs)?; - - let slices = merge_sort_slices(once(&(0, 0, 5)), once(&(1, 0, 5)), &comparator); - let vec = slices.to_vec(Some(5)); - assert_eq!(vec, [(0, 0, 1), (1, 0, 1), (0, 1, 1), (1, 1, 1), (0, 2, 1)]); - Ok(()) - } - - #[test] - fn test_merge_4_i32() -> Result<()> { - let a0: &dyn Array = &Int32Array::from_slice(&[0, 1]); - let a1: &dyn Array = &Int32Array::from_slice(&[2, 6]); - let a2: &dyn Array = &Int32Array::from_slice(&[3, 5]); - let a3: &dyn Array = &Int32Array::from_slice(&[4, 7]); - - let options = SortOptions::default(); - let arrays = vec![a0, a1, a2, a3]; - let pairs = vec![(arrays.as_ref(), &options)]; - let slices = slices(&pairs)?; - - // slices are right. - assert_eq!( - slices, - vec![ - (0, 0, 2), - (1, 0, 1), - (2, 0, 1), - (3, 0, 1), // 4 - (2, 1, 1), // 5 - (1, 1, 1), // 6 - (3, 1, 1), // 7 - ] - ); - - // thus, they can be used to take from the arrays - let array = take_arrays(&arrays, slices, None); - - let expected = Int32Array::from_slice(&[0, 1, 2, 3, 4, 5, 6, 7]); - - // values are right - assert_eq!(expected, array.as_ref()); - Ok(()) - } - - #[test] - fn test_merge_binary() -> Result<()> { - let a0: &dyn Array = &BinaryArray::::from_slice(&[b"a", b"c", b"d", b"e"]); - let a1: &dyn Array = &BinaryArray::::from_slice(&[b"b", b"y", b"z", b"z"]); - - let options = SortOptions::default(); - let arrays = vec![a0, a1]; - let pairs = vec![(arrays.as_ref(), &options)]; - let comparator = build_comparator(&pairs)?; - - // (0, 0, 4) corresponds to slice ["a", "c", "d", "e"] of a0 - // (1, 0, 4) corresponds to slice ["b", "y", "z", "z"] of a1 - - let result = - merge_sort_slices(once(&(0, 0, 4)), once(&(1, 0, 4)), &comparator).collect::>(); - - // "a" (a0) , "b" (a1) , ["c", "d", "e"] (a0), ["y", "z", "z"] (a1) - // (0, 0, 1), (1, 0, 1), (0, 1, 3) , (1, 1, 3) - assert_eq!(result, vec![(0, 0, 1), (1, 0, 1), (0, 1, 3), (1, 1, 3)]); - - // (0, 1, 2) corresponds to slice ["c", "d"] of a0 - // (1, 0, 3) corresponds to slice ["b", "y", "z"] of a1 - let result = - merge_sort_slices(once(&(0, 1, 2)), once(&(1, 0, 3)), &comparator).collect::>(); - - // "b" (a1) , ["c", "d"] (a0) , ["y", "z"] - // (1, 0, 1), (0, 1, 2) , (1, 1, 2) - assert_eq!(result, vec![(1, 0, 1), (0, 1, 2), (1, 1, 2)]); - Ok(()) - } - - #[test] - fn test_merge_string() -> Result<()> { - let a0: &dyn Array = &Utf8Array::::from_slice(&["a", "c", "d", "e"]); - let a1: &dyn Array = &Utf8Array::::from_slice(&["b", "y", "z", "z"]); - - let options = SortOptions::default(); - let arrays = vec![a0, a1]; - let pairs = vec![(arrays.as_ref(), &options)]; - let comparator = build_comparator(&pairs)?; - - // (0, 0, 4) corresponds to slice ["a", "c", "d", "e"] of a0 - // (1, 0, 4) corresponds to slice ["b", "y", "z", "z"] of a1 - - let result = - merge_sort_slices(once(&(0, 0, 4)), once(&(1, 0, 4)), &comparator).collect::>(); - - // "a" (a0) , "b" (a1) , ["c", "d", "e"] (a0), ["y", "z", "z"] (a1) - // (0, 0, 1), (1, 0, 1), (0, 1, 3) , (1, 1, 3) - assert_eq!(result, vec![(0, 0, 1), (1, 0, 1), (0, 1, 3), (1, 1, 3)]); - - // (0, 1, 2) corresponds to slice ["c", "d"] of a0 - // (1, 0, 3) corresponds to slice ["b", "y", "z"] of a1 - let result = - merge_sort_slices(once(&(0, 1, 2)), once(&(1, 0, 3)), &comparator).collect::>(); - - // "b" (a1) , ["c", "d"] (a0) , ["y", "z"] - // (1, 0, 1), (0, 1, 2) , (1, 1, 2) - assert_eq!(result, vec![(1, 0, 1), (0, 1, 2), (1, 1, 2)]); - Ok(()) - } - - #[test] - fn test_merge_sort_many() -> Result<()> { - // column 1 - let a00: &dyn Array = &Int32Array::from_slice(&[0, 1, 2, 3]); - let a01: &dyn Array = &Int32Array::from_slice(&[2, 3, 4]); - // column 2 - let a10: &dyn Array = &Utf8Array::::from_slice(&["a", "c", "d", "e"]); - let a11: &dyn Array = &Utf8Array::::from_slice(&["b", "y", "z"]); - // column 3 - // arrays to be sorted via the columns above - let array0: &dyn Array = &Int32Array::from_slice(&[0, 1, 2, 3]); - let array1: &dyn Array = &Int32Array::from_slice(&[4, 5, 6]); - - let expected = Int32Array::from_slice(&[ - 0, // 0 (a00) < 2 (a01) - 1, // 1 (a00) < 2 (a01) - 4, // 2 (a00) == 2 (a01), "d" (a10) > "b" (a11) - 2, // 2 (a00) < 3 (a01) - 3, // 3 (a00) == 3 (a01), "e" (a10) < "y" (a11) - 5, // arrays0 has finished - 6, // arrays0 has finished - ]); - - // merge-sort according to column 1 and then column 2 - let options = SortOptions::default(); - let arrays0 = vec![a00, a01]; - let arrays1 = vec![a10, a11]; - let pairs = vec![(arrays0.as_ref(), &options), (arrays1.as_ref(), &options)]; - let slices = slices(&pairs)?; - - let array = take_arrays(&[array0, array1], slices, None); - - assert_eq!(expected, array.as_ref()); - Ok(()) - } - - #[test] - fn test_sort() -> Result<()> { - let data0 = vec![4, 1, 2, 10, 3, 3]; - let data1 = vec![5, 1, 0, 6, 7]; - - let mut expected_data = [data0.clone(), data1.clone()].concat(); - expected_data.sort_unstable(); - let expected = Int32Array::from_slice(&expected_data); - - let a0: &dyn Array = &Int32Array::from_slice(&data0); - let a1: &dyn Array = &Int32Array::from_slice(&data1); - - let options = SortOptions::default(); - - // sort individually, potentially in parallel. - let a0 = sort(a0, &options, None)?; - let a1 = sort(a1, &options, None)?; - - // merge then. If multiple arrays, this can be applied in parallel. - let result = merge_sort(a0.as_ref(), a1.as_ref(), &options, None)?; - - assert_eq!(expected, result.as_ref()); - Ok(()) - } -} diff --git a/src/compute/partition.rs b/src/compute/partition.rs index 046eaca41db..c1cd4fb6c7d 100644 --- a/src/compute/partition.rs +++ b/src/compute/partition.rs @@ -114,198 +114,3 @@ impl<'a> Iterator for LexicographicalPartitionIterator<'a> { } } } - -#[cfg(test)] -mod tests { - use super::*; - use crate::array::*; - use crate::compute::sort::SortOptions; - use crate::datatypes::DataType; - - #[test] - fn test_lexicographical_partition_ranges_empty() { - let input = vec![]; - assert!( - lexicographical_partition_ranges(&input).is_err(), - "lexicographical_partition_ranges should reject columns with empty rows" - ); - } - - #[test] - fn test_lexicographical_partition_ranges_unaligned_rows() { - let values1 = Int64Array::from([None, Some(-1)]); - let values2 = Utf8Array::::from(&[Some("foo")]); - let input = vec![ - SortColumn { - values: &values1, - options: None, - }, - SortColumn { - values: &values2, - options: None, - }, - ]; - assert!( - lexicographical_partition_ranges(&input).is_err(), - "lexicographical_partition_ranges should reject columns with different row counts" - ); - } - - #[test] - fn test_lexicographical_partition_single_column() -> Result<()> { - let values = Int64Array::from_slice(&[1, 2, 2, 2, 2, 2, 2, 2, 9]); - let input = vec![SortColumn { - values: &values, - options: Some(SortOptions { - descending: false, - nulls_first: true, - }), - }]; - { - let results = lexicographical_partition_ranges(&input)?; - assert_eq!( - vec![(0_usize..1_usize), (1_usize..8_usize), (8_usize..9_usize)], - results.collect::>() - ); - } - Ok(()) - } - - #[test] - fn test_lexicographical_partition_all_equal_values() -> Result<()> { - let values = Int64Array::from_trusted_len_values_iter(std::iter::repeat(1).take(1000)); - let input = vec![SortColumn { - values: &values, - options: Some(SortOptions { - descending: false, - nulls_first: true, - }), - }]; - - { - let results = lexicographical_partition_ranges(&input)?; - assert_eq!(vec![(0_usize..1000_usize)], results.collect::>()); - } - Ok(()) - } - - #[test] - fn test_lexicographical_partition_all_null_values() -> Result<()> { - let values1 = new_null_array(DataType::Int8, 1000); - let values2 = new_null_array(DataType::UInt16, 1000); - let input = vec![ - SortColumn { - values: values1.as_ref(), - options: Some(SortOptions { - descending: false, - nulls_first: true, - }), - }, - SortColumn { - values: values2.as_ref(), - options: Some(SortOptions { - descending: false, - nulls_first: false, - }), - }, - ]; - { - let results = lexicographical_partition_ranges(&input)?; - assert_eq!(vec![(0_usize..1000_usize)], results.collect::>()); - } - Ok(()) - } - - #[test] - fn test_lexicographical_partition_unique_column_1() -> Result<()> { - let values1 = Int64Array::from(vec![None, Some(-1)]); - let values2 = Utf8Array::::from(vec![Some("foo"), Some("bar")]); - let input = vec![ - SortColumn { - values: &values1, - options: Some(SortOptions { - descending: false, - nulls_first: true, - }), - }, - SortColumn { - values: &values2, - options: Some(SortOptions { - descending: true, - nulls_first: true, - }), - }, - ]; - { - let results = lexicographical_partition_ranges(&input)?; - assert_eq!( - vec![(0_usize..1_usize), (1_usize..2_usize)], - results.collect::>() - ); - } - Ok(()) - } - - #[test] - fn test_lexicographical_partition_unique_column_2() -> Result<()> { - let values1 = Int64Array::from(vec![None, Some(-1), Some(-1)]); - let values2 = Utf8Array::::from(vec![Some("foo"), Some("bar"), Some("apple")]); - - let input = vec![ - SortColumn { - values: &values1, - options: Some(SortOptions { - descending: false, - nulls_first: true, - }), - }, - SortColumn { - values: &values2, - options: Some(SortOptions { - descending: true, - nulls_first: true, - }), - }, - ]; - { - let results = lexicographical_partition_ranges(&input)?; - assert_eq!( - vec![(0_usize..1_usize), (1_usize..2_usize), (2_usize..3_usize),], - results.collect::>() - ); - } - Ok(()) - } - - #[test] - fn test_lexicographical_partition_non_unique_column_1() -> Result<()> { - let values1 = Int64Array::from(vec![None, Some(-1), Some(-1), Some(1)]); - let values2 = - Utf8Array::::from(vec![Some("foo"), Some("bar"), Some("bar"), Some("bar")]); - - let input = vec![ - SortColumn { - values: &values1, - options: Some(SortOptions { - descending: false, - nulls_first: true, - }), - }, - SortColumn { - values: &values2, - options: Some(SortOptions { - descending: true, - nulls_first: true, - }), - }, - ]; - { - let results = lexicographical_partition_ranges(&input)?; - assert_eq!( - vec![(0_usize..1_usize), (1_usize..3_usize), (3_usize..4_usize),], - results.collect::>() - ); - } - Ok(()) - } -} diff --git a/src/compute/regex_match.rs b/src/compute/regex_match.rs index 12779067368..c1972ea0906 100644 --- a/src/compute/regex_match.rs +++ b/src/compute/regex_match.rs @@ -83,62 +83,3 @@ pub fn regex_match_scalar(values: &Utf8Array, regex: &str) -> Resu .map_err(|e| ArrowError::InvalidArgumentError(format!("Unable to compile regex: {}", e)))?; Ok(unary_utf8_boolean(values, |x| regex.is_match(x))) } - -#[cfg(test)] -mod tests { - use super::*; - - fn test_generic, &Utf8Array) -> Result>( - lhs: Vec<&str>, - pattern: Vec<&str>, - op: F, - expected: Vec, - ) { - let lhs = Utf8Array::::from_slice(lhs); - let pattern = Utf8Array::::from_slice(pattern); - let expected = BooleanArray::from_slice(expected); - let result = op(&lhs, &pattern).unwrap(); - assert_eq!(result, expected); - } - - fn test_generic_scalar, &str) -> Result>( - lhs: Vec<&str>, - pattern: &str, - op: F, - expected: Vec, - ) { - let lhs = Utf8Array::::from_slice(lhs); - let expected = BooleanArray::from_slice(expected); - let result = op(&lhs, pattern).unwrap(); - assert_eq!(result, expected); - } - - #[test] - fn test_like() { - test_generic::( - vec![ - "arrow", "arrow", "arrow", "arrow", "arrow", "arrows", "arrow", - ], - vec!["arrow", "^ar", "ro", "foo", "arr$", "arrow.", "arrow."], - regex_match, - vec![true, true, true, false, false, true, false], - ) - } - - #[test] - fn test_like_scalar() { - test_generic_scalar::( - vec!["arrow", "parquet", "datafusion", "flight"], - "ar", - regex_match_scalar, - vec![true, true, false, false], - ); - - test_generic_scalar::( - vec!["arrow", "parquet", "datafusion", "flight"], - "^ar", - regex_match_scalar, - vec![true, false, false, false], - ) - } -} diff --git a/src/compute/substring.rs b/src/compute/substring.rs index aac841bbe82..f0ec238e877 100644 --- a/src/compute/substring.rs +++ b/src/compute/substring.rs @@ -180,345 +180,3 @@ pub fn can_substring(data_type: &DataType) -> bool { DataType::LargeUtf8 | DataType::Utf8 | DataType::LargeBinary | DataType::Binary ) } - -#[cfg(test)] -mod tests { - use super::*; - - fn with_nulls_utf8() -> Result<()> { - let cases = vec![ - // identity - ( - vec![Some("hello"), None, Some("word")], - 0, - None, - vec![Some("hello"), None, Some("word")], - ), - // 0 length -> Nothing - ( - vec![Some("hello"), None, Some("word")], - 0, - Some(0), - vec![Some(""), None, Some("")], - ), - // high start -> Nothing - ( - vec![Some("hello"), None, Some("word")], - 1000, - Some(0), - vec![Some(""), None, Some("")], - ), - // high negative start -> identity - ( - vec![Some("hello"), None, Some("word")], - -1000, - None, - vec![Some("hello"), None, Some("word")], - ), - // high length -> identity - ( - vec![Some("hello"), None, Some("word")], - 0, - Some(1000), - vec![Some("hello"), None, Some("word")], - ), - ]; - - cases - .into_iter() - .try_for_each::<_, Result<()>>(|(array, start, length, expected)| { - let array = Utf8Array::::from(&array); - let result = substring(&array, start, &length)?; - assert_eq!(array.len(), result.len()); - - let result = result.as_any().downcast_ref::>().unwrap(); - let expected = Utf8Array::::from(&expected); - assert_eq!(&expected, result); - Ok(()) - })?; - - Ok(()) - } - - #[test] - fn with_nulls_string() -> Result<()> { - with_nulls_utf8::() - } - - #[test] - fn with_nulls_large_string() -> Result<()> { - with_nulls_utf8::() - } - - fn without_nulls_utf8() -> Result<()> { - let cases = vec![ - // increase start - ( - vec!["hello", "", "word"], - 0, - None, - vec!["hello", "", "word"], - ), - (vec!["hello", "", "word"], 1, None, vec!["ello", "", "ord"]), - (vec!["hello", "", "word"], 2, None, vec!["llo", "", "rd"]), - (vec!["hello", "", "word"], 3, None, vec!["lo", "", "d"]), - (vec!["hello", "", "word"], 10, None, vec!["", "", ""]), - // increase start negatively - (vec!["hello", "", "word"], -1, None, vec!["o", "", "d"]), - (vec!["hello", "", "word"], -2, None, vec!["lo", "", "rd"]), - (vec!["hello", "", "word"], -3, None, vec!["llo", "", "ord"]), - ( - vec!["hello", "", "word"], - -10, - None, - vec!["hello", "", "word"], - ), - // increase length - (vec!["hello", "", "word"], 1, Some(1), vec!["e", "", "o"]), - (vec!["hello", "", "word"], 1, Some(2), vec!["el", "", "or"]), - ( - vec!["hello", "", "word"], - 1, - Some(3), - vec!["ell", "", "ord"], - ), - ( - vec!["hello", "", "word"], - 1, - Some(4), - vec!["ello", "", "ord"], - ), - (vec!["hello", "", "word"], -3, Some(1), vec!["l", "", "o"]), - (vec!["hello", "", "word"], -3, Some(2), vec!["ll", "", "or"]), - ( - vec!["hello", "", "word"], - -3, - Some(3), - vec!["llo", "", "ord"], - ), - ( - vec!["hello", "", "word"], - -3, - Some(4), - vec!["llo", "", "ord"], - ), - ]; - - cases - .into_iter() - .try_for_each::<_, Result<()>>(|(array, start, length, expected)| { - let array = Utf8Array::::from_slice(&array); - let result = substring(&array, start, &length)?; - assert_eq!(array.len(), result.len()); - let result = result.as_any().downcast_ref::>().unwrap(); - let expected = Utf8Array::::from_slice(&expected); - assert_eq!(&expected, result); - Ok(()) - })?; - - Ok(()) - } - - #[test] - fn without_nulls_string() -> Result<()> { - without_nulls_utf8::() - } - - #[test] - fn without_nulls_large_string() -> Result<()> { - without_nulls_utf8::() - } - - fn with_null_binarys() -> Result<()> { - let cases = vec![ - // identity - ( - vec![Some(b"hello"), None, Some(b"world")], - 0, - None, - vec![Some("hello"), None, Some("world")], - ), - // 0 length -> Nothing - ( - vec![Some(b"hello"), None, Some(b"world")], - 0, - Some(0), - vec![Some(""), None, Some("")], - ), - // high start -> Nothing - ( - vec![Some(b"hello"), None, Some(b"world")], - 1000, - Some(0), - vec![Some(""), None, Some("")], - ), - // high negative start -> identity - ( - vec![Some(b"hello"), None, Some(b"world")], - -1000, - None, - vec![Some("hello"), None, Some("world")], - ), - // high length -> identity - ( - vec![Some(b"hello"), None, Some(b"world")], - 0, - Some(1000), - vec![Some("hello"), None, Some("world")], - ), - ]; - - cases - .into_iter() - .try_for_each::<_, Result<()>>(|(array, start, length, expected)| { - let array = BinaryArray::::from(&array); - let result = substring(&array, start, &length)?; - assert_eq!(array.len(), result.len()); - - let result = result.as_any().downcast_ref::>().unwrap(); - let expected = BinaryArray::::from(&expected); - assert_eq!(&expected, result); - Ok(()) - })?; - - Ok(()) - } - - #[test] - fn with_nulls_binary() -> Result<()> { - with_null_binarys::() - } - - #[test] - fn with_nulls_large_binary() -> Result<()> { - with_null_binarys::() - } - - fn without_null_binarys() -> Result<()> { - let cases = vec![ - // increase start - ( - vec!["hello", "", "word"], - 0, - None, - vec!["hello", "", "word"], - ), - (vec!["hello", "", "word"], 1, None, vec!["ello", "", "ord"]), - (vec!["hello", "", "word"], 2, None, vec!["llo", "", "rd"]), - (vec!["hello", "", "word"], 3, None, vec!["lo", "", "d"]), - (vec!["hello", "", "word"], 10, None, vec!["", "", ""]), - // increase start negatively - (vec!["hello", "", "word"], -1, None, vec!["o", "", "d"]), - (vec!["hello", "", "word"], -2, None, vec!["lo", "", "rd"]), - (vec!["hello", "", "word"], -3, None, vec!["llo", "", "ord"]), - ( - vec!["hello", "", "word"], - -10, - None, - vec!["hello", "", "word"], - ), - // increase length - (vec!["hello", "", "word"], 1, Some(1), vec!["e", "", "o"]), - (vec!["hello", "", "word"], 1, Some(2), vec!["el", "", "or"]), - ( - vec!["hello", "", "word"], - 1, - Some(3), - vec!["ell", "", "ord"], - ), - ( - vec!["hello", "", "word"], - 1, - Some(4), - vec!["ello", "", "ord"], - ), - (vec!["hello", "", "word"], -3, Some(1), vec!["l", "", "o"]), - (vec!["hello", "", "word"], -3, Some(2), vec!["ll", "", "or"]), - ( - vec!["hello", "", "word"], - -3, - Some(3), - vec!["llo", "", "ord"], - ), - ( - vec!["hello", "", "word"], - -3, - Some(4), - vec!["llo", "", "ord"], - ), - ]; - - cases - .into_iter() - .try_for_each::<_, Result<()>>(|(array, start, length, expected)| { - let array = BinaryArray::::from_slice(&array); - let result = substring(&array, start, &length)?; - assert_eq!(array.len(), result.len()); - let result = result.as_any().downcast_ref::>().unwrap(); - let expected = BinaryArray::::from_slice(&expected); - assert_eq!(&expected, result); - Ok(()) - })?; - - Ok(()) - } - - #[test] - fn without_nulls_binary() -> Result<()> { - without_null_binarys::() - } - - #[test] - fn without_nulls_large_binary() -> Result<()> { - without_null_binarys::() - } - - #[test] - fn consistency() { - use crate::array::new_null_array; - use crate::datatypes::DataType::*; - use crate::datatypes::TimeUnit; - - let datatypes = vec![ - Null, - Boolean, - UInt8, - UInt16, - UInt32, - UInt64, - Int8, - Int16, - Int32, - Int64, - Float32, - Float64, - Timestamp(TimeUnit::Second, None), - Timestamp(TimeUnit::Millisecond, None), - Timestamp(TimeUnit::Microsecond, None), - Timestamp(TimeUnit::Nanosecond, None), - Time64(TimeUnit::Microsecond), - Time64(TimeUnit::Nanosecond), - Date32, - Time32(TimeUnit::Second), - Time32(TimeUnit::Millisecond), - Date64, - Utf8, - LargeUtf8, - Binary, - LargeBinary, - Duration(TimeUnit::Second), - Duration(TimeUnit::Millisecond), - Duration(TimeUnit::Microsecond), - Duration(TimeUnit::Nanosecond), - ]; - - datatypes.into_iter().for_each(|d1| { - let array = new_null_array(d1.clone(), 10); - if can_substring(&d1) { - assert!(substring(array.as_ref(), 0, &None).is_ok()); - } else { - assert!(substring(array.as_ref(), 0, &None).is_err()); - } - }); - } -} diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index dba9f1e2741..ae8130ee597 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -203,181 +203,3 @@ pub fn can_year(data_type: &DataType) -> bool { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None) ) } -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn date64_hour() { - let array = Int64Array::from(&[Some(1514764800000), None, Some(1550636625000)]) - .to(DataType::Date64); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(0), None, Some(4)]); - assert_eq!(result, expected); - } - - #[test] - fn date32_hour() { - let array = Int32Array::from(&[Some(15147), None, Some(15148)]).to(DataType::Date32); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(0), None, Some(0)]); - assert_eq!(result, expected); - } - - #[test] - fn time32_second_hour() { - let array = Int32Array::from(&[Some(37800), None]).to(DataType::Time32(TimeUnit::Second)); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(10), None]); - assert_eq!(result, expected); - } - - #[test] - fn time64_micro_hour() { - let array = Int64Array::from(&[Some(37800000000), None]) - .to(DataType::Time64(TimeUnit::Microsecond)); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(10), None]); - assert_eq!(result, expected); - } - - #[test] - fn timestamp_micro_hour() { - let array = Int64Array::from(&[Some(37800000000), None]) - .to(DataType::Timestamp(TimeUnit::Microsecond, None)); - - let result = hour(&array).unwrap(); - let expected = UInt32Array::from(&[Some(10), None]); - assert_eq!(result, expected); - } - - #[test] - fn timestamp_date64_year() { - let array = Int64Array::from(&[Some(1514764800000), None]).to(DataType::Date64); - - let result = year(&array).unwrap(); - let expected = Int32Array::from(&[Some(2018), None]); - assert_eq!(result, expected); - } - - #[test] - fn timestamp_date32_year() { - let array = Int32Array::from(&[Some(15147), None]).to(DataType::Date32); - - let result = year(&array).unwrap(); - let expected = Int32Array::from(&[Some(2011), None]); - assert_eq!(result, expected); - } - - #[test] - fn timestamp_micro_year() { - let array = Int64Array::from(&[Some(1612025847000000), None]) - .to(DataType::Timestamp(TimeUnit::Microsecond, None)); - - let result = year(&array).unwrap(); - let expected = Int32Array::from(&[Some(2021), None]); - assert_eq!(result, expected); - } - - #[test] - fn consistency_hour() { - use crate::array::new_null_array; - use crate::datatypes::DataType::*; - use crate::datatypes::TimeUnit; - - let datatypes = vec![ - Null, - Boolean, - UInt8, - UInt16, - UInt32, - UInt64, - Int8, - Int16, - Int32, - Int64, - Float32, - Float64, - Timestamp(TimeUnit::Second, None), - Timestamp(TimeUnit::Millisecond, None), - Timestamp(TimeUnit::Microsecond, None), - Timestamp(TimeUnit::Nanosecond, None), - Time64(TimeUnit::Microsecond), - Time64(TimeUnit::Nanosecond), - Date32, - Time32(TimeUnit::Second), - Time32(TimeUnit::Millisecond), - Date64, - Utf8, - LargeUtf8, - Binary, - LargeBinary, - Duration(TimeUnit::Second), - Duration(TimeUnit::Millisecond), - Duration(TimeUnit::Microsecond), - Duration(TimeUnit::Nanosecond), - ]; - - datatypes.into_iter().for_each(|d1| { - let array = new_null_array(d1.clone(), 10); - if can_hour(&d1) { - assert!(hour(array.as_ref()).is_ok()); - } else { - assert!(hour(array.as_ref()).is_err()); - } - }); - } - - #[test] - fn consistency_year() { - use crate::array::new_null_array; - use crate::datatypes::DataType::*; - use crate::datatypes::TimeUnit; - - let datatypes = vec![ - Null, - Boolean, - UInt8, - UInt16, - UInt32, - UInt64, - Int8, - Int16, - Int32, - Int64, - Float32, - Float64, - Timestamp(TimeUnit::Second, None), - Timestamp(TimeUnit::Millisecond, None), - Timestamp(TimeUnit::Microsecond, None), - Timestamp(TimeUnit::Nanosecond, None), - Time64(TimeUnit::Microsecond), - Time64(TimeUnit::Nanosecond), - Date32, - Time32(TimeUnit::Second), - Time32(TimeUnit::Millisecond), - Date64, - Utf8, - LargeUtf8, - Binary, - LargeBinary, - Duration(TimeUnit::Second), - Duration(TimeUnit::Millisecond), - Duration(TimeUnit::Microsecond), - Duration(TimeUnit::Nanosecond), - ]; - - datatypes.into_iter().for_each(|d1| { - let array = new_null_array(d1.clone(), 10); - if can_year(&d1) { - assert!(year(array.as_ref()).is_ok()); - } else { - assert!(year(array.as_ref()).is_err()); - } - }); - } -} diff --git a/src/compute/window.rs b/src/compute/window.rs index 9b8e47ef036..8ccced766e3 100644 --- a/src/compute/window.rs +++ b/src/compute/window.rs @@ -63,37 +63,3 @@ pub fn shift(array: &dyn Array, offset: i64) -> Result> { concat::concatenate(&[slice.as_ref(), null_array.as_ref()]) } } - -#[cfg(test)] -mod tests { - use crate::array::Int32Array; - use crate::datatypes::DataType; - - use super::*; - - #[test] - fn shift_pos() { - let array = Int32Array::from(&[Some(1), None, Some(3)]); - let result = shift(&array, 1).unwrap(); - - let expected = Int32Array::from(&[None, Some(1), None]); - - assert_eq!(expected, result.as_ref()); - } - - #[test] - fn shift_many() { - let array = Int32Array::from(&[Some(1), None, Some(3)]).to(DataType::Date32); - assert!(shift(&array, 10).is_err()); - } - - #[test] - fn shift_max() { - let array = Int32Array::from(&[Some(1), None, Some(3)]).to(DataType::Date32); - let result = shift(&array, 3).unwrap(); - - let expected = new_null_array(DataType::Date32, 3); - - assert_eq!(expected.as_ref(), result.as_ref()); - } -} diff --git a/tests/it/compute/boolean_kleene.rs b/tests/it/compute/boolean_kleene.rs new file mode 100644 index 00000000000..09e2fc501a3 --- /dev/null +++ b/tests/it/compute/boolean_kleene.rs @@ -0,0 +1,131 @@ +use arrow2::array::BooleanArray; +use arrow2::compute::boolean_kleene::*; + +#[test] +fn and_generic() { + let lhs = BooleanArray::from(&[ + None, + None, + None, + Some(false), + Some(false), + Some(false), + Some(true), + Some(true), + Some(true), + ]); + let rhs = BooleanArray::from(&[ + None, + Some(false), + Some(true), + None, + Some(false), + Some(true), + None, + Some(false), + Some(true), + ]); + let c = and(&lhs, &rhs).unwrap(); + + let expected = BooleanArray::from(&[ + None, + Some(false), + None, + Some(false), + Some(false), + Some(false), + None, + Some(false), + Some(true), + ]); + + assert_eq!(c, expected); +} + +#[test] +fn or_generic() { + let a = BooleanArray::from(&[ + None, + None, + None, + Some(false), + Some(false), + Some(false), + Some(true), + Some(true), + Some(true), + ]); + let b = BooleanArray::from(&[ + None, + Some(false), + Some(true), + None, + Some(false), + Some(true), + None, + Some(false), + Some(true), + ]); + let c = or(&a, &b).unwrap(); + + let expected = BooleanArray::from(&[ + None, + None, + Some(true), + None, + Some(false), + Some(true), + Some(true), + Some(true), + Some(true), + ]); + + assert_eq!(c, expected); +} + +#[test] +fn or_right_nulls() { + let a = BooleanArray::from_slice(&[false, false, false, true, true, true]); + + let b = BooleanArray::from(&[Some(true), Some(false), None, Some(true), Some(false), None]); + + let c = or(&a, &b).unwrap(); + + let expected = BooleanArray::from(&[ + Some(true), + Some(false), + None, + Some(true), + Some(true), + Some(true), + ]); + + assert_eq!(c, expected); +} + +#[test] +fn or_left_nulls() { + let a = BooleanArray::from(vec![ + Some(true), + Some(false), + None, + Some(true), + Some(false), + None, + ]); + + let b = BooleanArray::from_slice(&[false, false, false, true, true, true]); + + let c = or(&a, &b).unwrap(); + + let expected = BooleanArray::from(vec![ + Some(true), + Some(false), + None, + Some(true), + Some(true), + Some(true), + ]); + + assert_eq!(c, expected); +} diff --git a/tests/it/compute/length.rs b/tests/it/compute/length.rs new file mode 100644 index 00000000000..73d36fc68d3 --- /dev/null +++ b/tests/it/compute/length.rs @@ -0,0 +1,88 @@ +use arrow2::array::*; +use arrow2::compute::length::*; +use arrow2::datatypes::*; + +fn length_test_string() { + vec![ + ( + vec![Some("hello"), Some(" "), None], + vec![Some(5usize), Some(1), None], + ), + (vec![Some("💖")], vec![Some(4)]), + ] + .into_iter() + .for_each(|(input, expected)| { + let array = Utf8Array::::from(&input); + let result = length(&array).unwrap(); + + let data_type = if O::is_large() { + DataType::Int64 + } else { + DataType::Int32 + }; + + let expected = expected + .into_iter() + .map(|x| x.map(|x| O::from_usize(x).unwrap())) + .collect::>() + .to(data_type); + assert_eq!(expected, result.as_ref()); + }) +} + +#[test] +fn large_utf8() { + length_test_string::() +} + +#[test] +fn utf8() { + length_test_string::() +} + +#[test] +fn consistency() { + use arrow2::datatypes::DataType::*; + + let datatypes = vec![ + Null, + Boolean, + UInt8, + UInt16, + UInt32, + UInt64, + Int8, + Int16, + Int32, + Int64, + Float32, + Float64, + Timestamp(TimeUnit::Second, None), + Timestamp(TimeUnit::Millisecond, None), + Timestamp(TimeUnit::Microsecond, None), + Timestamp(TimeUnit::Nanosecond, None), + Time64(TimeUnit::Microsecond), + Time64(TimeUnit::Nanosecond), + Date32, + Time32(TimeUnit::Second), + Time32(TimeUnit::Millisecond), + Date64, + Utf8, + LargeUtf8, + Binary, + LargeBinary, + Duration(TimeUnit::Second), + Duration(TimeUnit::Millisecond), + Duration(TimeUnit::Microsecond), + Duration(TimeUnit::Nanosecond), + ]; + + datatypes.into_iter().for_each(|d1| { + let array = new_null_array(d1.clone(), 10); + if can_length(&d1) { + assert!(length(array.as_ref()).is_ok()); + } else { + assert!(length(array.as_ref()).is_err()); + } + }); +} diff --git a/tests/it/compute/like.rs b/tests/it/compute/like.rs new file mode 100644 index 00000000000..bd93b604cbf --- /dev/null +++ b/tests/it/compute/like.rs @@ -0,0 +1,46 @@ +use arrow2::array::*; +use arrow2::compute::like::*; +use arrow2::error::Result; + +#[test] +fn test_like_binary() -> Result<()> { + let strings = BinaryArray::::from_slice(&["Arrow", "Arrow", "Arrow", "Arrow", "Ar"]); + let patterns = BinaryArray::::from_slice(&["A%", "B%", "%r_ow", "A_", "A_"]); + let result = like_binary(&strings, &patterns).unwrap(); + assert_eq!( + result, + BooleanArray::from_slice(&[true, false, true, false, true]) + ); + Ok(()) +} + +#[test] +fn test_nlike_binary() -> Result<()> { + let strings = BinaryArray::::from_slice(&["Arrow", "Arrow", "Arrow", "Arrow", "Ar"]); + let patterns = BinaryArray::::from_slice(&["A%", "B%", "%r_ow", "A_", "A_"]); + let result = nlike_binary(&strings, &patterns).unwrap(); + assert_eq!( + result, + BooleanArray::from_slice(&[false, true, false, true, false]) + ); + Ok(()) +} + +#[test] +fn test_like_binary_scalar() -> Result<()> { + let array = BinaryArray::::from_slice(&["Arrow", "Arrow", "Arrow", "BA"]); + let result = like_binary_scalar(&array, b"A%").unwrap(); + assert_eq!(result, BooleanArray::from_slice(&[true, true, true, false])); + Ok(()) +} + +#[test] +fn test_nlike_binary_scalar() -> Result<()> { + let array = BinaryArray::::from_slice(&["Arrow", "Arrow", "Arrow", "BA"]); + let result = nlike_binary_scalar(&array, "A%".as_bytes()).unwrap(); + assert_eq!( + result, + BooleanArray::from_slice(&[false, false, false, true]) + ); + Ok(()) +} diff --git a/tests/it/compute/limit.rs b/tests/it/compute/limit.rs new file mode 100644 index 00000000000..30a43e5ce2a --- /dev/null +++ b/tests/it/compute/limit.rs @@ -0,0 +1,11 @@ +use arrow2::array::*; +use arrow2::compute::limit::limit; + +#[test] +fn limit_array() { + let a = Int32Array::from_slice(&[5, 6, 7, 8, 9]); + let b = limit(&a, 3); + let c = b.as_ref().as_any().downcast_ref::().unwrap(); + let expected = Int32Array::from_slice(&[5, 6, 7]); + assert_eq!(&expected, c); +} diff --git a/tests/it/compute/merge_sort.rs b/tests/it/compute/merge_sort.rs new file mode 100644 index 00000000000..e50a41d77e2 --- /dev/null +++ b/tests/it/compute/merge_sort.rs @@ -0,0 +1,230 @@ +use std::iter::once; + +use arrow2::array::*; +use arrow2::compute::merge_sort::*; +use arrow2::compute::sort::sort; +use arrow2::error::Result; + +#[test] +fn merge_u32() -> Result<()> { + let a0: &dyn Array = &Int32Array::from_slice(&[0, 1, 2, 3]); + let a1: &dyn Array = &Int32Array::from_slice(&[2, 3, 4, 5]); + + let options = SortOptions::default(); + let arrays = vec![a0, a1]; + let pairs = vec![(arrays.as_ref(), &options)]; + let comparator = build_comparator(&pairs)?; + + // (0, 1, 2) corresponds to slice [1, 2] of a0 + // (1, 2, 2) corresponds to slice [4, 5] of a1 + // slices are already sorted => identity + let result = + merge_sort_slices(once(&(0, 1, 2)), once(&(1, 2, 2)), &comparator).collect::>(); + + assert_eq!(result, vec![(0, 1, 2), (1, 2, 2)]); + + // (0, 2, 2) corresponds to slice [2, 3] of a0 + // (1, 0, 3) corresponds to slice [2, 3, 4] of a1 + let result = + merge_sort_slices(once(&(0, 2, 2)), once(&(1, 0, 3)), &comparator).collect::>(); + + // 2 (a0) , [2, 3] (a1) , 3 (a0) , 4 (a1) + // (0, 2, 1), (1, 0, 2) , (0, 3, 1), (1, 2, 1) + assert_eq!(result, vec![(0, 2, 1), (1, 0, 2), (0, 3, 1), (1, 2, 1)]); + Ok(()) +} + +#[test] +fn merge_with_limit() -> Result<()> { + let a0: &dyn Array = &Int32Array::from_slice(&[0, 2, 4, 6, 8]); + let a1: &dyn Array = &Int32Array::from_slice(&[1, 3, 5, 7, 9]); + + let options = SortOptions::default(); + let arrays = vec![a0, a1]; + let pairs = vec![(arrays.as_ref(), &options)]; + let comparator = build_comparator(&pairs)?; + + let slices = merge_sort_slices(once(&(0, 0, 5)), once(&(1, 0, 5)), &comparator); + // thus, they can be used to take from the arrays + let array = take_arrays(&arrays, slices, Some(5)); + + let expected = Int32Array::from_slice(&[0, 1, 2, 3, 4]); + // values are right + assert_eq!(expected, array.as_ref()); + Ok(()) +} + +#[test] +fn merge_slices_to_vec() -> Result<()> { + let a0: &dyn Array = &Int32Array::from_slice(&[0, 2, 4, 6, 8]); + let a1: &dyn Array = &Int32Array::from_slice(&[1, 3, 5, 7, 9]); + + let options = SortOptions::default(); + let arrays = vec![a0, a1]; + let pairs = vec![(arrays.as_ref(), &options)]; + let comparator = build_comparator(&pairs)?; + + let slices = merge_sort_slices(once(&(0, 0, 5)), once(&(1, 0, 5)), &comparator); + let vec = slices.to_vec(Some(5)); + assert_eq!(vec, [(0, 0, 1), (1, 0, 1), (0, 1, 1), (1, 1, 1), (0, 2, 1)]); + Ok(()) +} + +#[test] +fn merge_4_i32() -> Result<()> { + let a0: &dyn Array = &Int32Array::from_slice(&[0, 1]); + let a1: &dyn Array = &Int32Array::from_slice(&[2, 6]); + let a2: &dyn Array = &Int32Array::from_slice(&[3, 5]); + let a3: &dyn Array = &Int32Array::from_slice(&[4, 7]); + + let options = SortOptions::default(); + let arrays = vec![a0, a1, a2, a3]; + let pairs = vec![(arrays.as_ref(), &options)]; + let slices = slices(&pairs)?; + + // slices are right. + assert_eq!( + slices, + vec![ + (0, 0, 2), + (1, 0, 1), + (2, 0, 1), + (3, 0, 1), // 4 + (2, 1, 1), // 5 + (1, 1, 1), // 6 + (3, 1, 1), // 7 + ] + ); + + // thus, they can be used to take from the arrays + let array = take_arrays(&arrays, slices, None); + + let expected = Int32Array::from_slice(&[0, 1, 2, 3, 4, 5, 6, 7]); + + // values are right + assert_eq!(expected, array.as_ref()); + Ok(()) +} + +#[test] +fn merge_binary() -> Result<()> { + let a0: &dyn Array = &BinaryArray::::from_slice(&[b"a", b"c", b"d", b"e"]); + let a1: &dyn Array = &BinaryArray::::from_slice(&[b"b", b"y", b"z", b"z"]); + + let options = SortOptions::default(); + let arrays = vec![a0, a1]; + let pairs = vec![(arrays.as_ref(), &options)]; + let comparator = build_comparator(&pairs)?; + + // (0, 0, 4) corresponds to slice ["a", "c", "d", "e"] of a0 + // (1, 0, 4) corresponds to slice ["b", "y", "z", "z"] of a1 + + let result = + merge_sort_slices(once(&(0, 0, 4)), once(&(1, 0, 4)), &comparator).collect::>(); + + // "a" (a0) , "b" (a1) , ["c", "d", "e"] (a0), ["y", "z", "z"] (a1) + // (0, 0, 1), (1, 0, 1), (0, 1, 3) , (1, 1, 3) + assert_eq!(result, vec![(0, 0, 1), (1, 0, 1), (0, 1, 3), (1, 1, 3)]); + + // (0, 1, 2) corresponds to slice ["c", "d"] of a0 + // (1, 0, 3) corresponds to slice ["b", "y", "z"] of a1 + let result = + merge_sort_slices(once(&(0, 1, 2)), once(&(1, 0, 3)), &comparator).collect::>(); + + // "b" (a1) , ["c", "d"] (a0) , ["y", "z"] + // (1, 0, 1), (0, 1, 2) , (1, 1, 2) + assert_eq!(result, vec![(1, 0, 1), (0, 1, 2), (1, 1, 2)]); + Ok(()) +} + +#[test] +fn merge_string() -> Result<()> { + let a0: &dyn Array = &Utf8Array::::from_slice(&["a", "c", "d", "e"]); + let a1: &dyn Array = &Utf8Array::::from_slice(&["b", "y", "z", "z"]); + + let options = SortOptions::default(); + let arrays = vec![a0, a1]; + let pairs = vec![(arrays.as_ref(), &options)]; + let comparator = build_comparator(&pairs)?; + + // (0, 0, 4) corresponds to slice ["a", "c", "d", "e"] of a0 + // (1, 0, 4) corresponds to slice ["b", "y", "z", "z"] of a1 + + let result = + merge_sort_slices(once(&(0, 0, 4)), once(&(1, 0, 4)), &comparator).collect::>(); + + // "a" (a0) , "b" (a1) , ["c", "d", "e"] (a0), ["y", "z", "z"] (a1) + // (0, 0, 1), (1, 0, 1), (0, 1, 3) , (1, 1, 3) + assert_eq!(result, vec![(0, 0, 1), (1, 0, 1), (0, 1, 3), (1, 1, 3)]); + + // (0, 1, 2) corresponds to slice ["c", "d"] of a0 + // (1, 0, 3) corresponds to slice ["b", "y", "z"] of a1 + let result = + merge_sort_slices(once(&(0, 1, 2)), once(&(1, 0, 3)), &comparator).collect::>(); + + // "b" (a1) , ["c", "d"] (a0) , ["y", "z"] + // (1, 0, 1), (0, 1, 2) , (1, 1, 2) + assert_eq!(result, vec![(1, 0, 1), (0, 1, 2), (1, 1, 2)]); + Ok(()) +} + +#[test] +fn merge_sort_many() -> Result<()> { + // column 1 + let a00: &dyn Array = &Int32Array::from_slice(&[0, 1, 2, 3]); + let a01: &dyn Array = &Int32Array::from_slice(&[2, 3, 4]); + // column 2 + let a10: &dyn Array = &Utf8Array::::from_slice(&["a", "c", "d", "e"]); + let a11: &dyn Array = &Utf8Array::::from_slice(&["b", "y", "z"]); + // column 3 + // arrays to be sorted via the columns above + let array0: &dyn Array = &Int32Array::from_slice(&[0, 1, 2, 3]); + let array1: &dyn Array = &Int32Array::from_slice(&[4, 5, 6]); + + let expected = Int32Array::from_slice(&[ + 0, // 0 (a00) < 2 (a01) + 1, // 1 (a00) < 2 (a01) + 4, // 2 (a00) == 2 (a01), "d" (a10) > "b" (a11) + 2, // 2 (a00) < 3 (a01) + 3, // 3 (a00) == 3 (a01), "e" (a10) < "y" (a11) + 5, // arrays0 has finished + 6, // arrays0 has finished + ]); + + // merge-sort according to column 1 and then column 2 + let options = SortOptions::default(); + let arrays0 = vec![a00, a01]; + let arrays1 = vec![a10, a11]; + let pairs = vec![(arrays0.as_ref(), &options), (arrays1.as_ref(), &options)]; + let slices = slices(&pairs)?; + + let array = take_arrays(&[array0, array1], slices, None); + + assert_eq!(expected, array.as_ref()); + Ok(()) +} + +#[test] +fn test_sort() -> Result<()> { + let data0 = vec![4, 1, 2, 10, 3, 3]; + let data1 = vec![5, 1, 0, 6, 7]; + + let mut expected_data = [data0.clone(), data1.clone()].concat(); + expected_data.sort_unstable(); + let expected = Int32Array::from_slice(&expected_data); + + let a0: &dyn Array = &Int32Array::from_slice(&data0); + let a1: &dyn Array = &Int32Array::from_slice(&data1); + + let options = SortOptions::default(); + + // sort individually, potentially in parallel. + let a0 = sort(a0, &options, None)?; + let a1 = sort(a1, &options, None)?; + + // merge then. If multiple arrays, this can be applied in parallel. + let result = merge_sort(a0.as_ref(), a1.as_ref(), &options, None)?; + + assert_eq!(expected, result.as_ref()); + Ok(()) +} diff --git a/tests/it/compute/mod.rs b/tests/it/compute/mod.rs index 0924e72627f..0f2ed2c12a3 100644 --- a/tests/it/compute/mod.rs +++ b/tests/it/compute/mod.rs @@ -1,7 +1,20 @@ mod boolean; +mod boolean_kleene; mod cast; mod concat; mod filter; mod hash; mod if_then_else; +mod length; +#[cfg(feature = "regex")] +mod like; +mod limit; +#[cfg(feature = "merge_sort")] +mod merge_sort; +mod partition; +#[cfg(feature = "regex")] +mod regex_match; mod sort; +mod substring; +mod temporal; +mod window; diff --git a/tests/it/compute/partition.rs b/tests/it/compute/partition.rs new file mode 100644 index 00000000000..3b390ac6406 --- /dev/null +++ b/tests/it/compute/partition.rs @@ -0,0 +1,191 @@ +use arrow2::array::*; +use arrow2::compute::partition::*; +use arrow2::compute::sort::{SortColumn, SortOptions}; +use arrow2::datatypes::DataType; +use arrow2::error::Result; + +#[test] +fn lexicographical_partition_ranges_empty() { + let input = vec![]; + assert!( + lexicographical_partition_ranges(&input).is_err(), + "lexicographical_partition_ranges should reject columns with empty rows" + ); +} + +#[test] +fn lexicographical_partition_ranges_unaligned_rows() { + let values1 = Int64Array::from([None, Some(-1)]); + let values2 = Utf8Array::::from(&[Some("foo")]); + let input = vec![ + SortColumn { + values: &values1, + options: None, + }, + SortColumn { + values: &values2, + options: None, + }, + ]; + assert!( + lexicographical_partition_ranges(&input).is_err(), + "lexicographical_partition_ranges should reject columns with different row counts" + ); +} + +#[test] +fn lexicographical_partition_single_column() -> Result<()> { + let values = Int64Array::from_slice(&[1, 2, 2, 2, 2, 2, 2, 2, 9]); + let input = vec![SortColumn { + values: &values, + options: Some(SortOptions { + descending: false, + nulls_first: true, + }), + }]; + { + let results = lexicographical_partition_ranges(&input)?; + assert_eq!( + vec![(0_usize..1_usize), (1_usize..8_usize), (8_usize..9_usize)], + results.collect::>() + ); + } + Ok(()) +} + +#[test] +fn lexicographical_partition_all_equal_values() -> Result<()> { + let values = Int64Array::from_trusted_len_values_iter(std::iter::repeat(1).take(1000)); + let input = vec![SortColumn { + values: &values, + options: Some(SortOptions { + descending: false, + nulls_first: true, + }), + }]; + + { + let results = lexicographical_partition_ranges(&input)?; + assert_eq!(vec![(0_usize..1000_usize)], results.collect::>()); + } + Ok(()) +} + +#[test] +fn lexicographical_partition_all_null_values() -> Result<()> { + let values1 = new_null_array(DataType::Int8, 1000); + let values2 = new_null_array(DataType::UInt16, 1000); + let input = vec![ + SortColumn { + values: values1.as_ref(), + options: Some(SortOptions { + descending: false, + nulls_first: true, + }), + }, + SortColumn { + values: values2.as_ref(), + options: Some(SortOptions { + descending: false, + nulls_first: false, + }), + }, + ]; + { + let results = lexicographical_partition_ranges(&input)?; + assert_eq!(vec![(0_usize..1000_usize)], results.collect::>()); + } + Ok(()) +} + +#[test] +fn lexicographical_partition_unique_column_1() -> Result<()> { + let values1 = Int64Array::from(vec![None, Some(-1)]); + let values2 = Utf8Array::::from(vec![Some("foo"), Some("bar")]); + let input = vec![ + SortColumn { + values: &values1, + options: Some(SortOptions { + descending: false, + nulls_first: true, + }), + }, + SortColumn { + values: &values2, + options: Some(SortOptions { + descending: true, + nulls_first: true, + }), + }, + ]; + { + let results = lexicographical_partition_ranges(&input)?; + assert_eq!( + vec![(0_usize..1_usize), (1_usize..2_usize)], + results.collect::>() + ); + } + Ok(()) +} + +#[test] +fn lexicographical_partition_unique_column_2() -> Result<()> { + let values1 = Int64Array::from(vec![None, Some(-1), Some(-1)]); + let values2 = Utf8Array::::from(vec![Some("foo"), Some("bar"), Some("apple")]); + + let input = vec![ + SortColumn { + values: &values1, + options: Some(SortOptions { + descending: false, + nulls_first: true, + }), + }, + SortColumn { + values: &values2, + options: Some(SortOptions { + descending: true, + nulls_first: true, + }), + }, + ]; + { + let results = lexicographical_partition_ranges(&input)?; + assert_eq!( + vec![(0_usize..1_usize), (1_usize..2_usize), (2_usize..3_usize),], + results.collect::>() + ); + } + Ok(()) +} + +#[test] +fn lexicographical_partition_non_unique_column_1() -> Result<()> { + let values1 = Int64Array::from(vec![None, Some(-1), Some(-1), Some(1)]); + let values2 = Utf8Array::::from(vec![Some("foo"), Some("bar"), Some("bar"), Some("bar")]); + + let input = vec![ + SortColumn { + values: &values1, + options: Some(SortOptions { + descending: false, + nulls_first: true, + }), + }, + SortColumn { + values: &values2, + options: Some(SortOptions { + descending: true, + nulls_first: true, + }), + }, + ]; + { + let results = lexicographical_partition_ranges(&input)?; + assert_eq!( + vec![(0_usize..1_usize), (1_usize..3_usize), (3_usize..4_usize),], + results.collect::>() + ); + } + Ok(()) +} diff --git a/tests/it/compute/regex_match.rs b/tests/it/compute/regex_match.rs new file mode 100644 index 00000000000..141a87ad3e6 --- /dev/null +++ b/tests/it/compute/regex_match.rs @@ -0,0 +1,57 @@ +use arrow2::array::{BooleanArray, Offset, Utf8Array}; +use arrow2::compute::regex_match::*; +use arrow2::error::Result; + +fn test_generic, &Utf8Array) -> Result>( + lhs: Vec<&str>, + pattern: Vec<&str>, + op: F, + expected: Vec, +) { + let lhs = Utf8Array::::from_slice(lhs); + let pattern = Utf8Array::::from_slice(pattern); + let expected = BooleanArray::from_slice(expected); + let result = op(&lhs, &pattern).unwrap(); + assert_eq!(result, expected); +} + +fn test_generic_scalar, &str) -> Result>( + lhs: Vec<&str>, + pattern: &str, + op: F, + expected: Vec, +) { + let lhs = Utf8Array::::from_slice(lhs); + let expected = BooleanArray::from_slice(expected); + let result = op(&lhs, pattern).unwrap(); + assert_eq!(result, expected); +} + +#[test] +fn test_like() { + test_generic::( + vec![ + "arrow", "arrow", "arrow", "arrow", "arrow", "arrows", "arrow", + ], + vec!["arrow", "^ar", "ro", "foo", "arr$", "arrow.", "arrow."], + regex_match, + vec![true, true, true, false, false, true, false], + ) +} + +#[test] +fn test_like_scalar() { + test_generic_scalar::( + vec!["arrow", "parquet", "datafusion", "flight"], + "ar", + regex_match_scalar, + vec![true, true, false, false], + ); + + test_generic_scalar::( + vec!["arrow", "parquet", "datafusion", "flight"], + "^ar", + regex_match_scalar, + vec![true, false, false, false], + ) +} diff --git a/tests/it/compute/substring.rs b/tests/it/compute/substring.rs new file mode 100644 index 00000000000..3ab1ca95a4c --- /dev/null +++ b/tests/it/compute/substring.rs @@ -0,0 +1,336 @@ +use arrow2::{array::*, compute::substring::*, error::Result}; + +fn with_nulls_utf8() -> Result<()> { + let cases = vec![ + // identity + ( + vec![Some("hello"), None, Some("word")], + 0, + None, + vec![Some("hello"), None, Some("word")], + ), + // 0 length -> Nothing + ( + vec![Some("hello"), None, Some("word")], + 0, + Some(0), + vec![Some(""), None, Some("")], + ), + // high start -> Nothing + ( + vec![Some("hello"), None, Some("word")], + 1000, + Some(0), + vec![Some(""), None, Some("")], + ), + // high negative start -> identity + ( + vec![Some("hello"), None, Some("word")], + -1000, + None, + vec![Some("hello"), None, Some("word")], + ), + // high length -> identity + ( + vec![Some("hello"), None, Some("word")], + 0, + Some(1000), + vec![Some("hello"), None, Some("word")], + ), + ]; + + cases + .into_iter() + .try_for_each::<_, Result<()>>(|(array, start, length, expected)| { + let array = Utf8Array::::from(&array); + let result = substring(&array, start, &length)?; + assert_eq!(array.len(), result.len()); + + let result = result.as_any().downcast_ref::>().unwrap(); + let expected = Utf8Array::::from(&expected); + assert_eq!(&expected, result); + Ok(()) + })?; + + Ok(()) +} + +#[test] +fn with_nulls_string() -> Result<()> { + with_nulls_utf8::() +} + +#[test] +fn with_nulls_large_string() -> Result<()> { + with_nulls_utf8::() +} + +fn without_nulls_utf8() -> Result<()> { + let cases = vec![ + // increase start + ( + vec!["hello", "", "word"], + 0, + None, + vec!["hello", "", "word"], + ), + (vec!["hello", "", "word"], 1, None, vec!["ello", "", "ord"]), + (vec!["hello", "", "word"], 2, None, vec!["llo", "", "rd"]), + (vec!["hello", "", "word"], 3, None, vec!["lo", "", "d"]), + (vec!["hello", "", "word"], 10, None, vec!["", "", ""]), + // increase start negatively + (vec!["hello", "", "word"], -1, None, vec!["o", "", "d"]), + (vec!["hello", "", "word"], -2, None, vec!["lo", "", "rd"]), + (vec!["hello", "", "word"], -3, None, vec!["llo", "", "ord"]), + ( + vec!["hello", "", "word"], + -10, + None, + vec!["hello", "", "word"], + ), + // increase length + (vec!["hello", "", "word"], 1, Some(1), vec!["e", "", "o"]), + (vec!["hello", "", "word"], 1, Some(2), vec!["el", "", "or"]), + ( + vec!["hello", "", "word"], + 1, + Some(3), + vec!["ell", "", "ord"], + ), + ( + vec!["hello", "", "word"], + 1, + Some(4), + vec!["ello", "", "ord"], + ), + (vec!["hello", "", "word"], -3, Some(1), vec!["l", "", "o"]), + (vec!["hello", "", "word"], -3, Some(2), vec!["ll", "", "or"]), + ( + vec!["hello", "", "word"], + -3, + Some(3), + vec!["llo", "", "ord"], + ), + ( + vec!["hello", "", "word"], + -3, + Some(4), + vec!["llo", "", "ord"], + ), + ]; + + cases + .into_iter() + .try_for_each::<_, Result<()>>(|(array, start, length, expected)| { + let array = Utf8Array::::from_slice(&array); + let result = substring(&array, start, &length)?; + assert_eq!(array.len(), result.len()); + let result = result.as_any().downcast_ref::>().unwrap(); + let expected = Utf8Array::::from_slice(&expected); + assert_eq!(&expected, result); + Ok(()) + })?; + + Ok(()) +} + +#[test] +fn without_nulls_string() -> Result<()> { + without_nulls_utf8::() +} + +#[test] +fn without_nulls_large_string() -> Result<()> { + without_nulls_utf8::() +} + +fn with_null_binarys() -> Result<()> { + let cases = vec![ + // identity + ( + vec![Some(b"hello"), None, Some(b"world")], + 0, + None, + vec![Some("hello"), None, Some("world")], + ), + // 0 length -> Nothing + ( + vec![Some(b"hello"), None, Some(b"world")], + 0, + Some(0), + vec![Some(""), None, Some("")], + ), + // high start -> Nothing + ( + vec![Some(b"hello"), None, Some(b"world")], + 1000, + Some(0), + vec![Some(""), None, Some("")], + ), + // high negative start -> identity + ( + vec![Some(b"hello"), None, Some(b"world")], + -1000, + None, + vec![Some("hello"), None, Some("world")], + ), + // high length -> identity + ( + vec![Some(b"hello"), None, Some(b"world")], + 0, + Some(1000), + vec![Some("hello"), None, Some("world")], + ), + ]; + + cases + .into_iter() + .try_for_each::<_, Result<()>>(|(array, start, length, expected)| { + let array = BinaryArray::::from(&array); + let result = substring(&array, start, &length)?; + assert_eq!(array.len(), result.len()); + + let result = result.as_any().downcast_ref::>().unwrap(); + let expected = BinaryArray::::from(&expected); + assert_eq!(&expected, result); + Ok(()) + })?; + + Ok(()) +} + +#[test] +fn with_nulls_binary() -> Result<()> { + with_null_binarys::() +} + +#[test] +fn with_nulls_large_binary() -> Result<()> { + with_null_binarys::() +} + +fn without_null_binarys() -> Result<()> { + let cases = vec![ + // increase start + ( + vec!["hello", "", "word"], + 0, + None, + vec!["hello", "", "word"], + ), + (vec!["hello", "", "word"], 1, None, vec!["ello", "", "ord"]), + (vec!["hello", "", "word"], 2, None, vec!["llo", "", "rd"]), + (vec!["hello", "", "word"], 3, None, vec!["lo", "", "d"]), + (vec!["hello", "", "word"], 10, None, vec!["", "", ""]), + // increase start negatively + (vec!["hello", "", "word"], -1, None, vec!["o", "", "d"]), + (vec!["hello", "", "word"], -2, None, vec!["lo", "", "rd"]), + (vec!["hello", "", "word"], -3, None, vec!["llo", "", "ord"]), + ( + vec!["hello", "", "word"], + -10, + None, + vec!["hello", "", "word"], + ), + // increase length + (vec!["hello", "", "word"], 1, Some(1), vec!["e", "", "o"]), + (vec!["hello", "", "word"], 1, Some(2), vec!["el", "", "or"]), + ( + vec!["hello", "", "word"], + 1, + Some(3), + vec!["ell", "", "ord"], + ), + ( + vec!["hello", "", "word"], + 1, + Some(4), + vec!["ello", "", "ord"], + ), + (vec!["hello", "", "word"], -3, Some(1), vec!["l", "", "o"]), + (vec!["hello", "", "word"], -3, Some(2), vec!["ll", "", "or"]), + ( + vec!["hello", "", "word"], + -3, + Some(3), + vec!["llo", "", "ord"], + ), + ( + vec!["hello", "", "word"], + -3, + Some(4), + vec!["llo", "", "ord"], + ), + ]; + + cases + .into_iter() + .try_for_each::<_, Result<()>>(|(array, start, length, expected)| { + let array = BinaryArray::::from_slice(&array); + let result = substring(&array, start, &length)?; + assert_eq!(array.len(), result.len()); + let result = result.as_any().downcast_ref::>().unwrap(); + let expected = BinaryArray::::from_slice(&expected); + assert_eq!(&expected, result); + Ok(()) + })?; + + Ok(()) +} + +#[test] +fn without_nulls_binary() -> Result<()> { + without_null_binarys::() +} + +#[test] +fn without_nulls_large_binary() -> Result<()> { + without_null_binarys::() +} + +#[test] +fn consistency() { + use arrow2::datatypes::DataType::*; + use arrow2::datatypes::TimeUnit; + let datatypes = vec![ + Null, + Boolean, + UInt8, + UInt16, + UInt32, + UInt64, + Int8, + Int16, + Int32, + Int64, + Float32, + Float64, + Timestamp(TimeUnit::Second, None), + Timestamp(TimeUnit::Millisecond, None), + Timestamp(TimeUnit::Microsecond, None), + Timestamp(TimeUnit::Nanosecond, None), + Time64(TimeUnit::Microsecond), + Time64(TimeUnit::Nanosecond), + Date32, + Time32(TimeUnit::Second), + Time32(TimeUnit::Millisecond), + Date64, + Utf8, + LargeUtf8, + Binary, + LargeBinary, + Duration(TimeUnit::Second), + Duration(TimeUnit::Millisecond), + Duration(TimeUnit::Microsecond), + Duration(TimeUnit::Nanosecond), + ]; + + datatypes.into_iter().for_each(|d1| { + let array = new_null_array(d1.clone(), 10); + if can_substring(&d1) { + assert!(substring(array.as_ref(), 0, &None).is_ok()); + } else { + assert!(substring(array.as_ref(), 0, &None).is_err()); + } + }); +} diff --git a/tests/it/compute/temporal.rs b/tests/it/compute/temporal.rs new file mode 100644 index 00000000000..22a7e202757 --- /dev/null +++ b/tests/it/compute/temporal.rs @@ -0,0 +1,177 @@ +use arrow2::array::*; +use arrow2::compute::temporal::*; +use arrow2::datatypes::*; + +#[test] +fn date64_hour() { + let array = + Int64Array::from(&[Some(1514764800000), None, Some(1550636625000)]).to(DataType::Date64); + + let result = hour(&array).unwrap(); + let expected = UInt32Array::from(&[Some(0), None, Some(4)]); + assert_eq!(result, expected); +} + +#[test] +fn date32_hour() { + let array = Int32Array::from(&[Some(15147), None, Some(15148)]).to(DataType::Date32); + + let result = hour(&array).unwrap(); + let expected = UInt32Array::from(&[Some(0), None, Some(0)]); + assert_eq!(result, expected); +} + +#[test] +fn time32_second_hour() { + let array = Int32Array::from(&[Some(37800), None]).to(DataType::Time32(TimeUnit::Second)); + + let result = hour(&array).unwrap(); + let expected = UInt32Array::from(&[Some(10), None]); + assert_eq!(result, expected); +} + +#[test] +fn time64_micro_hour() { + let array = + Int64Array::from(&[Some(37800000000), None]).to(DataType::Time64(TimeUnit::Microsecond)); + + let result = hour(&array).unwrap(); + let expected = UInt32Array::from(&[Some(10), None]); + assert_eq!(result, expected); +} + +#[test] +fn timestamp_micro_hour() { + let array = Int64Array::from(&[Some(37800000000), None]) + .to(DataType::Timestamp(TimeUnit::Microsecond, None)); + + let result = hour(&array).unwrap(); + let expected = UInt32Array::from(&[Some(10), None]); + assert_eq!(result, expected); +} + +#[test] +fn timestamp_date64_year() { + let array = Int64Array::from(&[Some(1514764800000), None]).to(DataType::Date64); + + let result = year(&array).unwrap(); + let expected = Int32Array::from(&[Some(2018), None]); + assert_eq!(result, expected); +} + +#[test] +fn timestamp_date32_year() { + let array = Int32Array::from(&[Some(15147), None]).to(DataType::Date32); + + let result = year(&array).unwrap(); + let expected = Int32Array::from(&[Some(2011), None]); + assert_eq!(result, expected); +} + +#[test] +fn timestamp_micro_year() { + let array = Int64Array::from(&[Some(1612025847000000), None]) + .to(DataType::Timestamp(TimeUnit::Microsecond, None)); + + let result = year(&array).unwrap(); + let expected = Int32Array::from(&[Some(2021), None]); + assert_eq!(result, expected); +} + +#[test] +fn consistency_hour() { + use arrow2::array::new_null_array; + use arrow2::datatypes::DataType::*; + use arrow2::datatypes::TimeUnit; + + let datatypes = vec![ + Null, + Boolean, + UInt8, + UInt16, + UInt32, + UInt64, + Int8, + Int16, + Int32, + Int64, + Float32, + Float64, + Timestamp(TimeUnit::Second, None), + Timestamp(TimeUnit::Millisecond, None), + Timestamp(TimeUnit::Microsecond, None), + Timestamp(TimeUnit::Nanosecond, None), + Time64(TimeUnit::Microsecond), + Time64(TimeUnit::Nanosecond), + Date32, + Time32(TimeUnit::Second), + Time32(TimeUnit::Millisecond), + Date64, + Utf8, + LargeUtf8, + Binary, + LargeBinary, + Duration(TimeUnit::Second), + Duration(TimeUnit::Millisecond), + Duration(TimeUnit::Microsecond), + Duration(TimeUnit::Nanosecond), + ]; + + datatypes.into_iter().for_each(|d1| { + let array = new_null_array(d1.clone(), 10); + if can_hour(&d1) { + assert!(hour(array.as_ref()).is_ok()); + } else { + assert!(hour(array.as_ref()).is_err()); + } + }); +} + +#[test] +fn consistency_year() { + use arrow2::array::new_null_array; + use arrow2::datatypes::DataType::*; + use arrow2::datatypes::TimeUnit; + + let datatypes = vec![ + Null, + Boolean, + UInt8, + UInt16, + UInt32, + UInt64, + Int8, + Int16, + Int32, + Int64, + Float32, + Float64, + Timestamp(TimeUnit::Second, None), + Timestamp(TimeUnit::Millisecond, None), + Timestamp(TimeUnit::Microsecond, None), + Timestamp(TimeUnit::Nanosecond, None), + Time64(TimeUnit::Microsecond), + Time64(TimeUnit::Nanosecond), + Date32, + Time32(TimeUnit::Second), + Time32(TimeUnit::Millisecond), + Date64, + Utf8, + LargeUtf8, + Binary, + LargeBinary, + Duration(TimeUnit::Second), + Duration(TimeUnit::Millisecond), + Duration(TimeUnit::Microsecond), + Duration(TimeUnit::Nanosecond), + ]; + + datatypes.into_iter().for_each(|d1| { + let array = new_null_array(d1.clone(), 10); + if can_year(&d1) { + assert!(year(array.as_ref()).is_ok()); + } else { + assert!(year(array.as_ref()).is_err()); + } + }); +} diff --git a/tests/it/compute/window.rs b/tests/it/compute/window.rs new file mode 100644 index 00000000000..c89706a05f9 --- /dev/null +++ b/tests/it/compute/window.rs @@ -0,0 +1,29 @@ +use arrow2::array::{new_null_array, Int32Array}; +use arrow2::compute::window::*; +use arrow2::datatypes::DataType; + +#[test] +fn shift_pos() { + let array = Int32Array::from(&[Some(1), None, Some(3)]); + let result = shift(&array, 1).unwrap(); + + let expected = Int32Array::from(&[None, Some(1), None]); + + assert_eq!(expected, result.as_ref()); +} + +#[test] +fn shift_many() { + let array = Int32Array::from(&[Some(1), None, Some(3)]).to(DataType::Date32); + assert!(shift(&array, 10).is_err()); +} + +#[test] +fn shift_max() { + let array = Int32Array::from(&[Some(1), None, Some(3)]).to(DataType::Date32); + let result = shift(&array, 3).unwrap(); + + let expected = new_null_array(DataType::Date32, 3); + + assert_eq!(expected.as_ref(), result.as_ref()); +}