From 27955010a3af7e03e1f07104293fdef487030fed Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Mon, 17 Jan 2022 17:32:13 +0800 Subject: [PATCH 1/7] add eq_dyn for BinaryArray Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/comparison.rs | 31 +++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 9390d8465a00..4a3deccda8d5 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -697,6 +697,13 @@ pub fn eq_utf8_scalar( compare_op_scalar!(left, right, |a, b| a == b) } +pub fn eq_binary_scalar( + left: &GenericBinaryArray, + right: &[u8] +) -> Result { + compare_op_scalar!(left, right, |a, b| a == b) +} + #[inline] fn binary_boolean_op( left: &BooleanArray, @@ -1165,6 +1172,19 @@ where } } +/// Perform `left == right` operation on an array and a numrtic scalar value. +/// Only support BinaryArrays so far. +pub fn eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result { + match left.data_type() { + DataType::Binary => { + let left = as_generic_binary_array::(left); + eq_binary_scalar(left, right) + }, + _ => Err(ArrowError::ComputeError( + "eq_dyn_binary_scalar only supports Binary arrays".to_string())) + } +} + /// Perform `left == right` operation on an array and a numeric scalar /// value. Supports StringArrays, and DictionaryArrays that have string values pub fn eq_dyn_utf8_scalar(left: &dyn Array, right: &str) -> Result { @@ -3576,6 +3596,17 @@ mod tests { assert_eq!(neq_dyn_scalar(&array, 8).unwrap(), expected); } + #[test] + fn test_eq_dyn_binary_scalar() { + let values: Vec<&[u8]> = vec![b"one", b"two", b"", b"three"]; + let array = BinaryArray::from(values); + let a_eq = eq_dyn_binary_scalar(&array, b"two").unwrap(); + assert_eq!( + a_eq, + BooleanArray::from(vec![Some(false), Some(true), Some(false), Some(false)]) + ) + } + #[test] fn test_eq_dyn_utf8_scalar() { let array = StringArray::from(vec!["abc", "def", "xyz"]); From 8b066d7334cdc68d6de849c033d02469fb144d88 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Mon, 17 Jan 2022 17:45:17 +0800 Subject: [PATCH 2/7] correct the code formatting Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/comparison.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 4a3deccda8d5..06ef2335d92d 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -699,7 +699,7 @@ pub fn eq_utf8_scalar( pub fn eq_binary_scalar( left: &GenericBinaryArray, - right: &[u8] + right: &[u8], ) -> Result { compare_op_scalar!(left, right, |a, b| a == b) } @@ -1179,9 +1179,10 @@ pub fn eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result { let left = as_generic_binary_array::(left); eq_binary_scalar(left, right) - }, + } _ => Err(ArrowError::ComputeError( - "eq_dyn_binary_scalar only supports Binary arrays".to_string())) + "eq_dyn_binary_scalar only supports Binary arrays".to_string(), + )), } } From fccc0f9ea267f7541a82e82f256614b3d1c8f9d6 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Mon, 17 Jan 2022 21:45:04 +0800 Subject: [PATCH 3/7] add comparison support for fully qualified binary array delete dyn comparison which will be added in successive PRs Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/comparison.rs | 117 +++++++++++++++++++----- 1 file changed, 96 insertions(+), 21 deletions(-) diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 06ef2335d92d..01bc0a33fcda 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -681,6 +681,22 @@ pub fn regexp_is_match_utf8_scalar( Ok(BooleanArray::from(data)) } +/// Perform `left == right` operation on [`BinaryArray`] / [`LargeBinaryArray`]. +pub fn eq_binary( + left: &GenericBinaryArray, + right: &GenericBinaryArray, +) -> Result { + compare_op!(left, right, |a, b| a == b) +} + +/// Perform `left == right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar +pub fn eq_binary_scalar( + left: &GenericBinaryArray, + right: &[u8], +) -> Result { + compare_op_scalar!(left, right, |a, b| a == b) +} + /// Perform `left == right` operation on [`StringArray`] / [`LargeStringArray`]. pub fn eq_utf8( left: &GenericStringArray, @@ -697,13 +713,6 @@ pub fn eq_utf8_scalar( compare_op_scalar!(left, right, |a, b| a == b) } -pub fn eq_binary_scalar( - left: &GenericBinaryArray, - right: &[u8], -) -> Result { - compare_op_scalar!(left, right, |a, b| a == b) -} - #[inline] fn binary_boolean_op( left: &BooleanArray, @@ -817,6 +826,86 @@ pub fn neq_bool_scalar(left: &BooleanArray, right: bool) -> Result eq_bool_scalar(left, !right) } +/// Perform `left != right` operation on [`BinaryArray`] / [`LargeBinaryArray`]. +pub fn neq_binary( + left: &GenericBinaryArray, + right: &GenericBinaryArray, +) -> Result { + compare_op!(left, right, |a, b| a != b) +} + +/// Perform `left != right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar. +pub fn neq_binary_scalar( + left: &GenericBinaryArray, + right: &[u8], +) -> Result { + compare_op_scalar!(left, right, |a, b| a != b) +} + +/// Perform `left < right` operation on [`BinaryArray`] / [`LargeBinaryArray`]. +pub fn lt_binary( + left: &GenericBinaryArray, + right: &GenericBinaryArray, +) -> Result { + compare_op!(left, right, |a, b| a < b) +} + +/// Perform `left < right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar. +pub fn lt_binary_scalar( + left: &GenericBinaryArray, + right: &[u8], +) -> Result { + compare_op_scalar!(left, right, |a, b| a < b) +} + +/// Perform `left <= right` operation on [`BinaryArray`] / [`LargeBinaryArray`]. +pub fn lt_eq_binary( + left: &GenericBinaryArray, + right: &GenericBinaryArray, +) -> Result { + compare_op!(left, right, |a, b| a <= b) +} + +/// Perform `left <= right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar. +pub fn lt_eq_binary_scalar( + left: &GenericBinaryArray, + right: &[u8], +) -> Result { + compare_op_scalar!(left, right, |a, b| a <= b) +} + +/// Perform `left > right` operation on [`BinaryArray`] / [`LargeBinaryArray`]. +pub fn gt_binary( + left: &GenericBinaryArray, + right: &GenericBinaryArray, +) -> Result { + compare_op!(left, right, |a, b| a > b) +} + +/// Perform `left > right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar. +pub fn gt_binary_scalar( + left: &GenericBinaryArray, + right: &[u8], +) -> Result { + compare_op_scalar!(left, right, |a, b| a > b) +} + +/// Perform `left >= right` operation on [`BinaryArray`] / [`LargeBinaryArray`]. +pub fn gt_eq_binary( + left: &GenericBinaryArray, + right: &GenericBinaryArray, +) -> Result { + compare_op!(left, right, |a, b| a >= b) +} + +/// Perform `left <= right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar. +pub fn gt_eq_binary_scalar( + left: &GenericBinaryArray, + right: &[u8], +) -> Result { + compare_op_scalar!(left, right, |a, b| a >= b) +} + /// Perform `left != right` operation on [`StringArray`] / [`LargeStringArray`]. pub fn neq_utf8( left: &GenericStringArray, @@ -1172,20 +1261,6 @@ where } } -/// Perform `left == right` operation on an array and a numrtic scalar value. -/// Only support BinaryArrays so far. -pub fn eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result { - match left.data_type() { - DataType::Binary => { - let left = as_generic_binary_array::(left); - eq_binary_scalar(left, right) - } - _ => Err(ArrowError::ComputeError( - "eq_dyn_binary_scalar only supports Binary arrays".to_string(), - )), - } -} - /// Perform `left == right` operation on an array and a numeric scalar /// value. Supports StringArrays, and DictionaryArrays that have string values pub fn eq_dyn_utf8_scalar(left: &dyn Array, right: &str) -> Result { From 7921d4ba20b61b823af9823e910312beafb5dbba Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Tue, 18 Jan 2022 10:08:10 +0800 Subject: [PATCH 4/7] add tests for comparison of fully qualified BinaryArray Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/comparison.rs | 166 ++++++++++++++++++++++-- 1 file changed, 155 insertions(+), 11 deletions(-) diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 01bc0a33fcda..c19053c092cf 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -2890,6 +2890,161 @@ mod tests { ); } + macro_rules! test_binary { + ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => { + #[test] + fn $test_name() { + let left = BinaryArray::from_vec($left); + let right = BinaryArray::from_vec($right); + let res = $op(&left, &right).unwrap(); + let expected = $expected; + assert_eq!(expected.len(), res.len()); + for i in 0..res.len() { + let v = res.value(i); + assert_eq!(v, expected[i]); + } + + let left = LargeBinaryArray::from_vec($left); + let right = LargeBinaryArray::from_vec($right); + let res = $op(&left, &right).unwrap(); + let expected = $expected; + assert_eq!(expected.len(), res.len()); + for i in 0..res.len() { + let v = res.value(i); + assert_eq!(v, expected[i]); + } + } + }; + } + + #[test] + fn test_binary_eq_scalar_on_slice() { + let a = BinaryArray::from_opt_vec( + vec![Some(b"hi"), None, Some(b"hello"), Some(b"world")], + ); + let a = a.slice(1, 3); + let a = as_generic_binary_array::(&a); + let a_eq = eq_binary_scalar(&a, b"hello").unwrap(); + assert_eq!( + a_eq, + BooleanArray::from(vec![None, Some(true), Some(false)]) + ); + } + + macro_rules! test_binary_scalar { + ($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => { + #[test] + fn $test_name() { + let left = BinaryArray::from_vec($left); + let res = $op(&left, $right).unwrap(); + let expected = $expected; + assert_eq!(expected.len(), res.len()); + for i in 0..res.len() { + let v = res.value(i); + assert_eq!( + v, + expected[i], + "unexpected result when comparing {:?} at position {} to {:?} ", + left.value(i), + i, + $right + ); + } + + let left = LargeBinaryArray::from_vec($left); + let res = $op(&left, $right).unwrap(); + let expected = $expected; + assert_eq!(expected.len(), res.len()); + for i in 0..res.len() { + let v = res.value(i); + assert_eq!( + v, + expected[i], + "unexpected result when comparing {:?} at position {} to {:?} ", + left.value(i), + i, + $right + ); + } + } + }; + } + + test_binary!( + test_binary_array_neq, + vec![b"arrow", b"arrow", b"arrow", b"arrow"], + vec![b"arrow", b"parquet", b"datafusion", b"flight"], + neq_binary, + vec![false, true, true, true] + ); + test_binary_scalar!( + test_binary_array_neq_scalar, + vec![b"arrow", b"parquet", b"datafusion", b"flight"], + "arrow".as_bytes(), + neq_binary_scalar, + vec![false, true, true, true] + ); + + test_binary!( + test_binary_array_lt, + vec![b"arrow", b"datafusion", b"flight", b"parquet"], + vec![b"flight", b"flight", b"flight", b"flight"], + lt_binary, + vec![true, true, false, false] + ); + test_binary_scalar!( + test_binary_array_lt_scalar, + vec![b"arrow", b"datafusion", b"flight", b"parquet"], + "flight".as_bytes(), + lt_binary_scalar, + vec![true, true, false, false] + ); + + test_binary!( + test_binary_array_lt_eq, + vec![b"arrow", b"datafusion", b"flight", b"parquet"], + vec![b"flight", b"flight", b"flight", b"flight"], + lt_eq_binary, + vec![true, true, true, false] + ); + test_binary_scalar!( + test_binary_array_lt_eq_scalar, + vec![b"arrow", b"datafusion", b"flight", b"parquet"], + "flight".as_bytes(), + lt_eq_binary_scalar, + vec![true, true, true, false] + ); + + test_binary!( + test_binary_array_gt, + vec![b"arrow", b"datafusion", b"flight", b"parquet"], + vec![b"flight", b"flight", b"flight", b"flight"], + gt_binary, + vec![false, false, false, true] + ); + test_binary_scalar!( + test_binary_array_gt_scalar, + vec![b"arrow", b"datafusion", b"flight", b"parquet"], + "flight".as_bytes(), + gt_binary_scalar, + vec![false, false, false, true] + ); + + test_binary!( + test_binary_array_gt_eq, + vec![b"arrow", b"datafusion", b"flight", b"parquet"], + vec![b"flight", b"flight", b"flight", b"flight"], + gt_eq_binary, + vec![false, false, true, true] + ); + test_binary_scalar!( + test_binary_array_gt_eq_scalar, + vec![b"arrow", b"datafusion", b"flight", b"parquet"], + "flight".as_bytes(), + gt_eq_binary_scalar, + vec![false, false, true, true] + ); + // Expected behaviour: // contains("ab", ["ab", "cd", null]) = true // contains("ef", ["ab", "cd", null]) = false @@ -3672,17 +3827,6 @@ mod tests { assert_eq!(neq_dyn_scalar(&array, 8).unwrap(), expected); } - #[test] - fn test_eq_dyn_binary_scalar() { - let values: Vec<&[u8]> = vec![b"one", b"two", b"", b"three"]; - let array = BinaryArray::from(values); - let a_eq = eq_dyn_binary_scalar(&array, b"two").unwrap(); - assert_eq!( - a_eq, - BooleanArray::from(vec![Some(false), Some(true), Some(false), Some(false)]) - ) - } - #[test] fn test_eq_dyn_utf8_scalar() { let array = StringArray::from(vec!["abc", "def", "xyz"]); From 6de06d9a6cd5d440b21876409bd04de96cf04d41 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Tue, 18 Jan 2022 10:21:11 +0800 Subject: [PATCH 5/7] add 2 missed tests Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/comparison.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index c19053c092cf..76e91cabc38f 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -2970,6 +2970,22 @@ mod tests { }; } + test_binary!( + test_binary_array_eq, + vec![b"arrow", b"arrow", b"arrow", b"arrow"], + vec![b"arrow", b"parquet", b"datafusion", b"flight"], + eq_binary, + vec![true, false, false, false] + ); + + test_binary_scalar!( + test_binary_array_eq_scalar, + vec![b"arrow", b"parquet", b"datafusion", b"flight"], + "arrow".as_bytes(), + eq_binary_scalar, + vec![true, false, false, false] + ); + test_binary!( test_binary_array_neq, vec![b"arrow", b"arrow", b"arrow", b"arrow"], From 5f1b3fec8ae04c77895688018af0a9d943aa942b Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Tue, 18 Jan 2022 10:30:25 +0800 Subject: [PATCH 6/7] move 2 functions Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/comparison.rs | 34 ++++++++++++------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 76e91cabc38f..20ae7aa03dda 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -681,22 +681,6 @@ pub fn regexp_is_match_utf8_scalar( Ok(BooleanArray::from(data)) } -/// Perform `left == right` operation on [`BinaryArray`] / [`LargeBinaryArray`]. -pub fn eq_binary( - left: &GenericBinaryArray, - right: &GenericBinaryArray, -) -> Result { - compare_op!(left, right, |a, b| a == b) -} - -/// Perform `left == right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar -pub fn eq_binary_scalar( - left: &GenericBinaryArray, - right: &[u8], -) -> Result { - compare_op_scalar!(left, right, |a, b| a == b) -} - /// Perform `left == right` operation on [`StringArray`] / [`LargeStringArray`]. pub fn eq_utf8( left: &GenericStringArray, @@ -826,6 +810,22 @@ pub fn neq_bool_scalar(left: &BooleanArray, right: bool) -> Result eq_bool_scalar(left, !right) } +/// Perform `left == right` operation on [`BinaryArray`] / [`LargeBinaryArray`]. +pub fn eq_binary( + left: &GenericBinaryArray, + right: &GenericBinaryArray, +) -> Result { + compare_op!(left, right, |a, b| a == b) +} + +/// Perform `left == right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar +pub fn eq_binary_scalar( + left: &GenericBinaryArray, + right: &[u8], +) -> Result { + compare_op_scalar!(left, right, |a, b| a == b) +} + /// Perform `left != right` operation on [`BinaryArray`] / [`LargeBinaryArray`]. pub fn neq_binary( left: &GenericBinaryArray, @@ -898,7 +898,7 @@ pub fn gt_eq_binary( compare_op!(left, right, |a, b| a >= b) } -/// Perform `left <= right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar. +/// Perform `left >= right` operation on [`BinaryArray`] / [`LargeBinaryArray`] and a scalar. pub fn gt_eq_binary_scalar( left: &GenericBinaryArray, right: &[u8], From 05619e3e00124d2b92de1006fd5bbe6e1a5b3a84 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Wed, 19 Jan 2022 09:39:20 +0800 Subject: [PATCH 7/7] fix reference error Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/comparison.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow/src/compute/kernels/comparison.rs b/arrow/src/compute/kernels/comparison.rs index 20ae7aa03dda..acfa47c4a28e 100644 --- a/arrow/src/compute/kernels/comparison.rs +++ b/arrow/src/compute/kernels/comparison.rs @@ -2924,7 +2924,7 @@ mod tests { ); let a = a.slice(1, 3); let a = as_generic_binary_array::(&a); - let a_eq = eq_binary_scalar(&a, b"hello").unwrap(); + let a_eq = eq_binary_scalar(a, b"hello").unwrap(); assert_eq!( a_eq, BooleanArray::from(vec![None, Some(true), Some(false)])