From be455665e6a0326140de629ca5e7aef1c3747d39 Mon Sep 17 00:00:00 2001 From: Steven Gu Date: Fri, 24 Dec 2021 12:28:54 +0000 Subject: [PATCH 1/5] Add `and_scalar` and `or_scalar` for boolean. --- src/compute/boolean.rs | 66 +++++++++++++++++++++++++++-- tests/it/compute/boolean.rs | 82 +++++++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 4 deletions(-) diff --git a/src/compute/boolean.rs b/src/compute/boolean.rs index bd6b9be8e6d..11bb7fd59f8 100644 --- a/src/compute/boolean.rs +++ b/src/compute/boolean.rs @@ -3,11 +3,16 @@ use crate::array::{Array, BooleanArray}; use crate::bitmap::{Bitmap, MutableBitmap}; use crate::datatypes::DataType; use crate::error::{ArrowError, Result}; +use crate::scalar::BooleanScalar; use super::utils::combine_validities; -/// Helper function to implement binary kernels -fn binary_boolean_kernel(lhs: &BooleanArray, rhs: &BooleanArray, op: F) -> Result +/// Helper function to implement binary kernels for two boolean arrays +fn binary_boolean_arrays_kernel( + lhs: &BooleanArray, + rhs: &BooleanArray, + op: F, +) -> Result where F: Fn(&Bitmap, &Bitmap) -> Bitmap, { @@ -27,6 +32,25 @@ where Ok(BooleanArray::from_data(DataType::Boolean, values, validity)) } +/// Helper function to implement binary kernels for a boolean array and a boolean scalar +fn binary_boolean_array_and_scalar_kernel( + array: &BooleanArray, + scalar: &BooleanScalar, + op: F, +) -> BooleanArray +where + F: Fn(bool, bool) -> bool, +{ + let (rhs, validity) = match scalar.value() { + Some(val) => (val, array.validity().map(|x| x.clone())), + None => (bool::default(), Some(Bitmap::new_zeroed(array.len()))), + }; + + let values = Bitmap::from_trusted_len_iter(array.values_iter().map(|x| op(x, rhs))); + + BooleanArray::from_data(DataType::Boolean, values, validity) +} + /// Performs `AND` operation on two arrays. If either left or right value is null then the /// result is also null. /// # Error @@ -45,7 +69,7 @@ where /// # } /// ``` pub fn and(lhs: &BooleanArray, rhs: &BooleanArray) -> Result { - binary_boolean_kernel(lhs, rhs, |lhs, rhs| lhs & rhs) + binary_boolean_arrays_kernel(lhs, rhs, |lhs, rhs| lhs & rhs) } /// Performs `OR` operation on two arrays. If either left or right value is null then the @@ -66,7 +90,7 @@ pub fn and(lhs: &BooleanArray, rhs: &BooleanArray) -> Result { /// # } /// ``` pub fn or(lhs: &BooleanArray, rhs: &BooleanArray) -> Result { - binary_boolean_kernel(lhs, rhs, |lhs, rhs| lhs | rhs) + binary_boolean_arrays_kernel(lhs, rhs, |lhs, rhs| lhs | rhs) } /// Performs unary `NOT` operation on an arrays. If value is null then the result is also @@ -133,3 +157,37 @@ pub fn is_not_null(input: &dyn Array) -> BooleanArray { }; BooleanArray::from_data(DataType::Boolean, values, None) } + +/// Performs `AND` operation on an array and a scalar value. If either left or right value +/// is null then the result is also null. +/// # Example +/// ```rust +/// use arrow2::array::BooleanArray; +/// use arrow2::compute::boolean::and_scalar; +/// # fn main() { +/// let array = BooleanArray::from_slice(vec![false, false, true, true]); +/// let scalar = BooleanScalar::new(Some(true)); +/// let result = and_scalar(&array, &scalar); +/// assert_eq!(result, BooleanArray::from(vec![false, false, true, true])); +/// # } +/// ``` +pub fn and_scalar(array: &BooleanArray, scalar: &BooleanScalar) -> BooleanArray { + binary_boolean_array_and_scalar_kernel(array, scalar, |lhs, rhs| lhs && rhs) +} + +/// Performs `OR` operation on an array and a scalar value. If either left or right value +/// is null then the result is also null. +/// # Example +/// ```rust +/// use arrow2::array::BooleanArray; +/// use arrow2::compute::boolean::or_scalar; +/// # fn main() { +/// let array = BooleanArray::from_slice(vec![false, false, true, true]); +/// let scalar = BooleanScalar::new(Some(true)); +/// let result = or_scalar(&array, &scalar); +/// assert_eq!(result, BooleanArray::from(vec![true, true, true, true])); +/// # } +/// ``` +pub fn or_scalar(array: &BooleanArray, scalar: &BooleanScalar) -> BooleanArray { + binary_boolean_array_and_scalar_kernel(array, scalar, |lhs, rhs| lhs || rhs) +} diff --git a/tests/it/compute/boolean.rs b/tests/it/compute/boolean.rs index 5468909df10..a1e431dc03c 100644 --- a/tests/it/compute/boolean.rs +++ b/tests/it/compute/boolean.rs @@ -1,5 +1,6 @@ use arrow2::array::*; use arrow2::compute::boolean::*; +use arrow2::scalar::BooleanScalar; #[test] fn array_and() { @@ -336,3 +337,84 @@ fn test_nullable_array_with_offset_is_not_null() { assert_eq!(expected, res); } + +#[test] +fn array_and_scalar() { + let array = BooleanArray::from_slice(vec![false, false, true, true]); + + let scalar = BooleanScalar::new(Some(true)); + let real = and_scalar(&array, &scalar); + + let expected = BooleanArray::from_slice(vec![false, false, true, true]); + assert_eq!(real, expected); + + let scalar = BooleanScalar::new(Some(false)); + let real = and_scalar(&array, &scalar); + + let expected = BooleanArray::from_slice(vec![false, false, false, false]); + + assert_eq!(real, expected); +} + +#[test] +fn array_and_scalar_validity() { + let array = BooleanArray::from(vec![None, Some(false), Some(true)]); + + let scalar = BooleanScalar::new(Some(true)); + let real = and_scalar(&array, &scalar); + + let expected = BooleanArray::from(vec![None, Some(false), Some(true)]); + assert_eq!(real, expected); + + let scalar = BooleanScalar::new(None); + let real = and_scalar(&array, &scalar); + + let expected = BooleanArray::from(vec![None; 3]); + assert_eq!(real, expected); + + let array = BooleanArray::from_slice(vec![true, false, true]); + let real = and_scalar(&array, &scalar); + + let expected = BooleanArray::from(vec![None; 3]); + assert_eq!(real, expected); +} + +#[test] +fn array_or_scalar() { + let array = BooleanArray::from_slice(vec![false, false, true, true]); + + let scalar = BooleanScalar::new(Some(true)); + let real = or_scalar(&array, &scalar); + + let expected = BooleanArray::from_slice(vec![true, true, true, true]); + assert_eq!(real, expected); + + let scalar = BooleanScalar::new(Some(false)); + let real = or_scalar(&array, &scalar); + + let expected = BooleanArray::from_slice(vec![false, false, true, true]); + assert_eq!(real, expected); +} + +#[test] +fn array_or_scalar_validity() { + let array = BooleanArray::from(vec![None, Some(false), Some(true)]); + + let scalar = BooleanScalar::new(Some(true)); + let real = or_scalar(&array, &scalar); + + let expected = BooleanArray::from(vec![None, Some(true), Some(true)]); + assert_eq!(real, expected); + + let scalar = BooleanScalar::new(None); + let real = or_scalar(&array, &scalar); + + let expected = BooleanArray::from(vec![None; 3]); + assert_eq!(real, expected); + + let array = BooleanArray::from_slice(vec![true, false, true]); + let real = and_scalar(&array, &scalar); + + let expected = BooleanArray::from(vec![None; 3]); + assert_eq!(real, expected); +} From 6fb50884bfef025590a526d7bee3cc4a086eaaa8 Mon Sep 17 00:00:00 2001 From: Steven Gu Date: Fri, 24 Dec 2021 13:26:50 +0000 Subject: [PATCH 2/5] Fix issues of `cargo clippy`. --- src/compute/boolean.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/compute/boolean.rs b/src/compute/boolean.rs index 11bb7fd59f8..9714eae5122 100644 --- a/src/compute/boolean.rs +++ b/src/compute/boolean.rs @@ -42,7 +42,7 @@ where F: Fn(bool, bool) -> bool, { let (rhs, validity) = match scalar.value() { - Some(val) => (val, array.validity().map(|x| x.clone())), + Some(val) => (val, array.validity().cloned()), None => (bool::default(), Some(Bitmap::new_zeroed(array.len()))), }; @@ -164,6 +164,7 @@ pub fn is_not_null(input: &dyn Array) -> BooleanArray { /// ```rust /// use arrow2::array::BooleanArray; /// use arrow2::compute::boolean::and_scalar; +/// use arrows::scalar::BooleanScalar; /// # fn main() { /// let array = BooleanArray::from_slice(vec![false, false, true, true]); /// let scalar = BooleanScalar::new(Some(true)); @@ -181,6 +182,7 @@ pub fn and_scalar(array: &BooleanArray, scalar: &BooleanScalar) -> BooleanArray /// ```rust /// use arrow2::array::BooleanArray; /// use arrow2::compute::boolean::or_scalar; +/// use arrows::scalar::BooleanScalar; /// # fn main() { /// let array = BooleanArray::from_slice(vec![false, false, true, true]); /// let scalar = BooleanScalar::new(Some(true)); From 958f3a98c983fe06d90b7bfff17451ea0385357b Mon Sep 17 00:00:00 2001 From: Steven Gu Date: Sat, 25 Dec 2021 02:13:03 +0000 Subject: [PATCH 3/5] Optimize to not compute by each items according to code review. --- src/compute/boolean.rs | 61 ++++++++++++++++--------------------- tests/it/compute/boolean.rs | 32 +++++++++---------- 2 files changed, 42 insertions(+), 51 deletions(-) diff --git a/src/compute/boolean.rs b/src/compute/boolean.rs index 9714eae5122..8fe2ab05493 100644 --- a/src/compute/boolean.rs +++ b/src/compute/boolean.rs @@ -7,12 +7,8 @@ use crate::scalar::BooleanScalar; use super::utils::combine_validities; -/// Helper function to implement binary kernels for two boolean arrays -fn binary_boolean_arrays_kernel( - lhs: &BooleanArray, - rhs: &BooleanArray, - op: F, -) -> Result +/// Helper function to implement binary kernels +fn binary_boolean_kernel(lhs: &BooleanArray, rhs: &BooleanArray, op: F) -> Result where F: Fn(&Bitmap, &Bitmap) -> Bitmap, { @@ -32,25 +28,6 @@ where Ok(BooleanArray::from_data(DataType::Boolean, values, validity)) } -/// Helper function to implement binary kernels for a boolean array and a boolean scalar -fn binary_boolean_array_and_scalar_kernel( - array: &BooleanArray, - scalar: &BooleanScalar, - op: F, -) -> BooleanArray -where - F: Fn(bool, bool) -> bool, -{ - let (rhs, validity) = match scalar.value() { - Some(val) => (val, array.validity().cloned()), - None => (bool::default(), Some(Bitmap::new_zeroed(array.len()))), - }; - - let values = Bitmap::from_trusted_len_iter(array.values_iter().map(|x| op(x, rhs))); - - BooleanArray::from_data(DataType::Boolean, values, validity) -} - /// Performs `AND` operation on two arrays. If either left or right value is null then the /// result is also null. /// # Error @@ -69,7 +46,7 @@ where /// # } /// ``` pub fn and(lhs: &BooleanArray, rhs: &BooleanArray) -> Result { - binary_boolean_arrays_kernel(lhs, rhs, |lhs, rhs| lhs & rhs) + binary_boolean_kernel(lhs, rhs, |lhs, rhs| lhs & rhs) } /// Performs `OR` operation on two arrays. If either left or right value is null then the @@ -90,7 +67,7 @@ pub fn and(lhs: &BooleanArray, rhs: &BooleanArray) -> Result { /// # } /// ``` pub fn or(lhs: &BooleanArray, rhs: &BooleanArray) -> Result { - binary_boolean_arrays_kernel(lhs, rhs, |lhs, rhs| lhs | rhs) + binary_boolean_kernel(lhs, rhs, |lhs, rhs| lhs | rhs) } /// Performs unary `NOT` operation on an arrays. If value is null then the result is also @@ -164,16 +141,23 @@ pub fn is_not_null(input: &dyn Array) -> BooleanArray { /// ```rust /// use arrow2::array::BooleanArray; /// use arrow2::compute::boolean::and_scalar; -/// use arrows::scalar::BooleanScalar; +/// use arrow2::scalar::BooleanScalar; /// # fn main() { -/// let array = BooleanArray::from_slice(vec![false, false, true, true]); +/// let array = BooleanArray::from_slice(&[false, false, true, true]); /// let scalar = BooleanScalar::new(Some(true)); /// let result = and_scalar(&array, &scalar); -/// assert_eq!(result, BooleanArray::from(vec![false, false, true, true])); +/// assert_eq!(result, BooleanArray::from_slice(&[false, false, true, true])); /// # } /// ``` pub fn and_scalar(array: &BooleanArray, scalar: &BooleanScalar) -> BooleanArray { - binary_boolean_array_and_scalar_kernel(array, scalar, |lhs, rhs| lhs && rhs) + match scalar.value() { + Some(true) => array.clone(), + Some(false) => { + let values = Bitmap::from_trusted_len_iter(std::iter::repeat(false).take(array.len())); + BooleanArray::from_data(DataType::Boolean, values, array.validity().cloned()) + } + None => BooleanArray::new_null(DataType::Boolean, array.len()), + } } /// Performs `OR` operation on an array and a scalar value. If either left or right value @@ -182,14 +166,21 @@ pub fn and_scalar(array: &BooleanArray, scalar: &BooleanScalar) -> BooleanArray /// ```rust /// use arrow2::array::BooleanArray; /// use arrow2::compute::boolean::or_scalar; -/// use arrows::scalar::BooleanScalar; +/// use arrow2::scalar::BooleanScalar; /// # fn main() { -/// let array = BooleanArray::from_slice(vec![false, false, true, true]); +/// let array = BooleanArray::from_slice(&[false, false, true, true]); /// let scalar = BooleanScalar::new(Some(true)); /// let result = or_scalar(&array, &scalar); -/// assert_eq!(result, BooleanArray::from(vec![true, true, true, true])); +/// assert_eq!(result, BooleanArray::from_slice(&[true, true, true, true])); /// # } /// ``` pub fn or_scalar(array: &BooleanArray, scalar: &BooleanScalar) -> BooleanArray { - binary_boolean_array_and_scalar_kernel(array, scalar, |lhs, rhs| lhs || rhs) + match scalar.value() { + Some(true) => { + let values = Bitmap::from_trusted_len_iter(std::iter::repeat(true).take(array.len())); + BooleanArray::from_data(DataType::Boolean, values, array.validity().cloned()) + } + Some(false) => array.clone(), + None => BooleanArray::new_null(DataType::Boolean, array.len()), + } } diff --git a/tests/it/compute/boolean.rs b/tests/it/compute/boolean.rs index a1e431dc03c..af77d98ed5b 100644 --- a/tests/it/compute/boolean.rs +++ b/tests/it/compute/boolean.rs @@ -340,81 +340,81 @@ fn test_nullable_array_with_offset_is_not_null() { #[test] fn array_and_scalar() { - let array = BooleanArray::from_slice(vec![false, false, true, true]); + let array = BooleanArray::from_slice(&[false, false, true, true]); let scalar = BooleanScalar::new(Some(true)); let real = and_scalar(&array, &scalar); - let expected = BooleanArray::from_slice(vec![false, false, true, true]); + let expected = BooleanArray::from_slice(&[false, false, true, true]); assert_eq!(real, expected); let scalar = BooleanScalar::new(Some(false)); let real = and_scalar(&array, &scalar); - let expected = BooleanArray::from_slice(vec![false, false, false, false]); + let expected = BooleanArray::from_slice(&[false, false, false, false]); assert_eq!(real, expected); } #[test] fn array_and_scalar_validity() { - let array = BooleanArray::from(vec![None, Some(false), Some(true)]); + let array = BooleanArray::from(&[None, Some(false), Some(true)]); let scalar = BooleanScalar::new(Some(true)); let real = and_scalar(&array, &scalar); - let expected = BooleanArray::from(vec![None, Some(false), Some(true)]); + let expected = BooleanArray::from(&[None, Some(false), Some(true)]); assert_eq!(real, expected); let scalar = BooleanScalar::new(None); let real = and_scalar(&array, &scalar); - let expected = BooleanArray::from(vec![None; 3]); + let expected = BooleanArray::from(&[None; 3]); assert_eq!(real, expected); - let array = BooleanArray::from_slice(vec![true, false, true]); + let array = BooleanArray::from_slice(&[true, false, true]); let real = and_scalar(&array, &scalar); - let expected = BooleanArray::from(vec![None; 3]); + let expected = BooleanArray::from(&[None; 3]); assert_eq!(real, expected); } #[test] fn array_or_scalar() { - let array = BooleanArray::from_slice(vec![false, false, true, true]); + let array = BooleanArray::from_slice(&[false, false, true, true]); let scalar = BooleanScalar::new(Some(true)); let real = or_scalar(&array, &scalar); - let expected = BooleanArray::from_slice(vec![true, true, true, true]); + let expected = BooleanArray::from_slice(&[true, true, true, true]); assert_eq!(real, expected); let scalar = BooleanScalar::new(Some(false)); let real = or_scalar(&array, &scalar); - let expected = BooleanArray::from_slice(vec![false, false, true, true]); + let expected = BooleanArray::from_slice(&[false, false, true, true]); assert_eq!(real, expected); } #[test] fn array_or_scalar_validity() { - let array = BooleanArray::from(vec![None, Some(false), Some(true)]); + let array = BooleanArray::from(&[None, Some(false), Some(true)]); let scalar = BooleanScalar::new(Some(true)); let real = or_scalar(&array, &scalar); - let expected = BooleanArray::from(vec![None, Some(true), Some(true)]); + let expected = BooleanArray::from(&[None, Some(true), Some(true)]); assert_eq!(real, expected); let scalar = BooleanScalar::new(None); let real = or_scalar(&array, &scalar); - let expected = BooleanArray::from(vec![None; 3]); + let expected = BooleanArray::from(&[None; 3]); assert_eq!(real, expected); - let array = BooleanArray::from_slice(vec![true, false, true]); + let array = BooleanArray::from_slice(&[true, false, true]); let real = and_scalar(&array, &scalar); - let expected = BooleanArray::from(vec![None; 3]); + let expected = BooleanArray::from(&[None; 3]); assert_eq!(real, expected); } From 4dfbe3f12695e392ac7f62a5d2d9bf1580f48dec Mon Sep 17 00:00:00 2001 From: Steven Gu Date: Sat, 25 Dec 2021 08:58:54 +0000 Subject: [PATCH 4/5] Replace `Bitmap::from_trusted_len_iter` with `Bitmap::new_zeroed` in `and_scalar` when scalar value is false. --- src/compute/boolean.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compute/boolean.rs b/src/compute/boolean.rs index 8fe2ab05493..60c5f2a107b 100644 --- a/src/compute/boolean.rs +++ b/src/compute/boolean.rs @@ -153,7 +153,7 @@ pub fn and_scalar(array: &BooleanArray, scalar: &BooleanScalar) -> BooleanArray match scalar.value() { Some(true) => array.clone(), Some(false) => { - let values = Bitmap::from_trusted_len_iter(std::iter::repeat(false).take(array.len())); + let values = Bitmap::new_zeroed(array.len()); BooleanArray::from_data(DataType::Boolean, values, array.validity().cloned()) } None => BooleanArray::new_null(DataType::Boolean, array.len()), From ad36ece0ad8ff269194b4c82fdfdf53b325ca2a8 Mon Sep 17 00:00:00 2001 From: Steven Gu Date: Sat, 25 Dec 2021 09:13:23 +0000 Subject: [PATCH 5/5] Update to `MutableBitmap::extend_constant` to initialize a Bitmap of all true values. --- src/compute/boolean.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/compute/boolean.rs b/src/compute/boolean.rs index 60c5f2a107b..4ef5b46b4b0 100644 --- a/src/compute/boolean.rs +++ b/src/compute/boolean.rs @@ -177,8 +177,9 @@ pub fn and_scalar(array: &BooleanArray, scalar: &BooleanScalar) -> BooleanArray pub fn or_scalar(array: &BooleanArray, scalar: &BooleanScalar) -> BooleanArray { match scalar.value() { Some(true) => { - let values = Bitmap::from_trusted_len_iter(std::iter::repeat(true).take(array.len())); - BooleanArray::from_data(DataType::Boolean, values, array.validity().cloned()) + let mut values = MutableBitmap::new(); + values.extend_constant(array.len(), true); + BooleanArray::from_data(DataType::Boolean, values.into(), array.validity().cloned()) } Some(false) => array.clone(), None => BooleanArray::new_null(DataType::Boolean, array.len()),