From 0f324b8aa3c0bb2e4369537af4399aae409342ad Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Fri, 7 Jan 2022 14:36:25 +0100 Subject: [PATCH 1/2] implement any kernel --- src/compute/boolean.rs | 36 +++++++++++++++++++++++++++++++ tests/it/compute/boolean.rs | 43 +++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) diff --git a/src/compute/boolean.rs b/src/compute/boolean.rs index 4ef5b46b4b0..6374716e585 100644 --- a/src/compute/boolean.rs +++ b/src/compute/boolean.rs @@ -185,3 +185,39 @@ pub fn or_scalar(array: &BooleanArray, scalar: &BooleanScalar) -> BooleanArray { None => BooleanArray::new_null(DataType::Boolean, array.len()), } } + +/// Check if any of the values in the array is `true` +pub fn any(array: &BooleanArray) -> bool { + if array.is_empty() { + false + } else if array.validity().is_some() { + array.into_iter().any(|v| v == Some(true)) + } else { + let vals = array.values(); + let (mut bytes, start, mut len) = vals.as_slice(); + if start != 0 { + if array.values_iter().take(8 - start).any(|v| v) { + return true; + } + bytes = &bytes[1..]; + len -= start; + } + // remaining part of last byte + let remainder = len % 8; + if remainder != 0 { + let last = bytes[bytes.len() - 1]; + for i in 0..remainder { + if last & 1 << i != 0 { + return true; + } + } + // exclude last byte + bytes = &bytes[..bytes.len() - 1]; + } + // Safety: + // we transmute from integer types and the align_to function deals with correct alignment. 
+ let (head, mid, tail) = unsafe { bytes.align_to::<u64>() }; + + head.iter().any(|&v| v != 0) || mid.iter().any(|&v| v != 0) || tail.iter().any(|&v| v != 0) + } +} diff --git a/tests/it/compute/boolean.rs b/tests/it/compute/boolean.rs index af77d98ed5b..22293219cee 100644 --- a/tests/it/compute/boolean.rs +++ b/tests/it/compute/boolean.rs @@ -1,6 +1,7 @@ use arrow2::array::*; use arrow2::compute::boolean::*; use arrow2::scalar::BooleanScalar; +use std::iter::FromIterator; #[test] fn array_and() { @@ -418,3 +419,45 @@ fn array_or_scalar_validity() { let expected = BooleanArray::from(&[None; 3]); assert_eq!(real, expected); } + +#[test] +fn test_any() { + let array = BooleanArray::from(&[None, Some(false), Some(true)]); + assert!(any(&array)); + let array = BooleanArray::from(&[None, Some(false), Some(false)]); + assert!(!any(&array)); + + // create some slices and offset to make test more interesting + let mut slices = Vec::with_capacity(8 * 10 + 1); + for offset in 0..8 { + for len in 20..30 { + slices.push((offset, len)) + } + } + + let all_true = BooleanArray::from_trusted_len_values_iter(std::iter::repeat(true).take(50)); + let all_false = BooleanArray::from_trusted_len_values_iter(std::iter::repeat(false).take(50)); + let various = BooleanArray::from_iter((0..50).map(|v| Some(v % 2 == 0))); + + for (offset, len) in slices { + let arr = all_true.slice(offset, len); + assert!(any(&arr)); + + let arr = all_false.slice(offset, len); + assert!(!any(&arr)); + + let arr = various.slice(offset, len); + assert!(any(&arr)); + } + let last_value = BooleanArray::from_iter((0..50).map(|v| Some(v == 49))); + assert!(any(&last_value)); + // slice of the true value + let arr = last_value.slice(0, 49); + assert!(!any(&arr)); + + let first_value = BooleanArray::from_iter((0..50).map(|v| Some(v == 0))); + assert!(any(&first_value)); + // slice of the true value + let arr = first_value.slice(1, 49); + assert!(!any(&arr)); +} From 8fdbbf492b25f7f0d0129af4caebb0b483c2cdbd Mon Sep 17
00:00:00 2001 From: Ritchie Vink Date: Sat, 8 Jan 2022 10:55:35 +0100 Subject: [PATCH 2/2] use cached null counts --- src/compute/boolean.rs | 34 ++++++++----------------- tests/it/compute/boolean.rs | 50 +++++++++++-------------------------- 2 files changed, 25 insertions(+), 59 deletions(-) diff --git a/src/compute/boolean.rs b/src/compute/boolean.rs index 6374716e585..e8e8c32dcd0 100644 --- a/src/compute/boolean.rs +++ b/src/compute/boolean.rs @@ -194,30 +194,16 @@ pub fn any(array: &BooleanArray) -> bool { array.into_iter().any(|v| v == Some(true)) } else { let vals = array.values(); - let (mut bytes, start, mut len) = vals.as_slice(); - if start != 0 { - if array.values_iter().take(8 - start).any(|v| v) { - return true; - } - bytes = &bytes[1..]; - len -= start; - } - // remaining part of last byte - let remainder = len % 8; - if remainder != 0 { - let last = bytes[bytes.len() - 1]; - for i in 0..remainder { - if last & 1 << i != 0 { - return true; - } - } - // exclude last byte - bytes = &bytes[..bytes.len() - 1]; - } - // Safety: - // we transmute from integer types and the align_to function deals with correct alignment. 
- let (head, mid, tail) = unsafe { bytes.align_to::<u64>() }; + vals.null_count() != 0 + } +} - head.iter().any(|&v| v != 0) || mid.iter().any(|&v| v != 0) || tail.iter().any(|&v| v != 0) +/// Check if all of the values in the array are `true` +pub fn all(array: &BooleanArray) -> bool { + if array.is_empty() || array.null_count() > 0 { + false + } else { + let vals = array.values(); + vals.null_count() == 0 } } diff --git a/tests/it/compute/boolean.rs b/tests/it/compute/boolean.rs index 22293219cee..822c8e432de 100644 --- a/tests/it/compute/boolean.rs +++ b/tests/it/compute/boolean.rs @@ -421,43 +421,23 @@ fn array_or_scalar_validity() { } #[test] -fn test_any() { +fn test_any_all() { let array = BooleanArray::from(&[None, Some(false), Some(true)]); assert!(any(&array)); + assert!(!all(&array)); let array = BooleanArray::from(&[None, Some(false), Some(false)]); assert!(!any(&array)); - - // create some slices and offset to make test more interesting - let mut slices = Vec::with_capacity(8 * 10 + 1); - for offset in 0..8 { - for len in 20..30 { - slices.push((offset, len)) - } - } - - let all_true = BooleanArray::from_trusted_len_values_iter(std::iter::repeat(true).take(50)); - let all_false = BooleanArray::from_trusted_len_values_iter(std::iter::repeat(false).take(50)); - let various = BooleanArray::from_iter((0..50).map(|v| Some(v % 2 == 0))); - - for (offset, len) in slices { - let arr = all_true.slice(offset, len); - assert!(any(&arr)); - - let arr = all_false.slice(offset, len); - assert!(!any(&arr)); - - let arr = various.slice(offset, len); - assert!(any(&arr)); - } - let last_value = BooleanArray::from_iter((0..50).map(|v| Some(v == 49))); - assert!(any(&last_value)); - // slice of the true value - let arr = last_value.slice(0, 49); - assert!(!any(&arr)); - - let first_value = BooleanArray::from_iter((0..50).map(|v| Some(v == 0))); - assert!(any(&first_value)); - // slice of the true value - let arr = first_value.slice(1, 49); - assert!(!any(&arr)); +
assert!(!all(&array)); + let array = BooleanArray::from(&[None, Some(true), Some(true)]); + assert!(!all(&array)); + assert!(any(&array)); + let array = BooleanArray::from_iter(std::iter::repeat(false).take(10).map(Some)); + assert!(!any(&array)); + assert!(!all(&array)); + let array = BooleanArray::from_iter(std::iter::repeat(true).take(10).map(Some)); + assert!(all(&array)); + assert!(any(&array)); + let array = BooleanArray::from_iter([true, false, true, true].map(Some)); + assert!(!all(&array)); + assert!(any(&array)); }