From ba54cfe837823a8d72f8f7a4a87735d7dba8f989 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Thu, 2 Dec 2021 09:22:15 +0100 Subject: [PATCH 1/2] fix filter boolean array with masked out true bits --- src/compute/filter.rs | 10 ++++++++++ tests/it/compute/filter.rs | 16 ++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/compute/filter.rs b/src/compute/filter.rs index a5e0f61f9be..c960626fb63 100644 --- a/src/compute/filter.rs +++ b/src/compute/filter.rs @@ -1,6 +1,7 @@ //! Contains operators to filter arrays such as [`filter`]. use crate::array::growable::{make_growable, Growable}; use crate::bitmap::{utils::SlicesIterator, Bitmap, MutableBitmap}; +use crate::datatypes::DataType; use crate::record_batch::RecordBatch; use crate::{array::*, types::NativeType}; use crate::{buffer::MutableBuffer, error::Result}; @@ -127,6 +128,15 @@ pub fn build_filter(filter: &BooleanArray) -> Result { /// # } /// ``` pub fn filter(array: &dyn Array, filter: &BooleanArray) -> Result> { + // The validities may be masking out `true` bits, making the filter operation + // based on the values incorrect + if let Some(validities) = filter.validity() { + let values = filter.values(); + let new_values = values & validities; + let filter = BooleanArray::from_data(DataType::Boolean, new_values, None); + return crate::compute::filter::filter(array, &filter); + } + use crate::datatypes::PhysicalType::*; match array.data_type().to_physical_type() { Primitive(primitive) => with_match_primitive_type!(primitive, |$T| { diff --git a/tests/it/compute/filter.rs b/tests/it/compute/filter.rs index 1fce99ab91c..f5f2cbc18ed 100644 --- a/tests/it/compute/filter.rs +++ b/tests/it/compute/filter.rs @@ -1,4 +1,5 @@ use arrow2::array::*; +use arrow2::bitmap::Bitmap; use arrow2::compute::filter::*; #[test] @@ -112,6 +113,21 @@ fn binary_array_with_null() { assert!(d.is_null(1)); } +#[test] +fn masked_true_values() { + let a = Int32Array::from_slice(&[1, 2, 3]); + let b = BooleanArray::from_slice(&[true, false, true]); + let validity = Bitmap::from(&[true, false, false]); + + let b = b.with_validity(Some(validity)); + + let c = filter(&a, &b).unwrap(); + + let expected = Int32Array::from_slice(&[1]); + + assert_eq!(expected, c.as_ref()); +} + /* #[test] fn dictionary_array() { From ffbf82a7c300be8b58f8922f9b79702c490ab743 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Thu, 2 Dec 2021 10:23:42 +0100 Subject: [PATCH 2/2] update documentation on null behavior --- src/compute/filter.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/compute/filter.rs b/src/compute/filter.rs index c960626fb63..403df3e0f73 100644 --- a/src/compute/filter.rs +++ b/src/compute/filter.rs @@ -111,8 +111,10 @@ pub fn build_filter(filter: &BooleanArray) -> Result { } /// Filters an [Array], returning elements matching the filter (i.e. where the values are true). -/// WARNING: the nulls of `filter` are ignored and the value on its slot is considered. -/// Therefore, it is considered undefined behavior to pass `filter` with null values. +/// +/// Note that the nulls of `filter` are interpreted as `false` will lead to these elements being +/// masked out. +/// /// # Example /// ```rust /// # use arrow2::array::{Int32Array, PrimitiveArray, BooleanArray};