Skip to content

Commit

Permalink
Merge pull request #4484 from LiuYuHui/simd-bool
Browse files Browse the repository at this point in the history
ISSUE-4374: Simd selected for BooleanColumn
  • Loading branch information
BohuTANG authored Mar 20, 2022
2 parents 06a5058 + 545fffb commit be97814
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 8 deletions.
42 changes: 34 additions & 8 deletions common/datavalues/src/columns/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
use std::sync::Arc;

use common_arrow::arrow::array::*;
use common_arrow::arrow::bitmap::utils::BitChunkIterExact;
use common_arrow::arrow::bitmap::utils::BitChunksExact;
use common_arrow::arrow::bitmap::Bitmap;
use common_arrow::arrow::bitmap::MutableBitmap;
use common_arrow::arrow::datatypes::DataType as ArrowType;
Expand Down Expand Up @@ -103,17 +105,41 @@ impl Column for BooleanColumn {
}

fn filter(&self, filter: &BooleanColumn) -> ColumnRef {
if filter.values().null_count() == 0 {
let selected = filter.values().len() - filter.values().null_count();
if selected == self.len() {
return Arc::new(self.clone());
}
let iter = self
.values()
.iter()
.zip(filter.values().iter())
.filter(|(_, b)| *b)
.map(|(a, _)| a);
let mut bitmap = MutableBitmap::with_capacity(selected);
let (value_slice, _, value_length) = self.values().as_slice();
let (slice, _, length) = filter.values().as_slice();

let mut chunks = BitChunksExact::<u64>::new(value_slice, value_length);
let mut mask_chunks = BitChunksExact::<u64>::new(slice, length);

chunks
.by_ref()
.zip(mask_chunks.by_ref())
.for_each(|(chunk, mut mask)| {
while mask != 0 {
let n = mask.trailing_zeros() as usize;
let value: bool = chunk & (1 << n) != 0;
bitmap.push(value);
mask = mask & (mask - 1);
}
});

chunks
.remainder_iter()
.zip(mask_chunks.remainder_iter())
.for_each(|(value, is_selected)| {
if is_selected {
bitmap.push(value);
}
});

let col = Self::from_iterator(iter);
let col = BooleanColumn {
values: bitmap.into(),
};
Arc::new(col)
}

Expand Down
46 changes: 46 additions & 0 deletions common/datavalues/tests/it/columns/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use common_arrow::arrow::bitmap::Bitmap;
use common_arrow::arrow::bitmap::MutableBitmap;
use common_datavalues::prelude::*;

#[test]
Expand Down Expand Up @@ -49,3 +51,47 @@ fn test_boolean_column() {
let slice = data_column.slice(0, N / 2);
assert!(slice.len() == N / 2);
}

#[test]
fn test_filter_column() {
    const N: usize = 1000;

    // Row `i` of the column under test is `true` exactly when `i` is even.
    let is_even = |i: usize| i % 2 == 0;
    let data_column = BooleanColumn::from_iterator((0..N).map(is_even));

    // Each case is described by the predicate that decides which rows are kept:
    // keep everything, keep nothing, keep every third row.
    let predicates: [fn(usize) -> bool; 3] = [
        |_: usize| true,
        |_: usize| false,
        |i: usize| i % 3 == 0,
    ];

    for keep in predicates {
        let mask = BooleanColumn::from_iterator((0..N).map(keep));

        // Expected output: the source values at the kept row indices, in order.
        let expected: Bitmap =
            MutableBitmap::from_iter((0..N).filter(|&i| keep(i)).map(is_even)).into();

        let filtered = data_column.filter(&mask);
        let actual = filtered
            .as_any()
            .downcast_ref::<BooleanColumn>()
            .unwrap();
        assert_eq!(actual.values(), &expected);
    }
}

0 comments on commit be97814

Please sign in to comment.