From c3222aa679f943754d472410c4eb4a9540d2391b Mon Sep 17 00:00:00 2001 From: Jorge Leitao Date: Thu, 25 Nov 2021 17:32:10 +0100 Subject: [PATCH] Faster `take` with null values (2-3x) (#633) --- benches/take_kernels.rs | 11 ++++++++- src/compute/take/boolean.rs | 36 ++++++------------------------ src/compute/take/generic_binary.rs | 36 ++++++++---------------------- src/compute/take/primitive.rs | 36 ++++++------------------------ 4 files changed, 33 insertions(+), 86 deletions(-) diff --git a/benches/take_kernels.rs b/benches/take_kernels.rs index 50de3533dbb..646d0ebd3ca 100644 --- a/benches/take_kernels.rs +++ b/benches/take_kernels.rs @@ -54,6 +54,11 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_take(&values, &indices)) }); + let values_nulls = create_boolean_array(size, 0.2, 0.5); + c.bench_function(&format!("take bool values nulls 2^{}", log2_size), |b| { + b.iter(|| bench_take(&values_nulls, &indices)) + }); + c.bench_function(&format!("take bool nulls 2^{}", log2_size), |b| { b.iter(|| bench_take(&values, &indices_nulls)) }); @@ -63,10 +68,14 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_take(&values, &indices)) }); - let values = create_string_array::(512, 4, 0.0, 42); c.bench_function(&format!("take str nulls 2^{}", log2_size), |b| { b.iter(|| bench_take(&values, &indices_nulls)) }); + + let values_nulls = create_string_array::(size, 4, 0.2, 42); + c.bench_function(&format!("take str values nulls 2^{}", log2_size), |b| { + b.iter(|| bench_take(&values_nulls, &indices)) + }); }); let values = create_string_array::(512, 4, 0.0, 42); diff --git a/src/compute/take/boolean.rs b/src/compute/take/boolean.rs index c49b7e50d9b..a676e652c92 100644 --- a/src/compute/take/boolean.rs +++ b/src/compute/take/boolean.rs @@ -1,20 +1,3 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - use crate::{ array::{Array, BooleanArray, PrimitiveArray}, bitmap::{Bitmap, MutableBitmap}, @@ -35,21 +18,16 @@ fn take_values_validity( values: &BooleanArray, indices: &[I], ) -> (Bitmap, Option) { - let mut validity = MutableBitmap::with_capacity(indices.len()); - let validity_values = values.validity().unwrap(); + let validity = indices + .iter() + .map(|index| validity_values.get_bit(index.to_usize())); + let validity = Bitmap::from_trusted_len_iter(validity); let values_values = values.values(); - - let values = indices.iter().map(|index| { - let index = index.to_usize(); - if validity_values.get_bit(index) { - validity.push(true); - } else { - validity.push(false); - } - values_values.get_bit(index) - }); + let values = indices + .iter() + .map(|index| values_values.get_bit(index.to_usize())); let buffer = Bitmap::from_trusted_len_iter(values); (buffer, validity.into()) diff --git a/src/compute/take/generic_binary.rs b/src/compute/take/generic_binary.rs index 22a8a707199..733cb56c55a 100644 --- a/src/compute/take/generic_binary.rs +++ b/src/compute/take/generic_binary.rs @@ -1,19 +1,3 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. use crate::{ array::{GenericBinaryArray, Offset, PrimitiveArray}, bitmap::{Bitmap, MutableBitmap}, @@ -66,25 +50,23 @@ pub fn take_values_validity>( values: &A, indices: &[I], ) -> (Buffer, Buffer, Option) { + let validity_values = values.validity().unwrap(); + let validity = indices + .iter() + .map(|index| validity_values.get_bit(index.to_usize())); + let validity = Bitmap::from_trusted_len_iter(validity); + let mut length = O::default(); - let mut validity = MutableBitmap::with_capacity(indices.len()); - let null_values = values.validity().unwrap(); let offsets = values.offsets(); let values_values = values.values(); let mut starts = MutableBuffer::::with_capacity(indices.len()); let offsets = indices.iter().map(|index| { let index = index.to_usize(); - if null_values.get_bit(index) { - validity.push(true); - let start = offsets[index]; - length += offsets[index + 1] - start; - starts.push(start); - } else { - validity.push(false); - starts.push(O::default()); - } + let start = offsets[index]; + length += offsets[index + 1] - start; + starts.push(start); length }); let offsets = std::iter::once(O::default()).chain(offsets); diff --git a/src/compute/take/primitive.rs b/src/compute/take/primitive.rs index dd015dfdab5..5f88643a112 100644 --- a/src/compute/take/primitive.rs +++ b/src/compute/take/primitive.rs @@ -1,19 +1,3 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. use crate::{ array::{Array, PrimitiveArray}, bitmap::{Bitmap, MutableBitmap}, @@ -39,24 +23,18 @@ fn take_values_validity( values: &PrimitiveArray, indices: &[I], ) -> (Buffer, Option) { - let mut null = MutableBitmap::with_capacity(indices.len()); + let values_validity = values.validity().unwrap(); - let null_values = values.validity().unwrap(); + let validity = indices + .iter() + .map(|index| values_validity.get_bit(index.to_usize())); + let validity = MutableBitmap::from_trusted_len_iter(validity); let values_values = values.values(); - - let values = indices.iter().map(|index| { - let index = index.to_usize(); - if null_values.get_bit(index) { - null.push(true); - } else { - null.push(false); - } - values_values[index] - }); + let values = indices.iter().map(|index| values_values[index.to_usize()]); let buffer = MutableBuffer::from_trusted_len_iter(values); - (buffer.into(), null.into()) + (buffer.into(), validity.into()) } // take implementation when only indices contain nulls