diff --git a/benches/filter_kernels.rs b/benches/filter_kernels.rs deleted file mode 100644 index 4ed1205f721..00000000000 --- a/benches/filter_kernels.rs +++ /dev/null @@ -1,141 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -extern crate arrow2; - -use std::sync::Arc; - -use arrow2::array::*; -use arrow2::compute::filter::{build_filter, filter, filter_record_batch, Filter}; -use arrow2::datatypes::{DataType, Field, Schema}; -use arrow2::record_batch::RecordBatch; -use arrow2::util:: - -use criterion::{criterion_group, criterion_main, Criterion}; - -fn bench_filter(data_array: &dyn Array, filter_array: &BooleanArray) { - criterion::black_box(filter(data_array, filter_array).unwrap()); -} - -fn bench_built_filter<'a>(filter: &Filter<'a>, array: &impl Array) { - criterion::black_box(filter(array)); -} - -fn add_benchmark(c: &mut Criterion) { - // scaling benchmarks - (10..=20).step_by(2).for_each(|log2_size| { - let size = 2usize.pow(log2_size); - - let filter_array = create_boolean_array(size, 0.0, 0.9); - let filter_array = - BooleanArray::from_data(DataType::Boolean, filter_array.values().clone(), None); - - let arr_a = create_primitive_array::(size, DataType::Float32, 0.0); - c.bench_function(&format!("filter 2^{} f32", log2_size), |b| { - b.iter(|| bench_filter(&arr_a, &filter_array)) - }); - - let arr_a = create_primitive_array::(size, DataType::Float32, 0.1); - - c.bench_function(&format!("filter null 2^{} f32", log2_size), |b| { - b.iter(|| bench_filter(&arr_a, &filter_array)) - }); - }); - - let size = 65536; - let filter_array = create_boolean_array(size, 0.0, 0.5); - let dense_filter_array = create_boolean_array(size, 0.0, 1.0 - 1.0 / 1024.0); - let sparse_filter_array = create_boolean_array(size, 0.0, 1.0 / 1024.0); - - let filter = build_filter(&filter_array).unwrap(); - let dense_filter = build_filter(&dense_filter_array).unwrap(); - let sparse_filter = build_filter(&sparse_filter_array).unwrap(); - - let data_array = create_primitive_array::(size, DataType::UInt8, 0.0); - - c.bench_function("filter u8", |b| { - b.iter(|| bench_filter(&data_array, &filter_array)) - }); - c.bench_function("filter u8 high selectivity", |b| { - b.iter(|| bench_filter(&data_array, &dense_filter_array)) - }); - c.bench_function("filter u8 low selectivity", |b| { - b.iter(|| bench_filter(&data_array, &sparse_filter_array)) - }); - - c.bench_function("filter context u8", |b| { - b.iter(|| bench_built_filter(&filter, &data_array)) - }); - c.bench_function("filter context u8 high selectivity", |b| { - b.iter(|| bench_built_filter(&dense_filter, &data_array)) - }); - c.bench_function("filter context u8 low selectivity", |b| { - b.iter(|| bench_built_filter(&sparse_filter, &data_array)) - }); - - let data_array = create_primitive_array::(size, DataType::UInt8, 0.5); - c.bench_function("filter context u8 w NULLs", |b| { - b.iter(|| bench_built_filter(&filter, &data_array)) - }); - c.bench_function("filter context u8 w NULLs high selectivity", |b| { - b.iter(|| bench_built_filter(&dense_filter, &data_array)) - }); - c.bench_function("filter context u8 w NULLs low selectivity", |b| { - b.iter(|| bench_built_filter(&sparse_filter, &data_array)) - }); - - let data_array = create_primitive_array::(size, DataType::Float32, 0.5); - c.bench_function("filter f32", |b| { - b.iter(|| bench_filter(&data_array, &filter_array)) - }); - c.bench_function("filter f32 high selectivity", |b| { - b.iter(|| bench_filter(&data_array, &dense_filter_array)) - }); - c.bench_function("filter context f32", |b| { - b.iter(|| bench_built_filter(&filter, &data_array)) - }); - c.bench_function("filter context f32 high selectivity", |b| { - b.iter(|| bench_built_filter(&dense_filter, &data_array)) - }); - c.bench_function("filter context f32 low selectivity", |b| { - b.iter(|| bench_built_filter(&sparse_filter, &data_array)) - }); - - let data_array = create_string_array::(size, 4, 0.5, 42); - c.bench_function("filter context string", |b| { - b.iter(|| bench_built_filter(&filter, &data_array)) - }); - c.bench_function("filter context string high selectivity", |b| { - b.iter(|| bench_built_filter(&dense_filter, &data_array)) - }); - c.bench_function("filter context string low selectivity", |b| { - b.iter(|| bench_built_filter(&sparse_filter, &data_array)) - }); - - let data_array = create_primitive_array::(size, DataType::Float32, 0.0); - - let field = Field::new("c1", data_array.data_type().clone(), true); - let schema = Schema::new(vec![field]); - - let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(data_array)]).unwrap(); - - c.bench_function("filter single record batch", |b| { - b.iter(|| filter_record_batch(&batch, &filter_array)) - }); -} - -criterion_group!(benches, add_benchmark); -criterion_main!(benches); diff --git a/src/array/fixed_size_list/iterator.rs b/src/array/fixed_size_list/iterator.rs index 6b222062882..69bf7d595c3 100644 --- a/src/array/fixed_size_list/iterator.rs +++ b/src/array/fixed_size_list/iterator.rs @@ -7,7 +7,7 @@ use super::FixedSizeListArray; impl IterableListArray for FixedSizeListArray { unsafe fn value_unchecked(&self, i: usize) -> Box { - FixedSizeListArray::value(self, i) + FixedSizeListArray::value_unchecked(self, i) } } diff --git a/src/array/fixed_size_list/mod.rs b/src/array/fixed_size_list/mod.rs index a8cce16b5c7..c92d0de8d73 100644 --- a/src/array/fixed_size_list/mod.rs +++ b/src/array/fixed_size_list/mod.rs @@ -103,12 +103,23 @@ impl FixedSizeListArray { } /// Returns the `Vec` at position `i`. + /// # Panic: + /// panics iff `i >= self.len()` #[inline] pub fn value(&self, i: usize) -> Box { self.values .slice(i * self.size as usize, self.size as usize) } + /// Returns the `Vec` at position `i`. + /// # Safety: + /// Caller must ensure that `i < self.len()` + #[inline] + pub unsafe fn value_unchecked(&self, i: usize) -> Box { + self.values + .slice_unchecked(i * self.size as usize, self.size as usize) + } + /// Sets the validity bitmap on this [`FixedSizeListArray`]. /// # Panic /// This function panics iff `validity.len() != self.len()`. diff --git a/src/array/list/mod.rs b/src/array/list/mod.rs index 9095aa9d3c0..913c745215a 100644 --- a/src/array/list/mod.rs +++ b/src/array/list/mod.rs @@ -80,7 +80,10 @@ impl ListArray { let offset_1 = offsets[i + 1]; let length = (offset_1 - offset).to_usize(); - self.values.slice(offset.to_usize(), length) + // Safety: + // One of the invariants of the struct + // is that offsets are in bounds + unsafe { self.values.slice_unchecked(offset.to_usize(), length) } } } @@ -93,7 +96,7 @@ impl ListArray { let offset_1 = *self.offsets.as_ptr().add(i + 1); let length = (offset_1 - offset).to_usize(); - self.values.slice(offset.to_usize(), length) + self.values.slice_unchecked(offset.to_usize(), length) } /// Returns a slice of this [`ListArray`].