diff --git a/benches/comparison_kernels.rs b/benches/comparison_kernels.rs index 2c41f8dbd62..be76eb05976 100644 --- a/benches/comparison_kernels.rs +++ b/benches/comparison_kernels.rs @@ -42,6 +42,16 @@ fn add_benchmark(c: &mut Criterion) { c.bench_function(&format!("bool scalar 2^{}", log2_size), |b| { b.iter(|| bench_op_scalar(&arr_a, &BooleanScalar::from(Some(true)), Operator::Eq)) }); + + let arr_a = create_string_array::(size, 0.1, 42); + let arr_b = create_string_array::(size, 0.1, 43); + c.bench_function(&format!("utf8 2^{}", log2_size), |b| { + b.iter(|| bench_op(&arr_a, &arr_b, Operator::Eq)) + }); + + c.bench_function(&format!("utf8 scalar 2^{}", log2_size), |b| { + b.iter(|| bench_op_scalar(&arr_a, &Utf8Scalar::::from(Some("abc")), Operator::Eq)) + }); }) } diff --git a/benches/filter_kernels.rs b/benches/filter_kernels.rs index 4ff8b0428d3..65aaad86751 100644 --- a/benches/filter_kernels.rs +++ b/benches/filter_kernels.rs @@ -113,7 +113,7 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_built_filter(&sparse_filter, &data_array)) }); - let data_array = create_string_array::(size, 0.5); + let data_array = create_string_array::(size, 0.5, 42); c.bench_function("filter context string", |b| { b.iter(|| bench_built_filter(&filter, &data_array)) }); diff --git a/benches/sort_kernel.rs b/benches/sort_kernel.rs index e78e4462b04..780b3619991 100644 --- a/benches/sort_kernel.rs +++ b/benches/sort_kernel.rs @@ -80,7 +80,7 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_lexsort(&arr_a, &arr_b)) }); - let arr_a = create_string_array::(size, 0.1); + let arr_a = create_string_array::(size, 0.1, 42); c.bench_function(&format!("sort utf8 null 2^{}", log2_size), |b| { b.iter(|| bench_sort(&arr_a)) }); diff --git a/benches/take_kernels.rs b/benches/take_kernels.rs index 9303a719f34..a66ae305035 100644 --- a/benches/take_kernels.rs +++ b/benches/take_kernels.rs @@ -91,36 +91,36 @@ fn add_benchmark(c: &mut Criterion) { b.iter(|| bench_take(&values, &indices)) });
- let values = create_string_array::(512, 0.0); + let values = create_string_array::(512, 0.0, 42); let indices = create_random_index(512, 0.0); c.bench_function("take str 512", |b| b.iter(|| bench_take(&values, &indices))); - let values = create_string_array::(1024, 0.0); + let values = create_string_array::(1024, 0.0, 42); let indices = create_random_index(1024, 0.0); c.bench_function("take str 1024", |b| { b.iter(|| bench_take(&values, &indices)) }); - let values = create_string_array::(512, 0.0); + let values = create_string_array::(512, 0.0, 42); let indices = create_random_index(512, 0.5); c.bench_function("take str null indices 512", |b| { b.iter(|| bench_take(&values, &indices)) }); - let values = create_string_array::(1024, 0.0); + let values = create_string_array::(1024, 0.0, 42); let indices = create_random_index(1024, 0.5); c.bench_function("take str null indices 1024", |b| { b.iter(|| bench_take(&values, &indices)) }); - let values = create_string_array::(1024, 0.5); + let values = create_string_array::(1024, 0.5, 42); let indices = create_random_index(1024, 0.0); c.bench_function("take str null values 1024", |b| { b.iter(|| bench_take(&values, &indices)) }); - let values = create_string_array::(1024, 0.5); + let values = create_string_array::(1024, 0.5, 42); let indices = create_random_index(1024, 0.5); c.bench_function("take str null values null indices 1024", |b| { b.iter(|| bench_take(&values, &indices)) diff --git a/benches/write_ipc.rs b/benches/write_ipc.rs index 23edc33b5f9..a91b8e5da67 100644 --- a/benches/write_ipc.rs +++ b/benches/write_ipc.rs @@ -35,7 +35,7 @@ fn add_benchmark(c: &mut Criterion) { }); (0..=10).step_by(2).for_each(|i| { - let array = &create_string_array::(1024 * 2usize.pow(i), 0.1); + let array = &create_string_array::(1024 * 2usize.pow(i), 0.1, 42); let a = format!("write utf8 2^{}", 10 + i); c.bench_function(&a, |b| b.iter(|| write(array).unwrap())); }); diff --git a/benches/write_parquet.rs b/benches/write_parquet.rs 
index 7cff2c13052..d1a8ff37ae7 100644 --- a/benches/write_parquet.rs +++ b/benches/write_parquet.rs @@ -55,13 +55,13 @@ fn add_benchmark(c: &mut Criterion) { }); (0..=10).step_by(2).for_each(|i| { - let array = &create_string_array::(1024 * 2usize.pow(i), 0.1); + let array = &create_string_array::(1024 * 2usize.pow(i), 0.1, 42); let a = format!("write utf8 2^{}", 10 + i); c.bench_function(&a, |b| b.iter(|| write(array, Encoding::Plain).unwrap())); }); (0..=10).step_by(2).for_each(|i| { - let array = &create_string_array::(1024 * 2usize.pow(i), 0.1); + let array = &create_string_array::(1024 * 2usize.pow(i), 0.1, 42); let a = format!("write utf8 delta 2^{}", 10 + i); c.bench_function(&a, |b| { b.iter(|| write(array, Encoding::DeltaLengthByteArray).unwrap()) diff --git a/src/compute/comparison/utf8.rs b/src/compute/comparison/utf8.rs index 4f12f2059c1..e36b89167ea 100644 --- a/src/compute/comparison/utf8.rs +++ b/src/compute/comparison/utf8.rs @@ -37,12 +37,9 @@ where let validity = combine_validities(lhs.validity(), rhs.validity()); let values = lhs - .iter() - .zip(rhs.iter()) - .map(|(lhs, rhs)| match (lhs, rhs) { - (Some(lhs), Some(rhs)) => op(lhs, rhs), - _ => false, - }); + .values_iter() + .zip(rhs.values_iter()) + .map(|(lhs, rhs)| op(lhs, rhs)); let values = Bitmap::from_trusted_len_iter(values); Ok(BooleanArray::from_data(values, validity)) @@ -57,10 +54,7 @@ where { let validity = lhs.validity().clone(); - let values = lhs.iter().map(|lhs| match lhs { - None => false, - Some(lhs) => op(lhs, rhs), - }); + let values = lhs.values_iter().map(|lhs| op(lhs, rhs)); let values = Bitmap::from_trusted_len_iter(values); BooleanArray::from_data(values, validity) diff --git a/src/util/bench_util.rs b/src/util/bench_util.rs index 1216d0d4c08..14cd3bc11a3 100644 --- a/src/util/bench_util.rs +++ b/src/util/bench_util.rs @@ -95,15 +95,15 @@ where } /// Creates an random (but fixed-seeded) array of a given size and null density -pub fn create_string_array(size: usize, 
null_density: f32) -> Utf8Array { - let rng = &mut seedable_rng(); +pub fn create_string_array(size: usize, null_density: f32, seed: u64) -> Utf8Array { + let mut rng = StdRng::seed_from_u64(seed); (0..size) .map(|_| { if rng.gen::() < null_density { None } else { - let value = rng + let value = (&mut rng) .sample_iter(&Alphanumeric) .take(4) .map(char::from)