diff --git a/benches/assign_ops.rs b/benches/assign_ops.rs index 0376edccf08..35f29b643c2 100644 --- a/benches/assign_ops.rs +++ b/benches/assign_ops.rs @@ -1,6 +1,10 @@ use criterion::{criterion_group, criterion_main, Criterion}; -use arrow2::{compute::arithmetics::basic::mul_scalar, util::bench_util::create_primitive_array}; +use arrow2::compute::arity_assign::binary; +use arrow2::{ + compute::arithmetics::basic::{mul, mul_scalar}, + util::bench_util::*, +}; fn add_benchmark(c: &mut Criterion) { (10..=20).step_by(2).for_each(|log2_size| { @@ -22,6 +26,26 @@ fn add_benchmark(c: &mut Criterion) { assert!(!a.value(10).is_nan()); }) }); + + let mut arr_a = create_primitive_array::(size, 0.2); + let mut arr_b = create_primitive_array_with_seed::(size, 0.2, 10); + // convert to be close to 1.01 + arr_b.apply_values(|x| x.iter_mut().for_each(|x| *x = 1.01 + *x / 20.0)); + + c.bench_function(&format!("apply_mul null 2^{}", log2_size), |b| { + b.iter(|| { + binary(criterion::black_box(&mut arr_a), &arr_b, |x, y| x * y); + assert!(!arr_a.value(10).is_nan()); + }) + }); + + let arr_a = create_primitive_array::(size, 0.2); + c.bench_function(&format!("mul null 2^{}", log2_size), |b| { + b.iter(|| { + let a = mul(criterion::black_box(&arr_a), &arr_b); + assert!(!a.value(10).is_nan()); + }) + }); }); } diff --git a/src/compute/arity_assign.rs b/src/compute/arity_assign.rs new file mode 100644 index 00000000000..d10b100117d --- /dev/null +++ b/src/compute/arity_assign.rs @@ -0,0 +1,57 @@ +//! Defines generics suitable to perform operations to [`PrimitiveArray`] in-place. + +use super::utils::check_same_len; +use crate::{array::PrimitiveArray, types::NativeType}; + +/// Applies a unary function to a [`PrimitiveArray`] in-place via cow semantics. +/// +/// # Implementation +/// This is the fastest method to apply a unary operation and it is often vectorized (SIMD). +/// +/// # Panics +/// This function panics iff 
+/// * the function itself panics. +#[inline] +pub fn unary(array: &mut PrimitiveArray, op: F) +where + I: NativeType, + F: Fn(I) -> I, +{ + array.apply_values(|values| values.iter_mut().for_each(|v| *v = op(*v))); +} + +/// Applies a binary operation to two [`PrimitiveArray`], applying the operation +/// in-place to the `lhs` via cow semantics; a slot of `lhs` stays valid only if it is valid in both inputs. +/// +/// # Implementation +/// This is the fastest way to perform a binary operation and it is often vectorized (SIMD). +/// # Panics +/// This function panics iff +/// * the arrays have a different length. +/// * the function itself panics. +#[inline] +pub fn binary(lhs: &mut PrimitiveArray, rhs: &PrimitiveArray, op: F) +where + T: NativeType, + D: NativeType, + F: Fn(T, D) -> T, +{ + check_same_len(lhs, rhs).unwrap(); + + match rhs.validity() { + None => {} + Some(rhs) => { + if lhs.validity().is_none() { + *lhs = lhs.with_validity(Some(rhs.clone())) + } else { + lhs.apply_validity(|mut lhs| lhs &= rhs) + } + } + } + + lhs.apply_values(|x| { + x.iter_mut() + .zip(rhs.values().iter()) + .for_each(|(l, r)| *l = op(*l, *r)) + }); +} diff --git a/src/compute/mod.rs b/src/compute/mod.rs index 245f70302e2..d4f3ea24db4 100644 --- a/src/compute/mod.rs +++ b/src/compute/mod.rs @@ -18,6 +18,7 @@ pub mod aggregate; #[cfg_attr(docsrs, doc(cfg(feature = "compute_arithmetics")))] pub mod arithmetics; pub mod arity; +pub mod arity_assign; #[cfg(feature = "compute_bitwise")] #[cfg_attr(docsrs, doc(cfg(feature = "compute_bitwise")))] pub mod bitwise; diff --git a/tests/it/compute/arity_assign.rs b/tests/it/compute/arity_assign.rs new file mode 100644 index 00000000000..b3581e2fa3e --- /dev/null +++ b/tests/it/compute/arity_assign.rs @@ -0,0 +1,21 @@ +use arrow2::array::Int32Array; +use arrow2::compute::arity_assign::{binary, unary}; + +#[test] +fn test_unary_assign() { + let mut a = Int32Array::from([Some(5), Some(6), None, Some(10)]); + + unary(&mut a, |x| x + 10); + + assert_eq!(a, Int32Array::from([Some(15), Some(16), None, 
Some(20)])) +} + +#[test] +fn test_binary_assign() { + let mut a = Int32Array::from([Some(5), Some(6), None, Some(10)]); + let b = Int32Array::from([Some(1), Some(2), Some(1), None]); + + binary(&mut a, &b, |x, y| x + y); + + assert_eq!(a, Int32Array::from([Some(6), Some(8), None, None])) +} diff --git a/tests/it/compute/mod.rs b/tests/it/compute/mod.rs index f4972c21940..2a298cf2bd2 100644 --- a/tests/it/compute/mod.rs +++ b/tests/it/compute/mod.rs @@ -46,3 +46,5 @@ mod temporal; mod utf8; #[cfg(feature = "compute_window")] mod window; + +mod arity_assign;