Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Improve arity_assign: ~2x improvement if we share data. #1076

Merged
merged 8 commits into from
Jun 16, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 38 additions & 3 deletions src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,12 +245,41 @@ impl<T: NativeType> PrimitiveArray<T> {
/// This function panics iff `validity.len() != self.len()`.
#[must_use]
pub fn with_validity(&self, validity: Option<Bitmap>) -> Self {
let mut out = self.clone();
out.set_validity(validity);
out
}

/// Update the validity buffer of this [`PrimitiveArray`].
/// # Panics
/// This function panics iff `values.len() != self.len()`.
pub fn set_validity(&mut self, validity: Option<Bitmap>) {
if matches!(&validity, Some(bitmap) if bitmap.len() != self.len()) {
panic!("validity should be as least as large as the array")
}
let mut arr = self.clone();
arr.validity = validity;
arr
self.validity = validity;
}

/// Returns a clone of this [`PrimitiveArray`] with a new values.
/// # Panics
/// This function panics iff `values.len() != self.len()`.
#[must_use]
pub fn with_values(&self, values: Vec<T>) -> Self {
let mut out = self.clone();
out.set_values(values);
out
}

/// Update the values buffer of this [`PrimitiveArray`].
/// # Panics
/// This function panics iff `values.len() != self.len()`.
pub fn set_values(&mut self, values: Buffer<T>) {
assert_eq!(
values.len(),
self.len(),
"values length should be equal to this arrays length"
);
self.values = values;
}

/// Applies a function `f` to the values of this array, cloning the values
Expand Down Expand Up @@ -511,3 +540,9 @@ pub type UInt16Vec = MutablePrimitiveArray<u16>;
pub type UInt32Vec = MutablePrimitiveArray<u32>;
/// A type definition [`MutablePrimitiveArray`] for `u64`
pub type UInt64Vec = MutablePrimitiveArray<u64>;

impl<T: NativeType> Default for PrimitiveArray<T> {
fn default() -> Self {
PrimitiveArray::new(T::PRIMITIVE.into(), Default::default(), None)
}
}
26 changes: 26 additions & 0 deletions src/array/primitive/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,32 @@ impl<T: NativeType> MutablePrimitiveArray<T> {
pub fn into_data(self) -> (DataType, Vec<T>, Option<MutableBitmap>) {
(self.data_type, self.values, self.validity)
}

/// Applies a function `f` to the values of this array, cloning the values
/// iff they are being shared with others
///
/// This is an API to use clone-on-write
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
pub fn apply_values<F: Fn(&mut [T])>(&mut self, f: F) {
f(&mut self.values);
}

/// Applies a function `f` to the validity of this array, cloning it
/// iff it is being shared.
///
/// This is an API to leverage clone-on-write
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics if the function modifies the length of the [`MutableBitmap`].
pub fn apply_validity<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
if let Some(validity) = &mut self.validity {
f(validity);
ritchie46 marked this conversation as resolved.
Show resolved Hide resolved
}
}
}

impl<T: NativeType> Default for MutablePrimitiveArray<T> {
Expand Down
29 changes: 24 additions & 5 deletions src/compute/arity_assign.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

use super::utils::check_same_len;
use crate::{array::PrimitiveArray, types::NativeType};
use either::Either;

/// Applies an unary function to a [`PrimitiveArray`] in-place via cow semantics.
///
Expand Down Expand Up @@ -48,10 +49,28 @@ where
}
}
}
// we need to take ownership for the `into_mut` call, but leave the `&mut` lhs intact
// so that we can later assign the result to out `&mut lhs`
let owned_lhs = std::mem::take(lhs);

lhs.apply_values(|x| {
x.iter_mut()
.zip(rhs.values().iter())
.for_each(|(l, r)| *l = op(*l, *r))
});
match owned_lhs.into_mut() {
Either::Left(mut immutable) => {
let values = immutable
.values()
.iter()
.zip(rhs.values().iter())
.map(|(l, r)| op(*l, *r))
.collect::<Vec<_>>();
jorgecarleitao marked this conversation as resolved.
Show resolved Hide resolved
immutable.set_values(values);
*lhs = immutable;
}
Either::Right(mut mutable) => {
mutable.apply_values(|x| {
x.iter_mut()
.zip(rhs.values().iter())
.for_each(|(l, r)| *l = op(*l, *r))
});
*lhs = mutable.into()
}
}
}