Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added cow APIs (2x-10x vs non-cow) #1061

Merged
merged 2 commits into from
Jun 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,11 @@ harness = false
[[bench]]
name = "slices_iterator"
harness = false

[[bench]]
name = "bitmap_assign_ops"
harness = false

[[bench]]
name = "assign_ops"
harness = false
29 changes: 29 additions & 0 deletions benches/assign_ops.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use criterion::{criterion_group, criterion_main, Criterion};

use arrow2::{compute::arithmetics::basic::mul_scalar, util::bench_util::create_primitive_array};

/// Benchmarks multiplying an `f32` primitive array by a scalar, for sizes
/// `2^10`, `2^12`, ..., `2^20`.
///
/// Two variants are registered per size:
/// * `apply_mul`: mutates the array through `apply_values`;
/// * `mul`: calls `mul_scalar`, which returns a new array.
fn add_benchmark(c: &mut Criterion) {
    for log2_size in (10..=20).step_by(2) {
        let size = 2usize.pow(log2_size);

        // Variant 1: multiply through `apply_values` on a mutable array.
        let mut in_place = create_primitive_array::<f32>(size, 0.2);
        let in_place_id = format!("apply_mul 2^{}", log2_size);
        c.bench_function(&in_place_id, |b| {
            b.iter(|| {
                criterion::black_box(&mut in_place)
                    .apply_values(|values| values.iter_mut().for_each(|value| *value *= 1.01));
                assert!(!in_place.value(10).is_nan());
            })
        });

        // Variant 2: multiply through `mul_scalar`, producing a new array.
        let source = create_primitive_array::<f32>(size, 0.2);
        let allocating_id = format!("mul 2^{}", log2_size);
        c.bench_function(&allocating_id, |b| {
            b.iter(|| {
                let product = mul_scalar(criterion::black_box(&source), &1.01f32);
                assert!(!product.value(10).is_nan());
            })
        });
    }
}

// Register `add_benchmark` in a Criterion group named `benches`, then let
// `criterion_main!` provide the entry point of this benchmark binary.
criterion_group!(benches, add_benchmark);
criterion_main!(benches);
47 changes: 47 additions & 0 deletions benches/bitmap_assign_ops.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
use criterion::{criterion_group, criterion_main, Criterion};

use arrow2::bitmap::{binary_assign, unary_assign};
use arrow2::bitmap::{Bitmap, MutableBitmap};

/// Benchmarks bitwise operations on bitmaps, for sizes `2^10`, `2^12`, ..., `2^20`.
///
/// For each size, four benchmarks are registered:
/// * `mutablebitmap not`: `!` through `unary_assign` on a [`MutableBitmap`];
/// * `bitmap not`: `!` on a [`Bitmap`] reference, producing a new value;
/// * `mutablebitmap and`: `&` through `binary_assign` on a [`MutableBitmap`];
/// * `bitmap and`: `&` between two [`Bitmap`] references, producing a new value.
fn add_benchmark(c: &mut Criterion) {
    (10..=20).step_by(2).for_each(|log2_size| {
        let size = 2usize.pow(log2_size);

        let mut bitmap: MutableBitmap = (0..size).map(|x| x % 3 == 0).collect();
        c.bench_function(&format!("mutablebitmap not 2^{}", log2_size), |b| {
            b.iter(|| {
                unary_assign(criterion::black_box(&mut bitmap), |x: u64| !x);
                assert!(!bitmap.is_empty());
            })
        });

        let bitmap: Bitmap = (0..size).map(|x| x % 3 == 0).collect();
        c.bench_function(&format!("bitmap not 2^{}", log2_size), |b| {
            b.iter(|| {
                let r = !criterion::black_box(&bitmap);
                assert!(!r.is_empty());
            })
        });

        let mut lhs: MutableBitmap = (0..size).map(|x| x % 3 == 0).collect();
        let rhs: Bitmap = (0..size).map(|x| x % 4 == 0).collect();
        c.bench_function(&format!("mutablebitmap and 2^{}", log2_size), |b| {
            b.iter(|| {
                binary_assign(criterion::black_box(&mut lhs), &rhs, |x: u64, y| x & y);
                // Check the bitmap that was just mutated, not the unrelated
                // `bitmap` left over from the previous benchmark.
                assert!(!lhs.is_empty());
            })
        });

        let lhs: Bitmap = (0..size).map(|x| x % 3 == 0).collect();
        let rhs: Bitmap = (0..size).map(|x| x % 4 == 0).collect();
        c.bench_function(&format!("bitmap and 2^{}", log2_size), |b| {
            b.iter(|| {
                let r = criterion::black_box(&lhs) & &rhs;
                assert!(!r.is_empty());
            })
        });
    });
}

// Register `add_benchmark` in a Criterion group named `benches`, then let
// `criterion_main!` provide the entry point of this benchmark binary.
criterion_group!(benches, add_benchmark);
criterion_main!(benches);
40 changes: 39 additions & 1 deletion src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{
bitmap::Bitmap,
bitmap::{Bitmap, MutableBitmap},
datatypes::{DataType, PhysicalType},
error::Error,
};
Expand Down Expand Up @@ -92,6 +92,44 @@ impl BooleanArray {
pub fn arced(self) -> std::sync::Arc<dyn Array> {
// Moves `self` into a new `Arc`; the return type erases it to `dyn Array`.
std::sync::Arc::new(self)
}

/// Applies a function `f` to the values of this array, cloning the values
/// iff they are being shared with others.
///
/// This is an API to use clone-on-write.
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics if the function modifies the length of the [`MutableBitmap`],
/// regardless of whether this array has a validity bitmap.
pub fn apply_values<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
    // Captured up front: `take` below leaves an empty default bitmap in `self.values`.
    let len = self.values.len();
    let values = std::mem::take(&mut self.values);
    let mut values = values.make_mut();
    f(&mut values);
    // Enforce the documented contract unconditionally. Comparing against the
    // original length also keeps the values aligned with any validity bitmap.
    assert_eq!(values.len(), len);
    self.values = values.into();
}

/// Runs `f` on a mutable view of this array's validity, if it has one.
///
/// The validity is cloned iff it is currently shared (clone-on-write); an
/// array without validity is left untouched and `f` is never called.
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics if the function modifies the length of the [`MutableBitmap`].
pub fn apply_validity<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
    let slot = match self.validity.as_mut() {
        Some(slot) => slot,
        None => return,
    };
    // Move the bitmap out (leaving a default one behind) to get a mutable version.
    let mut mutable = std::mem::take(slot).make_mut();
    f(&mut mutable);
    assert_eq!(mutable.len(), self.values.len());
    *slot = mutable.into();
}
}

// must use
Expand Down
35 changes: 34 additions & 1 deletion src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
bitmap::{
utils::{zip_validity, ZipValidity},
Bitmap,
Bitmap, MutableBitmap,
},
buffer::Buffer,
datatypes::*,
Expand Down Expand Up @@ -252,6 +252,39 @@ impl<T: NativeType> PrimitiveArray<T> {
arr
}

/// Runs `f` on a mutable slice of this array's values.
///
/// The values are cloned iff they are currently shared with others
/// (clone-on-write).
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
pub fn apply_values<F: Fn(&mut [T])>(&mut self, f: F) {
    // Move the buffer out (leaving a default one behind) to get a mutable version.
    let mut owned = std::mem::take(&mut self.values).make_mut();
    f(&mut owned);
    self.values = owned.into();
}

/// Runs `f` on a mutable view of this array's validity, if it has one.
///
/// The validity is cloned iff it is currently shared (clone-on-write); an
/// array without validity is left untouched and `f` is never called.
/// # Implementation
/// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
/// if it is being shared (since it results in a `O(N)` memcopy).
/// # Panics
/// This function panics if the function modifies the length of the [`MutableBitmap`].
pub fn apply_validity<F: Fn(&mut MutableBitmap)>(&mut self, f: F) {
    let slot = match self.validity.as_mut() {
        Some(slot) => slot,
        None => return,
    };
    // Move the bitmap out (leaving a default one behind) to get a mutable version.
    let mut mutable = std::mem::take(slot).make_mut();
    f(&mut mutable);
    assert_eq!(mutable.len(), self.values.len());
    *slot = mutable.into();
}

/// Try to convert this [`PrimitiveArray`] to a [`MutablePrimitiveArray`] via copy-on-write semantics.
///
/// A [`PrimitiveArray`] is backed by a [`Buffer`] and [`Bitmap`] which are essentially `Arc<Vec<_>>`.
Expand Down
191 changes: 191 additions & 0 deletions src/bitmap/assign_ops.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
use crate::bitmap::{Bitmap, MutableBitmap};

use super::utils::{BitChunk, BitChunkIterExact, BitChunksExact};

/// Applies `op` in place to this [`MutableBitmap`], one chunk of type `T` at a time.
///
/// This function can be used for operations like `!` on a [`MutableBitmap`].
///
/// Complete chunks are processed first. Trailing bytes that do not fill a
/// whole chunk are zero-padded to a `T`, passed through `op`, and only as
/// many bytes as the remainder holds are written back.
pub fn unary_assign<T: BitChunk, F: Fn(T) -> T>(bitmap: &mut MutableBitmap, op: F) {
    let mut chunks = bitmap.bitchunks_exact_mut::<T>();

    for chunk in chunks.by_ref() {
        let current: T = match (chunk as &[u8]).try_into() {
            Ok(bytes) => T::from_ne_bytes(bytes),
            Err(_) => unreachable!(),
        };
        chunk.copy_from_slice(op(current).to_ne_bytes().as_ref());
    }

    let tail = chunks.remainder();
    if tail.is_empty() {
        return;
    }

    // Zero-pad the trailing bytes so they can be handed to `op` as a full `T`.
    let mut padded = T::zero().to_ne_bytes();
    for (index, byte) in tail.iter().enumerate() {
        padded[index] = *byte;
    }
    let updated = op(T::from_ne_bytes(padded)).to_ne_bytes();

    let len = tail.len();
    tail.copy_from_slice(&updated.as_ref()[..len]);
}

/// Bitwise negation of an owned [`MutableBitmap`], computed in place in `u64` chunks.
impl std::ops::Not for MutableBitmap {
    type Output = Self;

    #[inline]
    fn not(mut self) -> Self::Output {
        unary_assign::<u64, _>(&mut self, |chunk| !chunk);
        self
    }
}

/// Combines `lhs` with the chunks yielded by `rhs` using `op`, writing the result into `lhs`.
///
/// Complete chunks of both sides are zipped together. The trailing bytes of
/// `lhs` that do not fill a whole chunk are zero-padded to a `T`, combined
/// with `rhs.remainder()`, and only as many bytes as the remainder holds are
/// written back.
fn binary_assign_impl<I, T, F>(lhs: &mut MutableBitmap, mut rhs: I, op: F)
where
    I: BitChunkIterExact<T>,
    T: BitChunk,
    F: Fn(T, T) -> T,
{
    let mut lhs_chunks = lhs.bitchunks_exact_mut::<T>();

    for (target, other) in lhs_chunks.by_ref().zip(rhs.by_ref()) {
        let current: T = match (target as &[u8]).try_into() {
            Ok(bytes) => T::from_ne_bytes(bytes),
            Err(_) => unreachable!(),
        };
        target.copy_from_slice(op(current, other).to_ne_bytes().as_ref());
    }

    let lhs_tail = lhs_chunks.remainder();
    let rhs_tail = rhs.remainder();
    if lhs_tail.is_empty() {
        return;
    }

    // Zero-pad the trailing bytes so they can be handed to `op` as a full `T`.
    let mut padded = T::zero().to_ne_bytes();
    for (index, byte) in lhs_tail.iter().enumerate() {
        padded[index] = *byte;
    }
    let combined = op(T::from_ne_bytes(padded), rhs_tail).to_ne_bytes();

    let len = lhs_tail.len();
    lhs_tail.copy_from_slice(&combined.as_ref()[..len]);
}

/// Applies a bitwise binary operation between a [`MutableBitmap`] and a
/// [`Bitmap`], storing the result in the [`MutableBitmap`].
///
/// This function can be used for operations like `&=` on a [`MutableBitmap`].
/// # Panics
/// This function panics iff `lhs.len() != rhs.len()`.
pub fn binary_assign<T: BitChunk, F>(lhs: &mut MutableBitmap, rhs: &Bitmap, op: F)
where
    F: Fn(T, T) -> T,
{
    assert_eq!(lhs.len(), rhs.len());

    match rhs.as_slice() {
        // With a zero offset, chunks are read straight from the byte slice.
        (bytes, 0, length) => {
            binary_assign_impl(lhs, BitChunksExact::<T>::new(bytes, length), op)
        }
        // Otherwise go through the bitmap's own chunk iterator.
        _ => binary_assign_impl(lhs, rhs.chunks::<T>(), op),
    }
}

/// Computes bitwise `|` between `lhs` and `rhs` in place, assigning it to `lhs`.
///
/// Two cases avoid the chunked loop:
/// * `rhs.null_count() == 0`: `lhs` is overwritten with `rhs.len()` `true` bits;
/// * `rhs.null_count() == rhs.len()`: `lhs` is left unchanged.
/// # Panics
/// This function panics iff `lhs.len() != rhs.len()`.
#[inline]
fn or_assign<T: BitChunk>(lhs: &mut MutableBitmap, rhs: &Bitmap) {
    // Checked up front so a mismatched `rhs` is rejected on every path,
    // including the one that leaves `lhs` untouched.
    assert_eq!(lhs.len(), rhs.len());

    let unset = rhs.null_count();
    if unset == 0 {
        lhs.clear();
        lhs.extend_constant(rhs.len(), true);
    } else if unset != rhs.len() {
        binary_assign(lhs, rhs, |x: T, y| x | y)
    }
    // else: bitmap remains
}

/// `|=` between a mutable reference to a [`MutableBitmap`] and a [`Bitmap`],
/// computed in place in `u64` chunks.
impl<'a, 'b> std::ops::BitOrAssign<&'a Bitmap> for &'b mut MutableBitmap {
    #[inline]
    fn bitor_assign(&mut self, rhs: &'a Bitmap) {
        let target: &mut MutableBitmap = self;
        or_assign::<u64>(target, rhs)
    }
}

/// `|` between an owned [`MutableBitmap`] and a [`Bitmap`]; the left-hand
/// side is updated in place (in `u64` chunks) and returned.
impl<'a> std::ops::BitOr<&'a Bitmap> for MutableBitmap {
    type Output = Self;

    #[inline]
    fn bitor(mut self, rhs: &'a Bitmap) -> Self {
        or_assign::<u64>(&mut self, rhs);
        self
    }
}

/// Computes bitwise `&` between `lhs` and `rhs`, assigning it to `lhs`.
///
/// Two cases avoid the chunked loop:
/// * `rhs.null_count() == 0`: `lhs` is left unchanged;
/// * `rhs.null_count() == rhs.len()`: `lhs` is overwritten with `rhs.len()` `false` bits.
/// # Panics
/// This function panics iff `lhs.len() != rhs.len()`.
#[inline]
fn and_assign<T: BitChunk>(lhs: &mut MutableBitmap, rhs: &Bitmap) {
    // Checked up front so that the early-exit path below still rejects a
    // mismatched `rhs`, as every path did before.
    assert_eq!(lhs.len(), rhs.len());

    let unset = rhs.null_count();
    if unset == 0 {
        // bitmap remains
    } else if unset == rhs.len() {
        lhs.clear();
        lhs.extend_constant(rhs.len(), false);
    } else {
        binary_assign(lhs, rhs, |x: T, y| x & y)
    }
}

/// `&=` between a mutable reference to a [`MutableBitmap`] and a [`Bitmap`],
/// computed in place in `u64` chunks.
impl<'a, 'b> std::ops::BitAndAssign<&'a Bitmap> for &'b mut MutableBitmap {
    #[inline]
    fn bitand_assign(&mut self, rhs: &'a Bitmap) {
        let target: &mut MutableBitmap = self;
        and_assign::<u64>(target, rhs)
    }
}

/// `&` between an owned [`MutableBitmap`] and a [`Bitmap`]; the left-hand
/// side is updated in place (in `u64` chunks) and returned.
impl<'a> std::ops::BitAnd<&'a Bitmap> for MutableBitmap {
    type Output = Self;

    #[inline]
    fn bitand(mut self, rhs: &'a Bitmap) -> Self {
        and_assign::<u64>(&mut self, rhs);
        self
    }
}

/// Computes bitwise `^` between `lhs` and `rhs`, assigning it to `lhs`.
///
/// Always delegates to [`binary_assign`].
#[inline]
fn xor_assign<T: BitChunk>(lhs: &mut MutableBitmap, rhs: &Bitmap) {
    binary_assign::<T, _>(lhs, rhs, |left, right| left ^ right)
}

/// `^=` between a mutable reference to a [`MutableBitmap`] and a [`Bitmap`],
/// computed in place in `u64` chunks.
impl<'a, 'b> std::ops::BitXorAssign<&'a Bitmap> for &'b mut MutableBitmap {
    #[inline]
    fn bitxor_assign(&mut self, rhs: &'a Bitmap) {
        let target: &mut MutableBitmap = self;
        xor_assign::<u64>(target, rhs)
    }
}

/// `^` between an owned [`MutableBitmap`] and a [`Bitmap`]; the left-hand
/// side is updated in place (in `u64` chunks) and returned.
impl<'a> std::ops::BitXor<&'a Bitmap> for MutableBitmap {
    type Output = Self;

    #[inline]
    fn bitxor(mut self, rhs: &'a Bitmap) -> Self {
        xor_assign::<u64>(&mut self, rhs);
        self
    }
}
Loading