Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Improved performance of concatenating non-aligned validities (15x) #291

Merged
merged 2 commits into from
Aug 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,11 @@ harness = false
[[bench]]
name = "arithmetic_kernels"
harness = false

[[bench]]
name = "bitmap"
harness = false

[[bench]]
name = "concat"
harness = false
60 changes: 60 additions & 0 deletions benches/bitmap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
extern crate arrow2;

use std::iter::FromIterator;

use arrow2::bitmap::*;

use criterion::{criterion_group, criterion_main, Criterion};

//

/// Registers the `MutableBitmap` extension benchmarks for sizes `2^10, 2^12, ..., 2^20` bits.
///
/// For every size, four benchmarks are registered:
/// * `extend_from_bitmap` into an empty target ("aligned");
/// * `extend_from_bitmap` into a target that already holds one bit ("unaligned");
/// * `extend_constant(size, true)` into an empty target ("aligned");
/// * `extend_constant(size, true)` into a target that already holds one bit ("unaligned").
///
/// The target bitmap is created once per benchmark and emptied with `clear` at the end of
/// every iteration, so each iteration starts from a zero-length target.
fn add_benchmark(c: &mut Criterion) {
    for log2_size in (10..=20).step_by(2) {
        let size = 2usize.pow(log2_size);

        // Source bitmap of `size` bits in which every third bit is set.
        let source = Bitmap::from_iter((0..size).map(|i| i % 3 == 0));

        let name = format!("bitmap extend aligned 2^{}", log2_size);
        c.bench_function(&name, |bencher| {
            let mut target = MutableBitmap::new();
            bencher.iter(|| {
                target.extend_from_bitmap(&source);
                target.clear();
            })
        });

        let name = format!("bitmap extend unaligned 2^{}", log2_size);
        c.bench_function(&name, |bencher| {
            let mut target = MutableBitmap::with_capacity(1);
            bencher.iter(|| {
                // A single leading bit makes the target's length a non-multiple of 8.
                target.push(true);
                target.extend_from_bitmap(&source);
                target.clear();
            })
        });

        let name = format!("bitmap extend_constant aligned 2^{}", log2_size);
        c.bench_function(&name, |bencher| {
            let mut target = MutableBitmap::new();
            bencher.iter(|| {
                target.extend_constant(size, true);
                target.clear();
            })
        });

        let name = format!("bitmap extend_constant unaligned 2^{}", log2_size);
        c.bench_function(&name, |bencher| {
            let mut target = MutableBitmap::with_capacity(1);
            bencher.iter(|| {
                // A single leading bit makes the target's length a non-multiple of 8.
                target.push(true);
                target.extend_constant(size, true);
                target.clear();
            })
        });
    }
}

criterion_group!(benches, add_benchmark);
criterion_main!(benches);
52 changes: 52 additions & 0 deletions benches/concat.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
extern crate arrow2;

use arrow2::{
compute::concat,
datatypes::DataType,
util::bench_util::{create_boolean_array, create_primitive_array},
};

use criterion::{criterion_group, criterion_main, Criterion};

/// Registers the `concat::concatenate` benchmarks.
///
/// The size range `(20..=20).step_by(2)` yields a single value, so only `2^20` is measured.
/// For both `Int32` and boolean arrays, a short first array is concatenated with a long
/// second array of `2^20 + 1` elements:
/// * "aligned": the first array has 8 elements;
/// * "unaligned": the first array has 9 elements.
///
/// NOTE(review): presumably 8 vs 9 elements decides whether the second array's validity is
/// appended at a byte boundary of the result -- confirm against `concatenate`.
/// NOTE(review): the `0.5` arguments are presumably null/true densities -- confirm against
/// `util::bench_util`.
fn add_benchmark(c: &mut Criterion) {
    for log2_size in (20..=20).step_by(2) {
        let size = 2usize.pow(log2_size);

        // --- Int32 ---
        let short_ints = create_primitive_array::<i32>(8, DataType::Int32, 0.5);
        let long_ints = create_primitive_array::<i32>(size + 1, DataType::Int32, 0.5);

        let name = format!("int32 concat aligned 2^{}", log2_size);
        c.bench_function(&name, |bencher| {
            bencher.iter(|| {
                let _ = concat::concatenate(&[&short_ints, &long_ints]);
            })
        });

        let short_ints = create_primitive_array::<i32>(9, DataType::Int32, 0.5);

        let name = format!("int32 concat unaligned 2^{}", log2_size);
        c.bench_function(&name, |bencher| {
            bencher.iter(|| {
                let _ = concat::concatenate(&[&short_ints, &long_ints]);
            })
        });

        // --- Boolean ---
        let short_bools = create_boolean_array(8, 0.5, 0.5);
        let long_bools = create_boolean_array(size + 1, 0.5, 0.5);

        let name = format!("boolean concat aligned 2^{}", log2_size);
        c.bench_function(&name, |bencher| {
            bencher.iter(|| {
                let _ = concat::concatenate(&[&short_bools, &long_bools]);
            })
        });

        let short_bools = create_boolean_array(9, 0.5, 0.5);

        let name = format!("boolean concat unaligned 2^{}", log2_size);
        c.bench_function(&name, |bencher| {
            bencher.iter(|| {
                let _ = concat::concatenate(&[&short_bools, &long_bools]);
            })
        });
    }
}

criterion_group!(benches, add_benchmark);
criterion_main!(benches);
5 changes: 3 additions & 2 deletions src/array/growable/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@ impl<'a> Growable<'a> for GrowableBoolean<'a> {

let array = self.arrays[index];
let values = array.values();
let iter = (start..start + len).map(|i| values.get_bit(i));
unsafe { self.values.extend_from_trusted_len_iter_unchecked(iter) };

let (slice, offset, _) = values.as_slice();
self.values.extend_from_slice(slice, start + offset, len);
}

fn extend_validity(&mut self, additional: usize) {
Expand Down
17 changes: 5 additions & 12 deletions src/array/growable/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,12 @@ pub(super) fn build_extend_null_bits(array: &dyn Array, use_validity: bool) -> E
if let Some(bitmap) = array.validity() {
Box::new(move |validity, start, len| {
assert!(start + len <= bitmap.len());
unsafe {
let iter = (start..start + len).map(|i| bitmap.get_bit_unchecked(i));
validity.extend_from_trusted_len_iter_unchecked(iter);
};
let (slice, offset, _) = bitmap.as_slice();
validity.extend_from_slice(slice, start + offset, len);
})
} else if use_validity {
Box::new(|validity, _, len| {
let iter = (0..len).map(|_| true);
unsafe {
validity.extend_from_trusted_len_iter_unchecked(iter);
};
validity.extend_constant(len, true);
})
} else {
Box::new(|_, _, _| {})
Expand All @@ -51,10 +46,8 @@ pub(super) fn extend_validity(
) {
if let Some(bitmap) = validity {
assert!(start + len <= bitmap.len());
unsafe {
let iter = (start..start + len).map(|i| bitmap.get_bit_unchecked(i));
mutable_validity.extend_from_trusted_len_iter_unchecked(iter);
};
let (slice, offset, _) = bitmap.as_slice();
mutable_validity.extend_from_slice(slice, start + offset, len);
} else if use_validity {
mutable_validity.extend_constant(len, true);
};
Expand Down
113 changes: 101 additions & 12 deletions src/bitmap/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::iter::FromIterator;

use crate::bitmap::utils::merge_reversed;
use crate::{buffer::MutableBuffer, trusted_len::TrustedLen};

use super::utils::{fmt, get_bit, null_count, set, set_bit, BitmapIter};
Expand Down Expand Up @@ -39,6 +40,13 @@ impl MutableBitmap {
}
}

/// Removes every bit from the [`MutableBitmap`], leaving it with a length of zero.
///
/// Both the underlying byte buffer and the bit length are reset.
#[inline]
pub fn clear(&mut self) {
    self.buffer.clear();
    self.length = 0;
}

/// Initializes a zeroed [`MutableBitmap`].
#[inline]
pub fn from_len_zeroed(length: usize) -> Self {
Expand Down Expand Up @@ -123,16 +131,63 @@ impl MutableBitmap {
self.length = len;
}

/// Appends `additional` set (`1`) bits to the end of the bitmap.
///
/// Works in two steps:
/// 1. if the last byte is only partially used (`self.length % 8 != 0`), its free high bits
///    are set first, consuming up to `8 - offset` of the requested bits;
/// 2. whatever is left is appended as whole `0b11111111` bytes.
///
/// Relies on the buffer holding exactly `ceil(self.length / 8)` bytes on entry.
fn extend_set(&mut self, mut additional: usize) {
    // Nothing to append. Without this guard the top-up below would evaluate `0xFF >> 8`,
    // a shift by the full width of `u8`: that panics when overflow checks are enabled and
    // otherwise does not shift at all, which would set bits past `self.length`.
    if additional == 0 {
        return;
    }

    let offset = self.length % 8;
    let added = if offset != 0 {
        // offset != 0 => at least one byte in the buffer
        let last_index = self.buffer.len() - 1;
        let last = &mut self.buffer[last_index];

        // Build a mask of `min(additional, 8)` ones, then move it above the `offset` bits
        // already in use; ones shifted past bit 7 are discarded.
        let remaining = 0b11111111u8;
        let remaining = remaining >> 8usize.saturating_sub(additional);
        let remaining = remaining << offset;
        *last |= remaining;
        std::cmp::min(additional, 8 - offset)
    } else {
        0
    };
    self.length += added;
    additional = additional.saturating_sub(added);
    if additional > 0 {
        // The last byte is now full, so the rest starts on a byte boundary.
        debug_assert_eq!(self.length % 8, 0);
        let existing = self.buffer.len();
        let required = (self.length + additional).saturating_add(7) / 8;
        // add remaining as full bytes
        self.buffer.extend_from_trusted_len_iter(
            std::iter::repeat(0b11111111u8).take(required - existing),
        );
        self.length += additional;
    }
}

/// Appends `additional` unset (`0`) bits to the end of the bitmap.
///
/// If the last byte is only partially used, its unused high bits are cleared and count
/// towards the request; anything left over is appended by growing the buffer with zeroed
/// bytes.
fn extend_unset(&mut self, mut additional: usize) {
    let used_bits = self.length % 8;
    let mut topped_up = 0;
    if used_bits != 0 {
        // A non-zero remainder implies the buffer holds at least one byte.
        let tail = self.buffer.len() - 1;
        // Keep the `used_bits` low bits and zero everything above them.
        self.buffer[tail] &= 0b11111111u8 >> (8 - used_bits);
        topped_up = additional.min(8 - used_bits);
    }
    self.length += topped_up;
    // `topped_up <= additional`, so this cannot underflow.
    additional -= topped_up;

    if additional > 0 {
        debug_assert_eq!(self.length % 8, 0);
        let new_byte_len = (self.length + additional).saturating_add(7) / 8;
        self.buffer.resize(new_byte_len, 0);
        self.length += additional;
    }
}

/// Extends [`MutableBitmap`] by `additional` values of constant `value`.
#[inline]
pub fn extend_constant(&mut self, additional: usize, value: bool) {
if value {
let iter = std::iter::repeat(true).take(additional);
self.extend_from_trusted_len_iter(iter);
self.extend_set(additional)
} else {
self.buffer
.resize((self.length + additional).saturating_add(7) / 8, 0);
self.length += additional;
self.extend_unset(additional)
}
}

Expand Down Expand Up @@ -413,6 +468,42 @@ impl MutableBitmap {
Ok(Self { buffer, length })
}

/// Appends `length` bits of `slice`, starting at bit `offset`, to a bitmap whose own length
/// is not a multiple of 8.
///
/// `offset` is expected to be a multiple of 8 (only `offset / 8` is used), and `self.length`
/// is expected not to be, so the buffer's last byte is partially used.
///
/// Example with 6 bits in use and 8 bits to append:
/// ```text
/// [a, b, --101010]           <- to be extended
/// [00111111]                 <- to extend
/// [a, b, 11101010, --001111] <- expected result
/// ```
///
/// Only as many bytes as the new length requires are appended, so the buffer keeps holding
/// exactly `ceil(self.length / 8)` bytes.
fn extend_unaligned(&mut self, slice: &[u8], offset: usize, length: usize) {
    // Nothing to append; also avoids indexing `items[0]` of an empty slice below.
    if length == 0 {
        return;
    }

    let own_offset = self.length % 8;
    debug_assert_ne!(own_offset, 0);
    debug_assert_eq!(offset % 8, 0);

    let aligned_offset = offset / 8;
    let bytes_len = length.saturating_add(7) / 8;
    let items = &slice[aligned_offset..aligned_offset + bytes_len];

    // self has some offset => we need to shift all `items`, and merge the first
    let buffer = self.buffer.as_mut_slice();
    let last = &mut buffer[buffer.len() - 1];

    // --101010 | 00111111 << 6 = 11101010
    // erase previous
    *last &= 0b11111111u8 >> (8 - own_offset); // unset before setting
    *last |= items[0] << own_offset;

    // Bits that fit in the existing last byte need no new storage; only the spill-over
    // does. `spill_bytes` is always either `bytes_len` or `bytes_len - 1`.
    let free_bits = 8 - own_offset;
    let spill_bytes = length.saturating_sub(free_bits).saturating_add(7) / 8;

    // Each appended byte combines the bits of one source byte that did not fit in the
    // previous output byte with the leading bits of the next source byte.
    // NOTE(review): relies on `merge_reversed(cur, next, n)` combining `cur` shifted down
    // by `n` with the low bits of `next` -- confirm against its definition.
    if spill_bytes == bytes_len {
        // The final source byte also spills over: pair it with a zero byte.
        let remaining = [items[items.len() - 1], 0];
        let bytes = items
            .windows(2)
            .chain(std::iter::once(remaining.as_ref()))
            .map(|w| merge_reversed(w[0], w[1], free_bits));
        self.buffer.extend_from_trusted_len_iter(bytes);
    } else {
        // The final source byte fits entirely into the last appended (or existing) byte,
        // so no trailing byte is needed.
        let bytes = items
            .windows(2)
            .map(|w| merge_reversed(w[0], w[1], free_bits));
        self.buffer.extend_from_trusted_len_iter(bytes);
    }

    self.length += length;
}

/// Appends `length` bits of `slice`, starting at bit `offset`, by copying whole bytes.
///
/// Copies `ceil(length / 8)` bytes beginning at byte `offset / 8` straight onto the end of
/// the buffer and grows the bit length by `length`.
fn extend_aligned(&mut self, slice: &[u8], offset: usize, length: usize) {
    let first_byte = offset / 8;
    let byte_count = length.saturating_add(7) / 8;
    self.buffer
        .extend_from_slice(&slice[first_byte..first_byte + byte_count]);
    self.length += length;
}

/// Extends the [`MutableBitmap`] from a slice of bytes with optional offset.
/// This is the fastest way to extend a [`MutableBitmap`].
/// # Implementation
Expand All @@ -421,16 +512,14 @@ impl MutableBitmap {
#[inline]
pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
assert!(offset + length <= slice.len() * 8);
if length == 0 {
return;
};
let is_aligned = self.length % 8 == 0;
let other_is_aligned = offset % 8 == 0;
match (is_aligned, other_is_aligned) {
(true, true) => {
let aligned_offset = offset / 8;
let bytes_len = length.saturating_add(7) / 8;
let items = &slice[aligned_offset..aligned_offset + bytes_len];
self.buffer.extend_from_slice(items);
self.length += length;
}
(true, true) => self.extend_aligned(slice, offset, length),
(false, true) => self.extend_unaligned(slice, offset, length),
// todo: further optimize the other branches.
_ => self.extend_from_trusted_len_iter(BitmapIter::new(slice, offset, length)),
}
Expand Down
2 changes: 1 addition & 1 deletion src/bitmap/utils/chunk_iterator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pub use crate::types::BitChunk;
pub use chunks_exact::BitChunksExact;

use crate::{trusted_len::TrustedLen, types::BitChunkIter};
use merge::merge_reversed;
pub(crate) use merge::merge_reversed;

pub trait BitChunkIterExact<B: BitChunk>: Iterator<Item = B> {
fn remainder(&self) -> B;
Expand Down
1 change: 1 addition & 0 deletions src/bitmap/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ mod iterator;
mod slice_iterator;
mod zip_validity;

pub(crate) use chunk_iterator::merge_reversed;
pub use chunk_iterator::{BitChunk, BitChunkIterExact, BitChunks, BitChunksExact};
pub use fmt::fmt;
pub use iterator::BitmapIter;
Expand Down
Loading