Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved performance of extend bitmap.
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Aug 17, 2021
1 parent afb05d2 commit 17cade2
Show file tree
Hide file tree
Showing 6 changed files with 177 additions and 28 deletions.
5 changes: 3 additions & 2 deletions src/array/growable/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,9 @@ impl<'a> Growable<'a> for GrowableBoolean<'a> {

let array = self.arrays[index];
let values = array.values();
let iter = (start..start + len).map(|i| values.get_bit(i));
unsafe { self.values.extend_from_trusted_len_iter_unchecked(iter) };

let (slice, offset, _) = values.as_slice();
self.values.extend_from_slice(slice, start + offset, len);
}

fn extend_validity(&mut self, additional: usize) {
Expand Down
17 changes: 5 additions & 12 deletions src/array/growable/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,12 @@ pub(super) fn build_extend_null_bits(array: &dyn Array, use_validity: bool) -> E
if let Some(bitmap) = array.validity() {
Box::new(move |validity, start, len| {
assert!(start + len <= bitmap.len());
unsafe {
let iter = (start..start + len).map(|i| bitmap.get_bit_unchecked(i));
validity.extend_from_trusted_len_iter_unchecked(iter);
};
let (slice, offset, _) = bitmap.as_slice();
validity.extend_from_slice(slice, start + offset, len);
})
} else if use_validity {
Box::new(|validity, _, len| {
let iter = (0..len).map(|_| true);
unsafe {
validity.extend_from_trusted_len_iter_unchecked(iter);
};
validity.extend_constant(len, true);
})
} else {
Box::new(|_, _, _| {})
Expand All @@ -51,10 +46,8 @@ pub(super) fn extend_validity(
) {
if let Some(bitmap) = validity {
assert!(start + len <= bitmap.len());
unsafe {
let iter = (start..start + len).map(|i| bitmap.get_bit_unchecked(i));
mutable_validity.extend_from_trusted_len_iter_unchecked(iter);
};
let (slice, offset, _) = bitmap.as_slice();
mutable_validity.extend_from_slice(slice, start + offset, len);
} else if use_validity {
mutable_validity.extend_constant(len, true);
};
Expand Down
104 changes: 92 additions & 12 deletions src/bitmap/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::iter::FromIterator;

use crate::bitmap::utils::merge_reversed;
use crate::{buffer::MutableBuffer, trusted_len::TrustedLen};

use super::utils::{fmt, get_bit, null_count, set, set_bit, BitmapIter};
Expand Down Expand Up @@ -130,16 +131,63 @@ impl MutableBitmap {
self.length = len;
}

fn extend_set(&mut self, mut additional: usize) {
let offset = self.length % 8;
let added = if offset != 0 {
// offset != 0 => at least one byte in the buffer
let last_index = self.buffer.len() - 1;
let last = &mut self.buffer[last_index];

let remaining = 0b11111111u8;
let remaining = remaining >> 8usize.saturating_sub(additional);
let remaining = remaining << offset;
*last |= remaining;
std::cmp::min(additional, 8 - offset)
} else {
0
};
self.length += added;
additional = additional.saturating_sub(added);
if additional > 0 {
debug_assert_eq!(self.length % 8, 0);
let existing = self.buffer.len();
let required = (self.length + additional).saturating_add(7) / 8;
// add remaining as full bytes
self.buffer.extend_from_trusted_len_iter(
std::iter::repeat(0b11111111u8).take(required - existing),
);
self.length += additional;
}
}

fn extend_unset(&mut self, mut additional: usize) {
let offset = self.length % 8;
let added = if offset != 0 {
// offset != 0 => at least one byte in the buffer
let last_index = self.buffer.len() - 1;
let last = &mut self.buffer[last_index];
*last &= 0b11111111u8 >> (8 - offset); // unset them
std::cmp::min(additional, 8 - offset)
} else {
0
};
self.length += added;
additional = additional.saturating_sub(added);
if additional > 0 {
debug_assert_eq!(self.length % 8, 0);
self.buffer
.resize((self.length + additional).saturating_add(7) / 8, 0);
self.length += additional;
}
}

/// Extends [`MutableBitmap`] by `additional` values of constant `value`.
#[inline]
pub fn extend_constant(&mut self, additional: usize, value: bool) {
if value {
let iter = std::iter::repeat(true).take(additional);
self.extend_from_trusted_len_iter(iter);
self.extend_set(additional)
} else {
self.buffer
.resize((self.length + additional).saturating_add(7) / 8, 0);
self.length += additional;
self.extend_unset(additional)
}
}

Expand Down Expand Up @@ -420,6 +468,40 @@ impl MutableBitmap {
Ok(Self { buffer, length })
}

fn extend_unaligned(&mut self, slice: &[u8], offset: usize, length: usize) {
let own_offset = self.length % 8;
// e.g.
// [a, b, --101010] <- to be extended
// [00111111, 11010101] <- to extend
// [a, b, 11101010, --001111] expected result
let aligned_offset = offset / 8;
let bytes_len = length.saturating_add(7) / 8;
let items = &slice[aligned_offset..aligned_offset + bytes_len];
// self has some offset => we need to shift all `items`, and merge the first
let buffer = self.buffer.as_mut_slice();
let last = &mut buffer[buffer.len() - 1];

// --101010 | 00111111 << 6 = 11101010
*last |= items[0] << own_offset;

let remaining = [items[items.len() - 1], 0];
let bytes = items
.windows(2)
.chain(std::iter::once(remaining.as_ref()))
.map(|w| merge_reversed(w[0], w[1], 8 - own_offset));
self.buffer.extend_from_trusted_len_iter(bytes);

self.length += length;
}

fn extend_aligned(&mut self, slice: &[u8], offset: usize, length: usize) {
let aligned_offset = offset / 8;
let bytes_len = length.saturating_add(7) / 8;
let items = &slice[aligned_offset..aligned_offset + bytes_len];
self.buffer.extend_from_slice(items);
self.length += length;
}

/// Extends the [`MutableBitmap`] from a slice of bytes with optional offset.
/// This is the fastest way to extend a [`MutableBitmap`].
/// # Implementation
Expand All @@ -428,16 +510,14 @@ impl MutableBitmap {
#[inline]
pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
assert!(offset + length <= slice.len() * 8);
if length == 0 {
return;
};
let is_aligned = self.length % 8 == 0;
let other_is_aligned = offset % 8 == 0;
match (is_aligned, other_is_aligned) {
(true, true) => {
let aligned_offset = offset / 8;
let bytes_len = length.saturating_add(7) / 8;
let items = &slice[aligned_offset..aligned_offset + bytes_len];
self.buffer.extend_from_slice(items);
self.length += length;
}
(true, true) => self.extend_aligned(slice, offset, length),
(false, true) => self.extend_unaligned(slice, offset, length),
// todo: further optimize the other branches.
_ => self.extend_from_trusted_len_iter(BitmapIter::new(slice, offset, length)),
}
Expand Down
2 changes: 1 addition & 1 deletion src/bitmap/utils/chunk_iterator/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pub use crate::types::BitChunk;
pub use chunks_exact::BitChunksExact;

use crate::{trusted_len::TrustedLen, types::BitChunkIter};
use merge::merge_reversed;
pub(crate) use merge::merge_reversed;

pub trait BitChunkIterExact<B: BitChunk>: Iterator<Item = B> {
fn remainder(&self) -> B;
Expand Down
1 change: 1 addition & 0 deletions src/bitmap/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ mod iterator;
mod slice_iterator;
mod zip_validity;

pub(crate) use chunk_iterator::merge_reversed;
pub use chunk_iterator::{BitChunk, BitChunkIterExact, BitChunks, BitChunksExact};
pub use fmt::fmt;
pub use iterator::BitmapIter;
Expand Down
76 changes: 75 additions & 1 deletion tests/it/bitmap/mutable.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use arrow2::bitmap::{Bitmap, MutableBitmap};
use arrow2::{
bitmap::{Bitmap, MutableBitmap},
buffer::MutableBuffer,
};

#[test]
fn trusted_len() {
Expand Down Expand Up @@ -156,6 +159,31 @@ fn extend_from_bitmap() {
assert_eq!(bitmap.as_slice()[0], 0b00101101);
}

#[test]
fn extend_from_bitmap_offset() {
let other = Bitmap::from_u8_slice(&[0b00111111], 8);
let mut bitmap = MutableBitmap::from_buffer(MutableBuffer::from(&[1, 0, 0b00101010]), 22);

// call is optimized to perform a memcopy
bitmap.extend_from_bitmap(&other);

assert_eq!(bitmap.len(), 22 + 8);
assert_eq!(bitmap.as_slice(), &[1, 0, 0b11101010, 0b00001111]);

// more than one byte
let other = Bitmap::from_u8_slice(&[0b00111111, 0b00001111, 0b0001100], 20);
let mut bitmap = MutableBitmap::from_buffer(MutableBuffer::from(&[1, 0, 0b00101010]), 22);

// call is optimized to perform a memcopy
bitmap.extend_from_bitmap(&other);

assert_eq!(bitmap.len(), 22 + 20);
assert_eq!(
bitmap.as_slice(),
&[1, 0, 0b11101010, 0b11001111, 0b0000011, 0b0000011]
);
}

#[test]
fn debug() {
let mut b = MutableBitmap::new();
Expand All @@ -173,3 +201,49 @@ fn debug() {
b.push(true);
assert_eq!(format!("{:?}", b), "[0b11000001, 0b_______1]");
}

#[test]
fn extend_set() {
let mut b = MutableBitmap::new();
b.extend_constant(6, true);
assert_eq!(b.as_slice(), &[0b11111111]);
assert_eq!(b.len(), 6);

let mut b = MutableBitmap::from(&[false]);
b.extend_constant(6, true);
assert_eq!(b.as_slice(), &[0b01111110]);
assert_eq!(b.len(), 1 + 6);

let mut b = MutableBitmap::from(&[false]);
b.extend_constant(9, true);
assert_eq!(b.as_slice(), &[0b11111110, 0b11111111]);
assert_eq!(b.len(), 1 + 9);

let mut b = MutableBitmap::from(&[false, false, false, false]);
b.extend_constant(2, true);
assert_eq!(b.as_slice(), &[0b00110000]);
assert_eq!(b.len(), 4 + 2);
}

#[test]
fn extend_unset() {
let mut b = MutableBitmap::new();
b.extend_constant(6, false);
assert_eq!(b.as_slice(), &[0b0000000]);
assert_eq!(b.len(), 6);

let mut b = MutableBitmap::from(&[true]);
b.extend_constant(6, false);
assert_eq!(b.as_slice(), &[0b00000001]);
assert_eq!(b.len(), 1 + 6);

let mut b = MutableBitmap::from(&[true]);
b.extend_constant(9, false);
assert_eq!(b.as_slice(), &[0b0000001, 0b00000000]);
assert_eq!(b.len(), 1 + 9);

let mut b = MutableBitmap::from(&[true, true, true, true]);
b.extend_constant(2, false);
assert_eq!(b.as_slice(), &[0b00001111]);
assert_eq!(b.len(), 4 + 2);
}

0 comments on commit 17cade2

Please sign in to comment.