diff --git a/src/bitmap/mutable.rs b/src/bitmap/mutable.rs index 58631abde76..adcf9793cc0 100644 --- a/src/bitmap/mutable.rs +++ b/src/bitmap/mutable.rs @@ -1,3 +1,4 @@ +use std::hint::unreachable_unchecked; use std::iter::FromIterator; use crate::bitmap::utils::merge_reversed; @@ -309,23 +310,58 @@ impl FromIterator for MutableBitmap { } } +/// # Safety +/// The iterator must be trustedLen and its len must be least `len`. #[inline] -fn extend>(buffer: &mut [u8], length: usize, mut iterator: I) { - let chunks = length / 8; - let reminder = length % 8; - - buffer[..chunks].iter_mut().for_each(|byte| { - (0..8).for_each(|i| { - *byte = set(*byte, i, iterator.next().unwrap()); - }) - }); - - if reminder != 0 { - let last = &mut buffer[chunks]; - iterator.enumerate().for_each(|(i, value)| { - *last = set(*last, i, value); - }); +unsafe fn get_byte_unchecked(len: usize, iterator: &mut impl Iterator) -> u8 { + let mut byte_accum: u8 = 0; + let mut mask: u8 = 1; + for _ in 0..len { + let value = match iterator.next() { + Some(value) => value, + None => unsafe { unreachable_unchecked() }, + }; + + byte_accum |= match value { + true => mask, + false => 0, + }; + mask <<= 1; + } + byte_accum +} + +/// Extends the [`MutableBuffer`] from `iterator` +/// # Safety +/// The iterator MUST be [`TrustedLen`]. +#[inline] +unsafe fn extend_aligned_trusted_iter_unchecked( + buffer: &mut MutableBuffer, + mut iterator: impl Iterator, +) -> usize { + let additional_bits = iterator.size_hint().1.unwrap(); + let chunks = additional_bits / 8; + let remainder = additional_bits % 8; + + let additional = chunks + (remainder > 0) as usize; + buffer.reserve(additional); + + if chunks > 0 { + for _ in 0..chunks { + // Soundness: iterator lenght is at least chunks * 8 + let byte = unsafe { get_byte_unchecked(8, &mut iterator) }; + // Soundness: capacity was allocated above + unsafe { buffer.push_unchecked(byte) }; + } } + + if remainder > 0 { + // Soundness: iterator has exactly remainder items. + let byte = unsafe { get_byte_unchecked(remainder, &mut iterator) }; + // Soundness: reserve above took remainder into account + unsafe { buffer.push_unchecked(byte) }; + } + additional_bits } impl MutableBitmap { @@ -339,6 +375,7 @@ impl MutableBitmap { /// Extends `self` from an iterator of trusted len. /// # Safety /// The caller must guarantee that the iterator has a trusted len. + #[inline] pub unsafe fn extend_from_trusted_len_iter_unchecked>( &mut self, mut iterator: I, @@ -379,40 +416,33 @@ impl MutableBitmap { // everything is aligned; proceed with the bulk operation debug_assert_eq!(self.length % 8, 0); - self.buffer.extend_constant((length + 7) / 8, 0); - - extend(&mut self.buffer[self.length / 8..], length, iterator); + unsafe { extend_aligned_trusted_iter_unchecked(&mut self.buffer, iterator) }; self.length += length; } /// Creates a new [`MutableBitmap`] from an iterator of booleans. /// # Safety /// The iterator must report an accurate length. + #[inline] pub unsafe fn from_trusted_len_iter_unchecked(iterator: I) -> Self where I: Iterator, { - let length = iterator.size_hint().1.unwrap(); + let mut buffer = MutableBuffer::::new(); - let mut buffer = MutableBuffer::::from_len_zeroed((length + 7) / 8); - - extend(&mut buffer, length, iterator); + let length = extend_aligned_trusted_iter_unchecked(&mut buffer, iterator); Self { buffer, length } } /// Creates a new [`MutableBitmap`] from an iterator of booleans. + #[inline] pub fn from_trusted_len_iter(iterator: I) -> Self where I: TrustedLen, { - let length = iterator.size_hint().1.unwrap(); - - let mut buffer = MutableBuffer::::from_len_zeroed((length + 7) / 8); - - extend(&mut buffer, length, iterator); - - Self { buffer, length } + // Safety: Iterator is `TrustedLen` + unsafe { Self::from_trusted_len_iter_unchecked(iterator) } } /// Creates a new [`MutableBitmap`] from an iterator of booleans. diff --git a/tests/it/bitmap/mutable.rs b/tests/it/bitmap/mutable.rs index 858a2fb5883..432a59083fc 100644 --- a/tests/it/bitmap/mutable.rs +++ b/tests/it/bitmap/mutable.rs @@ -3,6 +3,13 @@ use arrow2::{ buffer::MutableBuffer, }; +#[test] +fn from_slice() { + let slice = &[true, false, true]; + let a = MutableBitmap::from(slice); + assert_eq!(a.iter().collect::>(), slice); +} + #[test] fn trusted_len() { let data = vec![true; 65];