From aa83856a0cdd82635d4857b5855df512e93a6fcc Mon Sep 17 00:00:00 2001 From: Jorge Leitao Date: Sat, 18 Jun 2022 07:13:14 +0200 Subject: [PATCH] Fixed error in chunked (#1081) --- .../utils/chunk_iterator/chunks_exact.rs | 13 +++-- src/bitmap/utils/chunks_exact_mut.rs | 3 + src/compute/arity_assign.rs | 55 +++++++++---------- tests/it/bitmap/assign_ops.rs | 19 +++++++ tests/it/bitmap/utils/bit_chunks_exact.rs | 2 +- 5 files changed, 55 insertions(+), 37 deletions(-) diff --git a/src/bitmap/utils/chunk_iterator/chunks_exact.rs b/src/bitmap/utils/chunk_iterator/chunks_exact.rs index 5b2eaf47819..df4520bd207 100644 --- a/src/bitmap/utils/chunk_iterator/chunks_exact.rs +++ b/src/bitmap/utils/chunk_iterator/chunks_exact.rs @@ -16,14 +16,15 @@ pub struct BitChunksExact<'a, T: BitChunk> { impl<'a, T: BitChunk> BitChunksExact<'a, T> { /// Creates a new [`BitChunksExact`]. #[inline] - pub fn new(slice: &'a [u8], len: usize) -> Self { - assert!(len <= slice.len() * 8); + pub fn new(bitmap: &'a [u8], length: usize) -> Self { + assert!(length <= bitmap.len() * 8); let size_of = std::mem::size_of::(); - let split = (len / 8 / size_of) * size_of; - let chunks = &slice[..split]; - let remainder = &slice[split..]; - let remainder_len = len - chunks.len() * 8; + let bitmap = &bitmap[..length.saturating_add(7) / 8]; + + let split = (length / 8 / size_of) * size_of; + let (chunks, remainder) = bitmap.split_at(split); + let remainder_len = length - chunks.len() * 8; let iter = chunks.chunks_exact(size_of); Self { diff --git a/src/bitmap/utils/chunks_exact_mut.rs b/src/bitmap/utils/chunks_exact_mut.rs index afc55ee093d..7a5a91a1280 100644 --- a/src/bitmap/utils/chunks_exact_mut.rs +++ b/src/bitmap/utils/chunks_exact_mut.rs @@ -17,8 +17,11 @@ impl<'a, T: BitChunk> BitChunksExactMut<'a, T> { /// Returns a new [`BitChunksExactMut`] #[inline] pub fn new(bitmap: &'a mut [u8], length: usize) -> Self { + assert!(length <= bitmap.len() * 8); let size_of = std::mem::size_of::(); + let bitmap = &mut bitmap[..length.saturating_add(7) / 8]; + let split = (length / 8 / size_of) * size_of; let (chunks, remainder) = bitmap.split_at_mut(split); let remainder_len = length - chunks.len() * 8; diff --git a/src/compute/arity_assign.rs b/src/compute/arity_assign.rs index b8746cdd81c..ff504c9690f 100644 --- a/src/compute/arity_assign.rs +++ b/src/compute/arity_assign.rs @@ -46,42 +46,37 @@ where // new buffer, that is benchmarked to be ~2x faster than first memcpy and assign in place // for the validity bits it can be much faster as we might need to iterate all bits if the // bitmap has an offset. - match rhs.validity() { - None => {} - Some(rhs) => { - if lhs.validity().is_none() { - *lhs = lhs.with_validity(Some(rhs.clone())) - } else { - lhs.apply_validity(|bitmap| { - // we need to take ownership for the `into_mut` call, but leave the `&mut` lhs intact - // so that we can later assign the result to out `&mut bitmap` - let owned_lhs = std::mem::take(bitmap); + if let Some(rhs) = rhs.validity() { + if lhs.validity().is_none() { + *lhs = lhs.with_validity(Some(rhs.clone())) + } else { + lhs.apply_validity(|bitmap| { + // we need to take ownership for the `into_mut` call, but leave the `&mut` lhs intact + // so that we can later assign the result to out `&mut bitmap` + let owned_lhs = std::mem::take(bitmap); - match owned_lhs.into_mut() { - // we take alloc and write to new buffer - Either::Left(immutable) => { - // we allocate a new bitmap because that is a lot faster - // than doing the memcpy or the potential iteration of bits if - // we are dealing with an offset - let new = &immutable & rhs; - *bitmap = new; - } - // we can mutate in place, happy days. - Either::Right(mut mutable) => { - let mut mutable_ref = &mut mutable; - mutable_ref &= rhs; - *bitmap = mutable.into() - } + *bitmap = match owned_lhs.into_mut() { + // we take alloc and write to new buffer + Either::Left(immutable) => { + // we allocate a new bitmap + &immutable & rhs } - }); - } + // we can mutate in place, happy days. + Either::Right(mut mutable) => { + let mut mutable_ref = &mut mutable; + mutable_ref &= rhs; + mutable.into() + } + } + }); } } + // we need to take ownership for the `into_mut` call, but leave the `&mut` lhs intact // so that we can later assign the result to out `&mut lhs` let owned_lhs = std::mem::take(lhs); - match owned_lhs.into_mut() { + *lhs = match owned_lhs.into_mut() { // we take alloc and write to new buffer Either::Left(mut immutable) => { let values = immutable @@ -91,7 +86,7 @@ where .map(|(l, r)| op(*l, *r)) .collect::>(); immutable.set_values(values.into()); - *lhs = immutable; + immutable } // we can mutate in place Either::Right(mut mutable) => { @@ -100,7 +95,7 @@ where .zip(rhs.values().iter()) .for_each(|(l, r)| *l = op(*l, *r)) }); - *lhs = mutable.into() + mutable.into() } } } diff --git a/tests/it/bitmap/assign_ops.rs b/tests/it/bitmap/assign_ops.rs index ef01fdd2089..caf5586cf94 100644 --- a/tests/it/bitmap/assign_ops.rs +++ b/tests/it/bitmap/assign_ops.rs @@ -1,5 +1,9 @@ +use proptest::prelude::*; + use arrow2::bitmap::{binary_assign, unary_assign, Bitmap, MutableBitmap}; +use crate::bitmap::bitmap_strategy; + #[test] fn basics() { let mut b = MutableBitmap::from_iter(std::iter::repeat(true).take(10)); @@ -58,3 +62,18 @@ fn fast_paths() { let b = b | &c; assert_eq!(b, MutableBitmap::from_iter([true, false])); } + +proptest! { + /// Asserts that !bitmap equals all bits flipped + #[test] + #[cfg_attr(miri, ignore)] // miri and proptest do not work well :( + fn not(b in bitmap_strategy()) { + let not_b: MutableBitmap = b.iter().map(|x| !x).collect(); + + let mut b = b.make_mut(); + + unary_assign(&mut b, |x: u8| !x); + + assert_eq!(b, not_b); + } +} diff --git a/tests/it/bitmap/utils/bit_chunks_exact.rs b/tests/it/bitmap/utils/bit_chunks_exact.rs index 38280624dd1..56ac806f16b 100644 --- a/tests/it/bitmap/utils/bit_chunks_exact.rs +++ b/tests/it/bitmap/utils/bit_chunks_exact.rs @@ -29,5 +29,5 @@ fn remainder_u16() { ); assert_eq!(iter.next(), Some(511)); assert_eq!(iter.next(), None); - assert_eq!(iter.remainder(), 0b1101_1011_0000_0001u16); + assert_eq!(iter.remainder(), 1u16); }