Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Fixed error in computing slices of set bitmaps. #293

Merged
merged 2 commits into from
Aug 18, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ pub struct Bitmap {

impl std::fmt::Debug for Bitmap {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fmt(&self.bytes, self.offset, self.length, f)
let (bytes, offset, len) = self.as_slice();
jorgecarleitao marked this conversation as resolved.
Show resolved Hide resolved
fmt(bytes, offset, len, f)
}
}

Expand Down Expand Up @@ -111,8 +112,8 @@ impl Bitmap {
/// exceeds the allocated capacity of `self`.
#[inline]
pub fn slice(mut self, offset: usize, length: usize) -> Self {
assert!(offset + length <= self.length);
self.offset += offset;
assert!(self.offset + length <= self.bytes.len() * 8);
jorgecarleitao marked this conversation as resolved.
Show resolved Hide resolved
self.length = length;
self.null_count = null_count(&self.bytes, self.offset, self.length);
self
Expand Down
4 changes: 2 additions & 2 deletions src/bitmap/utils/slice_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,15 @@ impl<'a> Iterator for SlicesIterator<'a> {
// at the beginning of a byte => try to skip it all together
match (self.on_region, self.current_byte) {
(true, &255u8) => {
self.len += 8;
self.len = std::cmp::min(self.max_len - self.start, self.len + 8);
match self.values.next() {
Some(v) => self.current_byte = v,
None => return self.finish(),
};
continue;
}
(false, &0) => {
self.len += 8;
self.len = std::cmp::min(self.max_len - self.start, self.len + 8);
match self.values.next() {
Some(v) => self.current_byte = v,
None => return self.finish(),
Expand Down
209 changes: 102 additions & 107 deletions tests/it/bitmap/utils/slice_iterator.rs
Original file line number Diff line number Diff line change
@@ -1,144 +1,139 @@
use arrow2::bitmap::utils::SlicesIterator;
use arrow2::bitmap::Bitmap;

#[cfg(test)]
mod tests {
use super::*;
#[test]
fn check_invariant() {
let values = (0..8).map(|i| i % 2 != 0).collect::<Bitmap>();
let iter = SlicesIterator::new(&values);

#[test]
fn check_invariant() {
let values = (0..8).map(|i| i % 2 != 0).collect::<Bitmap>();
let iter = SlicesIterator::new(&values);
let slots = iter.slots();

let slots = iter.slots();
let slices = iter.collect::<Vec<_>>();

let slices = iter.collect::<Vec<_>>();
assert_eq!(slices, vec![(1, 1), (3, 1), (5, 1), (7, 1)]);

assert_eq!(slices, vec![(1, 1), (3, 1), (5, 1), (7, 1)]);

let mut sum = 0;
for (_, len) in slices {
sum += len;
}
assert_eq!(sum, slots);
let mut sum = 0;
for (_, len) in slices {
sum += len;
}
assert_eq!(sum, slots);
}

#[test]
fn single_set() {
let values = (0..16).map(|i| i == 1).collect::<Bitmap>();
#[test]
fn single_set() {
let values = (0..16).map(|i| i == 1).collect::<Bitmap>();

let iter = SlicesIterator::new(&values);
let count = iter.slots();
let chunks = iter.collect::<Vec<_>>();
let iter = SlicesIterator::new(&values);
let count = iter.slots();
let chunks = iter.collect::<Vec<_>>();

assert_eq!(chunks, vec![(1, 1)]);
assert_eq!(count, 1);
}
assert_eq!(chunks, vec![(1, 1)]);
assert_eq!(count, 1);
}

#[test]
fn single_unset() {
let values = (0..64).map(|i| i != 1).collect::<Bitmap>();
#[test]
fn single_unset() {
let values = (0..64).map(|i| i != 1).collect::<Bitmap>();

let iter = SlicesIterator::new(&values);
let count = iter.slots();
let chunks = iter.collect::<Vec<_>>();
let iter = SlicesIterator::new(&values);
let count = iter.slots();
let chunks = iter.collect::<Vec<_>>();

assert_eq!(chunks, vec![(0, 1), (2, 62)]);
assert_eq!(count, 64 - 1);
}
assert_eq!(chunks, vec![(0, 1), (2, 62)]);
assert_eq!(count, 64 - 1);
}

#[test]
fn generic() {
let values = (0..130).map(|i| i % 62 != 0).collect::<Bitmap>();
#[test]
fn generic() {
let values = (0..130).map(|i| i % 62 != 0).collect::<Bitmap>();

let iter = SlicesIterator::new(&values);
let count = iter.slots();
let chunks = iter.collect::<Vec<_>>();
let iter = SlicesIterator::new(&values);
let count = iter.slots();
let chunks = iter.collect::<Vec<_>>();

assert_eq!(chunks, vec![(1, 61), (63, 61), (125, 5)]);
assert_eq!(count, 61 + 61 + 5);
}
assert_eq!(chunks, vec![(1, 61), (63, 61), (125, 5)]);
assert_eq!(count, 61 + 61 + 5);
}

#[test]
fn incomplete_byte() {
let values = (0..6).map(|i| i == 1).collect::<Bitmap>();
#[test]
fn incomplete_byte() {
let values = (0..6).map(|i| i == 1).collect::<Bitmap>();

let iter = SlicesIterator::new(&values);
let count = iter.slots();
let chunks = iter.collect::<Vec<_>>();
let iter = SlicesIterator::new(&values);
let count = iter.slots();
let chunks = iter.collect::<Vec<_>>();

assert_eq!(chunks, vec![(1, 1)]);
assert_eq!(count, 1);
}
assert_eq!(chunks, vec![(1, 1)]);
assert_eq!(count, 1);
}

#[test]
fn incomplete_byte1() {
let values = (0..12).map(|i| i == 9).collect::<Bitmap>();
#[test]
fn incomplete_byte1() {
let values = (0..12).map(|i| i == 9).collect::<Bitmap>();

let iter = SlicesIterator::new(&values);
let count = iter.slots();
let chunks = iter.collect::<Vec<_>>();
let iter = SlicesIterator::new(&values);
let count = iter.slots();
let chunks = iter.collect::<Vec<_>>();

assert_eq!(chunks, vec![(9, 1)]);
assert_eq!(count, 1);
}
assert_eq!(chunks, vec![(9, 1)]);
assert_eq!(count, 1);
}

#[test]
fn end_of_byte() {
let values = (0..16).map(|i| i != 7).collect::<Bitmap>();
#[test]
fn end_of_byte() {
let values = (0..16).map(|i| i != 7).collect::<Bitmap>();

let iter = SlicesIterator::new(&values);
let count = iter.slots();
let chunks = iter.collect::<Vec<_>>();
let iter = SlicesIterator::new(&values);
let count = iter.slots();
let chunks = iter.collect::<Vec<_>>();

assert_eq!(chunks, vec![(0, 7), (8, 8)]);
assert_eq!(count, 15);
}
assert_eq!(chunks, vec![(0, 7), (8, 8)]);
assert_eq!(count, 15);
}

#[test]
fn bla() {
let values = vec![true, true, true, true, true, true, true, false]
.into_iter()
.collect::<Bitmap>();
let iter = SlicesIterator::new(&values);
let count = iter.slots();
assert_eq!(values.null_count() + iter.slots(), values.len());
#[test]
fn bla() {
let values = vec![true, true, true, true, true, true, true, false]
.into_iter()
.collect::<Bitmap>();
let iter = SlicesIterator::new(&values);
let count = iter.slots();
assert_eq!(values.null_count() + iter.slots(), values.len());

let total = iter.into_iter().fold(0, |acc, x| acc + x.1);
let total = iter.into_iter().fold(0, |acc, x| acc + x.1);

assert_eq!(count, total);
}
assert_eq!(count, total);
}

#[test]
fn past_end_should_not_be_returned() {
let values = Bitmap::from_u8_slice(&[0b11111010], 3);
let iter = SlicesIterator::new(&values);
let count = iter.slots();
assert_eq!(values.null_count() + iter.slots(), values.len());
#[test]
fn past_end_should_not_be_returned() {
let values = Bitmap::from_u8_slice(&[0b11111010], 3);
let iter = SlicesIterator::new(&values);
let count = iter.slots();
assert_eq!(values.null_count() + iter.slots(), values.len());

let total = iter.into_iter().fold(0, |acc, x| acc + x.1);
let total = iter.into_iter().fold(0, |acc, x| acc + x.1);

assert_eq!(count, total);
}
assert_eq!(count, total);
}

#[test]
fn sliced() {
let values = Bitmap::from_u8_slice(&[0b11111010, 0b11111011], 16);
let values = values.slice(8, 2);
let iter = SlicesIterator::new(&values);
#[test]
fn sliced() {
let values = Bitmap::from_u8_slice(&[0b11111010, 0b11111011], 16);
let values = values.slice(8, 2);
let iter = SlicesIterator::new(&values);

let chunks = iter.collect::<Vec<_>>();
let chunks = iter.collect::<Vec<_>>();

// the first "11" in the second byte
assert_eq!(chunks, vec![(0, 2)]);
}
// the first "11" in the second byte
assert_eq!(chunks, vec![(0, 2)]);
}

#[test]
fn remainder_1() {
let values = Bitmap::from_u8_slice(&[0, 0, 0b00000000, 0b00010101], 27);
let values = values.slice(22, 5);
let iter = SlicesIterator::new(&values);
let chunks = iter.collect::<Vec<_>>();
assert_eq!(chunks, vec![(2, 1), (4, 1)]);
}
#[test]
fn remainder_1() {
let values = Bitmap::from_u8_slice(&[0, 0, 0b00000000, 0b00010101], 27);
let values = values.slice(22, 5);
let iter = SlicesIterator::new(&values);
let chunks = iter.collect::<Vec<_>>();
assert_eq!(chunks, vec![(2, 1), (4, 1)]);
}
43 changes: 43 additions & 0 deletions tests/it/filter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
use arrow2::array::{Array, BooleanArray, Utf8Array};
use arrow2::bitmap::utils::SlicesIterator;
use rand::distributions::{Alphanumeric, Bernoulli, Uniform};
use rand::prelude::StdRng;
use rand::Rng;
use rand::SeedableRng;
use std::iter::FromIterator;

/// Regression check: for many randomly chosen `(offset, len)` windows, the
/// regions yielded by `SlicesIterator` over a sliced boolean mask must stay
/// within the bounds of an identically sliced array.
#[test]
fn filter_slices() {
    let mut rng = StdRng::seed_from_u64(42);
    let length = 50000;

    // Random alphanumeric strings, each between 0 and 7 characters long.
    let strings = (0..length).map(|_| {
        let n_chars = rng.sample(Uniform::new(0usize, 8));
        (&mut rng)
            .sample_iter(&Alphanumeric)
            .take(n_chars)
            .map(char::from)
            .collect::<String>()
    });
    let arr = Utf8Array::<i32>::from_iter_values(strings);

    // Boolean mask of the same length; every slot is valid and set with p = 0.5.
    let coin = Bernoulli::new(0.5).unwrap();
    let bits = (0..length).map(|_| {
        let bit: bool = rng.sample(&coin);
        Some(bit)
    });
    let mask = BooleanArray::from_iter(bits);

    for offset in 100usize..(length - 1) {
        // Window length is drawn so that `offset + len` never exceeds `length`.
        let len = rng.sample(Uniform::new(0, length - offset));
        let arr_s = arr.slice(offset, len);
        let mask_s = mask.slice(offset, len);

        for (start, run) in SlicesIterator::new(mask_s.values()) {
            // A region ending past the sliced array means the iterator ignored
            // the slice's length.
            assert!(start + run <= arr_s.len(), "Fail");
        }
    }
}
2 changes: 2 additions & 0 deletions tests/it/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ mod ffi;

mod io;
mod test_util;

mod filter;