Skip to content

Commit

Permalink
Added take with nulls.
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Nov 25, 2021
1 parent 77960c3 commit d1eda54
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 5 deletions.
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,7 @@ harness = false
[[bench]]
name = "take"
harness = false

[[bench]]
name = "take_nulls_bitmap"
harness = false
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ core_simd_take 2^20 f32 time: [911.13 us 912.21 us 913.33 us]
naive_take 2^20 f32 time: [912.39 us 915.22 us 918.41 us]
```

### Nullable take of values (`Bitmap`)

```
core_simd_take_nulls 2^20 f32 time: [950.40 us 954.08 us 958.88 us]
naive_take_nulls 2^20 f32 time: [2.3714 ms 2.3968 ms 2.4296 ms]
```

## Bench results on default

Command:
Expand Down
40 changes: 40 additions & 0 deletions benches/take_nulls_bitmap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
use criterion::{criterion_group, criterion_main, Criterion};

use simd_benches::bitmap_ops;
use simd_benches::take::*;

/// Asserts that two `f32` slices are element-wise approximately equal.
///
/// Two elements are considered close when their absolute difference is below
/// 0.1% of the magnitude of the left element, or when both are (almost) zero.
///
/// # Panics
///
/// Panics if the slices differ in length or if any pair of elements is not
/// close.
fn close(l: &[f32], r: &[f32]) {
    // `zip` stops at the shorter slice, so a length mismatch must be checked explicitly.
    assert_eq!(l.len(), r.len(), "slices differ in length");
    for (l, r) in l.iter().zip(r.iter()) {
        let both_near_zero = l.abs() < 0.000001 && r.abs() < 0.000001;
        // Use the magnitude of `l`: a negative `l` would otherwise produce a
        // negative tolerance that no difference can satisfy.
        assert!((l - r).abs() < l.abs() * 0.001 || both_near_zero);
    }
}

/// Registers the nullable-take benchmarks for input sizes 2^10, 2^12, ..., 2^20.
///
/// For every size it builds an `f32` array, an identity index vector and a
/// validity bitmap in which every tenth slot is null, checks that the SIMD and
/// naive implementations agree, and then benchmarks both of them.
fn add_benchmark(c: &mut Criterion) {
    // Suffix inserted into the benchmark ids; currently empty.
    let name = "";

    for log2_size in (10..=20).step_by(2) {
        let size = 2usize.pow(log2_size);

        let array: Vec<f32> = (0..size).map(|x| 1.0 + x as f32).collect();
        let indices: Vec<usize> = (0..size).collect();

        // One validity bit per value, packed eight to a byte.
        let mut bytes = vec![0u8; size / 8];
        // 10% nulls
        for x in 0..size {
            bitmap_ops::set_bit(&mut bytes, x, (1 + x) % 10 != 0);
        }
        let mask = (bytes, size);

        // Sanity check: both implementations must produce the same values.
        let simd_result = core_simd_take_nulls(&array, &indices, &mask);
        let naive_result = naive_take_nulls(&array, &indices, &mask);
        close(&simd_result, &naive_result);

        let simd_id = format!("core_simd_take_nulls{} 2^{} f32", name, log2_size);
        c.bench_function(&simd_id, |b| {
            b.iter(|| core_simd_take_nulls(&array, &indices, &mask))
        });

        let naive_id = format!("naive_take_nulls{} 2^{} f32", name, log2_size);
        c.bench_function(&naive_id, |b| {
            b.iter(|| naive_take_nulls(&array, &indices, &mask))
        });
    }
}

criterion_group!(benches, add_benchmark);
criterion_main!(benches);
55 changes: 50 additions & 5 deletions src/take.rs
Original file line number Diff line number Diff line change
@@ -1,25 +1,70 @@
use core_simd::*;

use super::bitmap_ops::*;

/// Gathers `values[i]` for every `i` in `indices`, in order, using a scalar loop.
///
/// # Panics
///
/// Panics if any index is out of bounds for `values`.
pub fn naive_take(values: &[f32], indices: &[usize]) -> Vec<f32> {
    let mut taken = Vec::with_capacity(indices.len());
    for &index in indices {
        taken.push(values[index]);
    }
    taken
}

/// Number of indices/values handled per SIMD step (the width of `usizex8`).
const LANES: usize = 8;
/// Number of bitmap bytes consumed per SIMD step: one validity bit per lane,
/// eight bits per byte.
const MASK_LANES: usize = 8 / 8;

/// Gathers `values[i]` for every `i` in `indices` using SIMD gathers of
/// `LANES` elements at a time.
///
/// Indices are processed in chunks of `LANES`; any trailing indices that do
/// not fill a whole chunk are handled with a scalar loop so that every output
/// slot is written.
///
/// Out-of-bounds indices yield `0.0` (the `f32` default) rather than
/// panicking, in both the SIMD path (`gather_or_default`) and the scalar tail.
pub fn core_simd_take(values: &[f32], indices: &[usize]) -> Vec<f32> {
    let mut result = vec![0.0; indices.len()]; // todo: maybeUninit

    let chunks = indices.chunks_exact(LANES);
    // Indices left over once all full chunks have been consumed.
    let tail = chunks.remainder();
    let mut result_chunks = result.chunks_exact_mut(LANES);

    chunks
        .zip(result_chunks.by_ref())
        .for_each(|(chunk, r_chunk)| {
            let idxs: [usize; LANES] = chunk.try_into().unwrap();
            let idxs: usizex8 = usizex8::from_array(idxs);

            let r = Simd::gather_or_default(&values, idxs);
            let r: [f32; LANES] = r.to_array();

            let r_chunk: &mut [f32; LANES] = r_chunk.try_into().unwrap();
            *r_chunk = r;
        });

    // Scalar fallback for the final `indices.len() % LANES` elements, keeping
    // the same "default on out-of-bounds" behaviour as the SIMD gather.
    result_chunks
        .into_remainder()
        .iter_mut()
        .zip(tail)
        .for_each(|(out, &index)| {
            *out = values.get(index).copied().unwrap_or_default();
        });

    result
}

/// A validity bitmap: the packed bytes holding one bit per slot, and the
/// number of slots (bits) it describes.
type Bitmap = (Vec<u8>, usize);

/// Scalar nullable take: for each position, returns `values[indices[pos]]`
/// when bit `pos` of `mask` is set and `0.0` otherwise.
///
/// The output has as many elements as the shorter of `indices` and the bitmap
/// length `mask.1`.
///
/// # Panics
///
/// Panics if an index whose validity bit is set is out of bounds for `values`.
pub fn naive_take_nulls(values: &[f32], indices: &[usize], mask: &Bitmap) -> Vec<f32> {
    let (bytes, len) = mask;

    indices
        .iter()
        .zip(0..*len)
        .map(|(&index, position)| {
            if get_bit(bytes, position) {
                values[index]
            } else {
                0.0f32
            }
        })
        .collect()
}

/// SIMD nullable take: gathers `LANES` values at a time, using one bitmap
/// chunk of `MASK_LANES` bytes per step to decide which lanes are gathered and
/// which are filled with the `f32` default (`0.0`).
///
/// Only whole chunks are processed; output slots beyond the last full chunk
/// keep their initial value of `0.0`.
///
/// # Panics
///
/// Panics if the bitmap length `mask.1` is not a multiple of 16.
pub fn core_simd_take_nulls(values: &[f32], indices: &[usize], mask: &Bitmap) -> Vec<f32> {
    assert_eq!(mask.1 % 16, 0); // todo: handle remainders

    let mut result = vec![0.0; indices.len()]; // todo: maybeUninit

    // todo: the remainders of the index and mask chunk iterators are not processed yet.
    let index_chunks = indices.chunks_exact(LANES);
    let validity_chunks = mask.0.chunks_exact(MASK_LANES);
    let out_chunks = result.chunks_exact_mut(LANES);

    for ((index_chunk, validity_chunk), out_chunk) in
        index_chunks.zip(validity_chunks).zip(out_chunks)
    {
        let idxs: [usize; LANES] = index_chunk.try_into().unwrap();
        let idxs: usizex8 = usizex8::from_array(idxs);

        // One validity bit per lane.
        let bits: [u8; MASK_LANES] = validity_chunk.try_into().unwrap();
        let enabled = masksizex8::from_bitmask(bits);

        // Lanes that are not enabled receive the default value instead of a gathered one.
        let gathered = Simd::gather_select(&values, enabled, idxs, Simd::splat(f32::default()));
        let gathered: [f32; LANES] = gathered.to_array();

        let out_chunk: &mut [f32; LANES] = out_chunk.try_into().unwrap();
        *out_chunk = gathered;
    }

    result
}

0 comments on commit d1eda54

Please sign in to comment.