-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
77960c3
commit d1eda54
Showing
4 changed files
with
101 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,3 +25,7 @@ harness = false | |
[[bench]] | ||
name = "take" | ||
harness = false | ||
|
||
[[bench]] | ||
name = "take_nulls_bitmap" | ||
harness = false |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
use criterion::{criterion_group, criterion_main, Criterion}; | ||
|
||
use simd_benches::bitmap_ops; | ||
use simd_benches::take::*; | ||
|
||
/// Asserts that `l` and `r` are element-wise approximately equal.
///
/// Two elements count as close when their absolute difference is below 0.1% of
/// the magnitude of the left element, or when both are smaller than `1e-6` in
/// absolute value.
///
/// # Panics
///
/// Panics if the slices differ in length or if any pair of elements is not close.
fn close(l: &[f32], r: &[f32]) {
    // `zip` stops at the shorter slice, so a length mismatch would otherwise go unnoticed.
    assert_eq!(l.len(), r.len(), "slices differ in length");
    for (i, (l, r)) in l.iter().zip(r).enumerate() {
        // Scale the tolerance by |l|: with a signed `l` the bound turns negative for
        // negative values and the comparison could never succeed.
        let relative = (l - r).abs() < l.abs() * 0.001;
        let both_tiny = l.abs() < 0.000001 && r.abs() < 0.000001;
        assert!(
            relative || both_tiny,
            "values differ at index {}: {} vs {}",
            i,
            l,
            r
        );
    }
}
|
||
fn add_benchmark(c: &mut Criterion) { | ||
let name = ""; | ||
(10..=20).step_by(2).for_each(|log2_size| { | ||
let size = 2usize.pow(log2_size); | ||
let array = (0..size).map(|x| 1.0 + x as f32).collect::<Vec<_>>(); | ||
let mut mask = vec![0u8; size / 8]; | ||
// 10% nulls | ||
(0..size).for_each(|x| bitmap_ops::set_bit(&mut mask, x, (1 + x) % 10 != 0)); | ||
let mask = (mask, size); | ||
let indices = (0..size).collect::<Vec<_>>(); | ||
// check that they are equal... | ||
close( | ||
&core_simd_take_nulls(&array, &indices, &mask), | ||
&naive_take_nulls(&array, &indices, &mask), | ||
); | ||
|
||
c.bench_function( | ||
&format!("core_simd_take_nulls{} 2^{} f32", name, log2_size), | ||
|b| b.iter(|| core_simd_take_nulls(&array, &indices, &mask)), | ||
); | ||
c.bench_function( | ||
&format!("naive_take_nulls{} 2^{} f32", name, log2_size), | ||
|b| b.iter(|| naive_take_nulls(&array, &indices, &mask)), | ||
); | ||
}); | ||
} | ||
|
||
// Register `add_benchmark` in the `benches` group and let criterion generate
// the benchmark binary's entry point from it.
criterion_group!(benches, add_benchmark); | ||
criterion_main!(benches); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,70 @@ | ||
use core_simd::*; | ||
|
||
use super::bitmap_ops::*; | ||
|
||
/// Gathers `values[i]` for every `i` in `indices`, preserving the order of `indices`.
///
/// # Panics
///
/// Panics if any index is out of bounds for `values`.
pub fn naive_take(values: &[f32], indices: &[usize]) -> Vec<f32> {
    let mut taken = Vec::with_capacity(indices.len());
    for &idx in indices {
        taken.push(values[idx]);
    }
    taken
}
|
||
/// Number of values processed per SIMD operation.
const LANES: usize = 8;
/// Number of bitmap bytes that hold the mask bits for one group of `LANES`
/// values (one bit per value, eight bits per byte).
const MASK_LANES: usize = LANES / 8;
|
||
pub fn core_simd_take(values: &[f32], indices: &[usize]) -> Vec<f32> { | ||
let chunks = indices.chunks_exact(8); | ||
let chunks = indices.chunks_exact(LANES); | ||
// todo handle remainder | ||
|
||
let mut result = vec![0.0; indices.len()]; // todo: maybeUninit | ||
let result_chunks = result.chunks_exact_mut(8); | ||
let result_chunks = result.chunks_exact_mut(LANES); | ||
chunks.zip(result_chunks).for_each(|(chunk, r_chunk)| { | ||
let idxs: [usize; 8] = chunk.try_into().unwrap(); | ||
let idxs: [usize; LANES] = chunk.try_into().unwrap(); | ||
let idxs: usizex8 = usizex8::from_array(idxs); | ||
|
||
let r = Simd::gather_or_default(&values, idxs); | ||
let r: [f32; 8] = r.to_array(); | ||
let r: [f32; LANES] = r.to_array(); | ||
|
||
let r_chunk: &mut [f32; 8] = r_chunk.try_into().unwrap(); | ||
let r_chunk: &mut [f32; LANES] = r_chunk.try_into().unwrap(); | ||
*r_chunk = r; | ||
}); | ||
|
||
result | ||
} | ||
|
||
/// Validity bitmap: the bytes holding the bits (`.0`) and the number of bits in
/// use (`.1`). An unset bit marks the corresponding position as null.
type Bitmap = (Vec<u8>, usize); | ||
|
||
pub fn naive_take_nulls(values: &[f32], indices: &[usize], mask: &Bitmap) -> Vec<f32> { | ||
let mask = (0..mask.1).map(|x| get_bit(&mask.0, x)); | ||
|
||
indices | ||
.iter() | ||
.zip(mask) | ||
.map(|(x, m)| if m { values[*x] } else { 0.0f32 }) | ||
.collect() | ||
} | ||
|
||
pub fn core_simd_take_nulls(values: &[f32], indices: &[usize], mask: &Bitmap) -> Vec<f32> { | ||
assert_eq!(mask.1 % 16, 0); // todo: handle remainders | ||
let chunks = indices.chunks_exact(LANES); | ||
let mask_chunks = mask.0.chunks_exact(MASK_LANES); | ||
//let remainder = chunks.remainder(); | ||
//let mask_remainder = mask_chunks.remainder(); | ||
|
||
let mut result = vec![0.0; indices.len()]; // todo: maybeUninit | ||
let result_chunks = result.chunks_exact_mut(LANES); | ||
chunks | ||
.zip(mask_chunks) | ||
.zip(result_chunks) | ||
.for_each(|((chunk, mask_chunk), r_chunk)| { | ||
let idxs: [usize; LANES] = chunk.try_into().unwrap(); | ||
let idxs: usizex8 = usizex8::from_array(idxs); | ||
|
||
let mask: [u8; MASK_LANES] = mask_chunk.try_into().unwrap(); | ||
let mask = masksizex8::from_bitmask(mask); | ||
|
||
let r = Simd::gather_select(&values, mask, idxs, Simd::splat(f32::default())); | ||
let r: [f32; LANES] = r.to_array(); | ||
|
||
let r_chunk: &mut [f32; LANES] = r_chunk.try_into().unwrap(); | ||
*r_chunk = r; | ||
}); | ||
result | ||
} |