From d1eda541a5fcf8542c814f72fc67a0827f4ab218 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Thu, 25 Nov 2021 05:24:08 +0000 Subject: [PATCH] Added take with nulls. --- Cargo.toml | 4 +++ README.md | 7 +++++ benches/take_nulls_bitmap.rs | 40 ++++++++++++++++++++++++++ src/take.rs | 55 ++++++++++++++++++++++++++++++++---- 4 files changed, 101 insertions(+), 5 deletions(-) create mode 100644 benches/take_nulls_bitmap.rs diff --git a/Cargo.toml b/Cargo.toml index 5fac02b..5f99f32 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,3 +25,7 @@ harness = false [[bench]] name = "take" harness = false + +[[bench]] +name = "take_nulls_bitmap" +harness = false diff --git a/README.md b/README.md index a1644aa..fee6a96 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,13 @@ core_simd_take 2^20 f32 time: [911.13 us 912.21 us 913.33 us] naive_take 2^20 f32 time: [912.39 us 915.22 us 918.41 us] ``` +### Nullable take of values (`Bitmap) + +``` +core_simd_take_nulls 2^20 f32 time: [950.40 us 954.08 us 958.88 us] +naive_take_nulls 2^20 f32 time: [2.3714 ms 2.3968 ms 2.4296 ms] +``` + ## Bench results on default Command: diff --git a/benches/take_nulls_bitmap.rs b/benches/take_nulls_bitmap.rs new file mode 100644 index 0000000..c3a8058 --- /dev/null +++ b/benches/take_nulls_bitmap.rs @@ -0,0 +1,40 @@ +use criterion::{criterion_group, criterion_main, Criterion}; + +use simd_benches::bitmap_ops; +use simd_benches::take::*; + +fn close(l: &[f32], r: &[f32]) { + for (l, r) in l.iter().zip(r.iter()) { + assert!((l - r).abs() < l * 0.001 || (l.abs() < 0.000001 && r.abs() < 0.000001)); + } +} + +fn add_benchmark(c: &mut Criterion) { + let name = ""; + (10..=20).step_by(2).for_each(|log2_size| { + let size = 2usize.pow(log2_size); + let array = (0..size).map(|x| 1.0 + x as f32).collect::>(); + let mut mask = vec![0u8; size / 8]; + // 10% nulls + (0..size).for_each(|x| bitmap_ops::set_bit(&mut mask, x, (1 + x) % 10 != 0)); + let mask = (mask, size); + let indices = (0..size).collect::>(); + // check that they are equal... + close( + &core_simd_take_nulls(&array, &indices, &mask), + &naive_take_nulls(&array, &indices, &mask), + ); + + c.bench_function( + &format!("core_simd_take_nulls{} 2^{} f32", name, log2_size), + |b| b.iter(|| core_simd_take_nulls(&array, &indices, &mask)), + ); + c.bench_function( + &format!("naive_take_nulls{} 2^{} f32", name, log2_size), + |b| b.iter(|| naive_take_nulls(&array, &indices, &mask)), + ); + }); +} + +criterion_group!(benches, add_benchmark); +criterion_main!(benches); diff --git a/src/take.rs b/src/take.rs index 38bbd5f..9245403 100644 --- a/src/take.rs +++ b/src/take.rs @@ -1,25 +1,70 @@ use core_simd::*; +use super::bitmap_ops::*; + pub fn naive_take(values: &[f32], indices: &[usize]) -> Vec { indices.iter().map(|i| values[*i]).collect() } +const LANES: usize = 8; +const MASK_LANES: usize = 8 / 8; + pub fn core_simd_take(values: &[f32], indices: &[usize]) -> Vec { - let chunks = indices.chunks_exact(8); + let chunks = indices.chunks_exact(LANES); // todo handle remainder let mut result = vec![0.0; indices.len()]; // todo: maybeUninit - let result_chunks = result.chunks_exact_mut(8); + let result_chunks = result.chunks_exact_mut(LANES); chunks.zip(result_chunks).for_each(|(chunk, r_chunk)| { - let idxs: [usize; 8] = chunk.try_into().unwrap(); + let idxs: [usize; LANES] = chunk.try_into().unwrap(); let idxs: usizex8 = usizex8::from_array(idxs); let r = Simd::gather_or_default(&values, idxs); - let r: [f32; 8] = r.to_array(); + let r: [f32; LANES] = r.to_array(); - let r_chunk: &mut [f32; 8] = r_chunk.try_into().unwrap(); + let r_chunk: &mut [f32; LANES] = r_chunk.try_into().unwrap(); *r_chunk = r; }); result } + +type Bitmap = (Vec, usize); + +pub fn naive_take_nulls(values: &[f32], indices: &[usize], mask: &Bitmap) -> Vec { + let mask = (0..mask.1).map(|x| get_bit(&mask.0, x)); + + indices + .iter() + .zip(mask) + .map(|(x, m)| if m { values[*x] } else { 0.0f32 }) + .collect() +} + +pub fn core_simd_take_nulls(values: &[f32], indices: &[usize], mask: &Bitmap) -> Vec { + assert_eq!(mask.1 % 16, 0); // todo: handle remainders + let chunks = indices.chunks_exact(LANES); + let mask_chunks = mask.0.chunks_exact(MASK_LANES); + //let remainder = chunks.remainder(); + //let mask_remainder = mask_chunks.remainder(); + + let mut result = vec![0.0; indices.len()]; // todo: maybeUninit + let result_chunks = result.chunks_exact_mut(LANES); + chunks + .zip(mask_chunks) + .zip(result_chunks) + .for_each(|((chunk, mask_chunk), r_chunk)| { + let idxs: [usize; LANES] = chunk.try_into().unwrap(); + let idxs: usizex8 = usizex8::from_array(idxs); + + let mask: [u8; MASK_LANES] = mask_chunk.try_into().unwrap(); + let mask = masksizex8::from_bitmask(mask); + + let r = Simd::gather_select(&values, mask, idxs, Simd::splat(f32::default())); + let r: [f32; LANES] = r.to_array(); + + let r_chunk: &mut [f32; LANES] = r_chunk.try_into().unwrap(); + *r_chunk = r; + }); + result +}