From 90b4f859425336228d1d88e2d688a1e7251fdfef Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sun, 26 Sep 2021 13:55:43 +0200 Subject: [PATCH] use align_to --- src/compute/aggregate/sum.rs | 41 ++++++++++-------------------------- src/types/simd/mod.rs | 9 ++------ src/types/simd/native.rs | 9 ++++---- src/types/simd/packed.rs | 10 ++++----- 4 files changed, 22 insertions(+), 47 deletions(-) diff --git a/src/compute/aggregate/sum.rs b/src/compute/aggregate/sum.rs index 2314aef95c2..0e0567f2953 100644 --- a/src/compute/aggregate/sum.rs +++ b/src/compute/aggregate/sum.rs @@ -19,43 +19,24 @@ pub trait Sum { fn simd_sum(self) -> T; } -fn split_by_alignment(values: &[T]) -> (&[T], &[T]) { - let alignment = std::mem::align_of::(); - - let vals_ptr = values.as_ptr(); - let bytes_offset = vals_ptr.align_offset(alignment); - let type_offset = if bytes_offset > 0 { - std::mem::align_of::() / bytes_offset - } else { - 0 - }; - - let head = &values[..type_offset]; - let aligned_values = &values[type_offset..]; - (head, aligned_values) -} - #[multiversion] #[clone(target = "x86_64+avx")] -fn nonnull_sum(values: &[T]) -> T +fn nonnull_sum<'a, T>(values: &'a [T]) -> T where T: NativeType + Simd + Add + std::iter::Sum, - T::Simd: Add + Sum, + T::Simd: Sum + Add, { - let (head, aligned_values) = split_by_alignment::(values); - - let mut chunks = aligned_values.chunks_exact(T::Simd::LANES); - // Safety: - // we just made sure that we work on a slice af data aligned to T::Simd - let sum = chunks.by_ref().fold(T::Simd::default(), |acc, chunk| { - acc + unsafe { T::Simd::from_chunk_aligned_unchecked(chunk) } - }); + // T::Simd is the vector type T and the alignment is similar to aligning to [T; alignment] + // the alignment of T::Simd ensures that it fits T. + let (head, simd_vals, tail) = unsafe { values.align_to::() }; - let remainder = T::Simd::from_incomplete_chunk(chunks.remainder(), T::default()); - let reduced = sum + remainder; + let mut reduced = T::Simd::from_incomplete_chunk(&[], T::default()); + for chunk in simd_vals { + reduced = reduced + chunk.clone() + } - reduced.simd_sum() + head.iter().copied().sum() + reduced.simd_sum() + head.iter().copied().sum() + tail.iter().copied().sum() } /// # Panics @@ -108,7 +89,7 @@ where /// Returns the sum of values in the array. /// /// Returns `None` if the array is empty or only contains null values. -pub fn sum_primitive(array: &PrimitiveArray) -> Option +pub fn sum_primitive<'a, T>(array: &'a PrimitiveArray) -> Option where T: NativeType + Simd + Add + std::iter::Sum, T::Simd: Add + Sum, diff --git a/src/types/simd/mod.rs b/src/types/simd/mod.rs index 41745dd81ba..0b463333937 100644 --- a/src/types/simd/mod.rs +++ b/src/types/simd/mod.rs @@ -28,17 +28,12 @@ pub trait NativeSimd: Default { /// * iff `v.len()` != `T::LANES` fn from_chunk(v: &[Self::Native]) -> Self; - /// Convert itself from a slice. - /// # Safety: - /// Caller must ensure: - /// * `v.len() == T::LANES` - /// * slice is aligned to `Self` - unsafe fn from_chunk_aligned_unchecked(v: &[Self::Native]) -> Self; - /// creates a new Self from `v` by populating items from `v` up to its length. /// Items from `v` at positions larger than the number of lanes are ignored; /// remaining items are populated with `remaining`. fn from_incomplete_chunk(v: &[Self::Native], remaining: Self::Native) -> Self; + + fn clone(&self) -> Self; } /// Trait implemented by some [`NativeType`] that have a SIMD representation. diff --git a/src/types/simd/native.rs b/src/types/simd/native.rs index f2b55117d4a..3ae1587306b 100644 --- a/src/types/simd/native.rs +++ b/src/types/simd/native.rs @@ -29,17 +29,16 @@ macro_rules! simd { ($name)(v.try_into().unwrap()) } - #[inline] - unsafe fn from_chunk_aligned_unchecked(v: &[$type]) -> Self { - ($name)(v.try_into().unwrap()) - } - #[inline] fn from_incomplete_chunk(v: &[$type], remaining: $type) -> Self { let mut a = [remaining; $lanes]; a.iter_mut().zip(v.iter()).for_each(|(a, b)| *a = *b); Self(a) } + #[inline] + fn clone(&self) -> Self { + self.clone() + } } impl std::ops::Index for $name { diff --git a/src/types/simd/packed.rs b/src/types/simd/packed.rs index cb5096ca826..78bebb24b12 100644 --- a/src/types/simd/packed.rs +++ b/src/types/simd/packed.rs @@ -23,17 +23,17 @@ macro_rules! simd { <$name>::from_slice_unaligned(v) } - #[inline] - unsafe fn from_chunk_aligned_unchecked(v: &[$type]) -> Self { - <$name>::from_slice_aligned_unchecked(v) - } - #[inline] fn from_incomplete_chunk(v: &[$type], remaining: $type) -> Self { let mut a = [remaining; $lanes]; a.iter_mut().zip(v.iter()).for_each(|(a, b)| *a = *b); <$name>::from_chunk(a.as_ref()) } + + #[inline] + fn clone(&self) -> Self { + *self + } } }; }