From b018ad3d41de500c52156fb8a84d2197d966f370 Mon Sep 17 00:00:00 2001
From: The 8472
Date: Sat, 2 Sep 2023 13:37:00 +0200
Subject: [PATCH] optimize zipping over array iterators

---
 library/core/src/array/iter.rs                | 27 +++++-
 library/core/src/iter/adapters/zip.rs         | 90 +++++++++++++++++++
 library/core/tests/iter/adapters/zip.rs       |  6 +-
 .../assembly/libs/issue-115339-zip-arrays.rs  | 25 ++++++
 4 files changed, 146 insertions(+), 2 deletions(-)
 create mode 100644 tests/assembly/libs/issue-115339-zip-arrays.rs

diff --git a/library/core/src/array/iter.rs b/library/core/src/array/iter.rs
index 587877dff552f..56ba12c463647 100644
--- a/library/core/src/array/iter.rs
+++ b/library/core/src/array/iter.rs
@@ -4,7 +4,7 @@ use crate::num::NonZeroUsize;
 use crate::{
     fmt,
     intrinsics::transmute_unchecked,
-    iter::{self, ExactSizeIterator, FusedIterator, TrustedLen},
+    iter::{self, ExactSizeIterator, FusedIterator, TrustedLen, TrustedRandomAccessNoCoerce},
     mem::MaybeUninit,
     ops::{IndexRange, Range},
     ptr,
@@ -293,6 +293,12 @@ impl<T, const N: usize> Iterator for IntoIter<T, N> {
 
         NonZeroUsize::new(remaining).map_or(Ok(()), Err)
     }
+
+    #[inline]
+    unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item {
+        // SAFETY: The caller must provide an idx that is in bound of the remainder.
+        unsafe { self.data.as_ptr().add(self.alive.start()).add(idx).cast::<T>().read() }
+    }
 }
 
 #[stable(feature = "array_value_iter_impls", since = "1.40.0")]
@@ -374,6 +380,25 @@ impl<T, const N: usize> FusedIterator for IntoIter<T, N> {}
 #[stable(feature = "array_value_iter_impls", since = "1.40.0")]
 unsafe impl<T, const N: usize> TrustedLen for IntoIter<T, N> {}
 
+#[doc(hidden)]
+#[unstable(issue = "none", feature = "std_internals")]
+#[rustc_unsafe_specialization_marker]
+pub trait NonDrop {}
+
+// T: Copy as approximation for !Drop since get_unchecked does not advance self.alive
+// and thus we can't implement drop-handling
+#[unstable(issue = "none", feature = "std_internals")]
+impl<T: Copy> NonDrop for T {}
+
+#[doc(hidden)]
+#[unstable(issue = "none", feature = "std_internals")]
+unsafe impl<T, const N: usize> TrustedRandomAccessNoCoerce for IntoIter<T, N>
+where
+    T: NonDrop,
+{
+    const MAY_HAVE_SIDE_EFFECT: bool = false;
+}
+
 #[stable(feature = "array_value_iter_impls", since = "1.40.0")]
 impl<T: Clone, const N: usize> Clone for IntoIter<T, N> {
     fn clone(&self) -> Self {
diff --git a/library/core/src/iter/adapters/zip.rs b/library/core/src/iter/adapters/zip.rs
index b6b0c90cb7d14..77ccf5085022b 100644
--- a/library/core/src/iter/adapters/zip.rs
+++ b/library/core/src/iter/adapters/zip.rs
@@ -94,6 +94,14 @@ where
         ZipImpl::nth(self, n)
     }
 
+    #[inline]
+    fn fold<Acc, F>(self, init: Acc, f: F) -> Acc
+    where
+        F: FnMut(Acc, Self::Item) -> Acc,
+    {
+        ZipImpl::fold(self, init, f)
+    }
+
     #[inline]
     unsafe fn __iterator_get_unchecked(&mut self, idx: usize) -> Self::Item
     where
@@ -129,6 +137,9 @@ trait ZipImpl<A, B> {
     where
         A: DoubleEndedIterator + ExactSizeIterator,
         B: DoubleEndedIterator + ExactSizeIterator;
+    fn fold<Acc, F>(self, init: Acc, f: F) -> Acc
+    where
+        F: FnMut(Acc, Self::Item) -> Acc;
     // This has the same safety requirements as `Iterator::__iterator_get_unchecked`
     unsafe fn get_unchecked(&mut self, idx: usize) -> <Self as Iterator>::Item
     where
@@ -228,6 +239,14 @@ where
     {
         unreachable!("Always specialized");
     }
+
+    #[inline]
+    default fn fold<Acc, F>(self, init: Acc, f: F) -> Acc
+    where
+        F: FnMut(Acc, Self::Item) -> Acc,
+    {
+        SpecFold::spec_fold(self, init, f)
+    }
 }
 
 #[doc(hidden)]
@@ -251,6 +270,24 @@ where
         // `Iterator::__iterator_get_unchecked`.
         unsafe { (self.a.__iterator_get_unchecked(idx), self.b.__iterator_get_unchecked(idx)) }
     }
+
+    #[inline]
+    fn fold<Acc, F>(mut self, init: Acc, mut f: F) -> Acc
+    where
+        F: FnMut(Acc, Self::Item) -> Acc,
+    {
+        let mut accum = init;
+        let len = ZipImpl::size_hint(&self).0;
+        for i in 0..len {
+            // SAFETY: since Self: TrustedRandomAccessNoCoerce we can trust the size-hint to
+            // calculate the length and then use that to do unchecked iteration.
+            // fold consumes the iterator so we don't need to fixup any state.
+            unsafe {
+                accum = f(accum, self.get_unchecked(i));
+            }
+        }
+        accum
+    }
 }
 
 #[doc(hidden)]
@@ -590,3 +627,56 @@ unsafe impl<I: Iterator + TrustedRandomAccessNoCoerce> SpecTrustedRandomAccess f
         unsafe { self.__iterator_get_unchecked(index) }
     }
 }
+
+trait SpecFold: Iterator {
+    fn spec_fold<B, F>(self, init: B, f: F) -> B
+    where
+        Self: Sized,
+        F: FnMut(B, Self::Item) -> B;
+}
+
+impl<A: Iterator, B: Iterator> SpecFold for Zip<A, B> {
+    // Adapted from default impl from the Iterator trait
+    #[inline]
+    default fn spec_fold<Acc, F>(mut self, init: Acc, mut f: F) -> Acc
+    where
+        F: FnMut(Acc, Self::Item) -> Acc,
+    {
+        let mut accum = init;
+        while let Some(x) = ZipImpl::next(&mut self) {
+            accum = f(accum, x);
+        }
+        accum
+    }
+}
+
+impl<A: TrustedLen, B: TrustedLen> SpecFold for Zip<A, B> {
+    #[inline]
+    fn spec_fold<Acc, F>(mut self, init: Acc, mut f: F) -> Acc
+    where
+        F: FnMut(Acc, Self::Item) -> Acc,
+    {
+        let mut accum = init;
+        loop {
+            let (upper, more) = if let Some(upper) = ZipImpl::size_hint(&self).1 {
+                (upper, false)
+            } else {
+                // Per TrustedLen contract a None upper bound means more than usize::MAX items
+                (usize::MAX, true)
+            };
+
+            for _ in 0..upper {
+                let pair =
+                    // SAFETY: TrustedLen guarantees that at least `upper` many items are available
+                    // therefore we know they can't be None
+                    unsafe { (self.a.next().unwrap_unchecked(), self.b.next().unwrap_unchecked()) };
+                accum = f(accum, pair);
+            }
+
+            if !more {
+                break;
+            }
+        }
+        accum
+    }
+}
diff --git a/library/core/tests/iter/adapters/zip.rs b/library/core/tests/iter/adapters/zip.rs
index 585cfbb90e40c..c3508be8598fb 100644
--- a/library/core/tests/iter/adapters/zip.rs
+++ b/library/core/tests/iter/adapters/zip.rs
@@ -184,7 +184,11 @@ fn test_zip_nested_sideffectful() {
         let it = xs.iter_mut().map(|x| *x = 1).enumerate().zip(&ys);
         it.count();
     }
-    assert_eq!(&xs, &[1, 1, 1, 1, 1, 0]);
+    let length_aware = &xs == &[1, 1, 1, 1, 0, 0];
+    let probe_first = &xs == &[1, 1, 1, 1, 1, 0];
+
+    // either implementation is valid according to zip documentation
+    assert!(length_aware || probe_first);
 }
 
 #[test]
diff --git a/tests/assembly/libs/issue-115339-zip-arrays.rs b/tests/assembly/libs/issue-115339-zip-arrays.rs
new file mode 100644
index 0000000000000..26b7b9770bc8f
--- /dev/null
+++ b/tests/assembly/libs/issue-115339-zip-arrays.rs
@@ -0,0 +1,25 @@
+// assembly-output: emit-asm
+// # zen3 previously exhibited odd vectorization
+// compile-flags: --crate-type=lib -Ctarget-cpu=znver3 -O
+// only-x86_64
+// ignore-sgx
+
+use std::iter;
+
+// previously this produced a long chain of
+// 56: vpextrb $6, %xmm0, %ecx
+// 57: orb %cl, 22(%rsi)
+// 58: vpextrb $7, %xmm0, %ecx
+// 59: orb %cl, 23(%rsi)
+// [...]
+
+// CHECK-LABEL: zip_arrays:
+#[no_mangle]
+pub fn zip_arrays(mut a: [u8; 32], b: [u8; 32]) -> [u8; 32] {
+    // CHECK-NOT: vpextrb
+    // CHECK-NOT: orb %cl
+    // CHECK: vorps
+    iter::zip(&mut a, b).for_each(|(a, b)| *a |= b);
+    // CHECK: retq
+    a
+}