From 89b8a97aead4aa366eb2587ffdcfa7df39f3815a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20Buga?= Date: Sun, 20 Sep 2020 14:46:07 +0200 Subject: [PATCH 1/2] Refactor memchr to allow optimization --- library/core/src/slice/cmp.rs | 2 ++ library/core/src/slice/memchr.rs | 40 ++++++++++++--------- library/core/src/slice/mod.rs | 1 + src/test/codegen/issue-75659.rs | 62 ++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 16 deletions(-) create mode 100644 src/test/codegen/issue-75659.rs diff --git a/library/core/src/slice/cmp.rs b/library/core/src/slice/cmp.rs index 27a358bddaf4f..ed26c59c17c43 100644 --- a/library/core/src/slice/cmp.rs +++ b/library/core/src/slice/cmp.rs @@ -268,12 +268,14 @@ where } impl SliceContains for u8 { + #[inline] fn slice_contains(&self, x: &[Self]) -> bool { memchr::memchr(*self, x).is_some() } } impl SliceContains for i8 { + #[inline] fn slice_contains(&self, x: &[Self]) -> bool { let byte = *self as u8; // SAFETY: `i8` and `u8` have the same memory layout, thus casting `x.as_ptr()` diff --git a/library/core/src/slice/memchr.rs b/library/core/src/slice/memchr.rs index 3b13ed5fed396..0c0f1750264eb 100644 --- a/library/core/src/slice/memchr.rs +++ b/library/core/src/slice/memchr.rs @@ -12,6 +12,7 @@ const HI_U64: u64 = 0x8080808080808080; // Use truncation. const LO_USIZE: usize = LO_U64 as usize; const HI_USIZE: usize = HI_U64 as usize; +const USIZE_BYTES: usize = mem::size_of::(); /// Returns `true` if `x` contains any zero byte. /// @@ -38,19 +39,29 @@ fn repeat_byte(b: u8) -> usize { } /// Returns the first index matching the byte `x` in `text`. +#[inline] pub fn memchr(x: u8, text: &[u8]) -> Option { + // Fast path for small slices + if text.len() < 2 * USIZE_BYTES { + return text.iter().position(|elt| *elt == x); + } + + memchr_general_case(x, text) +} + +fn memchr_general_case(x: u8, text: &[u8]) -> Option { // Scan for a single byte value by reading two `usize` words at a time. // // Split `text` in three parts // - unaligned initial part, before the first word aligned address in text // - body, scan by 2 words at a time // - the last remaining part, < 2 word size + + // search up to an aligned boundary let len = text.len(); let ptr = text.as_ptr(); - let usize_bytes = mem::size_of::(); + let mut offset = ptr.align_offset(USIZE_BYTES); - // search up to an aligned boundary - let mut offset = ptr.align_offset(usize_bytes); if offset > 0 { offset = cmp::min(offset, len); if let Some(index) = text[..offset].iter().position(|elt| *elt == x) { @@ -60,22 +71,19 @@ pub fn memchr(x: u8, text: &[u8]) -> Option { // search the body of the text let repeated_x = repeat_byte(x); + while offset <= len - 2 * USIZE_BYTES { + unsafe { + let u = *(ptr.add(offset) as *const usize); + let v = *(ptr.add(offset + USIZE_BYTES) as *const usize); - if len >= 2 * usize_bytes { - while offset <= len - 2 * usize_bytes { - unsafe { - let u = *(ptr.add(offset) as *const usize); - let v = *(ptr.add(offset + usize_bytes) as *const usize); - - // break if there is a matching byte - let zu = contains_zero_byte(u ^ repeated_x); - let zv = contains_zero_byte(v ^ repeated_x); - if zu || zv { - break; - } + // break if there is a matching byte + let zu = contains_zero_byte(u ^ repeated_x); + let zv = contains_zero_byte(v ^ repeated_x); + if zu || zv { + break; } - offset += usize_bytes * 2; } + offset += USIZE_BYTES * 2; } // Find the byte after the point the body loop stopped. diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index ff926c517bc7c..8ffd94369fc29 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -1633,6 +1633,7 @@ impl [T] { /// assert!(!v.iter().any(|e| e == "hi")); /// ``` #[stable(feature = "rust1", since = "1.0.0")] + #[inline] pub fn contains(&self, x: &T) -> bool where T: PartialEq, diff --git a/src/test/codegen/issue-75659.rs b/src/test/codegen/issue-75659.rs new file mode 100644 index 0000000000000..8a4b7ef8c4337 --- /dev/null +++ b/src/test/codegen/issue-75659.rs @@ -0,0 +1,62 @@ +// This test checks that the call to memchr/slice_contains is optimized away +// when searching in small slices. + +// compile-flags: -O + +#![crate_type = "lib"] + +// CHECK-LABEL: @foo1 +#[no_mangle] +pub fn foo1(x: u8, data: &[u8; 1]) -> bool { + // CHECK-NOT: memchr + // CHECK-NOT: slice_contains + data.contains(&x) +} + +// CHECK-LABEL: @foo2 +#[no_mangle] +pub fn foo2(x: u8, data: &[u8; 2]) -> bool { + // CHECK-NOT: memchr + // CHECK-NOT: slice_contains + data.contains(&x) +} + +// CHECK-LABEL: @foo3 +#[no_mangle] +pub fn foo3(x: u8, data: &[u8; 3]) -> bool { + // CHECK-NOT: memchr + // CHECK-NOT: slice_contains + data.contains(&x) +} + +// CHECK-LABEL: @foo4 +#[no_mangle] +pub fn foo4(x: u8, data: &[u8; 4]) -> bool { + // CHECK-NOT: memchr + // CHECK-NOT: slice_contains + data.contains(&x) +} + +// CHECK-LABEL: @foo8 +#[no_mangle] +pub fn foo8(x: u8, data: &[u8; 8]) -> bool { + // CHECK-NOT: memchr + // CHECK-NOT: slice_contains + data.contains(&x) +} + +// CHECK-LABEL: @foo8_i8 +#[no_mangle] +pub fn foo8_i8(x: i8, data: &[i8; 8]) -> bool { + // CHECK-NOT: memchr + // CHECK-NOT: slice_contains + !data.contains(&x) +} + +// Check that the general case isn't inlined +// CHECK-LABEL: @foo80 +#[no_mangle] +pub fn foo80(x: u8, data: &[u8; 80]) -> bool { + // CHECK: call core::slice::memchr + data.contains(&x) +} From de623bfaf77a69d86e1561f6b4e0fdd9d193cf60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20Buga?= Date: Thu, 1 Oct 2020 16:02:32 +0200 Subject: [PATCH 2/2] Only test on x86_64 --- src/test/codegen/issue-75659.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/test/codegen/issue-75659.rs b/src/test/codegen/issue-75659.rs index 8a4b7ef8c4337..d093c841d68ef 100644 --- a/src/test/codegen/issue-75659.rs +++ b/src/test/codegen/issue-75659.rs @@ -2,6 +2,7 @@ // when searching in small slices. // compile-flags: -O +// only-x86_64 #![crate_type = "lib"]