From d2874707bd35bf8f455ca82a716d4da325b74567 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Mon, 10 May 2021 21:56:27 +0100 Subject: [PATCH 1/8] faster parsing when not possible to overflow --- library/core/src/num/mod.rs | 85 ++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 35 deletions(-) diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index f0bd976ba83d5..b19b7dc4f7a6e 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -769,12 +769,14 @@ pub enum FpCategory { #[doc(hidden)] trait FromStrRadixHelper: PartialOrd + Copy { - fn min_value() -> Self; - fn max_value() -> Self; + const MIN: Self; fn from_u32(u: u32) -> Self; fn checked_mul(&self, other: u32) -> Option; fn checked_sub(&self, other: u32) -> Option; fn checked_add(&self, other: u32) -> Option; + unsafe fn unchecked_mul(&self, other: u32) -> Self; + unsafe fn unchecked_sub(&self, other: u32) -> Self; + unsafe fn unchecked_add(&self, other: u32) -> Self; } macro_rules! from_str_radix_int_impl { @@ -792,10 +794,7 @@ from_str_radix_int_impl! { isize i8 i16 i32 i64 i128 usize u8 u16 u32 u64 u128 } macro_rules! doit { ($($t:ty)*) => ($(impl FromStrRadixHelper for $t { - #[inline] - fn min_value() -> Self { Self::MIN } - #[inline] - fn max_value() -> Self { Self::MAX } + const MIN: Self = Self::MIN; #[inline] fn from_u32(u: u32) -> Self { u as Self } #[inline] @@ -810,6 +809,27 @@ macro_rules! doit { fn checked_add(&self, other: u32) -> Option { Self::checked_add(*self, other as Self) } + #[inline] + unsafe fn unchecked_mul(&self, other: u32) -> Self { + // SAFETY: Conditions of `Self::unchecked_mul` must be upheld by the caller. + unsafe { + Self::unchecked_mul(*self, other as Self) + } + } + #[inline] + unsafe fn unchecked_sub(&self, other: u32) -> Self { + // SAFETY: Conditions of `Self::unchecked_sub` must be upheld by the caller. + unsafe { + Self::unchecked_sub(*self, other as Self) + } + } + #[inline] + unsafe fn unchecked_add(&self, other: u32) -> Self { + // SAFETY: Conditions of `Self::unchecked_add` must be upheld by the caller. + unsafe { + Self::unchecked_add(*self, other as Self) + } + } })*) } doit! { i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize } @@ -828,7 +848,7 @@ fn from_str_radix(src: &str, radix: u32) -> Result T::min_value(); + let is_signed_ty = T::from_u32(0) > T::MIN; // all valid digits are ascii, so we will just iterate over the utf8 bytes // and cast them to chars. .to_digit() will safely return None for anything @@ -846,37 +866,32 @@ fn from_str_radix(src: &str, radix: u32) -> Result x, - None => return Err(PIE { kind: InvalidDigit }), - }; - result = match result.checked_mul(radix) { - Some(result) => result, - None => return Err(PIE { kind: PosOverflow }), - }; - result = match result.checked_add(x) { - Some(result) => result, - None => return Err(PIE { kind: PosOverflow }), - }; + + if radix <= 16 && digits.len() <= mem::size_of::() * 2 - is_signed_ty as usize { + // SAFETY: Consider the highest radix of 16: + // `u8::MAX` is `ff` (2 characters), `u16::MAX` is `ffff` (4 characters) + // We can be sure that any src len of 2 would fit in a u8 so we don't need + // to check for overflow. + unsafe { + let unchecked_additive_op = + if is_positive { T::unchecked_add } else { T::unchecked_sub }; + + for &c in digits { + result = result.unchecked_mul(radix); + let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?; + result = unchecked_additive_op(&result, x); + } } } else { - // The number is negative + let additive_op = if is_positive { T::checked_add } else { T::checked_sub }; + let overflow_err = || PIE { kind: if is_positive { PosOverflow } else { NegOverflow } }; + for &c in digits { - let x = match (c as char).to_digit(radix) { - Some(x) => x, - None => return Err(PIE { kind: InvalidDigit }), - }; - result = match result.checked_mul(radix) { - Some(result) => result, - None => return Err(PIE { kind: NegOverflow }), - }; - result = match result.checked_sub(x) { - Some(result) => result, - None => return Err(PIE { kind: NegOverflow }), - }; + let mul = result.checked_mul(radix); + let x = (c as char).to_digit(radix).ok_or(PIE { kind: InvalidDigit })?; + // multiply done early for performance reasons. + result = mul.ok_or_else(overflow_err)?; + result = additive_op(&result, x).ok_or_else(overflow_err)?; } } Ok(result) From 4b700e0d5a2c36848f8406324cc5b1828d904564 Mon Sep 17 00:00:00 2001 From: Giles Cope Date: Tue, 18 May 2021 08:51:20 +0100 Subject: [PATCH 2/8] add likely and clearer comments --- library/core/src/num/mod.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index b19b7dc4f7a6e..e29e0792db1f4 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -867,8 +867,13 @@ fn from_str_radix(src: &str, radix: u32) -> Result() * 2 - is_signed_ty as usize { - // SAFETY: Consider the highest radix of 16: + if intrinsics::likely( + radix <= 16 && digits.len() <= mem::size_of::() * 2 - is_signed_ty as usize, + ) { + // SAFETY: We can take this fast path when `radix.pow(digits.len()) - 1 <= T::MAX` + // but the condition above is a faster (conservative) approximation of this. + // + // Consider the highest radix of 16: // `u8::MAX` is `ff` (2 characters), `u16::MAX` is `ffff` (4 characters) // We can be sure that any src len of 2 would fit in a u8 so we don't need // to check for overflow. @@ -887,9 +892,14 @@ fn from_str_radix(src: &str, radix: u32) -> Result Date: Tue, 18 May 2021 18:22:04 +0100 Subject: [PATCH 3/8] Update library/core/src/num/mod.rs Co-authored-by: LingMan --- library/core/src/num/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index e29e0792db1f4..943c811cc9166 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -893,8 +893,8 @@ fn from_str_radix(src: &str, radix: u32) -> Result Date: Tue, 18 May 2021 18:22:21 +0100 Subject: [PATCH 4/8] Update library/core/src/num/mod.rs Co-authored-by: LingMan --- library/core/src/num/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/core/src/num/mod.rs b/library/core/src/num/mod.rs index 943c811cc9166..672c1a823a689 100644 --- a/library/core/src/num/mod.rs +++ b/library/core/src/num/mod.rs @@ -896,8 +896,8 @@ fn from_str_radix(src: &str, radix: u32) -> Result