Add support for requiring base prefixes and suffixes.

With these options set, the base prefix and/or suffix must be present when parsing, and our float and integer writers emit them when writing. This is useful for cases like hex floats, which only make sense when they carry a literal `0x` prefix.
Alexhuszagh · Jan 12, 2025 · fdef013 · fdef013
1 parent 933a8da
commit fdef013
Show file tree

Hide file tree

Showing 18 changed files with 882 additions and 122 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -15,7 +15,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Added `build_checked` to our `Options` API (#204).
 - Added `has_digit_separator` to `NumberFormat` (#204).
 - Re-export `NumberFormat` to our other crates (#204).
-- Add `Options::from_radix` for all options for similar APIs for each (#208).
+- Added `Options::from_radix` to all options, giving each a similar API (#208).
+- Added support for requiring both integer and fraction digits with exponents, that is, `1.e5` and `.1e5`, as opposed to just requiring `1e5` (#215).
+- Added `supports_parsing_integers`, `supports_parsing_floats`, `supports_writing_integers`, and `supports_writing_floats` for our number formats (#215).
+- Added `required_base_prefix` and `required_base_suffix` for our number formats, requiring base prefixes and/or suffixes when parsing, and allowing writing base prefixes and/or suffixes (#215).
 
 ### Changed
 

diff --git a/lexical-parse-float/src/parse.rs b/lexical-parse-float/src/parse.rs
@@ -536,25 +536,31 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
     // INTEGER
 
     // Check to see if we have a valid base prefix.
+    // NOTE: `lz_prefix` records whether we consumed a leading zero
+    // while checking for a base prefix: it does not indicate whether
+    // the base prefix itself is present.
     #[allow(unused_variables)]
-    let mut is_prefix = false;
-    #[cfg(feature = "format")]
+    let mut lz_prefix = false;
+    #[cfg(all(feature = "format", feature = "power-of-two"))]
     {
         let base_prefix = format.base_prefix();
+        let mut has_prefix = false;
         let mut iter = byte.integer_iter();
         if base_prefix != 0 && iter.read_if_value_cased(b'0').is_some() {
             // Check to see if the next character is the base prefix.
             // We must have a format like `0x`, `0d`, `0o`.
             // NOTE: The check for empty integer digits happens below so
             // we don't need a redundant check here.
-            is_prefix = true;
-            if iter.read_if_value(base_prefix, format.case_sensitive_base_prefix()).is_some()
-                && iter.is_buffer_empty()
-                && format.required_integer_digits()
-            {
+            lz_prefix = true;
+            let prefix = iter.read_if_value(base_prefix, format.case_sensitive_base_prefix());
+            has_prefix = prefix.is_some();
+            if has_prefix && iter.is_buffer_empty() && format.required_integer_digits() {
                 return Err(Error::EmptyInteger(iter.cursor()));
             }
         }
+        if format.required_base_prefix() && !has_prefix {
+            return Err(Error::MissingBasePrefix(iter.cursor()));
+        }
     }
 
     // Parse our integral digits.
@@ -600,7 +606,7 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
 
     // Check if integer leading zeros are disabled.
     #[cfg(feature = "format")]
-    if !is_prefix && format.no_float_leading_zeros() {
+    if !lz_prefix && format.no_float_leading_zeros() {
         if integer_digits.len() > 1 && integer_digits.first() == Some(&b'0') {
             return Err(Error::InvalidLeadingZeros(start.cursor()));
         }
@@ -741,11 +747,14 @@ pub fn parse_number<'a, const FORMAT: u128, const IS_PARTIAL: bool>(
     // that the first character **is not** a digit separator.
     #[allow(unused_variables)]
     let base_suffix = format.base_suffix();
-    #[cfg(feature = "format")]
+    #[cfg(all(feature = "format", feature = "power-of-two"))]
     if base_suffix != 0 {
-        if byte.first_is(base_suffix, format.case_sensitive_base_suffix()) {
+        let is_suffix = byte.first_is(base_suffix, format.case_sensitive_base_suffix());
+        if is_suffix {
             // SAFETY: safe since `byte.len() >= 1`.
             unsafe { byte.step_unchecked() };
+        } else if format.required_base_suffix() {
+            return Err(Error::MissingBaseSuffix(byte.cursor()));
         }
     }
 

diff --git a/lexical-parse-float/tests/api_tests.rs b/lexical-parse-float/tests/api_tests.rs
@@ -1307,3 +1307,39 @@ fn supported_test() {
     let value = f64::from_lexical_partial_with_options::<FORMAT>(float.as_bytes(), &OPTIONS);
     assert_eq!(value, Ok((12345.0, 7)));
 }
+
+#[test]
+#[cfg(all(feature = "format", feature = "power-of-two"))]
+fn require_base_prefix_test() {
+    use core::num;
+
+    const PREFIX: u128 = NumberFormatBuilder::new()
+        .base_prefix(num::NonZeroU8::new(b'd'))
+        .required_base_prefix(true)
+        .build_strict();
+    const OPTIONS: Options = Options::new();
+
+    let value = f64::from_lexical_with_options::<PREFIX>(b"0d12345", &OPTIONS);
+    assert_eq!(value, Ok(12345.0));
+    let value = f64::from_lexical_with_options::<PREFIX>(b"12345", &OPTIONS);
+    assert_eq!(value, Err(Error::MissingBasePrefix(0)));
+
+    let value = f64::from_lexical_with_options::<PREFIX>(b"-0d12345", &OPTIONS);
+    assert_eq!(value, Ok(-12345.0));
+    let value = f64::from_lexical_with_options::<PREFIX>(b"-12345", &OPTIONS);
+    assert_eq!(value, Err(Error::MissingBasePrefix(1)));
+
+    const SUFFIX: u128 = NumberFormatBuilder::rebuild(PREFIX)
+        .base_suffix(num::NonZeroU8::new(b'z'))
+        .required_base_suffix(true)
+        .build_strict();
+    let value = f64::from_lexical_with_options::<SUFFIX>(b"0d12345z", &OPTIONS);
+    assert_eq!(value, Ok(12345.0));
+    let value = f64::from_lexical_with_options::<SUFFIX>(b"0d12345", &OPTIONS);
+    assert_eq!(value, Err(Error::MissingBaseSuffix(7)));
+
+    let value = f64::from_lexical_with_options::<SUFFIX>(b"-0d12345z", &OPTIONS);
+    assert_eq!(value, Ok(-12345.0));
+    let value = f64::from_lexical_with_options::<SUFFIX>(b"-0d12345", &OPTIONS);
+    assert_eq!(value, Err(Error::MissingBaseSuffix(8)));
+}
diff --git a/lexical-parse-integer/src/algorithm.rs b/lexical-parse-integer/src/algorithm.rs
@@ -120,19 +120,26 @@ macro_rules! into_error {
 #[cfg(feature = "format")]
 macro_rules! fmt_invalid_digit {
     (
-        $value:ident, $iter:ident, $c:expr, $start_index:ident, $invalid_digit:ident, $is_end:expr
+        $value:ident,
+        $iter:ident,
+        $c:expr,
+        $start_index:ident,
+        $invalid_digit:ident,
+        $has_suffix:ident,
+        $is_end:expr $(,)?
     ) => {{
         // NOTE: If we have non-contiguous iterators, we could have a skip character
         // here at the boundary. This does not affect safety but it does affect
         // correctness.
         debug_assert!($iter.is_contiguous() || $is_end);
 
-        let base_suffix = NumberFormat::<FORMAT>::BASE_SUFFIX;
-        let uncased_base_suffix = NumberFormat::<FORMAT>::CASE_SENSITIVE_BASE_SUFFIX;
+        let format = NumberFormat::<FORMAT> {};
+        let base_suffix = format.base_suffix();
+        let uncased_base_suffix = format.case_sensitive_base_suffix();
         // Need to check for a base suffix, if so, return a valid value.
         // We can't have a base suffix at the first value (need at least
         // 1 digit).
-        if base_suffix != 0 && $iter.cursor() - $start_index > 1 {
+        if cfg!(feature = "power-of-two") && base_suffix != 0 && $iter.cursor() - $start_index > 1 {
             let is_suffix = if uncased_base_suffix {
                 $c == base_suffix
             } else {
@@ -144,6 +151,7 @@ macro_rules! fmt_invalid_digit {
             // contiguous iterators.
             if is_suffix && $is_end && $iter.is_buffer_empty() {
                 // Break out of the loop, we've finished parsing.
+                $has_suffix = true;
                 break;
             } else if !$iter.is_buffer_empty() {
                 // Haven't finished parsing, so we're going to call
@@ -165,7 +173,13 @@ macro_rules! fmt_invalid_digit {
 #[cfg(not(feature = "format"))]
 macro_rules! fmt_invalid_digit {
     (
-        $value:ident, $iter:ident, $c:expr, $start_index:ident, $invalid_digit:ident, $is_end:expr
+        $value:ident,
+        $iter:ident,
+        $c:expr,
+        $start_index:ident,
+        $invalid_digit:ident,
+        $has_suffix:ident,
+        $is_end:expr $(,)?
     ) => {{
         $invalid_digit!($value, $iter.cursor(), $iter.current_count());
     }};
@@ -393,6 +407,7 @@ where
 /// * `add_op` - The unchecked add/sub op.
 /// * `start_index` - The offset where parsing started.
 /// * `invalid_digit` - Behavior when an invalid digit is found.
+/// * `has_suffix` - If a base suffix was found at the end of the buffer.
 /// * `is_end` - If iter corresponds to the full input.
 ///
 /// core: <https://doc.rust-lang.org/1.81.0/src/core/num/mod.rs.html#1480>
@@ -403,15 +418,24 @@ macro_rules! parse_1digit_unchecked {
         $add_op:ident,
         $start_index:ident,
         $invalid_digit:ident,
-        $is_end:expr
+        $has_suffix:ident,
+        $is_end:expr $(,)?
     ) => {{
         // This is a slower parsing algorithm, going 1 digit at a time, but doing it in
         // an unchecked loop.
         let radix = NumberFormat::<FORMAT>::MANTISSA_RADIX;
         while let Some(&c) = $iter.next() {
             let digit = match char_to_digit_const(c, radix) {
                 Some(v) => v,
-                None => fmt_invalid_digit!($value, $iter, c, $start_index, $invalid_digit, $is_end),
+                None => fmt_invalid_digit!(
+                    $value,
+                    $iter,
+                    c,
+                    $start_index,
+                    $invalid_digit,
+                    $has_suffix,
+                    $is_end,
+                ),
             };
             // multiply first since compilers are good at optimizing things out and will do
             // a fused mul/add We must do this after getting the digit for
@@ -431,6 +455,7 @@ macro_rules! parse_1digit_unchecked {
 /// * `add_op` - The checked add/sub op.
 /// * `start_index` - The offset where parsing started.
 /// * `invalid_digit` - Behavior when an invalid digit is found.
+/// * `has_suffix` - If a base suffix was found at the end of the buffer.
 /// * `overflow` - If the error is overflow or underflow.
 ///
 /// core: <https://doc.rust-lang.org/1.81.0/src/core/num/mod.rs.html#1505>
@@ -441,15 +466,24 @@ macro_rules! parse_1digit_checked {
         $add_op:ident,
         $start_index:ident,
         $invalid_digit:ident,
-        $overflow:ident
+        $has_suffix:ident,
+        $overflow:ident $(,)?
     ) => {{
         // This is a slower parsing algorithm, going 1 digit at a time, but doing it in
         // an unchecked loop.
         let radix = NumberFormat::<FORMAT>::MANTISSA_RADIX;
         while let Some(&c) = $iter.next() {
             let digit = match char_to_digit_const(c, radix) {
                 Some(v) => v,
-                None => fmt_invalid_digit!($value, $iter, c, $start_index, $invalid_digit, true),
+                None => fmt_invalid_digit!(
+                    $value,
+                    $iter,
+                    c,
+                    $start_index,
+                    $invalid_digit,
+                    $has_suffix,
+                    true,
+                ),
             };
             // multiply first since compilers are good at optimizing things out and will do
             // a fused mul/add
@@ -477,6 +511,7 @@ macro_rules! parse_1digit_checked {
 /// * `start_index` - The offset where parsing started.
 /// * `invalid_digit` - Behavior when an invalid digit is found.
 /// * `no_multi_digit` - If to disable multi-digit optimizations.
+/// * `has_suffix` - If a base suffix was found at the end of the buffer.
 /// * `is_end` - If iter corresponds to the full input.
 macro_rules! parse_digits_unchecked {
     (
@@ -486,7 +521,8 @@ macro_rules! parse_digits_unchecked {
         $start_index:ident,
         $invalid_digit:ident,
         $no_multi_digit:expr,
-        $is_end:expr
+        $has_suffix:ident,
+        $is_end:expr $(,)?
     ) => {{
         let can_multi = can_try_parse_multidigits::<_, FORMAT>(&$iter);
         let use_multi = can_multi && !$no_multi_digit;
@@ -510,7 +546,15 @@ macro_rules! parse_digits_unchecked {
                 $value = $value.wrapping_mul(radix4).$add_op(value);
             }
         }
-        parse_1digit_unchecked!($value, $iter, $add_op, $start_index, $invalid_digit, $is_end)
+        parse_1digit_unchecked!(
+            $value,
+            $iter,
+            $add_op,
+            $start_index,
+            $invalid_digit,
+            $has_suffix,
+            $is_end
+        )
     }};
 }
 
@@ -528,6 +572,7 @@ macro_rules! parse_digits_unchecked {
 /// * `invalid_digit` - Behavior when an invalid digit is found.
 /// * `overflow` - If the error is overflow or underflow.
 /// * `no_multi_digit` - If to disable multi-digit optimizations.
+/// * `has_suffix` - If a base suffix was found at the end of the buffer.
 /// * `overflow_digits` - The number of digits before we need to consider
 ///   checked ops.
 macro_rules! parse_digits_checked {
@@ -540,7 +585,8 @@ macro_rules! parse_digits_checked {
         $invalid_digit:ident,
         $overflow:ident,
         $no_multi_digit:expr,
-        $overflow_digits:expr
+        $has_suffix:ident,
+        $overflow_digits:expr $(,)?
     ) => {{
         // Can use the unchecked for the `max_digits` here. If we
         // have a non-contiguous iterator, we could have a case like
@@ -557,13 +603,22 @@ macro_rules! parse_digits_checked {
                     $start_index,
                     $invalid_digit,
                     $no_multi_digit,
+                    $has_suffix,
                     false
                 );
             }
         }
 
         // NOTE: all our multi-digit optimizations have been done here: skip this
-        parse_1digit_checked!($value, $iter, $add_op, $start_index, $invalid_digit, $overflow)
+        parse_1digit_checked!(
+            $value,
+            $iter,
+            $add_op,
+            $start_index,
+            $invalid_digit,
+            $has_suffix,
+            $overflow
+        )
     }};
 }
 
@@ -650,6 +705,9 @@ macro_rules! algorithm {
                 }
             }
         }
+        if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_prefix() && !is_prefix {
+            return Err(Error::MissingBasePrefix(iter.cursor()));
+        }
 
         // If we have a format that doesn't accept leading zeros,
         // check if the next value is invalid. It's invalid if the
@@ -684,14 +742,60 @@ macro_rules! algorithm {
     //      culminates in **way** slower performance overall for simple
     //      integers, and no improvement for large integers.
     let mut value = T::ZERO;
+    #[allow(unused_mut)]
+    let mut has_suffix = false;
     if cannot_overflow && is_negative {
-        parse_digits_unchecked!(value, iter, wrapping_sub, start_index, $invalid_digit, $no_multi_digit, true);
+        parse_digits_unchecked!(
+            value,
+            iter,
+            wrapping_sub,
+            start_index,
+            $invalid_digit,
+            $no_multi_digit,
+            has_suffix,
+            true,
+        );
     } if cannot_overflow {
-        parse_digits_unchecked!(value, iter, wrapping_add, start_index, $invalid_digit, $no_multi_digit, true);
+        parse_digits_unchecked!(
+            value,
+            iter,
+            wrapping_add,
+            start_index,
+            $invalid_digit,
+            $no_multi_digit,
+            has_suffix,
+            true,
+        );
     } else if is_negative {
-        parse_digits_checked!(value, iter, checked_sub, wrapping_sub, start_index, $invalid_digit, Underflow, $no_multi_digit, overflow_digits);
+        parse_digits_checked!(
+            value,
+            iter,
+            checked_sub,
+            wrapping_sub,
+            start_index,
+            $invalid_digit,
+            Underflow,
+            $no_multi_digit,
+            has_suffix,
+            overflow_digits,
+        );
     } else {
-        parse_digits_checked!(value, iter, checked_add, wrapping_add, start_index, $invalid_digit, Overflow, $no_multi_digit, overflow_digits);
+        parse_digits_checked!(
+            value,
+            iter,
+            checked_add,
+            wrapping_add,
+            start_index,
+            $invalid_digit,
+            Overflow,
+            $no_multi_digit,
+            has_suffix,
+            overflow_digits,
+        );
+    }
+
+    if cfg!(all(feature = "format", feature = "power-of-two")) && format.required_base_suffix() && !has_suffix {
+        return Err(Error::MissingBaseSuffix(iter.cursor()));
     }
 
     $into_ok!(value, iter.buffer_length(), iter.current_count())