From 155a3d6d00e675c3efef8af1026d4826a40a1169 Mon Sep 17 00:00:00 2001 From: Alex Huszagh Date: Tue, 21 Jan 2025 21:08:08 -0600 Subject: [PATCH] Dummy commit, revert later. --- CHANGELOG | 1 + .../tests/algorithm_tests.rs | 5 +- lexical-parse-integer/tests/api_tests.rs | 197 +++++++++++++++++- lexical-util/src/format.rs | 18 +- lexical-util/src/prebuilt_formats.rs | 105 ++++++++-- 5 files changed, 299 insertions(+), 27 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 42760a75..57a2b748 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `required_base_prefix` and `required_base_suffix` for our number formats, requiring base prefixes and/or suffixes when parsing, and allowing writing base prefixes and/or suffixes (#215). - Added `NumberFormatBuilder::none()` for create a format with no flags set (#215). - Added in many more digit separator flags for the `NumberFormat`, including for signs, base prefixes, base suffixes, and restricting digit separators at the start of the number (#215). +- Added many more pre-defined formatting constants (#215). ### Changed diff --git a/lexical-parse-integer/tests/algorithm_tests.rs b/lexical-parse-integer/tests/algorithm_tests.rs index 6ce0d1ba..ab3c47c4 100644 --- a/lexical-parse-integer/tests/algorithm_tests.rs +++ b/lexical-parse-integer/tests/algorithm_tests.rs @@ -4,6 +4,7 @@ mod util; use lexical_parse_integer::algorithm; use lexical_parse_integer::options::SMALL_NUMBERS; +use lexical_util::error::Error; use lexical_util::format::STANDARD; use lexical_util::iterator::AsBytes; #[cfg(feature = "power-of-two")] @@ -135,7 +136,7 @@ fn algorithm_test() { assert_eq!(parse_u32(b"12345"), Ok((12345, 5))); assert_eq!(parse_u32(b"+12345"), Ok((12345, 6))); - assert_eq!(parse_u32(b"-12345"), Ok((0, 0))); + assert_eq!(parse_u32(b"-12345"), Err(Error::Empty(0))); assert_eq!(parse_i32(b"12345"), Ok((12345, 5))); assert_eq!(parse_i32(b"-12345"), Ok((-12345, 6))); assert_eq!(parse_i32(b"+12345"), Ok((12345, 6))); @@ -170,7 +171,7 @@ fn algorithm_128_test() { assert_eq!(parse_u128(b"12345"), Ok((12345, 5))); assert_eq!(parse_u128(b"+12345"), Ok((12345, 6))); - assert_eq!(parse_u128(b"-12345"), Ok((0, 0))); + assert_eq!(parse_u128(b"-12345"), Err(Error::Empty(0))); assert_eq!(parse_i128(b"12345"), Ok((12345, 5))); assert_eq!(parse_i128(b"-12345"), Ok((-12345, 6))); assert_eq!(parse_i128(b"+12345"), Ok((12345, 6))); diff --git a/lexical-parse-integer/tests/api_tests.rs b/lexical-parse-integer/tests/api_tests.rs index d2baabdd..e9507b35 100644 --- a/lexical-parse-integer/tests/api_tests.rs +++ b/lexical-parse-integer/tests/api_tests.rs @@ -270,10 +270,16 @@ fn i32_integer_consecutive_digit_separator_test() { .integer_consecutive_digit_separator(true) .build_strict(); - assert!(i32::from_lexical_with_options::(b"3_1", &OPTIONS).is_ok()); - assert!(i32::from_lexical_with_options::(b"3__1", &OPTIONS).is_ok()); - assert!(i32::from_lexical_with_options::(b"_31", &OPTIONS).is_err()); - assert!(i32::from_lexical_with_options::(b"31_", &OPTIONS).is_err()); + assert_eq!(i32::from_lexical_with_options::(b"3_1", &OPTIONS), Ok(31)); + assert_eq!(i32::from_lexical_with_options::(b"3__1", &OPTIONS), Ok(31)); + assert_eq!( + i32::from_lexical_with_options::(b"_31", &OPTIONS), + Err(Error::InvalidDigit(0)) + ); + assert_eq!( + i32::from_lexical_with_options::(b"31_", &OPTIONS), + Err(Error::InvalidDigit(2)) + ); } #[test] @@ -349,13 +355,16 @@ fn base_prefix_and_suffix_test() { .base_suffix(num::NonZeroU8::new(b'h')) .build_strict(); const OPTIONS: Options = Options::new(); - assert!(i32::from_lexical_with_options::(b"+3h", &OPTIONS).is_ok()); - assert!(i32::from_lexical_with_options::(b"+0x3", &OPTIONS).is_ok()); - assert!(i32::from_lexical_with_options::(b"+0x3h", &OPTIONS).is_ok()); - assert!(i32::from_lexical_with_options::(b"+0x3h ", &OPTIONS).is_err()); - assert!(i32::from_lexical_with_options::(b"+0xh", &OPTIONS).is_err()); - assert!(i32::from_lexical_with_options::(b"+h", &OPTIONS).is_err()); - assert!(i32::from_lexical_with_options::(b"+0x", &OPTIONS).is_err()); + assert_eq!(i32::from_lexical_with_options::(b"+3h", &OPTIONS), Ok(3)); + assert_eq!(i32::from_lexical_with_options::(b"+0x3", &OPTIONS), Ok(3)); + assert_eq!(i32::from_lexical_with_options::(b"+0x3h", &OPTIONS), Ok(3)); + assert_eq!( + i32::from_lexical_with_options::(b"+0x3h ", &OPTIONS), + Err(Error::InvalidDigit(4)) + ); + assert_eq!(i32::from_lexical_with_options::(b"+0xh", &OPTIONS), Err(Error::Empty(4))); + assert_eq!(i32::from_lexical_with_options::(b"+h", &OPTIONS), Err(Error::Empty(2))); + assert_eq!(i32::from_lexical_with_options::(b"+0x", &OPTIONS), Err(Error::Empty(3))); } #[test] @@ -415,6 +424,8 @@ fn require_base_prefix_test() { let value = i64::from_lexical_with_options::(b"0d12345", &OPTIONS); assert_eq!(value, Ok(12345)); + let value = i64::from_lexical_with_options::(b"0D12345", &OPTIONS); + assert_eq!(value, Ok(12345)); let value = i64::from_lexical_with_options::(b"12345", &OPTIONS); assert_eq!(value, Err(Error::MissingBasePrefix(0))); @@ -447,3 +458,167 @@ fn require_base_prefix_test() { let value = u64::from_lexical_with_options::(b"0d12345", &OPTIONS); assert_eq!(value, Err(Error::MissingBaseSuffix(7))); } + +#[test] +#[cfg(all(feature = "format", feature = "power-of-two"))] +fn base_prefix_digit_separator_edge_cases_test() { + // TODO: Add in these tests to parse_float + use core::num; + + const OPTIONS: Options = Options::new(); + const NO_PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .leading_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"_+12345", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(1))); + + let value = i64::from_lexical_with_options::(b"+_12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"1", &OPTIONS); + assert_eq!(value, Ok(1)); + + const OPT_PREFIX: u128 = NumberFormatBuilder::new() + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'd')) + .leading_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"1", &OPTIONS); + assert_eq!(value, Ok(1)); + + const PREFIX: u128 = + NumberFormatBuilder::rebuild(OPT_PREFIX).required_base_prefix(true).build_strict(); + + let value = i64::from_lexical_with_options::(b"_+0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = i64::from_lexical_with_options::(b"+_0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+0d12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+0d_12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + const LEAD_PREFIX: u128 = NumberFormatBuilder::rebuild(PREFIX) + .base_prefix_leading_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"_+0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = i64::from_lexical_with_options::(b"+_0d12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+_0d_12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+0_d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+_0d__12345", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(4))); + + const INTERN_PREFIX: u128 = NumberFormatBuilder::rebuild(PREFIX) + .base_prefix_internal_digit_separator(true) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"_+0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = i64::from_lexical_with_options::(b"+_0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+_0d_12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+0_d12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+0d__12345", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(3))); + + const TRAIL_PREFIX: u128 = NumberFormatBuilder::rebuild(PREFIX) + .base_prefix_trailing_digit_separator(true) + .leading_digit_separator(false) + .build_strict(); + + let value = i64::from_lexical_with_options::(b"_+0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(0))); + + let value = i64::from_lexical_with_options::(b"+_0d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+_0d_12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+0_d12345", &OPTIONS); + assert_eq!(value, Err(Error::MissingBasePrefix(1))); + + let value = i64::from_lexical_with_options::(b"+0d_12345", &OPTIONS); + assert_eq!(value, Ok(12345)); + + let value = i64::from_lexical_with_options::(b"+0d__12345", &OPTIONS); + assert_eq!(value, Err(Error::InvalidDigit(4))); + + // TODO: Need all the custom suffix ones too + + // const SUFFIX: u128 = NumberFormatBuilder::rebuild(PREFIX) + // .base_suffix(num::NonZeroU8::new(b'z')) + // .required_base_suffix(true) + // .build_strict(); + // let value = i64::from_lexical_with_options::(b"0d12345z", + // &OPTIONS); assert_eq!(value, Ok(12345)); + // let value = i64::from_lexical_with_options::(b"0d12345", + // &OPTIONS); assert_eq!(value, Err(Error::MissingBaseSuffix(7))); + // + // let value = i64::from_lexical_with_options::(b"-0d12345z", + // &OPTIONS); assert_eq!(value, Ok(-12345)); + // let value = i64::from_lexical_with_options::(b"-0d12345", + // &OPTIONS); assert_eq!(value, Err(Error::MissingBaseSuffix(8))); + // + // let value = u64::from_lexical_with_options::(b"0d12345z", + // &OPTIONS); assert_eq!(value, Ok(12345)); + // let value = u64::from_lexical_with_options::(b"0d12345", + // &OPTIONS); assert_eq!(value, Err(Error::MissingBaseSuffix(7))); + + // TODO: This fails: I think this is the correct behavior actually... + // TODO: This is wrong, we should fix this + // TODO: Should migrate `parse_base_prefix` and `parse_base_suffix` to + // the end... + // TODO: base_prefix_leading_digit_separator + // TODO: base_prefix_internal_digit_separator + // TODO: base_prefix_trailing_digit_separator (identical to + // `integer_leading_digit_separator`) + // TODO: base_prefix_consecutive_digit_separator + // TODO: leading_base_suffix_digit_separator (identical to + // `trailing_digit_separator`, depending on context) + // TODO: internal_base_suffix_digit_separator + // TODO: trailing_base_suffix_digit_separator + // TODO: consecutive_base_suffix_digit_separator + // TODO: start_digit_separator (absolute start, can overlap with + // leading_base_prefix_digit_separator or leading_integer_digit_separator + // depending on context) + // let value = i64::from_lexical_with_options::(b"+0d_12345", + // &OPTIONS); assert_eq!(value, Err(Error::InvalidDigit(3))); + // + // // TODO:> Add suffix + // + // // TODO: Need Post-base suffix digit separator + // // This shouldn't be internal I don't think... + // + // const INTERNAL: u128 = NumberFormatBuilder::new() + // .digit_separator(num::NonZeroU8::new(b'_')) + // .base_prefix(num::NonZeroU8::new(b'd')) + // .required_base_prefix(true) + // .internal_digit_separator(true) + // .build_strict(); + // let value = i64::from_lexical_with_options::(b"+0d_12345", + // &OPTIONS); assert_eq!(value, Err(Error::InvalidDigit(3))); + + // TODO: Need +} diff --git a/lexical-util/src/format.rs b/lexical-util/src/format.rs index a3646b40..bb316bf8 100644 --- a/lexical-util/src/format.rs +++ b/lexical-util/src/format.rs @@ -46,7 +46,10 @@ //! # Pre-Defined Formats //! //! These are the pre-defined formats for parsing numbers from various -//! programming, markup, and data languages. +//! programming, markup, and data languages. This does not contain those +//! created via a custom formatting API in the language, such as [`UpperHex`]. +//! +//! [`UpperHex`]: core::fmt::UpperHex //! //! - [`STANDARD`]: Standard number format. This is identical to the Rust string //! format. @@ -55,6 +58,19 @@ doc = " - [`RUST_LITERAL`]: Number format for a [`Rust`] literal floating-point number. - [`RUST_STRING`]: Number format to parse a [`Rust`] float from string. +" +)] +#![cfg_attr( + all(feature = "format", feature = "power-of-two"), + doc = " +- [`RUST_HEX_LITERAL`]: Number format for a [`Rust`] literal hexadecimal number. +- [`RUST_BINARY_LITERAL`]: Number format for a [`Rust`] literal binary number. +- [`RUST_OCTAL_LITERAL`]: Number format for a [`Rust`] literal octal number. +" +)] +#![cfg_attr( + feature = "format", + doc = " - [`PYTHON_LITERAL`]: Number format for a [`Python`] literal floating-point number. - [`PYTHON_STRING`]: Number format to parse a [`Python`] float from string. - [`PYTHON3_LITERAL`]: Number format for a [`Python3`] literal floating-point number. diff --git a/lexical-util/src/prebuilt_formats.rs b/lexical-util/src/prebuilt_formats.rs index e67adba2..75923627 100644 --- a/lexical-util/src/prebuilt_formats.rs +++ b/lexical-util/src/prebuilt_formats.rs @@ -696,17 +696,7 @@ use crate::format::NumberFormatBuilder; // PRE-DEFINED CONSTANTS // --------------------- -// -// Sample Format Shorthand: -// ------------------------ -// -// The format shorthand lists the test cases, and if applicable, -// the digit separator character. For example, the shorthand -// `[134-_]` specifies it passes tests 1, 3, and 4, and uses -// `'_'` as a digit-separator character. Meanwhile, `[0]` means it -// passes test 0, and has no digit separator. -// RUST LITERAL [4569ABFGHIJKMN-_] /// Number format for a [`Rust`] literal floating-point number. /// /// [`Rust`]: https://www.rust-lang.org/ @@ -715,11 +705,12 @@ pub const RUST_LITERAL: u128 = NumberFormatBuilder::new() .digit_separator(num::NonZeroU8::new(b'_')) .required_integer_digits(true) .required_mantissa_digits(true) + .required_exponent_digits(true) .no_positive_mantissa_sign(true) .no_special(true) - .required_exponent_digits(true) .required_integer_digits_with_exponent(true) .required_fraction_digits_with_exponent(true) + .required_mantissa_digits_with_exponent(true) .supports_parsing_floats(true) .supports_parsing_integers(true) .supports_writing_floats(true) @@ -729,12 +720,99 @@ pub const RUST_LITERAL: u128 = NumberFormatBuilder::new() .consecutive_digit_separator(true) .build_strict(); -// RUST STRING [0134567MN] /// Number format to parse a [`Rust`] float from string. /// /// [`Rust`]: https://www.rust-lang.org/ #[rustfmt::skip] -pub const RUST_STRING: u128 = NumberFormatBuilder::new().build_strict(); +pub const RUST_STRING: u128 = NumberFormatBuilder::new() + .required_mantissa_digits(true) + .required_exponent_digits(true) + .required_mantissa_digits_with_exponent(true) + .supports_parsing_floats(true) + .supports_parsing_integers(true) + .supports_writing_floats(true) + .supports_writing_integers(true) + .build_strict(); + +/// Number format for a [`Rust`] literal hexadecimal number. +/// +/// [`Rust`]: https://www.rust-lang.org/ +#[rustfmt::skip] +#[cfg(feature = "power-of-two")] +pub const RUST_HEX_LITERAL: u128 = NumberFormatBuilder::new() + .mantissa_radix(16) + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'x')) + .case_sensitive_base_prefix(true) + .required_base_prefix(true) + .required_integer_digits(true) + .no_positive_mantissa_sign(true) + .no_special(true) + .supports_parsing_floats(false) + .supports_parsing_integers(true) + .supports_writing_floats(false) + .supports_writing_integers(true) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(true) + .build_strict(); + +/// Number format for a [`Rust`] literal binary number. +/// +/// [`Rust`]: https://www.rust-lang.org/ +#[rustfmt::skip] +#[cfg(feature = "power-of-two")] +pub const RUST_BINARY_LITERAL: u128 = NumberFormatBuilder::new() + .mantissa_radix(2) + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'b')) + .case_sensitive_base_prefix(true) + .required_base_prefix(true) + .required_integer_digits(true) + .no_positive_mantissa_sign(true) + .no_special(true) + .supports_parsing_floats(false) + .supports_parsing_integers(true) + .supports_writing_floats(false) + .supports_writing_integers(true) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(true) + .build_strict(); + +/// Number format for a [`Rust`] literal octal number. +/// +/// [`Rust`]: https://www.rust-lang.org/ +#[rustfmt::skip] +#[cfg(feature = "power-of-two")] +pub const RUST_OCTAL_LITERAL: u128 = NumberFormatBuilder::new() + .mantissa_radix(8) + .digit_separator(num::NonZeroU8::new(b'_')) + .base_prefix(num::NonZeroU8::new(b'o')) + .case_sensitive_base_prefix(true) + .required_base_prefix(true) + .required_integer_digits(true) + .no_positive_mantissa_sign(true) + .no_special(true) + .supports_parsing_floats(false) + .supports_parsing_integers(true) + .supports_writing_floats(false) + .supports_writing_integers(true) + .internal_digit_separator(true) + .trailing_digit_separator(true) + .consecutive_digit_separator(true) + .build_strict(); + +// NOTE: We don't include Rust's formatting API here, since it's +// 3 forms (hex, octal, and binary), consists of upper and lower +// modes, and can have or omit the base prefix. This is a lot of +// forms and each can trivially be created via the formatting API, +// such as `UpperHex` with `base_prefix` as `X` and +// `required_base_prefix`. + +// TODO: START HERE +// TODO: Rust HEX, OCTAL, BINARY STRINGS +// Will also need Python hex and Rust hex, etc. literals /// Number format for a [`Python`] literal floating-point number. /// @@ -2185,4 +2263,5 @@ pub const IGNORE: u128 = NumberFormatBuilder::new() .digit_separator_flags(true) .required_exponent_digits(false) .required_mantissa_digits(false) + .required_mantissa_digits_with_exponent(false) .build_strict();