From 5e9d9853e751240b28138efd165e793a7ee5dad4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kaan=20B=C3=BCy=C3=BCkerdem?= <76952012+kaanyalova@users.noreply.github.com> Date: Thu, 31 Oct 2024 11:19:49 +0300 Subject: [PATCH] Fix inconsistencies with cpython while parsing format strings (#124) * Fix inconsistencies with cpython while parsing format strings which contain colons inside square brackets Co-authored-by: Jeong, YunWon <69878+youknowone@users.noreply.github.com> --- format/src/format.rs | 73 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 65 insertions(+), 8 deletions(-) diff --git a/format/src/format.rs b/format/src/format.rs index fd497b9..bc1bec2 100644 --- a/format/src/format.rs +++ b/format/src/format.rs @@ -863,17 +863,46 @@ impl FormatString { } fn parse_part_in_brackets(text: &str) -> Result { - let parts: Vec<&str> = text.splitn(2, ':').collect(); + let mut chars = text.chars().peekable(); + + let mut left = String::new(); + let mut right = String::new(); + + let mut split = false; + let mut selected = &mut left; + let mut inside_brackets = false; + + while let Some(char) = chars.next() { + if char == '[' { + inside_brackets = true; + + selected.push(char); + + while let Some(next_char) = chars.next() { + selected.push(next_char); + + if next_char == ']' { + inside_brackets = false; + break; + } + if chars.peek().is_none() { + return Err(FormatParseError::MissingRightBracket); + } + } + } else if char == ':' && !split && !inside_brackets { + split = true; + selected = &mut right; + } else { + selected.push(char); + } + } + // before the comma is a keyword or arg index, after the comma is maybe a spec. - let arg_part = parts[0]; + let arg_part: &str = &left; - let format_spec = if parts.len() > 1 { - parts[1].to_owned() - } else { - String::new() - }; + let format_spec = if split { right } else { String::new() }; - // On parts[0] can still be the conversion (!r, !s, !a) + // left can still be the conversion (!r, !s, !a) let parts: Vec<&str> = arg_part.splitn(2, '!').collect(); // before the bang is a keyword or arg index, after the comma is maybe a conversion spec. let arg_part = parts[0]; @@ -1168,6 +1197,34 @@ mod tests { ); } + #[test] + fn test_square_brackets_inside_format() { + assert_eq!( + FormatString::from_str("{[:123]}"), + Ok(FormatString { + format_parts: vec![FormatPart::Field { + field_name: "[:123]".to_owned(), + conversion_spec: None, + format_spec: "".to_owned(), + }], + }), + ); + + assert_eq!(FormatString::from_str("{asdf[:123]asdf}"), { + Ok(FormatString { + format_parts: vec![FormatPart::Field { + field_name: "asdf[:123]asdf".to_owned(), + conversion_spec: None, + format_spec: "".to_owned(), + }], + }) + }); + + assert_eq!(FormatString::from_str("{[1234}"), { + Err(FormatParseError::MissingRightBracket) + }); + } + #[test] fn test_format_parse_escape() { let expected = Ok(FormatString {