From 0a07a2ca62af13e51b924f4e7dd65b010c9a4fe9 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Fri, 8 Sep 2023 12:56:55 +0200 Subject: [PATCH] Extract string part and normalized string (#7219) --- .../src/expression/string.rs | 185 ++++++++++-------- 1 file changed, 101 insertions(+), 84 deletions(-) diff --git a/crates/ruff_python_formatter/src/expression/string.rs b/crates/ruff_python_formatter/src/expression/string.rs index 063eb350d5b63..a5a706d0a428d 100644 --- a/crates/ruff_python_formatter/src/expression/string.rs +++ b/crates/ruff_python_formatter/src/expression/string.rs @@ -138,29 +138,26 @@ impl<'a> FormatString<'a> { impl<'a> Format> for FormatString<'a> { fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { + let locator = f.context().locator(); match self.layout { StringLayout::Default => { if self.string.is_implicit_concatenated() { in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f) } else { - FormatStringPart::new( - self.string.range(), - self.string.quoting(&f.context().locator()), - &f.context().locator(), - f.options().quote_style(), - ) - .fmt(f) + StringPart::from_source(self.string.range(), &locator) + .normalize( + self.string.quoting(&locator), + &locator, + f.options().quote_style(), + ) + .fmt(f) } } StringLayout::DocString => { - let string_part = FormatStringPart::new( - self.string.range(), - // f-strings can't be docstrings - Quoting::CanChange, - &f.context().locator(), - f.options().quote_style(), - ); - format_docstring(&string_part, f) + let string_part = StringPart::from_source(self.string.range(), &locator); + let normalized = + string_part.normalize(Quoting::CanChange, &locator, f.options().quote_style()); + format_docstring(&normalized, f) } StringLayout::ImplicitConcatenatedStringInBinaryLike => { FormatStringContinuation::new(self.string).fmt(f) @@ -259,16 +256,14 @@ impl Format> for FormatStringContinuation<'_> { }); let (trailing_part_comments, rest) = rest.split_at(trailing_comments_end); + let part = StringPart::from_source(token_range, &locator); + let normalized = + part.normalize(self.string.quoting(&locator), &locator, quote_style); joiner.entry(&format_args![ line_suffix_boundary(), leading_comments(leading_part_comments), - FormatStringPart::new( - token_range, - self.string.quoting(&locator), - &locator, - quote_style, - ), + normalized, trailing_comments(trailing_part_comments) ]); @@ -289,21 +284,20 @@ impl Format> for FormatStringContinuation<'_> { } } -struct FormatStringPart { +#[derive(Debug)] +struct StringPart { + /// The prefix. prefix: StringPrefix, - preferred_quotes: StringQuotes, - range: TextRange, - is_raw_string: bool, -} -impl Ranged for FormatStringPart { - fn range(&self) -> TextRange { - self.range - } + /// The actual quotes of the string in the source + quotes: StringQuotes, + + /// The range of the string's content (full range minus quotes and prefix) + content_range: TextRange, } -impl FormatStringPart { - fn new(range: TextRange, quoting: Quoting, locator: &Locator, quote_style: QuoteStyle) -> Self { +impl StringPart { + fn from_source(range: TextRange, locator: &Locator) -> Self { let string_content = locator.slice(range); let prefix = StringPrefix::parse(string_content); @@ -317,46 +311,80 @@ impl FormatStringPart { ); let raw_content_range = relative_raw_content_range + range.start(); - let raw_content = &string_content[relative_raw_content_range]; - let is_raw_string = prefix.is_raw_string(); + Self { + prefix, + content_range: raw_content_range, + quotes, + } + } + + /// Computes the strings preferred quotes and normalizes its content. + fn normalize<'a>( + self, + quoting: Quoting, + locator: &'a Locator, + quote_style: QuoteStyle, + ) -> NormalizedString<'a> { + let raw_content = locator.slice(self.content_range); + let preferred_quotes = match quoting { - Quoting::Preserve => quotes, + Quoting::Preserve => self.quotes, Quoting::CanChange => { - if is_raw_string { - preferred_quotes_raw(raw_content, quotes, quote_style) + if self.prefix.is_raw_string() { + preferred_quotes_raw(raw_content, self.quotes, quote_style) } else { - preferred_quotes(raw_content, quotes, quote_style) + preferred_quotes(raw_content, self.quotes, quote_style) } } }; - Self { - prefix, - range: raw_content_range, + let normalized = normalize_string( + locator.slice(self.content_range), preferred_quotes, - is_raw_string, + self.prefix.is_raw_string(), + ); + + NormalizedString { + prefix: self.prefix, + content_range: self.content_range, + text: normalized, + quotes: preferred_quotes, } } } -impl Format> for FormatStringPart { - fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> { - let normalized = normalize_string( - f.context().locator().slice(self.range), - self.preferred_quotes, - self.is_raw_string, - ); +#[derive(Debug)] +struct NormalizedString<'a> { + prefix: StringPrefix, + + /// The quotes of the normalized string (preferred quotes) + quotes: StringQuotes, - write!(f, [self.prefix, self.preferred_quotes])?; - match normalized { + /// The range of the string's content in the source (minus prefix and quotes). + content_range: TextRange, + + /// The normalized text + text: Cow<'a, str>, +} + +impl Ranged for NormalizedString<'_> { + fn range(&self) -> TextRange { + self.content_range + } +} + +impl Format> for NormalizedString<'_> { + fn fmt(&self, f: &mut Formatter>) -> FormatResult<()> { + write!(f, [self.prefix, self.quotes])?; + match &self.text { Cow::Borrowed(_) => { source_text_slice(self.range()).fmt(f)?; } Cow::Owned(normalized) => { - text(&normalized, Some(self.start())).fmt(f)?; + text(normalized, Some(self.start())).fmt(f)?; } } - self.preferred_quotes.fmt(f) + self.quotes.fmt(f) } } @@ -802,35 +830,30 @@ fn count_indentation_like_black(line: &str, tab_width: TabWidth) -> TextSize { /// line c /// """ /// ``` -fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> FormatResult<()> { - let locator = f.context().locator(); +fn format_docstring(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> { + let docstring = &normalized.text; // Black doesn't change the indentation of docstrings that contain an escaped newline - if locator.slice(string_part).contains("\\\n") { - return string_part.fmt(f); + if docstring.contains("\\\n") { + return normalized.fmt(f); } - let normalized = normalize_string( - locator.slice(string_part), - string_part.preferred_quotes, - string_part.is_raw_string, - ); // is_borrowed is unstable :/ - let already_normalized = matches!(normalized, Cow::Borrowed(_)); + let already_normalized = matches!(docstring, Cow::Borrowed(_)); - let mut lines = normalized.lines().peekable(); + let mut lines = docstring.lines().peekable(); // Start the string write!( f, [ - source_position(string_part.start()), - string_part.prefix, - string_part.preferred_quotes + normalized.prefix, + normalized.quotes, + source_position(normalized.start()), ] )?; // We track where in the source docstring we are (in source code byte offsets) - let mut offset = string_part.start(); + let mut offset = normalized.start(); // The first line directly after the opening quotes has different rules than the rest, mainly // that we remove all leading whitespace as there's no indentation @@ -844,7 +867,7 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form // Edge case: The first line is `""" "content`, so we need to insert chaperone space that keep // inner quotes and closing quotes from getting to close to avoid `""""content` - if trim_both.starts_with(string_part.preferred_quotes.style.as_char()) { + if trim_both.starts_with(normalized.quotes.style.as_char()) { space().fmt(f)?; } @@ -863,15 +886,15 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form offset += first.text_len(); // Check if we have a single line (or empty) docstring - if normalized[first.len()..].trim().is_empty() { + if docstring[first.len()..].trim().is_empty() { // For `"""\n"""` or other whitespace between the quotes, black keeps a single whitespace, // but `""""""` doesn't get one inserted. - if needs_chaperone_space(string_part, trim_end) - || (trim_end.is_empty() && !normalized.is_empty()) + if needs_chaperone_space(normalized, trim_end) + || (trim_end.is_empty() && !docstring.is_empty()) { space().fmt(f)?; } - string_part.preferred_quotes.fmt(f)?; + normalized.quotes.fmt(f)?; return Ok(()); } @@ -906,27 +929,21 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form } // Same special case in the last line as for the first line - let trim_end = normalized + let trim_end = docstring .as_ref() .trim_end_matches(|c: char| c.is_whitespace() && c != '\n'); - if needs_chaperone_space(string_part, trim_end) { + if needs_chaperone_space(normalized, trim_end) { space().fmt(f)?; } - write!( - f, - [ - string_part.preferred_quotes, - source_position(string_part.end()) - ] - ) + write!(f, [source_position(normalized.end()), normalized.quotes]) } /// If the last line of the docstring is `content" """` or `content\ """`, we need a chaperone space /// that avoids `content""""` and `content\"""`. This does only applies to un-escaped backslashes, /// so `content\\ """` doesn't need a space while `content\\\ """` does. -fn needs_chaperone_space(string_part: &FormatStringPart, trim_end: &str) -> bool { - trim_end.ends_with(string_part.preferred_quotes.style.as_char()) +fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool { + trim_end.ends_with(normalized.quotes.style.as_char()) || trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1 }