Skip to content

Commit

Permalink
Extract string part and normalized string (#7219)
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaReiser authored Sep 8, 2023
1 parent 47a253f commit 0a07a2c
Showing 1 changed file with 101 additions and 84 deletions.
185 changes: 101 additions & 84 deletions crates/ruff_python_formatter/src/expression/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,29 +138,26 @@ impl<'a> FormatString<'a> {

impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let locator = f.context().locator();
match self.layout {
StringLayout::Default => {
if self.string.is_implicit_concatenated() {
in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f)
} else {
FormatStringPart::new(
self.string.range(),
self.string.quoting(&f.context().locator()),
&f.context().locator(),
f.options().quote_style(),
)
.fmt(f)
StringPart::from_source(self.string.range(), &locator)
.normalize(
self.string.quoting(&locator),
&locator,
f.options().quote_style(),
)
.fmt(f)
}
}
StringLayout::DocString => {
let string_part = FormatStringPart::new(
self.string.range(),
// f-strings can't be docstrings
Quoting::CanChange,
&f.context().locator(),
f.options().quote_style(),
);
format_docstring(&string_part, f)
let string_part = StringPart::from_source(self.string.range(), &locator);
let normalized =
string_part.normalize(Quoting::CanChange, &locator, f.options().quote_style());
format_docstring(&normalized, f)
}
StringLayout::ImplicitConcatenatedStringInBinaryLike => {
FormatStringContinuation::new(self.string).fmt(f)
Expand Down Expand Up @@ -259,16 +256,14 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
});

let (trailing_part_comments, rest) = rest.split_at(trailing_comments_end);
let part = StringPart::from_source(token_range, &locator);
let normalized =
part.normalize(self.string.quoting(&locator), &locator, quote_style);

joiner.entry(&format_args![
line_suffix_boundary(),
leading_comments(leading_part_comments),
FormatStringPart::new(
token_range,
self.string.quoting(&locator),
&locator,
quote_style,
),
normalized,
trailing_comments(trailing_part_comments)
]);

Expand All @@ -289,21 +284,20 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
}
}

struct FormatStringPart {
#[derive(Debug)]
struct StringPart {
/// The prefix.
prefix: StringPrefix,
preferred_quotes: StringQuotes,
range: TextRange,
is_raw_string: bool,
}

impl Ranged for FormatStringPart {
fn range(&self) -> TextRange {
self.range
}
/// The actual quotes of the string in the source
quotes: StringQuotes,

/// The range of the string's content (full range minus quotes and prefix)
content_range: TextRange,
}

impl FormatStringPart {
fn new(range: TextRange, quoting: Quoting, locator: &Locator, quote_style: QuoteStyle) -> Self {
impl StringPart {
fn from_source(range: TextRange, locator: &Locator) -> Self {
let string_content = locator.slice(range);

let prefix = StringPrefix::parse(string_content);
Expand All @@ -317,46 +311,80 @@ impl FormatStringPart {
);
let raw_content_range = relative_raw_content_range + range.start();

let raw_content = &string_content[relative_raw_content_range];
let is_raw_string = prefix.is_raw_string();
Self {
prefix,
content_range: raw_content_range,
quotes,
}
}

/// Computes the strings preferred quotes and normalizes its content.
fn normalize<'a>(
self,
quoting: Quoting,
locator: &'a Locator,
quote_style: QuoteStyle,
) -> NormalizedString<'a> {
let raw_content = locator.slice(self.content_range);

let preferred_quotes = match quoting {
Quoting::Preserve => quotes,
Quoting::Preserve => self.quotes,
Quoting::CanChange => {
if is_raw_string {
preferred_quotes_raw(raw_content, quotes, quote_style)
if self.prefix.is_raw_string() {
preferred_quotes_raw(raw_content, self.quotes, quote_style)
} else {
preferred_quotes(raw_content, quotes, quote_style)
preferred_quotes(raw_content, self.quotes, quote_style)
}
}
};

Self {
prefix,
range: raw_content_range,
let normalized = normalize_string(
locator.slice(self.content_range),
preferred_quotes,
is_raw_string,
self.prefix.is_raw_string(),
);

NormalizedString {
prefix: self.prefix,
content_range: self.content_range,
text: normalized,
quotes: preferred_quotes,
}
}
}

impl Format<PyFormatContext<'_>> for FormatStringPart {
fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
let normalized = normalize_string(
f.context().locator().slice(self.range),
self.preferred_quotes,
self.is_raw_string,
);
#[derive(Debug)]
struct NormalizedString<'a> {
prefix: StringPrefix,

/// The quotes of the normalized string (preferred quotes)
quotes: StringQuotes,

write!(f, [self.prefix, self.preferred_quotes])?;
match normalized {
/// The range of the string's content in the source (minus prefix and quotes).
content_range: TextRange,

/// The normalized text
text: Cow<'a, str>,
}

impl Ranged for NormalizedString<'_> {
fn range(&self) -> TextRange {
self.content_range
}
}

impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
write!(f, [self.prefix, self.quotes])?;
match &self.text {
Cow::Borrowed(_) => {
source_text_slice(self.range()).fmt(f)?;
}
Cow::Owned(normalized) => {
text(&normalized, Some(self.start())).fmt(f)?;
text(normalized, Some(self.start())).fmt(f)?;
}
}
self.preferred_quotes.fmt(f)
self.quotes.fmt(f)
}
}

Expand Down Expand Up @@ -802,35 +830,30 @@ fn count_indentation_like_black(line: &str, tab_width: TabWidth) -> TextSize {
/// line c
/// """
/// ```
fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> FormatResult<()> {
let locator = f.context().locator();
fn format_docstring(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> {
let docstring = &normalized.text;

// Black doesn't change the indentation of docstrings that contain an escaped newline
if locator.slice(string_part).contains("\\\n") {
return string_part.fmt(f);
if docstring.contains("\\\n") {
return normalized.fmt(f);
}

let normalized = normalize_string(
locator.slice(string_part),
string_part.preferred_quotes,
string_part.is_raw_string,
);
// is_borrowed is unstable :/
let already_normalized = matches!(normalized, Cow::Borrowed(_));
let already_normalized = matches!(docstring, Cow::Borrowed(_));

let mut lines = normalized.lines().peekable();
let mut lines = docstring.lines().peekable();

// Start the string
write!(
f,
[
source_position(string_part.start()),
string_part.prefix,
string_part.preferred_quotes
normalized.prefix,
normalized.quotes,
source_position(normalized.start()),
]
)?;
// We track where in the source docstring we are (in source code byte offsets)
let mut offset = string_part.start();
let mut offset = normalized.start();

// The first line directly after the opening quotes has different rules than the rest, mainly
// that we remove all leading whitespace as there's no indentation
Expand All @@ -844,7 +867,7 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form

// Edge case: The first line is `""" "content`, so we need to insert chaperone space that keep
// inner quotes and closing quotes from getting to close to avoid `""""content`
if trim_both.starts_with(string_part.preferred_quotes.style.as_char()) {
if trim_both.starts_with(normalized.quotes.style.as_char()) {
space().fmt(f)?;
}

Expand All @@ -863,15 +886,15 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form
offset += first.text_len();

// Check if we have a single line (or empty) docstring
if normalized[first.len()..].trim().is_empty() {
if docstring[first.len()..].trim().is_empty() {
// For `"""\n"""` or other whitespace between the quotes, black keeps a single whitespace,
// but `""""""` doesn't get one inserted.
if needs_chaperone_space(string_part, trim_end)
|| (trim_end.is_empty() && !normalized.is_empty())
if needs_chaperone_space(normalized, trim_end)
|| (trim_end.is_empty() && !docstring.is_empty())
{
space().fmt(f)?;
}
string_part.preferred_quotes.fmt(f)?;
normalized.quotes.fmt(f)?;
return Ok(());
}

Expand Down Expand Up @@ -906,27 +929,21 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form
}

// Same special case in the last line as for the first line
let trim_end = normalized
let trim_end = docstring
.as_ref()
.trim_end_matches(|c: char| c.is_whitespace() && c != '\n');
if needs_chaperone_space(string_part, trim_end) {
if needs_chaperone_space(normalized, trim_end) {
space().fmt(f)?;
}

write!(
f,
[
string_part.preferred_quotes,
source_position(string_part.end())
]
)
write!(f, [source_position(normalized.end()), normalized.quotes])
}

/// If the last line of the docstring is `content" """` or `content\ """`, we need a chaperone space
/// that avoids `content""""` and `content\"""`. This does only applies to un-escaped backslashes,
/// so `content\\ """` doesn't need a space while `content\\\ """` does.
fn needs_chaperone_space(string_part: &FormatStringPart, trim_end: &str) -> bool {
trim_end.ends_with(string_part.preferred_quotes.style.as_char())
fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool {
trim_end.ends_with(normalized.quotes.style.as_char())
|| trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
}

Expand Down

0 comments on commit 0a07a2c

Please sign in to comment.