From 35c7939ae5d3a0e34963bbd57bbc4b91c3ce0ce3 Mon Sep 17 00:00:00 2001 From: Cassaundra Smith Date: Mon, 21 Nov 2022 20:36:11 -0800 Subject: [PATCH] Fix incorrect span when using byte-escaped rbrace Fix #103826. --- compiler/rustc_parse_format/src/lib.rs | 127 +++++++++++++++---------- src/test/ui/fmt/issue-103826.rs | 8 ++ src/test/ui/fmt/issue-103826.stderr | 20 ++++ 3 files changed, 107 insertions(+), 48 deletions(-) create mode 100644 src/test/ui/fmt/issue-103826.rs create mode 100644 src/test/ui/fmt/issue-103826.stderr diff --git a/compiler/rustc_parse_format/src/lib.rs b/compiler/rustc_parse_format/src/lib.rs index 0113eb4e3d102..28bd84b9a6d69 100644 --- a/compiler/rustc_parse_format/src/lib.rs +++ b/compiler/rustc_parse_format/src/lib.rs @@ -38,6 +38,24 @@ impl InnerSpan { } } +/// The location and before/after width of a character whose width has changed from its source code +/// representation +#[derive(Copy, Clone, PartialEq, Eq)] +pub struct InnerWidthMapping { + /// Index of the character in the source + pub position: usize, + /// The inner width in characters + pub before: usize, + /// The transformed width in characters + pub after: usize, +} + +impl InnerWidthMapping { + pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping { + InnerWidthMapping { position, before, after } + } +} + /// The type of format string that we are parsing. #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum ParseMode { @@ -200,8 +218,8 @@ pub struct Parser<'a> { style: Option, /// Start and end byte offset of every successfully parsed argument pub arg_places: Vec, - /// Characters that need to be shifted - skips: Vec, + /// Characters whose length has been changed from their in-code representation + width_map: Vec, /// Span of the last opening brace seen, used for error reporting last_opening_brace: Option, /// Whether the source string is comes from `println!` as opposed to `format!` or `print!` @@ -224,7 +242,7 @@ impl<'a> Iterator for Parser<'a> { '{' => { let curr_last_brace = self.last_opening_brace; let byte_pos = self.to_span_index(pos); - let lbrace_end = self.to_span_index(pos + 1); + let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos)); self.last_opening_brace = Some(byte_pos.to(lbrace_end)); self.cur.next(); if self.consume('{') { @@ -233,12 +251,15 @@ impl<'a> Iterator for Parser<'a> { Some(String(self.string(pos + 1))) } else { let arg = self.argument(lbrace_end); - if let Some(rbrace_byte_idx) = self.must_consume('}') { - let lbrace_inner_offset = self.to_span_index(pos); - let rbrace_inner_offset = self.to_span_index(rbrace_byte_idx); + if let Some(rbrace_pos) = self.must_consume('}') { if self.is_literal { + let lbrace_byte_pos = self.to_span_index(pos); + let rbrace_byte_pos = self.to_span_index(rbrace_pos); + + let width = self.to_span_width(rbrace_pos); + self.arg_places.push( - lbrace_inner_offset.to(InnerOffset(rbrace_inner_offset.0 + 1)), + lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)), ); } } else { @@ -285,7 +306,7 @@ impl<'a> Parser<'a> { append_newline: bool, mode: ParseMode, ) -> Parser<'a> { - let (skips, is_literal) = find_skips_from_snippet(snippet, style); + let (width_map, is_literal) = find_width_map_from_snippet(snippet, style); Parser { mode, input: s, @@ -294,7 +315,7 @@ impl<'a> Parser<'a> { curarg: 0, style, arg_places: vec![], - skips, + width_map, last_opening_brace: None, append_newline, is_literal, @@ -367,21 +388,34 @@ impl<'a> Parser<'a> { None } + fn remap_pos(&self, mut pos: usize) -> InnerOffset { + for width in &self.width_map { + if pos > width.position { + pos += width.before - width.after; + } else if pos == width.position && width.after == 0 { + pos += width.before; + } else { + break; + } + } + + InnerOffset(pos) + } + fn to_span_index(&self, pos: usize) -> InnerOffset { - let mut pos = pos; // This handles the raw string case, the raw argument is the number of # // in r###"..."### (we need to add one because of the `r`). let raw = self.style.map_or(0, |raw| raw + 1); - for skip in &self.skips { - if pos > *skip { - pos += 1; - } else if pos == *skip && raw == 0 { - pos += 1; - } else { - break; - } + let pos = self.remap_pos(pos); + InnerOffset(raw + pos.0 + 1) + } + + fn to_span_width(&self, pos: usize) -> usize { + let pos = self.remap_pos(pos); + match self.width_map.iter().find(|w| w.position == pos.0) { + Some(w) => w.before, + None => 1, } - InnerOffset(raw + pos + 1) } fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan { @@ -809,10 +843,10 @@ impl<'a> Parser<'a> { /// Finds the indices of all characters that have been processed and differ between the actual /// written code (code snippet) and the `InternedString` that gets processed in the `Parser` /// in order to properly synthesise the intra-string `Span`s for error diagnostics. -fn find_skips_from_snippet( +fn find_width_map_from_snippet( snippet: Option, str_style: Option, -) -> (Vec, bool) { +) -> (Vec, bool) { let snippet = match snippet { Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s, _ => return (vec![], false), @@ -825,43 +859,39 @@ fn find_skips_from_snippet( let snippet = &snippet[1..snippet.len() - 1]; let mut s = snippet.char_indices(); - let mut skips = vec![]; + let mut width_mappings = vec![]; while let Some((pos, c)) = s.next() { match (c, s.clone().next()) { // skip whitespace and empty lines ending in '\\' - ('\\', Some((next_pos, '\n'))) => { - skips.push(pos); - skips.push(next_pos); + ('\\', Some((_, '\n'))) => { let _ = s.next(); + let mut width = 2; - while let Some((pos, c)) = s.clone().next() { + while let Some((_, c)) = s.clone().next() { if matches!(c, ' ' | '\n' | '\t') { - skips.push(pos); + width += 1; let _ = s.next(); } else { break; } } + + width_mappings.push(InnerWidthMapping::new(pos, width, 0)); } - ('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => { - skips.push(next_pos); + ('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => { + width_mappings.push(InnerWidthMapping::new(pos, 2, 1)); let _ = s.next(); } ('\\', Some((_, 'x'))) => { - for _ in 0..3 { - // consume `\xAB` literal - if let Some((pos, _)) = s.next() { - skips.push(pos); - } else { - break; - } - } + // consume `\xAB` literal + s.nth(2); + width_mappings.push(InnerWidthMapping::new(pos, 4, 1)); } ('\\', Some((_, 'u'))) => { - if let Some((pos, _)) = s.next() { - skips.push(pos); - } - if let Some((next_pos, next_c)) = s.next() { + let mut width = 2; + let _ = s.next(); + + if let Some((_, next_c)) = s.next() { if next_c == '{' { // consume up to 6 hexanumeric chars let digits_len = @@ -881,19 +911,18 @@ fn find_skips_from_snippet( let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1)); // skip '{' and '}' also - for pos in (next_pos..).take(required_skips + 2) { - skips.push(pos) - } + width += required_skips + 2; s.nth(digits_len); } else if next_c.is_digit(16) { - skips.push(next_pos); + width += 1; + // We suggest adding `{` and `}` when appropriate, accept it here as if // it were correct let mut i = 0; // consume up to 6 hexanumeric chars - while let (Some((next_pos, c)), _) = (s.next(), i < 6) { + while let (Some((_, c)), _) = (s.next(), i < 6) { if c.is_digit(16) { - skips.push(next_pos); + width += 1; } else { break; } @@ -901,11 +930,13 @@ fn find_skips_from_snippet( } } } + + width_mappings.push(InnerWidthMapping::new(pos, width, 1)); } _ => {} } } - (skips, true) + (width_mappings, true) } #[cfg(test)] diff --git a/src/test/ui/fmt/issue-103826.rs b/src/test/ui/fmt/issue-103826.rs new file mode 100644 index 0000000000000..a11ec37a06dbb --- /dev/null +++ b/src/test/ui/fmt/issue-103826.rs @@ -0,0 +1,8 @@ +fn main() { + format!("{\x7D"); + //~^ ERROR 1 positional argument in format string, but no arguments were given + format!("\x7B\x7D"); + //~^ ERROR 1 positional argument in format string, but no arguments were given + format!("{\x7D {\x7D"); + //~^ ERROR 2 positional arguments in format string, but no arguments were given +} diff --git a/src/test/ui/fmt/issue-103826.stderr b/src/test/ui/fmt/issue-103826.stderr new file mode 100644 index 0000000000000..0f27e1930bc3e --- /dev/null +++ b/src/test/ui/fmt/issue-103826.stderr @@ -0,0 +1,20 @@ +error: 1 positional argument in format string, but no arguments were given + --> $DIR/issue-103826.rs:2:14 + | +LL | format!("{\x7D"); + | ^^^^^ + +error: 1 positional argument in format string, but no arguments were given + --> $DIR/issue-103826.rs:4:14 + | +LL | format!("\x7B\x7D"); + | ^^^^^^^^ + +error: 2 positional arguments in format string, but no arguments were given + --> $DIR/issue-103826.rs:6:14 + | +LL | format!("{\x7D {\x7D"); + | ^^^^^ ^^^^^ + +error: aborting due to 3 previous errors +