Skip to content

Commit

Permalink
Rollup merge of #87596 - jesyspa:issue-87318-hidden-whitespace, r=est…
Browse files Browse the repository at this point in the history
…ebank

Add warning when whitespace is not skipped after an escaped newline

Fixes issue #87318, also simplifies issue #87319.

* Add support to the lexer to emit warnings as well as errors.
* Emit a warning when a string literal contains an escaped newline but some of the whitespace on the next line is not skipped because it is non-ASCII.
  • Loading branch information
JohnTitor authored Jul 30, 2021
2 parents 5e2655d + 5d59b44 commit aa9e6aa
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 8 deletions.
24 changes: 20 additions & 4 deletions compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,11 @@ impl LitKind {
unescape_literal(&s, Mode::Str, &mut |_, unescaped_char| {
match unescaped_char {
Ok(c) => buf.push(c),
Err(_) => error = Err(LitError::LexerError),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
}
});
error?;
Expand All @@ -83,7 +87,11 @@ impl LitKind {
unescape_literal(&s, Mode::RawStr, &mut |_, unescaped_char| {
match unescaped_char {
Ok(c) => buf.push(c),
Err(_) => error = Err(LitError::LexerError),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
}
});
error?;
Expand All @@ -100,7 +108,11 @@ impl LitKind {
unescape_byte_literal(&s, Mode::ByteStr, &mut |_, unescaped_byte| {
match unescaped_byte {
Ok(c) => buf.push(c),
Err(_) => error = Err(LitError::LexerError),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
}
});
error?;
Expand All @@ -114,7 +126,11 @@ impl LitKind {
unescape_byte_literal(&s, Mode::RawByteStr, &mut |_, unescaped_byte| {
match unescaped_byte {
Ok(c) => buf.push(c),
Err(_) => error = Err(LitError::LexerError),
Err(err) => {
if err.is_fatal() {
error = Err(LitError::LexerError);
}
}
}
});
error?;
Expand Down
34 changes: 30 additions & 4 deletions compiler/rustc_lexer/src/unescape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use std::str::Chars;
#[cfg(test)]
mod tests;

/// Errors that can occur during string unescaping.
/// Errors and warnings that can occur during string unescaping.
#[derive(Debug, PartialEq, Eq)]
pub enum EscapeError {
/// Expected 1 char, but 0 were found.
Expand Down Expand Up @@ -56,6 +56,20 @@ pub enum EscapeError {
NonAsciiCharInByte,
/// Non-ascii character in byte string literal.
NonAsciiCharInByteString,

/// After a line ending with '\', the next line contains whitespace
/// characters that are not skipped.
UnskippedWhitespaceWarning,
}

impl EscapeError {
    /// Reports whether this value is a genuine error rather than a warning.
    ///
    /// `UnskippedWhitespaceWarning` is the only non-fatal variant; every
    /// other variant yields `true`.
    pub fn is_fatal(&self) -> bool {
        !matches!(self, EscapeError::UnskippedWhitespaceWarning)
    }
}

/// Takes a contents of a literal (without quotes) and produces a
Expand Down Expand Up @@ -283,7 +297,7 @@ where
// if unescaped '\' character is followed by '\n'.
// For details see [Rust language reference]
// (https://doc.rust-lang.org/reference/tokens.html#string-literals).
skip_ascii_whitespace(&mut chars);
skip_ascii_whitespace(&mut chars, start, callback);
continue;
}
_ => scan_escape(first_char, &mut chars, mode),
Expand All @@ -297,13 +311,25 @@ where
callback(start..end, unescaped_char);
}

fn skip_ascii_whitespace(chars: &mut Chars<'_>) {
fn skip_ascii_whitespace<F>(chars: &mut Chars<'_>, start: usize, callback: &mut F)
where
F: FnMut(Range<usize>, Result<char, EscapeError>),
{
let str = chars.as_str();
let first_non_space = str
.bytes()
.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
.unwrap_or(str.len());
*chars = str[first_non_space..].chars()
let tail = &str[first_non_space..];
if let Some(c) = tail.chars().nth(0) {
// For error reporting, we would like the span to contain the character that was not
// skipped. The +1 is necessary to account for the leading \ that started the escape.
let end = start + first_non_space + c.len_utf8() + 1;
if c.is_whitespace() {
callback(start..end, Err(EscapeError::UnskippedWhitespaceWarning));
}
}
*chars = tail.chars();
}
}

Expand Down
19 changes: 19 additions & 0 deletions compiler/rustc_lexer/src/unescape/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,25 @@ fn test_unescape_char_good() {
check(r"\u{1F63b}", '😻');
}

#[test]
fn test_unescape_str_warn() {
    // Collects every (span, result) pair reported while unescaping `literal`
    // in `Mode::Str` and compares the full sequence against `expected`.
    fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
        let mut events = Vec::with_capacity(literal.len());
        unescape_literal(literal, Mode::Str, &mut |span, outcome| {
            events.push((span, outcome));
        });
        assert_eq!(events, expected);
    }

    // An escaped newline followed by a space, U+00A0, a space and `x`: the
    // warning is reported first, then the characters from U+00A0 onwards.
    let expected = [
        (0..5, Err(EscapeError::UnskippedWhitespaceWarning)),
        (3..5, Ok('\u{a0}')),
        (5..6, Ok(' ')),
        (6..7, Ok('x')),
    ];
    check("\\\n \u{a0} x", &expected);
}

#[test]
fn test_unescape_str_good() {
fn check(literal_text: &str, expected: &str) {
Expand Down
6 changes: 6 additions & 0 deletions compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,12 @@ pub(crate) fn emit_unescape_error(
let msg = "invalid trailing slash in literal";
handler.struct_span_err(span, msg).span_label(span, msg).emit();
}
EscapeError::UnskippedWhitespaceWarning => {
let (c, char_span) = last_char();
let msg =
format!("non-ASCII whitespace symbol '{}' is not skipped", c.escape_unicode());
handler.struct_span_warn(span, &msg).span_label(char_span, &msg).emit();
}
}
}

Expand Down

0 comments on commit aa9e6aa

Please sign in to comment.