From 0a568fbc727a5d0644c80b1f305e0a9a9d4dbce9 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Mon, 29 Jan 2024 09:47:02 +1100 Subject: [PATCH] Be more careful about interpreting a label as a mistyped char literal. Currently the parser will interpret any label in certain positions as a mistyped char literal, on the assumption that the trailing single quote was accidentally omitted. This is reasonable for a label like 'a (because 'a' would be valid) but not reasonable for a label like 'abc (because 'abc' is not valid). This commit restricts this behaviour only to labels that would be valid char literals, via the new `could_be_unclosed_char_literal` function. The commit also augments the `label-is-actually-char.rs` test in a couple of ways: - Adds testing of labels with identifiers longer than one char, e.g. 'abc. - Adds a new match with simpler patterns, because the `recover_unclosed_char` call in `parse_pat_with_range_pat` was not being exercised (in this test or any other ui tests). Fixes #120397, an assertion failure, which was caused by this behaviour in the parser interacting with some new stricter char literal checking added in #120329. --- compiler/rustc_parse/src/parser/expr.rs | 17 ++++- compiler/rustc_parse/src/parser/pat.rs | 4 +- tests/ui/parser/label-is-actually-char.rs | 41 +++++++++-- tests/ui/parser/label-is-actually-char.stderr | 73 +++++++++++++++---- 4 files changed, 112 insertions(+), 23 deletions(-) diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index ad26a930829e1..583f6f2ae5f4b 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -28,6 +28,7 @@ use rustc_data_structures::stack::ensure_sufficient_stack; use rustc_errors::{ AddToDiagnostic, Applicability, Diagnostic, DiagnosticBuilder, PResult, StashKey, }; +use rustc_lexer::unescape::unescape_char; use rustc_macros::Subdiagnostic; use rustc_session::errors::{report_lit_error, ExprParenthesesNeeded}; use rustc_session::lint::builtin::BREAK_WITH_LABEL_AND_LOOP; @@ -1652,6 +1653,7 @@ impl<'a> Parser<'a> { && self.may_recover() && (matches!(self.token.kind, token::CloseDelim(_) | token::Comma) || self.token.is_punct()) + && could_be_unclosed_char_literal(label_.ident) { let (lit, _) = self.recover_unclosed_char(label_.ident, Parser::mk_token_lit_char, |self_| { @@ -1744,6 +1746,7 @@ impl<'a> Parser<'a> { mk_lit_char: impl FnOnce(Symbol, Span) -> L, err: impl FnOnce(&Self) -> DiagnosticBuilder<'a>, ) -> L { + assert!(could_be_unclosed_char_literal(ident)); if let Some(diag) = self.dcx().steal_diagnostic(ident.span, StashKey::LifetimeIsChar) { diag.with_span_suggestion_verbose( ident.span.shrink_to_hi(), @@ -2034,8 +2037,11 @@ impl<'a> Parser<'a> { let msg = format!("unexpected token: {}", super::token_descr(&token)); self_.dcx().struct_span_err(token.span, msg) }; - // On an error path, eagerly consider a lifetime to be an unclosed character lit - if self.token.is_lifetime() { + // On an error path, eagerly consider a lifetime to be an unclosed character lit, if that + // makes sense. + if let Some(ident) = self.token.lifetime() + && could_be_unclosed_char_literal(ident) + { let lt = self.expect_lifetime(); Ok(self.recover_unclosed_char(lt.ident, mk_lit_char, err)) } else { @@ -3763,6 +3769,13 @@ impl<'a> Parser<'a> { } } +/// Could this lifetime/label be an unclosed char literal? For example, `'a` +/// could be, but `'abc` could not. +pub(crate) fn could_be_unclosed_char_literal(ident: Ident) -> bool { + ident.name.as_str().starts_with('\'') + && unescape_char(ident.without_first_quote().name.as_str()).is_ok() +} + /// Used to forbid `let` expressions in certain syntactic locations. #[derive(Clone, Copy, Subdiagnostic)] pub(crate) enum ForbiddenLetReason { diff --git a/compiler/rustc_parse/src/parser/pat.rs b/compiler/rustc_parse/src/parser/pat.rs index 7918e03750ce3..55144c6165012 100644 --- a/compiler/rustc_parse/src/parser/pat.rs +++ b/compiler/rustc_parse/src/parser/pat.rs @@ -10,6 +10,7 @@ use crate::errors::{ UnexpectedParenInRangePatSugg, UnexpectedVertVertBeforeFunctionParam, UnexpectedVertVertInPattern, }; +use crate::parser::expr::could_be_unclosed_char_literal; use crate::{maybe_recover_from_interpolated_ty_qpath, maybe_whole}; use rustc_ast::mut_visit::{noop_visit_pat, MutVisitor}; use rustc_ast::ptr::P; @@ -443,11 +444,12 @@ impl<'a> Parser<'a> { } else { PatKind::Path(qself, path) } - } else if matches!(self.token.kind, token::Lifetime(_)) + } else if let token::Lifetime(lt) = self.token.kind // In pattern position, we're totally fine with using "next token isn't colon" // as a heuristic. We could probably just always try to recover if it's a lifetime, // because we never have `'a: label {}` in a pattern position anyways, but it does // keep us from suggesting something like `let 'a: Ty = ..` => `let 'a': Ty = ..` + && could_be_unclosed_char_literal(Ident::with_dummy_span(lt)) && !self.look_ahead(1, |token| matches!(token.kind, token::Colon)) { // Recover a `'a` as a `'a'` literal diff --git a/tests/ui/parser/label-is-actually-char.rs b/tests/ui/parser/label-is-actually-char.rs index 183da603da434..74df898d1910a 100644 --- a/tests/ui/parser/label-is-actually-char.rs +++ b/tests/ui/parser/label-is-actually-char.rs @@ -1,16 +1,43 @@ +// Note: it's ok to interpret 'a as 'a', but but not ok to interpret 'abc as +// 'abc' because 'abc' is not a valid char literal. + fn main() { let c = 'a; //~^ ERROR expected `while`, `for`, `loop` or `{` after a label //~| HELP add `'` to close the char literal - match c { + + let c = 'abc; + //~^ ERROR expected `while`, `for`, `loop` or `{` after a label + //~| ERROR expected expression, found `;` +} + +fn f() { + match 'a' { 'a'..='b => {} //~^ ERROR unexpected token: `'b` //~| HELP add `'` to close the char literal - _ => {} + 'c'..='def => {} + //~^ ERROR unexpected token: `'def` } - let x = ['a, 'b]; - //~^ ERROR expected `while`, `for`, `loop` or `{` after a label - //~| ERROR expected `while`, `for`, `loop` or `{` after a label - //~| HELP add `'` to close the char literal - //~| HELP add `'` to close the char literal +} + +fn g() { + match 'g' { + 'g => {} + //~^ ERROR expected pattern, found `=>` + //~| HELP add `'` to close the char literal + 'hij => {} + //~^ ERROR expected pattern, found `'hij` + _ => {} + } +} + +fn h() { + let x = ['a, 'b, 'cde]; + //~^ ERROR expected `while`, `for`, `loop` or `{` after a label + //~| HELP add `'` to close the char literal + //~| ERROR expected `while`, `for`, `loop` or `{` after a label + //~| HELP add `'` to close the char literal + //~| ERROR expected `while`, `for`, `loop` or `{` after a label + //~| ERROR expected expression, found `]` } diff --git a/tests/ui/parser/label-is-actually-char.stderr b/tests/ui/parser/label-is-actually-char.stderr index 28c8d2ada3adb..10a7e1803b532 100644 --- a/tests/ui/parser/label-is-actually-char.stderr +++ b/tests/ui/parser/label-is-actually-char.stderr @@ -1,5 +1,5 @@ error: expected `while`, `for`, `loop` or `{` after a label - --> $DIR/label-is-actually-char.rs:2:15 + --> $DIR/label-is-actually-char.rs:5:15 | LL | let c = 'a; | ^ expected `while`, `for`, `loop` or `{` after a label @@ -9,8 +9,20 @@ help: add `'` to close the char literal LL | let c = 'a'; | + +error: expected `while`, `for`, `loop` or `{` after a label + --> $DIR/label-is-actually-char.rs:9:17 + | +LL | let c = 'abc; + | ^ expected `while`, `for`, `loop` or `{` after a label + +error: expected expression, found `;` + --> $DIR/label-is-actually-char.rs:9:17 + | +LL | let c = 'abc; + | ^ expected expression + error: unexpected token: `'b` - --> $DIR/label-is-actually-char.rs:6:15 + --> $DIR/label-is-actually-char.rs:16:15 | LL | 'a'..='b => {} | ^^ @@ -20,27 +32,62 @@ help: add `'` to close the char literal LL | 'a'..='b' => {} | + +error: unexpected token: `'def` + --> $DIR/label-is-actually-char.rs:19:15 + | +LL | 'c'..='def => {} + | ^^^^ + +error: expected pattern, found `=>` + --> $DIR/label-is-actually-char.rs:26:11 + | +LL | 'g => {} + | ^^ expected pattern + | +help: add `'` to close the char literal + | +LL | 'g' => {} + | + + +error: expected pattern, found `'hij` + --> $DIR/label-is-actually-char.rs:29:8 + | +LL | 'hij => {} + | ^^^^ expected pattern + error: expected `while`, `for`, `loop` or `{` after a label - --> $DIR/label-is-actually-char.rs:11:16 + --> $DIR/label-is-actually-char.rs:36:15 | -LL | let x = ['a, 'b]; - | ^ expected `while`, `for`, `loop` or `{` after a label +LL | let x = ['a, 'b, 'cde]; + | ^ expected `while`, `for`, `loop` or `{` after a label | help: add `'` to close the char literal | -LL | let x = ['a', 'b]; - | + +LL | let x = ['a', 'b, 'cde]; + | + error: expected `while`, `for`, `loop` or `{` after a label - --> $DIR/label-is-actually-char.rs:11:20 + --> $DIR/label-is-actually-char.rs:36:19 | -LL | let x = ['a, 'b]; - | ^ expected `while`, `for`, `loop` or `{` after a label +LL | let x = ['a, 'b, 'cde]; + | ^ expected `while`, `for`, `loop` or `{` after a label | help: add `'` to close the char literal | -LL | let x = ['a, 'b']; - | + +LL | let x = ['a, 'b', 'cde]; + | + + +error: expected `while`, `for`, `loop` or `{` after a label + --> $DIR/label-is-actually-char.rs:36:25 + | +LL | let x = ['a, 'b, 'cde]; + | ^ expected `while`, `for`, `loop` or `{` after a label + +error: expected expression, found `]` + --> $DIR/label-is-actually-char.rs:36:25 + | +LL | let x = ['a, 'b, 'cde]; + | ^ expected expression -error: aborting due to 4 previous errors +error: aborting due to 11 previous errors