From 24f88f8dbf987c6849c0d9b395faf12c782bda13 Mon Sep 17 00:00:00 2001 From: Vadim Petrochenkov Date: Sun, 7 Jan 2024 14:21:14 +0300 Subject: [PATCH] macro_rules: Eagerly mark spans of produced tokens --- compiler/rustc_expand/src/mbe.rs | 10 +- compiler/rustc_expand/src/mbe/macro_check.rs | 4 +- compiler/rustc_expand/src/mbe/macro_rules.rs | 2 +- compiler/rustc_expand/src/mbe/quoted.rs | 7 +- compiler/rustc_expand/src/mbe/transcribe.rs | 92 +++++++++++++------ .../meta-variable-depth-outside-repeat.stderr | 5 + .../out-of-bounds-arguments.stderr | 15 +++ .../syntax-errors.stderr | 15 +++ tests/ui/parser/macro/macro-repeat.stderr | 10 +- 9 files changed, 122 insertions(+), 38 deletions(-) diff --git a/compiler/rustc_expand/src/mbe.rs b/compiler/rustc_expand/src/mbe.rs index ca4a1f327adc8..92469b66a6644 100644 --- a/compiler/rustc_expand/src/mbe.rs +++ b/compiler/rustc_expand/src/mbe.rs @@ -19,14 +19,14 @@ use rustc_span::Span; /// Contains the sub-token-trees of a "delimited" token tree such as `(a b c)`. /// The delimiters are not represented explicitly in the `tts` vector. -#[derive(PartialEq, Encodable, Decodable, Debug)] +#[derive(Clone, PartialEq, Encodable, Decodable, Debug)] struct Delimited { delim: Delimiter, /// FIXME: #67062 has details about why this is sub-optimal. tts: Vec, } -#[derive(PartialEq, Encodable, Decodable, Debug)] +#[derive(Clone, PartialEq, Encodable, Decodable, Debug)] struct SequenceRepetition { /// The sequence of token trees tts: Vec, @@ -64,7 +64,7 @@ pub(crate) enum KleeneOp { /// Similar to `tokenstream::TokenTree`, except that `Sequence`, `MetaVar`, `MetaVarDecl`, and /// `MetaVarExpr` are "first-class" token trees. Useful for parsing macros. -#[derive(Debug, PartialEq, Encodable, Decodable)] +#[derive(Clone, Debug, PartialEq, Encodable, Decodable)] enum TokenTree { Token(Token), /// A delimited sequence, e.g. `($e:expr)` (RHS) or `{ $e }` (LHS). @@ -72,7 +72,7 @@ enum TokenTree { /// A kleene-style repetition sequence, e.g. `$($e:expr)*` (RHS) or `$($e),*` (LHS). Sequence(DelimSpan, SequenceRepetition), /// e.g., `$var`. - MetaVar(Span, Ident), + MetaVar(Span, Ident, Span), /// e.g., `$var:expr`. Only appears on the LHS. MetaVarDecl(Span, Ident /* name to bind */, Option), /// A meta-variable expression inside `${...}`. @@ -97,7 +97,7 @@ impl TokenTree { fn span(&self) -> Span { match *self { TokenTree::Token(Token { span, .. }) - | TokenTree::MetaVar(span, _) + | TokenTree::MetaVar(span, ..) | TokenTree::MetaVarDecl(span, _, _) => span, TokenTree::Delimited(span, ..) | TokenTree::MetaVarExpr(span, _) diff --git a/compiler/rustc_expand/src/mbe/macro_check.rs b/compiler/rustc_expand/src/mbe/macro_check.rs index e66cfbe6fb689..94d795ac30fc4 100644 --- a/compiler/rustc_expand/src/mbe/macro_check.rs +++ b/compiler/rustc_expand/src/mbe/macro_check.rs @@ -242,7 +242,7 @@ fn check_binders( // the outer macro. See ui/macros/macro-of-higher-order.rs where $y:$fragment in the // LHS of the nested macro (and RHS of the outer macro) is parsed as MetaVar(y) Colon // MetaVar(fragment) and not as MetaVarDecl(y, fragment). - TokenTree::MetaVar(span, name) => { + TokenTree::MetaVar(span, name, _) => { if macros.is_empty() { sess.dcx.span_bug(span, "unexpected MetaVar in lhs"); } @@ -342,7 +342,7 @@ fn check_occurrences( TokenTree::MetaVarDecl(span, _name, _kind) => { sess.dcx.span_bug(span, "unexpected MetaVarDecl in rhs") } - TokenTree::MetaVar(span, name) => { + TokenTree::MetaVar(span, name, _) => { let name = MacroRulesNormalizedIdent::new(name); check_ops_is_prefix(sess, node_id, macros, binders, ops, span, name); } diff --git a/compiler/rustc_expand/src/mbe/macro_rules.rs b/compiler/rustc_expand/src/mbe/macro_rules.rs index e9797abcbdf24..6ad910fb4fd69 100644 --- a/compiler/rustc_expand/src/mbe/macro_rules.rs +++ b/compiler/rustc_expand/src/mbe/macro_rules.rs @@ -1378,7 +1378,7 @@ fn is_in_follow(tok: &mbe::TokenTree, kind: NonterminalKind) -> IsInFollow { fn quoted_tt_to_string(tt: &mbe::TokenTree) -> String { match tt { mbe::TokenTree::Token(token) => pprust::token_to_string(token).into(), - mbe::TokenTree::MetaVar(_, name) => format!("${name}"), + mbe::TokenTree::MetaVar(_, name, _) => format!("${name}"), mbe::TokenTree::MetaVarDecl(_, name, Some(kind)) => format!("${name}:{kind}"), mbe::TokenTree::MetaVarDecl(_, name, None) => format!("${name}:"), _ => panic!( diff --git a/compiler/rustc_expand/src/mbe/quoted.rs b/compiler/rustc_expand/src/mbe/quoted.rs index cd4ba7a9a62b4..f01f519c8d055 100644 --- a/compiler/rustc_expand/src/mbe/quoted.rs +++ b/compiler/rustc_expand/src/mbe/quoted.rs @@ -54,7 +54,7 @@ pub(super) fn parse( // parse out the matcher (i.e., in `$id:ident` this would parse the `:` and `ident`). let tree = parse_tree(tree, &mut trees, parsing_patterns, sess, node_id, features, edition); match tree { - TokenTree::MetaVar(start_sp, ident) if parsing_patterns => { + TokenTree::MetaVar(start_sp, ident, _) if parsing_patterns => { let span = match trees.next() { Some(&tokenstream::TokenTree::Token(Token { kind: token::Colon, span }, _)) => { match trees.next() { @@ -223,7 +223,7 @@ fn parse_tree<'a>( if ident.name == kw::Crate && !is_raw { TokenTree::token(token::Ident(kw::DollarCrate, is_raw), span) } else { - TokenTree::MetaVar(span, ident) + TokenTree::MetaVar(span, ident, ident.span) } } @@ -245,7 +245,8 @@ fn parse_tree<'a>( let msg = format!("expected identifier, found `{}`", pprust::token_to_string(token),); sess.dcx.span_err(token.span, msg); - TokenTree::MetaVar(token.span, Ident::empty()) + let ident = Ident::empty(); + TokenTree::MetaVar(token.span, ident, ident.span) } // There are no more tokens. Just return the `$` we already have. diff --git a/compiler/rustc_expand/src/mbe/transcribe.rs b/compiler/rustc_expand/src/mbe/transcribe.rs index 434891ebc76e4..cb0b09c754dcc 100644 --- a/compiler/rustc_expand/src/mbe/transcribe.rs +++ b/compiler/rustc_expand/src/mbe/transcribe.rs @@ -76,6 +76,55 @@ impl<'a> Iterator for Frame<'a> { } } +fn mark_tt(tt: &mut mbe::TokenTree, marker: &mut Marker) { + // Spans that never end up in the output don't need to be marked. + // `_ident`s are metavariable names and need to keep their original spans to resolve correctly + // (they also never end up in the output). + match tt { + mbe::TokenTree::Token(token) => mut_visit::visit_token(token, marker), + mbe::TokenTree::Delimited(dspan, _dspacing, delimited) => { + mut_visit::visit_delim_span(dspan, marker); + mark_delimited(delimited, marker); + } + mbe::TokenTree::Sequence(_dspan, rep) => { + // Sequence delimiter spans never ends up in the output. + mark_sequence_repetition(rep, marker); + } + mbe::TokenTree::MetaVar(span, _ident, marked_span) => { + marker.visit_span(span); + marker.visit_span(marked_span); + } + mbe::TokenTree::MetaVarExpr(dspan, expr) => { + mut_visit::visit_delim_span(dspan, marker); + match expr { + MetaVarExpr::Count(_ident, _depth) => {} + MetaVarExpr::Ignore(_ident) => {} + MetaVarExpr::Index(_depth) | MetaVarExpr::Length(_depth) => {} + } + } + mbe::TokenTree::MetaVarDecl(..) => unreachable!(), + } +} + +fn mark_sequence_repetition(rep: &mut mbe::SequenceRepetition, marker: &mut Marker) { + let mbe::SequenceRepetition { tts, separator, kleene, num_captures: _ } = rep; + for tt in tts { + mark_tt(tt, marker); + } + if let Some(sep) = separator { + mut_visit::visit_token(sep, marker); + } + // Kleenee token span never ends up in the output. + let mbe::KleeneToken { span: _, op: _ } = kleene; +} + +fn mark_delimited(delimited: &mut mbe::Delimited, marker: &mut Marker) { + let mbe::Delimited { delim: _, tts } = delimited; + for tt in tts { + mark_tt(tt, marker); + } +} + /// This can do Macro-By-Example transcription. /// - `interp` is a map of meta-variables to the tokens (non-terminals) they matched in the /// invocation. We are assuming we already know there is a match. @@ -108,11 +157,15 @@ pub(super) fn transcribe<'a>( return Ok(TokenStream::default()); } + let mut src = src.clone(); + let expn_id = cx.current_expansion.id; + mark_delimited(&mut src, &mut Marker(expn_id, transparency, Default::default())); + // We descend into the RHS (`src`), expanding things as we go. This stack contains the things // we have yet to expand/are still expanding. We start the stack off with the whole RHS. The // choice of spacing values doesn't matter. let mut stack: SmallVec<[Frame<'_>; 1]> = - smallvec![Frame::new(src, src_span, DelimSpacing::new(Spacing::Alone, Spacing::Alone))]; + smallvec![Frame::new(&src, src_span, DelimSpacing::new(Spacing::Alone, Spacing::Alone))]; // As we descend in the RHS, we will need to be able to match nested sequences of matchers. // `repeats` keeps track of where we are in matching at each level, with the last element being @@ -132,7 +185,6 @@ pub(super) fn transcribe<'a>( // again, and we are done transcribing. let mut result: Vec = Vec::new(); let mut result_stack = Vec::new(); - let mut marker = Marker(cx.current_expansion.id, transparency, Default::default()); loop { // Look at the last frame on the stack. @@ -245,10 +297,11 @@ pub(super) fn transcribe<'a>( } // Replace the meta-var with the matched token tree from the invocation. - mbe::TokenTree::MetaVar(mut sp, mut original_ident) => { + mbe::TokenTree::MetaVar(sp, original_ident, marked_span) => { + let sp = *sp; // Find the matched nonterminal from the macro invocation, and use it to replace // the meta-var. - let ident = MacroRulesNormalizedIdent::new(original_ident); + let ident = MacroRulesNormalizedIdent::new(*original_ident); if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) { match cur_matched { MatchedTokenTree(tt) => { @@ -260,7 +313,6 @@ pub(super) fn transcribe<'a>( // Other variables are emitted into the output stream as groups with // `Delimiter::Invisible` to maintain parsing priorities. // `Interpolated` is currently used for such groups in rustc parser. - marker.visit_span(&mut sp); result .push(TokenTree::token_alone(token::Interpolated(nt.clone()), sp)); } @@ -272,11 +324,9 @@ pub(super) fn transcribe<'a>( } else { // If we aren't able to match the meta-var, we push it back into the result but // with modified syntax context. (I believe this supports nested macros). - marker.visit_span(&mut sp); - marker.visit_ident(&mut original_ident); result.push(TokenTree::token_joint_hidden(token::Dollar, sp)); result.push(TokenTree::Token( - Token::from_ast_ident(original_ident), + Token::from_ast_ident(Ident::new(original_ident.name, *marked_span)), Spacing::Alone, )); } @@ -284,7 +334,7 @@ pub(super) fn transcribe<'a>( // Replace meta-variable expressions with the result of their expansion. mbe::TokenTree::MetaVarExpr(sp, expr) => { - transcribe_metavar_expr(cx, expr, interp, &mut marker, &repeats, &mut result, sp)?; + transcribe_metavar_expr(cx, expr, interp, &repeats, &mut result, sp)?; } // If we are entering a new delimiter, we push its contents to the `stack` to be @@ -292,13 +342,12 @@ pub(super) fn transcribe<'a>( // We will produce all of the results of the inside of the `Delimited` and then we will // jump back out of the Delimited, pop the result_stack and add the new results back to // the previous results (from outside the Delimited). - mbe::TokenTree::Delimited(mut span, spacing, delimited) => { - mut_visit::visit_delim_span(&mut span, &mut marker); + mbe::TokenTree::Delimited(span, spacing, delimited) => { stack.push(Frame::Delimited { tts: &delimited.tts, delim: delimited.delim, idx: 0, - span, + span: *span, spacing: *spacing, }); result_stack.push(mem::take(&mut result)); @@ -307,10 +356,7 @@ pub(super) fn transcribe<'a>( // Nothing much to do here. Just push the token to the result, being careful to // preserve syntax context. mbe::TokenTree::Token(token) => { - let mut token = token.clone(); - mut_visit::visit_token(&mut token, &mut marker); - let tt = TokenTree::Token(token, Spacing::Alone); - result.push(tt); + result.push(TokenTree::Token(token.clone(), Spacing::Alone)); } // There should be no meta-var declarations in the invocation of a macro. @@ -475,7 +521,7 @@ fn lockstep_iter_size( size.with(lockstep_iter_size(tt, interpolations, repeats)) }) } - TokenTree::MetaVar(_, name) | TokenTree::MetaVarDecl(_, name, _) => { + TokenTree::MetaVar(_, name, _) | TokenTree::MetaVarDecl(_, name, _) => { let name = MacroRulesNormalizedIdent::new(*name); match lookup_cur_matched(name, interpolations, repeats) { Some(matched) => match matched { @@ -620,23 +666,17 @@ fn transcribe_metavar_expr<'a>( cx: &ExtCtxt<'a>, expr: &MetaVarExpr, interp: &FxHashMap, - marker: &mut Marker, repeats: &[(usize, usize)], result: &mut Vec, sp: &DelimSpan, ) -> PResult<'a, ()> { - let mut visited_span = || { - let mut span = sp.entire(); - marker.visit_span(&mut span); - span - }; match *expr { MetaVarExpr::Count(original_ident, depth) => { let matched = matched_from_ident(cx, original_ident, interp)?; let count = count_repetitions(cx, depth, matched, repeats, sp)?; let tt = TokenTree::token_alone( TokenKind::lit(token::Integer, sym::integer(count), None), - visited_span(), + sp.entire(), ); result.push(tt); } @@ -648,7 +688,7 @@ fn transcribe_metavar_expr<'a>( Some((index, _)) => { result.push(TokenTree::token_alone( TokenKind::lit(token::Integer, sym::integer(*index), None), - visited_span(), + sp.entire(), )); } None => return Err(out_of_bounds_err(cx, repeats.len(), sp.entire(), "index")), @@ -657,7 +697,7 @@ fn transcribe_metavar_expr<'a>( Some((_, length)) => { result.push(TokenTree::token_alone( TokenKind::lit(token::Integer, sym::integer(*length), None), - visited_span(), + sp.entire(), )); } None => return Err(out_of_bounds_err(cx, repeats.len(), sp.entire(), "length")), diff --git a/tests/ui/macros/meta-variable-depth-outside-repeat.stderr b/tests/ui/macros/meta-variable-depth-outside-repeat.stderr index 49b5053a023cc..009a1b99caf02 100644 --- a/tests/ui/macros/meta-variable-depth-outside-repeat.stderr +++ b/tests/ui/macros/meta-variable-depth-outside-repeat.stderr @@ -3,6 +3,11 @@ error: meta-variable expression `length` with depth parameter must be called ins | LL | ${length(0)} | ^^^^^^^^^^^ +... +LL | const _: i32 = metavar!(0); + | ----------- in this macro invocation + | + = note: this error originates in the macro `metavar` (in Nightly builds, run with -Z macro-backtrace for more info) error: aborting due to 1 previous error diff --git a/tests/ui/macros/rfc-3086-metavar-expr/out-of-bounds-arguments.stderr b/tests/ui/macros/rfc-3086-metavar-expr/out-of-bounds-arguments.stderr index f757b8af219fd..846f3901c6e52 100644 --- a/tests/ui/macros/rfc-3086-metavar-expr/out-of-bounds-arguments.stderr +++ b/tests/ui/macros/rfc-3086-metavar-expr/out-of-bounds-arguments.stderr @@ -3,18 +3,33 @@ error: depth parameter of meta-variable expression `count` must be less than 4 | LL | ${count($foo, 10)}, | ^^^^^^^^^^^^^^^^^ +... +LL | a!( { [ (a) ] [ (b c) ] } ); + | --------------------------- in this macro invocation + | + = note: this error originates in the macro `a` (in Nightly builds, run with -Z macro-backtrace for more info) error: depth parameter of meta-variable expression `index` must be less than 3 --> $DIR/out-of-bounds-arguments.rs:19:18 | LL | ${index(10)}, | ^^^^^^^^^^^ +... +LL | b!( { [ a b ] } ); + | ----------------- in this macro invocation + | + = note: this error originates in the macro `b` (in Nightly builds, run with -Z macro-backtrace for more info) error: depth parameter of meta-variable expression `length` must be less than 2 --> $DIR/out-of-bounds-arguments.rs:32:18 | LL | ${length(10)} | ^^^^^^^^^^^^ +... +LL | c!({ a }); + | --------- in this macro invocation + | + = note: this error originates in the macro `c` (in Nightly builds, run with -Z macro-backtrace for more info) error: aborting due to 3 previous errors diff --git a/tests/ui/macros/rfc-3086-metavar-expr/syntax-errors.stderr b/tests/ui/macros/rfc-3086-metavar-expr/syntax-errors.stderr index 0dda38290ab6b..57d7324c85f5f 100644 --- a/tests/ui/macros/rfc-3086-metavar-expr/syntax-errors.stderr +++ b/tests/ui/macros/rfc-3086-metavar-expr/syntax-errors.stderr @@ -201,18 +201,33 @@ error: `count` can not be placed inside the inner-most repetition | LL | ( $i:ident ) => { ${ count($i) } }; | ^^^^^^^^^^^^^ +... +LL | curly__no_rhs_dollar__no_round!(a); + | ---------------------------------- in this macro invocation + | + = note: this error originates in the macro `curly__no_rhs_dollar__no_round` (in Nightly builds, run with -Z macro-backtrace for more info) error: `count` can not be placed inside the inner-most repetition --> $DIR/syntax-errors.rs:17:24 | LL | ( $i:ident ) => { ${ count($i) } }; | ^^^^^^^^^^^^^ +... +LL | curly__rhs_dollar__no_round!(a); + | ------------------------------- in this macro invocation + | + = note: this error originates in the macro `curly__rhs_dollar__no_round` (in Nightly builds, run with -Z macro-backtrace for more info) error: variable 'i' is still repeating at this depth --> $DIR/syntax-errors.rs:34:36 | LL | ( $( $i:ident ),* ) => { count($i) }; | ^^ +... +LL | no_curly__rhs_dollar__round!(a, b, c); + | ------------------------------------- in this macro invocation + | + = note: this error originates in the macro `no_curly__rhs_dollar__round` (in Nightly builds, run with -Z macro-backtrace for more info) error: expected expression, found `$` --> $DIR/syntax-errors.rs:53:9 diff --git a/tests/ui/parser/macro/macro-repeat.stderr b/tests/ui/parser/macro/macro-repeat.stderr index ade2bbf9b0b05..d6f82926fa350 100644 --- a/tests/ui/parser/macro/macro-repeat.stderr +++ b/tests/ui/parser/macro/macro-repeat.stderr @@ -3,14 +3,22 @@ error: variable 'v' is still repeating at this depth | LL | $v | ^^ +... +LL | mac!(0); + | ------- in this macro invocation + | + = note: this error originates in the macro `mac` (in Nightly builds, run with -Z macro-backtrace for more info) error: variable 'v' is still repeating at this depth --> $DIR/macro-repeat.rs:3:9 | LL | $v | ^^ +... +LL | mac!(1); + | ------- in this macro invocation | - = note: duplicate diagnostic emitted due to `-Z deduplicate-diagnostics=no` + = note: this error originates in the macro `mac` (in Nightly builds, run with -Z macro-backtrace for more info) error: aborting due to 2 previous errors