From 09c3e82050fceffe20d8f06a31d8befc85f2164b Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 9 Mar 2022 14:18:32 +1100 Subject: [PATCH 1/6] Refactor the second half of `parse_tt`. The current structure makes it hard to tell that there are just four distinct code paths, depending on how many items there are in `bb_items` and `next_items`. This commit introduces a `match` that clarifies things. --- compiler/rustc_expand/src/mbe/macro_parser.rs | 129 +++++++++--------- 1 file changed, 68 insertions(+), 61 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 04137086088dd..718f155c60c05 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -744,70 +744,77 @@ pub(super) fn parse_tt( // unnecessary implicit clone later in `Rc::make_mut`. drop(eof_items); - // If there are no possible next positions AND we aren't waiting for the black-box parser, - // then there is a syntax error. - if bb_items.is_empty() && next_items.is_empty() { - return Failure(parser.token.clone(), "no rules expected this token in macro call"); - } + match (next_items.len(), bb_items.len()) { + (0, 0) => { + // There are no possible next positions AND we aren't waiting for the black-box + // parser: syntax error. + return Failure(parser.token.clone(), "no rules expected this token in macro call"); + } - if (!bb_items.is_empty() && !next_items.is_empty()) || bb_items.len() > 1 { - // We need to call out to parse some rust nonterminal (black-box) parser. But something - // is wrong, because there is not EXACTLY ONE of these. - let nts = bb_items - .iter() - .map(|item| match item.top_elts.get_tt(item.idx) { - TokenTree::MetaVarDecl(_, bind, Some(kind)) => format!("{} ('{}')", kind, bind), - _ => panic!(), - }) - .collect::>() - .join(" or "); - - return Error( - parser.token.span, - format!( - "local ambiguity when calling macro `{macro_name}`: multiple parsing options: {}", - match next_items.len() { - 0 => format!("built-in NTs {}.", nts), - 1 => format!("built-in NTs {} or 1 other option.", nts), - n => format!("built-in NTs {} or {} other options.", nts, n), - } - ), - ); - } + (_, 0) => { + // Dump all possible `next_items` into `cur_items` for the next iteration. Then + // process the next token. + cur_items.extend(next_items.drain(..)); + parser.to_mut().bump(); + } - if !next_items.is_empty() { - // Dump all possible `next_items` into `cur_items` for the next iteration. Then process - // the next token. - cur_items.extend(next_items.drain(..)); - parser.to_mut().bump(); - } else { - // Finally, we have the case where we need to call the black-box parser to get some - // nonterminal. - assert_eq!(bb_items.len(), 1); - - let mut item = bb_items.pop().unwrap(); - if let TokenTree::MetaVarDecl(span, _, Some(kind)) = item.top_elts.get_tt(item.idx) { - let match_cur = item.match_cur; - // We use the span of the metavariable declaration to determine any - // edition-specific matching behavior for non-terminals. - let nt = match parser.to_mut().parse_nonterminal(kind) { - Err(mut err) => { - err.span_label( - span, - format!("while parsing argument for this `{}` macro fragment", kind), - ) - .emit(); - return ErrorReported; - } - Ok(nt) => nt, - }; - item.push_match(match_cur, MatchedNonterminal(Lrc::new(nt))); - item.idx += 1; - item.match_cur += 1; - } else { - unreachable!() + (0, 1) => { + // We need to call the black-box parser to get some nonterminal. + let mut item = bb_items.pop().unwrap(); + if let TokenTree::MetaVarDecl(span, _, Some(kind)) = item.top_elts.get_tt(item.idx) + { + let match_cur = item.match_cur; + // We use the span of the metavariable declaration to determine any + // edition-specific matching behavior for non-terminals. + let nt = match parser.to_mut().parse_nonterminal(kind) { + Err(mut err) => { + err.span_label( + span, + format!( + "while parsing argument for this `{}` macro fragment", + kind + ), + ) + .emit(); + return ErrorReported; + } + Ok(nt) => nt, + }; + item.push_match(match_cur, MatchedNonterminal(Lrc::new(nt))); + item.idx += 1; + item.match_cur += 1; + } else { + unreachable!() + } + cur_items.push(item); + } + + (_, _) => { + // We need to call the black-box parser to get some nonterminal, but something is + // wrong. + let nts = bb_items + .iter() + .map(|item| match item.top_elts.get_tt(item.idx) { + TokenTree::MetaVarDecl(_, bind, Some(kind)) => { + format!("{} ('{}')", kind, bind) + } + _ => panic!(), + }) + .collect::>() + .join(" or "); + + return Error( + parser.token.span, + format!( + "local ambiguity when calling macro `{macro_name}`: multiple parsing options: {}", + match next_items.len() { + 0 => format!("built-in NTs {}.", nts), + 1 => format!("built-in NTs {} or 1 other option.", nts), + n => format!("built-in NTs {} or {} other options.", nts, n), + } + ), + ); } - cur_items.push(item); } assert!(!cur_items.is_empty()); From 4d4baf7c9a6adb2f8e7071df7af523fa10a568d6 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 9 Mar 2022 16:58:13 +1100 Subject: [PATCH 2/6] Disallow `TokenTree::{MetaVar,MetaVarExpr}` in matchers. They should only appear in transcribers. --- compiler/rustc_expand/src/mbe/macro_parser.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 718f155c60c05..0985ccc32d477 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -439,9 +439,8 @@ fn nameize>( } Occupied(..) => return Err((sp, format!("duplicated bind name: {}", bind_name))), }, - // FIXME(c410-f3r) MetaVar and MetaVarExpr should be handled instead of being ignored - // https://github.com/rust-lang/rust/issues/9390 - TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) | TokenTree::Token(..) => {} + TokenTree::Token(..) => (), + TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), } Ok(()) @@ -655,7 +654,9 @@ fn inner_parse_loop<'root, 'tt>( // rules. NOTE that this is not necessarily an error unless _all_ items in // `cur_items` end up doing this. There may still be some other matchers that do // end up working out. - TokenTree::Token(..) | TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => {} + TokenTree::Token(..) => {} + + TokenTree::MetaVar(..) | TokenTree::MetaVarExpr(..) => unreachable!(), } } } From 9f0798b2eb9da741ce7ee7e427aa4b9ea5a08662 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Thu, 10 Mar 2022 10:06:57 +1100 Subject: [PATCH 3/6] Add a useful assertion. --- compiler/rustc_expand/src/mbe/macro_parser.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index 0985ccc32d477..ecbb15d6b719e 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -521,6 +521,8 @@ fn inner_parse_loop<'root, 'tt>( // then we could be at the end of a sequence or at the beginning of the next // repetition. if let Some(repetition) = &item.repetition { + debug_assert!(matches!(item.top_elts, Tt(TokenTree::Sequence(..)))); + // At this point, regardless of whether there is a separator, we should add all // matches from the complete repetition of the sequence to the shared, top-level // `matches` list (actually, `up.matches`, which could itself not be the top-level, From c13ca42d6700499462f0fb687e95f28ffd776cb4 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 9 Mar 2022 14:34:24 +1100 Subject: [PATCH 4/6] Move `eof_items` handling entirely within `inner_parse_loop`. Also rename `inner_parse_loop` as `parse_tt_inner`, because it's no longer just a loop. --- compiler/rustc_expand/src/mbe/macro_parser.rs | 90 +++++++++---------- 1 file changed, 40 insertions(+), 50 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index ecbb15d6b719e..d986c5b078d4c 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -122,7 +122,7 @@ impl<'tt> TokenTreeOrTokenTreeSlice<'tt> { /// An unzipping of `TokenTree`s... see the `stack` field of `MatcherPos`. /// -/// This is used by `inner_parse_loop` to keep track of delimited submatchers that we have +/// This is used by `parse_tt_inner` to keep track of delimited submatchers that we have /// descended into. #[derive(Clone)] struct MatcherTtFrame<'tt> { @@ -480,21 +480,24 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool { /// successful execution of this function. /// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in /// the function `parse`. -/// - `eof_items`: the set of items that would be valid if this was the EOF. /// - `bb_items`: the set of items that are waiting for the black-box parser. /// - `token`: the current token of the parser. /// /// # Returns /// -/// A `ParseResult`. Note that matches are kept track of through the items generated. -fn inner_parse_loop<'root, 'tt>( +/// `Some(result)` if everything is finished, `None` otherwise. Note that matches are kept track of +/// through the items generated. +fn parse_tt_inner<'root, 'tt>( sess: &ParseSess, + ms: &[TokenTree], cur_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, next_items: &mut Vec>, bb_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, - eof_items: &mut EofItems<'root, 'tt>, token: &Token, -) -> Result<(), (rustc_span::Span, String)> { +) -> Option { + // Matcher positions that would be valid if the macro invocation was over now + let mut eof_items = EofItems::None; + // Pop items from `cur_items` until it is empty. while let Some(mut item) = cur_items.pop() { // When unzipped trees end, remove them. This corresponds to backtracking out of a @@ -566,7 +569,7 @@ fn inner_parse_loop<'root, 'tt>( } else { // If we are not in a repetition, then being at the end of a matcher means that we // have reached the potential end of the input. - *eof_items = match eof_items { + eof_items = match eof_items { EofItems::None => EofItems::One(item), EofItems::One(_) | EofItems::Multiple => EofItems::Multiple, } @@ -614,7 +617,7 @@ fn inner_parse_loop<'root, 'tt>( // We need to match a metavar (but the identifier is invalid)... this is an error TokenTree::MetaVarDecl(span, _, None) => { if sess.missing_fragment_specifiers.borrow_mut().remove(&span).is_some() { - return Err((span, "missing fragment specifier".to_string())); + return Some(Error(span, "missing fragment specifier".to_string())); } } @@ -663,8 +666,29 @@ fn inner_parse_loop<'root, 'tt>( } } - // Yay a successful parse (so far)! - Ok(()) + // If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise, + // either the parse is ambiguous (which should never happen) or there is a syntax error. + if *token == token::Eof { + Some(match eof_items { + EofItems::One(mut eof_item) => { + let matches = + eof_item.matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap()); + nameize(sess, ms, matches) + } + EofItems::Multiple => { + Error(token.span, "ambiguity: multiple successful parses".to_string()) + } + EofItems::None => Failure( + Token::new( + token::Eof, + if token.span.is_dummy() { token.span } else { token.span.shrink_to_hi() }, + ), + "missing tokens in macro arguments", + ), + }) + } else { + None + } } /// Use the given sequence of token trees (`ms`) as a matcher. Match the token @@ -675,7 +699,7 @@ pub(super) fn parse_tt( macro_name: Ident, ) -> NamedParseResult { // A queue of possible matcher positions. We initialize it with the matcher position in which - // the "dot" is before the first token of the first token tree in `ms`. `inner_parse_loop` then + // the "dot" is before the first token of the first token tree in `ms`. `parse_tt_inner` then // processes all of these possible matcher positions and produces possible next positions into // `next_items`. After some post-processing, the contents of `next_items` replenish `cur_items` // and we start over again. @@ -692,61 +716,27 @@ pub(super) fn parse_tt( // Matcher positions black-box parsed by parser.rs (`parser`) let mut bb_items = SmallVec::new(); - // Matcher positions that would be valid if the macro invocation was over now - let mut eof_items = EofItems::None; - // Process `cur_items` until either we have finished the input or we need to get some // parsing from the black-box parser done. The result is that `next_items` will contain a // bunch of possible next matcher positions in `next_items`. - match inner_parse_loop( + if let Some(result) = parse_tt_inner( parser.sess, + ms, &mut cur_items, &mut next_items, &mut bb_items, - &mut eof_items, &parser.token, ) { - Ok(()) => {} - Err((sp, msg)) => return Error(sp, msg), + return result; } - // inner parse loop handled all cur_items, so it's empty + // `parse_tt_inner` handled all cur_items, so it's empty. assert!(cur_items.is_empty()); - // We need to do some post processing after the `inner_parse_loop`. + // We need to do some post processing after the `parse_tt_inner`. // // Error messages here could be improved with links to original rules. - // If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise, - // either the parse is ambiguous (which should never happen) or there is a syntax error. - if parser.token == token::Eof { - return match eof_items { - EofItems::One(mut eof_item) => { - let matches = - eof_item.matches.iter_mut().map(|dv| Lrc::make_mut(dv).pop().unwrap()); - nameize(parser.sess, ms, matches) - } - EofItems::Multiple => { - Error(parser.token.span, "ambiguity: multiple successful parses".to_string()) - } - EofItems::None => Failure( - Token::new( - token::Eof, - if parser.token.span.is_dummy() { - parser.token.span - } else { - parser.token.span.shrink_to_hi() - }, - ), - "missing tokens in macro arguments", - ), - }; - } - // Performance hack: `eof_items` may share matchers via `Rc` with other things that we want - // to modify. Dropping `eof_items` now may drop these refcounts to 1, preventing an - // unnecessary implicit clone later in `Rc::make_mut`. - drop(eof_items); - match (next_items.len(), bb_items.len()) { (0, 0) => { // There are no possible next positions AND we aren't waiting for the black-box From 235a87fbd3ab5a7e9d0225fef55dafeb60e76dd6 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 9 Mar 2022 14:37:45 +1100 Subject: [PATCH 5/6] Make next_items a `SmallVec`. For consistency, and to make the code slightly nicer. --- compiler/rustc_expand/src/mbe/macro_parser.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index d986c5b078d4c..a8cf8b7c14b14 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -491,7 +491,7 @@ fn parse_tt_inner<'root, 'tt>( sess: &ParseSess, ms: &[TokenTree], cur_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, - next_items: &mut Vec>, + next_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, bb_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, token: &Token, ) -> Option { @@ -708,10 +708,9 @@ pub(super) fn parse_tt( // there are frequently *no* others! -- are allocated on the heap. let mut initial = MatcherPos::new(ms); let mut cur_items = smallvec![MatcherPosHandle::Ref(&mut initial)]; - let mut next_items = Vec::new(); loop { - assert!(next_items.is_empty()); + let mut next_items = SmallVec::new(); // Matcher positions black-box parsed by parser.rs (`parser`) let mut bb_items = SmallVec::new(); From 95d13fa37db8ddc6a7a45d5748a4484904830e25 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Wed, 9 Mar 2022 14:51:31 +1100 Subject: [PATCH 6/6] Move a `parse_tt` error case into a separate function. --- compiler/rustc_expand/src/mbe/macro_parser.rs | 59 +++++++++++-------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/compiler/rustc_expand/src/mbe/macro_parser.rs b/compiler/rustc_expand/src/mbe/macro_parser.rs index a8cf8b7c14b14..dedfd779bb416 100644 --- a/compiler/rustc_expand/src/mbe/macro_parser.rs +++ b/compiler/rustc_expand/src/mbe/macro_parser.rs @@ -762,10 +762,7 @@ pub(super) fn parse_tt( Err(mut err) => { err.span_label( span, - format!( - "while parsing argument for this `{}` macro fragment", - kind - ), + format!("while parsing argument for this `{kind}` macro fragment"), ) .emit(); return ErrorReported; @@ -784,27 +781,11 @@ pub(super) fn parse_tt( (_, _) => { // We need to call the black-box parser to get some nonterminal, but something is // wrong. - let nts = bb_items - .iter() - .map(|item| match item.top_elts.get_tt(item.idx) { - TokenTree::MetaVarDecl(_, bind, Some(kind)) => { - format!("{} ('{}')", kind, bind) - } - _ => panic!(), - }) - .collect::>() - .join(" or "); - - return Error( + return bb_items_ambiguity_error( + macro_name, + next_items, + bb_items, parser.token.span, - format!( - "local ambiguity when calling macro `{macro_name}`: multiple parsing options: {}", - match next_items.len() { - 0 => format!("built-in NTs {}.", nts), - 1 => format!("built-in NTs {} or 1 other option.", nts), - n => format!("built-in NTs {} or {} other options.", nts, n), - } - ), ); } } @@ -812,3 +793,33 @@ pub(super) fn parse_tt( assert!(!cur_items.is_empty()); } } + +fn bb_items_ambiguity_error<'root, 'tt>( + macro_name: Ident, + next_items: SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, + bb_items: SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, + token_span: rustc_span::Span, +) -> NamedParseResult { + let nts = bb_items + .iter() + .map(|item| match item.top_elts.get_tt(item.idx) { + TokenTree::MetaVarDecl(_, bind, Some(kind)) => { + format!("{} ('{}')", kind, bind) + } + _ => panic!(), + }) + .collect::>() + .join(" or "); + + Error( + token_span, + format!( + "local ambiguity when calling macro `{macro_name}`: multiple parsing options: {}", + match next_items.len() { + 0 => format!("built-in NTs {}.", nts), + 1 => format!("built-in NTs {} or 1 other option.", nts), + n => format!("built-in NTs {} or {} other options.", nts, n), + } + ), + ) +}