From 68e76dca09da9a59d29e27872645ba38222ca98d Mon Sep 17 00:00:00 2001 From: Niko Matsakis Date: Mon, 12 Nov 2018 09:18:57 +1100 Subject: [PATCH] Make `MatcherPos::stack` a `SmallVec`. This avoids some allocations. --- src/libsyntax/ext/tt/macro_parser.rs | 91 +++++++++++++++++----------- 1 file changed, 57 insertions(+), 34 deletions(-) diff --git a/src/libsyntax/ext/tt/macro_parser.rs b/src/libsyntax/ext/tt/macro_parser.rs index 06c1d58070e21..7a0a764d4639a 100644 --- a/src/libsyntax/ext/tt/macro_parser.rs +++ b/src/libsyntax/ext/tt/macro_parser.rs @@ -107,12 +107,12 @@ use std::rc::Rc; /// Either a sequence of token trees or a single one. This is used as the representation of the /// sequence of tokens that make up a matcher. #[derive(Clone)] -enum TokenTreeOrTokenTreeSlice<'a> { +enum TokenTreeOrTokenTreeSlice<'tt> { Tt(TokenTree), - TtSeq(&'a [TokenTree]), + TtSeq(&'tt [TokenTree]), } -impl<'a> TokenTreeOrTokenTreeSlice<'a> { +impl<'tt> TokenTreeOrTokenTreeSlice<'tt> { /// Returns the number of constituent top-level token trees of `self` (top-level in that it /// will not recursively descend into subtrees). fn len(&self) -> usize { @@ -136,23 +136,41 @@ impl<'a> TokenTreeOrTokenTreeSlice<'a> { /// This is used by `inner_parse_loop` to keep track of delimited submatchers that we have /// descended into. #[derive(Clone)] -struct MatcherTtFrame<'a> { +struct MatcherTtFrame<'tt> { /// The "parent" matcher that we are descending into. - elts: TokenTreeOrTokenTreeSlice<'a>, + elts: TokenTreeOrTokenTreeSlice<'tt>, /// The position of the "dot" in `elts` at the time we descended. idx: usize, } type NamedMatchVec = SmallVec<[NamedMatch; 4]>; -/// Represents a single "position" (aka "matcher position", aka "item"), as described in the module -/// documentation. +/// Represents a single "position" (aka "matcher position", aka "item"), as +/// described in the module documentation. +/// +/// Here: +/// +/// - `'root` represents the lifetime of the stack slot that holds the root +/// `MatcherPos`. As described in `MatcherPosHandle`, the root `MatcherPos` +/// structure is stored on the stack, but subsequent instances are put into +/// the heap. +/// - `'tt` represents the lifetime of the token trees that this matcher +/// position refers to. +/// +/// It is important to distinguish these two lifetimes because we have a +/// `SmallVec>` below, and the destructor of +/// that is considered to possibly access the data from its elements (it lacks +/// a `#[may_dangle]` attribute). As a result, the compiler needs to know that +/// all the elements in that `SmallVec` strictly outlive the root stack slot +/// lifetime. By separating `'tt` from `'root`, we can show that. #[derive(Clone)] -struct MatcherPos<'a> { +struct MatcherPos<'root, 'tt: 'root> { /// The token or sequence of tokens that make up the matcher - top_elts: TokenTreeOrTokenTreeSlice<'a>, + top_elts: TokenTreeOrTokenTreeSlice<'tt>, + /// The position of the "dot" in this matcher idx: usize, + /// The first span of source source that the beginning of this matcher corresponds to. In other /// words, the token in the source whose span is `sp_open` is matched against the first token of /// the matcher. @@ -182,26 +200,31 @@ struct MatcherPos<'a> { /// in this matcher. match_hi: usize, - // Specifically used if we are matching a repetition. If we aren't both should be `None`. + // The following fields are used if we are matching a repetition. If we aren't, they should be + // `None`. + /// The KleeneOp of this sequence if we are in a repetition. seq_op: Option, - /// The separator if we are in a repetition + + /// The separator if we are in a repetition. sep: Option, + /// The "parent" matcher position if we are in a repetition. That is, the matcher position just /// before we enter the sequence. - up: Option>, + up: Option>, - // Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from - // a delimited token tree (e.g. something wrapped in `(` `)`) or to get the contents of a doc - // comment... + /// Specifically used to "unzip" token trees. By "unzip", we mean to unwrap the delimiters from + /// a delimited token tree (e.g. something wrapped in `(` `)`) or to get the contents of a doc + /// comment... + /// /// When matching against matchers with nested delimited submatchers (e.g. `pat ( pat ( .. ) /// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does /// that where the bottom of the stack is the outermost matcher. - // Also, throughout the comments, this "descent" is often referred to as "unzipping"... - stack: Vec>, + /// Also, throughout the comments, this "descent" is often referred to as "unzipping"... + stack: SmallVec<[MatcherTtFrame<'tt>; 1]>, } -impl<'a> MatcherPos<'a> { +impl<'root, 'tt> MatcherPos<'root, 'tt> { /// Add `m` as a named match for the `idx`-th metavar. fn push_match(&mut self, idx: usize, m: NamedMatch) { let matches = Rc::make_mut(&mut self.matches[idx]); @@ -218,12 +241,12 @@ impl<'a> MatcherPos<'a> { // Therefore, the initial MatcherPos is always allocated on the stack, // subsequent ones (of which there aren't that many) are allocated on the heap, // and this type is used to encapsulate both cases. -enum MatcherPosHandle<'a> { - Ref(&'a mut MatcherPos<'a>), - Box(Box>), +enum MatcherPosHandle<'root, 'tt: 'root> { + Ref(&'root mut MatcherPos<'root, 'tt>), + Box(Box>), } -impl<'a> Clone for MatcherPosHandle<'a> { +impl<'root, 'tt> Clone for MatcherPosHandle<'root, 'tt> { // This always produces a new Box. fn clone(&self) -> Self { MatcherPosHandle::Box(match *self { @@ -233,8 +256,8 @@ impl<'a> Clone for MatcherPosHandle<'a> { } } -impl<'a> Deref for MatcherPosHandle<'a> { - type Target = MatcherPos<'a>; +impl<'root, 'tt> Deref for MatcherPosHandle<'root, 'tt> { + type Target = MatcherPos<'root, 'tt>; fn deref(&self) -> &Self::Target { match *self { MatcherPosHandle::Ref(ref r) => r, @@ -243,8 +266,8 @@ impl<'a> Deref for MatcherPosHandle<'a> { } } -impl<'a> DerefMut for MatcherPosHandle<'a> { - fn deref_mut(&mut self) -> &mut MatcherPos<'a> { +impl<'root, 'tt> DerefMut for MatcherPosHandle<'root, 'tt> { + fn deref_mut(&mut self) -> &mut MatcherPos<'root, 'tt> { match *self { MatcherPosHandle::Ref(ref mut r) => r, MatcherPosHandle::Box(ref mut b) => b, @@ -292,7 +315,7 @@ fn create_matches(len: usize) -> Box<[Rc]> { /// Generate the top-level matcher position in which the "dot" is before the first token of the /// matcher `ms` and we are going to start matching at the span `open` in the source. -fn initial_matcher_pos(ms: &[TokenTree], open: Span) -> MatcherPos { +fn initial_matcher_pos<'root, 'tt>(ms: &'tt [TokenTree], open: Span) -> MatcherPos<'root, 'tt> { let match_idx_hi = count_names(ms); let matches = create_matches(match_idx_hi); MatcherPos { @@ -312,7 +335,7 @@ fn initial_matcher_pos(ms: &[TokenTree], open: Span) -> MatcherPos { match_hi: match_idx_hi, // Haven't descended into any delimiters, so empty stack - stack: vec![], + stack: smallvec![], // Haven't descended into any sequences, so both of these are `None`. seq_op: None, @@ -445,12 +468,12 @@ fn token_name_eq(t1: &Token, t2: &Token) -> bool { /// # Returns /// /// A `ParseResult`. Note that matches are kept track of through the items generated. -fn inner_parse_loop<'a>( +fn inner_parse_loop<'root, 'tt>( sess: &ParseSess, - cur_items: &mut SmallVec<[MatcherPosHandle<'a>; 1]>, - next_items: &mut Vec>, - eof_items: &mut SmallVec<[MatcherPosHandle<'a>; 1]>, - bb_items: &mut SmallVec<[MatcherPosHandle<'a>; 1]>, + cur_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, + next_items: &mut Vec>, + eof_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, + bb_items: &mut SmallVec<[MatcherPosHandle<'root, 'tt>; 1]>, token: &Token, span: syntax_pos::Span, ) -> ParseResult<()> { @@ -554,7 +577,7 @@ fn inner_parse_loop<'a>( let matches = create_matches(item.matches.len()); cur_items.push(MatcherPosHandle::Box(Box::new(MatcherPos { - stack: vec![], + stack: smallvec![], sep: seq.separator.clone(), seq_op: Some(seq.op), idx: 0,