Improve `print_tts` by changing `tokenstream::Spacing`.
`proc_macro::Spacing` only appears on `Punct` tokens, where:
- `Joint` means "the next token follows immediately and is a `Punct`".
- `Alone` means "the next token doesn't follow immediately *or* it
  follows immediately and is not a `Punct`".

`tokenstream::Spacing` appears on all `TokenTree::Token` instances,
both punct and non-punct, where:
- `Joint` means "the next token follows immediately and is a punct" (i.e.
  satisfies `is_op`).
- `Alone` means "the next token doesn't follow immediately *or* it
  follows immediately and is not a punct".

The fact that `Alone` is used for two different cases is awkward.
This commit replaces `tokenstream::Spacing` with a new type `FollowedBy`
that separates those two cases:
- `Space` means "the next token doesn't follow immediately".
- `Punct` means "the next token follows immediately and is a punct".
- `Other` means "the next token follows immediately and is not a punct".

The mapping from old to new is:
- `Joint` -> `Punct`
- `Alone` -> `Space` or `Other`, depending on the situation.
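
As a rough sketch, the two conversion directions look like this
(hypothetical helper functions for illustration only, assuming the
`Spacing` and `FollowedBy` types described above; the real conversion
code differs in detail):
```
fn to_proc_macro_spacing(fby: FollowedBy) -> Spacing {
    match fby {
        FollowedBy::Punct => Spacing::Joint,
        // Both `Space` and `Other` collapse to `Alone`; `proc_macro`
        // cannot represent the difference.
        FollowedBy::Space | FollowedBy::Other => Spacing::Alone,
    }
}

fn from_proc_macro_spacing(spacing: Spacing) -> FollowedBy {
    match spacing {
        Spacing::Joint => FollowedBy::Punct,
        // `Alone` could have meant either `Space` or `Other`; we must
        // pick one, so `Other` is never produced on this path.
        Spacing::Alone => FollowedBy::Space,
    }
}
```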

We can use `FollowedBy` to *drastically* improve the output of
`print_tts`. For example, this:
```
stringify!(let a: Vec<u32> = vec![];)
```
currently produces this string:
```
let a : Vec < u32 > = vec! [] ;
```
With this PR, it now produces this string:
```
let a: Vec<u32> = vec![] ;
```
(The space after the `]` is because `TokenTree::Delimited` currently
doesn't have `FollowedBy` information. Adding this as a follow-up should
be straightforward.)

The new `print_tts` doesn't replicate the original code perfectly: e.g.
multiple space characters are condensed into a single space character.
But it's much improved.

`print_tts` still produces the old, uglier output for code produced by
proc macros, because we have to translate the generated code from
`Spacing` to the more expressive `FollowedBy`, which results in too much
`FollowedBy::Space` usage and no `FollowedBy::Other` usage. So
`tt_prepend_space` still exists and is used by `print_tts` in
conjunction with the `FollowedBy` field.
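
As a concrete (toy, non-compiler) illustration of the degradation,
assume a printer that emits a space only after `FollowedBy::Space`
tokens (the real `print_tts` also consults `tt_prepend_space`):
```
#[derive(Clone, Copy, PartialEq)]
enum FollowedBy { Space, Punct, Other }

// Print each token, adding a space only where `FollowedBy::Space` says to.
fn print(tokens: &[(&str, FollowedBy)]) -> String {
    let mut out = String::new();
    for (i, (text, fby)) in tokens.iter().enumerate() {
        out.push_str(text);
        if i + 1 < tokens.len() && *fby == FollowedBy::Space {
            out.push(' ');
        }
    }
    out
}

fn main() {
    use FollowedBy::*;
    // `a: Vec<u32>` with accurate followed-by information...
    let good = [("a", Punct), (":", Space), ("Vec", Punct), ("<", Other),
                ("u32", Punct), (">", Space)];
    // ...and after a round trip through `Spacing`: only a punct followed
    // by another punct (`Joint`) survives, so everything here comes back
    // as `Space`.
    let bad = [("a", Space), (":", Space), ("Vec", Space), ("<", Space),
               ("u32", Space), (">", Space)];
    assert_eq!(print(&good), "a: Vec<u32>");
    assert_eq!(print(&bad), "a : Vec < u32 >");
}
```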

This change will also help with the removal of `Token::Interpolated`.
Currently interpolated tokens are pretty-printed nicely via AST pretty
printing. `Token::Interpolated` removal will mean they get printed with
`print_tts`. Without this change, that would result in much uglier
output for code produced by decl macro expansions. With this change, AST
pretty printing and `print_tts` produce similar results.
nnethercote committed Aug 8, 2023
1 parent 00e93e1 commit a6942db
Showing 59 changed files with 490 additions and 246 deletions.
6 changes: 3 additions & 3 deletions compiler/rustc_ast/src/attr/mod.rs
@@ -6,7 +6,7 @@ use crate::ast::{MetaItem, MetaItemKind, NestedMetaItem, NormalAttr};
use crate::ast::{Path, PathSegment, DUMMY_NODE_ID};
use crate::ptr::P;
use crate::token::{self, CommentKind, Delimiter, Token};
use crate::tokenstream::{DelimSpan, Spacing, TokenTree};
use crate::tokenstream::{DelimSpan, FollowedBy, TokenTree};
use crate::tokenstream::{LazyAttrTokenStream, TokenStream};
use crate::util::comments;
use crate::util::literal::escape_string_symbol;
@@ -183,7 +183,7 @@ impl Attribute {
.to_tokenstream(),
&AttrKind::DocComment(comment_kind, data) => TokenStream::new(vec![TokenTree::Token(
Token::new(token::DocComment(comment_kind, self.style, data), self.span),
Spacing::Alone,
FollowedBy::Space,
)]),
}
}
@@ -570,7 +570,7 @@ pub fn mk_attr_nested_word(
) -> Attribute {
let inner_tokens = TokenStream::new(vec![TokenTree::Token(
Token::from_ast_ident(Ident::new(inner, span)),
Spacing::Alone,
FollowedBy::Space,
)]);
let outer_ident = Ident::new(outer, span);
let path = Path::from_ident(outer_ident);
136 changes: 84 additions & 52 deletions compiler/rustc_ast/src/tokenstream.rs
@@ -44,7 +44,7 @@ use std::{cmp, fmt, iter, mem};
pub enum TokenTree {
/// A single token. Should never be `OpenDelim` or `CloseDelim`, because
/// delimiters are implicitly represented by `Delimited`.
Token(Token, Spacing),
Token(Token, FollowedBy),
/// A delimited sequence of token trees.
Delimited(DelimSpan, Delimiter, TokenStream),
}
@@ -54,7 +54,7 @@ pub enum TokenTree {
fn _dummy()
where
Token: sync::DynSend + sync::DynSync,
Spacing: sync::DynSend + sync::DynSync,
FollowedBy: sync::DynSend + sync::DynSync,
DelimSpan: sync::DynSend + sync::DynSync,
Delimiter: sync::DynSend + sync::DynSync,
TokenStream: sync::DynSend + sync::DynSync,
@@ -89,20 +89,25 @@ impl TokenTree {
}
}

/// Create a `TokenTree::Token` with alone spacing.
pub fn token_alone(kind: TokenKind, span: Span) -> TokenTree {
TokenTree::Token(Token::new(kind, span), Spacing::Alone)
/// Create a `TokenTree::Token` with `FollowedBy::Space`.
pub fn token_fby_space(kind: TokenKind, span: Span) -> TokenTree {
TokenTree::Token(Token::new(kind, span), FollowedBy::Space)
}

/// Create a `TokenTree::Token` with joint spacing.
pub fn token_joint(kind: TokenKind, span: Span) -> TokenTree {
TokenTree::Token(Token::new(kind, span), Spacing::Joint)
/// Create a `TokenTree::Token` with `FollowedBy::Punct`.
pub fn token_fby_punct(kind: TokenKind, span: Span) -> TokenTree {
TokenTree::Token(Token::new(kind, span), FollowedBy::Punct)
}

/// Create a `TokenTree::Token` with `FollowedBy::Other`.
pub fn token_fby_other(kind: TokenKind, span: Span) -> TokenTree {
TokenTree::Token(Token::new(kind, span), FollowedBy::Other)
}

pub fn uninterpolate(&self) -> Cow<'_, TokenTree> {
match self {
TokenTree::Token(token, spacing) => match token.uninterpolate() {
Cow::Owned(token) => Cow::Owned(TokenTree::Token(token, *spacing)),
TokenTree::Token(token, fby) => match token.uninterpolate() {
Cow::Owned(token) => Cow::Owned(TokenTree::Token(token, *fby)),
Cow::Borrowed(_) => Cow::Borrowed(self),
},
_ => Cow::Borrowed(self),
@@ -182,7 +187,7 @@ pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
/// Like `TokenTree`, but for `AttrTokenStream`.
#[derive(Clone, Debug, Encodable, Decodable)]
pub enum AttrTokenTree {
Token(Token, Spacing),
Token(Token, FollowedBy),
Delimited(DelimSpan, Delimiter, AttrTokenStream),
/// Stores the attributes for an attribute target,
/// along with the tokens for that attribute target.
@@ -205,8 +210,8 @@ impl AttrTokenStream {
.0
.iter()
.flat_map(|tree| match &tree {
AttrTokenTree::Token(inner, spacing) => {
smallvec![TokenTree::Token(inner.clone(), *spacing)].into_iter()
AttrTokenTree::Token(inner, fby) => {
smallvec![TokenTree::Token(inner.clone(), *fby)].into_iter()
}
AttrTokenTree::Delimited(span, delim, stream) => {
smallvec![TokenTree::Delimited(*span, *delim, stream.to_tokenstream()),]
@@ -307,21 +312,40 @@ pub struct AttributesData {
#[derive(Clone, Debug, Default, Encodable, Decodable)]
pub struct TokenStream(pub(crate) Lrc<Vec<TokenTree>>);

/// Similar to `proc_macro::Spacing`, but for tokens.
///
/// Note that all `ast::TokenTree::Token` instances have a `Spacing`, but when
/// we convert to `proc_macro::TokenTree` for proc macros only `Punct`
/// `TokenTree`s have a `proc_macro::Spacing`.
/// Describes what immediately follows a token. Used for pretty-printing and
/// conversions to `proc_macro::Spacing`.
#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub enum Spacing {
/// The token is not immediately followed by an operator token (as
/// determined by `Token::is_op`). E.g. a `+` token is `Alone` in `+ =`,
/// `+/*foo*/=`, `+ident`, and `+()`.
Alone,

/// The token is immediately followed by an operator token. E.g. a `+`
/// token is `Joint` in `+=` and `++`.
Joint,
pub enum FollowedBy {
/// The token is immediately followed by whitespace or a non-doc comment.
/// When constructing token streams, use this for each token that should be
/// pretty-printed with a space after it.
///
/// Converts to `Spacing::Alone`, and `Spacing::Alone` converts back to
/// this.
Space,

/// The token is immediately followed by punctuation (as determined by
/// `Token::is_punct`). When constructing token streams, use this for each
/// token that (a) should be pretty-printed without a space after it, and
/// (b) is followed by an punctuation token.
///
/// Converts to `Spacing::Joint`, and `Spacing::Joint` converts back to
/// this.
Punct,

/// The token is immediately followed by something else: an identifier,
/// lifetime, literal, delimiter, doc comment, or EOF. When constructing
/// token streams, use this for each token that (a) should be
/// pretty-printed without a space after it, and (b) is followed by a
/// non-punctuation token.
///
/// Converts to `Spacing::Alone`, but `Spacing::Alone` converts back to
/// `FollowedBy::Space`. Because of that, pretty-printing of `TokenStream`s
/// produced by proc macros is unavoidably uglier (with more whitespace
between tokens) than pretty-printing of `TokenStream`s produced by
/// other means (i.e. user-written code, internally constructed token
/// streams, and token streams produced by declarative macros).
Other,
}

impl TokenStream {
@@ -336,7 +360,7 @@ impl TokenStream {
let sp = match (&ts, &next) {
(_, TokenTree::Token(Token { kind: token::Comma, .. }, _)) => continue,
(
TokenTree::Token(token_left, Spacing::Alone),
TokenTree::Token(token_left, FollowedBy::Space | FollowedBy::Other),
TokenTree::Token(token_right, _),
) if ((token_left.is_ident() && !token_left.is_reserved_ident())
|| token_left.is_lit())
Expand All @@ -349,7 +373,7 @@ impl TokenStream {
_ => continue,
};
let sp = sp.shrink_to_hi();
let comma = TokenTree::token_alone(token::Comma, sp);
let comma = TokenTree::token_fby_space(token::Comma, sp);
suggestion = Some((pos, comma, sp));
}
}
@@ -425,14 +449,22 @@ impl TokenStream {
self
}

/// Create a token stream containing a single token with alone spacing.
pub fn token_alone(kind: TokenKind, span: Span) -> TokenStream {
TokenStream::new(vec![TokenTree::token_alone(kind, span)])
/// Create a token stream containing a single token with
/// `FollowedBy::Space`.
pub fn token_fby_space(kind: TokenKind, span: Span) -> TokenStream {
TokenStream::new(vec![TokenTree::token_fby_space(kind, span)])
}

/// Create a token stream containing a single token with
/// `FollowedBy::Punct`.
pub fn token_fby_punct(kind: TokenKind, span: Span) -> TokenStream {
TokenStream::new(vec![TokenTree::token_fby_punct(kind, span)])
}

/// Create a token stream containing a single token with joint spacing.
pub fn token_joint(kind: TokenKind, span: Span) -> TokenStream {
TokenStream::new(vec![TokenTree::token_joint(kind, span)])
/// Create a token stream containing a single token with
/// `FollowedBy::Other`.
pub fn token_fby_other(kind: TokenKind, span: Span) -> TokenStream {
TokenStream::new(vec![TokenTree::token_fby_other(kind, span)])
}

/// Create a token stream containing a single `Delimited`.
@@ -458,16 +490,16 @@ impl TokenStream {
pub fn from_nonterminal_ast(nt: &Nonterminal) -> TokenStream {
match nt {
Nonterminal::NtIdent(ident, is_raw) => {
TokenStream::token_alone(token::Ident(ident.name, *is_raw), ident.span)
TokenStream::token_fby_space(token::Ident(ident.name, *is_raw), ident.span)
}
Nonterminal::NtLifetime(ident) => {
TokenStream::token_alone(token::Lifetime(ident.name), ident.span)
TokenStream::token_fby_space(token::Lifetime(ident.name), ident.span)
}
Nonterminal::NtItem(item) => TokenStream::from_ast(item),
Nonterminal::NtBlock(block) => TokenStream::from_ast(block),
Nonterminal::NtStmt(stmt) if let StmtKind::Empty = stmt.kind => {
// FIXME: Properly collect tokens for empty statements.
TokenStream::token_alone(token::Semi, stmt.span)
TokenStream::token_fby_space(token::Semi, stmt.span)
}
Nonterminal::NtStmt(stmt) => TokenStream::from_ast(stmt),
Nonterminal::NtPat(pat) => TokenStream::from_ast(pat),
@@ -479,23 +511,23 @@ }
}
}

fn flatten_token(token: &Token, spacing: Spacing) -> TokenTree {
fn flatten_token(token: &Token, fby: FollowedBy) -> TokenTree {
match &token.kind {
token::Interpolated(nt) if let token::NtIdent(ident, is_raw) = **nt => {
TokenTree::Token(Token::new(token::Ident(ident.name, is_raw), ident.span), spacing)
TokenTree::Token(Token::new(token::Ident(ident.name, is_raw), ident.span), fby)
}
token::Interpolated(nt) => TokenTree::Delimited(
DelimSpan::from_single(token.span),
Delimiter::Invisible,
TokenStream::from_nonterminal_ast(nt).flattened(),
),
_ => TokenTree::Token(token.clone(), spacing),
_ => TokenTree::Token(token.clone(), fby),
}
}

fn flatten_token_tree(tree: &TokenTree) -> TokenTree {
match tree {
TokenTree::Token(token, spacing) => TokenStream::flatten_token(token, *spacing),
TokenTree::Token(token, fby) => TokenStream::flatten_token(token, *fby),
TokenTree::Delimited(span, delim, tts) => {
TokenTree::Delimited(*span, *delim, tts.flattened())
}
@@ -521,13 +553,13 @@ impl TokenStream {
// If `vec` is not empty, try to glue `tt` onto its last token. The return
// value indicates if gluing took place.
fn try_glue_to_last(vec: &mut Vec<TokenTree>, tt: &TokenTree) -> bool {
if let Some(TokenTree::Token(last_tok, Spacing::Joint)) = vec.last()
&& let TokenTree::Token(tok, spacing) = tt
if let Some(TokenTree::Token(last_tok, FollowedBy::Punct)) = vec.last()
&& let TokenTree::Token(tok, fby) = tt
&& let Some(glued_tok) = last_tok.glue(tok)
{
// ...then overwrite the last token tree in `vec` with the
// glued token, and skip the first token tree from `stream`.
*vec.last_mut().unwrap() = TokenTree::Token(glued_tok, *spacing);
*vec.last_mut().unwrap() = TokenTree::Token(glued_tok, *fby);
true
} else {
false
@@ -583,7 +615,7 @@ impl TokenStream {
match tt {
&TokenTree::Token(
Token { kind: token::DocComment(_, attr_style, data), span },
_spacing,
_fby,
) => {
let desugared = desugared_tts(attr_style, data, span);
let desugared_len = desugared.len();
@@ -630,9 +662,9 @@ impl TokenStream {
delim_span,
Delimiter::Bracket,
[
TokenTree::token_alone(token::Ident(sym::doc, false), span),
TokenTree::token_alone(token::Eq, span),
TokenTree::token_alone(
TokenTree::token_fby_space(token::Ident(sym::doc, false), span),
TokenTree::token_fby_space(token::Eq, span),
TokenTree::token_fby_space(
TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
span,
),
Expand All @@ -643,12 +675,12 @@ impl TokenStream {

if attr_style == AttrStyle::Inner {
vec![
TokenTree::token_alone(token::Pound, span),
TokenTree::token_alone(token::Not, span),
TokenTree::token_fby_space(token::Pound, span),
TokenTree::token_fby_space(token::Not, span),
body,
]
} else {
vec![TokenTree::token_alone(token::Pound, span), body]
vec![TokenTree::token_fby_space(token::Pound, span), body]
}
}
}
28 changes: 22 additions & 6 deletions compiler/rustc_ast_pretty/src/pprust/state.rs
@@ -7,7 +7,7 @@ use crate::pp::{self, Breaks};
use rustc_ast::attr::AttrIdGenerator;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, BinOpToken, CommentKind, Delimiter, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::{TokenStream, TokenTree};
use rustc_ast::tokenstream::{FollowedBy, TokenStream, TokenTree};
use rustc_ast::util::classify;
use rustc_ast::util::comments::{gather_comments, Comment, CommentStyle};
use rustc_ast::util::parser;
@@ -547,14 +547,15 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
/// appropriate macro, transcribe back into the grammar we just parsed from,
/// and then pretty-print the resulting AST nodes (so, e.g., we print
/// expression arguments as expressions). It can be done! I think.
fn print_tt(&mut self, tt: &TokenTree, convert_dollar_crate: bool) {
fn print_tt(&mut self, tt: &TokenTree, convert_dollar_crate: bool) -> FollowedBy {
match tt {
TokenTree::Token(token, _) => {
TokenTree::Token(token, fby) => {
let token_str = self.token_to_string_ext(token, convert_dollar_crate);
self.word(token_str);
if let token::DocComment(..) = token.kind {
self.hardbreak()
}
*fby
}
TokenTree::Delimited(dspan, delim, tts) => {
self.print_mac_common(
@@ -566,16 +567,29 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
convert_dollar_crate,
dspan.entire(),
);
// FIXME: add two `FollowedBy` fields to `TokenTree::Delimited`
// and use the close delim one here.
FollowedBy::Space
}
}
}

fn print_tts(&mut self, tts: &TokenStream, convert_dollar_crate: bool) {
let mut iter = tts.trees().peekable();
while let Some(tt) = iter.next() {
self.print_tt(tt, convert_dollar_crate);
let fby = self.print_tt(tt, convert_dollar_crate);
if let Some(next) = iter.peek() {
if tt_prepend_space(next, tt) {
// Should we print a space after `tt`? There are two guiding
// factors.
// - `fby` is the more important and accurate one. Most tokens
// have good followed-by information, and `Punct`/`Other` get
// used a lot.
// - `tt_prepend_space` is the backup. Code produced by proc
// macros has worse followed-by information, with no `Other`
// usage and too much `Space` usage, resulting in over-spaced
// output such as `let a : Vec < u32 > = Vec :: new() ;`.
// `tt_prepend_space` avoids some of the excess whitespace.
if fby == FollowedBy::Space && tt_prepend_space(next, tt) {
self.space();
}
}
@@ -855,7 +869,9 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
}

fn tt_to_string(&self, tt: &TokenTree) -> String {
Self::to_string(|s| s.print_tt(tt, false))
Self::to_string(|s| {
s.print_tt(tt, false);
})
}

fn tts_to_string(&self, tokens: &TokenStream) -> String {
8 changes: 4 additions & 4 deletions compiler/rustc_builtin_macros/src/assert/context.rs
@@ -151,7 +151,7 @@ impl<'cx, 'a> Context<'cx, 'a> {
fn build_panic(&self, expr_str: &str, panic_path: Path) -> P<Expr> {
let escaped_expr_str = escape_to_fmt(expr_str);
let initial = [
TokenTree::token_alone(
TokenTree::token_fby_space(
token::Literal(token::Lit {
kind: token::LitKind::Str,
symbol: Symbol::intern(&if self.fmt_string.is_empty() {
@@ -166,12 +166,12 @@ impl<'cx, 'a> Context<'cx, 'a> {
}),
self.span,
),
TokenTree::token_alone(token::Comma, self.span),
TokenTree::token_fby_space(token::Comma, self.span),
];
let captures = self.capture_decls.iter().flat_map(|cap| {
[
TokenTree::token_alone(token::Ident(cap.ident.name, false), cap.ident.span),
TokenTree::token_alone(token::Comma, self.span),
TokenTree::token_fby_space(token::Ident(cap.ident.name, false), cap.ident.span),
TokenTree::token_fby_space(token::Comma, self.span),
]
});
self.cx.expr(
