Improve `print_tts` by changing `tokenstream::Spacing`.
`proc_macro::Spacing` only appears on `Punct` tokens, where:
- `Joint` means "the next token follows immediately and is a `Punct`".
- `Alone` means "the next token doesn't follow immediately *or* it
  follows immediately and is not a `Punct`".

`tokenstream::Spacing` appears on all `TokenTree::Token` instances,
both punct and non-punct, where:
- `Joint` means "the next token follows immediately and is a punct" (i.e.
  satisfies `is_op`).
- `Alone` means "the next token doesn't follow immediately *or* it
  follows immediately and is not a punct".

The fact that `Alone` is used for two different cases is awkward.
This commit replaces `tokenstream::Spacing` with a new type `FollowedBy`
that separates those two cases:
- `Space` means "the next token doesn't follow immediately".
- `Punct` means "the next token follows immediately and is a punct".
- `Other` means "the next token follows immediately and is not a punct".

The mapping from old to new is:
- `Joint` -> `Punct`
- `Alone` -> `Space` or `Other`, depending on the situation.
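
As a rough sketch, the two conversion directions look like this
(hypothetical helper functions for illustration only, assuming the
`Spacing` and `FollowedBy` types described above; the real conversion
code differs in detail):
```
fn to_proc_macro_spacing(fby: FollowedBy) -> Spacing {
    match fby {
        FollowedBy::Punct => Spacing::Joint,
        // Both `Space` and `Other` collapse to `Alone`; `proc_macro`
        // cannot represent the difference.
        FollowedBy::Space | FollowedBy::Other => Spacing::Alone,
    }
}

fn from_proc_macro_spacing(spacing: Spacing) -> FollowedBy {
    match spacing {
        Spacing::Joint => FollowedBy::Punct,
        // `Alone` could have meant either `Space` or `Other`; we must
        // pick one, so `Other` is never produced on this path.
        Spacing::Alone => FollowedBy::Space,
    }
}
```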

We can use `FollowedBy` to *drastically* improve the output of
`print_tts`. For example, this:
```
stringify!(let a: Vec<u32> = vec![];)
```
currently produces this string:
```
let a : Vec < u32 > = vec! [] ;
```
With this PR, it now produces this string:
```
let a: Vec<u32> = vec![] ;
```
(The space after the `]` is because `TokenTree::Delimited` currently
doesn't have `FollowedBy` information. Adding this as a follow-up should
be straightforward.)

The new `print_tts` doesn't replicate the original code perfectly: e.g.
multiple space characters are condensed into a single space character.
But it's much improved.

`print_tts` still produces the old, uglier output for code produced by
proc macros, because we have to translate the generated code from
`Spacing` to the more expressive `FollowedBy`, which results in too much
`FollowedBy::Space` usage and no `FollowedBy::Other` usage. So
`tt_prepend_space` still exists and is used by `print_tts` in
conjunction with the `FollowedBy` field.
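
As a concrete (toy, non-compiler) illustration of the degradation,
assume a printer that emits a space only after `FollowedBy::Space`
tokens (the real `print_tts` also consults `tt_prepend_space`):
```
#[derive(Clone, Copy, PartialEq)]
enum FollowedBy { Space, Punct, Other }

// Print each token, adding a space only where `FollowedBy::Space` says to.
fn print(tokens: &[(&str, FollowedBy)]) -> String {
    let mut out = String::new();
    for (i, (text, fby)) in tokens.iter().enumerate() {
        out.push_str(text);
        if i + 1 < tokens.len() && *fby == FollowedBy::Space {
            out.push(' ');
        }
    }
    out
}

fn main() {
    use FollowedBy::*;
    // `a: Vec<u32>` with accurate followed-by information...
    let good = [("a", Punct), (":", Space), ("Vec", Punct), ("<", Other),
                ("u32", Punct), (">", Space)];
    // ...and after a round trip through `Spacing`: only a punct followed
    // by another punct (`Joint`) survives, so everything here comes back
    // as `Space`.
    let bad = [("a", Space), (":", Space), ("Vec", Space), ("<", Space),
               ("u32", Space), (">", Space)];
    assert_eq!(print(&good), "a: Vec<u32>");
    assert_eq!(print(&bad), "a : Vec < u32 >");
}
```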

This change will also help with the removal of `Token::Interpolated`.
Currently interpolated tokens are pretty-printed nicely via AST pretty
printing. `Token::Interpolated` removal will mean they get printed with
`print_tts`. Without this change, that would result in much uglier
output for code produced by decl macro expansions. With this change, AST
pretty printing and `print_tts` produce similar results.
nnethercote committed Aug 8, 2023
1 parent 00e93e1 commit a6942db
Showing 59 changed files with 490 additions and 246 deletions.
6 changes: 3 additions & 3 deletions compiler/rustc_ast/src/attr/mod.rs
@@ -6,7 +6,7 @@ use crate::ast::{MetaItem, MetaItemKind, NestedMetaItem, NormalAttr};
use crate::ast::{Path, PathSegment, DUMMY_NODE_ID};
use crate::ptr::P;
use crate::token::{self, CommentKind, Delimiter, Token};
use crate::tokenstream::{DelimSpan, Spacing, TokenTree};
use crate::tokenstream::{DelimSpan, FollowedBy, TokenTree};
use crate::tokenstream::{LazyAttrTokenStream, TokenStream};
use crate::util::comments;
use crate::util::literal::escape_string_symbol;
@@ -183,7 +183,7 @@ impl Attribute {
.to_tokenstream(),
&AttrKind::DocComment(comment_kind, data) => TokenStream::new(vec![TokenTree::Token(
Token::new(token::DocComment(comment_kind, self.style, data), self.span),
Spacing::Alone,
FollowedBy::Space,
)]),
}
}
@@ -570,7 +570,7 @@ pub fn mk_attr_nested_word(
) -> Attribute {
let inner_tokens = TokenStream::new(vec![TokenTree::Token(
Token::from_ast_ident(Ident::new(inner, span)),
Spacing::Alone,
FollowedBy::Space,
)]);
let outer_ident = Ident::new(outer, span);
let path = Path::from_ident(outer_ident);
136 changes: 84 additions & 52 deletions compiler/rustc_ast/src/tokenstream.rs
@@ -44,7 +44,7 @@ use std::{cmp, fmt, iter, mem};
pub enum TokenTree {
/// A single token. Should never be `OpenDelim` or `CloseDelim`, because
/// delimiters are implicitly represented by `Delimited`.
Token(Token, Spacing),
Token(Token, FollowedBy),
/// A delimited sequence of token trees.
Delimited(DelimSpan, Delimiter, TokenStream),
}
@@ -54,7 +54,7 @@ pub enum TokenTree {
fn _dummy()
where
Token: sync::DynSend + sync::DynSync,
Spacing: sync::DynSend + sync::DynSync,
FollowedBy: sync::DynSend + sync::DynSync,
DelimSpan: sync::DynSend + sync::DynSync,
Delimiter: sync::DynSend + sync::DynSync,
TokenStream: sync::DynSend + sync::DynSync,
@@ -89,20 +89,25 @@ impl TokenTree {
}
}

/// Create a `TokenTree::Token` with alone spacing.
pub fn token_alone(kind: TokenKind, span: Span) -> TokenTree {
TokenTree::Token(Token::new(kind, span), Spacing::Alone)
/// Create a `TokenTree::Token` with `FollowedBy::Space`.
pub fn token_fby_space(kind: TokenKind, span: Span) -> TokenTree {
TokenTree::Token(Token::new(kind, span), FollowedBy::Space)
}

/// Create a `TokenTree::Token` with joint spacing.
pub fn token_joint(kind: TokenKind, span: Span) -> TokenTree {
TokenTree::Token(Token::new(kind, span), Spacing::Joint)
/// Create a `TokenTree::Token` with `FollowedBy::Punct`.
pub fn token_fby_punct(kind: TokenKind, span: Span) -> TokenTree {
TokenTree::Token(Token::new(kind, span), FollowedBy::Punct)
}

/// Create a `TokenTree::Token` with `FollowedBy::Other`.
pub fn token_fby_other(kind: TokenKind, span: Span) -> TokenTree {
TokenTree::Token(Token::new(kind, span), FollowedBy::Other)
}

pub fn uninterpolate(&self) -> Cow<'_, TokenTree> {
match self {
TokenTree::Token(token, spacing) => match token.uninterpolate() {
Cow::Owned(token) => Cow::Owned(TokenTree::Token(token, *spacing)),
TokenTree::Token(token, fby) => match token.uninterpolate() {
Cow::Owned(token) => Cow::Owned(TokenTree::Token(token, *fby)),
Cow::Borrowed(_) => Cow::Borrowed(self),
},
_ => Cow::Borrowed(self),
@@ -182,7 +187,7 @@ pub struct AttrTokenStream(pub Lrc<Vec<AttrTokenTree>>);
/// Like `TokenTree`, but for `AttrTokenStream`.
#[derive(Clone, Debug, Encodable, Decodable)]
pub enum AttrTokenTree {
Token(Token, Spacing),
Token(Token, FollowedBy),
Delimited(DelimSpan, Delimiter, AttrTokenStream),
/// Stores the attributes for an attribute target,
/// along with the tokens for that attribute target.
@@ -205,8 +210,8 @@ impl AttrTokenStream {
.0
.iter()
.flat_map(|tree| match &tree {
AttrTokenTree::Token(inner, spacing) => {
smallvec![TokenTree::Token(inner.clone(), *spacing)].into_iter()
AttrTokenTree::Token(inner, fby) => {
smallvec![TokenTree::Token(inner.clone(), *fby)].into_iter()
}
AttrTokenTree::Delimited(span, delim, stream) => {
smallvec![TokenTree::Delimited(*span, *delim, stream.to_tokenstream()),]
@@ -307,21 +312,40 @@ pub struct AttributesData {
#[derive(Clone, Debug, Default, Encodable, Decodable)]
pub struct TokenStream(pub(crate) Lrc<Vec<TokenTree>>);

/// Similar to `proc_macro::Spacing`, but for tokens.
///
/// Note that all `ast::TokenTree::Token` instances have a `Spacing`, but when
/// we convert to `proc_macro::TokenTree` for proc macros only `Punct`
/// `TokenTree`s have a `proc_macro::Spacing`.
/// Describes what immediately follows a token. Used for pretty-printing and
/// conversions to `proc_macro::Spacing`.
#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
pub enum Spacing {
/// The token is not immediately followed by an operator token (as
/// determined by `Token::is_op`). E.g. a `+` token is `Alone` in `+ =`,
/// `+/*foo*/=`, `+ident`, and `+()`.
Alone,

/// The token is immediately followed by an operator token. E.g. a `+`
/// token is `Joint` in `+=` and `++`.
Joint,
pub enum FollowedBy {
/// The token is immediately followed by whitespace or a non-doc comment.
/// When constructing token streams, use this for each token that should be
/// pretty-printed with a space after it.
///
/// Converts to `Spacing::Alone`, and `Spacing::Alone` converts back to
/// this.
Space,

/// The token is immediately followed by punctuation (as determined by
/// `Token::is_punct`). When constructing token streams, use this for each
/// token that (a) should be pretty-printed without a space after it, and
/// (b) is followed by an punctuation token.
///
/// Converts to `Spacing::Joint`, and `Spacing::Joint` converts back to
/// this.
Punct,

/// The token is immediately followed by something else: an identifier,
/// lifetime, literal, delimiter, doc comment, or EOF. When constructing
/// token streams, use this for each token that (a) should be
/// pretty-printed without a space after it, and (b) is followed by a
/// non-punctuation token.
///
/// Converts to `Spacing::Alone`, but `Spacing::Alone` converts back to
/// `FollowedBy::Space`. Because of that, pretty-printing of `TokenStream`s
/// produced by proc macros is unavoidably uglier (with more whitespace
between tokens) than pretty-printing of `TokenStream`s produced by
/// other means (i.e. user-written code, internally constructed token
/// streams, and token streams produced by declarative macros).
Other,
}

impl TokenStream {
@@ -336,7 +360,7 @@ impl TokenStream {
let sp = match (&ts, &next) {
(_, TokenTree::Token(Token { kind: token::Comma, .. }, _)) => continue,
(
TokenTree::Token(token_left, Spacing::Alone),
TokenTree::Token(token_left, FollowedBy::Space | FollowedBy::Other),
TokenTree::Token(token_right, _),
) if ((token_left.is_ident() && !token_left.is_reserved_ident())
|| token_left.is_lit())
Expand All @@ -349,7 +373,7 @@ impl TokenStream {
_ => continue,
};
let sp = sp.shrink_to_hi();
let comma = TokenTree::token_alone(token::Comma, sp);
let comma = TokenTree::token_fby_space(token::Comma, sp);
suggestion = Some((pos, comma, sp));
}
}
@@ -425,14 +449,22 @@ impl TokenStream {
self
}

/// Create a token stream containing a single token with alone spacing.
pub fn token_alone(kind: TokenKind, span: Span) -> TokenStream {
TokenStream::new(vec![TokenTree::token_alone(kind, span)])
/// Create a token stream containing a single token with
/// `FollowedBy::Space`.
pub fn token_fby_space(kind: TokenKind, span: Span) -> TokenStream {
TokenStream::new(vec![TokenTree::token_fby_space(kind, span)])
}

/// Create a token stream containing a single token with
/// `FollowedBy::Punct`.
pub fn token_fby_punct(kind: TokenKind, span: Span) -> TokenStream {
TokenStream::new(vec![TokenTree::token_fby_punct(kind, span)])
}

/// Create a token stream containing a single token with joint spacing.
pub fn token_joint(kind: TokenKind, span: Span) -> TokenStream {
TokenStream::new(vec![TokenTree::token_joint(kind, span)])
/// Create a token stream containing a single token with
/// `FollowedBy::Other`.
pub fn token_fby_other(kind: TokenKind, span: Span) -> TokenStream {
TokenStream::new(vec![TokenTree::token_fby_other(kind, span)])
}

/// Create a token stream containing a single `Delimited`.
@@ -458,16 +490,16 @@ impl TokenStream {
pub fn from_nonterminal_ast(nt: &Nonterminal) -> TokenStream {
match nt {
Nonterminal::NtIdent(ident, is_raw) => {
TokenStream::token_alone(token::Ident(ident.name, *is_raw), ident.span)
TokenStream::token_fby_space(token::Ident(ident.name, *is_raw), ident.span)
}
Nonterminal::NtLifetime(ident) => {
TokenStream::token_alone(token::Lifetime(ident.name), ident.span)
TokenStream::token_fby_space(token::Lifetime(ident.name), ident.span)
}
Nonterminal::NtItem(item) => TokenStream::from_ast(item),
Nonterminal::NtBlock(block) => TokenStream::from_ast(block),
Nonterminal::NtStmt(stmt) if let StmtKind::Empty = stmt.kind => {
// FIXME: Properly collect tokens for empty statements.
TokenStream::token_alone(token::Semi, stmt.span)
TokenStream::token_fby_space(token::Semi, stmt.span)
}
Nonterminal::NtStmt(stmt) => TokenStream::from_ast(stmt),
Nonterminal::NtPat(pat) => TokenStream::from_ast(pat),
@@ -479,23 +511,23 @@ }
}
}

fn flatten_token(token: &Token, spacing: Spacing) -> TokenTree {
fn flatten_token(token: &Token, fby: FollowedBy) -> TokenTree {
match &token.kind {
token::Interpolated(nt) if let token::NtIdent(ident, is_raw) = **nt => {
TokenTree::Token(Token::new(token::Ident(ident.name, is_raw), ident.span), spacing)
TokenTree::Token(Token::new(token::Ident(ident.name, is_raw), ident.span), fby)
}
token::Interpolated(nt) => TokenTree::Delimited(
DelimSpan::from_single(token.span),
Delimiter::Invisible,
TokenStream::from_nonterminal_ast(nt).flattened(),
),
_ => TokenTree::Token(token.clone(), spacing),
_ => TokenTree::Token(token.clone(), fby),
}
}

fn flatten_token_tree(tree: &TokenTree) -> TokenTree {
match tree {
TokenTree::Token(token, spacing) => TokenStream::flatten_token(token, *spacing),
TokenTree::Token(token, fby) => TokenStream::flatten_token(token, *fby),
TokenTree::Delimited(span, delim, tts) => {
TokenTree::Delimited(*span, *delim, tts.flattened())
}
@@ -521,13 +553,13 @@ impl TokenStream {
// If `vec` is not empty, try to glue `tt` onto its last token. The return
// value indicates if gluing took place.
fn try_glue_to_last(vec: &mut Vec<TokenTree>, tt: &TokenTree) -> bool {
if let Some(TokenTree::Token(last_tok, Spacing::Joint)) = vec.last()
&& let TokenTree::Token(tok, spacing) = tt
if let Some(TokenTree::Token(last_tok, FollowedBy::Punct)) = vec.last()
&& let TokenTree::Token(tok, fby) = tt
&& let Some(glued_tok) = last_tok.glue(tok)
{
// ...then overwrite the last token tree in `vec` with the
// glued token, and skip the first token tree from `stream`.
*vec.last_mut().unwrap() = TokenTree::Token(glued_tok, *spacing);
*vec.last_mut().unwrap() = TokenTree::Token(glued_tok, *fby);
true
} else {
false
@@ -583,7 +615,7 @@ impl TokenStream {
match tt {
&TokenTree::Token(
Token { kind: token::DocComment(_, attr_style, data), span },
_spacing,
_fby,
) => {
let desugared = desugared_tts(attr_style, data, span);
let desugared_len = desugared.len();
@@ -630,9 +662,9 @@ impl TokenStream {
delim_span,
Delimiter::Bracket,
[
TokenTree::token_alone(token::Ident(sym::doc, false), span),
TokenTree::token_alone(token::Eq, span),
TokenTree::token_alone(
TokenTree::token_fby_space(token::Ident(sym::doc, false), span),
TokenTree::token_fby_space(token::Eq, span),
TokenTree::token_fby_space(
TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
span,
),
Expand All @@ -643,12 +675,12 @@ impl TokenStream {

if attr_style == AttrStyle::Inner {
vec![
TokenTree::token_alone(token::Pound, span),
TokenTree::token_alone(token::Not, span),
TokenTree::token_fby_space(token::Pound, span),
TokenTree::token_fby_space(token::Not, span),
body,
]
} else {
vec![TokenTree::token_alone(token::Pound, span), body]
vec![TokenTree::token_fby_space(token::Pound, span), body]
}
}
}
28 changes: 22 additions & 6 deletions compiler/rustc_ast_pretty/src/pprust/state.rs
@@ -7,7 +7,7 @@ use crate::pp::{self, Breaks};
use rustc_ast::attr::AttrIdGenerator;
use rustc_ast::ptr::P;
use rustc_ast::token::{self, BinOpToken, CommentKind, Delimiter, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::{TokenStream, TokenTree};
use rustc_ast::tokenstream::{FollowedBy, TokenStream, TokenTree};
use rustc_ast::util::classify;
use rustc_ast::util::comments::{gather_comments, Comment, CommentStyle};
use rustc_ast::util::parser;
@@ -547,14 +547,15 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
/// appropriate macro, transcribe back into the grammar we just parsed from,
/// and then pretty-print the resulting AST nodes (so, e.g., we print
/// expression arguments as expressions). It can be done! I think.
fn print_tt(&mut self, tt: &TokenTree, convert_dollar_crate: bool) {
fn print_tt(&mut self, tt: &TokenTree, convert_dollar_crate: bool) -> FollowedBy {
match tt {
TokenTree::Token(token, _) => {
TokenTree::Token(token, fby) => {
let token_str = self.token_to_string_ext(token, convert_dollar_crate);
self.word(token_str);
if let token::DocComment(..) = token.kind {
self.hardbreak()
}
*fby
}
TokenTree::Delimited(dspan, delim, tts) => {
self.print_mac_common(
@@ -566,16 +567,29 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
convert_dollar_crate,
dspan.entire(),
);
// FIXME: add two `FollowedBy` fields to `TokenTree::Delimited`
// and use the close delim one here.
FollowedBy::Space
}
}
}

fn print_tts(&mut self, tts: &TokenStream, convert_dollar_crate: bool) {
let mut iter = tts.trees().peekable();
while let Some(tt) = iter.next() {
self.print_tt(tt, convert_dollar_crate);
let fby = self.print_tt(tt, convert_dollar_crate);
if let Some(next) = iter.peek() {
if tt_prepend_space(next, tt) {
// Should we print a space after `tt`? There are two guiding
// factors.
// - `fby` is the more important and accurate one. Most tokens
// have good followed-by information, and `Punct`/`Other` get
// used a lot.
// - `tt_prepend_space` is the backup. Code produced by proc
// macros has worse followed-by information, with no `Other`
// usage and too much `Space` usage, resulting in over-spaced
// output such as `let a : Vec < u32 > = Vec :: new() ;`.
// `tt_prepend_space` avoids some of the excess whitespace.
if fby == FollowedBy::Space && tt_prepend_space(next, tt) {
self.space();
}
}
@@ -855,7 +869,9 @@ pub trait PrintState<'a>: std::ops::Deref<Target = pp::Printer> + std::ops::Dere
}

fn tt_to_string(&self, tt: &TokenTree) -> String {
Self::to_string(|s| s.print_tt(tt, false))
Self::to_string(|s| {
s.print_tt(tt, false);
})
}

fn tts_to_string(&self, tokens: &TokenStream) -> String {
8 changes: 4 additions & 4 deletions compiler/rustc_builtin_macros/src/assert/context.rs
@@ -151,7 +151,7 @@ impl<'cx, 'a> Context<'cx, 'a> {
fn build_panic(&self, expr_str: &str, panic_path: Path) -> P<Expr> {
let escaped_expr_str = escape_to_fmt(expr_str);
let initial = [
TokenTree::token_alone(
TokenTree::token_fby_space(
token::Literal(token::Lit {
kind: token::LitKind::Str,
symbol: Symbol::intern(&if self.fmt_string.is_empty() {
@@ -166,12 +166,12 @@ impl<'cx, 'a> Context<'cx, 'a> {
}),
self.span,
),
TokenTree::token_alone(token::Comma, self.span),
TokenTree::token_fby_space(token::Comma, self.span),
];
let captures = self.capture_decls.iter().flat_map(|cap| {
[
TokenTree::token_alone(token::Ident(cap.ident.name, false), cap.ident.span),
TokenTree::token_alone(token::Comma, self.span),
TokenTree::token_fby_space(token::Ident(cap.ident.name, false), cap.ident.span),
TokenTree::token_fby_space(token::Comma, self.span),
]
});
self.cx.expr(
