Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move Nonterminal::to_tokenstream to parser & don't rely directly on parser in lowering #65392

Merged
merged 3 commits into from
Oct 14, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions src/librustc/hir/lowering.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ use syntax::print::pprust;
use syntax::source_map::{respan, ExpnData, ExpnKind, DesugaringKind, Spanned};
use syntax::symbol::{kw, sym, Symbol};
use syntax::tokenstream::{TokenStream, TokenTree};
use syntax::parse::token::{self, Token};
use syntax::parse::token::{self, Nonterminal, Token};
use syntax::parse::ParseSess;
use syntax::visit::{self, Visitor};
use syntax_pos::Span;

Expand All @@ -86,6 +87,11 @@ pub struct LoweringContext<'a> {

resolver: &'a mut dyn Resolver,

/// HACK(Centril): there is a cyclic dependency between the parser and lowering
/// if we don't have this function pointer. To avoid that dependency so that
/// librustc is independent of the parser, we use dynamic dispatch here.
nt_to_tokenstream: NtToTokenstream,

/// The items being lowered are collected here.
items: BTreeMap<hir::HirId, hir::Item>,

Expand Down Expand Up @@ -180,6 +186,8 @@ pub trait Resolver {
fn has_derives(&self, node_id: NodeId, derives: SpecialDerives) -> bool;
}

/// Signature of the callback that turns a `Nonterminal` into a `TokenStream`.
///
/// A value of this type is handed to `lower_crate` and kept in `LoweringContext`,
/// so that lowering an interpolated token goes through a function pointer instead
/// of calling into the parser directly.
type NtToTokenstream = fn(&Nonterminal, &ParseSess, Span) -> TokenStream;

/// Context of `impl Trait` in code, which determines whether it is allowed in an HIR subtree,
/// and if so, what meaning it has.
#[derive(Debug)]
Expand Down Expand Up @@ -236,6 +244,7 @@ pub fn lower_crate(
dep_graph: &DepGraph,
krate: &Crate,
resolver: &mut dyn Resolver,
nt_to_tokenstream: NtToTokenstream,
) -> hir::Crate {
// We're constructing the HIR here; we don't care what we will
// read, since we haven't even constructed the *input* to
Expand All @@ -249,6 +258,7 @@ pub fn lower_crate(
sess,
cstore,
resolver,
nt_to_tokenstream,
items: BTreeMap::new(),
trait_items: BTreeMap::new(),
impl_items: BTreeMap::new(),
Expand Down Expand Up @@ -1022,7 +1032,7 @@ impl<'a> LoweringContext<'a> {
fn lower_token(&mut self, token: Token) -> TokenStream {
match token.kind {
token::Interpolated(nt) => {
let tts = nt.to_tokenstream(&self.sess.parse_sess, token.span);
let tts = (self.nt_to_tokenstream)(&nt, &self.sess.parse_sess, token.span);
self.lower_token_stream(tts)
}
_ => TokenTree::Token(token).into(),
Expand Down
3 changes: 2 additions & 1 deletion src/librustc_interface/passes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,8 @@ pub fn lower_to_hir(
) -> Result<hir::map::Forest> {
// Lower AST to HIR.
let hir_forest = time(sess, "lowering AST -> HIR", || {
let hir_crate = lower_crate(sess, cstore, &dep_graph, &krate, resolver);
let nt_to_tokenstream = syntax::parse::nt_to_tokenstream;
let hir_crate = lower_crate(sess, cstore, &dep_graph, &krate, resolver, nt_to_tokenstream);

if sess.opts.debugging_opts.hir_stats {
hir_stats::print_hir_stats(&hir_crate);
Expand Down
2 changes: 1 addition & 1 deletion src/libsyntax/ext/proc_macro_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
}

Interpolated(nt) => {
let stream = nt.to_tokenstream(sess, span);
let stream = parse::nt_to_tokenstream(&nt, sess, span);
TokenTree::Group(Group {
delimiter: Delimiter::None,
stream,
Expand Down
138 changes: 134 additions & 4 deletions src/libsyntax/parse/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@ use crate::ast::{self, CrateConfig, NodeId};
use crate::early_buffered_lints::{BufferedEarlyLint, BufferedEarlyLintId};
use crate::source_map::{SourceMap, FilePathMapping};
use crate::feature_gate::UnstableFeatures;
use crate::parse::parser::Parser;
use crate::parse::parser::emit_unclosed_delims;
use crate::parse::token::TokenKind;
use crate::tokenstream::{TokenStream, TokenTree};
use crate::parse::parser::{Parser, emit_unclosed_delims};
use crate::parse::token::{Nonterminal, TokenKind};
use crate::tokenstream::{self, TokenStream, TokenTree};
use crate::print::pprust;
use crate::symbol::Symbol;

Expand All @@ -24,6 +23,8 @@ use std::borrow::Cow;
use std::path::{Path, PathBuf};
use std::str;

use log::info;

#[cfg(test)]
mod tests;

Expand Down Expand Up @@ -407,3 +408,132 @@ impl SeqSep {
}
}
}

// NOTE(Centril): The following probably shouldn't be here but it acknowledges the
// fact that architecturally, we are using parsing (read on below to understand why).

/// Turns a `Nonterminal` back into a `TokenStream`.
///
/// Two candidate streams are produced:
///
/// * a *cached* stream, available only for items, trait items and impl items
///   (their stored tokens with the attributes prepended), identifiers,
///   lifetimes and token trees;
/// * a *reparsed* stream, obtained by pretty-printing `nt` and parsing the
///   resulting string again (this one is always computed).
///
/// The cached stream is returned only when it is "probably equal" to the
/// reparsed one; in every other case the reparsed stream is returned.
pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> TokenStream {
    // A `Nonterminal` usually wraps an already-parsed AST node, so producing
    // tokens for it means "un-parsing" that node.
    //
    // There is currently no way to do that which is guaranteed to be
    // lossless. Stringifying the node and parsing the string again always
    // works, but throws away all span information.
    //
    // For that reason some AST nodes carry the token stream they were parsed
    // from. Try to recover such a lossless stream first.
    let cached = match *nt {
        Nonterminal::NtItem(ref it) => {
            prepend_attrs(sess, &it.attrs, it.tokens.as_ref(), span)
        }
        Nonterminal::NtTraitItem(ref it) => {
            prepend_attrs(sess, &it.attrs, it.tokens.as_ref(), span)
        }
        Nonterminal::NtImplItem(ref it) => {
            prepend_attrs(sess, &it.attrs, it.tokens.as_ref(), span)
        }
        Nonterminal::NtIdent(ident, is_raw) => {
            let kind = token::Ident(ident.name, is_raw);
            Some(tokenstream::TokenTree::token(kind, ident.span).into())
        }
        Nonterminal::NtLifetime(ident) => {
            let kind = token::Lifetime(ident.name);
            Some(tokenstream::TokenTree::token(kind, ident.span).into())
        }
        Nonterminal::NtTT(ref tt) => Some(tt.clone().into()),
        _ => None,
    };

    // FIXME(#43081): Avoid this pretty-print + reparse hack
    let printed = pprust::nonterminal_to_string(nt);
    let reparse_name = FileName::macro_expansion_source_code(&printed);
    let reparsed = parse_stream_from_source_str(reparse_name, printed, sess, Some(span));

    // Early compiler phases may edit the AST in place (e.g. add or remove
    // attributes) without invalidating or updating the cached tokens. So when
    // the "lossless" stream disagrees with the stringified one (which has
    // historically been far more battle-tested), the lossy stream wins and
    // span information is lost.
    //
    // The comparison is deliberately not `==`: spans must be ignored, and on
    // top of that it is only a "probable" equality, which is admittedly an
    // odd notion. The goal is to notice real AST changes such as a processed
    // `#[cfg]` or an unusual `macro_rules!` expansion.
    //
    // What must *not* count as a difference is, for example, a literal like
    // `0xf` being stringified as `15`: the cached stream is then not
    // token-wise `==` (even ignoring spans) to the stringified stream.
    //
    // Hence "probably equal" asks: does every token, recursively, have the
    // same discriminant? Token values are essentially not inspected, and it
    // is assumed that fine-grained edits -- including adding or removing
    // typically non-semantic tokens like extra braces and commas -- do not
    // occur.
    match cached {
        None => reparsed,
        Some(cached) => {
            if cached.probably_equal_for_proc_macro(&reparsed) {
                cached
            } else {
                info!("cached tokens found, but they're not \"probably equal\", \
                       going with stringified version");
                reparsed
            }
        }
    }
}

/// Builds a token stream consisting of `attrs` followed by the cached `tokens`.
///
/// Returns `None` when there are no cached tokens. When `attrs` is empty the
/// cached tokens are returned as-is (cloned).
///
/// # Panics
///
/// Panics if any attribute in `attrs` is not an outer attribute.
fn prepend_attrs(
    sess: &ParseSess,
    attrs: &[ast::Attribute],
    tokens: Option<&tokenstream::TokenStream>,
    span: syntax_pos::Span
) -> Option<tokenstream::TokenStream> {
    let cached = tokens?;
    if attrs.is_empty() {
        return Some(cached.clone());
    }

    let mut out = tokenstream::TokenStreamBuilder::new();
    for attr in attrs {
        assert_eq!(attr.style, ast::AttrStyle::Outer,
                   "inner attributes should prevent cached tokens from existing");

        let printed = pprust::attribute_to_string(attr);
        let reparse_name = FileName::macro_expansion_source_code(&printed);

        // Sugared doc comments are simply pretty-printed and parsed again.
        if attr.is_sugared_doc {
            out.push(parse_stream_from_source_str(reparse_name, printed, sess, Some(span)));
            continue;
        }

        // Hand-assemble `# [ $path $tokens ]`; `inner` collects the bracket contents.
        let mut inner = tokenstream::TokenStreamBuilder::new();

        let segments = &attr.path.segments;
        if segments.len() != 1 || segments[0].args.is_some() {
            // Anything other than a plain single-segment path goes through a
            // reparse hack that should eventually be removed.
            inner.push(parse_stream_from_source_str(reparse_name, printed, sess, Some(span)));
        } else {
            // A simple path is emitted directly as one identifier token.
            let ident = segments[0].ident;
            let is_raw = ident.as_str().starts_with("r#");
            let kind = token::Ident(ident.name, is_raw);
            inner.push(tokenstream::TokenTree::token(kind, ident.span));
        }

        inner.push(attr.tokens.clone());

        // Both `#` and `[ ... ]` get the span of the whole attribute. That is
        // wider than either token really is, but hopefully "good enough" for
        // now at least.
        out.push(tokenstream::TokenTree::token(token::Pound, attr.span));
        out.push(tokenstream::TokenTree::Delimited(
            tokenstream::DelimSpan::from_single(attr.span),
            token::DelimToken::Bracket,
            inner.build().into(),
        ));
    }

    out.push(cached.clone());
    Some(out.build())
}
137 changes: 3 additions & 134 deletions src/libsyntax/parse/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,13 @@ pub use DelimToken::*;
pub use LitKind::*;
pub use TokenKind::*;

use crate::ast::{self};
use crate::parse::{parse_stream_from_source_str, ParseSess};
use crate::print::pprust;
use crate::ast;
use crate::ptr::P;
use crate::symbol::kw;
use crate::tokenstream::{self, DelimSpan, TokenStream, TokenTree};
use crate::tokenstream::TokenTree;

use syntax_pos::symbol::Symbol;
use syntax_pos::{self, Span, FileName, DUMMY_SP};
use log::info;
use syntax_pos::{self, Span, DUMMY_SP};

use std::fmt;
use std::mem;
Expand Down Expand Up @@ -737,131 +734,3 @@ impl fmt::Debug for Nonterminal {
}
}
}

impl Nonterminal {
    /// Turns this nonterminal back into a `TokenStream`.
    ///
    /// A cached stream is looked up for items, trait items and impl items
    /// (stored tokens with attributes prepended), identifiers, lifetimes and
    /// token trees. Independently, the nonterminal is pretty-printed and the
    /// resulting string is parsed again. The cached stream is returned only
    /// when it is "probably equal" to the reparsed one; otherwise the
    /// reparsed stream is returned.
    pub fn to_tokenstream(&self, sess: &ParseSess, span: Span) -> TokenStream {
        // A `Nonterminal` usually wraps an already-parsed AST node, so
        // producing tokens for it means "un-parsing" that node.
        //
        // There is currently no way to do that which is guaranteed to be
        // lossless. Stringifying the node and parsing the string again always
        // works, but throws away all span information.
        //
        // For that reason some AST nodes carry the token stream they were
        // parsed from. Try to recover such a lossless stream first.
        let cached = match *self {
            Nonterminal::NtItem(ref it) => {
                prepend_attrs(sess, &it.attrs, it.tokens.as_ref(), span)
            }
            Nonterminal::NtTraitItem(ref it) => {
                prepend_attrs(sess, &it.attrs, it.tokens.as_ref(), span)
            }
            Nonterminal::NtImplItem(ref it) => {
                prepend_attrs(sess, &it.attrs, it.tokens.as_ref(), span)
            }
            Nonterminal::NtIdent(ident, is_raw) => {
                let kind = Ident(ident.name, is_raw);
                Some(TokenTree::token(kind, ident.span).into())
            }
            Nonterminal::NtLifetime(ident) => {
                let kind = Lifetime(ident.name);
                Some(TokenTree::token(kind, ident.span).into())
            }
            Nonterminal::NtTT(ref tt) => Some(tt.clone().into()),
            _ => None,
        };

        // FIXME(#43081): Avoid this pretty-print + reparse hack
        let printed = pprust::nonterminal_to_string(self);
        let reparse_name = FileName::macro_expansion_source_code(&printed);
        let reparsed = parse_stream_from_source_str(reparse_name, printed, sess, Some(span));

        // Early compiler phases may edit the AST in place (e.g. add or remove
        // attributes) without invalidating or updating the cached tokens. So
        // when the "lossless" stream disagrees with the stringified one
        // (which has historically been far more battle-tested), the lossy
        // stream wins and span information is lost.
        //
        // The comparison is deliberately not `==`: spans must be ignored, and
        // on top of that it is only a "probable" equality, which is
        // admittedly an odd notion. The goal is to notice real AST changes
        // such as a processed `#[cfg]` or an unusual `macro_rules!` expansion.
        //
        // What must *not* count as a difference is, for example, a literal
        // like `0xf` being stringified as `15`: the cached stream is then not
        // token-wise `==` (even ignoring spans) to the stringified stream.
        //
        // Hence "probably equal" asks: does every token, recursively, have
        // the same discriminant? Token values are essentially not inspected,
        // and it is assumed that fine-grained edits -- including adding or
        // removing typically non-semantic tokens like extra braces and
        // commas -- do not occur.
        match cached {
            None => reparsed,
            Some(cached) => {
                if cached.probably_equal_for_proc_macro(&reparsed) {
                    cached
                } else {
                    info!("cached tokens found, but they're not \"probably equal\", \
                           going with stringified version");
                    reparsed
                }
            }
        }
    }
}

/// Builds a token stream consisting of `attrs` followed by the cached `tokens`.
///
/// Returns `None` when there are no cached tokens. When `attrs` is empty the
/// cached tokens are returned as-is (cloned).
///
/// # Panics
///
/// Panics if any attribute in `attrs` is not an outer attribute.
fn prepend_attrs(sess: &ParseSess,
                 attrs: &[ast::Attribute],
                 tokens: Option<&tokenstream::TokenStream>,
                 span: syntax_pos::Span)
    -> Option<tokenstream::TokenStream>
{
    let cached = tokens?;
    if attrs.is_empty() {
        return Some(cached.clone());
    }

    let mut out = tokenstream::TokenStreamBuilder::new();
    for attr in attrs {
        assert_eq!(attr.style, ast::AttrStyle::Outer,
                   "inner attributes should prevent cached tokens from existing");

        let printed = pprust::attribute_to_string(attr);
        let reparse_name = FileName::macro_expansion_source_code(&printed);

        // Sugared doc comments are simply pretty-printed and parsed again.
        if attr.is_sugared_doc {
            out.push(parse_stream_from_source_str(reparse_name, printed, sess, Some(span)));
            continue;
        }

        // Hand-assemble `# [ $path $tokens ]`; `inner` collects the bracket contents.
        let mut inner = tokenstream::TokenStreamBuilder::new();

        let segments = &attr.path.segments;
        if segments.len() != 1 || segments[0].args.is_some() {
            // Anything other than a plain single-segment path goes through a
            // reparse hack that should eventually be removed.
            inner.push(parse_stream_from_source_str(reparse_name, printed, sess, Some(span)));
        } else {
            // A simple path is emitted directly as one identifier token.
            let ident = segments[0].ident;
            let is_raw = ident.as_str().starts_with("r#");
            let kind = Ident(ident.name, is_raw);
            inner.push(tokenstream::TokenTree::token(kind, ident.span));
        }

        inner.push(attr.tokens.clone());

        // Both `#` and `[ ... ]` get the span of the whole attribute. That is
        // wider than either token really is, but hopefully "good enough" for
        // now at least.
        out.push(tokenstream::TokenTree::token(Pound, attr.span));
        out.push(tokenstream::TokenTree::Delimited(
            DelimSpan::from_single(attr.span),
            DelimToken::Bracket,
            inner.build().into(),
        ));
    }

    out.push(cached.clone());
    Some(out.build())
}