From 2b07f0fb0018649588cda06bd33c33b71c5cac70 Mon Sep 17 00:00:00 2001
From: John Clements
Date: Tue, 12 Mar 2013 11:37:31 -0700
Subject: [PATCH 1/3] field renaming

---
 src/libsyntax/ext/tt/transcribe.rs | 57 +++++++++++++++---------------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/src/libsyntax/ext/tt/transcribe.rs b/src/libsyntax/ext/tt/transcribe.rs
index 67c2f438269d8..f0e1273534ab1 100644
--- a/src/libsyntax/ext/tt/transcribe.rs
+++ b/src/libsyntax/ext/tt/transcribe.rs
@@ -26,7 +26,7 @@ use core::vec;
 `~` */
 ///an unzipping of `token_tree`s
 struct TtFrame {
-    readme: @mut ~[ast::token_tree],
+    forest: @mut ~[ast::token_tree],
     idx: uint,
     dotdotdoted: bool,
     sep: Option<Token>,
@@ -37,7 +37,7 @@ pub struct TtReader {
     sp_diag: @span_handler,
     interner: @ident_interner,
     // the unzipped tree:
-    cur: @mut TtFrame,
+    stack: @mut TtFrame,
     /* for MBE-style macro transcription */
     interpolations: LinearMap<ident, @named_match>,
     repeat_idx: ~[uint],
@@ -58,8 +58,8 @@ pub fn new_tt_reader(sp_diag: @span_handler,
     let r = @mut TtReader {
         sp_diag: sp_diag,
         interner: itr,
-        cur: @mut TtFrame {
-            readme: @mut src,
+        stack: @mut TtFrame {
+            forest: @mut src,
             idx: 0u,
             dotdotdoted: false,
             sep: None,
@@ -81,7 +81,7 @@ pub fn new_tt_reader(sp_diag: @span_handler,
 fn dup_tt_frame(f: @mut TtFrame) -> @mut TtFrame {
     @mut TtFrame {
-        readme: @mut (copy *f.readme),
+        forest: @mut (copy *f.forest),
         idx: f.idx,
         dotdotdoted: f.dotdotdoted,
         sep: copy f.sep,
@@ -96,7 +96,7 @@ pub fn dup_tt_reader(r: @mut TtReader) -> @mut TtReader {
     @mut TtReader {
         sp_diag: r.sp_diag,
         interner: r.interner,
-        cur: dup_tt_frame(r.cur),
+        stack: dup_tt_frame(r.stack),
         interpolations: r.interpolations,
         repeat_idx: copy r.repeat_idx,
         repeat_len: copy r.repeat_len,
@@ -167,7 +167,8 @@ fn lockstep_iter_size(t: token_tree, r: &mut TtReader) -> lis {
     }
 }
-
+// return the next token from the TtReader.
+// EFFECT: advances the reader's token field
 pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
     let ret_val = TokenAndSpan {
         tok: copy r.cur_tok,
@@ -175,37 +176,37 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
     };
     loop {
         {
-            let cur = &mut *r.cur;
-            let readme = &mut *cur.readme;
-            if cur.idx < readme.len() {
+            let stack = &mut *r.stack;
+            let forest = &mut *stack.forest;
+            if stack.idx < forest.len() {
                 break;
             }
         }
         /* done with this set; pop or repeat? */
-        if ! r.cur.dotdotdoted
+        if ! r.stack.dotdotdoted
             || { *r.repeat_idx.last() == *r.repeat_len.last() - 1 } {
-            match r.cur.up {
+            match r.stack.up {
               None => {
                 r.cur_tok = EOF;
                 return ret_val;
              }
              Some(tt_f) => {
-                if r.cur.dotdotdoted {
+                if r.stack.dotdotdoted {
                     r.repeat_idx.pop();
                     r.repeat_len.pop();
                 }
-                r.cur = tt_f;
-                r.cur.idx += 1u;
+                r.stack = tt_f;
+                r.stack.idx += 1u;
              }
            }
        } else { /* repeat */
-            r.cur.idx = 0u;
+            r.stack.idx = 0u;
            r.repeat_idx[r.repeat_idx.len() - 1u] += 1u;
-            match r.cur.sep {
+            match r.stack.sep {
              Some(copy tk) => {
                r.cur_tok = tk; /* repeat same span, I guess */
                return ret_val;
@@ -216,21 +217,21 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
     }
     loop { /* because it's easiest, this handles `tt_delim`
               not starting with a `tt_tok`, even though it won't happen */
-        match r.cur.readme[r.cur.idx] {
+        match r.stack.forest[r.stack.idx] {
          tt_delim(copy tts) => {
-            r.cur = @mut TtFrame {
-                readme: @mut tts,
+            r.stack = @mut TtFrame {
+                forest: @mut tts,
                idx: 0u,
                dotdotdoted: false,
                sep: None,
-                up: option::Some(r.cur)
+                up: option::Some(r.stack)
            };
            // if this could be 0-length, we'd need to potentially recur here
          }
          tt_tok(sp, copy tok) => {
            r.cur_span = sp; r.cur_tok = tok;
-            r.cur.idx += 1u;
+            r.stack.idx += 1u;
            return ret_val;
          }
          tt_seq(sp, copy tts, copy sep, zerok) => {
@@ -256,17 +257,17 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
                        once");
                }
-                r.cur.idx += 1u;
+                r.stack.idx += 1u;
                return tt_next_token(r);
            } else {
                r.repeat_len.push(len);
                r.repeat_idx.push(0u);
-                r.cur = @mut TtFrame {
-                    readme: @mut tts,
+                r.stack = @mut TtFrame {
+                    forest: @mut tts,
                    idx: 0u,
                    dotdotdoted: true,
                    sep: sep,
-                    up: Some(r.cur)
+                    up: Some(r.stack)
                };
            }
        }
@@ -280,13 +281,13 @@ pub fn tt_next_token(r: &mut TtReader) -> TokenAndSpan {
            (b) we actually can, since it's a token. */
          matched_nonterminal(nt_ident(sn,b)) => {
            r.cur_span = sp; r.cur_tok = IDENT(sn,b);
-            r.cur.idx += 1u;
+            r.stack.idx += 1u;
            return ret_val;
          }
          matched_nonterminal(ref other_whole_nt) => {
            r.cur_span = sp; r.cur_tok = INTERPOLATED(copy *other_whole_nt);
-            r.cur.idx += 1u;
+            r.stack.idx += 1u;
            return ret_val;
          }
          matched_seq(*) => {

From 556143c488f8b8b2ac25ac29efdf030017cba7d7 Mon Sep 17 00:00:00 2001
From: John Clements
Date: Fri, 8 Mar 2013 10:19:19 -0800
Subject: [PATCH 2/3] commenting parser

---
 src/libsyntax/parse/common.rs | 15 ++++++++++++++-
 src/libsyntax/parse/lexer.rs  |  7 ++++++-
 src/libsyntax/parse/mod.rs    |  8 ++++++--
 src/libsyntax/parse/parser.rs | 24 ++++++++++++++++++++++++
 4 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/src/libsyntax/parse/common.rs b/src/libsyntax/parse/common.rs
index c14c7bed1399f..ae7dd8ff96fce 100644
--- a/src/libsyntax/parse/common.rs
+++ b/src/libsyntax/parse/common.rs
@@ -159,6 +159,9 @@ pub impl Parser {
         }
     }
 
+    // if the given word is not a keyword, signal an error.
+    // if the next token is the given keyword, eat it and return
+    // true. Otherwise, return false.
     fn eat_keyword(&self, word: &~str) -> bool {
         self.require_keyword(word);
         let is_kw = match *self.token {
@@ -169,6 +172,9 @@ pub impl Parser {
         is_kw
     }
 
+    // if the given word is not a keyword, signal an error.
+    // if the next token is not the given word, signal an error.
+    // otherwise, eat it.
     fn expect_keyword(&self, word: &~str) {
         self.require_keyword(word);
         if !self.eat_keyword(word) {
@@ -182,10 +188,12 @@ pub impl Parser {
         }
     }
 
+    // return true if the given string is a strict keyword
     fn is_strict_keyword(&self, word: &~str) -> bool {
         self.strict_keywords.contains(word)
     }
 
+    // signal an error if the current token is a strict keyword
     fn check_strict_keywords(&self) {
         match *self.token {
             token::IDENT(_, false) => {
@@ -196,16 +204,19 @@ pub impl Parser {
         }
     }
 
+    // signal an error if the given string is a strict keyword
     fn check_strict_keywords_(&self, w: &~str) {
         if self.is_strict_keyword(w) {
             self.fatal(fmt!("found `%s` in ident position", *w));
         }
     }
 
+    // return true if this is a reserved keyword
    fn is_reserved_keyword(&self, word: &~str) -> bool {
         self.reserved_keywords.contains(word)
     }
 
+    // signal an error if the current token is a reserved keyword
     fn check_reserved_keywords(&self) {
         match *self.token {
             token::IDENT(_, false) => {
@@ -216,6 +227,7 @@ pub impl Parser {
         }
     }
 
+    // signal an error if the given string is a reserved keyword
     fn check_reserved_keywords_(&self, w: &~str) {
         if self.is_reserved_keyword(w) {
             self.fatal(fmt!("`%s` is a reserved keyword", *w));
@@ -223,7 +235,8 @@ pub impl Parser {
     }
 
     // expect and consume a GT. if a >> is seen, replace it
-    // with a single > and continue.
+    // with a single > and continue. If a GT is not seen,
+    // signal an error.
     fn expect_gt(&self) {
         if *self.token == token::GT {
             self.bump();
diff --git a/src/libsyntax/parse/lexer.rs b/src/libsyntax/parse/lexer.rs
index ffd2a1d801c96..512b6c0ec78eb 100644
--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
@@ -80,7 +80,8 @@ pub fn new_low_level_string_reader(span_diagnostic: @span_handler,
         last_pos: filemap.start_pos,
         col: CharPos(0),
         curr: initial_char,
-        filemap: filemap, interner: itr,
+        filemap: filemap,
+        interner: itr,
         /* dummy values; not read */
         peek_tok: token::EOF,
         peek_span: codemap::dummy_sp()
@@ -150,6 +151,7 @@ impl reader for TtReader {
 }
 
 // EFFECT: advance peek_tok and peek_span to refer to the next token.
+// EFFECT: update the interner, maybe.
 fn string_advance_token(r: @mut StringReader) {
     match (consume_whitespace_and_comments(r)) {
         Some(comment) => {
@@ -539,6 +541,9 @@ fn ident_continue(c: char) -> bool {
         || (c > 'z' && char::is_XID_continue(c))
 }
 
+// return the next token from the string
+// EFFECT: advances the input past that token
+// EFFECT: updates the interner
 fn next_token_inner(rdr: @mut StringReader) -> token::Token {
     let mut accum_str = ~"";
     let mut c = rdr.curr;
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index cdb67129c355b..ba7a9624c1895 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -45,10 +45,14 @@ pub mod classify;
 /// Reporting obsolete syntax
 pub mod obsolete;
 
+// info about a parsing session.
+// This structure and the reader both have
+// an interner associated with them. If they're
+// not the same, bad things can happen.
 pub struct ParseSess {
-    cm: @codemap::CodeMap,
+    cm: @codemap::CodeMap, // better be the same as the one in the reader!
     next_id: node_id,
-    span_diagnostic: @span_handler,
+    span_diagnostic: @span_handler, // better be the same as the one in the reader!
     interner: @ident_interner,
 }
 
diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs
index d93e5995d4f8e..fc5254726534d 100644
--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
@@ -246,6 +246,7 @@ pub fn Parser(sess: @mut ParseSess,
     }
 }
 
+// ooh, nasty mutable fields everywhere....
 pub struct Parser {
     sess: @mut ParseSess,
     cfg: crate_cfg,
@@ -338,6 +339,7 @@ pub impl Parser {
         self.sess.interner.get(id)
     }
 
+    // is this one of the keywords that signals a closure type?
     fn token_is_closure_keyword(&self, tok: &token::Token) -> bool {
         self.token_is_keyword(&~"pure", tok) ||
         self.token_is_keyword(&~"unsafe", tok) ||
@@ -345,6 +347,7 @@ pub impl Parser {
         self.token_is_keyword(&~"fn", tok)
     }
 
+    // parse a ty_bare_fun type:
     fn parse_ty_bare_fn(&self) -> ty_ {
         /*
@@ -372,6 +375,7 @@ pub impl Parser {
         });
     }
 
+    // parse a ty_closure type
     fn parse_ty_closure(&self,
                         sigil: ast::Sigil,
                         region: Option<@ast::Lifetime>) -> ty_
@@ -430,6 +434,7 @@ pub impl Parser {
         }
     }
 
+    // parse a function type (following the 'fn')
     fn parse_ty_fn_decl(&self) -> (fn_decl, OptVec<ast::Lifetime>) {
         /*
@@ -541,12 +546,14 @@ pub impl Parser {
     }
 
+    // parse a possibly mutable type
     fn parse_mt(&self) -> mt {
         let mutbl = self.parse_mutability();
         let t = self.parse_ty(false);
         mt { ty: t, mutbl: mutbl }
     }
 
+    // parse [mut/const/imm] ID : TY
     fn parse_ty_field(&self) -> ty_field {
         let lo = self.span.lo;
         let mutbl = self.parse_mutability();
@@ -563,6 +570,7 @@ pub impl Parser {
         )
     }
 
+    // parse optional return type [ -> TY ] in function decl
     fn parse_ret_ty(&self) -> (ret_style, @Ty) {
         return if self.eat(&token::RARROW) {
             let lo = self.span.lo;
@@ -591,6 +599,7 @@ pub impl Parser {
         }
     }
 
+    // parse a type.
     // Useless second parameter for compatibility with quasiquote macros.
     // Bleh!
     fn parse_ty(&self, _: bool) -> @Ty {
@@ -627,15 +636,19 @@ pub impl Parser {
                 t
             }
         } else if *self.token == token::AT {
+            // MANAGED POINTER
             self.bump();
             self.parse_box_or_uniq_pointee(ManagedSigil, ty_box)
         } else if *self.token == token::TILDE {
+            // OWNED POINTER
             self.bump();
             self.parse_box_or_uniq_pointee(OwnedSigil, ty_uniq)
         } else if *self.token == token::BINOP(token::STAR) {
+            // STAR POINTER (bare pointer?)
             self.bump();
             ty_ptr(self.parse_mt())
         } else if *self.token == token::LBRACE {
+            // STRUCTURAL RECORD (remove?)
             let elems = self.parse_unspanned_seq(
                 &token::LBRACE,
                 &token::RBRACE,
@@ -648,6 +661,7 @@ pub impl Parser {
             self.obsolete(*self.last_span, ObsoleteRecordType);
             ty_nil
         } else if *self.token == token::LBRACKET {
+            // VECTOR
             self.expect(&token::LBRACKET);
             let mt = self.parse_mt();
             if mt.mutbl == m_mutbl { // `m_const` too after snapshot
@@ -663,16 +677,20 @@ pub impl Parser {
             self.expect(&token::RBRACKET);
             t
         } else if *self.token == token::BINOP(token::AND) {
+            // BORROWED POINTER
             self.bump();
             self.parse_borrowed_pointee()
         } else if self.eat_keyword(&~"extern") {
+            // EXTERN FUNCTION
             self.parse_ty_bare_fn()
         } else if self.token_is_closure_keyword(&copy *self.token) {
+            // CLOSURE
             let result = self.parse_ty_closure(ast::BorrowedSigil, None);
             self.obsolete(*self.last_span, ObsoleteBareFnType);
             result
         } else if *self.token == token::MOD_SEP
             || is_ident_or_path(&*self.token) {
+            // NAMED TYPE
             let path = self.parse_path_with_tps(false);
             ty_path(path, self.get_id())
         } else {
@@ -881,6 +899,8 @@ pub impl Parser {
         let global = self.eat(&token::MOD_SEP);
         let mut ids = ~[];
         loop {
+            // if there's a ::< coming, stop processing
+            // the path.
             let is_not_last = self.look_ahead(2u) != token::LT &&
                 self.look_ahead(1u) == token::MOD_SEP;
             if is_not_last {
                 ids.push(self.parse_ident());
                 self.expect(&token::MOD_SEP);
             } else {
@@ -900,6 +920,9 @@ pub impl Parser {
             types: ~[]
         }
     }
 
+    // parse a path optionally with type parameters. If 'colons'
+    // is true, then type parameters must be preceded by colons,
+    // as in a::t::<t1,t2>
     fn parse_path_with_tps(&self, colons: bool) -> @ast::path {
         debug!("parse_path_with_tps(colons=%b)", colons);
@@ -1067,6 +1090,7 @@ pub impl Parser {
         self.token_is_keyword(&~"const", tok)
     }
 
+    // parse mutability declaration (mut/const/imm)
     fn parse_mutability(&self) -> mutability {
         if self.eat_keyword(&~"mut") {
             m_mutbl

From f2e47cddf835af49a925d91639d7fefb8c23d08f Mon Sep 17 00:00:00 2001
From: John Clements
Date: Wed, 6 Mar 2013 12:38:13 -0800
Subject: [PATCH 3/3] change to parsing using tts

also, updates test cases a bit
---
 src/librustc/driver/driver.rs |  2 +-
 src/libsyntax/parse/mod.rs    | 44 ++++++++++++++++++++++++++++++++++-
 2 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/src/librustc/driver/driver.rs b/src/librustc/driver/driver.rs
index 3aa0959317a4a..1f463a1614d23 100644
--- a/src/librustc/driver/driver.rs
+++ b/src/librustc/driver/driver.rs
@@ -150,7 +150,7 @@ pub fn parse_input(sess: Session, +cfg: ast::crate_cfg,
                    input: input) -> @ast::crate {
     match input {
       file_input(ref file) => {
-        parse::parse_crate_from_file(&(*file), cfg, sess.parse_sess)
+        parse::parse_crate_from_file_using_tts(&(*file), cfg, sess.parse_sess)
       }
      str_input(ref src) => {
        // FIXME (#2319): Don't really want to box the source string
diff --git a/src/libsyntax/parse/mod.rs b/src/libsyntax/parse/mod.rs
index ba7a9624c1895..10d3de4acee81 100644
--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
@@ -94,6 +94,19 @@ pub fn parse_crate_from_file(
     // why is there no p.abort_if_errors here?
 }
 
+pub fn parse_crate_from_file_using_tts(
+    input: &Path,
+    cfg: ast::crate_cfg,
+    sess: @mut ParseSess
+) -> @ast::crate {
+    let p = new_parser_from_file(sess, /*bad*/ copy cfg, input);
+    let tts = p.parse_all_token_trees();
+    new_parser_from_tts(sess,cfg,tts).parse_crate_mod(/*bad*/ copy cfg)
+    // why is there no p.abort_if_errors here?
+}
+
+
+
 pub fn parse_crate_from_source_str(
     name: ~str,
     source: @~str,
@@ -317,6 +330,7 @@ mod test {
     use std;
     use core::io;
     use core::option::None;
+    use ast;
 
     #[test] fn to_json_str<E : Encodable<std::json::Encoder>>(val: @E) -> ~str {
         do io::with_str_writer |writer| {
@@ -324,10 +338,38 @@ mod test {
         }
     }
 
+    fn string_to_crate (source_str : @~str) -> @ast::crate {
+        parse_crate_from_source_str(
+            ~"bogofile",
+            source_str,
+            ~[],
+            new_parse_sess(None))
+    }
+
+    fn string_to_tt_to_crate (source_str : @~str) -> @ast::crate {
+        let tts = parse_tts_from_source_str(
+            ~"bogofile",
+            source_str,
+            ~[],
+            new_parse_sess(None));
+        new_parser_from_tts(new_parse_sess(None),~[],tts)
+            .parse_crate_mod(~[])
+    }
+
+    // make sure that parsing from TTs produces the same result
+    // as parsing from strings
+    #[test] fn tts_produce_the_same_result () {
+        let source_str = @~"fn foo (x : int) { x; }";
+        assert_eq!(string_to_tt_to_crate(source_str),
+                   string_to_crate(source_str));
+    }
+
+    // check the contents of the tt manually:
     #[test] fn alltts () {
+        let source_str = @~"fn foo (x : int) { x; }";
         let tts = parse_tts_from_source_str(
             ~"bogofile",
-            @~"fn foo (x : int) { x; }",
+            source_str,
             ~[],
             new_parse_sess(None));
         assert_eq!(