Skip to content

Commit

Permalink
Auto merge of rust-lang#128725 - nnethercote:fix-assoc-expr-collect-p…
Browse files Browse the repository at this point in the history
…roblems, r=petrochenkov

Fix problems with assoc expr token collection

There are several cases involving assoc exprs and attributes where the current code does the wrong thing. This PR adds some tests that demonstrate the problems and then fixes them.

r? `@petrochenkov`
  • Loading branch information
bors committed Aug 16, 2024
2 parents 4b7d074 + 9d31f86 commit be0ea0c
Show file tree
Hide file tree
Showing 12 changed files with 483 additions and 298 deletions.
22 changes: 14 additions & 8 deletions compiler/rustc_parse/src/parser/attr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ use rustc_span::{sym, BytePos, Span};
use thin_vec::ThinVec;
use tracing::debug;

use super::{AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle};
use super::{
AttrWrapper, Capturing, FnParseMode, ForceCollect, Parser, ParserRange, PathStyle, Trailing,
UsePreAttrPos,
};
use crate::{errors, fluent_generated as fluent, maybe_whole};

// Public for rustfmt usage
Expand Down Expand Up @@ -257,7 +260,8 @@ impl<'a> Parser<'a> {
pub fn parse_attr_item(&mut self, force_collect: ForceCollect) -> PResult<'a, ast::AttrItem> {
maybe_whole!(self, NtMeta, |attr| attr.into_inner());

let do_parse = |this: &mut Self, _empty_attrs| {
// Attr items don't have attributes.
self.collect_tokens(None, AttrWrapper::empty(), force_collect, |this, _empty_attrs| {
let is_unsafe = this.eat_keyword(kw::Unsafe);
let unsafety = if is_unsafe {
let unsafe_span = this.prev_token.span;
Expand All @@ -273,10 +277,12 @@ impl<'a> Parser<'a> {
if is_unsafe {
this.expect(&token::CloseDelim(Delimiter::Parenthesis))?;
}
Ok((ast::AttrItem { unsafety, path, args, tokens: None }, false))
};
// Attr items don't have attributes.
self.collect_tokens_trailing_token(AttrWrapper::empty(), force_collect, do_parse)
Ok((
ast::AttrItem { unsafety, path, args, tokens: None },
Trailing::No,
UsePreAttrPos::No,
))
})
}

/// Parses attributes that appear after the opening of an item. These should
Expand Down Expand Up @@ -309,8 +315,8 @@ impl<'a> Parser<'a> {
};
if let Some(attr) = attr {
// If we are currently capturing tokens (i.e. we are within a call to
// `Parser::collect_tokens_trailing_tokens`) record the token positions of this
// inner attribute, for possible later processing in a `LazyAttrTokenStream`.
// `Parser::collect_tokens`) record the token positions of this inner attribute,
// for possible later processing in a `LazyAttrTokenStream`.
if let Capturing::Yes = self.capture_state.capturing {
let end_pos = self.num_bump_calls;
let parser_range = ParserRange(start_pos..end_pos);
Expand Down
112 changes: 75 additions & 37 deletions compiler/rustc_parse/src/parser/attr_wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,40 +12,56 @@ use rustc_span::{sym, Span, DUMMY_SP};

use super::{
Capturing, FlatToken, ForceCollect, NodeRange, NodeReplacement, Parser, ParserRange,
TokenCursor,
TokenCursor, Trailing,
};

// When collecting tokens, this fully captures the start point. Usually its
// just after outer attributes, but occasionally it's before.
#[derive(Clone, Debug)]
pub(super) struct CollectPos {
start_token: (Token, Spacing),
cursor_snapshot: TokenCursor,
start_pos: u32,
}

pub(super) enum UsePreAttrPos {
No,
Yes,
}

/// A wrapper type to ensure that the parser handles outer attributes correctly.
/// When we parse outer attributes, we need to ensure that we capture tokens
/// for the attribute target. This allows us to perform cfg-expansion on
/// a token stream before we invoke a derive proc-macro.
///
/// This wrapper prevents direct access to the underlying `ast::AttrVec`.
/// Parsing code can only get access to the underlying attributes
/// by passing an `AttrWrapper` to `collect_tokens_trailing_token`.
/// by passing an `AttrWrapper` to `collect_tokens`.
/// This makes it difficult to accidentally construct an AST node
/// (which stores an `ast::AttrVec`) without first collecting tokens.
///
/// This struct has its own module, to ensure that the parser code
/// cannot directly access the `attrs` field.
#[derive(Debug, Clone)]
pub struct AttrWrapper {
pub(super) struct AttrWrapper {
attrs: AttrVec,
// The start of the outer attributes in the parser's token stream.
// This lets us create a `NodeReplacement` for the entire attribute
// target, including outer attributes.
start_pos: u32,
// target, including outer attributes. `None` if there are no outer
// attributes.
start_pos: Option<u32>,
}

impl AttrWrapper {
pub(super) fn new(attrs: AttrVec, start_pos: u32) -> AttrWrapper {
AttrWrapper { attrs, start_pos }
AttrWrapper { attrs, start_pos: Some(start_pos) }
}
pub fn empty() -> AttrWrapper {
AttrWrapper { attrs: AttrVec::new(), start_pos: u32::MAX }

pub(super) fn empty() -> AttrWrapper {
AttrWrapper { attrs: AttrVec::new(), start_pos: None }
}

pub(crate) fn take_for_recovery(self, psess: &ParseSess) -> AttrVec {
pub(super) fn take_for_recovery(self, psess: &ParseSess) -> AttrVec {
psess.dcx().span_delayed_bug(
self.attrs.get(0).map(|attr| attr.span).unwrap_or(DUMMY_SP),
"AttrVec is taken for recovery but no error is produced",
Expand All @@ -56,12 +72,12 @@ impl AttrWrapper {

/// Prepend `self.attrs` to `attrs`.
// FIXME: require passing an NT to prevent misuse of this method
pub(crate) fn prepend_to_nt_inner(mut self, attrs: &mut AttrVec) {
pub(super) fn prepend_to_nt_inner(mut self, attrs: &mut AttrVec) {
mem::swap(attrs, &mut self.attrs);
attrs.extend(self.attrs);
}

pub fn is_empty(&self) -> bool {
pub(super) fn is_empty(&self) -> bool {
self.attrs.is_empty()
}
}
Expand All @@ -77,7 +93,7 @@ fn has_cfg_or_cfg_attr(attrs: &[Attribute]) -> bool {
}

// From a value of this type we can reconstruct the `TokenStream` seen by the
// `f` callback passed to a call to `Parser::collect_tokens_trailing_token`, by
// `f` callback passed to a call to `Parser::collect_tokens`, by
// replaying the getting of the tokens. This saves us producing a `TokenStream`
// if it is never needed, e.g. a captured `macro_rules!` argument that is never
// passed to a proc macro. In practice, token stream creation happens rarely
Expand Down Expand Up @@ -166,16 +182,30 @@ impl ToAttrTokenStream for LazyAttrTokenStreamImpl {
}

impl<'a> Parser<'a> {
pub(super) fn collect_pos(&self) -> CollectPos {
CollectPos {
start_token: (self.token.clone(), self.token_spacing),
cursor_snapshot: self.token_cursor.clone(),
start_pos: self.num_bump_calls,
}
}

/// Parses code with `f`. If appropriate, it records the tokens (in
/// `LazyAttrTokenStream` form) that were parsed in the result, accessible
/// via the `HasTokens` trait. The second (bool) part of the callback's
/// via the `HasTokens` trait. The `Trailing` part of the callback's
/// result indicates if an extra token should be captured, e.g. a comma or
/// semicolon.
/// semicolon. The `UsePreAttrPos` part of the callback's result indicates
/// if we should use `pre_attr_pos` as the collection start position (only
/// required in a few cases).
///
/// The `attrs` passed in are in `AttrWrapper` form, which is opaque. The
/// `AttrVec` within is passed to `f`. See the comment on `AttrWrapper` for
/// details.
///
/// `pre_attr_pos` is the position before the outer attributes (or the node
/// itself, if no outer attributes are present). It is only needed if `f`
/// can return `UsePreAttrPos::Yes`.
///
/// Note: If your callback consumes an opening delimiter (including the
/// case where `self.token` is an opening delimiter on entry to this
/// function), you must also consume the corresponding closing delimiter.
Expand All @@ -197,11 +227,12 @@ impl<'a> Parser<'a> {
/// } // 32..33
/// } // 33..34
/// ```
pub fn collect_tokens_trailing_token<R: HasAttrs + HasTokens>(
pub(super) fn collect_tokens<R: HasAttrs + HasTokens>(
&mut self,
pre_attr_pos: Option<CollectPos>,
attrs: AttrWrapper,
force_collect: ForceCollect,
f: impl FnOnce(&mut Self, ast::AttrVec) -> PResult<'a, (R, bool)>,
f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>,
) -> PResult<'a, R> {
// We must collect if anything could observe the collected tokens, i.e.
// if any of the following conditions hold.
Expand All @@ -220,23 +251,20 @@ impl<'a> Parser<'a> {
return Ok(f(self, attrs.attrs)?.0);
}

let start_token = (self.token.clone(), self.token_spacing);
let cursor_snapshot = self.token_cursor.clone();
let start_pos = self.num_bump_calls;
let mut collect_pos = self.collect_pos();
let has_outer_attrs = !attrs.attrs.is_empty();
let parser_replacements_start = self.capture_state.parser_replacements.len();

// We set and restore `Capturing::Yes` on either side of the call to
// `f`, so we can distinguish the outermost call to
// `collect_tokens_trailing_token` (e.g. parsing `m` in the example
// above) from any inner (indirectly recursive) calls (e.g. parsing `g`
// in the example above). This distinction is used below and in
// `Parser::parse_inner_attributes`.
let (mut ret, capture_trailing) = {
// `f`, so we can distinguish the outermost call to `collect_tokens`
// (e.g. parsing `m` in the example above) from any inner (indirectly
// recursive) calls (e.g. parsing `g` in the example above). This
// distinction is used below and in `Parser::parse_inner_attributes`.
let (mut ret, capture_trailing, use_pre_attr_pos) = {
let prev_capturing = mem::replace(&mut self.capture_state.capturing, Capturing::Yes);
let ret_and_trailing = f(self, attrs.attrs);
let res = f(self, attrs.attrs);
self.capture_state.capturing = prev_capturing;
ret_and_trailing?
res?
};

// When we're not in `capture_cfg` mode, then skip collecting and
Expand Down Expand Up @@ -279,10 +307,18 @@ impl<'a> Parser<'a> {
return Ok(ret);
}

// Replace the post-attribute collection start position with the
// pre-attribute position supplied, if `f` indicated it is necessary.
// (The caller is responsible for providing a non-`None` `pre_attr_pos`
// if this is a possibility.)
if matches!(use_pre_attr_pos, UsePreAttrPos::Yes) {
collect_pos = pre_attr_pos.unwrap();
}

let parser_replacements_end = self.capture_state.parser_replacements.len();

assert!(
!(self.break_last_token && capture_trailing),
!(self.break_last_token && matches!(capture_trailing, Trailing::Yes)),
"Cannot set break_last_token and have trailing token"
);

Expand All @@ -294,7 +330,7 @@ impl<'a> Parser<'a> {
// `AttrTokenStream`, we will create the proper token.
+ self.break_last_token as u32;

let num_calls = end_pos - start_pos;
let num_calls = end_pos - collect_pos.start_pos;

// Take the captured `ParserRange`s for any inner attributes that we parsed in
// `Parser::parse_inner_attributes`, and pair them in a `ParserReplacement` with `None`,
Expand Down Expand Up @@ -328,7 +364,9 @@ impl<'a> Parser<'a> {
.iter()
.cloned()
.chain(inner_attr_parser_replacements.iter().cloned())
.map(|(parser_range, data)| (NodeRange::new(parser_range, start_pos), data))
.map(|(parser_range, data)| {
(NodeRange::new(parser_range, collect_pos.start_pos), data)
})
.collect()
};

Expand All @@ -355,9 +393,9 @@ impl<'a> Parser<'a> {
// - `tokens`: lazy tokens for `g` (with its inner attr deleted).

let tokens = LazyAttrTokenStream::new(LazyAttrTokenStreamImpl {
start_token,
start_token: collect_pos.start_token,
cursor_snapshot: collect_pos.cursor_snapshot,
num_calls,
cursor_snapshot,
break_last_token: self.break_last_token,
node_replacements,
});
Expand All @@ -368,9 +406,9 @@ impl<'a> Parser<'a> {
}

// If `capture_cfg` is set and we're inside a recursive call to
// `collect_tokens_trailing_token`, then we need to register a replace range
// if we have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager cfg-expansion
// on the captured token stream.
// `collect_tokens`, then we need to register a replace range if we
// have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager
// cfg-expansion on the captured token stream.
if self.capture_cfg
&& matches!(self.capture_state.capturing, Capturing::Yes)
&& has_cfg_or_cfg_attr(ret.attrs())
Expand All @@ -389,7 +427,8 @@ impl<'a> Parser<'a> {
// Set things up so that the entire AST node that we just parsed, including attributes,
// will be replaced with `target` in the lazy token stream. This will allow us to
// cfg-expand this AST node.
let start_pos = if has_outer_attrs { attrs.start_pos } else { start_pos };
let start_pos =
if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
self.capture_state
.parser_replacements
Expand Down Expand Up @@ -490,7 +529,6 @@ mod size_asserts {

use super::*;
// tidy-alphabetical-start
static_assert_size!(AttrWrapper, 16);
static_assert_size!(LazyAttrTokenStreamImpl, 96);
// tidy-alphabetical-end
}
19 changes: 10 additions & 9 deletions compiler/rustc_parse/src/parser/diagnostics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2487,13 +2487,14 @@ impl<'a> Parser<'a> {
pub(super) fn handle_unambiguous_unbraced_const_arg(&mut self) -> PResult<'a, P<Expr>> {
let start = self.token.span;
let attrs = self.parse_outer_attributes()?;
let expr = self.parse_expr_res(Restrictions::CONST_EXPR, attrs).map_err(|mut err| {
err.span_label(
start.shrink_to_lo(),
"while parsing a const generic argument starting here",
);
err
})?;
let (expr, _) =
self.parse_expr_res(Restrictions::CONST_EXPR, attrs).map_err(|mut err| {
err.span_label(
start.shrink_to_lo(),
"while parsing a const generic argument starting here",
);
err
})?;
if !self.expr_is_valid_const_arg(&expr) {
self.dcx().emit_err(ConstGenericWithoutBraces {
span: expr.span,
Expand Down Expand Up @@ -2613,7 +2614,7 @@ impl<'a> Parser<'a> {
let attrs = self.parse_outer_attributes()?;
self.parse_expr_res(Restrictions::CONST_EXPR, attrs)
})() {
Ok(expr) => {
Ok((expr, _)) => {
// Find a mistake like `MyTrait<Assoc == S::Assoc>`.
if snapshot.token == token::EqEq {
err.span_suggestion(
Expand Down Expand Up @@ -2671,7 +2672,7 @@ impl<'a> Parser<'a> {
})() {
// Since we don't know the exact reason why we failed to parse the type or the
// expression, employ a simple heuristic to weed out some pathological cases.
Ok(expr) if let token::Comma | token::Gt = snapshot.token.kind => {
Ok((expr, _)) if let token::Comma | token::Gt = snapshot.token.kind => {
self.restore_snapshot(snapshot);
Some(expr)
}
Expand Down
Loading

0 comments on commit be0ea0c

Please sign in to comment.