From 593fdd3d45d7565e34dc429788fa81ca2e25a2d4 Mon Sep 17 00:00:00 2001
From: Aaron Hill <aa1ronham@gmail.com>
Date: Sat, 26 Sep 2020 21:56:29 -0400
Subject: [PATCH] Rewrite `collect_tokens` implementations to use a flattened
 buffer

Instead of trying to collect tokens at each depth, we 'flatten' the
stream as we go along, pushing open/close delimiters to our buffer
just like regular tokens. Once capturing is complete, we reconstruct a
nested `TokenTree::Delimited` structure, producing a normal
`TokenStream`.

The reconstructed `TokenStream` is not created immediately - instead,
it is produced on-demand by a closure (wrapped in a new
`LazyTokenStream` type). This closure stores a clone of the original
`TokenCursor`, plus a record of the number of calls to
`next()/next_desugared()`. This is sufficient to reconstruct the
tokenstream seen by the callback without storing any additional state.
If the tokenstream is never used (e.g. when a captured `macro_rules!`
argument is never passed to a proc macro), we never actually create a
`TokenStream`.

This implementation has a number of advantages over the previous one:

* It is significantly simpler, with no edge cases around capturing the
  start/end of a delimited group.

* It can be easily extended to allow replacing tokens at an arbitrary
  'depth' by just using `Vec::splice` at the proper position. This is
  important for PR #76130, which requires us to track information about
  attributes along with tokens.

* The lazy approach to `TokenStream` construction allows us to easily
  parse an AST struct, and then decide after the fact whether we need a
  `TokenStream`. This will be useful when we start collecting tokens
  for `Attribute` - we can discard the `LazyTokenStream` if the parsed
  attribute doesn't need tokens (e.g. is a builtin attribute).

The performance impact seems to be negligible (see
https://github.com/rust-lang/rust/pull/77250#issuecomment-703960604).
There is a small slowdown on a few benchmarks, but it only rises above
1% for incremental builds, where it represents a larger fraction of the
much smaller instruction count. There is a ~1% speedup on a few other
incremental benchmarks - my guess is that the speedups and slowdowns
will usually cancel out in practice.
---
 compiler/rustc_ast/src/ast.rs           |  20 +-
 compiler/rustc_ast/src/tokenstream.rs   |  74 ++++++-
 compiler/rustc_parse/src/lib.rs         |  23 +-
 compiler/rustc_parse/src/parser/attr.rs |  15 +-
 compiler/rustc_parse/src/parser/expr.rs |  19 +-
 compiler/rustc_parse/src/parser/item.rs |   5 +-
 compiler/rustc_parse/src/parser/mod.rs  | 265 ++++++++++++------------
 7 files changed, 254 insertions(+), 167 deletions(-)

diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs
index ea84fc0095f76..9eb934c0c9e74 100644
--- a/compiler/rustc_ast/src/ast.rs
+++ b/compiler/rustc_ast/src/ast.rs
@@ -24,7 +24,7 @@ pub use UnsafeSource::*;
 
 use crate::ptr::P;
 use crate::token::{self, CommentKind, DelimToken};
-use crate::tokenstream::{DelimSpan, TokenStream, TokenTree};
+use crate::tokenstream::{DelimSpan, LazyTokenStream, TokenStream, TokenTree};
 
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::stack::ensure_sufficient_stack;
@@ -97,7 +97,7 @@ pub struct Path {
     /// The segments in the path: the things separated by `::`.
     /// Global paths begin with `kw::PathRoot`.
     pub segments: Vec<PathSegment>,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 impl PartialEq<Symbol> for Path {
@@ -535,7 +535,7 @@ pub struct Block {
     /// Distinguishes between `unsafe { ... }` and `{ ... }`.
     pub rules: BlockCheckMode,
     pub span: Span,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 /// A match pattern.
@@ -546,7 +546,7 @@ pub struct Pat {
     pub id: NodeId,
     pub kind: PatKind,
     pub span: Span,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 impl Pat {
@@ -892,7 +892,7 @@ pub struct Stmt {
     pub id: NodeId,
     pub kind: StmtKind,
     pub span: Span,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 impl Stmt {
@@ -1040,7 +1040,7 @@ pub struct Expr {
     pub kind: ExprKind,
     pub span: Span,
     pub attrs: AttrVec,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 // `Expr` is used a lot. Make sure it doesn't unintentionally get bigger.
@@ -1835,7 +1835,7 @@ pub struct Ty {
     pub id: NodeId,
     pub kind: TyKind,
     pub span: Span,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 impl Clone for Ty {
@@ -2408,7 +2408,7 @@ impl rustc_serialize::Decodable for AttrId {
 pub struct AttrItem {
     pub path: Path,
     pub args: MacArgs,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 /// A list of attributes.
@@ -2482,7 +2482,7 @@ pub enum CrateSugar {
 pub struct Visibility {
     pub kind: VisibilityKind,
     pub span: Span,
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 #[derive(Clone, Encodable, Decodable, Debug)]
@@ -2569,7 +2569,7 @@ pub struct Item {
     ///
     /// Note that the tokens here do not include the outer attributes, but will
     /// include inner attributes.
-    pub tokens: Option<TokenStream>,
+    pub tokens: Option<LazyTokenStream>,
 }
 
 impl Item {
diff --git a/compiler/rustc_ast/src/tokenstream.rs b/compiler/rustc_ast/src/tokenstream.rs
index 8acb6b2f37589..8a7277fa7d956 100644
--- a/compiler/rustc_ast/src/tokenstream.rs
+++ b/compiler/rustc_ast/src/tokenstream.rs
@@ -16,8 +16,9 @@ use crate::token::{self, DelimToken, Token, TokenKind};
 
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
-use rustc_data_structures::sync::Lrc;
+use rustc_data_structures::sync::{self, Lrc};
 use rustc_macros::HashStable_Generic;
+use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
 use rustc_span::{Span, DUMMY_SP};
 
 use smallvec::{smallvec, SmallVec};
@@ -119,6 +120,77 @@ where
     }
 }
 
+// A cloneable callback which produces a `TokenStream`. Each clone
+// of this should produce the same `TokenStream`
+pub trait CreateTokenStream: sync::Send + sync::Sync + FnOnce() -> TokenStream {
+    // Workaround for the fact that `Clone` is not object-safe
+    fn clone_it(&self) -> Box<dyn CreateTokenStream>;
+}
+
+impl<F: 'static + Clone + sync::Send + sync::Sync + FnOnce() -> TokenStream> CreateTokenStream
+    for F
+{
+    fn clone_it(&self) -> Box<dyn CreateTokenStream> {
+        Box::new(self.clone())
+    }
+}
+
+impl Clone for Box<dyn CreateTokenStream> {
+    fn clone(&self) -> Self {
+        let val: &(dyn CreateTokenStream) = &**self;
+        val.clone_it()
+    }
+}
+
+/// A lazy version of `TokenStream`, which may defer creation
+/// of an actual `TokenStream` until it is needed.
+pub type LazyTokenStream = Lrc<LazyTokenStreamInner>;
+
+#[derive(Clone)]
+pub enum LazyTokenStreamInner {
+    Lazy(Box<dyn CreateTokenStream>),
+    Ready(TokenStream),
+}
+
+impl std::fmt::Debug for LazyTokenStreamInner {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            LazyTokenStreamInner::Lazy(..) => f.debug_struct("LazyTokenStream::Lazy").finish(),
+            LazyTokenStreamInner::Ready(..) => f.debug_struct("LazyTokenStream::Ready").finish(),
+        }
+    }
+}
+
+impl LazyTokenStreamInner {
+    pub fn into_token_stream(&self) -> TokenStream {
+        match self {
+            // Note that we do not cache this. If this ever becomes a performance
+            // problem, we should investigate wrapping `LazyTokenStreamInner`
+            // in a lock
+            LazyTokenStreamInner::Lazy(cb) => (cb.clone())(),
+            LazyTokenStreamInner::Ready(stream) => stream.clone(),
+        }
+    }
+}
+
+impl Encodable for LazyTokenStreamInner {
+    fn encode<S: Encoder>(&self, _s: &mut S) -> Result<(), S::Error> {
+        panic!("Attempted to encode LazyTokenStream");
+    }
+}
+
+impl Decodable for LazyTokenStreamInner {
+    fn decode<D: Decoder>(_d: &mut D) -> Result<Self, D::Error> {
+        panic!("Attempted to decode LazyTokenStream");
+    }
+}
+
+impl<CTX> HashStable<CTX> for LazyTokenStreamInner {
+    fn hash_stable(&self, _hcx: &mut CTX, _hasher: &mut StableHasher) {
+        panic!("Attempted to compute stable hash for LazyTokenStream");
+    }
+}
+
 /// A `TokenStream` is an abstract sequence of tokens, organized into `TokenTree`s.
 ///
 /// The goal is for procedural macros to work with `TokenStream`s and `TokenTree`s
diff --git a/compiler/rustc_parse/src/lib.rs b/compiler/rustc_parse/src/lib.rs
index 9a187c6285e4b..e073f57108838 100644
--- a/compiler/rustc_parse/src/lib.rs
+++ b/compiler/rustc_parse/src/lib.rs
@@ -8,7 +8,7 @@
 use rustc_ast as ast;
 use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
-use rustc_ast::tokenstream::{self, TokenStream, TokenTree};
+use rustc_ast::tokenstream::{self, LazyTokenStream, TokenStream, TokenTree};
 use rustc_ast_pretty::pprust;
 use rustc_data_structures::sync::Lrc;
 use rustc_errors::{Diagnostic, FatalError, Level, PResult};
@@ -248,29 +248,32 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
     // As a result, some AST nodes are annotated with the token stream they
     // came from. Here we attempt to extract these lossless token streams
     // before we fall back to the stringification.
+
+    let convert_tokens = |tokens: Option<LazyTokenStream>| tokens.map(|t| t.into_token_stream());
+
     let tokens = match *nt {
         Nonterminal::NtItem(ref item) => {
             prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
         }
-        Nonterminal::NtBlock(ref block) => block.tokens.clone(),
+        Nonterminal::NtBlock(ref block) => convert_tokens(block.tokens.clone()),
         Nonterminal::NtStmt(ref stmt) => {
             // FIXME: We currently only collect tokens for `:stmt`
             // matchers in `macro_rules!` macros. When we start collecting
             // tokens for attributes on statements, we will need to prepend
             // attributes here
-            stmt.tokens.clone()
+            convert_tokens(stmt.tokens.clone())
         }
-        Nonterminal::NtPat(ref pat) => pat.tokens.clone(),
-        Nonterminal::NtTy(ref ty) => ty.tokens.clone(),
+        Nonterminal::NtPat(ref pat) => convert_tokens(pat.tokens.clone()),
+        Nonterminal::NtTy(ref ty) => convert_tokens(ty.tokens.clone()),
         Nonterminal::NtIdent(ident, is_raw) => {
             Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into())
         }
         Nonterminal::NtLifetime(ident) => {
             Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into())
         }
-        Nonterminal::NtMeta(ref attr) => attr.tokens.clone(),
-        Nonterminal::NtPath(ref path) => path.tokens.clone(),
-        Nonterminal::NtVis(ref vis) => vis.tokens.clone(),
+        Nonterminal::NtMeta(ref attr) => convert_tokens(attr.tokens.clone()),
+        Nonterminal::NtPath(ref path) => convert_tokens(path.tokens.clone()),
+        Nonterminal::NtVis(ref vis) => convert_tokens(vis.tokens.clone()),
         Nonterminal::NtTT(ref tt) => Some(tt.clone().into()),
         Nonterminal::NtExpr(ref expr) | Nonterminal::NtLiteral(ref expr) => {
             if expr.tokens.is_none() {
@@ -602,10 +605,10 @@ fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bool {
 fn prepend_attrs(
     sess: &ParseSess,
     attrs: &[ast::Attribute],
-    tokens: Option<&tokenstream::TokenStream>,
+    tokens: Option<&tokenstream::LazyTokenStream>,
     span: rustc_span::Span,
 ) -> Option<tokenstream::TokenStream> {
-    let tokens = tokens?;
+    let tokens = tokens?.clone().into_token_stream();
     if attrs.is_empty() {
         return Some(tokens.clone());
     }
diff --git a/compiler/rustc_parse/src/parser/attr.rs b/compiler/rustc_parse/src/parser/attr.rs
index 98f94098bfc15..73439643d69b9 100644
--- a/compiler/rustc_parse/src/parser/attr.rs
+++ b/compiler/rustc_parse/src/parser/attr.rs
@@ -4,7 +4,7 @@ use rustc_ast::attr;
 use rustc_ast::token::{self, Nonterminal};
 use rustc_ast_pretty::pprust;
 use rustc_errors::{error_code, PResult};
-use rustc_span::Span;
+use rustc_span::{sym, Span};
 
 use tracing::debug;
@@ -302,3 +302,16 @@ impl<'a> Parser<'a> {
         Err(self.struct_span_err(self.token.span, &msg))
     }
 }
+
+pub fn maybe_needs_tokens(attrs: &[ast::Attribute]) -> bool {
+    attrs.iter().any(|attr| {
+        if let Some(ident) = attr.ident() {
+            ident.name == sym::derive
+                // This might apply a custom attribute/derive
+                || ident.name == sym::cfg_attr
+                || !rustc_feature::is_builtin_attr_name(ident.name)
+        } else {
+            true
+        }
+    })
+}
diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs
index fb05f8791a505..698a7e7d9cde8 100644
--- a/compiler/rustc_parse/src/parser/expr.rs
+++ b/compiler/rustc_parse/src/parser/expr.rs
@@ -6,6 +6,7 @@ use crate::maybe_recover_from_interpolated_ty_qpath;
 
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, Token, TokenKind};
+use rustc_ast::tokenstream::Spacing;
 use rustc_ast::util::classify;
 use rustc_ast::util::literal::LitError;
 use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity};
@@ -18,7 +19,6 @@
 use rustc_span::source_map::{self, Span, Spanned};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
 use rustc_span::{BytePos, Pos};
 use std::mem;
-use tracing::debug;
 
 /// Possibly accepts an `token::Interpolated` expression (a pre-parsed expression
 /// dropped into the token stream, which happens while parsing the result of
@@ -459,7 +459,7 @@ impl<'a> Parser<'a> {
     /// Parses a prefix-unary-operator expr.
     fn parse_prefix_expr(&mut self, attrs: Option<AttrVec>) -> PResult<'a, P<Expr>> {
         let attrs = self.parse_or_use_outer_attributes(attrs)?;
-        self.maybe_collect_tokens(!attrs.is_empty(), |this| {
+        self.maybe_collect_tokens(super::attr::maybe_needs_tokens(&attrs), |this| {
             let lo = this.token.span;
             // Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr()
             let (hi, ex) = match this.token.uninterpolate().kind {
@@ -884,7 +884,7 @@ impl<'a> Parser<'a> {
                 assert!(suffix.is_none());
                 let symbol = Symbol::intern(&i);
                 self.token = Token::new(token::Ident(symbol, false), ident_span);
-                let next_token = Token::new(token::Dot, dot_span);
+                let next_token = (Token::new(token::Dot, dot_span), self.token_spacing);
                 self.parse_tuple_field_access_expr(lo, base, symbol, None, Some(next_token))
             }
             // 1.2 | 1.2e3
@@ -902,12 +902,14 @@ impl<'a> Parser<'a> {
                 };
                 let symbol1 = Symbol::intern(&i1);
                 self.token = Token::new(token::Ident(symbol1, false), ident1_span);
-                let next_token1 = Token::new(token::Dot, dot_span);
+                // This needs to be `Spacing::Alone` to prevent regressions.
+                // See issue #76399 and PR #76285 for more details
+                let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone);
                 let base1 =
                     self.parse_tuple_field_access_expr(lo, base, symbol1, None, Some(next_token1));
                 let symbol2 = Symbol::intern(&i2);
                 let next_token2 = Token::new(token::Ident(symbol2, false), ident2_span);
-                self.bump_with(next_token2); // `.`
+                self.bump_with((next_token2, self.token_spacing)); // `.`
                 self.parse_tuple_field_access_expr(lo, base1, symbol2, suffix, None)
             }
             // 1e+ | 1e- (recovered)
@@ -930,7 +932,7 @@ impl<'a> Parser<'a> {
         base: P<Expr>,
         field: Symbol,
         suffix: Option<Symbol>,
-        next_token: Option<Token>,
+        next_token: Option<(Token, Spacing)>,
     ) -> P<Expr> {
         match next_token {
             Some(next_token) => self.bump_with(next_token),
@@ -1109,12 +1111,11 @@ impl<'a> Parser<'a> {
 
     fn maybe_collect_tokens(
         &mut self,
-        has_outer_attrs: bool,
+        needs_tokens: bool,
         f: impl FnOnce(&mut Self) -> PResult<'a, P<Expr>>,
     ) -> PResult<'a, P<Expr>> {
-        if has_outer_attrs {
+        if needs_tokens {
             let (mut expr, tokens) = self.collect_tokens(f)?;
-            debug!("maybe_collect_tokens: Collected tokens for {:?} (tokens {:?}", expr, tokens);
             expr.tokens = Some(tokens);
             Ok(expr)
         } else {
diff --git a/compiler/rustc_parse/src/parser/item.rs b/compiler/rustc_parse/src/parser/item.rs
index 48341f71d33ea..4ad259715bd98 100644
--- a/compiler/rustc_parse/src/parser/item.rs
+++ b/compiler/rustc_parse/src/parser/item.rs
@@ -116,15 +116,16 @@ impl<'a> Parser<'a> {
             Some(item.into_inner())
         });
 
+        let needs_tokens = super::attr::maybe_needs_tokens(&attrs);
+
         let mut unclosed_delims = vec![];
-        let has_attrs = !attrs.is_empty();
         let parse_item = |this: &mut Self| {
             let item = this.parse_item_common_(attrs, mac_allowed, attrs_allowed, req_name);
             unclosed_delims.append(&mut this.unclosed_delims);
             item
         };
 
-        let (mut item, tokens) = if has_attrs {
+        let (mut item, tokens) = if needs_tokens {
             let (item, tokens) = self.collect_tokens(parse_item)?;
             (item, Some(tokens))
         } else {
diff --git a/compiler/rustc_parse/src/parser/mod.rs b/compiler/rustc_parse/src/parser/mod.rs
index 7970ad36456d1..f726abf9df39b 100644
--- a/compiler/rustc_parse/src/parser/mod.rs
+++ b/compiler/rustc_parse/src/parser/mod.rs
@@ -16,13 +16,15 @@ pub use path::PathStyle;
 
 use rustc_ast::ptr::P;
 use rustc_ast::token::{self, DelimToken, Token, TokenKind};
-use rustc_ast::tokenstream::{self, DelimSpan, TokenStream, TokenTree, TreeAndSpacing};
+use rustc_ast::tokenstream::{self, DelimSpan, LazyTokenStream, LazyTokenStreamInner, Spacing};
+use rustc_ast::tokenstream::{TokenStream, TokenTree};
 use rustc_ast::DUMMY_NODE_ID;
 use rustc_ast::{self as ast, AnonConst, AttrStyle, AttrVec, Const, CrateSugar, Extern, Unsafe};
 use rustc_ast::{Async, Expr, ExprKind, MacArgs, MacDelimiter, Mutability, StrLit};
 use rustc_ast::{Visibility, VisibilityKind};
 use rustc_ast_pretty::pprust;
-use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, FatalError, PResult};
+use rustc_errors::PResult;
+use rustc_errors::{struct_span_err, Applicability, DiagnosticBuilder, FatalError};
 use rustc_session::parse::ParseSess;
 use rustc_span::source_map::{Span, DUMMY_SP};
 use rustc_span::symbol::{kw, sym, Ident, Symbol};
@@ -85,10 +87,14 @@ pub struct Parser<'a> {
     pub sess: &'a ParseSess,
     /// The current token.
     pub token: Token,
+    /// The spacing for the current token
+    pub token_spacing: Spacing,
     /// The previous token.
     pub prev_token: Token,
     restrictions: Restrictions,
     expected_tokens: Vec<TokenType>,
+    // Important: This must only be advanced from `next_tok`
+    // to ensure that `token_cursor.num_next_calls` is updated properly
     token_cursor: TokenCursor,
     desugar_doc_comments: bool,
     /// This field is used to keep track of how many left angle brackets we have seen. This is
@@ -120,8 +126,10 @@ impl<'a> Drop for Parser<'a> {
 struct TokenCursor {
     frame: TokenCursorFrame,
     stack: Vec<TokenCursorFrame>,
-    cur_token: Option<TreeAndSpacing>,
-    collecting: Option<Collecting>,
+    desugar_doc_comments: bool,
+    // Counts the number of calls to `next` or `next_desugared`,
+    // depending on whether `desugar_doc_comments` is set.
+    num_next_calls: usize,
 }
 
 #[derive(Clone)]
@@ -133,40 +141,22 @@ struct TokenCursorFrame {
     close_delim: bool,
 }
 
-/// Used to track additional state needed by `collect_tokens`
-#[derive(Clone, Debug)]
-struct Collecting {
-    /// Holds the current tokens captured during the most
-    /// recent call to `collect_tokens`
-    buf: Vec<TreeAndSpacing>,
-    /// The depth of the `TokenCursor` stack at the time
-    /// collection was started. When we encounter a `TokenTree::Delimited`,
-    /// we want to record the `TokenTree::Delimited` itself,
-    /// but *not* any of the inner tokens while we are inside
-    /// the new frame (this would cause us to record duplicate tokens).
-    ///
-    /// This `depth` fields tracks stack depth we are recording tokens.
-    /// Only tokens encountered at this depth will be recorded. See
-    /// `TokenCursor::next` for more details.
-    depth: usize,
-}
-
 impl TokenCursorFrame {
-    fn new(span: DelimSpan, delim: DelimToken, tts: &TokenStream) -> Self {
+    fn new(span: DelimSpan, delim: DelimToken, tts: TokenStream) -> Self {
         TokenCursorFrame {
             delim,
             span,
             open_delim: delim == token::NoDelim,
-            tree_cursor: tts.clone().into_trees(),
+            tree_cursor: tts.into_trees(),
             close_delim: delim == token::NoDelim,
         }
     }
 }
 
 impl TokenCursor {
-    fn next(&mut self) -> Token {
+    fn next(&mut self) -> (Token, Spacing) {
         loop {
-            let tree = if !self.frame.open_delim {
+            let (tree, spacing) = if !self.frame.open_delim {
                 self.frame.open_delim = true;
                 TokenTree::open_tt(self.frame.span, self.frame.delim).into()
             } else if let Some(tree) = self.frame.tree_cursor.next_with_spacing() {
@@ -178,40 +168,24 @@ impl TokenCursor {
                 self.frame = frame;
                 continue;
             } else {
-                return Token::new(token::Eof, DUMMY_SP);
+                (TokenTree::Token(Token::new(token::Eof, DUMMY_SP)), Spacing::Alone)
             };
 
-            // Don't set an open delimiter as our current token - we want
-            // to leave it as the full `TokenTree::Delimited` from the previous
-            // iteration of this loop
-            if !matches!(tree.0, TokenTree::Token(Token { kind: TokenKind::OpenDelim(_), .. })) {
-                self.cur_token = Some(tree.clone());
-            }
-
-            if let Some(collecting) = &mut self.collecting {
-                if collecting.depth == self.stack.len() {
-                    debug!(
-                        "TokenCursor::next(): collected {:?} at depth {:?}",
-                        tree,
-                        self.stack.len()
-                    );
-                    collecting.buf.push(tree.clone())
+            match tree {
+                TokenTree::Token(token) => {
+                    return (token, spacing);
                 }
-            }
-
-            match tree.0 {
-                TokenTree::Token(token) => return token,
                 TokenTree::Delimited(sp, delim, tts) => {
-                    let frame = TokenCursorFrame::new(sp, delim, &tts);
+                    let frame = TokenCursorFrame::new(sp, delim, tts);
                     self.stack.push(mem::replace(&mut self.frame, frame));
                 }
             }
         }
     }
 
-    fn next_desugared(&mut self) -> Token {
+    fn next_desugared(&mut self) -> (Token, Spacing) {
         let (data, attr_style, sp) = match self.next() {
-            Token { kind: token::DocComment(_, attr_style, data), span } => {
+            (Token { kind: token::DocComment(_, attr_style, data), span }, _) => {
                 (data, attr_style, span)
             }
             tok => return tok,
         };
@@ -249,7 +223,7 @@ impl TokenCursor {
             TokenCursorFrame::new(
                 delim_span,
                 token::NoDelim,
-                &if attr_style == AttrStyle::Inner {
+                if attr_style == AttrStyle::Inner {
                     [TokenTree::token(token::Pound, sp), TokenTree::token(token::Not, sp), body]
                         .iter()
                         .cloned()
@@ -351,14 +325,15 @@ impl<'a> Parser<'a> {
         let mut parser = Parser {
            sess,
             token: Token::dummy(),
+            token_spacing: Spacing::Alone,
             prev_token: Token::dummy(),
             restrictions: Restrictions::empty(),
             expected_tokens: Vec::new(),
             token_cursor: TokenCursor {
-                frame: TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, &tokens),
+                frame: TokenCursorFrame::new(DelimSpan::dummy(), token::NoDelim, tokens),
                 stack: Vec::new(),
-                cur_token: None,
-                collecting: None,
+                num_next_calls: 0,
+                desugar_doc_comments,
             },
             desugar_doc_comments,
             unmatched_angle_bracket_count: 0,
@@ -375,17 +350,18 @@ impl<'a> Parser<'a> {
         parser
     }
 
-    fn next_tok(&mut self, fallback_span: Span) -> Token {
-        let mut next = if self.desugar_doc_comments {
+    fn next_tok(&mut self, fallback_span: Span) -> (Token, Spacing) {
+        let (mut next, spacing) = if self.desugar_doc_comments {
             self.token_cursor.next_desugared()
         } else {
             self.token_cursor.next()
         };
+        self.token_cursor.num_next_calls += 1;
         if next.span.is_dummy() {
             // Tweak the location for better diagnostics, but keep syntactic context intact.
             next.span = fallback_span.with_ctxt(next.span.ctxt());
         }
-        next
+        (next, spacing)
     }
 
     pub fn unexpected<T>(&mut self) -> PResult<'a, T> {
@@ -573,7 +549,9 @@ impl<'a> Parser<'a> {
                 let first_span = self.sess.source_map().start_point(self.token.span);
                 let second_span = self.token.span.with_lo(first_span.hi());
                 self.token = Token::new(first, first_span);
-                self.bump_with(Token::new(second, second_span));
+                // Use the spacing of the glued token as the spacing
+                // of the unglued second token.
+                self.bump_with((Token::new(second, second_span), self.token_spacing));
                 true
             }
             _ => {
@@ -805,7 +783,7 @@ impl<'a> Parser<'a> {
     }
 
     /// Advance the parser by one token using provided token as the next one.
-    fn bump_with(&mut self, next_token: Token) {
+    fn bump_with(&mut self, (next_token, next_spacing): (Token, Spacing)) {
         // Bumping after EOF is a bad sign, usually an infinite loop.
         if self.prev_token.kind == TokenKind::Eof {
             let msg = "attempted to bump the parser past EOF (may be stuck in a loop)";
@@ -814,6 +792,7 @@ impl<'a> Parser<'a> {
 
         // Update the current and previous tokens.
         self.prev_token = mem::replace(&mut self.token, next_token);
+        self.token_spacing = next_spacing;
 
         // Diagnostics.
         self.expected_tokens.clear();
@@ -984,13 +963,27 @@ impl<'a> Parser<'a> {
     pub(crate) fn parse_token_tree(&mut self) -> TokenTree {
         match self.token.kind {
             token::OpenDelim(..) => {
-                let frame = mem::replace(
-                    &mut self.token_cursor.frame,
-                    self.token_cursor.stack.pop().unwrap(),
-                );
-                self.token = Token::new(TokenKind::CloseDelim(frame.delim), frame.span.close);
+                let depth = self.token_cursor.stack.len();
+
+                // We keep advancing the token cursor until we hit
+                // the matching `CloseDelim` token.
+                while !(depth == self.token_cursor.stack.len()
+                    && matches!(self.token.kind, token::CloseDelim(_)))
+                {
+                    // Advance one token at a time, so `TokenCursor::next()`
+                    // can capture these tokens if necessary.
+                    self.bump();
+                }
+                // We are still inside the frame corresponding
+                // to the delimited stream we captured, so grab
+                // the tokens from this frame.
+                let frame = &self.token_cursor.frame;
+                let stream = frame.tree_cursor.stream.clone();
+                let span = frame.span;
+                let delim = frame.delim;
+                // Consume close delimiter
                 self.bump();
-                TokenTree::Delimited(frame.span, frame.delim, frame.tree_cursor.stream)
+                TokenTree::Delimited(span, delim, stream)
             }
             token::CloseDelim(_) | token::Eof => unreachable!(),
             _ => {
@@ -1198,79 +1191,45 @@ impl<'a> Parser<'a> {
     pub fn collect_tokens<R>(
         &mut self,
         f: impl FnOnce(&mut Self) -> PResult<'a, R>,
-    ) -> PResult<'a, (R, TokenStream)> {
-        // Record all tokens we parse when parsing this item.
-        let tokens: Vec<TreeAndSpacing> = self.token_cursor.cur_token.clone().into_iter().collect();
-        debug!("collect_tokens: starting with {:?}", tokens);
-
-        // We need special handling for the case where `collect_tokens` is called
-        // on an opening delimeter (e.g. '('). At this point, we have already pushed
-        // a new frame - however, we want to record the original `TokenTree::Delimited`,
-        // for consistency with the case where we start recording one token earlier.
-        // See `TokenCursor::next` to see how `cur_token` is set up.
-        let prev_depth =
-            if matches!(self.token_cursor.cur_token, Some((TokenTree::Delimited(..), _))) {
-                if self.token_cursor.stack.is_empty() {
-                    // There is nothing below us in the stack that
-                    // the function could consume, so the only thing it can legally
-                    // capture is the entire contents of the current frame.
-                    return Ok((f(self)?, TokenStream::new(tokens)));
-                }
-                // We have already recorded the full `TokenTree::Delimited` when we created
-                // our `tokens` vector at the start of this function. We are now inside
-                // a new frame corresponding to the `TokenTree::Delimited` we already recoreded.
-                // We don't want to record any of the tokens inside this frame, since they
-                // will be duplicates of the tokens nested inside the `TokenTree::Delimited`.
-                // Therefore, we set our recording depth to the *previous* frame. This allows
-                // us to record a sequence like: `(foo).bar()`: the `(foo)` will be recored
-                // as our initial `cur_token`, while the `.bar()` will be recored after we
-                // pop the `(foo)` frame.
-                self.token_cursor.stack.len() - 1
-            } else {
-                self.token_cursor.stack.len()
-            };
-        let prev_collecting =
-            self.token_cursor.collecting.replace(Collecting { buf: tokens, depth: prev_depth });
-
-        let ret = f(self);
+    ) -> PResult<'a, (R, LazyTokenStream)> {
+        let start_token = (self.token.clone(), self.token_spacing);
+        let mut cursor_snapshot = self.token_cursor.clone();
+
+        let ret = f(self)?;
+
+        let new_calls = self.token_cursor.num_next_calls;
+        let num_calls = new_calls - cursor_snapshot.num_next_calls;
+        let desugar_doc_comments = self.desugar_doc_comments;
+
+        // Produces a `TokenStream` on-demand. Using `cursor_snapshot`
+        // and `num_calls`, we can reconstruct the `TokenStream` seen
+        // by the callback. This allows us to avoid producing a `TokenStream`
+        // if it is never needed - for example, a captured `macro_rules!`
+        // argument that is never passed to a proc macro.
+        //
+        // This also makes `Parser` very cheap to clone, since
+        // there is no intermediate collection buffer to clone.
+        let lazy_cb = move || {
+            // The token produced by the final call to `next` or `next_desugared`
+            // was not actually consumed by the callback. The combination
+            // of chaining the initial token and using `take` produces the desired
+            // result - we produce an empty `TokenStream` if no calls were made,
+            // and omit the final token otherwise.
+            let tokens = std::iter::once(start_token)
+                .chain((0..num_calls).map(|_| {
+                    if desugar_doc_comments {
+                        cursor_snapshot.next_desugared()
+                    } else {
+                        cursor_snapshot.next()
+                    }
+                }))
+                .take(num_calls);
 
-        let mut collected_tokens = if let Some(collecting) = self.token_cursor.collecting.take() {
-            collecting.buf
-        } else {
-            let msg = "our vector went away?";
-            debug!("collect_tokens: {}", msg);
-            self.sess.span_diagnostic.delay_span_bug(self.token.span, &msg);
-            // This can happen due to a bad interaction of two unrelated recovery mechanisms
-            // with mismatched delimiters *and* recovery lookahead on the likely typo
-            // `pub ident(` (#62895, different but similar to the case above).
-            return Ok((ret?, TokenStream::default()));
+            make_token_stream(tokens)
         };
+        let stream = LazyTokenStream::new(LazyTokenStreamInner::Lazy(Box::new(lazy_cb)));
 
-        debug!("collect_tokens: got raw tokens {:?}", collected_tokens);
-
-        // If we're not at EOF our current token wasn't actually consumed by
-        // `f`, but it'll still be in our list that we pulled out. In that case
-        // put it back.
-        let extra_token = if self.token != token::Eof { collected_tokens.pop() } else { None };
-
-        if let Some(mut collecting) = prev_collecting {
-            // If we were previously collecting at the same depth,
-            // then the previous call to `collect_tokens` needs to see
-            // the tokens we just recorded.
-            //
-            // If we were previously recording at an lower `depth`,
-            // then the previous `collect_tokens` call already recorded
-            // this entire frame in the form of a `TokenTree::Delimited`,
-            // so there is nothing else for us to do.
-            if collecting.depth == prev_depth {
-                collecting.buf.extend(collected_tokens.iter().cloned());
-                collecting.buf.extend(extra_token);
-                debug!("collect_tokens: updating previous buf to {:?}", collecting);
-            }
-            self.token_cursor.collecting = Some(collecting)
-        }
-
-        Ok((ret?, TokenStream::new(collected_tokens)))
+        Ok((ret, stream))
     }
 
     /// `::{` or `::*`
@@ -1319,3 +1278,41 @@ pub fn emit_unclosed_delims(unclosed_delims: &mut Vec<UnmatchedBrace>, sess: &Pa
         }
     }
 }
+
+/// Converts a flattened iterator of tokens (including open and close delimiter tokens)
+/// into a `TokenStream`, creating a `TokenTree::Delimited` for each matching pair
+/// of open and close delims.
+fn make_token_stream(tokens: impl Iterator<Item = (Token, Spacing)>) -> TokenStream {
+    #[derive(Debug)]
+    struct FrameData {
+        open: Span,
+        inner: Vec<(TokenTree, Spacing)>,
+    }
+    let mut stack = vec![FrameData { open: DUMMY_SP, inner: vec![] }];
+    for (token, spacing) in tokens {
+        match token {
+            Token { kind: TokenKind::OpenDelim(_), span } => {
+                stack.push(FrameData { open: span, inner: vec![] });
+            }
+            Token { kind: TokenKind::CloseDelim(delim), span } => {
+                let frame_data = stack.pop().expect("Token stack was empty!");
+                let dspan = DelimSpan::from_pair(frame_data.open, span);
+                let stream = TokenStream::new(frame_data.inner);
+                let delimited = TokenTree::Delimited(dspan, delim, stream);
+                stack
+                    .last_mut()
+                    .unwrap_or_else(|| panic!("Bottom token frame is missing for tokens!"))
+                    .inner
+                    .push((delimited, Spacing::Alone));
+            }
+            token => stack
+                .last_mut()
+                .expect("Bottom token frame is missing!")
+                .inner
+                .push((TokenTree::Token(token), spacing)),
+        }
+    }
+    let final_buf = stack.pop().expect("Missing final buf!");
+    assert!(stack.is_empty(), "Stack should be empty: final_buf={:?} stack={:?}", final_buf, stack);
+    TokenStream::new(final_buf.inner)
+}
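
Note (illustrative sketch, not part of the patch): the flatten-then-rebuild
idea behind `make_token_stream` can be shown standalone. `FlatToken`, `Tree`,
and `rebuild` below are hypothetical stand-ins for rustc's `Token`,
`TokenTree::Delimited`, and `make_token_stream`; only the shape of the
algorithm is the same - push a frame at each open delimiter, and fold the
finished frame into its parent at each close delimiter.

    // Simplified stand-in for a flattened token.
    #[derive(Clone, Debug, PartialEq)]
    enum FlatToken {
        Ident(&'static str),
        Open,
        Close,
    }

    // Simplified stand-in for `TokenTree`: a leaf or a delimited group.
    #[derive(Debug, PartialEq)]
    enum Tree {
        Leaf(&'static str),
        Delimited(Vec<Tree>),
    }

    // Rebuild nesting from a flat sequence in one linear pass.
    fn rebuild(tokens: impl Iterator<Item = FlatToken>) -> Vec<Tree> {
        // The bottom frame collects the top-level trees.
        let mut stack: Vec<Vec<Tree>> = vec![Vec::new()];
        for token in tokens {
            match token {
                FlatToken::Open => stack.push(Vec::new()),
                FlatToken::Close => {
                    let inner = stack.pop().expect("unbalanced close delimiter");
                    stack.last_mut().expect("missing bottom frame").push(Tree::Delimited(inner));
                }
                FlatToken::Ident(s) => {
                    stack.last_mut().expect("missing bottom frame").push(Tree::Leaf(s))
                }
            }
        }
        let top = stack.pop().expect("missing final frame");
        assert!(stack.is_empty(), "unbalanced open delimiter");
        top
    }

    fn main() {
        // `( foo ) bar`, flattened into open/close delimiters and plain tokens.
        let flat = vec![
            FlatToken::Open,
            FlatToken::Ident("foo"),
            FlatToken::Close,
            FlatToken::Ident("bar"),
        ];
        let trees = rebuild(flat.into_iter());
        assert_eq!(trees, vec![Tree::Delimited(vec![Tree::Leaf("foo")]), Tree::Leaf("bar")]);
    }

Because the buffer is flat, capturing no longer needs to care which frame the
cursor is in; the nesting is recovered afterwards in a single pass.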
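Note (illustrative sketch, not part of the patch): `CreateTokenStream`'s
`clone_it` method is a standard workaround for `Clone` not being object-safe.
A minimal self-contained version of the same pattern, with `String` standing
in for `TokenStream` (all names here are hypothetical):

    // A cloneable, boxable `FnOnce() -> String`.
    trait CreateValue: FnOnce() -> String {
        // `Clone` is not object-safe, so expose cloning as a regular method.
        fn clone_it(&self) -> Box<dyn CreateValue>;
    }

    // Any cloneable `FnOnce() -> String` closure qualifies.
    impl<F: 'static + Clone + FnOnce() -> String> CreateValue for F {
        fn clone_it(&self) -> Box<dyn CreateValue> {
            Box::new(self.clone())
        }
    }

    impl Clone for Box<dyn CreateValue> {
        fn clone(&self) -> Self {
            let val: &dyn CreateValue = &**self;
            val.clone_it()
        }
    }

    fn main() {
        let count = 5usize;
        // Each clone captures the same state, so every clone produces the
        // same value - the invariant the `CreateTokenStream` comment asks for.
        let lazy: Box<dyn CreateValue> = Box::new(move || "x".repeat(count));
        let copy = lazy.clone();
        assert_eq!(lazy(), "xxxxx");
        assert_eq!(copy(), "xxxxx");
    }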
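Note (illustrative sketch, not part of the patch): the commit message's point
about `Vec::splice` - with a flattened buffer, the tokens of a group at any
nesting depth occupy one contiguous range, so replacing them is a single
splice. A minimal sketch with string tokens standing in for real tokens (the
range and replacement are made up for illustration):

    fn main() {
        // Flattened tokens for `# [ attr ] fn f ( ) { }`.
        let mut flat: Vec<&str> = vec!["#", "[", "attr", "]", "fn", "f", "(", ")", "{", "}"];
        // Replace the four attribute tokens in place; `splice` accepts a
        // replacement of a different length, so the buffer can grow or shrink.
        flat.splice(0..4, ["#", "[", "cfg", "(", "test", ")", "]"]);
        assert_eq!(flat.len(), 13);
        assert_eq!(flat[2], "cfg");
    }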