Rewrite collect_tokens implementations to use a flattened buffer
Instead of trying to collect tokens at each depth, we 'flatten' the
stream as we go along, pushing open/close delimiters to our buffer
just like regular tokens. Once capturing is complete, we reconstruct a
nested `TokenTree::Delimited` structure, producing a normal
`TokenStream`.
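
The sketch below illustrates the flattening idea in isolation. The types and names are simplified stand-ins for the compiler's `Token`, `DelimToken`, and `TokenTree`, not the actual implementation:

```rust
// Hypothetical, simplified types: the real code works with rustc's
// `Token`/`TokenTree`, but the reconstruction has the same shape.
#[derive(Clone, Debug)]
enum FlatToken {
    Token(String),     // an ordinary token, pushed to the buffer as-is
    OpenDelim(char),   // e.g. '(', '[', '{' - also pushed to the flat buffer
    CloseDelim(char),
}

#[derive(Debug)]
enum TokenTree {
    Token(String),
    Delimited(char, Vec<TokenTree>),
}

/// Rebuild the nested `Delimited` structure from the flat buffer once
/// capturing is complete. Assumes the delimiters in `flat` are balanced.
fn reconstruct(flat: &[FlatToken]) -> Vec<TokenTree> {
    // Each stack entry holds the delimiter (if any) and contents of one level.
    let mut stack: Vec<(Option<char>, Vec<TokenTree>)> = vec![(None, Vec::new())];
    for tok in flat {
        match tok {
            FlatToken::Token(s) => {
                stack.last_mut().unwrap().1.push(TokenTree::Token(s.clone()))
            }
            FlatToken::OpenDelim(d) => stack.push((Some(*d), Vec::new())),
            FlatToken::CloseDelim(_) => {
                let (delim, inner) = stack.pop().unwrap();
                let tree = TokenTree::Delimited(delim.unwrap(), inner);
                stack.last_mut().unwrap().1.push(tree);
            }
        }
    }
    stack.pop().unwrap().1
}

fn main() {
    let flat = vec![
        FlatToken::Token("foo".into()),
        FlatToken::OpenDelim('('),
        FlatToken::Token("1".into()),
        FlatToken::CloseDelim(')'),
    ];
    // Prints: [Token("foo"), Delimited('(', [Token("1")])]
    println!("{:?}", reconstruct(&flat));
}
```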

The reconstructed `TokenStream` is not created immediately - instead, it is
produced on-demand by a closure (wrapped in a new `LazyTokenStream` type). This
closure stores a clone of the original `TokenCursor`, plus a record of the
number of calls to `next()/next_desugared()`. This is sufficient to reconstruct
the tokenstream seen by the callback without storing any additional state. If
the tokenstream is never used (e.g. when a captured `macro_rules!` argument is
never passed to a proc macro), we never actually create a `TokenStream`.
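
A minimal sketch of the lazy-replay idea, using a toy cursor in place of the parser's `TokenCursor` (the names and types here are illustrative only):

```rust
use std::rc::Rc;

// Toy stand-in for `TokenCursor`: cheap to clone, since the underlying
// token buffer is shared.
#[derive(Clone)]
struct Cursor {
    tokens: Rc<Vec<String>>,
    pos: usize,
}

impl Cursor {
    fn next(&mut self) -> Option<String> {
        let tok = self.tokens.get(self.pos).cloned();
        self.pos += 1;
        tok
    }
}

// The captured state is just a clone of the cursor taken when collection
// started, plus the number of `next()` calls the parser made. No token
// stream is built unless the returned closure is actually invoked.
fn make_lazy(start: Cursor, num_calls: usize) -> impl Fn() -> Vec<String> + Clone {
    move || {
        let mut cursor = start.clone();
        (0..num_calls).filter_map(|_| cursor.next()).collect()
    }
}

fn main() {
    let tokens = Rc::new(vec!["fn".to_string(), "f".into(), "(".into(), ")".into()]);
    let mut cursor = Cursor { tokens, pos: 0 };
    let start = cursor.clone();

    // The parser consumes two tokens while parsing some AST node...
    cursor.next();
    cursor.next();

    // ...and records only (start, 2). The stream is produced on demand:
    let lazy = make_lazy(start, 2);
    println!("{:?}", lazy()); // ["fn", "f"]
}
```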

This implementation has a number of advantages over the previous one:

* It is significantly simpler, with no edge cases around capturing the
  start/end of a delimited group.

* It can be easily extended to allow replacing tokens at an arbitrary
  'depth' by just using `Vec::splice` at the proper position (see the
  sketch after this list). This is important for PR rust-lang#76130, which
  requires us to track information about attributes along with tokens.

* The lazy approach to `TokenStream` construction allows us to easily
  parse an AST struct, and then decide after the fact whether we need a
  `TokenStream`. This will be useful when we start collecting tokens for
  `Attribute` - we can discard the `LazyTokenStream` if the parsed
  attribute doesn't need tokens (e.g. is a builtin attribute).

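As noted in the second bullet, `Vec::splice` is what makes in-place replacement in
the flat buffer straightforward. A small illustration with placeholder tokens (not
the compiler's types):

```rust
fn main() {
    // A flat buffer of placeholder tokens. Suppose positions 1..3 hold the
    // tokens of an attribute that later needs to be replaced.
    let mut flat = vec!["fn", "#", "[attr]", "foo", "(", ")"];

    // `splice` removes the range and inserts the replacement in its place,
    // regardless of how deeply nested those tokens were in the original tree.
    flat.splice(1..3, ["<replacement>"]);

    assert_eq!(flat, ["fn", "<replacement>", "foo", "(", ")"]);
}
```
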
The performance impact seems to be negligible (see
rust-lang#77250 (comment)). There is a
small slowdown on a few benchmarks, but it only rises above 1% for incremental
builds, where it represents a larger fraction of the much smaller instruction
count. There is a ~1% speedup on a few other incremental benchmarks - my guess is
that the speedups and slowdowns will usually cancel out in practice.
Aaron1011 committed Oct 19, 2020
1 parent cb2462c commit 593fdd3
Showing 7 changed files with 254 additions and 167 deletions.
20 changes: 10 additions & 10 deletions compiler/rustc_ast/src/ast.rs
@@ -24,7 +24,7 @@ pub use UnsafeSource::*;

use crate::ptr::P;
use crate::token::{self, CommentKind, DelimToken};
use crate::tokenstream::{DelimSpan, TokenStream, TokenTree};
use crate::tokenstream::{DelimSpan, LazyTokenStream, TokenStream, TokenTree};

use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::stack::ensure_sufficient_stack;
@@ -97,7 +97,7 @@ pub struct Path {
/// The segments in the path: the things separated by `::`.
/// Global paths begin with `kw::PathRoot`.
pub segments: Vec<PathSegment>,
pub tokens: Option<TokenStream>,
pub tokens: Option<LazyTokenStream>,
}

impl PartialEq<Symbol> for Path {
@@ -535,7 +535,7 @@ pub struct Block {
/// Distinguishes between `unsafe { ... }` and `{ ... }`.
pub rules: BlockCheckMode,
pub span: Span,
pub tokens: Option<TokenStream>,
pub tokens: Option<LazyTokenStream>,
}

/// A match pattern.
@@ -546,7 +546,7 @@ pub struct Pat {
pub id: NodeId,
pub kind: PatKind,
pub span: Span,
pub tokens: Option<TokenStream>,
pub tokens: Option<LazyTokenStream>,
}

impl Pat {
@@ -892,7 +892,7 @@ pub struct Stmt {
pub id: NodeId,
pub kind: StmtKind,
pub span: Span,
pub tokens: Option<TokenStream>,
pub tokens: Option<LazyTokenStream>,
}

impl Stmt {
@@ -1040,7 +1040,7 @@ pub struct Expr {
pub kind: ExprKind,
pub span: Span,
pub attrs: AttrVec,
pub tokens: Option<TokenStream>,
pub tokens: Option<LazyTokenStream>,
}

// `Expr` is used a lot. Make sure it doesn't unintentionally get bigger.
@@ -1835,7 +1835,7 @@ pub struct Ty {
pub id: NodeId,
pub kind: TyKind,
pub span: Span,
pub tokens: Option<TokenStream>,
pub tokens: Option<LazyTokenStream>,
}

impl Clone for Ty {
@@ -2408,7 +2408,7 @@ impl<D: Decoder> rustc_serialize::Decodable<D> for AttrId {
pub struct AttrItem {
pub path: Path,
pub args: MacArgs,
pub tokens: Option<TokenStream>,
pub tokens: Option<LazyTokenStream>,
}

/// A list of attributes.
@@ -2482,7 +2482,7 @@ pub enum CrateSugar {
pub struct Visibility {
pub kind: VisibilityKind,
pub span: Span,
pub tokens: Option<TokenStream>,
pub tokens: Option<LazyTokenStream>,
}

#[derive(Clone, Encodable, Decodable, Debug)]
@@ -2569,7 +2569,7 @@ pub struct Item<K = ItemKind> {
///
/// Note that the tokens here do not include the outer attributes, but will
/// include inner attributes.
pub tokens: Option<TokenStream>,
pub tokens: Option<LazyTokenStream>,
}

impl Item {
74 changes: 73 additions & 1 deletion compiler/rustc_ast/src/tokenstream.rs
@@ -16,8 +16,9 @@
use crate::token::{self, DelimToken, Token, TokenKind};

use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::sync::Lrc;
use rustc_data_structures::sync::{self, Lrc};
use rustc_macros::HashStable_Generic;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use rustc_span::{Span, DUMMY_SP};
use smallvec::{smallvec, SmallVec};

@@ -119,6 +120,77 @@ where
}
}

// A cloneable callback which produces a `TokenStream`. Each clone
// of this should produce the same `TokenStream`
pub trait CreateTokenStream: sync::Send + sync::Sync + FnOnce() -> TokenStream {
// Workaround for the fact that `Clone` is not object-safe
fn clone_it(&self) -> Box<dyn CreateTokenStream>;
}

impl<F: 'static + Clone + sync::Send + sync::Sync + FnOnce() -> TokenStream> CreateTokenStream
for F
{
fn clone_it(&self) -> Box<dyn CreateTokenStream> {
Box::new(self.clone())
}
}

impl Clone for Box<dyn CreateTokenStream> {
fn clone(&self) -> Self {
let val: &(dyn CreateTokenStream) = &**self;
val.clone_it()
}
}

/// A lazy version of `TokenStream`, which may defer creation
/// of an actual `TokenStream` until it is needed.
pub type LazyTokenStream = Lrc<LazyTokenStreamInner>;

#[derive(Clone)]
pub enum LazyTokenStreamInner {
Lazy(Box<dyn CreateTokenStream>),
Ready(TokenStream),
}

impl std::fmt::Debug for LazyTokenStreamInner {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
LazyTokenStreamInner::Lazy(..) => f.debug_struct("LazyTokenStream::Lazy").finish(),
LazyTokenStreamInner::Ready(..) => f.debug_struct("LazyTokenStream::Ready").finish(),
}
}
}

impl LazyTokenStreamInner {
pub fn into_token_stream(&self) -> TokenStream {
match self {
// Note that we do not cache this. If this ever becomes a performance
// problem, we should investigate wrapping `LazyTokenStreamInner`
// in a lock
LazyTokenStreamInner::Lazy(cb) => (cb.clone())(),
LazyTokenStreamInner::Ready(stream) => stream.clone(),
}
}
}

impl<S: Encoder> Encodable<S> for LazyTokenStreamInner {
fn encode(&self, _s: &mut S) -> Result<(), S::Error> {
panic!("Attempted to encode LazyTokenStream");
}
}

impl<D: Decoder> Decodable<D> for LazyTokenStreamInner {
fn decode(_d: &mut D) -> Result<Self, D::Error> {
panic!("Attempted to decode LazyTokenStream");
}
}

impl<CTX> HashStable<CTX> for LazyTokenStreamInner {
fn hash_stable(&self, _hcx: &mut CTX, _hasher: &mut StableHasher) {
panic!("Attempted to compute stable hash for LazyTokenStream");
}
}

/// A `TokenStream` is an abstract sequence of tokens, organized into `TokenTree`s.
///
/// The goal is for procedural macros to work with `TokenStream`s and `TokenTree`s
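
The `clone_it` method above is the usual workaround for `Clone` not being object-safe. A self-contained sketch of the same pattern with simplified names (`Fn` rather than `FnOnce`, `String` rather than `TokenStream`):

```rust
// A boxed trait object that remains cloneable even though `Clone` itself
// cannot be a supertrait of an object-safe trait.
trait MakeString: Fn() -> String {
    fn clone_it(&self) -> Box<dyn MakeString>;
}

// Every cloneable closure with the right signature gets the trait for free.
impl<F: 'static + Clone + Fn() -> String> MakeString for F {
    fn clone_it(&self) -> Box<dyn MakeString> {
        Box::new(self.clone())
    }
}

impl Clone for Box<dyn MakeString> {
    fn clone(&self) -> Self {
        (**self).clone_it()
    }
}

fn main() {
    let greeting = String::from("hello");
    let f: Box<dyn MakeString> = Box::new(move || greeting.clone());
    let g = f.clone(); // dispatches through `clone_it`
    assert_eq!(f(), g());
}
```
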
23 changes: 13 additions & 10 deletions compiler/rustc_parse/src/lib.rs
@@ -8,7 +8,7 @@

use rustc_ast as ast;
use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
use rustc_ast::tokenstream::{self, TokenStream, TokenTree};
use rustc_ast::tokenstream::{self, LazyTokenStream, TokenStream, TokenTree};
use rustc_ast_pretty::pprust;
use rustc_data_structures::sync::Lrc;
use rustc_errors::{Diagnostic, FatalError, Level, PResult};
@@ -248,29 +248,32 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
// As a result, some AST nodes are annotated with the token stream they
// came from. Here we attempt to extract these lossless token streams
// before we fall back to the stringification.

let convert_tokens = |tokens: Option<LazyTokenStream>| tokens.map(|t| t.into_token_stream());

let tokens = match *nt {
Nonterminal::NtItem(ref item) => {
prepend_attrs(sess, &item.attrs, item.tokens.as_ref(), span)
}
Nonterminal::NtBlock(ref block) => block.tokens.clone(),
Nonterminal::NtBlock(ref block) => convert_tokens(block.tokens.clone()),
Nonterminal::NtStmt(ref stmt) => {
// FIXME: We currently only collect tokens for `:stmt`
// matchers in `macro_rules!` macros. When we start collecting
// tokens for attributes on statements, we will need to prepend
// attributes here
stmt.tokens.clone()
convert_tokens(stmt.tokens.clone())
}
Nonterminal::NtPat(ref pat) => pat.tokens.clone(),
Nonterminal::NtTy(ref ty) => ty.tokens.clone(),
Nonterminal::NtPat(ref pat) => convert_tokens(pat.tokens.clone()),
Nonterminal::NtTy(ref ty) => convert_tokens(ty.tokens.clone()),
Nonterminal::NtIdent(ident, is_raw) => {
Some(tokenstream::TokenTree::token(token::Ident(ident.name, is_raw), ident.span).into())
}
Nonterminal::NtLifetime(ident) => {
Some(tokenstream::TokenTree::token(token::Lifetime(ident.name), ident.span).into())
}
Nonterminal::NtMeta(ref attr) => attr.tokens.clone(),
Nonterminal::NtPath(ref path) => path.tokens.clone(),
Nonterminal::NtVis(ref vis) => vis.tokens.clone(),
Nonterminal::NtMeta(ref attr) => convert_tokens(attr.tokens.clone()),
Nonterminal::NtPath(ref path) => convert_tokens(path.tokens.clone()),
Nonterminal::NtVis(ref vis) => convert_tokens(vis.tokens.clone()),
Nonterminal::NtTT(ref tt) => Some(tt.clone().into()),
Nonterminal::NtExpr(ref expr) | Nonterminal::NtLiteral(ref expr) => {
if expr.tokens.is_none() {
@@ -602,10 +605,10 @@ fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bool {
fn prepend_attrs(
sess: &ParseSess,
attrs: &[ast::Attribute],
tokens: Option<&tokenstream::TokenStream>,
tokens: Option<&tokenstream::LazyTokenStream>,
span: rustc_span::Span,
) -> Option<tokenstream::TokenStream> {
let tokens = tokens?;
let tokens = tokens?.clone().into_token_stream();
if attrs.is_empty() {
return Some(tokens.clone());
}
15 changes: 14 additions & 1 deletion compiler/rustc_parse/src/parser/attr.rs
@@ -4,7 +4,7 @@ use rustc_ast::attr;
use rustc_ast::token::{self, Nonterminal};
use rustc_ast_pretty::pprust;
use rustc_errors::{error_code, PResult};
use rustc_span::Span;
use rustc_span::{sym, Span};

use tracing::debug;

@@ -302,3 +302,16 @@ impl<'a> Parser<'a> {
Err(self.struct_span_err(self.token.span, &msg))
}
}

pub fn maybe_needs_tokens(attrs: &[ast::Attribute]) -> bool {
attrs.iter().any(|attr| {
if let Some(ident) = attr.ident() {
ident.name == sym::derive
// This might apply a custom attribute/derive
|| ident.name == sym::cfg_attr
|| !rustc_feature::is_builtin_attr_name(ident.name)
} else {
true
}
})
}
19 changes: 10 additions & 9 deletions compiler/rustc_parse/src/parser/expr.rs
@@ -6,6 +6,7 @@ use crate::maybe_recover_from_interpolated_ty_qpath;

use rustc_ast::ptr::P;
use rustc_ast::token::{self, Token, TokenKind};
use rustc_ast::tokenstream::Spacing;
use rustc_ast::util::classify;
use rustc_ast::util::literal::LitError;
use rustc_ast::util::parser::{prec_let_scrutinee_needs_par, AssocOp, Fixity};
@@ -18,7 +19,6 @@ use rustc_span::source_map::{self, Span, Spanned};
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::{BytePos, Pos};
use std::mem;
use tracing::debug;

/// Possibly accepts an `token::Interpolated` expression (a pre-parsed expression
/// dropped into the token stream, which happens while parsing the result of
@@ -459,7 +459,7 @@ impl<'a> Parser<'a> {
/// Parses a prefix-unary-operator expr.
fn parse_prefix_expr(&mut self, attrs: Option<AttrVec>) -> PResult<'a, P<Expr>> {
let attrs = self.parse_or_use_outer_attributes(attrs)?;
self.maybe_collect_tokens(!attrs.is_empty(), |this| {
self.maybe_collect_tokens(super::attr::maybe_needs_tokens(&attrs), |this| {
let lo = this.token.span;
// Note: when adding new unary operators, don't forget to adjust TokenKind::can_begin_expr()
let (hi, ex) = match this.token.uninterpolate().kind {
@@ -884,7 +884,7 @@ impl<'a> Parser<'a> {
assert!(suffix.is_none());
let symbol = Symbol::intern(&i);
self.token = Token::new(token::Ident(symbol, false), ident_span);
let next_token = Token::new(token::Dot, dot_span);
let next_token = (Token::new(token::Dot, dot_span), self.token_spacing);
self.parse_tuple_field_access_expr(lo, base, symbol, None, Some(next_token))
}
// 1.2 | 1.2e3
@@ -902,12 +902,14 @@
};
let symbol1 = Symbol::intern(&i1);
self.token = Token::new(token::Ident(symbol1, false), ident1_span);
let next_token1 = Token::new(token::Dot, dot_span);
// This needs to be `Spacing::Alone` to prevent regressions.
// See issue #76399 and PR #76285 for more details
let next_token1 = (Token::new(token::Dot, dot_span), Spacing::Alone);
let base1 =
self.parse_tuple_field_access_expr(lo, base, symbol1, None, Some(next_token1));
let symbol2 = Symbol::intern(&i2);
let next_token2 = Token::new(token::Ident(symbol2, false), ident2_span);
self.bump_with(next_token2); // `.`
self.bump_with((next_token2, self.token_spacing)); // `.`
self.parse_tuple_field_access_expr(lo, base1, symbol2, suffix, None)
}
// 1e+ | 1e- (recovered)
@@ -930,7 +932,7 @@
base: P<Expr>,
field: Symbol,
suffix: Option<Symbol>,
next_token: Option<Token>,
next_token: Option<(Token, Spacing)>,
) -> P<Expr> {
match next_token {
Some(next_token) => self.bump_with(next_token),
@@ -1109,12 +1111,11 @@ impl<'a> Parser<'a> {

fn maybe_collect_tokens(
&mut self,
has_outer_attrs: bool,
needs_tokens: bool,
f: impl FnOnce(&mut Self) -> PResult<'a, P<Expr>>,
) -> PResult<'a, P<Expr>> {
if has_outer_attrs {
if needs_tokens {
let (mut expr, tokens) = self.collect_tokens(f)?;
debug!("maybe_collect_tokens: Collected tokens for {:?} (tokens {:?}", expr, tokens);
expr.tokens = Some(tokens);
Ok(expr)
} else {
5 changes: 3 additions & 2 deletions compiler/rustc_parse/src/parser/item.rs
@@ -116,15 +116,16 @@ impl<'a> Parser<'a> {
Some(item.into_inner())
});

let needs_tokens = super::attr::maybe_needs_tokens(&attrs);

let mut unclosed_delims = vec![];
let has_attrs = !attrs.is_empty();
let parse_item = |this: &mut Self| {
let item = this.parse_item_common_(attrs, mac_allowed, attrs_allowed, req_name);
unclosed_delims.append(&mut this.unclosed_delims);
item
};

let (mut item, tokens) = if has_attrs {
let (mut item, tokens) = if needs_tokens {
let (item, tokens) = self.collect_tokens(parse_item)?;
(item, Some(tokens))
} else {
