Skip to content

Commit

Permalink
perf: return a sub-state on tokenizer lookahead
Browse files Browse the repository at this point in the history
  • Loading branch information
JLHwung committed May 20, 2021
1 parent 062982c commit 6402c9a
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 13 deletions.
3 changes: 1 addition & 2 deletions packages/babel-parser/src/plugins/flow/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import type Parser from "../../parser";
import { types as tt, type TokenType } from "../../tokenizer/types";
import * as N from "../../types";
import type { Pos, Position } from "../../util/location";
import type State from "../../tokenizer/state";
import { types as tc } from "../../tokenizer/context";
import * as charCodes from "charcodes";
import { isIteratorStart, isKeyword } from "../../util/identifier";
Expand Down Expand Up @@ -154,7 +153,7 @@ function hasTypeImportKind(node: N.Node): boolean {
return node.importKind === "type" || node.importKind === "typeof";
}

function isMaybeDefaultImport(state: State): boolean {
function isMaybeDefaultImport(state: { type: TokenType, value: any }): boolean {
return (
(state.type === tt.name || !!state.type.keyword) && state.value !== "from"
);
Expand Down
12 changes: 12 additions & 0 deletions packages/babel-parser/src/plugins/jsx/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ import { isIdentifierChar, isIdentifierStart } from "../../util/identifier";
import type { Position } from "../../util/location";
import { isNewLine } from "../../util/whitespace";
import { Errors, makeErrorTemplates, ErrorCodes } from "../../parser/error";
import type { LookaheadState } from "../../tokenizer/state";
import State from "../../tokenizer/state";

type JSXLookaheadState = LookaheadState & { inPropertyName: boolean };

const HEX_NUMBER = /^[\da-fA-F]+$/;
const DECIMAL_NUMBER = /^\d+$/;
Expand Down Expand Up @@ -573,6 +577,14 @@ export default (superClass: Class<Parser>): Class<Parser> =>
}
}

createLookaheadState(state: State): JSXLookaheadState {
const lookaheadState = ((super.createLookaheadState(
state,
): any): JSXLookaheadState);
lookaheadState.inPropertyName = state.inPropertyName;
return lookaheadState;
}

getTokenFromCode(code: number): void {
if (this.state.inPropertyName) return super.getTokenFromCode(code);

Expand Down
60 changes: 49 additions & 11 deletions packages/babel-parser/src/tokenizer/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
skipWhiteSpace,
} from "../util/whitespace";
import State from "./state";
import type { LookaheadState } from "./state";

const VALID_REGEX_FLAGS = new Set(["g", "m", "s", "i", "y", "u"]);

Expand Down Expand Up @@ -144,11 +145,9 @@ export default class Tokenizer extends ParserErrors {
// Move to the next token

next(): void {
if (!this.isLookahead) {
this.checkKeywordEscapes();
if (this.options.tokens) {
this.pushToken(new Token(this.state));
}
this.checkKeywordEscapes();
if (this.options.tokens) {
this.pushToken(new Token(this.state));
}

this.state.lastTokEnd = this.state.end;
Expand All @@ -175,14 +174,51 @@ export default class Tokenizer extends ParserErrors {
return this.state.type === type;
}

// TODO
/**
* Create a LookaheadState from current parser state
*
* @param {State} state
* @returns {LookaheadState}
* @memberof Tokenizer
*/
createLookaheadState(state: State): LookaheadState {
return {
pos: state.pos,
value: null,
type: state.type,
start: state.start,
end: state.end,
lastTokEnd: state.end,
context: [this.curContext()],
exprAllowed: state.exprAllowed,
inType: state.inType,
};
}

lookahead(): State {
/**
* lookahead peeks the next token, skipping changes to token context and
* comment stack. For performance it returns a limited LookaheadState
* instead of full parser state.
*
* The { column, line } Loc info is not included in lookahead since such usage
* is rare. Although it may return other location properties e.g. `curLine` and
* `lineStart`, these properties are not listed in the LookaheadState interface
* and thus the returned value is _NOT_ reliable.
*
* The tokenizer should make best efforts to avoid relying on parser states
* other than those defined in LookaheadState
*
* @returns {LookaheadState}
* @memberof Tokenizer
*/
lookahead(): LookaheadState {
const old = this.state;
this.state = old.clone(true);
// For performance we use a simplified tokenizer state structure
// $FlowIgnore
this.state = this.createLookaheadState(old);

this.isLookahead = true;
this.next();
this.nextToken();
this.isLookahead = false;

const curr = this.state;
Expand Down Expand Up @@ -397,12 +433,14 @@ export default class Tokenizer extends ParserErrors {

finishToken(type: TokenType, val: any): void {
this.state.end = this.state.pos;
this.state.endLoc = this.state.curPosition();
const prevType = this.state.type;
this.state.type = type;
this.state.value = val;

if (!this.isLookahead) this.updateContext(prevType);
if (!this.isLookahead) {
this.state.endLoc = this.state.curPosition();
this.updateContext(prevType);
}
}

// ### Token reading
Expand Down
11 changes: 11 additions & 0 deletions packages/babel-parser/src/tokenizer/state.js
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,14 @@ export default class State {
return state;
}
}

export type LookaheadState = {
pos: number,
value: any,
type: TokenType,
start: number,
end: number,
/* Used only in readSlashToken */
exprAllowed: boolean,
inType: boolean,
};

0 comments on commit 6402c9a

Please sign in to comment.