Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster checkReservedWord #13386

Merged
merged 5 commits into from Jun 1, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -0,0 +1,23 @@
import Benchmark from "benchmark";
import baseline from "../../lib/index-main.js";
import current from "../../lib/index.js";
import { report } from "../util.mjs";

const suite = new Benchmark.Suite();
function createInput(length) {
return "abcde12345z;".repeat(length);
}
current.parse("a");
function benchCases(name, implementation, options) {
for (const length of [64, 128, 256, 512]) {
const input = createInput(length);
suite.add(`${name} ${length} length-11 identifiers`, () => {
implementation.parse(input, options);
});
}
}

benchCases("baseline", baseline);
benchCases("current", current);

suite.on("cycle", report).run();
23 changes: 23 additions & 0 deletions packages/babel-parser/benchmark/many-identifiers/await.mjs
@@ -0,0 +1,23 @@
import Benchmark from "benchmark";
import baseline from "../../lib/index-main.js";
import current from "../../lib/index.js";
import { report } from "../util.mjs";

const suite = new Benchmark.Suite();
function createInput(length) {
return "await;".repeat(length);
}
current.parse("a");
function benchCases(name, implementation, options) {
for (const length of [64, 128, 256, 512]) {
const input = createInput(length);
suite.add(`${name} ${length} await identifier`, () => {
implementation.parse(input, options);
});
}
}

benchCases("baseline", baseline);
benchCases("current", current);

suite.on("cycle", report).run();
45 changes: 27 additions & 18 deletions packages/babel-parser/src/parser/expression.js
Expand Up @@ -28,6 +28,7 @@ import {
isStrictReservedWord,
isStrictBindReservedWord,
isIdentifierStart,
canBeReservedWord,
} from "../util/identifier";
import type { Pos } from "../util/location";
import { Position } from "../util/location";
Expand Down Expand Up @@ -2358,13 +2359,14 @@ export default class ExpressionParser extends LValParser {
// `class` and `function` keywords push function-type token context into this.context.
// But there is no chance to pop the context if the keyword is consumed
// as an identifier such as a property name.
const curContext = this.curContext();
if (
(type === tt._class || type === tt._function) &&
(curContext === ct.functionStatement ||
curContext === ct.functionExpression)
) {
this.state.context.pop();
if (type === tt._class || type === tt._function) {
const curContext = this.curContext();
if (
curContext === ct.functionStatement ||
curContext === ct.functionExpression
) {
this.state.context.pop();
}
}
} else {
throw this.unexpected();
Expand All @@ -2389,12 +2391,22 @@ export default class ExpressionParser extends LValParser {
checkKeywords: boolean,
isBinding: boolean,
): void {
if (this.prodParam.hasYield && word === "yield") {
this.raise(startLoc, Errors.YieldBindingIdentifier);
// All JavaScript reserved words are not longer than 10 characters.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The longest reserved word is instanceof and implements.

JLHwung marked this conversation as resolved.
Show resolved Hide resolved
if (word.length > 10) {
return;
}
// Most identifiers are not reservedWord-like, they don't need special
// treatments afterward, which very likely ends up throwing errors
if (!canBeReservedWord(word)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or maybe we can move this check after the if yield/await/arguments checks, and just use isStrictBindReservedWord? Intuitively it shouldn't be slower, but I didn't check.

return;
}

if (word === "await") {
if (word === "yield") {
if (this.prodParam.hasYield) {
this.raise(startLoc, Errors.YieldBindingIdentifier);
return;
}
} else if (word === "await") {
if (this.prodParam.hasAwait) {
this.raise(startLoc, Errors.AwaitBindingIdentifier);
return;
Expand All @@ -2407,16 +2419,13 @@ export default class ExpressionParser extends LValParser {
Errors.AwaitBindingIdentifier,
);
}
} else if (word === "arguments") {
if (this.scope.inClassAndNotInNonArrowFunction) {
this.raise(startLoc, Errors.ArgumentsInClass);
return;
}
}

if (
this.scope.inClass &&
!this.scope.inNonArrowFunction &&
word === "arguments"
) {
this.raise(startLoc, Errors.ArgumentsInClass);
return;
}
if (checkKeywords && isKeyword(word)) {
this.raise(startLoc, Errors.UnexpectedKeyword, word);
return;
Expand Down
63 changes: 63 additions & 0 deletions packages/babel-parser/src/util/identifier.js
Expand Up @@ -21,3 +21,66 @@ export const keywordRelationalOperator = /^in(stanceof)?$/;
export function isIteratorStart(current: number, next: number): boolean {
return current === charCodes.atSign && next === charCodes.atSign;
}

// This is the comprehensive set of JavaScript reserved words
// If a word is in this set, it could be a reserved word,
// depending on sourceType/strictMode/binding info. In other words
// if a word is not in this set, it is not a reserved word under
// any circumstance.
const reservedWordLikeSet = new Set([
"break",
"case",
"catch",
"continue",
"debugger",
"default",
"do",
"else",
"finally",
"for",
"function",
"if",
"return",
"switch",
"throw",
"try",
"var",
"const",
"while",
"with",
"new",
"this",
"super",
"class",
"extends",
"export",
"import",
"null",
"true",
"false",
"in",
"instanceof",
"typeof",
"void",
"delete",
// strict
"implements",
"interface",
"let",
"package",
"private",
"protected",
"public",
"static",
"yield",
// strictBind
"eval",
"arguments",
// reservedWorkLike
"enum",
"await",
]);

export function canBeReservedWord(word: string): boolean {
return reservedWordLikeSet.has(word);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To avoid maintaining another reserved words list, could this be word === "await" || word === "enum" || isStrictBindReservedWord(word)?

Copy link

@KFlash KFlash May 28, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of that long list, why not do something simple as x & ReservedWord ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Set#has hashes the input string and checks if the hash value is in the table (source), since the the input string is scanned only during hash key is computed and the collision is rare by definition of hash, checking presence takes 1 scan of input and O(1) of the set size.

The StringEquals first checks the length, then the memory address and then fallbacks to byte-by-byte comparison. (source) If we check word === "await" and then isStrictBindReservedWord(word), the word is read for 1.1 times (average on the universe distribution of string length), 1 for the hash computation and 1/10 for comparison with "await". word === "await" also introduces extra branch conditions.

To avoid maintaining another reserved words list

I agree it is not ideal to duplicate keywords, WDYT if I add canBeReservedWord to helper-validator-identifier in Babel 7.15? Since that will be a feature, I can draft a subseqent PR that moves canBeReservedWord to the validator package based on this one so we don't have to wait until 7.15.

why not do something simple as x & ReservedWord ?

@KFlash Can you elaborate? How can & achieves searching here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 for deferring it to 7.15

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JLHwung It's possible if you make things complex and add some bitwise masks behind each reserved word token.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you verify whether using a prototye-less object is faster? My intuition is that Sets are algorithmically fast, but the actual implementation in V8 is slow.

Copy link

@KFlash KFlash May 29, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jridgewell It's faster because I "pack" all reserved words into a set of bitwise masks. It also consumes less memory. You are only dealing with series of number. see Kataw for an example. Same you can do with tokens, keywords, and everything you need. No need for a lookup.

Witch one is fastest? Set and lookup or this one
(token & ReserwedWord) === Reservedword // return true if reserved word

or a switch statement with 60 cases and 100lines of code to get all expression nodes or this one?

(token & IsExpressionNode) === IsExpressionNode // return true if expression node

It's very obvious what's fastest, but I think it's out of scope for Babel

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jridgewell I come up with a benchmark file comparing these three approaches: Set#has, Object brand checks and (switch length + string equals).

The benchmark result on Node.js 16.1.0:

object brand checks different string: 2059273 ops/sec ±0.42% (0ms)
object brand checks constant string: 31712533 ops/sec ±0.22% (0ms)
set#has different string: 17756450 ops/sec ±1.26% (0ms)
set#has constant string: 38050433 ops/sec ±1.54% (0ms)
string#equals different string: 13789367 ops/sec ±0.44% (0ms)
string#equals constant string: 709703804 ops/sec ±0.42% (0ms)

On different string cases, where each input is in different memory address, Set#has is 700% faster than prototype-less object brand check. On constant string cases, Set#has is 20% faster than prototype-less object brand check.

On different string cases, Set#has is 20% faster than (switch length + string equals). The latter significantly outperform Set#has when the input is a constant string, which is expected because V8 optimizes out the per-character equality branch of StringEquals. However in Babel parser, each identifier name have different address.

@KFlash Yes we can do a single bitwise check if we replace the data structure of token storage and create different token for different keywords. However Babel currently exposes its token storage via options.tokens so we have to maintain compatibility here.

}
35 changes: 18 additions & 17 deletions packages/babel-parser/src/util/scope.js
Expand Up @@ -49,22 +49,26 @@ export default class ScopeHandler<IScope: Scope = Scope> {
}

get inFunction() {
return (this.currentVarScope().flags & SCOPE_FUNCTION) > 0;
return (this.currentVarScopeFlags() & SCOPE_FUNCTION) > 0;
}
get allowSuper() {
return (this.currentThisScope().flags & SCOPE_SUPER) > 0;
return (this.currentThisScopeFlags() & SCOPE_SUPER) > 0;
}
get allowDirectSuper() {
return (this.currentThisScope().flags & SCOPE_DIRECT_SUPER) > 0;
return (this.currentThisScopeFlags() & SCOPE_DIRECT_SUPER) > 0;
}
get inClass() {
return (this.currentThisScope().flags & SCOPE_CLASS) > 0;
return (this.currentThisScopeFlags() & SCOPE_CLASS) > 0;
}
get inClassAndNotInNonArrowFunction() {
const flags = this.currentThisScopeFlags();
return (flags & SCOPE_CLASS) > 0 && (flags & SCOPE_FUNCTION) === 0;
}
get inStaticBlock() {
return (this.currentThisScope().flags & SCOPE_STATIC_BLOCK) > 0;
return (this.currentThisScopeFlags() & SCOPE_STATIC_BLOCK) > 0;
}
get inNonArrowFunction() {
return (this.currentThisScope().flags & SCOPE_FUNCTION) > 0;
return (this.currentThisScopeFlags() & SCOPE_FUNCTION) > 0;
}
get treatFunctionsAsVar() {
return this.treatFunctionsAsVarInScope(this.currentScope());
Expand Down Expand Up @@ -189,25 +193,22 @@ export default class ScopeHandler<IScope: Scope = Scope> {
}

// $FlowIgnore
currentVarScope(): IScope {
currentVarScopeFlags(): ScopeFlags {
for (let i = this.scopeStack.length - 1; ; i--) {
const scope = this.scopeStack[i];
if (scope.flags & SCOPE_VAR) {
return scope;
const { flags } = this.scopeStack[i];
if (flags & SCOPE_VAR) {
return flags;
}
}
}

// Could be useful for `arguments`, `this`, `new.target`, `super()`, `super.property`, and `super[property]`.
// $FlowIgnore
currentThisScope(): IScope {
currentThisScopeFlags(): ScopeFlags {
for (let i = this.scopeStack.length - 1; ; i--) {
const scope = this.scopeStack[i];
if (
(scope.flags & SCOPE_VAR || scope.flags & SCOPE_CLASS) &&
!(scope.flags & SCOPE_ARROW)
) {
return scope;
const { flags } = this.scopeStack[i];
if (flags & (SCOPE_VAR | SCOPE_CLASS) && !(flags & SCOPE_ARROW)) {
return flags;
}
}
}
Expand Down