From 8723af97006e8ea23bb0288468b890468f603826 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 10 Jun 2021 13:57:24 -0400 Subject: [PATCH 1/5] chore: bump babel parser baseline --- packages/babel-parser/package.json | 2 +- yarn.lock | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/babel-parser/package.json b/packages/babel-parser/package.json index 866d1cce98b6..54d4c3bf579e 100644 --- a/packages/babel-parser/package.json +++ b/packages/babel-parser/package.json @@ -33,7 +33,7 @@ "node": ">=6.0.0" }, "devDependencies": { - "@babel-baseline/parser": "npm:@babel/parser@^7.14.4", + "@babel-baseline/parser": "npm:@babel/parser@^7.14.5", "@babel/code-frame": "workspace:*", "@babel/helper-fixtures": "workspace:*", "@babel/helper-validator-identifier": "workspace:*", diff --git a/yarn.lock b/yarn.lock index b234e3ccf50d..2d12d0ef8971 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5,12 +5,12 @@ __metadata: version: 4 cacheKey: 7 -"@babel-baseline/parser@npm:@babel/parser@^7.14.4": - version: 7.14.4 - resolution: "@babel/parser@npm:7.14.4" +"@babel-baseline/parser@npm:@babel/parser@^7.14.5": + version: 7.14.5 + resolution: "@babel/parser@npm:7.14.5" bin: parser: ./bin/babel-parser.js - checksum: 3bc067c1ee0e0178d365e1b2988ea1a0d6d37af37870ea1a7e80729b3bdc40acda083cac44ce72f63a5b31a489e35120f617bd41f312dec4c86cf814cff8e64a + checksum: 55c14793888cb7d54275811e7f13136875df1ee4fc368f3f10cff46ebdf95b6a072e706a0486be0ac5686a597cbfb82f33b5f66aa6ba80ff50b73bca945035c6 languageName: node linkType: hard @@ -658,6 +658,7 @@ __metadata: resolution: "@babel/helper-module-transforms@condition:BABEL_8_BREAKING?:workspace:^7.14.5#2510a1" dependencies: "@babel/helper-module-transforms-BABEL_8_BREAKING-false": "npm:@babel/helper-module-transforms@workspace:^7.14.5" + checksum: eb4895913562bf398b8bf7e6c68a0380f153f52f2715b3685f9d07e376725227678c2f920dfe0772012dfed655e037534619de86bb9f3284b92555f8bf9d0f42 languageName: node linkType: hard @@ -972,7 +973,7 @@ __metadata: version: 0.0.0-use.local resolution: "@babel/parser@workspace:packages/babel-parser" dependencies: - "@babel-baseline/parser": "npm:@babel/parser@^7.14.4" + "@babel-baseline/parser": "npm:@babel/parser@^7.14.5" "@babel/code-frame": "workspace:*" "@babel/helper-fixtures": "workspace:*" "@babel/helper-validator-identifier": "workspace:*" From 9515e0be7e597eb71bcb068c356a8abed7b23211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 10 Jun 2021 13:57:47 -0400 Subject: [PATCH 2/5] chore: add benchmark --- .../benchmark/large-regexp/bench.mjs | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 packages/babel-parser/benchmark/large-regexp/bench.mjs diff --git a/packages/babel-parser/benchmark/large-regexp/bench.mjs b/packages/babel-parser/benchmark/large-regexp/bench.mjs new file mode 100644 index 000000000000..67a6bdfb08e7 --- /dev/null +++ b/packages/babel-parser/benchmark/large-regexp/bench.mjs @@ -0,0 +1,22 @@ +import Benchmark from "benchmark"; +import baseline from "@babel-baseline/parser"; +import current from "../../lib/index.js"; +import { report } from "../util.mjs"; + +const suite = new Benchmark.Suite(); +function createInput(length) { + return "const a = /" + "[/\\\\]".repeat(length / 4) + "/igsudm"; +} +function benchCases(name, implementation, options) { + for (const length of [256, 512, 1024, 2048]) { + const input = createInput(length); + suite.add(`${name} ${length}-size RegExp literal `, () => { + implementation.parse(input, options); + }); + } +} + +benchCases("baseline", baseline); +benchCases("current", current); + +suite.on("cycle", report).run(); From 636b0bf291e3a7cd027ebd6063c212b905b76f5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 10 Jun 2021 14:00:35 -0400 Subject: [PATCH 3/5] perf: use charCodeAt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit baseline 256-size RegExp literal : 66124 ops/sec ±8.34% (0.015ms) baseline 512-size RegExp literal : 43351 ops/sec ±1.05% (0.023ms) baseline 1024-size RegExp literal : 24620 ops/sec ±0.34% (0.041ms) baseline 2048-size RegExp literal : 12957 ops/sec ±0.2% (0.077ms) current 256-size RegExp literal : 151662 ops/sec ±12.21% (0.007ms) current 512-size RegExp literal : 113828 ops/sec ±0.37% (0.009ms) current 1024-size RegExp literal : 67246 ops/sec ±1.95% (0.015ms) current 2048-size RegExp literal : 35645 ops/sec ±1.69% (0.028ms) --- packages/babel-parser/src/tokenizer/index.js | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js index e7ee7f6d4beb..71b7196d5df1 100644 --- a/packages/babel-parser/src/tokenizer/index.js +++ b/packages/babel-parser/src/tokenizer/index.js @@ -12,7 +12,6 @@ import { type TokContext, types as ct } from "./context"; import ParserErrors, { Errors, type ErrorTemplate } from "../parser/error"; import { SourceLocation } from "../util/location"; import { - lineBreak, lineBreakG, isNewLine, isWhitespace, @@ -980,21 +979,21 @@ export default class Tokenizer extends ParserErrors { if (this.state.pos >= this.length) { throw this.raise(start, Errors.UnterminatedRegExp); } - const ch = this.input.charAt(this.state.pos); - if (lineBreak.test(ch)) { + const ch = this.input.charCodeAt(this.state.pos); + if (isNewLine(ch)) { throw this.raise(start, Errors.UnterminatedRegExp); } if (escaped) { escaped = false; } else { - if (ch === "[") { + if (ch === charCodes.leftSquareBracket) { inClass = true; - } else if (ch === "]" && inClass) { + } else if (ch === charCodes.rightSquareBracket && inClass) { inClass = false; - } else if (ch === "/" && !inClass) { + } else if (ch === charCodes.slash && !inClass) { break; } - escaped = ch === "\\"; + escaped = ch === charCodes.backslash; } ++this.state.pos; } From 0f2b924c6fd06a5344a7455c84638690908a34c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 10 Jun 2021 14:15:31 -0400 Subject: [PATCH 4/5] perf: update pos to state on finishing regex token MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit baseline 256-size RegExp literal : 142419 ops/sec ±14.37% (0.007ms) baseline 512-size RegExp literal : 110003 ops/sec ±0.95% (0.009ms) baseline 1024-size RegExp literal : 69509 ops/sec ±0.68% (0.014ms) baseline 2048-size RegExp literal : 36217 ops/sec ±4.63% (0.028ms) current 256-size RegExp literal : 166167 ops/sec ±1.04% (0.006ms) current 512-size RegExp literal : 120889 ops/sec ±1.25% (0.008ms) current 1024-size RegExp literal : 76181 ops/sec ±1.46% (0.013ms) current 2048-size RegExp literal : 44898 ops/sec ±1.25% (0.022ms) --- packages/babel-parser/src/tokenizer/index.js | 25 ++++++++++---------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js index 71b7196d5df1..28f8ff8d1a45 100644 --- a/packages/babel-parser/src/tokenizer/index.js +++ b/packages/babel-parser/src/tokenizer/index.js @@ -975,11 +975,12 @@ export default class Tokenizer extends ParserErrors { readRegexp(): void { const start = this.state.start + 1; let escaped, inClass; - for (;;) { - if (this.state.pos >= this.length) { + let { pos } = this.state; + for (; ; ++pos) { + if (pos >= this.length) { throw this.raise(start, Errors.UnterminatedRegExp); } - const ch = this.input.charCodeAt(this.state.pos); + const ch = this.input.charCodeAt(pos); if (isNewLine(ch)) { throw this.raise(start, Errors.UnterminatedRegExp); } @@ -995,33 +996,33 @@ export default class Tokenizer extends ParserErrors { } escaped = ch === charCodes.backslash; } - ++this.state.pos; } - const content = this.input.slice(start, this.state.pos); - ++this.state.pos; + const content = this.input.slice(start, pos); + ++pos; let mods = ""; - while (this.state.pos < this.length) { - const char = this.input[this.state.pos]; - const charCode = this.codePointAtPos(this.state.pos); + while (pos < this.length) { + const char = this.input[pos]; + const charCode = this.codePointAtPos(pos); if (VALID_REGEX_FLAGS.has(char)) { if (mods.indexOf(char) > -1) { - this.raise(this.state.pos + 1, Errors.DuplicateRegExpFlags); + this.raise(pos + 1, Errors.DuplicateRegExpFlags); } } else if ( isIdentifierChar(charCode) || charCode === charCodes.backslash ) { - this.raise(this.state.pos + 1, Errors.MalformedRegExpFlags); + this.raise(pos + 1, Errors.MalformedRegExpFlags); } else { break; } - ++this.state.pos; + ++pos; mods += char; } + this.state.pos = pos; this.finishToken(tt.regexp, { pattern: content, From f377065cd285246e1d8a1d6815437bf5e28be60d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= Date: Thu, 10 Jun 2021 15:32:49 -0400 Subject: [PATCH 5/5] perf: compare charcode on valid_regex_flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit baseline 256 small regexp literal with all flags: 4342 ops/sec ±1.57% (0.23ms) baseline 512 small regexp literal with all flags: 2213 ops/sec ±1.51% (0.452ms) baseline 1024 small regexp literal with all flags: 1144 ops/sec ±0.22% (0.874ms) baseline 2048 small regexp literal with all flags: 541 ops/sec ±1.12% (1.849ms) current 256 small regexp literal with all flags: 4643 ops/sec ±1.3% (0.215ms) current 512 small regexp literal with all flags: 2355 ops/sec ±1.14% (0.425ms) current 1024 small regexp literal with all flags: 1176 ops/sec ±0.87% (0.85ms) current 2048 small regexp literal with all flags: 553 ops/sec ±1.31% (1.807ms) --- .../many-small-all-flags-regexp/bench.mjs | 22 +++++++++++++++++ packages/babel-parser/src/tokenizer/index.js | 24 ++++++++++++------- 2 files changed, 37 insertions(+), 9 deletions(-) create mode 100644 packages/babel-parser/benchmark/many-small-all-flags-regexp/bench.mjs diff --git a/packages/babel-parser/benchmark/many-small-all-flags-regexp/bench.mjs b/packages/babel-parser/benchmark/many-small-all-flags-regexp/bench.mjs new file mode 100644 index 000000000000..8575ace39c0e --- /dev/null +++ b/packages/babel-parser/benchmark/many-small-all-flags-regexp/bench.mjs @@ -0,0 +1,22 @@ +import Benchmark from "benchmark"; +import baseline from "../../lib/index-v2.js"; +import current from "../../lib/index.js"; +import { report } from "../util.mjs"; + +const suite = new Benchmark.Suite(); +function createInput(length) { + return "/x/dgimsuy;".repeat(length); +} +function benchCases(name, implementation, options) { + for (const length of [256, 512, 1024, 2048]) { + const input = createInput(length); + suite.add(`${name} ${length} small regexp literal with all flags`, () => { + implementation.parse(input, options); + }); + } +} + +benchCases("baseline", baseline); +benchCases("current", current); + +suite.on("cycle", report).run(); diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js index 28f8ff8d1a45..c1c9848b424d 100644 --- a/packages/babel-parser/src/tokenizer/index.js +++ b/packages/babel-parser/src/tokenizer/index.js @@ -20,7 +20,15 @@ import { import State from "./state"; import type { LookaheadState } from "./state"; -const VALID_REGEX_FLAGS = new Set(["g", "m", "s", "i", "y", "u", "d"]); +const VALID_REGEX_FLAGS = new Set([ + charCodes.lowercaseG, + charCodes.lowercaseM, + charCodes.lowercaseS, + charCodes.lowercaseI, + charCodes.lowercaseY, + charCodes.lowercaseU, + charCodes.lowercaseD, +]); // The following character codes are forbidden from being // an immediate sibling of NumericLiteralSeparator _ @@ -1003,17 +1011,15 @@ export default class Tokenizer extends ParserErrors { let mods = ""; while (pos < this.length) { - const char = this.input[pos]; - const charCode = this.codePointAtPos(pos); + const cp = this.codePointAtPos(pos); + // It doesn't matter if cp > 0xffff, the loop will either throw or break because we check on cp + const char = String.fromCharCode(cp); - if (VALID_REGEX_FLAGS.has(char)) { - if (mods.indexOf(char) > -1) { + if (VALID_REGEX_FLAGS.has(cp)) { + if (mods.includes(char)) { this.raise(pos + 1, Errors.DuplicateRegExpFlags); } - } else if ( - isIdentifierChar(charCode) || - charCode === charCodes.backslash - ) { + } else if (isIdentifierChar(cp) || cp === charCodes.backslash) { this.raise(pos + 1, Errors.MalformedRegExpFlags); } else { break;