diff --git a/packages/babel-parser/benchmark/large-regexp/bench.mjs b/packages/babel-parser/benchmark/large-regexp/bench.mjs
new file mode 100644
index 000000000000..67a6bdfb08e7
--- /dev/null
+++ b/packages/babel-parser/benchmark/large-regexp/bench.mjs
@@ -0,0 +1,32 @@
+import Benchmark from "benchmark";
+import baseline from "@babel-baseline/parser";
+import current from "../../lib/index.js";
+import { report } from "../util.mjs";
+
+const suite = new Benchmark.Suite();
+
+/**
+ * Builds `const a = /.../igsudm` where the pattern is `length / 4` copies of
+ * the character class `[/\\]` (a slash and an escaped backslash).
+ */
+function createInput(length) {
+  return "const a = /" + "[/\\\\]".repeat(length / 4) + "/igsudm";
+}
+
+/**
+ * Adds one case per input size to the shared suite. The input is built once,
+ * outside the measured callback, so only `implementation.parse` is timed.
+ */
+function benchCases(name, implementation, options) {
+  for (const length of [256, 512, 1024, 2048]) {
+    const input = createInput(length);
+    suite.add(`${name} ${length}-size RegExp literal `, () => {
+      implementation.parse(input, options);
+    });
+  }
+}
+
+benchCases("baseline", baseline);
+benchCases("current", current);
+
+suite.on("cycle", report).run();
diff --git a/packages/babel-parser/benchmark/many-small-all-flags-regexp/bench.mjs b/packages/babel-parser/benchmark/many-small-all-flags-regexp/bench.mjs
new file mode 100644
index 000000000000..8575ace39c0e
--- /dev/null
+++ b/packages/babel-parser/benchmark/many-small-all-flags-regexp/bench.mjs
@@ -0,0 +1,32 @@
+import Benchmark from "benchmark";
+import baseline from "@babel-baseline/parser";
+import current from "../../lib/index.js";
+import { report } from "../util.mjs";
+
+const suite = new Benchmark.Suite();
+
+/**
+ * Builds a program of `length` expression statements, each one the tiny
+ * RegExp literal `/x/dgimsuy;` carrying seven distinct flags.
+ */
+function createInput(length) {
+  return "/x/dgimsuy;".repeat(length);
+}
+
+/**
+ * Adds one case per input size to the shared suite. The input is built once,
+ * outside the measured callback, so only `implementation.parse` is timed.
+ */
+function benchCases(name, implementation, options) {
+  for (const length of [256, 512, 1024, 2048]) {
+    const input = createInput(length);
+    suite.add(`${name} ${length} small regexp literal with all flags`, () => {
+      implementation.parse(input, options);
+    });
+  }
+}
+
+benchCases("baseline", baseline);
+benchCases("current", current);
+
+suite.on("cycle", report).run();
diff --git a/packages/babel-parser/package.json b/packages/babel-parser/package.json
index 866d1cce98b6..54d4c3bf579e 100644
--- a/packages/babel-parser/package.json
+++ b/packages/babel-parser/package.json
@@ -33,7 +33,7 @@
     "node": ">=6.0.0"
   },
   "devDependencies": {
-    "@babel-baseline/parser": "npm:@babel/parser@^7.14.4",
+    "@babel-baseline/parser": "npm:@babel/parser@^7.14.5",
     "@babel/code-frame": "workspace:*",
     "@babel/helper-fixtures": "workspace:*",
     "@babel/helper-validator-identifier": "workspace:*",
diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js
index e7ee7f6d4beb..c1c9848b424d 100644
--- a/packages/babel-parser/src/tokenizer/index.js
+++ b/packages/babel-parser/src/tokenizer/index.js
@@ -12,7 +12,6 @@ import { type TokContext, types as ct } from "./context";
 import ParserErrors, { Errors, type ErrorTemplate } from "../parser/error";
 import { SourceLocation } from "../util/location";
 import {
-  lineBreak,
   lineBreakG,
   isNewLine,
   isWhitespace,
@@ -21,7 +20,15 @@ import {
 import State from "./state";
 import type { LookaheadState } from "./state";
 
-const VALID_REGEX_FLAGS = new Set(["g", "m", "s", "i", "y", "u", "d"]);
+const VALID_REGEX_FLAGS = new Set([
+  charCodes.lowercaseG,
+  charCodes.lowercaseM,
+  charCodes.lowercaseS,
+  charCodes.lowercaseI,
+  charCodes.lowercaseY,
+  charCodes.lowercaseU,
+  charCodes.lowercaseD,
+]); // char codes, looked up with the code point read in readRegexp
 
 // The following character codes are forbidden from being
 // an immediate sibling of NumericLiteralSeparator _
@@ -976,53 +983,52 @@ export default class Tokenizer extends ParserErrors {
   readRegexp(): void {
     const start = this.state.start + 1;
     let escaped, inClass;
-    for (;;) {
-      if (this.state.pos >= this.length) {
+    let { pos } = this.state; // local cursor, stored into state.pos below
+    for (; ; ++pos) {
+      if (pos >= this.length) {
         throw this.raise(start, Errors.UnterminatedRegExp);
       }
-      const ch = this.input.charAt(this.state.pos);
-      if (lineBreak.test(ch)) {
+      const ch = this.input.charCodeAt(pos);
+      if (isNewLine(ch)) {
         throw this.raise(start, Errors.UnterminatedRegExp);
       }
       if (escaped) {
         escaped = false;
       } else {
-        if (ch === "[") {
+        if (ch === charCodes.leftSquareBracket) {
           inClass = true;
-        } else if (ch === "]" && inClass) {
+        } else if (ch === charCodes.rightSquareBracket && inClass) {
           inClass = false;
-        } else if (ch === "/" && !inClass) {
+        } else if (ch === charCodes.slash && !inClass) {
           break;
         }
-        escaped = ch === "\\";
+        escaped = ch === charCodes.backslash;
       }
-      ++this.state.pos;
     }
-    const content = this.input.slice(start, this.state.pos);
-    ++this.state.pos;
+    const content = this.input.slice(start, pos);
+    ++pos; // step over the closing "/" that ended the scan above
 
     let mods = "";
 
-    while (this.state.pos < this.length) {
-      const char = this.input[this.state.pos];
-      const charCode = this.codePointAtPos(this.state.pos);
+    while (pos < this.length) {
+      const cp = this.codePointAtPos(pos);
+      // String.fromCharCode truncates cp > 0xffff, but such a cp is never in VALID_REGEX_FLAGS, so it can only reach the malformed-flag or break branch below
+      const char = String.fromCharCode(cp);
 
-      if (VALID_REGEX_FLAGS.has(char)) {
-        if (mods.indexOf(char) > -1) {
-          this.raise(this.state.pos + 1, Errors.DuplicateRegExpFlags);
+      if (VALID_REGEX_FLAGS.has(cp)) {
+        if (mods.includes(char)) {
+          this.raise(pos + 1, Errors.DuplicateRegExpFlags);
         }
-      } else if (
-        isIdentifierChar(charCode) ||
-        charCode === charCodes.backslash
-      ) {
-        this.raise(this.state.pos + 1, Errors.MalformedRegExpFlags);
+      } else if (isIdentifierChar(cp) || cp === charCodes.backslash) {
+        this.raise(pos + 1, Errors.MalformedRegExpFlags);
       } else {
         break;
       }
 
-      ++this.state.pos;
+      ++pos; // NOTE(review): moves one unit even if cp > 0xffff; confirm
       mods += char;
     }
+    this.state.pos = pos;
 
     this.finishToken(tt.regexp, {
       pattern: content,
diff --git a/yarn.lock b/yarn.lock
index b234e3ccf50d..2d12d0ef8971 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -5,12 +5,12 @@ __metadata:
   version: 4
   cacheKey: 7
 
-"@babel-baseline/parser@npm:@babel/parser@^7.14.4":
-  version: 7.14.4
-  resolution: "@babel/parser@npm:7.14.4"
+"@babel-baseline/parser@npm:@babel/parser@^7.14.5":
+  version: 7.14.5
+  resolution: "@babel/parser@npm:7.14.5"
   bin:
     parser: ./bin/babel-parser.js
-  checksum: 3bc067c1ee0e0178d365e1b2988ea1a0d6d37af37870ea1a7e80729b3bdc40acda083cac44ce72f63a5b31a489e35120f617bd41f312dec4c86cf814cff8e64a
+  checksum: 55c14793888cb7d54275811e7f13136875df1ee4fc368f3f10cff46ebdf95b6a072e706a0486be0ac5686a597cbfb82f33b5f66aa6ba80ff50b73bca945035c6
   languageName: node
   linkType: hard
 
@@ -658,6 +658,7 @@ __metadata:
   resolution: "@babel/helper-module-transforms@condition:BABEL_8_BREAKING?:workspace:^7.14.5#2510a1"
   dependencies:
     "@babel/helper-module-transforms-BABEL_8_BREAKING-false": "npm:@babel/helper-module-transforms@workspace:^7.14.5"
+  checksum: eb4895913562bf398b8bf7e6c68a0380f153f52f2715b3685f9d07e376725227678c2f920dfe0772012dfed655e037534619de86bb9f3284b92555f8bf9d0f42
   languageName: node
   linkType: hard
 
@@ -972,7 +973,7 @@ __metadata:
   version: 0.0.0-use.local
   resolution: "@babel/parser@workspace:packages/babel-parser"
   dependencies:
-    "@babel-baseline/parser": "npm:@babel/parser@^7.14.4"
+    "@babel-baseline/parser": "npm:@babel/parser@^7.14.5"
     "@babel/code-frame": "workspace:*"
     "@babel/helper-fixtures": "workspace:*"
     "@babel/helper-validator-identifier": "workspace:*"