Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster readRegexp #13453

Merged
merged 5 commits into from Jun 10, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
22 changes: 22 additions & 0 deletions packages/babel-parser/benchmark/large-regexp/bench.mjs
@@ -0,0 +1,22 @@
import Benchmark from "benchmark";
import baseline from "@babel-baseline/parser";
import current from "../../lib/index.js";
import { report } from "../util.mjs";

// Single Benchmark suite for this script; benchCases() adds every case to it.
const suite = new Benchmark.Suite();
/**
 * Build the benchmark source text: a `const` declaration whose initializer
 * is one long RegExp literal carrying the flags `igsudm`.
 *
 * The literal's body is the 5-character class `[/\\]` (a slash and an escaped
 * backslash inside brackets) repeated `length / 4` times; `String#repeat`
 * truncates a fractional count.
 *
 * @param {number} length - Size knob for the literal; the class is repeated `length / 4` times.
 * @returns {string} Source code of the form `const a = /[/\\][/\\].../igsudm`.
 */
function createInput(length) {
  const classCount = length / 4;
  const patternBody = "[/\\\\]".repeat(classCount);
  return ["const a = /", patternBody, "/igsudm"].join("");
}
/**
 * Add one benchmark case per input size (256, 512, 1024, 2048) to the
 * file-level `suite`. Each case calls `implementation.parse` on an input
 * that is built once, up front, by `createInput`.
 *
 * @param {string} name - Label prefix identifying the implementation in case titles.
 * @param {{ parse: Function }} implementation - Object whose `parse(input, options)` is measured.
 * @param {*} [options] - Forwarded unchanged as the second argument of `parse`.
 */
function benchCases(name, implementation, options) {
  const sizes = [256, 512, 1024, 2048];
  sizes.forEach((size) => {
    // Build the input outside the measured callback so only parse() is timed.
    const source = createInput(size);
    const title = `${name} ${size}-size RegExp literal `;
    suite.add(title, () => {
      implementation.parse(source, options);
    });
  });
}

// Register the same cases for the baseline parser ("@babel-baseline/parser")
// and for the local build ("../../lib/index.js"). No options are passed, so
// parse() receives `undefined` as its second argument.
benchCases("baseline", baseline);
benchCases("current", current);

// `report` (imported from ../util.mjs) is the handler for the suite's "cycle"
// event; run() then starts the suite.
suite.on("cycle", report).run();
@@ -0,0 +1,22 @@
import Benchmark from "benchmark";
import baseline from "../../lib/index-v2.js";
import current from "../../lib/index.js";
import { report } from "../util.mjs";

// Single Benchmark suite for this script; benchCases() adds every case to it.
const suite = new Benchmark.Suite();
/**
 * Build the benchmark source text: `length` back-to-back expression
 * statements, each the tiny RegExp literal `/x/` with the flags `dgimsuy`.
 *
 * @param {number} length - How many times the statement is repeated.
 * @returns {string} The statement `/x/dgimsuy;` concatenated `length` times.
 */
function createInput(length) {
  const statement = "/x/dgimsuy;";
  return statement.repeat(length);
}
/**
 * Add one benchmark case per repetition count (256, 512, 1024, 2048) to the
 * file-level `suite`. Each case calls `implementation.parse` on an input
 * that is built once, up front, by `createInput`.
 *
 * @param {string} name - Label prefix identifying the implementation in case titles.
 * @param {{ parse: Function }} implementation - Object whose `parse(input, options)` is measured.
 * @param {*} [options] - Forwarded unchanged as the second argument of `parse`.
 */
function benchCases(name, implementation, options) {
  const counts = [256, 512, 1024, 2048];
  counts.forEach((count) => {
    // Build the input outside the measured callback so only parse() is timed.
    const source = createInput(count);
    const title = `${name} ${count} small regexp literal with all flags`;
    suite.add(title, () => {
      implementation.parse(source, options);
    });
  });
}

// Register the same cases for the baseline implementation and for the local
// build ("../../lib/index.js"). No options are passed, so parse() receives
// `undefined` as its second argument.
// NOTE(review): `baseline` is imported from "../../lib/index-v2.js" in this
// script — confirm that path is the intended baseline build.
benchCases("baseline", baseline);
benchCases("current", current);

// `report` (imported from ../util.mjs) is the handler for the suite's "cycle"
// event; run() then starts the suite.
suite.on("cycle", report).run();
2 changes: 1 addition & 1 deletion packages/babel-parser/package.json
Expand Up @@ -33,7 +33,7 @@
"node": ">=6.0.0"
},
"devDependencies": {
"@babel-baseline/parser": "npm:@babel/parser@^7.14.4",
"@babel-baseline/parser": "npm:@babel/parser@^7.14.5",
"@babel/code-frame": "workspace:*",
"@babel/helper-fixtures": "workspace:*",
"@babel/helper-validator-identifier": "workspace:*",
Expand Down
56 changes: 31 additions & 25 deletions packages/babel-parser/src/tokenizer/index.js
Expand Up @@ -12,7 +12,6 @@ import { type TokContext, types as ct } from "./context";
import ParserErrors, { Errors, type ErrorTemplate } from "../parser/error";
import { SourceLocation } from "../util/location";
import {
lineBreak,
lineBreakG,
isNewLine,
isWhitespace,
Expand All @@ -21,7 +20,15 @@ import {
import State from "./state";
import type { LookaheadState } from "./state";

const VALID_REGEX_FLAGS = new Set(["g", "m", "s", "i", "y", "u", "d"]);
const VALID_REGEX_FLAGS = new Set([
charCodes.lowercaseG,
charCodes.lowercaseM,
charCodes.lowercaseS,
charCodes.lowercaseI,
charCodes.lowercaseY,
charCodes.lowercaseU,
charCodes.lowercaseD,
]);

// The following character codes are forbidden from being
// an immediate sibling of NumericLiteralSeparator _
Expand Down Expand Up @@ -976,53 +983,52 @@ export default class Tokenizer extends ParserErrors {
readRegexp(): void {
const start = this.state.start + 1;
let escaped, inClass;
for (;;) {
if (this.state.pos >= this.length) {
let { pos } = this.state;
for (; ; ++pos) {
if (pos >= this.length) {
throw this.raise(start, Errors.UnterminatedRegExp);
}
const ch = this.input.charAt(this.state.pos);
if (lineBreak.test(ch)) {
const ch = this.input.charCodeAt(pos);
if (isNewLine(ch)) {
throw this.raise(start, Errors.UnterminatedRegExp);
}
if (escaped) {
escaped = false;
} else {
if (ch === "[") {
if (ch === charCodes.leftSquareBracket) {
inClass = true;
} else if (ch === "]" && inClass) {
} else if (ch === charCodes.rightSquareBracket && inClass) {
inClass = false;
} else if (ch === "/" && !inClass) {
} else if (ch === charCodes.slash && !inClass) {
break;
}
escaped = ch === "\\";
escaped = ch === charCodes.backslash;
}
++this.state.pos;
}
const content = this.input.slice(start, this.state.pos);
++this.state.pos;
const content = this.input.slice(start, pos);
++pos;

let mods = "";

while (this.state.pos < this.length) {
const char = this.input[this.state.pos];
const charCode = this.codePointAtPos(this.state.pos);
while (pos < this.length) {
const cp = this.codePointAtPos(pos);
// It doesn't matter if cp > 0xffff, the loop will either throw or break because we check on cp
const char = String.fromCharCode(cp);

if (VALID_REGEX_FLAGS.has(char)) {
if (mods.indexOf(char) > -1) {
this.raise(this.state.pos + 1, Errors.DuplicateRegExpFlags);
if (VALID_REGEX_FLAGS.has(cp)) {
if (mods.includes(char)) {
this.raise(pos + 1, Errors.DuplicateRegExpFlags);
}
} else if (
isIdentifierChar(charCode) ||
charCode === charCodes.backslash
) {
this.raise(this.state.pos + 1, Errors.MalformedRegExpFlags);
} else if (isIdentifierChar(cp) || cp === charCodes.backslash) {
this.raise(pos + 1, Errors.MalformedRegExpFlags);
} else {
break;
}

++this.state.pos;
++pos;
mods += char;
}
this.state.pos = pos;

this.finishToken(tt.regexp, {
pattern: content,
Expand Down
11 changes: 6 additions & 5 deletions yarn.lock
Expand Up @@ -5,12 +5,12 @@ __metadata:
version: 4
cacheKey: 7

"@babel-baseline/parser@npm:@babel/parser@^7.14.4":
version: 7.14.4
resolution: "@babel/parser@npm:7.14.4"
"@babel-baseline/parser@npm:@babel/parser@^7.14.5":
version: 7.14.5
resolution: "@babel/parser@npm:7.14.5"
bin:
parser: ./bin/babel-parser.js
checksum: 3bc067c1ee0e0178d365e1b2988ea1a0d6d37af37870ea1a7e80729b3bdc40acda083cac44ce72f63a5b31a489e35120f617bd41f312dec4c86cf814cff8e64a
checksum: 55c14793888cb7d54275811e7f13136875df1ee4fc368f3f10cff46ebdf95b6a072e706a0486be0ac5686a597cbfb82f33b5f66aa6ba80ff50b73bca945035c6
languageName: node
linkType: hard

Expand Down Expand Up @@ -658,6 +658,7 @@ __metadata:
resolution: "@babel/helper-module-transforms@condition:BABEL_8_BREAKING?:workspace:^7.14.5#2510a1"
dependencies:
"@babel/helper-module-transforms-BABEL_8_BREAKING-false": "npm:@babel/helper-module-transforms@workspace:^7.14.5"
checksum: eb4895913562bf398b8bf7e6c68a0380f153f52f2715b3685f9d07e376725227678c2f920dfe0772012dfed655e037534619de86bb9f3284b92555f8bf9d0f42
languageName: node
linkType: hard

Expand Down Expand Up @@ -972,7 +973,7 @@ __metadata:
version: 0.0.0-use.local
resolution: "@babel/parser@workspace:packages/babel-parser"
dependencies:
"@babel-baseline/parser": "npm:@babel/parser@^7.14.4"
"@babel-baseline/parser": "npm:@babel/parser@^7.14.5"
"@babel/code-frame": "workspace:*"
"@babel/helper-fixtures": "workspace:*"
"@babel/helper-validator-identifier": "workspace:*"
Expand Down