From 8723af97006e8ea23bb0288468b890468f603826 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= <jlhwung@gmail.com>
Date: Thu, 10 Jun 2021 13:57:24 -0400
Subject: [PATCH 1/5] chore: bump babel parser baseline

---
 packages/babel-parser/package.json |  2 +-
 yarn.lock                          | 11 ++++++-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/packages/babel-parser/package.json b/packages/babel-parser/package.json
index 866d1cce98b6..54d4c3bf579e 100644
--- a/packages/babel-parser/package.json
+++ b/packages/babel-parser/package.json
@@ -33,7 +33,7 @@
     "node": ">=6.0.0"
   },
   "devDependencies": {
-    "@babel-baseline/parser": "npm:@babel/parser@^7.14.4",
+    "@babel-baseline/parser": "npm:@babel/parser@^7.14.5",
     "@babel/code-frame": "workspace:*",
     "@babel/helper-fixtures": "workspace:*",
     "@babel/helper-validator-identifier": "workspace:*",
diff --git a/yarn.lock b/yarn.lock
index b234e3ccf50d..2d12d0ef8971 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -5,12 +5,12 @@ __metadata:
   version: 4
   cacheKey: 7
 
-"@babel-baseline/parser@npm:@babel/parser@^7.14.4":
-  version: 7.14.4
-  resolution: "@babel/parser@npm:7.14.4"
+"@babel-baseline/parser@npm:@babel/parser@^7.14.5":
+  version: 7.14.5
+  resolution: "@babel/parser@npm:7.14.5"
   bin:
     parser: ./bin/babel-parser.js
-  checksum: 3bc067c1ee0e0178d365e1b2988ea1a0d6d37af37870ea1a7e80729b3bdc40acda083cac44ce72f63a5b31a489e35120f617bd41f312dec4c86cf814cff8e64a
+  checksum: 55c14793888cb7d54275811e7f13136875df1ee4fc368f3f10cff46ebdf95b6a072e706a0486be0ac5686a597cbfb82f33b5f66aa6ba80ff50b73bca945035c6
   languageName: node
   linkType: hard
 
@@ -658,6 +658,7 @@ __metadata:
   resolution: "@babel/helper-module-transforms@condition:BABEL_8_BREAKING?:workspace:^7.14.5#2510a1"
   dependencies:
     "@babel/helper-module-transforms-BABEL_8_BREAKING-false": "npm:@babel/helper-module-transforms@workspace:^7.14.5"
+  checksum: eb4895913562bf398b8bf7e6c68a0380f153f52f2715b3685f9d07e376725227678c2f920dfe0772012dfed655e037534619de86bb9f3284b92555f8bf9d0f42
   languageName: node
   linkType: hard
 
@@ -972,7 +973,7 @@ __metadata:
   version: 0.0.0-use.local
   resolution: "@babel/parser@workspace:packages/babel-parser"
   dependencies:
-    "@babel-baseline/parser": "npm:@babel/parser@^7.14.4"
+    "@babel-baseline/parser": "npm:@babel/parser@^7.14.5"
     "@babel/code-frame": "workspace:*"
     "@babel/helper-fixtures": "workspace:*"
     "@babel/helper-validator-identifier": "workspace:*"

From 9515e0be7e597eb71bcb068c356a8abed7b23211 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= <jlhwung@gmail.com>
Date: Thu, 10 Jun 2021 13:57:47 -0400
Subject: [PATCH 2/5] chore: add benchmark

---
 .../benchmark/large-regexp/bench.mjs          | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)
 create mode 100644 packages/babel-parser/benchmark/large-regexp/bench.mjs

diff --git a/packages/babel-parser/benchmark/large-regexp/bench.mjs b/packages/babel-parser/benchmark/large-regexp/bench.mjs
new file mode 100644
index 000000000000..67a6bdfb08e7
--- /dev/null
+++ b/packages/babel-parser/benchmark/large-regexp/bench.mjs
@@ -0,0 +1,22 @@
+import Benchmark from "benchmark";
+import baseline from "@babel-baseline/parser"; // alias of the published @babel/parser declared in package.json devDependencies
+import current from "../../lib/index.js"; // the parser under test, loaded from this package's lib/
+import { report } from "../util.mjs";
+
+const suite = new Benchmark.Suite(); // single suite: baseline and current cases are added to it and run together
+function createInput(length) {
+  return "const a = /" + "[/\\\\]".repeat(length / 4) + "/igsudm"; // one RegExp literal: `[/\\]` repeated length / 4 times, flags igsudm
+}
+function benchCases(name, implementation, options) {
+  for (const length of [256, 512, 1024, 2048]) {
+    const input = createInput(length); // built once per size, outside the measured callback
+    suite.add(`${name} ${length}-size RegExp literal `, () => {
+      implementation.parse(input, options); // `options` is not passed by either call below, so it is undefined here
+    });
+  }
+}
+
+benchCases("baseline", baseline); // registers the four sizes for the baseline parser
+benchCases("current", current); // registers the same four sizes for the parser under test
+
+suite.on("cycle", report).run(); // attach the shared reporter to "cycle" events, then start the suite

From 636b0bf291e3a7cd027ebd6063c212b905b76f5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= <jlhwung@gmail.com>
Date: Thu, 10 Jun 2021 14:00:35 -0400
Subject: [PATCH 3/5] perf: use charCodeAt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

baseline 256-size RegExp literal : 66124 ops/sec ±8.34% (0.015ms)
baseline 512-size RegExp literal : 43351 ops/sec ±1.05% (0.023ms)
baseline 1024-size RegExp literal : 24620 ops/sec ±0.34% (0.041ms)
baseline 2048-size RegExp literal : 12957 ops/sec ±0.2% (0.077ms)
current 256-size RegExp literal : 151662 ops/sec ±12.21% (0.007ms)
current 512-size RegExp literal : 113828 ops/sec ±0.37% (0.009ms)
current 1024-size RegExp literal : 67246 ops/sec ±1.95% (0.015ms)
current 2048-size RegExp literal : 35645 ops/sec ±1.69% (0.028ms)
---
 packages/babel-parser/src/tokenizer/index.js | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js
index e7ee7f6d4beb..71b7196d5df1 100644
--- a/packages/babel-parser/src/tokenizer/index.js
+++ b/packages/babel-parser/src/tokenizer/index.js
@@ -12,7 +12,6 @@ import { type TokContext, types as ct } from "./context";
 import ParserErrors, { Errors, type ErrorTemplate } from "../parser/error";
 import { SourceLocation } from "../util/location";
 import {
-  lineBreak,
   lineBreakG,
   isNewLine,
   isWhitespace,
@@ -980,21 +979,21 @@ export default class Tokenizer extends ParserErrors {
       if (this.state.pos >= this.length) {
         throw this.raise(start, Errors.UnterminatedRegExp);
       }
-      const ch = this.input.charAt(this.state.pos);
-      if (lineBreak.test(ch)) {
+      const ch = this.input.charCodeAt(this.state.pos);
+      if (isNewLine(ch)) {
         throw this.raise(start, Errors.UnterminatedRegExp);
       }
       if (escaped) {
         escaped = false;
       } else {
-        if (ch === "[") {
+        if (ch === charCodes.leftSquareBracket) {
           inClass = true;
-        } else if (ch === "]" && inClass) {
+        } else if (ch === charCodes.rightSquareBracket && inClass) {
           inClass = false;
-        } else if (ch === "/" && !inClass) {
+        } else if (ch === charCodes.slash && !inClass) {
           break;
         }
-        escaped = ch === "\\";
+        escaped = ch === charCodes.backslash;
       }
       ++this.state.pos;
     }

From 0f2b924c6fd06a5344a7455c84638690908a34c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= <jlhwung@gmail.com>
Date: Thu, 10 Jun 2021 14:15:31 -0400
Subject: [PATCH 4/5] perf: update pos to state on finishing regex token
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

baseline 256-size RegExp literal : 142419 ops/sec ±14.37% (0.007ms)
baseline 512-size RegExp literal : 110003 ops/sec ±0.95% (0.009ms)
baseline 1024-size RegExp literal : 69509 ops/sec ±0.68% (0.014ms)
baseline 2048-size RegExp literal : 36217 ops/sec ±4.63% (0.028ms)
current 256-size RegExp literal : 166167 ops/sec ±1.04% (0.006ms)
current 512-size RegExp literal : 120889 ops/sec ±1.25% (0.008ms)
current 1024-size RegExp literal : 76181 ops/sec ±1.46% (0.013ms)
current 2048-size RegExp literal : 44898 ops/sec ±1.25% (0.022ms)
---
 packages/babel-parser/src/tokenizer/index.js | 25 ++++++++++----------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js
index 71b7196d5df1..28f8ff8d1a45 100644
--- a/packages/babel-parser/src/tokenizer/index.js
+++ b/packages/babel-parser/src/tokenizer/index.js
@@ -975,11 +975,12 @@ export default class Tokenizer extends ParserErrors {
   readRegexp(): void {
     const start = this.state.start + 1;
     let escaped, inClass;
-    for (;;) {
-      if (this.state.pos >= this.length) {
+    let { pos } = this.state;
+    for (; ; ++pos) {
+      if (pos >= this.length) {
         throw this.raise(start, Errors.UnterminatedRegExp);
       }
-      const ch = this.input.charCodeAt(this.state.pos);
+      const ch = this.input.charCodeAt(pos);
       if (isNewLine(ch)) {
         throw this.raise(start, Errors.UnterminatedRegExp);
       }
@@ -995,33 +996,33 @@ export default class Tokenizer extends ParserErrors {
         }
         escaped = ch === charCodes.backslash;
       }
-      ++this.state.pos;
     }
-    const content = this.input.slice(start, this.state.pos);
-    ++this.state.pos;
+    const content = this.input.slice(start, pos);
+    ++pos;
 
     let mods = "";
 
-    while (this.state.pos < this.length) {
-      const char = this.input[this.state.pos];
-      const charCode = this.codePointAtPos(this.state.pos);
+    while (pos < this.length) {
+      const char = this.input[pos];
+      const charCode = this.codePointAtPos(pos);
 
       if (VALID_REGEX_FLAGS.has(char)) {
         if (mods.indexOf(char) > -1) {
-          this.raise(this.state.pos + 1, Errors.DuplicateRegExpFlags);
+          this.raise(pos + 1, Errors.DuplicateRegExpFlags);
         }
       } else if (
         isIdentifierChar(charCode) ||
         charCode === charCodes.backslash
       ) {
-        this.raise(this.state.pos + 1, Errors.MalformedRegExpFlags);
+        this.raise(pos + 1, Errors.MalformedRegExpFlags);
       } else {
         break;
       }
 
-      ++this.state.pos;
+      ++pos;
       mods += char;
     }
+    this.state.pos = pos;
 
     this.finishToken(tt.regexp, {
       pattern: content,

From f377065cd285246e1d8a1d6815437bf5e28be60d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hu=C3=A1ng=20J=C3=B9nli=C3=A0ng?= <jlhwung@gmail.com>
Date: Thu, 10 Jun 2021 15:32:49 -0400
Subject: [PATCH 5/5] perf: compare charcode on valid_regex_flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

baseline 256 small regexp literal with all flags: 4342 ops/sec ±1.57% (0.23ms)
baseline 512 small regexp literal with all flags: 2213 ops/sec ±1.51% (0.452ms)
baseline 1024 small regexp literal with all flags: 1144 ops/sec ±0.22% (0.874ms)
baseline 2048 small regexp literal with all flags: 541 ops/sec ±1.12% (1.849ms)
current 256 small regexp literal with all flags: 4643 ops/sec ±1.3% (0.215ms)
current 512 small regexp literal with all flags: 2355 ops/sec ±1.14% (0.425ms)
current 1024 small regexp literal with all flags: 1176 ops/sec ±0.87% (0.85ms)
current 2048 small regexp literal with all flags: 553 ops/sec ±1.31% (1.807ms)
---
 .../many-small-all-flags-regexp/bench.mjs     | 22 +++++++++++++++++
 packages/babel-parser/src/tokenizer/index.js  | 24 ++++++++++++-------
 2 files changed, 37 insertions(+), 9 deletions(-)
 create mode 100644 packages/babel-parser/benchmark/many-small-all-flags-regexp/bench.mjs

diff --git a/packages/babel-parser/benchmark/many-small-all-flags-regexp/bench.mjs b/packages/babel-parser/benchmark/many-small-all-flags-regexp/bench.mjs
new file mode 100644
index 000000000000..8575ace39c0e
--- /dev/null
+++ b/packages/babel-parser/benchmark/many-small-all-flags-regexp/bench.mjs
@@ -0,0 +1,22 @@
+import Benchmark from "benchmark";
+import baseline from "@babel-baseline/parser"; // published parser alias from devDependencies; replaces the untracked ../../lib/index-v2.js
+import current from "../../lib/index.js"; // the parser under test, loaded from this package's lib/
+import { report } from "../util.mjs";
+
+const suite = new Benchmark.Suite(); // single suite: baseline and current cases are added to it and run together
+function createInput(length) {
+  return "/x/dgimsuy;".repeat(length); // `length` statements, each a one-character RegExp literal carrying all seven flags
+}
+function benchCases(name, implementation, options) {
+  for (const length of [256, 512, 1024, 2048]) {
+    const input = createInput(length); // built once per size, outside the measured callback
+    suite.add(`${name} ${length} small regexp literal with all flags`, () => {
+      implementation.parse(input, options); // `options` is not passed by either call below, so it is undefined here
+    });
+  }
+}
+
+benchCases("baseline", baseline); // registers the four sizes for the baseline parser
+benchCases("current", current); // registers the same four sizes for the parser under test
+
+suite.on("cycle", report).run(); // attach the shared reporter to "cycle" events, then start the suite
diff --git a/packages/babel-parser/src/tokenizer/index.js b/packages/babel-parser/src/tokenizer/index.js
index 28f8ff8d1a45..c1c9848b424d 100644
--- a/packages/babel-parser/src/tokenizer/index.js
+++ b/packages/babel-parser/src/tokenizer/index.js
@@ -20,7 +20,15 @@ import {
 import State from "./state";
 import type { LookaheadState } from "./state";
 
-const VALID_REGEX_FLAGS = new Set(["g", "m", "s", "i", "y", "u", "d"]);
+const VALID_REGEX_FLAGS = new Set([
+  charCodes.lowercaseG,
+  charCodes.lowercaseM,
+  charCodes.lowercaseS,
+  charCodes.lowercaseI,
+  charCodes.lowercaseY,
+  charCodes.lowercaseU,
+  charCodes.lowercaseD,
+]);
 
 // The following character codes are forbidden from being
 // an immediate sibling of NumericLiteralSeparator _
@@ -1003,17 +1011,15 @@ export default class Tokenizer extends ParserErrors {
     let mods = "";
 
     while (pos < this.length) {
-      const char = this.input[pos];
-      const charCode = this.codePointAtPos(pos);
+      const cp = this.codePointAtPos(pos);
+      // It doesn't matter if cp > 0xffff: every valid flag is an ASCII letter, so for such cp the loop will either raise an error or break because we check on cp
+      const char = String.fromCharCode(cp);
 
-      if (VALID_REGEX_FLAGS.has(char)) {
-        if (mods.indexOf(char) > -1) {
+      if (VALID_REGEX_FLAGS.has(cp)) {
+        if (mods.includes(char)) {
           this.raise(pos + 1, Errors.DuplicateRegExpFlags);
         }
-      } else if (
-        isIdentifierChar(charCode) ||
-        charCode === charCodes.backslash
-      ) {
+      } else if (isIdentifierChar(cp) || cp === charCodes.backslash) {
         this.raise(pos + 1, Errors.MalformedRegExpFlags);
       } else {
         break;