Skip to content

Commit

Permalink
Allow duplicate regexp capture group names in different branches
Browse files Browse the repository at this point in the history
FEATURE: Support ES2025 duplicate capture group names in regular
expressions.

Issue #1290
Issue #1291
  • Loading branch information
marijnh committed Apr 16, 2024
1 parent ed4a7a1 commit 7b7c1cd
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 11 deletions.
2 changes: 2 additions & 0 deletions .eslintrc.js
Expand Up @@ -21,6 +21,8 @@ module.exports = {
],
plugins: ["eslint-plugin-import"],
rules: {
"no-unreachable-loop": "off",
"no-empty": "off",
curly: "off",
eqeqeq: ["error", "always", {null: "ignore"}],
indent: [
Expand Down
68 changes: 57 additions & 11 deletions acorn/src/regexp.js
Expand Up @@ -5,6 +5,32 @@ import {hasOwn, codePointToString} from "./util.js"

const pp = Parser.prototype

// Track disjunction structure to determine whether a duplicate
// capture group name is allowed because it is in a separate branch.
class BranchID {
  // `parent` is the branch of the enclosing disjunction (may be
  // null/undefined at the top level). `base` is the first branch of
  // this disjunction; when omitted, this branch is its own base.
  constructor(parent, base) {
    this.parent = parent
    this.base = base ? base : this
  }

  // Returns true when this branch (or one of its ancestors) and `alt`
  // (or one of its ancestors) are distinct branches that share the
  // same base, i.e. are siblings of one disjunction.
  separatedFrom(alt) {
    let mine = this
    while (mine) {
      let theirs = alt
      while (theirs) {
        const sameDisjunction = mine.base === theirs.base
        if (sameDisjunction && mine !== theirs) return true
        theirs = theirs.parent
      }
      mine = mine.parent
    }
    return false
  }

  // Creates the next branch of the same disjunction: same parent,
  // same base, new identity.
  sibling() {
    const {parent, base} = this
    return new BranchID(parent, base)
  }
}

export class RegExpValidationState {
constructor(parser) {
this.parser = parser
Expand All @@ -22,8 +48,9 @@ export class RegExpValidationState {
this.lastAssertionIsQuantifiable = false
this.numCapturingParens = 0
this.maxBackReference = 0
this.groupNames = []
this.groupNames = Object.create(null)
this.backReferenceNames = []
this.alternative = null
}

reset(start, pattern, flags) {
Expand Down Expand Up @@ -140,6 +167,11 @@ pp.validateRegExpFlags = function(state) {
}
}

// Reports whether `obj` exposes at least one enumerable property.
// Uses for...in, so inherited enumerable properties count as well;
// null and undefined yield false.
function hasProp(obj) {
  let found = false
  for (const key in obj) {
    found = true
    break
  }
  return found
}

/**
* Validate the pattern part of a given RegExpLiteral.
*
Expand All @@ -154,7 +186,7 @@ pp.validateRegExpPattern = function(state) {
// |Pattern[~U, +N]| and use this result instead. Throw a *SyntaxError*
// exception if _P_ did not conform to the grammar, if any elements of _P_
// were not matched by the parse, or if any Early Error conditions exist.
if (!state.switchN && this.options.ecmaVersion >= 9 && state.groupNames.length > 0) {
if (!state.switchN && this.options.ecmaVersion >= 9 && hasProp(state.groupNames)) {
state.switchN = true
this.regexp_pattern(state)
}
Expand All @@ -168,8 +200,9 @@ pp.regexp_pattern = function(state) {
state.lastAssertionIsQuantifiable = false
state.numCapturingParens = 0
state.maxBackReference = 0
state.groupNames.length = 0
state.groupNames = Object.create(null)
state.backReferenceNames.length = 0
state.branchID = null

this.regexp_disjunction(state)

Expand All @@ -186,18 +219,22 @@ pp.regexp_pattern = function(state) {
state.raise("Invalid escape")
}
for (const name of state.backReferenceNames) {
if (state.groupNames.indexOf(name) === -1) {
if (!state.groupNames[name]) {
state.raise("Invalid named capture referenced")
}
}
}

// https://www.ecma-international.org/ecma-262/8.0/#prod-Disjunction
pp.regexp_disjunction = function(state) {
let trackDisjunction = this.options.ecmaVersion >= 16
if (trackDisjunction) state.branchID = new BranchID(state.branchID, null, 0)
this.regexp_alternative(state)
while (state.eat(0x7C /* | */)) {
if (trackDisjunction) state.branchID = state.branchID.sibling()
this.regexp_alternative(state)
}
if (trackDisjunction) state.branchID = state.branchID.parent

// Make the same message as V8.
if (this.regexp_eatQuantifier(state, true)) {
Expand All @@ -210,8 +247,7 @@ pp.regexp_disjunction = function(state) {

// https://www.ecma-international.org/ecma-262/8.0/#prod-Alternative
pp.regexp_alternative = function(state) {
while (state.pos < state.source.length && this.regexp_eatTerm(state))
;
while (state.pos < state.source.length && this.regexp_eatTerm(state)) {}
}

// https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Term
Expand Down Expand Up @@ -447,14 +483,24 @@ pp.regexp_eatExtendedPatternCharacter = function(state) {
// `?` GroupName
pp.regexp_groupSpecifier = function(state) {
if (state.eat(0x3F /* ? */)) {
if (this.regexp_eatGroupName(state)) {
if (state.groupNames.indexOf(state.lastStringValue) !== -1) {
if (!this.regexp_eatGroupName(state)) state.raise("Invalid group")
let trackDisjunction = this.options.ecmaVersion >= 16
let known = state.groupNames[state.lastStringValue]
if (known) {
if (trackDisjunction) {
for (let altID of known) {
if (!altID.separatedFrom(state.branchID))
state.raise("Duplicate capture group name")
}
} else {
state.raise("Duplicate capture group name")
}
state.groupNames.push(state.lastStringValue)
return
}
state.raise("Invalid group")
if (trackDisjunction) {
(known || (state.groupNames[state.lastStringValue] = [])).push(state.branchID)
} else {
state.groupNames[state.lastStringValue] = true
}
}
}

Expand Down
1 change: 1 addition & 0 deletions test/run.js
Expand Up @@ -15,6 +15,7 @@
require("./tests-regexp-2020.js");
require("./tests-regexp-2022.js");
require("./tests-regexp-2024.js");
require("./tests-regexp-2025.js");
require("./tests-json-superset.js");
require("./tests-optional-catch-binding.js");
require("./tests-bigint.js");
Expand Down
18 changes: 18 additions & 0 deletions test/tests-regexp-2025.js
@@ -0,0 +1,18 @@
// Tests for duplicate named capture groups in regular expressions
// (parsed with ecmaVersion 2025, contrasted with 2024).

// When `exports` is defined (presumably a CommonJS environment), pull
// the test helpers from the driver; otherwise `test` and `testFail`
// are expected to be provided by the surrounding scope.
if (typeof exports !== "undefined") {
var test = require("./driver.js").test
var testFail = require("./driver.js").testFail
}

// The same name in two alternatives of one disjunction parses with
// ecmaVersion 2025 but is reported as a duplicate with 2024.
test("/(?<x>a)|(?<x>b)/", {}, {ecmaVersion: 2025})
testFail("/(?<x>a)|(?<x>b)/", "Invalid regular expression: /(?<x>a)|(?<x>b)/: Duplicate capture group name (1:1)", {ecmaVersion: 2024 })
// The same name twice within a single alternative is still an error.
testFail("/(?<x>a)(?<x>b)/", "Invalid regular expression: /(?<x>a)(?<x>b)/: Duplicate capture group name (1:1)", {ecmaVersion: 2025})
// Same checks with the disjunction nested in a non-capturing group
// and followed by a named back reference.
test("/(?:(?<x>a)|(?<x>b))\\k<x>/", {}, {ecmaVersion: 2025})
testFail("/(?:(?<x>a)|(?<x>b))\\k<x>/", "Invalid regular expression: /(?:(?<x>a)|(?<x>b))\\k<x>/: Duplicate capture group name (1:1)", {ecmaVersion: 2024 })
testFail("/(?:(?<x>a)(?<x>b))\\k<x>/", "Invalid regular expression: /(?:(?<x>a)(?<x>b))\\k<x>/: Duplicate capture group name (1:1)", {ecmaVersion: 2025})
// Several names repeated across alternatives, and more than two
// alternatives reusing one name.
test("/(?<y>a)(?<x>a)|(?<x>b)(?<y>b)/", {}, {ecmaVersion: 2025})
test("/(?<x>a)|(?<x>b)|(?<x>c)/", {}, {ecmaVersion: 2025})
// A back reference may name a group defined in a different alternative.
test("/(?<x>a)|\\k<x>/", {}, {ecmaVersion: 2025})
// A repeated name is rejected when two of its occurrences are not in
// separate alternatives, even if other occurrences are.
testFail("/(?<x>a)|(?<x>b)(?<x>c)/", "Invalid regular expression: /(?<x>a)|(?<x>b)(?<x>c)/: Duplicate capture group name (1:1)", {ecmaVersion: 2025})
testFail("/(?:(?<x>a)|(?<x>b))(?<x>c)/", "Invalid regular expression: /(?:(?<x>a)|(?<x>b))(?<x>c)/: Duplicate capture group name (1:1)", {ecmaVersion: 2025})
testFail("/(?<x>a)(?:(?<x>b)|(?<x>c))/", "Invalid regular expression: /(?<x>a)(?:(?<x>b)|(?<x>c))/: Duplicate capture group name (1:1)", {ecmaVersion: 2025})
testFail("/(?:(?:(?<x>a)|(?<x>b))|(?:))(?<x>c)/", "Invalid regular expression: /(?:(?:(?<x>a)|(?<x>b))|(?:))(?<x>c)/: Duplicate capture group name (1:1)", {ecmaVersion: 2025})

0 comments on commit 7b7c1cd

Please sign in to comment.