diff --git a/package-lock.json b/package-lock.json index d22a9454a9..b7667e1657 100644 --- a/package-lock.json +++ b/package-lock.json @@ -699,16 +699,6 @@ "integrity": "sha512-xVNN69YGDghOqCCtA6FI7avYrr02mTJjOgB0/f1VPD3pJC8QEvjTKWc4epDx8AqxxA75NI0QpVM2gPJXUbE4Tg==", "dev": true }, - "bindings": { - "version": "1.5.0", - "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", - "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", - "dev": true, - "optional": true, - "requires": { - "file-uri-to-path": "1.0.0" - } - }, "bluebird": { "version": "3.7.2", "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.7.2.tgz", @@ -1894,13 +1884,6 @@ "integrity": "sha1-PYpcZog6FqMMqGQ+hR8Zuqd5eRc=", "dev": true }, - "file-uri-to-path": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", - "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==", - "dev": true, - "optional": true - }, "fill-range": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-4.0.0.tgz", @@ -2056,562 +2039,11 @@ "dev": true }, "fsevents": { - "version": "1.2.11", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.11.tgz", - "integrity": "sha512-+ux3lx6peh0BpvY0JebGyZoiR4D+oYzdPZMKJwkZ+sFkNJzpL7tXc/wehS49gUAxg3tmMHPHZkA8JU2rhhgDHw==", + "version": "1.2.13", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-1.2.13.tgz", + "integrity": "sha512-oWb1Z6mkHIskLzEJ/XWX0srkpkTQ7vaopMQkyaEIoq0fmtFVxOthb8cCxeT+p3ynTdkk/RZwbgG4brR5BeWECw==", "dev": true, - "optional": true, - "requires": { - "bindings": "^1.5.0", - "nan": "^2.12.1", - "node-pre-gyp": "*" - }, - "dependencies": { - "abbrev": { - "version": "1.1.1", - "bundled": true, - "dev": true, - "optional": true - }, - "ansi-regex": { - "version": "2.1.1", - "bundled": true, - "dev": true, - "optional": true - }, - "aproba": { - "version": "1.2.0", - "bundled": true, - "dev": true, - "optional": true - }, - "are-we-there-yet": { - "version": "1.1.5", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "delegates": "^1.0.0", - "readable-stream": "^2.0.6" - } - }, - "balanced-match": { - "version": "1.0.0", - "bundled": true, - "dev": true, - "optional": true - }, - "brace-expansion": { - "version": "1.1.11", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "balanced-match": "^1.0.0", - "concat-map": "0.0.1" - } - }, - "chownr": { - "version": "1.1.3", - "bundled": true, - "dev": true, - "optional": true - }, - "code-point-at": { - "version": "1.1.0", - "bundled": true, - "dev": true, - "optional": true - }, - "concat-map": { - "version": "0.0.1", - "bundled": true, - "dev": true, - "optional": true - }, - "console-control-strings": { - "version": "1.1.0", - "bundled": true, - "dev": true, - "optional": true - }, - "core-util-is": { - "version": "1.0.2", - "bundled": true, - "dev": true, - "optional": true - }, - "debug": { - "version": "3.2.6", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "ms": "^2.1.1" - } - }, - "deep-extend": { - "version": "0.6.0", - "bundled": true, - "dev": true, - "optional": true - }, - "delegates": { - "version": "1.0.0", - "bundled": true, - "dev": true, - "optional": true - }, - "detect-libc": { - "version": "1.0.3", - "bundled": true, - "dev": true, - "optional": true - }, - "fs-minipass": { - "version": "1.2.7", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "minipass": "^2.6.0" - } - }, - "fs.realpath": { - "version": "1.0.0", - "bundled": true, - "dev": true, - "optional": true - }, - "gauge": { - "version": "2.7.4", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "aproba": "^1.0.3", - "console-control-strings": "^1.0.0", - "has-unicode": "^2.0.0", - "object-assign": "^4.1.0", - "signal-exit": "^3.0.0", - "string-width": "^1.0.1", - "strip-ansi": "^3.0.1", - "wide-align": "^1.1.0" - } - }, - "glob": { - "version": "7.1.6", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "fs.realpath": "^1.0.0", - "inflight": "^1.0.4", - "inherits": "2", - "minimatch": "^3.0.4", - "once": "^1.3.0", - "path-is-absolute": "^1.0.0" - } - }, - "has-unicode": { - "version": "2.0.1", - "bundled": true, - "dev": true, - "optional": true - }, - "iconv-lite": { - "version": "0.4.24", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "safer-buffer": ">= 2.1.2 < 3" - } - }, - "ignore-walk": { - "version": "3.0.3", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "minimatch": "^3.0.4" - } - }, - "inflight": { - "version": "1.0.6", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "once": "^1.3.0", - "wrappy": "1" - } - }, - "inherits": { - "version": "2.0.4", - "bundled": true, - "dev": true, - "optional": true - }, - "ini": { - "version": "1.3.5", - "bundled": true, - "dev": true, - "optional": true - }, - "is-fullwidth-code-point": { - "version": "1.0.0", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "number-is-nan": "^1.0.0" - } - }, - "isarray": { - "version": "1.0.0", - "bundled": true, - "dev": true, - "optional": true - }, - "minimatch": { - "version": "3.0.4", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "brace-expansion": "^1.1.7" - } - }, - "minimist": { - "version": "0.0.8", - "bundled": true, - "dev": true, - "optional": true - }, - "minipass": { - "version": "2.9.0", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "safe-buffer": "^5.1.2", - "yallist": "^3.0.0" - } - }, - "minizlib": { - "version": "1.3.3", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "minipass": "^2.9.0" - } - }, - "mkdirp": { - "version": "0.5.1", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "minimist": "0.0.8" - } - }, - "ms": { - "version": "2.1.2", - "bundled": true, - "dev": true, - "optional": true - }, - "needle": { - "version": "2.4.0", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "debug": "^3.2.6", - "iconv-lite": "^0.4.4", - "sax": "^1.2.4" - } - }, - "node-pre-gyp": { - "version": "0.14.0", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "detect-libc": "^1.0.2", - "mkdirp": "^0.5.1", - "needle": "^2.2.1", - "nopt": "^4.0.1", - "npm-packlist": "^1.1.6", - "npmlog": "^4.0.2", - "rc": "^1.2.7", - "rimraf": "^2.6.1", - "semver": "^5.3.0", - "tar": "^4.4.2" - } - }, - "nopt": { - "version": "4.0.1", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "abbrev": "1", - "osenv": "^0.1.4" - } - }, - "npm-bundled": { - "version": "1.1.1", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "npm-normalize-package-bin": "^1.0.1" - } - }, - "npm-normalize-package-bin": { - "version": "1.0.1", - "bundled": true, - "dev": true, - "optional": true - }, - "npm-packlist": { - "version": "1.4.7", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "ignore-walk": "^3.0.1", - "npm-bundled": "^1.0.1" - } - }, - "npmlog": { - "version": "4.1.2", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "are-we-there-yet": "~1.1.2", - "console-control-strings": "~1.1.0", - "gauge": "~2.7.3", - "set-blocking": "~2.0.0" - } - }, - "number-is-nan": { - "version": "1.0.1", - "bundled": true, - "dev": true, - "optional": true - }, - "object-assign": { - "version": "4.1.1", - "bundled": true, - "dev": true, - "optional": true - }, - "once": { - "version": "1.4.0", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "wrappy": "1" - } - }, - "os-homedir": { - "version": "1.0.2", - "bundled": true, - "dev": true, - "optional": true - }, - "os-tmpdir": { - "version": "1.0.2", - "bundled": true, - "dev": true, - "optional": true - }, - "osenv": { - "version": "0.1.5", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "os-homedir": "^1.0.0", - "os-tmpdir": "^1.0.0" - } - }, - "path-is-absolute": { - "version": "1.0.1", - "bundled": true, - "dev": true, - "optional": true - }, - "process-nextick-args": { - "version": "2.0.1", - "bundled": true, - "dev": true, - "optional": true - }, - "rc": { - "version": "1.2.8", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "deep-extend": "^0.6.0", - "ini": "~1.3.0", - "minimist": "^1.2.0", - "strip-json-comments": "~2.0.1" - }, - "dependencies": { - "minimist": { - "version": "1.2.0", - "bundled": true, - "dev": true, - "optional": true - } - } - }, - "readable-stream": { - "version": "2.3.6", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "core-util-is": "~1.0.0", - "inherits": "~2.0.3", - "isarray": "~1.0.0", - "process-nextick-args": "~2.0.0", - "safe-buffer": "~5.1.1", - "string_decoder": "~1.1.1", - "util-deprecate": "~1.0.1" - } - }, - "rimraf": { - "version": "2.7.1", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "glob": "^7.1.3" - } - }, - "safe-buffer": { - "version": "5.1.2", - "bundled": true, - "dev": true, - "optional": true - }, - "safer-buffer": { - "version": "2.1.2", - "bundled": true, - "dev": true, - "optional": true - }, - "sax": { - "version": "1.2.4", - "bundled": true, - "dev": true, - "optional": true - }, - "semver": { - "version": "5.7.1", - "bundled": true, - "dev": true, - "optional": true - }, - "set-blocking": { - "version": "2.0.0", - "bundled": true, - "dev": true, - "optional": true - }, - "signal-exit": { - "version": "3.0.2", - "bundled": true, - "dev": true, - "optional": true - }, - "string-width": { - "version": "1.0.2", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "code-point-at": "^1.0.0", - "is-fullwidth-code-point": "^1.0.0", - "strip-ansi": "^3.0.0" - } - }, - "string_decoder": { - "version": "1.1.1", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "safe-buffer": "~5.1.0" - } - }, - "strip-ansi": { - "version": "3.0.1", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "ansi-regex": "^2.0.0" - } - }, - "strip-json-comments": { - "version": "2.0.1", - "bundled": true, - "dev": true, - "optional": true - }, - "tar": { - "version": "4.4.13", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "chownr": "^1.1.1", - "fs-minipass": "^1.2.5", - "minipass": "^2.8.6", - "minizlib": "^1.2.1", - "mkdirp": "^0.5.0", - "safe-buffer": "^5.1.2", - "yallist": "^3.0.3" - } - }, - "util-deprecate": { - "version": "1.0.2", - "bundled": true, - "dev": true, - "optional": true - }, - "wide-align": { - "version": "1.1.3", - "bundled": true, - "dev": true, - "optional": true, - "requires": { - "string-width": "^1.0.2 || 2" - } - }, - "wrappy": { - "version": "1.0.2", - "bundled": true, - "dev": true, - "optional": true - }, - "yallist": { - "version": "3.1.1", - "bundled": true, - "dev": true, - "optional": true - } - } + "optional": true }, "function-bind": { "version": "1.1.1", @@ -4399,13 +3831,6 @@ "integrity": "sha512-kDcwXR4PS7caBpuRYYBUz9iVixUk3anO3f5OYFiIPwK/20vCzKCHyKoulbiDY1S53zD2bxUpxN/IJ+TnXjfvxg==", "dev": true }, - "nan": { - "version": "2.14.0", - "resolved": "https://registry.npmjs.org/nan/-/nan-2.14.0.tgz", - "integrity": "sha512-INOFj37C7k3AfaNTtX8RhsTw7qRy7eLET14cROi9+5HAVbbHuIWUHEauBv5qT4Av2tWasiTY1Jw6puUNqRJXQg==", - "dev": true, - "optional": true - }, "nanomatch": { "version": "1.2.13", "resolved": "https://registry.npmjs.org/nanomatch/-/nanomatch-1.2.13.tgz", @@ -5138,6 +4563,23 @@ "resolve": "^1.1.6" } }, + "refa": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/refa/-/refa-0.5.0.tgz", + "integrity": "sha512-080Hrd8Z8v9Audgt1WpopjMM8ai1umraSzgMvLxgCEt+B9HhguhKBmQkjnE5+1p5WLtWQdr2rGE2c80gYvi9uQ==", + "dev": true, + "requires": { + "regexpp": "^3.1.0" + }, + "dependencies": { + "regexpp": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/regexpp/-/regexpp-3.1.0.tgz", + "integrity": "sha512-ZOIzd8yVsQQA7j8GCSlPGXwg5PfmA1mrq0JP4nGhh54LaKN3xdai/vHUDu74pKwV8OxseMS65u2NImosQcSD0Q==", + "dev": true + } + } + }, "regenerator-runtime": { "version": "0.13.7", "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.7.tgz", diff --git a/package.json b/package.json index c48a5bd3b1..337817672a 100755 --- a/package.json +++ b/package.json @@ -48,6 +48,7 @@ "jsdom": "^13.0.0", "mocha": "^6.2.0", "pump": "^3.0.0", + "refa": "0.5.0", "regexpp": "^2.0.1", "simple-git": "^1.107.0", "yargs": "^13.2.2" diff --git a/tests/pattern-tests.js b/tests/pattern-tests.js index 15272c9002..0f6da8e6ce 100644 --- a/tests/pattern-tests.js +++ b/tests/pattern-tests.js @@ -5,7 +5,14 @@ const PrismLoader = require('./helper/prism-loader'); const { BFS, parseRegex } = require('./helper/util'); const { languages } = require('../components.json'); const { visitRegExpAST } = require('regexpp'); +const { JS, Words, NFA } = require('refa'); +/** + * A set of all safe (non-exponentially-backtracking) RegExp literals (string). + * + * @type {Set} + */ +const safeRegexes = new Set(); for (const lang in languages) { if (lang === 'meta') { @@ -52,7 +59,10 @@ for (const lang in languages) { * @param {any} Prism * * @typedef {import("./helper/util").LiteralAST} LiteralAST + * @typedef {import("regexpp/ast").CapturingGroup} CapturingGroup * @typedef {import("regexpp/ast").Element} Element + * @typedef {import("regexpp/ast").Group} Group + * @typedef {import("regexpp/ast").LookaroundAssertion} LookaroundAssertion * @typedef {import("regexpp/ast").Pattern} Pattern */ function testPatterns(Prism) { @@ -132,7 +142,7 @@ function testPatterns(Prism) { * @param {(values: ForEachCapturingGroupCallbackValue) => void} callback * * @typedef ForEachCapturingGroupCallbackValue - * @property {import("regexpp/ast").CapturingGroup} group + * @property {CapturingGroup} group * @property {number} number Note: Starts at 1. */ function forEachCapturingGroup(pattern, callback) { @@ -205,6 +215,32 @@ function testPatterns(Prism) { } } + /** + * Returns whether the given node either is or is a child of what is effectively a Kleene star. + * + * @param {import("regexpp/ast").Node} node + * @returns {boolean} + */ + function underAStar(node) { + if (node.type === "Quantifier" && node.max > 10) { + return true; + } else if (node.parent) { + return underAStar(node.parent); + } else { + return false; + } + } + + /** + * @param {Iterable} iter + * @returns {T | undefined} + * @template T + */ + function firstOf(iter) { + const [first] = iter; + return first; + } + it('- should not match the empty string', function () { forEachPattern(({ pattern, tokenPath }) => { @@ -338,4 +374,134 @@ function testPatterns(Prism) { }); }); + it('- should not cause exponential backtracking', function () { + forEachPattern(({ pattern, ast, tokenPath }) => { + const patternStr = String(pattern); + if (safeRegexes.has(patternStr)) { + // we know that the pattern won't cause exp backtracking because we checked before + return; + } + + const parser = JS.Parser.fromAst(ast); + /** + * Parses the given element and creates its NFA. + * + * @param {import("refa").JS.ParsableElement} element + * @returns {NFA} + */ + function toNFA(element) { + const { expression, maxCharacter } = parser.parseElement(element, { + backreferences: "resolve", + lookarounds: "disable", + }); + return NFA.fromRegex(expression, { maxCharacter }); + } + + /** + * Checks whether the alternatives of the given node are disjoint. If the alternatives are not disjoint + * and the give node is a descendant of an effective Kleene star, then an error will be thrown. + * + * @param {CapturingGroup | Group | LookaroundAssertion} node + * @returns {void} + */ + function checkDisjointAlternatives(node) { + if (!underAStar(node) || node.alternatives.length < 2) { + return; + } + + const alternatives = node.alternatives; + + const total = toNFA(alternatives[0]); + total.withoutEmptyWord(); + for (let i = 1, l = alternatives.length; i < l; i++) { + const a = alternatives[i]; + const current = toNFA(a); + current.withoutEmptyWord(); + + if (!total.isDisjointWith(current)) { + assert.fail(`${tokenPath}: The alternative \`${a.raw}\` is not disjoint with at least one previous alternative.` + + ` This will cause exponential backtracking.` + + `\n\nTo fix this issue, you have to rewrite the ${node.type} \`${node.raw}\`.` + + ` The goal is that all of its alternatives are disjoint.` + + ` This means that if a (sub-)string is matched by the ${node.type}, then only one of its alternatives can match the (sub-)string.` + + `\n\nExample: \`(?:[ab]|\\w|::)+\`` + + `\nThe alternatives of the group are not disjoint because the string "a" can be matched by both \`[ab]\` and \`\\w\`.` + + ` In this example, the pattern can easily be fixed because the \`[ab]\` is a subset of the \`\\w\`, so its enough to remove the \`[ab]\` alternative to get \`(?:\\w|::)+\` as the fixed pattern.` + + `\nIn the real world, patterns can be a lot harder to fix.` + + ` If you are trying to make the tests pass for a pull request but can\'t fix the issue yourself, then make the pull request (or commit) anyway.` + + ` A maintainer will help you.` + + `\n\nFull pattern:\n${pattern}`); + } else if (i !== l - 1) { + total.union(current); + } + } + } + + visitRegExpAST(ast.pattern, { + onCapturingGroupLeave: checkDisjointAlternatives, + onGroupLeave: checkDisjointAlternatives, + onAssertionLeave(node) { + if (node.kind === "lookahead" || node.kind === "lookbehind") { + checkDisjointAlternatives(node); + } + }, + + onQuantifierLeave(node) { + if (node.max < 10) { + return; // not a star + } + if (node.element.type !== "CapturingGroup" && node.element.type !== "Group") { + return; // not a group + } + + // The idea here is the following: + // + // We have found a part `A*` of the regex (`A` is assumed to not accept the empty word). Let `I` be + // the intersection of `A` and `A{2,}`. If `I` is not empty, then there exists a non-empty word `w` + // that is accepted by both `A` and `A{2,}`. That means that there exists some `m>1` for which `w` + // is accepted by `A{m}`. + // This means that there are at least two ways `A*` can accept `w`. It can be accepted as `A` or as + // `A{m}`. Hence there are at least 2^n ways for `A*` to accept the word `w{n}`. This is the main + // requirement for exponential backtracking. + // + // This is actually only a crude approximation for the real analysis that would have to be done. We + // would actually have to check the intersection `A{p}` and `A{p+1,}` for all p>0. However, in most + // cases, the approximation is good enough. + + const nfa = toNFA(node.element); + nfa.withoutEmptyWord(); + const twoStar = nfa.copy(); + twoStar.quantify(2, Infinity); + + if (!nfa.isDisjointWith(twoStar)) { + const word = Words.pickMostReadableWord(firstOf(nfa.intersectionWordSets(twoStar))); + const example = Words.fromUnicodeToString(word); + assert.fail(`${tokenPath}: The quantifier \`${node.raw}\` ambiguous for all words ${JSON.stringify(example)}.repeat(n) for any n>1.` + + ` This will cause exponential backtracking.` + + `\n\nTo fix this issue, you have to rewrite the element (let's call it E) of the quantifier.` + + ` The goal is modify E such that it is disjoint with repetitions of itself.` + + ` This means that if a (sub-)string is matched by E, then it must not be possible for E{2}, E{3}, E{4}, etc. to match that (sub-)string.` + + `\n\nExample 1: \`(?:\\w+|::)+\`` + + `\nThe problem lies in \`\\w+\` because \`\\w+\` and \`(?:\\w+){2}\` are not disjoint as the string "aa" is fully matched by both.` + + ` In this example, the pattern can easily be fixed by changing \`\\w+\` to \`\\w\`.` + + `\nExample 2: \`(?:\\w|Foo)+\`` + + `\nThe problem lies in \`\\w\` and \`Foo\` because the string "Foo" can be matched as either repeating \`\\w\` 3 times or by using the \`Foo\` alternative once.` + + ` In this example, the pattern can easily be fixed because the \`Foo\` alternative is redundant can can be removed.` + + `\nExample 3: \`(?:\\.\\w+(?:<.*?>)?)+\`` + + `\nThe problem lies in \`<.*?>\`. The string ".a<>.a<>" can be matched as either \`\\. \\w < . . . . >\` or \`\\. \\w < > \\. \\w < >\`.` + + ` When it comes to exponential backtracking, it doesn't matter whether a quantifier is greedy or lazy.` + + ` This means that the lazy \`.*?\` can jump over \`>\`.` + + ` In this example, the pattern can easily be fixed because we just have to prevent \`.*?\` jumping over \`>\`.` + + ` This can done by replacing \`<.*?>\` with \`<[^\\r\\n>]*>\`.` + + `\n\nIn the real world, patterns can be a lot harder to fix.` + + ` If you are trying to make this test pass for a pull request but can\'t fix the issue yourself, then make the pull request (or commit) anyway, a maintainer will help you.` + + `\n\nFull pattern:\n${pattern}`); + } + }, + }); + + safeRegexes.add(patternStr); + }); + }); + }