From 923f8c16bda5b1401622bb5718fb0b8ac6ed4705 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Fri, 8 Nov 2019 18:20:45 -0500 Subject: [PATCH 1/7] enh(cpp): Improve highlighting of unterminated raw strings PR #1897 switched C++ raw strings to use backreferences; however, this breaks source files where raw strings are truncated. Like comments, it would be preferable to highlight them. - Add `on:begin` and `on:end` to allow more granular matching when the end match is dynamic and based on a part of the begin match - This deprecates the `endSameAsBegin` attribute. That attribute was a very specific way to solve this problem, but now we have a much more general solution in these added callbacks. Also related: #2259. Co-authored-by: Josh Goebel --- docs/reference.rst | 1 + src/highlight.js | 65 +++++++++++++------ src/languages/c-like.js | 7 +- src/lib/mode_compiler.js | 11 ++-- src/lib/response.js | 11 ++++ .../cpp/truncated-block-comment.expect.txt | 3 + test/markup/cpp/truncated-block-comment.txt | 2 + .../cpp/truncated-raw-string.expect.txt | 5 ++ test/markup/cpp/truncated-raw-string.txt | 4 ++ 9 files changed, 82 insertions(+), 27 deletions(-) create mode 100644 src/lib/response.js create mode 100644 test/markup/cpp/truncated-block-comment.expect.txt create mode 100644 test/markup/cpp/truncated-block-comment.txt create mode 100644 test/markup/cpp/truncated-raw-string.expect.txt create mode 100644 test/markup/cpp/truncated-raw-string.txt diff --git a/docs/reference.rst b/docs/reference.rst index d2e4cb1c88..c823b48fca 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -208,6 +208,7 @@ In this case you can't simply specify the same regexp for ``begin`` and ``end`` (say, ``"\\$[a-z]\\$"``), but you can use ``begin: "\\$[a-z]\\$"`` and ``endSameAsBegin: true``. + .. 
_lexemes: lexemes diff --git a/src/highlight.js b/src/highlight.js index f5a93a88de..ca3a3d8b6a 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -4,6 +4,7 @@ https://highlightjs.org/ */ import deepFreeze from './vendor/deep_freeze'; +import Response from './lib/response'; import TokenTreeEmitter from './lib/token_tree'; import * as regex from './lib/regex'; import * as utils from './lib/utils'; @@ -118,18 +119,6 @@ const HLJS = function(hljs) { function _highlight(languageName, code, ignoreIllegals, continuation) { var codeToHighlight = code; - function endOfMode(mode, lexeme) { - if (regex.startsWith(mode.endRe, lexeme)) { - while (mode.endsParent && mode.parent) { - mode = mode.parent; - } - return mode; - } - if (mode.endsWithParent) { - return endOfMode(mode.parent, lexeme); - } - } - function keywordData(mode, match) { var matchText = language.case_insensitive ? match[0].toLowerCase() : match[0]; return Object.prototype.hasOwnProperty.call(mode.keywords, matchText) && mode.keywords[matchText]; @@ -206,7 +195,33 @@ const HLJS = function(hljs) { if (mode.className) { emitter.openNode(mode.className); } - top = Object.create(mode, { parent: { value: top } }); + top = Object.create(mode, {parent: {value: top}}); + return top; + } + + function endOfMode(mode, match, matchPlusRemainder) { + let matched = regex.startsWith(mode.endRe, matchPlusRemainder); + + if (matched) { + if (mode["before:end"]) { + let resp = new Response(mode); + mode["before:end"](match, resp); + if (resp.ignore) + matched = false; + } + + if (matched) { + while (mode.endsParent && mode.parent) { + mode = mode.parent; + } + return mode; + } + } + // even if before:end fires an `ignore` it's still possible + // that we might trigger the end node because of a parent mode + if (mode.endsWithParent) { + return endOfMode(mode.parent, match, matchPlusRemainder); + } } function doIgnore(lexeme) { @@ -226,12 +241,15 @@ const HLJS = function(hljs) { function doBeginMatch(match) { var lexeme = 
match[0]; var new_mode = match.rule; - - if (new_mode.__onBegin) { - const res = new_mode.__onBegin(match) || {}; - if (res.ignoreMatch) { - return doIgnore(lexeme); - } + var mode; + + let resp = new Response(new_mode); + // first internal before callbacks, then the public ones + let beforeCallbacks = [new_mode.__beforeBegin, new_mode["before:begin"]]; + for (let cb of beforeCallbacks) { + if (!cb) continue; + cb(match, resp); + if (resp.ignore) return doIgnore(lexeme); } if (new_mode && new_mode.endSameAsBegin) { @@ -249,14 +267,19 @@ const HLJS = function(hljs) { mode_buffer = lexeme; } } - startNewMode(new_mode); + mode = startNewMode(new_mode); + if (mode["after:begin"]) { + let resp = new Response(mode); + mode["after:begin"](match, resp); + } return new_mode.returnBegin ? 0 : lexeme.length; } function doEndMatch(match) { var lexeme = match[0]; var matchPlusRemainder = codeToHighlight.substr(match.index); - var end_mode = endOfMode(top, matchPlusRemainder); + + var end_mode = endOfMode(top, match, matchPlusRemainder); if (!end_mode) { return NO_MATCH; } var origin = top; diff --git a/src/languages/c-like.js b/src/languages/c-like.js index 90e23073ee..ca75094589 100644 --- a/src/languages/c-like.js +++ b/src/languages/c-like.js @@ -44,7 +44,12 @@ export default function(hljs) { begin: '(u8?|U|L)?\'(' + CHARACTER_ESCAPES + "|.)", end: '\'', illegal: '.' 
}, - { begin: /(?:u8?|U|L)?R"([^()\\ ]{0,16})\((?:.|\n)*?\)\1"/ } + { + begin: /(?:u8?|U|L)?R"([^()\\ ]{0,16})\(/, + end: /\)([^()\\ ]{0,16})"/, + 'after:begin': (m, resp) => { resp.data.heredoc = m[1]; }, + 'before:end': function(m, resp) { if (resp.data.heredoc !== m[1]) resp.ignoreMatch(); } + } ] }; diff --git a/src/lib/mode_compiler.js b/src/lib/mode_compiler.js index 9faf220258..16dc0adbaf 100644 --- a/src/lib/mode_compiler.js +++ b/src/lib/mode_compiler.js @@ -61,6 +61,7 @@ export function compileLanguage(language) { // eslint-disable-next-line no-undefined const i = match.findIndex((el, i) => i > 0 && el !== undefined); const matchData = this.matchIndexes[i]; + match.splice(0, i); // // trim off the extra matches return Object.assign(match, matchData); } @@ -158,11 +159,11 @@ export function compileLanguage(language) { } // TODO: We need negative look-behind support to do this properly - function skipIfhasPrecedingOrTrailingDot(match) { + function skipIfhasPrecedingOrTrailingDot(match, resp) { const before = match.input[match.index - 1]; const after = match.input[match.index + match[0].length]; if (before === "." 
|| after === ".") { - return { ignoreMatch: true }; + resp.ignoreMatch(); } } @@ -200,8 +201,8 @@ export function compileLanguage(language) { if (mode.compiled) return; mode.compiled = true; - // __onBegin is considered private API, internal use only - mode.__onBegin = null; + // __beforeBegin is considered private API, internal use only + mode.__beforeBegin = null; mode.keywords = mode.keywords || mode.beginKeywords; if (mode.keywords) { @@ -218,7 +219,7 @@ export function compileLanguage(language) { // doesn't allow spaces in keywords anyways and we still check for the boundary // first mode.begin = '\\b(' + mode.beginKeywords.split(' ').join('|') + ')(?=\\b|\\s)'; - mode.__onBegin = skipIfhasPrecedingOrTrailingDot; + mode.__beforeBegin = skipIfhasPrecedingOrTrailingDot; } if (!mode.begin) mode.begin = /\B|\b/; diff --git a/src/lib/response.js b/src/lib/response.js new file mode 100644 index 0000000000..9c5bcfa95c --- /dev/null +++ b/src/lib/response.js @@ -0,0 +1,11 @@ +export default class Response { + constructor(mode) { + if (mode.data === undefined) + mode.data = {}; + this.data = mode.data; + } + + ignoreMatch() { + this.ignore = true; + } +} diff --git a/test/markup/cpp/truncated-block-comment.expect.txt b/test/markup/cpp/truncated-block-comment.expect.txt new file mode 100644 index 0000000000..a2f5ce048a --- /dev/null +++ b/test/markup/cpp/truncated-block-comment.expect.txt @@ -0,0 +1,3 @@ +/* +Truncated block comment + diff --git a/test/markup/cpp/truncated-block-comment.txt b/test/markup/cpp/truncated-block-comment.txt new file mode 100644 index 0000000000..b266bf0806 --- /dev/null +++ b/test/markup/cpp/truncated-block-comment.txt @@ -0,0 +1,2 @@ +/* +Truncated block comment diff --git a/test/markup/cpp/truncated-raw-string.expect.txt b/test/markup/cpp/truncated-raw-string.expect.txt new file mode 100644 index 0000000000..8d133e8bae --- /dev/null +++ b/test/markup/cpp/truncated-raw-string.expect.txt @@ -0,0 +1,5 @@ +R"foo( +Truncated raw string +)nope" 
+Still not completed. + diff --git a/test/markup/cpp/truncated-raw-string.txt b/test/markup/cpp/truncated-raw-string.txt new file mode 100644 index 0000000000..b012c82bfe --- /dev/null +++ b/test/markup/cpp/truncated-raw-string.txt @@ -0,0 +1,4 @@ +R"foo( +Truncated raw string +)nope" +Still not completed. From a6a132ad0f058b4cc9b4198d30c6eadd7c3b1b27 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Mon, 2 Mar 2020 19:23:45 -0500 Subject: [PATCH 2/7] (chore) C-like uses the new END_SAME_AS_BEGIN mode --- src/languages/c-like.js | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/languages/c-like.js b/src/languages/c-like.js index ca75094589..90062b9404 100644 --- a/src/languages/c-like.js +++ b/src/languages/c-like.js @@ -32,6 +32,10 @@ export default function(hljs) { // https://en.cppreference.com/w/cpp/language/escape // \\ \x \xFF \u2837 \u00323747 \374 var CHARACTER_ESCAPES = '\\\\(x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4,8}|[0-7]{3}|\\S)' + var END_GROUP_SAME_AS_BEGIN = { + 'after:begin': (m, resp) => { resp.data.heredoc = m[1]; }, + 'before:end': (m, resp) => { if (resp.data.heredoc !== m[1]) resp.ignoreMatch(); } + }; var STRINGS = { className: 'string', variants: [ @@ -44,12 +48,10 @@ export default function(hljs) { begin: '(u8?|U|L)?\'(' + CHARACTER_ESCAPES + "|.)", end: '\'', illegal: '.' 
}, - { + Object.assign({ begin: /(?:u8?|U|L)?R"([^()\\ ]{0,16})\(/, end: /\)([^()\\ ]{0,16})"/, - 'after:begin': (m, resp) => { resp.data.heredoc = m[1]; }, - 'before:end': function(m, resp) { if (resp.data.heredoc !== m[1]) resp.ignoreMatch(); } - } + }, END_GROUP_SAME_AS_BEGIN) ] }; From 7d5be2c0a1b3d7af265c2dc5a27795793f7fc9a5 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Mon, 2 Mar 2020 19:45:01 -0500 Subject: [PATCH 3/7] (chore) Ruby uses END_SAME_AS_BEGIN mode/rule --- src/languages/c-like.js | 6 +----- src/languages/ruby.js | 6 +++--- src/lib/modes.js | 5 +++++ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/languages/c-like.js b/src/languages/c-like.js index 90062b9404..b77a349842 100644 --- a/src/languages/c-like.js +++ b/src/languages/c-like.js @@ -32,10 +32,6 @@ export default function(hljs) { // https://en.cppreference.com/w/cpp/language/escape // \\ \x \xFF \u2837 \u00323747 \374 var CHARACTER_ESCAPES = '\\\\(x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4,8}|[0-7]{3}|\\S)' - var END_GROUP_SAME_AS_BEGIN = { - 'after:begin': (m, resp) => { resp.data.heredoc = m[1]; }, - 'before:end': (m, resp) => { if (resp.data.heredoc !== m[1]) resp.ignoreMatch(); } - }; var STRINGS = { className: 'string', variants: [ @@ -51,7 +47,7 @@ export default function(hljs) { Object.assign({ begin: /(?:u8?|U|L)?R"([^()\\ ]{0,16})\(/, end: /\)([^()\\ ]{0,16})"/, - }, END_GROUP_SAME_AS_BEGIN) + }, hljs.END_FIRST_MATCH_SAME_AS_BEGIN) ] }; diff --git a/src/languages/ruby.js b/src/languages/ruby.js index 59f2b048f8..d702f617cd 100644 --- a/src/languages/ruby.js +++ b/src/languages/ruby.js @@ -72,10 +72,10 @@ export default function(hljs) { returnBegin: true, contains: [ { begin: /<<[-~]?'?/ }, - { begin: /\w+/, - endSameAsBegin: true, + Object.assign({ + begin: /(\w+)/, end: /(\w+)/, contains: [hljs.BACKSLASH_ESCAPE, SUBST], - } + }, hljs.END_FIRST_MATCH_SAME_AS_BEGIN) ] } ] diff --git a/src/lib/modes.js b/src/lib/modes.js index f91811ee17..7be59611aa 100644 --- 
a/src/lib/modes.js +++ b/src/lib/modes.js @@ -117,3 +117,8 @@ export const METHOD_GUARD = { begin: '\\.\\s*' + UNDERSCORE_IDENT_RE, relevance: 0 }; + +export const END_FIRST_MATCH_SAME_AS_BEGIN = { + 'after:begin': (m, resp) => { resp.data.heredoc = m[1]; }, + 'before:end': (m, resp) => { if (resp.data.heredoc !== m[1]) resp.ignoreMatch(); } +}; From 61070a44cab016fbb0a918067c3f7d3e252ecd1e Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 12 Mar 2020 03:02:27 -0400 Subject: [PATCH 4/7] (parser) make END_SAME_AS_BEGIN a function helper Adds a mode helper to replace the deprecated `endSameAsBegin` attribute. The first match group from the begin regex will be compared to the first match group from the end regex and the end regex will only match if both strings are identical. Note this is more advanced functionality than before since now you can match a larger selection of text yet only use a small portion of it for the actual "end must match begin" portion. --- src/languages/c-like.js | 4 ++-- src/languages/ruby.js | 4 ++-- src/lib/modes.js | 9 ++++++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/languages/c-like.js b/src/languages/c-like.js index b77a349842..fb3a70c2af 100644 --- a/src/languages/c-like.js +++ b/src/languages/c-like.js @@ -44,10 +44,10 @@ export default function(hljs) { begin: '(u8?|U|L)?\'(' + CHARACTER_ESCAPES + "|.)", end: '\'', illegal: '.' 
}, - Object.assign({ + hljs.END_SAME_AS_BEGIN({ begin: /(?:u8?|U|L)?R"([^()\\ ]{0,16})\(/, end: /\)([^()\\ ]{0,16})"/, - }, hljs.END_FIRST_MATCH_SAME_AS_BEGIN) + }) ] }; diff --git a/src/languages/ruby.js b/src/languages/ruby.js index d702f617cd..bce2aaf841 100644 --- a/src/languages/ruby.js +++ b/src/languages/ruby.js @@ -72,10 +72,10 @@ export default function(hljs) { returnBegin: true, contains: [ { begin: /<<[-~]?'?/ }, - Object.assign({ + hljs.END_SAME_AS_BEGIN({ begin: /(\w+)/, end: /(\w+)/, contains: [hljs.BACKSLASH_ESCAPE, SUBST], - }, hljs.END_FIRST_MATCH_SAME_AS_BEGIN) + }) ] } ] diff --git a/src/lib/modes.js b/src/lib/modes.js index 7be59611aa..55eabb8e67 100644 --- a/src/lib/modes.js +++ b/src/lib/modes.js @@ -118,7 +118,10 @@ export const METHOD_GUARD = { relevance: 0 }; -export const END_FIRST_MATCH_SAME_AS_BEGIN = { - 'after:begin': (m, resp) => { resp.data.heredoc = m[1]; }, - 'before:end': (m, resp) => { if (resp.data.heredoc !== m[1]) resp.ignoreMatch(); } +export const END_SAME_AS_BEGIN = function(mode) { + return Object.assign(mode, + { + 'after:begin': (m, resp) => { resp.data._beginMatch = m[1]; }, + 'before:end': (m, resp) => { if (resp.data._beginMatch !== m[1]) resp.ignoreMatch() } + }); }; From 85e60f7a2eb09cdd57a26b58f8ea03c59af0a58b Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 12 Mar 2020 13:18:09 -0400 Subject: [PATCH 5/7] (pgsql) add test for $$ quoting existing behavior - even if that existing behavior is questionable - the ending span should really close before the $$, not after Fixing this would involve delving into the sublanguage behavior and I'm not sure we have time to tackle that right this moment. 
--- test/markup/pgsql/dollar_strings.expect.txt | 9 +++++++++ test/markup/pgsql/dollar_strings.txt | 9 +++++++++ 2 files changed, 18 insertions(+) create mode 100644 test/markup/pgsql/dollar_strings.expect.txt create mode 100644 test/markup/pgsql/dollar_strings.txt diff --git a/test/markup/pgsql/dollar_strings.expect.txt b/test/markup/pgsql/dollar_strings.expect.txt new file mode 100644 index 0000000000..841d2cff6f --- /dev/null +++ b/test/markup/pgsql/dollar_strings.expect.txt @@ -0,0 +1,9 @@ +CREATE OR REPLACE FUNCTION hello_world(param_your_name text) +RETURNS text AS +$$ +SELECT 'Hello world. My name is ' || param_your_name || '.'; +$$ +language sql STRICT; + +SELECT sql_expression($sql$SELECT hello_world($phrase$Regina's elephant's dog$phrase$) + || $phrase$ I made a cat's meow today.$phrase$ $sql$); diff --git a/test/markup/pgsql/dollar_strings.txt b/test/markup/pgsql/dollar_strings.txt new file mode 100644 index 0000000000..cfc03f4732 --- /dev/null +++ b/test/markup/pgsql/dollar_strings.txt @@ -0,0 +1,9 @@ +CREATE OR REPLACE FUNCTION hello_world(param_your_name text) +RETURNS text AS +$$ +SELECT 'Hello world. 
My name is ' || param_your_name || '.'; +$$ +language sql STRICT; + +SELECT sql_expression($sql$SELECT hello_world($phrase$Regina's elephant's dog$phrase$) + || $phrase$ I made a cat's meow today.$phrase$ $sql$); From b2fc85b582fc71b272d930462f19f002e11278f5 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 12 Mar 2020 13:29:38 -0400 Subject: [PATCH 6/7] (chore) pgsql uses END_SAME_AS_BEGIN mode/rule now also --- src/languages/pgsql.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/languages/pgsql.js b/src/languages/pgsql.js index 55d855a521..589a8ba145 100644 --- a/src/languages/pgsql.js +++ b/src/languages/pgsql.js @@ -463,9 +463,9 @@ export default function(hljs) { contains: [{begin: '\\\\.'}], relevance: 10 }, - { + hljs.END_SAME_AS_BEGIN({ begin: DOLLAR_STRING, - endSameAsBegin: true, + end: DOLLAR_STRING, contains: [ { // actually we want them all except SQL; listed are those with known implementations @@ -474,7 +474,7 @@ export default function(hljs) { endsWithParent: true } ] - }, + }), // identifiers in quotes { begin: '"', end: '"', From 9ada394e0e3bfa8d625dd5af91d83e57639fbc65 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 12 Mar 2020 13:58:01 -0400 Subject: [PATCH 7/7] (docs) rename to `mode-reference`; docs for callbacks - I can never find this file because its name didn't fully match. 
- rename callbacks to `on:begin` and `on:end` --- CHANGES.md | 13 +++++++++ docs/index.rst | 2 +- docs/language-guide.rst | 2 +- docs/{reference.rst => mode-reference.rst} | 34 ++++++++++++++++++++-- src/highlight.js | 16 +++++----- src/lib/mode_compiler.js | 4 ++- src/lib/modes.js | 4 +-- 7 files changed, 60 insertions(+), 15 deletions(-) rename docs/{reference.rst => mode-reference.rst} (86%) diff --git a/CHANGES.md b/CHANGES.md index 9c7eda553b..b5f7f6b497 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,13 +1,26 @@ ## Version 10.1.0 (in progress) +Parser Engine: + +- (enh) Added `on:begin` callback for modes (#2261) [Josh Goebel][] +- (enh) Added `on:end` callback for modes (#2261) [Josh Goebel][] +- (enh) Added ability to programmatically ignore begin and end matches (#2261) [Josh Goebel][] +- (enh) Added `END_SAME_AS_BEGIN` mode to replace `endSameAsBegin` parser attribute (#2261) [Josh Goebel][] + +Deprecations: + +- (deprecation) `endSameAsBegin` is now deprecated. (#2261) [Josh Goebel][] + Language Improvements: +- fix(cpp) Fix highlighting of unterminated raw strings (#2261) [David Benjamin][] - fix(javascript) `=>` function with nested `()` in params now works (#2502) [Josh Goebel][] - fix(typescript) `=>` function with nested `()` in params now works (#2502) [Josh Goebel][] - fix(yaml) Fix tags to include non-word characters (#2486) [Peter Plantinga][] [Josh Goebel]: https://github.com/yyyc514 [Peter Plantinga]: https://github.com/pplantinga +[David Benjamin]: https://github.com/davidben ## Version 10.0.1 diff --git a/docs/index.rst b/docs/index.rst index 3792e16245..7ae5b1953f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,7 +13,7 @@ Contents: api language-guide - reference + mode-reference css-classes-reference style-guide plugin-api diff --git a/docs/language-guide.rst b/docs/language-guide.rst index 0971524e52..4ed26d2d96 100644 --- a/docs/language-guide.rst +++ b/docs/language-guide.rst @@ -186,7 +186,7 @@ For such modes ``className`` 
attribute should be omitted so they won't generate Mode attributes --------------- -Other useful attributes are defined in the :doc:`mode reference <reference>`. +Other useful attributes are defined in the :doc:`mode reference <mode-reference>`. .. _relevance: diff --git a/docs/reference.rst b/docs/mode-reference.rst similarity index 86% rename from docs/reference.rst rename to docs/mode-reference.rst index c823b48fca..a60400d536 100644 --- a/docs/reference.rst +++ b/docs/mode-reference.rst @@ -62,6 +62,19 @@ Regular expression starting a mode. For example a single quote for strings or tw If absent, ``begin`` defaults to a regexp that matches anything, so the mode starts immediately. +on:begin +^^^^^^^^^^^ + +**type**: callback (matchData, response) + +This callback is triggered the moment a begin match is detected. ``matchData`` includes the typical regex match data; the full match, match groups, etc. The ``response`` object is used to tell the parser how it should handle the match. It can also be used to temporarily store data. + +- ``response.data`` - a simple object data store. Can be used for building more complex rules where the end rule is dependent on the content of begin, etc. +- ``response.ignoreMatch()`` - pretend as if this match never happened. The mode is not entered. Continues trying subsequent modes in the current mode's ``contains`` list + +For an example of usage see ``END_SAME_AS_BEGIN`` in ``modes.js``. + + end ^^^ @@ -79,6 +92,19 @@ Sometimes a mode can end not by itself but implicitly with its containing (paren This is achieved with :ref:`endsWithParent <endsWithParent>` attribute. +on:end +^^^^^^^^^^^ + +**type**: callback (matchData, response) + +This callback is triggered the moment an end match is detected. ``matchData`` includes the typical regex match data; the full match, match groups, etc. The ``response`` object is used to tell the parser how it should handle the match. It can also be used to retrieve data stored from an ``on:begin`` callback. 
+ +- ``response.data`` - a simple object data store. Can be used for building more complex rules where the end rule is dependent on the content of begin, etc. +- ``response.ignoreMatch()`` - pretend as if this match never happened. The mode is not ended by this match (a parent mode may still end it via ``endsWithParent``). + +For an example of usage see ``END_SAME_AS_BEGIN`` in ``modes.js``. + + beginKeywords ^^^^^^^^^^^^^^^^ @@ -182,8 +208,12 @@ tell it to end the function definition after itself: .. _endSameAsBegin: -endSameAsBegin -^^^^^^^^^^^^^^ +endSameAsBegin (deprecated as of 10.1) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Deprecated:** *This attribute has been deprecated.* You should instead use the +``END_SAME_AS_BEGIN`` mode or use the ``on:begin`` and ``on:end`` attributes to +build more complex paired matchers. **type**: boolean diff --git a/src/highlight.js b/src/highlight.js index ca3a3d8b6a..d60330fba5 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -203,9 +203,9 @@ const HLJS = function(hljs) { let matched = regex.startsWith(mode.endRe, matchPlusRemainder); if (matched) { - if (mode["before:end"]) { + if (mode["on:end"]) { let resp = new Response(mode); - mode["before:end"](match, resp); + mode["on:end"](match, resp); if (resp.ignore) matched = false; } @@ -217,7 +217,7 @@ const HLJS = function(hljs) { return mode; } } - // even if before:end fires an `ignore` it's still possible + // even if on:end fires an `ignore` it's still possible // that we might trigger the end node because of a parent mode if (mode.endsWithParent) { return endOfMode(mode.parent, match, matchPlusRemainder); @@ -245,7 +245,7 @@ const HLJS = function(hljs) { let resp = new Response(new_mode); // first internal before callbacks, then the public ones - let beforeCallbacks = [new_mode.__beforeBegin, new_mode["before:begin"]]; + let beforeCallbacks = [new_mode.__beforeBegin, new_mode["on:begin"]]; for (let cb of beforeCallbacks) { if (!cb) continue; cb(match, 
resp); @@ -268,10 +268,10 @@ const HLJS = function(hljs) { } } mode = startNewMode(new_mode); - if (mode["after:begin"]) { - let resp = new Response(mode); - mode["after:begin"](match, resp); - } + // if (mode["after:begin"]) { + // let resp = new Response(mode); + // mode["after:begin"](match, resp); + // } return new_mode.returnBegin ? 0 : lexeme.length; } diff --git a/src/lib/mode_compiler.js b/src/lib/mode_compiler.js index 16dc0adbaf..e853cf2c3b 100644 --- a/src/lib/mode_compiler.js +++ b/src/lib/mode_compiler.js @@ -61,7 +61,9 @@ export function compileLanguage(language) { // eslint-disable-next-line no-undefined const i = match.findIndex((el, i) => i > 0 && el !== undefined); const matchData = this.matchIndexes[i]; - match.splice(0, i); // // trim off the extra matches + // trim off any earlier non-relevant match groups (ie, the other regex + // match groups that make up the multi-matcher) + match.splice(0, i); return Object.assign(match, matchData); } diff --git a/src/lib/modes.js b/src/lib/modes.js index 55eabb8e67..8acb8f0e51 100644 --- a/src/lib/modes.js +++ b/src/lib/modes.js @@ -121,7 +121,7 @@ export const METHOD_GUARD = { export const END_SAME_AS_BEGIN = function(mode) { return Object.assign(mode, { - 'after:begin': (m, resp) => { resp.data._beginMatch = m[1]; }, - 'before:end': (m, resp) => { if (resp.data._beginMatch !== m[1]) resp.ignoreMatch() } + 'on:begin': (m, resp) => { resp.data._beginMatch = m[1]; }, + 'on:end': (m, resp) => { if (resp.data._beginMatch !== m[1]) resp.ignoreMatch() } }); };