diff --git a/CHANGES.md b/CHANGES.md index 9c7eda553b..b5f7f6b497 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,13 +1,26 @@ ## Version 10.1.0 (in progress) +Parser Engine: + +- (enh) Added `on:begin` callback for modes (#2261) [Josh Goebel][] +- (enh) Added `on:end` callback for modes (#2261) [Josh Goebel][] +- (enh) Added ability to programmatically ignore begin and end matches (#2261) [Josh Goebel][] +- (enh) Added `END_SAME_AS_BEGIN` mode to replace `endSameAsBegin` parser attribute (#2261) [Josh Goebel][] + +Deprecations: + +- (deprecation) `endSameAsBegin` is now deprecated. (#2261) [Josh Goebel][] + Language Improvements: +- fix(cpp) Fix highlighting of unterminated raw strings (#2261) [David Benjamin][] - fix(javascript) `=>` function with nested `()` in params now works (#2502) [Josh Goebel][] - fix(typescript) `=>` function with nested `()` in params now works (#2502) [Josh Goebel][] - fix(yaml) Fix tags to include non-word characters (#2486) [Peter Plantinga][] [Josh Goebel]: https://github.com/yyyc514 [Peter Plantinga]: https://github.com/pplantinga +[David Benjamin]: https://github.com/davidben ## Version 10.0.1 diff --git a/docs/index.rst b/docs/index.rst index 3792e16245..7ae5b1953f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -13,7 +13,7 @@ Contents: api language-guide - reference + mode-reference css-classes-reference style-guide plugin-api diff --git a/docs/language-guide.rst b/docs/language-guide.rst index 0971524e52..4ed26d2d96 100644 --- a/docs/language-guide.rst +++ b/docs/language-guide.rst @@ -186,7 +186,7 @@ For such modes ``className`` attribute should be omitted so they won't generate Mode attributes --------------- -Other useful attributes are defined in the :doc:`mode reference <reference>`. +Other useful attributes are defined in the :doc:`mode reference <mode-reference>`. .. 
_relevance: diff --git a/docs/reference.rst b/docs/mode-reference.rst similarity index 86% rename from docs/reference.rst rename to docs/mode-reference.rst index d2e4cb1c88..a60400d536 100644 --- a/docs/reference.rst +++ b/docs/mode-reference.rst @@ -62,6 +62,19 @@ Regular expression starting a mode. For example a single quote for strings or tw If absent, ``begin`` defaults to a regexp that matches anything, so the mode starts immediately. +on:begin +^^^^^^^^^^^ + +**type**: callback (matchData, response) + +This callback is triggered the moment a begin match is detected. ``matchData`` includes the typical regex match data; the full match, match groups, etc. The ``response`` object is used to tell the parser how it should handle the match. It can also be used to temporarily store data. + +- ``response.data`` - a simple object data store. Can be used for building more complex rules where the end rule is dependent on the content of begin, etc. +- ``response.ignoreMatch()`` - pretend as if this match never happened. The mode is not entered. Continues trying subsequent modes in the current mode's ``contains`` list + +For an example of usage see ``END_SAME_AS_BEGIN`` in ``modes.js``. + + end ^^^ @@ -79,6 +92,19 @@ Sometimes a mode can end not by itself but implicitly with its containing (paren This is achieved with :ref:`endsWithParent <endsWithParent>` attribute. +on:end +^^^^^^^^^^^ + +**type**: callback (matchData, response) + +This callback is triggered the moment an end match is detected. ``matchData`` includes the typical regex match data; the full match, match groups, etc. The ``response`` object is used to tell the parser how it should handle the match. It can also be used to retrieve data stored from a ``on:begin`` callback. + +- ``response.data`` - a simple object data store. Can be used for building more complex rules where the end rule is dependent on the content of begin, etc. +- ``response.ignoreMatch()`` - pretend as if this match never happened. The mode is not ended. 
Continues trying subsequent modes in the current mode's ``contains`` list + +For an example of usage see ``END_SAME_AS_BEGIN`` in ``modes.js``. + + beginKeywords ^^^^^^^^^^^^^^^^ @@ -182,8 +208,12 @@ tell it to end the function definition after itself: .. _endSameAsBegin: -endSameAsBegin -^^^^^^^^^^^^^^ +endSameAsBegin (deprecated as of 10.1) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Deprecated:** *This attribute has been deprecated.* You should instead use the +``END_SAME_AS_BEGIN`` mode or use the ``on:begin`` and ``on:end`` attributes to +build more complex paired matchers. **type**: boolean @@ -208,6 +238,7 @@ In this case you can't simply specify the same regexp for ``begin`` and ``end`` (say, ``"\\$[a-z]\\$"``), but you can use ``begin: "\\$[a-z]\\$"`` and ``endSameAsBegin: true``. + .. _lexemes: lexemes diff --git a/src/highlight.js b/src/highlight.js index f5a93a88de..d60330fba5 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -4,6 +4,7 @@ https://highlightjs.org/ */ import deepFreeze from './vendor/deep_freeze'; +import Response from './lib/response'; import TokenTreeEmitter from './lib/token_tree'; import * as regex from './lib/regex'; import * as utils from './lib/utils'; @@ -118,18 +119,6 @@ const HLJS = function(hljs) { function _highlight(languageName, code, ignoreIllegals, continuation) { var codeToHighlight = code; - function endOfMode(mode, lexeme) { - if (regex.startsWith(mode.endRe, lexeme)) { - while (mode.endsParent && mode.parent) { - mode = mode.parent; - } - return mode; - } - if (mode.endsWithParent) { - return endOfMode(mode.parent, lexeme); - } - } - function keywordData(mode, match) { var matchText = language.case_insensitive ? 
match[0].toLowerCase() : match[0]; return Object.prototype.hasOwnProperty.call(mode.keywords, matchText) && mode.keywords[matchText]; @@ -206,7 +195,33 @@ const HLJS = function(hljs) { if (mode.className) { emitter.openNode(mode.className); } - top = Object.create(mode, { parent: { value: top } }); + top = Object.create(mode, {parent: {value: top}}); + return top; + } + + function endOfMode(mode, match, matchPlusRemainder) { + let matched = regex.startsWith(mode.endRe, matchPlusRemainder); + + if (matched) { + if (mode["on:end"]) { + let resp = new Response(mode); + mode["on:end"](match, resp); + if (resp.ignore) + matched = false; + } + + if (matched) { + while (mode.endsParent && mode.parent) { + mode = mode.parent; + } + return mode; + } + } + // even if on:end fires an `ignore` it's still possible + // that we might trigger the end node because of a parent mode + if (mode.endsWithParent) { + return endOfMode(mode.parent, match, matchPlusRemainder); + } } function doIgnore(lexeme) { @@ -226,12 +241,15 @@ const HLJS = function(hljs) { function doBeginMatch(match) { var lexeme = match[0]; var new_mode = match.rule; - - if (new_mode.__onBegin) { - const res = new_mode.__onBegin(match) || {}; - if (res.ignoreMatch) { - return doIgnore(lexeme); - } + var mode; + + let resp = new Response(new_mode); + // first internal before callbacks, then the public ones + let beforeCallbacks = [new_mode.__beforeBegin, new_mode["on:begin"]]; + for (let cb of beforeCallbacks) { + if (!cb) continue; + cb(match, resp); + if (resp.ignore) return doIgnore(lexeme); } if (new_mode && new_mode.endSameAsBegin) { @@ -249,14 +267,19 @@ const HLJS = function(hljs) { mode_buffer = lexeme; } } - startNewMode(new_mode); + mode = startNewMode(new_mode); + // if (mode["after:begin"]) { + // let resp = new Response(mode); + // mode["after:begin"](match, resp); + // } return new_mode.returnBegin ? 
0 : lexeme.length; } function doEndMatch(match) { var lexeme = match[0]; var matchPlusRemainder = codeToHighlight.substr(match.index); - var end_mode = endOfMode(top, matchPlusRemainder); + + var end_mode = endOfMode(top, match, matchPlusRemainder); if (!end_mode) { return NO_MATCH; } var origin = top; diff --git a/src/languages/c-like.js b/src/languages/c-like.js index 90e23073ee..fb3a70c2af 100644 --- a/src/languages/c-like.js +++ b/src/languages/c-like.js @@ -44,7 +44,10 @@ export default function(hljs) { begin: '(u8?|U|L)?\'(' + CHARACTER_ESCAPES + "|.)", end: '\'', illegal: '.' }, - { begin: /(?:u8?|U|L)?R"([^()\\ ]{0,16})\((?:.|\n)*?\)\1"/ } + hljs.END_SAME_AS_BEGIN({ + begin: /(?:u8?|U|L)?R"([^()\\ ]{0,16})\(/, + end: /\)([^()\\ ]{0,16})"/, + }) ] }; diff --git a/src/languages/pgsql.js b/src/languages/pgsql.js index 55d855a521..589a8ba145 100644 --- a/src/languages/pgsql.js +++ b/src/languages/pgsql.js @@ -463,9 +463,9 @@ export default function(hljs) { contains: [{begin: '\\\\.'}], relevance: 10 }, - { + hljs.END_SAME_AS_BEGIN({ begin: DOLLAR_STRING, - endSameAsBegin: true, + end: DOLLAR_STRING, contains: [ { // actually we want them all except SQL; listed are those with known implementations @@ -474,7 +474,7 @@ export default function(hljs) { endsWithParent: true } ] - }, + }), // identifiers in quotes { begin: '"', end: '"', diff --git a/src/languages/ruby.js b/src/languages/ruby.js index 59f2b048f8..bce2aaf841 100644 --- a/src/languages/ruby.js +++ b/src/languages/ruby.js @@ -72,10 +72,10 @@ export default function(hljs) { returnBegin: true, contains: [ { begin: /<<[-~]?'?/ }, - { begin: /\w+/, - endSameAsBegin: true, + hljs.END_SAME_AS_BEGIN({ + begin: /(\w+)/, end: /(\w+)/, contains: [hljs.BACKSLASH_ESCAPE, SUBST], - } + }) ] } ] diff --git a/src/lib/mode_compiler.js b/src/lib/mode_compiler.js index 9faf220258..e853cf2c3b 100644 --- a/src/lib/mode_compiler.js +++ b/src/lib/mode_compiler.js @@ -61,6 +61,9 @@ export function compileLanguage(language) { 
// eslint-disable-next-line no-undefined const i = match.findIndex((el, i) => i > 0 && el !== undefined); const matchData = this.matchIndexes[i]; + // trim off any earlier non-relevant match groups (ie, the other regex + // match groups that make up the multi-matcher) + match.splice(0, i); return Object.assign(match, matchData); } @@ -158,11 +161,11 @@ export function compileLanguage(language) { } // TODO: We need negative look-behind support to do this properly - function skipIfhasPrecedingOrTrailingDot(match) { + function skipIfhasPrecedingOrTrailingDot(match, resp) { const before = match.input[match.index - 1]; const after = match.input[match.index + match[0].length]; if (before === "." || after === ".") { - return { ignoreMatch: true }; + resp.ignoreMatch(); } } @@ -200,8 +203,8 @@ export function compileLanguage(language) { if (mode.compiled) return; mode.compiled = true; - // __onBegin is considered private API, internal use only - mode.__onBegin = null; + // __beforeBegin is considered private API, internal use only + mode.__beforeBegin = null; mode.keywords = mode.keywords || mode.beginKeywords; if (mode.keywords) { @@ -218,7 +221,7 @@ export function compileLanguage(language) { // doesn't allow spaces in keywords anyways and we still check for the boundary // first mode.begin = '\\b(' + mode.beginKeywords.split(' ').join('|') + ')(?=\\b|\\s)'; - mode.__onBegin = skipIfhasPrecedingOrTrailingDot; + mode.__beforeBegin = skipIfhasPrecedingOrTrailingDot; } if (!mode.begin) mode.begin = /\B|\b/; diff --git a/src/lib/modes.js b/src/lib/modes.js index f91811ee17..8acb8f0e51 100644 --- a/src/lib/modes.js +++ b/src/lib/modes.js @@ -117,3 +117,11 @@ export const METHOD_GUARD = { begin: '\\.\\s*' + UNDERSCORE_IDENT_RE, relevance: 0 }; + +export const END_SAME_AS_BEGIN = function(mode) { + return Object.assign(mode, + { + 'on:begin': (m, resp) => { resp.data._beginMatch = m[1]; }, + 'on:end': (m, resp) => { if (resp.data._beginMatch !== m[1]) resp.ignoreMatch() } + }); 
+}; diff --git a/src/lib/response.js b/src/lib/response.js new file mode 100644 index 0000000000..9c5bcfa95c --- /dev/null +++ b/src/lib/response.js @@ -0,0 +1,11 @@ +export default class Response { + constructor(mode) { + if (mode.data === undefined) + mode.data = {}; + this.data = mode.data; + } + + ignoreMatch() { + this.ignore = true; + } +} diff --git a/test/markup/cpp/truncated-block-comment.expect.txt b/test/markup/cpp/truncated-block-comment.expect.txt new file mode 100644 index 0000000000..a2f5ce048a --- /dev/null +++ b/test/markup/cpp/truncated-block-comment.expect.txt @@ -0,0 +1,3 @@ +/* +Truncated block comment + diff --git a/test/markup/cpp/truncated-block-comment.txt b/test/markup/cpp/truncated-block-comment.txt new file mode 100644 index 0000000000..b266bf0806 --- /dev/null +++ b/test/markup/cpp/truncated-block-comment.txt @@ -0,0 +1,2 @@ +/* +Truncated block comment diff --git a/test/markup/cpp/truncated-raw-string.expect.txt b/test/markup/cpp/truncated-raw-string.expect.txt new file mode 100644 index 0000000000..8d133e8bae --- /dev/null +++ b/test/markup/cpp/truncated-raw-string.expect.txt @@ -0,0 +1,5 @@ +R"foo( +Truncated raw string +)nope" +Still not completed. + diff --git a/test/markup/cpp/truncated-raw-string.txt b/test/markup/cpp/truncated-raw-string.txt new file mode 100644 index 0000000000..b012c82bfe --- /dev/null +++ b/test/markup/cpp/truncated-raw-string.txt @@ -0,0 +1,4 @@ +R"foo( +Truncated raw string +)nope" +Still not completed. diff --git a/test/markup/pgsql/dollar_strings.expect.txt b/test/markup/pgsql/dollar_strings.expect.txt new file mode 100644 index 0000000000..841d2cff6f --- /dev/null +++ b/test/markup/pgsql/dollar_strings.expect.txt @@ -0,0 +1,9 @@ +CREATE OR REPLACE FUNCTION hello_world(param_your_name text) +RETURNS text AS +$$ +SELECT 'Hello world. 
My name is ' || param_your_name || '.'; +$$ +language sql STRICT; + +SELECT sql_expression($sql$SELECT hello_world($phrase$Regina's elephant's dog$phrase$) + || $phrase$ I made a cat's meow today.$phrase$ $sql$); diff --git a/test/markup/pgsql/dollar_strings.txt b/test/markup/pgsql/dollar_strings.txt new file mode 100644 index 0000000000..cfc03f4732 --- /dev/null +++ b/test/markup/pgsql/dollar_strings.txt @@ -0,0 +1,9 @@ +CREATE OR REPLACE FUNCTION hello_world(param_your_name text) +RETURNS text AS +$$ +SELECT 'Hello world. My name is ' || param_your_name || '.'; +$$ +language sql STRICT; + +SELECT sql_expression($sql$SELECT hello_world($phrase$Regina's elephant's dog$phrase$) + || $phrase$ I made a cat's meow today.$phrase$ $sql$);