From 7be9dc8bc3c4ff92aa38f748062d4270ec0099f5 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 6 Feb 2020 16:20:40 -0500 Subject: [PATCH] enh(fortran) support intrinsic data types & better 0 width match error detection (#2379) * enh(fortran) support intrinsic data types Closes #1723. * (parser) throw "0 width match" error for bad regex Closes #2140. - In safe mode 0 width matches will be safely and quietly ignored and advance the cursor 1 step, as before. - In debug mode a "0 width match" error will be thrown. This should help prevent such misbehaved rules from sneaking back into the library in the future. --- CHANGES.md | 1 + src/highlight.js | 7 +++++++ src/languages/fortran.js | 3 ++- src/languages/irpf90.js | 3 ++- test/detect/index.js | 1 + test/index.js | 3 +++ test/markup/fortran/numbers.expect.txt | 6 +++--- test/parser/should-not-destroyData.js | 7 +++++++ 8 files changed, 26 insertions(+), 5 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 775680a938..44e7160830 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -14,6 +14,7 @@ Core Changes: Language Improvements: +- (fortran) enh(fortran) support intrinsic data types (#2379) [Josh Goebel][] - enh(java) annotations can include numbers (#2377) [Josh Goebel][] - enh(java) annotations can take params (#2377) [Josh Goebel][] - enh(java) allow annotations inside function call params (#2377) [Josh Goebel][] diff --git a/src/highlight.js b/src/highlight.js index ab8203654a..7afe48ff19 100644 --- a/src/highlight.js +++ b/src/highlight.js @@ -694,6 +694,12 @@ https://highlightjs.org/ if (lastMatch.type=="begin" && match.type=="end" && lastMatch.index == match.index && lexeme === "") { // spit the "skipped" character that our regex choked on back into the output sequence mode_buffer += codeToHighlight.slice(match.index, match.index + 1); + if (!SAFE_MODE) { + var err = new Error('0 width match regex'); + err.languageName = languageName; + err.badRule = lastMatch.rule; + throw(err); + } return 1; } lastMatch = match; @@ -1018,6 +1024,7 @@ https://highlightjs.org/ hljs.inherit = inherit; hljs.addPlugin = addPlugin; hljs.debugMode = function() { SAFE_MODE = false; } + hljs.safeMode = function() { SAFE_MODE = true; } // Common regexps hljs.IDENT_RE = '[a-zA-Z]\\w*'; diff --git a/src/languages/fortran.js b/src/languages/fortran.js index 2c2f792e28..9e40ed52ca 100644 --- a/src/languages/fortran.js +++ b/src/languages/fortran.js @@ -69,7 +69,8 @@ function(hljs) { hljs.COMMENT('!', '$', {relevance: 0}), { className: 'number', - begin: '(?=\\b|\\+|\\-|\\.)(?=\\.\\d|\\d)(?:\\d+)?(?:\\.?\\d*)(?:[de][+-]?\\d+)?\\b\\.?', + // regex in both fortran and irpf90 should match + begin: '(?=\\b|\\+|\\-|\\.)(?:\\.|\\d+\\.?)\\d*([de][+-]?\\d+)?(_[a-z_\\d]+)?', relevance: 0 } ] diff --git a/src/languages/irpf90.js b/src/languages/irpf90.js index 2b89cace10..3ec9acbad6 100644 --- a/src/languages/irpf90.js +++ b/src/languages/irpf90.js @@ -75,7 +75,8 @@ function(hljs) { hljs.COMMENT('begin_doc', 'end_doc', {relevance: 10}), { className: 'number', - begin: '(?=\\b|\\+|\\-|\\.)(?=\\.\\d|\\d)(?:\\d+)?(?:\\.?\\d*)(?:[de][+-]?\\d+)?\\b\\.?', + // regex in both fortran and irpf90 should match + begin: '(?=\\b|\\+|\\-|\\.)(?:\\.|\\d+\\.?)\\d*([de][+-]?\\d+)?(_[a-z_\\d]+)?', relevance: 0 } ] diff --git a/test/detect/index.js b/test/detect/index.js index 17c071e7f0..f22b7ea31f 100644 --- a/test/detect/index.js +++ b/test/detect/index.js @@ -5,6 +5,7 @@ delete require.cache[require.resolve('../../build/lib/highlight')] const fs = require('fs').promises; const hljs = require('../../build'); +hljs.debugMode(); // tests run in debug mode so errors are raised const path = require('path'); const utility = require('../utility'); diff --git a/test/index.js b/test/index.js index b224f325e3..fdb0cc6bad 100644 --- a/test/index.js +++ b/test/index.js @@ -1,5 +1,8 @@ 'use strict'; +const hljs = require('../build'); +hljs.debugMode(); // tests run in debug mode so errors are raised + // Tests specific to the API exposed inside the hljs object. // Right now, that only includes tests for several common regular expressions. require('./api'); diff --git a/test/markup/fortran/numbers.expect.txt b/test/markup/fortran/numbers.expect.txt index f675c6acbf..056b625f6f 100644 --- a/test/markup/fortran/numbers.expect.txt +++ b/test/markup/fortran/numbers.expect.txt @@ -14,6 +14,6 @@ var1 va1r mo_tot_8 = 1./(0.4*log(float(elec_num_tot_8+0.4))) -6_ikind -1_c_short -6.666666666666666_DBL +6_ikind +1_c_short +6.666666666666666_DBL diff --git a/test/parser/should-not-destroyData.js b/test/parser/should-not-destroyData.js index bce13a1a31..630e6af3b2 100644 --- a/test/parser/should-not-destroyData.js +++ b/test/parser/should-not-destroyData.js @@ -5,6 +5,7 @@ describe("bugs", function () { // CONTEXT: https://github.com/highlightjs/highlight.js/pull/2219 describe("a grammar with a mode that makes a 0 width match", () => { it("should instead count it as a 1 character match", () => { + hljs.safeMode(); hljs.registerLanguage('test-language', (hljs) => { // broken regex from old Fortran ruleset @@ -28,6 +29,12 @@ describe("bugs", function () { // Incorrect prior output: // 'The number is 23_longint yes.' ) + hljs.debugMode(); + should(() => { + hljs.highlight('test-language', 'The number is 123_longint yes.').value + }).throw(Error, { + message: "0 width match regex", + languageName: "test-language"}) }) }) })