Merge pull request markedjs#1636 from Scrum/master

fix: Intra-word emphasis can match the wrong asterisks
Merged by zhenalexfan on Apr 14, 2020
commit ab54687
2 parents: 780c5cf + 0d34997
Show file tree

Hide file tree

Showing 10 changed files with 50 additions and 45 deletions.
diff --git a/docs/demo/worker.js b/docs/demo/worker.js
@@ -49,7 +49,7 @@ function parse(e) {
     case 'parse':
       var startTime = new Date();
       var lexed = marked.lexer(e.data.markdown, e.data.options);
-      var lexedList = getLexedList(lexed);
+      var lexedList = jsonString(lexed);
       var parsed = marked.parser(lexed, e.data.options);
       var endTime = new Date();
       postMessage({
@@ -62,23 +62,6 @@ function parse(e) {
   }
 }
 
-function getLexedList(lexed, level) {
-  level = level || 0;
-  var lexedList = [];
-  for (var i = 0; i < lexed.length; i++) {
-    var lexedLine = [];
-    for (var j in lexed[i]) {
-      if (j === 'tokens' || j === 'items') {
-        lexedLine.push(j + ': [\n' + getLexedList(lexed[i][j], level + 1) + '\n]');
-      } else {
-        lexedLine.push(j + ':' + jsonString(lexed[i][j]));
-      }
-    }
-    lexedList.push(stringRepeat(' ', 2 * level) + '{' + lexedLine.join(', ') + '}');
-  }
-  return lexedList.join('\n');
-}
-
 function stringRepeat(char, times) {
   var s = '';
   for (var i = 0; i < times; i++) {
@@ -87,15 +70,33 @@ function stringRepeat(char, times) {
   return s;
 }
 
-function jsonString(input) {
-  var output = (input + '')
-    .replace(/\n/g, '\\n')
-    .replace(/\r/g, '\\r')
-    .replace(/\t/g, '\\t')
-    .replace(/\f/g, '\\f')
-    .replace(/[\\"']/g, '\\$&')
-    .replace(/\u0000/g, '\\0');
-  return '"' + output + '"';
+function jsonString(input, level) {
+  level = level || 0;
+  if (Array.isArray(input)) {
+    if (input.length === 0) {
+      return '[]';
+    }
+    var items = [],
+        i;
+    if (!Array.isArray(input[0]) && typeof input[0] === 'object' && input[0] !== null) {
+      for (i = 0; i < input.length; i++) {
+        items.push(stringRepeat(' ', 2 * level) + jsonString(input[i], level + 1));
+      }
+      return '[\n' + items.join('\n') + '\n]';
+    }
+    for (i = 0; i < input.length; i++) {
+      items.push(jsonString(input[i], level));
+    }
+    return '[' + items.join(', ') + ']';
+  } else if (typeof input === 'object' && input !== null) {
+    var props = [];
+    for (var prop in input) {
+      props.push(prop + ':' + jsonString(input[prop], level));
+    }
+    return '{' + props.join(', ') + '}';
+  } else {
+    return JSON.stringify(input);
+  }
 }
 
 function loadVersion(ver) {

diff --git a/lib/marked.esm.js b/lib/marked.esm.js
@@ -465,7 +465,7 @@ const inline = {
   reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
   nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
   strong: /^__([^\s_])__(?!_)|^\*\*([^\s*])\*\*(?!\*)|^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)/,
-  em: /^_([^\s_])_(?!_)|^\*([^\s*<\[])\*(?!\*)|^_([^\s<][\s\S]*?[^\s_])_(?!_|[^\spunctuation])|^_([^\s_<][\s\S]*?[^\s])_(?!_|[^\spunctuation])|^\*([^\s<"][\s\S]*?[^\s\*])\*(?!\*|[^\spunctuation])|^\*([^\s*"<\[][\s\S]*?[^\s])\*(?!\*)/,
+  em: /^_([^\s_])_(?!_)|^_([^\s_<][\s\S]*?[^\s_])_(?!_|[^\spunctuation])|^_([^\s_<][\s\S]*?[^\s])_(?!_|[^\spunctuation])|^\*([^\s*<\[])\*(?!\*)|^\*([^\s<"][\s\S]*?[^\s\[\*])\*(?![\]`punctuation])|^\*([^\s*"<\[][\s\S]*[^\s])\*(?!\*)/,
   code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
   br: /^( {2,}|\\)\n(?!\s*$)/,
   del: noopTest$1,
@@ -474,7 +474,7 @@ const inline = {
 
 // list of punctuation marks from common mark spec
 // without ` and ] to workaround Rule 17 (inline code blocks/links)
-inline._punctuation = '!"#$%&\'()*+,\\-./:;<=>?@\\[^_{|}~';
+inline._punctuation = '!"#$%&\'()*+\\-./:;<=>?@\\[^_{|}~';
 inline.em = edit$1(inline.em).replace(/punctuation/g, inline._punctuation).getRegex();
 
 inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;
@@ -2007,7 +2007,7 @@ var Parser_1 = class Parser {
       token = tokens[i];
       switch (token.type) {
         case 'escape': {
-          out += token.text;
+          out += renderer.text(token.text);
           break;
         }
         case 'html': {

diff --git a/lib/marked.js b/lib/marked.js
@@ -448,15 +448,15 @@
     reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
     nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
     strong: /^__([^\s_])__(?!_)|^\*\*([^\s*])\*\*(?!\*)|^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)/,
-    em: /^_([^\s_])_(?!_)|^\*([^\s*<\[])\*(?!\*)|^_([^\s<][\s\S]*?[^\s_])_(?!_|[^\spunctuation])|^_([^\s_<][\s\S]*?[^\s])_(?!_|[^\spunctuation])|^\*([^\s<"][\s\S]*?[^\s\*])\*(?!\*|[^\spunctuation])|^\*([^\s*"<\[][\s\S]*?[^\s])\*(?!\*)/,
+    em: /^_([^\s_])_(?!_)|^_([^\s_<][\s\S]*?[^\s_])_(?!_|[^\spunctuation])|^_([^\s_<][\s\S]*?[^\s])_(?!_|[^\spunctuation])|^\*([^\s*<\[])\*(?!\*)|^\*([^\s<"][\s\S]*?[^\s\[\*])\*(?![\]`punctuation])|^\*([^\s*"<\[][\s\S]*[^\s])\*(?!\*)/,
     code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
     br: /^( {2,}|\\)\n(?!\s*$)/,
     del: noopTest$1,
     text: /^(`+|[^`])(?:[\s\S]*?(?:(?=[\\<!\[`*]|\b_|$)|[^ ](?= {2,}\n))|(?= {2,}\n))/
   }; // list of punctuation marks from common mark spec
   // without ` and ] to workaround Rule 17 (inline code blocks/links)
 
-  inline._punctuation = '!"#$%&\'()*+,\\-./:;<=>?@\\[^_{|}~';
+  inline._punctuation = '!"#$%&\'()*+\\-./:;<=>?@\\[^_{|}~';
   inline.em = edit$1(inline.em).replace(/punctuation/g, inline._punctuation).getRegex();
   inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;
   inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/;
@@ -2036,7 +2036,7 @@
         switch (token.type) {
           case 'escape':
             {
-              out += token.text;
+              out += renderer.text(token.text);
               break;
             }
 

diff --git a/marked.min.js b/marked.min.js
diff --git a/src/Parser.js b/src/Parser.js
@@ -200,7 +200,7 @@ module.exports = class Parser {
       token = tokens[i];
       switch (token.type) {
         case 'escape': {
-          out += token.text;
+          out += renderer.text(token.text);
           break;
         }
         case 'html': {

diff --git a/src/rules.js b/src/rules.js
@@ -169,7 +169,7 @@ const inline = {
   reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
   nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
   strong: /^__([^\s_])__(?!_)|^\*\*([^\s*])\*\*(?!\*)|^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)/,
-  em: /^_([^\s_])_(?!_)|^\*([^\s*<\[])\*(?!\*)|^_([^\s<][\s\S]*?[^\s_])_(?!_|[^\spunctuation])|^_([^\s_<][\s\S]*?[^\s])_(?!_|[^\spunctuation])|^\*([^\s<"][\s\S]*?[^\s\*])\*(?!\*|[^\spunctuation])|^\*([^\s*"<\[][\s\S]*?[^\s])\*(?!\*)/,
+  em: /^_([^\s_])_(?!_)|^_([^\s_<][\s\S]*?[^\s_])_(?!_|[^\spunctuation])|^_([^\s_<][\s\S]*?[^\s])_(?!_|[^\spunctuation])|^\*([^\s*<\[])\*(?!\*)|^\*([^\s<"][\s\S]*?[^\s\[\*])\*(?![\]`punctuation])|^\*([^\s*"<\[][\s\S]*[^\s])\*(?!\*)/,
   code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
   br: /^( {2,}|\\)\n(?!\s*$)/,
   del: noopTest,
@@ -178,7 +178,7 @@ const inline = {
 
 // list of punctuation marks from common mark spec
 // without ` and ] to workaround Rule 17 (inline code blocks/links)
-inline._punctuation = '!"#$%&\'()*+,\\-./:;<=>?@\\[^_{|}~';
+inline._punctuation = '!"#$%&\'()*+\\-./:;<=>?@\\[^_{|}~';
 inline.em = edit(inline.em).replace(/punctuation/g, inline._punctuation).getRegex();
 
 inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;

diff --git a/test/specs/commonmark/commonmark.0.29.json b/test/specs/commonmark/commonmark.0.29.json
@@ -3359,17 +3359,15 @@
     "example": 413,
     "start_line": 6895,
     "end_line": 6899,
-    "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "section": "Emphasis and strong emphasis"
   },
   {
     "markdown": "*foo**bar***\n",
     "html": "<p><em>foo<strong>bar</strong></em></p>\n",
     "example": 414,
     "start_line": 6902,
     "end_line": 6906,
-    "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "section": "Emphasis and strong emphasis"
   },
   {
     "markdown": "foo***bar***baz\n",

diff --git a/test/specs/gfm/commonmark.0.29.json b/test/specs/gfm/commonmark.0.29.json
@@ -3359,17 +3359,15 @@
     "example": 413,
     "start_line": 6895,
     "end_line": 6899,
-    "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "section": "Emphasis and strong emphasis"
   },
   {
     "markdown": "*foo**bar***\n",
     "html": "<p><em>foo<strong>bar</strong></em></p>\n",
     "example": 414,
     "start_line": 6902,
     "end_line": 6906,
-    "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "section": "Emphasis and strong emphasis"
   },
   {
     "markdown": "foo***bar***baz\n",

diff --git a/test/specs/new/em_2char.html b/test/specs/new/em_2char.html
@@ -23,3 +23,7 @@
 <p><em>1_</em></p>
 
 <p><em>1*</em></p>
+
+<p>It’s levi<em>OH</em>sa, not levio<em>SAH.</em></p>
+
+<p>__ test <a href="https://test.com/_">test</a></p>
diff --git a/test/specs/new/em_2char.md b/test/specs/new/em_2char.md
@@ -23,3 +23,7 @@ _ 123_
 _1__
 
 *1**
+
+It’s levi*OH*sa, not levio*SAH.*
+
+__ test [test](https://test.com/_)