markedjs · UziTech · Jul 13, 2020 · May 21, 2020 · May 29, 2020 · May 29, 2020
diff --git a/src/Lexer.js b/src/Lexer.js
@@ -1,6 +1,7 @@
 const Tokenizer = require('./Tokenizer.js');
 const { defaults } = require('./defaults.js');
 const { block, inline } = require('./rules.js');
+const { edit } = require('./helpers.js');
 
 /**
  * smartypants text replacement
@@ -102,6 +103,12 @@ module.exports = class Lexer {
 
     this.blockTokens(src, this.tokens, true);
 
+    // Insert the keys of all known reflinks into the em rule so that it can properly skip over them
+    const rep = Object.keys(this.tokens.links).join('|').replace(/\*/g, '\\*');
+    this.tokenizer.rules.inline.em = edit(inline.em)
+      .replace(/reflink/g, rep)
+      .getRegex();
+
     this.inline(this.tokens);
 
     return this.tokens;
@@ -267,7 +274,7 @@ module.exports = class Lexer {
         case 'text':
         case 'heading': {
           token.tokens = [];
-          this.inlineTokens(token.text, token.tokens);
+          this.inlineTokens(token.text, token.tokens, undefined, undefined);
           break;
         }
         case 'table': {
@@ -319,7 +326,7 @@ module.exports = class Lexer {
   /**
    * Lexing/Compiling
    */
-  inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) {
+  inlineTokens(src, tokens = [], inLink = false, inRawBlock = false, prevChar = '') {
     let token;
 
     while (src) {
@@ -360,15 +367,15 @@ module.exports = class Lexer {
       }
 
       // strong
-      if (token = this.tokenizer.strong(src)) {
+      if (token = this.tokenizer.strong(src, prevChar)) {
         src = src.substring(token.raw.length);
         token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
         tokens.push(token);
         continue;
       }
 
       // em
-      if (token = this.tokenizer.em(src)) {
+      if (token = this.tokenizer.em(src, prevChar, this.tokens.links)) {
         src = src.substring(token.raw.length);
         token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock);
         tokens.push(token);
@@ -414,6 +421,7 @@ module.exports = class Lexer {
       // text
       if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) {
         src = src.substring(token.raw.length);
+        prevChar = token.raw.slice(-1);
         tokens.push(token);
         continue;
       }

diff --git a/src/Tokenizer.js b/src/Tokenizer.js
@@ -489,7 +489,7 @@ module.exports = class Tokenizer {
     }
   }
 
-  strong(src) {
+  strong(src, prevChar = '') {
     const cap = this.rules.inline.strong.exec(src);
     if (cap) {
       return {
@@ -500,14 +500,16 @@ module.exports = class Tokenizer {
     }
   }
 
-  em(src) {
+  em(src, prevChar = '') {
     const cap = this.rules.inline.em.exec(src);
     if (cap) {
-      return {
-        type: 'em',
-        raw: cap[0],
-        text: cap[6] || cap[5] || cap[4] || cap[3] || cap[2] || cap[1]
-      };
+      if (!cap[1] || (cap[1] && (prevChar === '' || this.rules.inline.punctuation.exec(prevChar)))) {
+        return {
+          type: 'em',
+          raw: cap[0],
+          text: cap[3] || cap[2]
+        };
+      }
     }
   }
 

diff --git a/src/rules.js b/src/rules.js
@@ -169,18 +169,27 @@ const inline = {
   reflink: /^!?\[(label)\]\[(?!\s*\])((?:\\[\[\]]?|[^\[\]\\])+)\]/,
   nolink: /^!?\[(?!\s*\])((?:\[[^\[\]]*\]|\\[\[\]]|[^\[\]])*)\](?:\[\])?/,
   strong: /^__([^\s_])__(?!_)|^\*\*([^\s*])\*\*(?!\*)|^__([^\s][\s\S]*?[^\s])__(?!_)|^\*\*([^\s][\s\S]*?[^\s])\*\*(?!\*)/,
-  em: /^_([^\s_])_(?!_)|^_([^\s_<][\s\S]*?[^\s_])_(?!_|[^\s,punctuation])|^_([^\s_<][\s\S]*?[^\s])_(?!_|[^\s,punctuation])|^\*([^\s*<\[])\*(?!\*)|^\*([^\s<"][\s\S]*?[^\s\[\*])\*(?![\]`punctuation])|^\*([^\s*"<\[][\s\S]*[^\s])\*(?!\*)/,
+  //       (1) returns if starts w/ punctuation  | (2)  ⬐Check groups to skip over ⬐ skip if needed  ⬐repeat logic for inner *'s (must be in pairs)⬎           ⬐last char can't be punct OR  ⬐final * must also be followed by punct (or endline)  | (3) Underscores   ⬐Check groups to skip over ⬐ skip if needed  ⬐repeat logic for inner _'s (must be in pairs)⬎     ⬐last char can't be a space, and final _ must be followed by punct (or endline)
+  em: /^(?:(\*(?=[`\]punctuation]))|\*)(?![\*\s])((?:(?:(?!emSkip)(?:[^\*]|[\\\s]\*)|emSkip)|(?:(?:(?!emSkip)(?:[^\*]|[\\\s]\*)|emSkip)*?(?<!\\)\*){2})*?)(?:(?<![`\s\]punctuation])\*(?!\*)|(?<=[`\]punctuation])\*(?!\*)(?:(?=[`\s\]punctuation]|$)))|^_(?![_\s])((?:(?:(?!emSkip)(?:[^_]|[\\\s]_)|emSkip)|(?:(?:(?!emSkip)(?:[^_]|[\\\s]_)|emSkip)*?(?<!\\)_){2})*?)(?:(?<![\s])_(?!_)(?:(?=[`\s\]punctuation])|$))/,
   code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/,
   br: /^( {2,}|\\)\n(?!\s*$)/,
   del: noopTest,
-  text: /^(`+|[^`])(?:[\s\S]*?(?:(?=[\\<!\[`*]|\b_|$)|[^ ](?= {2,}\n))|(?= {2,}\n))/
+  text: /^(`+|[^`])(?:[\s\S]*?(?:(?=[\\<!\[`*]|\b_|$)|[^ ](?= {2,}\n))|(?= {2,}\n))/,
+  punctuation: /^(['\s\]punctuation])/
 };
 
 // list of punctuation marks from common mark spec
-// without ` and ] to workaround Rule 17 (inline code blocks/links)
-// without , to work around example 393
-inline._punctuation = '!"#$%&\'()*+\\-./:;<=>?@\\[^_{|}~';
-inline.em = edit(inline.em).replace(/punctuation/g, inline._punctuation).getRegex();
+// without * and _ to work around cases with double emphasis
+inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~';
+inline.punctuation = edit(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex();
+
+// sequences that em should skip over: [reflink], [title][reflink], [title](link), `code`, <html>
+inline._emSkip = '\\[reflink\\]|\\[.*?\\]\\[reflink\\]|\\[.*?\\]\\(.*?\\)|`.*?`|<.*?>';
+
+inline.em = edit(inline.em)
+  .replace(/punctuation/g, inline._punctuation)
+  .replace(/emSkip/g, inline._emSkip)
+  .getRegex();
 
 inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g;
 
@@ -224,7 +233,7 @@ inline.normal = merge({}, inline);
 
 inline.pedantic = merge({}, inline.normal, {
   strong: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/,
-  em: /^_(?=\S)([\s\S]*?\S)_(?!_)|^\*(?=\S)([\s\S]*?\S)\*(?!\*)/,
+  em: /^()\*(?=\S)([\s\S]*?\S)\*(?!\*)|^_(?=\S)([\s\S]*?\S)_(?!_)/,
   link: edit(/^!?\[(label)\]\((.*?)\)/)
     .replace('label', inline._label)
     .getRegex(),

diff --git a/test/specs/commonmark/commonmark.0.29.json b/test/specs/commonmark/commonmark.0.29.json
@@ -2766,7 +2766,7 @@
     "start_line": 6003,
     "end_line": 6007,
     "section": "Code spans",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "[not a `link](/foo`)\n",
@@ -2976,7 +2976,7 @@
     "start_line": 6455,
     "end_line": 6459,
     "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "*(*foo*)*\n",
@@ -2985,7 +2985,7 @@
     "start_line": 6465,
     "end_line": 6469,
     "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "*foo*bar\n",
@@ -3010,7 +3010,7 @@
     "start_line": 6497,
     "end_line": 6501,
     "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "_(_foo_)_\n",
@@ -3019,7 +3019,7 @@
     "start_line": 6506,
     "end_line": 6510,
     "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "_foo_bar\n",
@@ -3301,7 +3301,7 @@
     "start_line": 6824,
     "end_line": 6828,
     "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "__foo_ bar_\n",
@@ -3394,7 +3394,7 @@
     "start_line": 6928,
     "end_line": 6932,
     "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "*foo [*bar*](/url)*\n",
@@ -3589,8 +3589,7 @@
     "example": 441,
     "start_line": 7122,
     "end_line": 7126,
-    "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "section": "Emphasis and strong emphasis"
   },
   {
     "markdown": "*foo**\n",
@@ -3616,8 +3615,7 @@
     "example": 444,
     "start_line": 7143,
     "end_line": 7147,
-    "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "section": "Emphasis and strong emphasis"
   },
   {
     "markdown": "**foo***\n",

diff --git a/test/specs/gfm/commonmark.0.29.json b/test/specs/gfm/commonmark.0.29.json
@@ -2766,7 +2766,7 @@
     "start_line": 6003,
     "end_line": 6007,
     "section": "Code spans",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "[not a `link](/foo`)\n",
@@ -2976,7 +2976,7 @@
     "start_line": 6455,
     "end_line": 6459,
     "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "*(*foo*)*\n",
@@ -2985,7 +2985,7 @@
     "start_line": 6465,
     "end_line": 6469,
     "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "*foo*bar\n",
@@ -3010,7 +3010,7 @@
     "start_line": 6497,
     "end_line": 6501,
     "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "_(_foo_)_\n",
@@ -3019,7 +3019,7 @@
     "start_line": 6506,
     "end_line": 6510,
     "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "_foo_bar\n",
@@ -3301,7 +3301,7 @@
     "start_line": 6824,
     "end_line": 6828,
     "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "__foo_ bar_\n",
@@ -3394,7 +3394,7 @@
     "start_line": 6928,
     "end_line": 6932,
     "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "shouldFail": false
   },
   {
     "markdown": "*foo [*bar*](/url)*\n",
@@ -3589,8 +3589,7 @@
     "example": 441,
     "start_line": 7122,
     "end_line": 7126,
-    "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "section": "Emphasis and strong emphasis"
   },
   {
     "markdown": "*foo**\n",
@@ -3616,8 +3615,7 @@
     "example": 444,
     "start_line": 7143,
     "end_line": 7147,
-    "section": "Emphasis and strong emphasis",
-    "shouldFail": true
+    "section": "Emphasis and strong emphasis"
   },
   {
     "markdown": "**foo***\n",

diff --git a/test/specs/new/em_2char.html b/test/specs/new/em_2char.html
@@ -20,10 +20,6 @@
 
 <p>_ 123_</p>
 
-<p><em>1_</em></p>
-
-<p><em>1*</em></p>
-
 <p>It’s levi<em>OH</em>sa, not levio<em>SAH.</em></p>
 
-<p>__ test <a href="https://test.com/_">test</a></p>
+<p>__ test <a href="https://test.com/_">test</a></p>
diff --git a/test/specs/new/em_2char.md b/test/specs/new/em_2char.md
@@ -20,10 +20,6 @@ _123 _
 
 _ 123_
 
-_1__
-
-*1**
-
 It’s levi*OH*sa, not levio*SAH.*
 
 __ test [test](https://test.com/_)
diff --git a/test/specs/what/em_left_square_bracket.html b/test/specs/what/em_left_square_bracket.html
@@ -0,0 +1,4 @@
+<p>[<em>[punctuation, asterisk, punctuation should work</em></p>
+<p><em>[space, asterisk, punctuation should work</em></p>
+<p>p<em>non-punctuation, asterisk, non-punctuation should work</em></p>
+<p>p*[non-punctuation, asterisk, punctuation should NOT work*</p>
diff --git a/test/specs/what/em_left_square_bracket.md b/test/specs/what/em_left_square_bracket.md
@@ -0,0 +1,10 @@
+[*[punctuation, asterisk, punctuation should work*
+
+
+ *[space, asterisk, punctuation should work*
+
+
+p*non-punctuation, asterisk, non-punctuation should work*
+
+
+p*[non-punctuation, asterisk, punctuation should NOT work*
diff --git a/test/specs/what/em_left_square_bracket0.html b/test/specs/what/em_left_square_bracket0.html
@@ -0,0 +1 @@
+<p><em>foo <em>bar</em></em></p>
diff --git a/test/specs/what/em_left_square_bracket0.md b/test/specs/what/em_left_square_bracket0.md
@@ -0,0 +1 @@
+*foo *bar** 
diff --git a/test/specs/what/em_left_square_bracket00.html b/test/specs/what/em_left_square_bracket00.html
@@ -0,0 +1 @@
+<p><em>foo <strong>bar</strong> baz</em></p>
diff --git a/test/specs/what/em_left_square_bracket00.md b/test/specs/what/em_left_square_bracket00.md
@@ -0,0 +1 @@
+*foo **bar** baz*
diff --git a/test/specs/what/em_left_square_bracket000.html b/test/specs/what/em_left_square_bracket000.html
@@ -0,0 +1 @@
+<p>foo <em>_</em></p>
diff --git a/test/specs/what/em_left_square_bracket000.md b/test/specs/what/em_left_square_bracket000.md
@@ -0,0 +1 @@
+foo *_*
diff --git a/test/specs/what/em_left_square_bracket0000.html b/test/specs/what/em_left_square_bracket0000.html
@@ -0,0 +1 @@
+<p><em>(<strong>foo</strong>)</em></p>
diff --git a/test/specs/what/em_left_square_bracket0000.md b/test/specs/what/em_left_square_bracket0000.md
@@ -0,0 +1 @@
+*(**foo**)*
diff --git a/test/specs/whats/strong_and_em_together.html b/test/specs/whats/strong_and_em_together.html
@@ -0,0 +1,7 @@
+<p><strong><em>This is strong and em.</em></strong></p>
+
+<p>So is <strong><em>this</em></strong> word.</p>
+
+<p><strong><em>This is strong and em.</em></strong></p>
+
+<p>So is <strong><em>this</em></strong> word.</p>
diff --git a/test/specs/whats/strong_and_em_together.md b/test/specs/whats/strong_and_em_together.md
@@ -0,0 +1,7 @@
+***This is strong and em.***
+
+So is ***this*** word.
+
+___This is strong and em.___
+
+So is ___this___ word.