From 32c600e60132b14a1d5ed0a09ea4dfd3093aa950 Mon Sep 17 00:00:00 2001
From: Tony Brix <tony@brix.ninja>
Date: Fri, 27 Nov 2020 09:41:27 -0600
Subject: [PATCH] fix: fix atx heading and make regex safe

---
 lib/marked.esm.js                          | 19 ++++++++++++++++---
 lib/marked.js                              | 19 ++++++++++++++++---
 src/Tokenizer.js                           | 15 ++++++++++++++-
 src/rules.js                               |  4 ++--
 test/specs/commonmark/commonmark.0.29.json |  3 +--
 test/specs/gfm/commonmark.0.29.json        |  3 +--
 test/specs/new/pedantic_heading.html       |  9 +++++++++
 test/specs/new/pedantic_heading.md         | 12 ++++++++++++
 test/specs/redos/quadratic_heading.js      |  7 +++++++
 9 files changed, 78 insertions(+), 13 deletions(-)
 create mode 100644 test/specs/new/pedantic_heading.html
 create mode 100644 test/specs/new/pedantic_heading.md
 create mode 100644 test/specs/redos/quadratic_heading.js

diff --git a/lib/marked.esm.js b/lib/marked.esm.js
index 1beab3500e..afac44dc3d 100644
--- a/lib/marked.esm.js
+++ b/lib/marked.esm.js
@@ -435,11 +435,24 @@ var Tokenizer_1 = class Tokenizer {
   heading(src) {
     const cap = this.rules.block.heading.exec(src);
     if (cap) {
+      let text = cap[2].trim();
+
+      // remove trailing #s (regex tests rather than ES2015 String#endsWith, which ES5-only engines lack)
+      if (/#$/.test(text)) {
+        const trimmed = rtrim$1(text, '#');
+        if (this.options.pedantic) {
+          text = trimmed.trim();
+        } else if (!trimmed || / $/.test(trimmed)) {
+          // CommonMark requires space before trailing #s
+          text = trimmed.trim();
+        }
+      }
+
       return {
         type: 'heading',
         raw: cap[0],
         depth: cap[1].length,
-        text: cap[2]
+        text: text
       };
     }
   }
@@ -1003,7 +1016,7 @@ const block = {
   code: /^( {4}[^\n]+\n*)+/,
   fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/,
   hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
-  heading: /^ {0,3}(#{1,6}) +([^\n]*?)(?: +#+)? *(?:\n+|$)/,
+  heading: /^ {0,3}(#{1,6})(?=\s|$)([^\n]*)(?:\n+|$)/, // [^\n] rather than '.', which also stops at \r, U+2028 and U+2029
   blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
   list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?! {0,3}bull )\n*|\s*$)/,
   html: '^ {0,3}(?:' // optional indentation
@@ -1134,7 +1147,7 @@ block.pedantic = merge$1({}, block.normal, {
       + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
     .getRegex(),
   def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
-  heading: /^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)/,
+  heading: /^(#{1,6})([^\n]*)(?:\n+|$)/, // [^\n] rather than '.', which also stops at \r, U+2028 and U+2029
   fences: noopTest$1, // fences not supported
   paragraph: edit$1(block.normal._paragraph)
     .replace('hr', block.hr)
diff --git a/lib/marked.js b/lib/marked.js
index 4ff1fa6f4d..3e623b8944 100644
--- a/lib/marked.js
+++ b/lib/marked.js
@@ -532,11 +532,24 @@
       var cap = this.rules.block.heading.exec(src);
 
       if (cap) {
+        var text = cap[2].trim(); // remove trailing #s (regex tests rather than ES2015 String#endsWith, which ES5-only engines lack)
+
+        if (/#$/.test(text)) {
+          var trimmed = rtrim$1(text, '#');
+
+          if (this.options.pedantic) {
+            text = trimmed.trim();
+          } else if (!trimmed || / $/.test(trimmed)) {
+            // CommonMark requires space before trailing #s
+            text = trimmed.trim();
+          }
+        }
+
         return {
           type: 'heading',
           raw: cap[0],
           depth: cap[1].length,
-          text: cap[2]
+          text: text
         };
       }
     };
@@ -1122,7 +1135,7 @@
     code: /^( {4}[^\n]+\n*)+/,
     fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/,
     hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
-    heading: /^ {0,3}(#{1,6}) +([^\n]*?)(?: +#+)? *(?:\n+|$)/,
+    heading: /^ {0,3}(#{1,6})(?=\s|$)([^\n]*)(?:\n+|$)/, // [^\n] rather than '.', which also stops at \r, U+2028 and U+2029
     blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
     list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?! {0,3}bull )\n*|\s*$)/,
     html: '^ {0,3}(?:' // optional indentation
@@ -1193,7 +1206,7 @@
     html: edit$1('^ *(?:comment *(?:\\n|\\s*$)' + '|<(tag)[\\s\\S]+?</\\1> *(?:\\n{2,}|\\s*$)' // closed tag
     + '|<tag(?:"[^"]*"|\'[^\']*\'|\\s[^\'"/>\\s]*)*?/?> *(?:\\n{2,}|\\s*$))').replace('comment', block._comment).replace(/tag/g, '(?!(?:' + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub' + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)' + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b').getRegex(),
     def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
-    heading: /^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)/,
+    heading: /^(#{1,6})([^\n]*)(?:\n+|$)/, // [^\n] rather than '.', which also stops at \r, U+2028 and U+2029
     fences: noopTest$1,
     // fences not supported
     paragraph: edit$1(block.normal._paragraph).replace('hr', block.hr).replace('heading', ' *#{1,6} *[^\n]').replace('lheading', block.lheading).replace('blockquote', ' {0,3}>').replace('|fences', '').replace('|list', '').replace('|html', '').getRegex()
diff --git a/src/Tokenizer.js b/src/Tokenizer.js
index 20f122332f..0ddcb3c1a3 100644
--- a/src/Tokenizer.js
+++ b/src/Tokenizer.js
@@ -121,11 +121,24 @@ module.exports = class Tokenizer {
   heading(src) {
     const cap = this.rules.block.heading.exec(src);
     if (cap) {
+      let text = cap[2].trim();
+
+      // remove trailing #s (regex tests rather than ES2015 String#endsWith, which ES5-only engines lack)
+      if (/#$/.test(text)) {
+        const trimmed = rtrim(text, '#');
+        if (this.options.pedantic) {
+          text = trimmed.trim();
+        } else if (!trimmed || / $/.test(trimmed)) {
+          // CommonMark requires space before trailing #s
+          text = trimmed.trim();
+        }
+      }
+
       return {
         type: 'heading',
         raw: cap[0],
         depth: cap[1].length,
-        text: cap[2]
+        text: text
       };
     }
   }
diff --git a/src/rules.js b/src/rules.js
index 13356df367..95897119bb 100644
--- a/src/rules.js
+++ b/src/rules.js
@@ -12,7 +12,7 @@ const block = {
   code: /^( {4}[^\n]+\n*)+/,
   fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/,
   hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
-  heading: /^ {0,3}(#{1,6}) +([^\n]*?)(?: +#+)? *(?:\n+|$)/,
+  heading: /^ {0,3}(#{1,6})(?=\s|$)([^\n]*)(?:\n+|$)/, // [^\n] rather than '.', which also stops at \r, U+2028 and U+2029
   blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
   list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?! {0,3}bull )\n*|\s*$)/,
   html: '^ {0,3}(?:' // optional indentation
@@ -143,7 +143,7 @@ block.pedantic = merge({}, block.normal, {
       + '\\b)\\w+(?!:|[^\\w\\s@]*@)\\b')
     .getRegex(),
   def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +(["(][^\n]+[")]))? *(?:\n+|$)/,
-  heading: /^ *(#{1,6}) *([^\n]+?) *(?:#+ *)?(?:\n+|$)/,
+  heading: /^(#{1,6})([^\n]*)(?:\n+|$)/, // [^\n] rather than '.', which also stops at \r, U+2028 and U+2029
   fences: noopTest, // fences not supported
   paragraph: edit(block.normal._paragraph)
     .replace('hr', block.hr)
diff --git a/test/specs/commonmark/commonmark.0.29.json b/test/specs/commonmark/commonmark.0.29.json
index 1440e2f2c1..56ac943252 100644
--- a/test/specs/commonmark/commonmark.0.29.json
+++ b/test/specs/commonmark/commonmark.0.29.json
@@ -389,8 +389,7 @@
     "example": 49,
     "start_line": 963,
     "end_line": 971,
-    "section": "ATX headings",
-    "shouldFail": true
+    "section": "ATX headings"
   },
   {
     "markdown": "Foo *bar*\n=========\n\nFoo *bar*\n---------\n",
diff --git a/test/specs/gfm/commonmark.0.29.json b/test/specs/gfm/commonmark.0.29.json
index 7732f6a9da..ea3586e4bc 100644
--- a/test/specs/gfm/commonmark.0.29.json
+++ b/test/specs/gfm/commonmark.0.29.json
@@ -389,8 +389,7 @@
     "example": 49,
     "start_line": 963,
     "end_line": 971,
-    "section": "ATX headings",
-    "shouldFail": true
+    "section": "ATX headings"
   },
   {
     "markdown": "Foo *bar*\n=========\n\nFoo *bar*\n---------\n",
diff --git a/test/specs/new/pedantic_heading.html b/test/specs/new/pedantic_heading.html
new file mode 100644
index 0000000000..7795a3bccc
--- /dev/null
+++ b/test/specs/new/pedantic_heading.html
@@ -0,0 +1,9 @@
+<h1 id="h1">h1</h1>
+
+<h1 id="h1-1">h1</h1>
+
+<h1 id="h1-">h1 #</h1>
+
+<h1 id="h1-2">h1</h1>
+
+<p># h1</p>
diff --git a/test/specs/new/pedantic_heading.md b/test/specs/new/pedantic_heading.md
new file mode 100644
index 0000000000..8f7a1e8eb2
--- /dev/null
+++ b/test/specs/new/pedantic_heading.md
@@ -0,0 +1,12 @@
+---
+pedantic: true
+---
+#h1
+
+#h1#
+
+#h1 # #
+
+#h1####
+
+ # h1
diff --git a/test/specs/redos/quadratic_heading.js b/test/specs/redos/quadratic_heading.js
new file mode 100644
index 0000000000..ae8a20880a
--- /dev/null
+++ b/test/specs/redos/quadratic_heading.js
@@ -0,0 +1,7 @@
+module.exports = {
+  markdown: `# #${' '.repeat(50000)}a`, // '# #', then a run of 50000 spaces, then 'a'
+  html: '<h1># a</h1>', // heading text '# a' with one space; presumably compared after whitespace normalization - confirm against the spec runner
+  options: {
+    headerIds: false
+  }
+};