From fb3cebd1f212c17ad626c87cc102cd8b422472e7 Mon Sep 17 00:00:00 2001
From: Tiago de Paula <tiagodepalves@gmail.com>
Date: Fri, 22 Apr 2022 12:59:47 -0300
Subject: [PATCH 1/6] Update markdown to html converter to mistune 2.0.2

---
 nbconvert/filters/markdown_mistune.py | 128 ++++++++++++--------------
 1 file changed, 57 insertions(+), 71 deletions(-)

diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py
index 382a53882..19bf4b255 100644
--- a/nbconvert/filters/markdown_mistune.py
+++ b/nbconvert/filters/markdown_mistune.py
@@ -21,7 +21,7 @@
     from cgi import escape as html_escape
 
 import bs4
-import mistune
+from mistune import BlockParser, HTMLRenderer, InlineParser, Markdown
 from pygments import highlight
 from pygments.formatters import HtmlFormatter
 from pygments.lexers import get_lexer_by_name
@@ -34,99 +34,83 @@ class InvalidNotebook(Exception):
     pass
 
 
-class MathBlockGrammar(mistune.BlockGrammar):
-    """This defines a single regex comprised of the different patterns that
-    identify math content spanning multiple lines. These are used by the
-    MathBlockLexer.
+class MathBlockParser(BlockParser):
+    """This acts as a pass-through to the MathInlineParser. It is needed in
+    order to avoid other block level rules splitting math sections apart.
     """
 
-    multi_math_str = "|".join(
-        [r"^\$\$.*?\$\$", r"^\\\\\[.*?\\\\\]", r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}"]
+    MULTILINE_MATH = re.compile(
+        r"(?<!\\)[$]{2}.*?(?<!\\)[$]{2}|"
+        r"\\\\\[.*?\\\\\]|"
+        r"\\begin\{([a-z]*\*?)\}.*?\\end\{\1\}",
+        re.DOTALL,
     )
-    multiline_math = re.compile(multi_math_str, re.DOTALL)
-
-
-class MathBlockLexer(mistune.BlockLexer):
-    """This acts as a pass-through to the MathInlineLexer. It is needed in
-    order to avoid other block level rules splitting math sections apart.
-    """
 
-    default_rules = ["multiline_math"] + mistune.BlockLexer.default_rules
+    RULE_NAMES = ("multiline_math",) + BlockParser.RULE_NAMES
 
-    def __init__(self, rules=None, **kwargs):
-        if rules is None:
-            rules = MathBlockGrammar()
-        super().__init__(rules, **kwargs)
+    def parse_multiline_math(self, m, state):
+        """Pass token through mutiline math."""
+        return {"type": "multiline_math", "text": m.group(0)}
 
-    def parse_multiline_math(self, m):
-        """Add token to pass through mutiline math."""
-        self.tokens.append({"type": "multiline_math", "text": m.group(0)})
 
+def _dotall(pattern):
+    """Make the '.' special character match any character inside the pattern, including a newline.
 
-class MathInlineGrammar(mistune.InlineGrammar):
-    """This defines different ways of declaring math objects that should be
-    passed through to mathjax unaffected. These are used by the MathInlineLexer.
+    This is implemented with the inline flag `(?s:...)` and is equivalent to using `re.DOTALL` when
+    it is the only pattern used. It is necessary since `mistune>=2.0.0`, where the pattern is passed
+    to the undocumented `re.Scanner`.
     """
+    return f"(?s:{pattern})"
 
-    inline_math = re.compile(r"^\$(.+?)\$|^\\\\\((.+?)\\\\\)", re.DOTALL)
-    block_math = re.compile(r"^\$\$(.*?)\$\$|^\\\\\[(.*?)\\\\\]", re.DOTALL)
-    latex_environment = re.compile(r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}", re.DOTALL)
-    text = re.compile(r"^[\s\S]+?(?=[\\<!\[_*`~$]|https?://| {2,}\n|$)")
 
-
-class MathInlineLexer(mistune.InlineLexer):
-    r"""This interprets the content of LaTeX style math objects using the rules
-    defined by the MathInlineGrammar.
+class MathInlineParser(InlineParser):
+    r"""This interprets the content of LaTeX style math objects.
 
     In particular this grabs ``$$...$$``, ``\\[...\\]``, ``\\(...\\)``, ``$...$``,
     and ``\begin{foo}...\end{foo}`` styles for declaring mathematics. It strips
     delimiters from all these varieties, and extracts the type of environment
     in the last case (``foo`` in this example).
     """
-    default_rules = [
-        "block_math",
-        "inline_math",
-        "latex_environment",
-    ] + mistune.InlineLexer.default_rules
-
-    def __init__(self, renderer, rules=None, **kwargs):
-        if rules is None:
-            rules = MathInlineGrammar()
-        super().__init__(renderer, rules, **kwargs)
+    INLINE_MATH = _dotall(r"(?<![\\$])[$](.+?)(?<!\\)[$]|[\\]{2}[(](.+?)[\\]{2}[)]")
+    BLOCK_MATH = _dotall(r"(?<!\\)[$]{2}(.*?)(?<!\\)[$]{2}|\\\\\[(.*?)\\\\\]")
+    LATEX_ENVIRONMENT = _dotall(r"\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}")
 
-    def output_inline_math(self, m):
-        return self.renderer.inline_math(m.group(1) or m.group(2))
+    RULE_NAMES = ("block_math", "inline_math", "latex_environment") + InlineParser.RULE_NAMES
 
-    def output_block_math(self, m):
-        return self.renderer.block_math(m.group(1) or m.group(2) or "")
+    def parse_inline_math(self, m, state):
+        text = m.group(1) or m.group(2)
+        return "inline_math", text
 
-    def output_latex_environment(self, m):
-        return self.renderer.latex_environment(m.group(1), m.group(2))
+    def parse_block_math(self, m, state):
+        text = m.group(1) or m.group(2)
+        return "block_math", text
 
+    def parse_latex_environment(self, m, state):
+        name, text = m.group(1), m.group(2)
+        return "latex_environment", name, text
 
-class MarkdownWithMath(mistune.Markdown):
-    def __init__(self, renderer, **kwargs):
-        if "inline" not in kwargs:
-            kwargs["inline"] = MathInlineLexer
-        if "block" not in kwargs:
-            kwargs["block"] = MathBlockLexer
-        super().__init__(renderer, **kwargs)
 
-    def output_multiline_math(self):
-        return self.inline(self.token["text"])
+class MarkdownWithMath(Markdown):
+    def __init__(self, renderer, block=None, inline=None, plugins=None):
+        if block is None:
+            block = MathBlockParser()
+        if inline is None:
+            inline = MathInlineParser(renderer, hard_wrap=False)
+        super().__init__(renderer, block, inline, plugins)
 
 
-class IPythonRenderer(mistune.Renderer):
-    def block_code(self, code, lang):
-        if lang:
+class IPythonRenderer(HTMLRenderer):
+    def block_code(self, code, info=None):
+        if info:
             try:
+                lang = info.strip().split(None, 1)[0]
                 lexer = get_lexer_by_name(lang, stripall=True)
             except ClassNotFound:
                 code = lang + "\n" + code
                 lang = None
 
         if not lang:
-            return "\n<pre><code>%s</code></pre>\n" % mistune.escape(code)
+            return super().block_code(code)
 
         formatter = HtmlFormatter()
         return highlight(code, lexer, formatter)
@@ -147,8 +131,8 @@ def inline_html(self, html):
 
         return super().inline_html(html)
 
-    def header(self, text, level, raw=None):
-        html = super().header(text, level, raw=raw)
+    def heading(self, text, level):
+        html = super().heading(text, level)
         if self.options.get("exclude_anchor_links"):
             return html
         anchor_link_text = self.options.get("anchor_link_text", "¶")
@@ -157,23 +141,25 @@ def header(self, text, level, raw=None):
     def escape_html(self, text):
         return html_escape(text)
 
+    def multiline_math(self, text):
+        return text
+
     def block_math(self, text):
-        return "$$%s$$" % self.escape_html(text)
+        return f"$${self.escape_html(text)}$$"
 
     def latex_environment(self, name, text):
-        name = self.escape_html(name)
-        text = self.escape_html(text)
-        return rf"\begin{{{name}}}{text}\end{{{name}}}"
+        name, text = self.escape_html(name), self.escape_html(text)
+        return f"\\begin{{{name}}}{text}\\end{{{name}}}"
 
     def inline_math(self, text):
-        return "$%s$" % self.escape_html(text)
+        return f"${self.escape_html(text)}$"
 
-    def image(self, src, title, text):
+    def image(self, src, text, title):
         """Rendering a image with title and text.
 
         :param src: source link of the image.
-        :param title: title text of the image.
         :param text: alt text of the image.
+        :param title: title text of the image.
         """
         attachments = self.options.get("attachments", {})
         attachment_prefix = "attachment:"

From c6f55c37fdde421e81b1f7962f9fde6cd0800155 Mon Sep 17 00:00:00 2001
From: Tiago de Paula <tiagodepalves@gmail.com>
Date: Fri, 22 Apr 2022 13:22:59 -0300
Subject: [PATCH 2/6] Compatibility options for IPythonRenderer on mistune ver2

---
 nbconvert/filters/markdown_mistune.py | 48 ++++++++++++++++++---------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py
index 19bf4b255..bba701760 100644
--- a/nbconvert/filters/markdown_mistune.py
+++ b/nbconvert/filters/markdown_mistune.py
@@ -98,8 +98,32 @@ def __init__(self, renderer, block=None, inline=None, plugins=None):
             inline = MathInlineParser(renderer, hard_wrap=False)
         super().__init__(renderer, block, inline, plugins)
 
+    def render(self, s):
+        """Compatibility method with `mistune==0.8.4`."""
+        return self.parse(s)
+
 
 class IPythonRenderer(HTMLRenderer):
+    def __init__(
+        self,
+        escape=True,
+        allow_harmful_protocols=None,
+        embed_images=False,
+        exclude_anchor_links=False,
+        anchor_link_text="¶",
+        path="",
+        attachments=None,
+    ):
+        super().__init__(escape, allow_harmful_protocols)
+        self.embed_images = embed_images
+        self.exclude_anchor_links = exclude_anchor_links
+        self.anchor_link_text = anchor_link_text
+        self.path = path
+        if attachments is not None:
+            self.attachments = attachments
+        else:
+            self.attachments = {}
+
     def block_code(self, code, info=None):
         if info:
             try:
@@ -116,27 +140,22 @@ def block_code(self, code, info=None):
         return highlight(code, lexer, formatter)
 
     def block_html(self, html):
-        embed_images = self.options.get("embed_images", False)
-
-        if embed_images:
+        if self.embed_images:
             html = self._html_embed_images(html)
 
         return super().block_html(html)
 
     def inline_html(self, html):
-        embed_images = self.options.get("embed_images", False)
-
-        if embed_images:
+        if self.embed_images:
             html = self._html_embed_images(html)
 
         return super().inline_html(html)
 
     def heading(self, text, level):
         html = super().heading(text, level)
-        if self.options.get("exclude_anchor_links"):
+        if self.exclude_anchor_links:
             return html
-        anchor_link_text = self.options.get("anchor_link_text", "¶")
-        return add_anchor(html, anchor_link_text=anchor_link_text)
+        return add_anchor(html, anchor_link_text=self.anchor_link_text)
 
     def escape_html(self, text):
         return html_escape(text)
@@ -161,17 +180,15 @@ def image(self, src, text, title):
         :param text: alt text of the image.
         :param title: title text of the image.
         """
-        attachments = self.options.get("attachments", {})
         attachment_prefix = "attachment:"
-        embed_images = self.options.get("embed_images", False)
 
         if src.startswith(attachment_prefix):
             name = src[len(attachment_prefix) :]
 
-            if name not in attachments:
+            if name not in self.attachments:
                 raise InvalidNotebook(f"missing attachment: {name}")
 
-            attachment = attachments[name]
+            attachment = self.attachments[name]
             # we choose vector over raster, and lossless over lossy
             preferred_mime_types = ["image/svg+xml", "image/png", "image/jpeg"]
             for preferred_mime_type in preferred_mime_types:
@@ -183,7 +200,7 @@ def image(self, src, text, title):
             data = attachment[mime_type]
             src = "data:" + mime_type + ";base64," + data
 
-        elif embed_images:
+        elif self.embed_images:
             base64_url = self._src_to_base64(src)
 
             if base64_url is not None:
@@ -197,8 +214,7 @@ def _src_to_base64(self, src):
         :param src: source link of the file.
         :return: the base64 url or None if the file was not found.
         """
-        path = self.options.get("path", "")
-        src_path = os.path.join(path, src)
+        src_path = os.path.join(self.path, src)
 
         if not os.path.exists(src_path):
             return None

From 16189b9782f2f0af4c32b4c9e6dda68149a0d0a9 Mon Sep 17 00:00:00 2001
From: Tiago de Paula <tiagodepalves@gmail.com>
Date: Sat, 23 Apr 2022 12:14:45 -0300
Subject: [PATCH 3/6] Fix some discrepancies with mistune version 2

- 'AXT_HEADING' now requires whitespace after the '#'; fixed with a new regex
- 're.Scanner' is not able to extract the correct group, so the text needs trimming before use
- 'javascript:...' links are considered harmful by 'HTMLRenderer'; should we disable it too?
---
 nbconvert/filters/markdown_mistune.py | 32 +++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py
index bba701760..0f3cd60ef 100644
--- a/nbconvert/filters/markdown_mistune.py
+++ b/nbconvert/filters/markdown_mistune.py
@@ -48,6 +48,9 @@ class MathBlockParser(BlockParser):
 
     RULE_NAMES = ("multiline_math",) + BlockParser.RULE_NAMES
 
+    # Regex for header that doesn't require space after '#'
+    AXT_HEADING = re.compile(r" {0,3}(#{1,6})(?!#+)\s*([^\n]*?)$")
+
     def parse_multiline_math(self, m, state):
         """Pass token through mutiline math."""
         return {"type": "multiline_math", "text": m.group(0)}
@@ -63,6 +66,19 @@ def _dotall(pattern):
     return f"(?s:{pattern})"
 
 
+def _strip(text, *, prefix, suffix):
+    """Remove prefix and suffix from text, if present.
+
+    `InlineParser` sometimes return these affixes, even though it shouldn't.
+    """
+    np, ns = len(prefix), len(suffix)
+    if text[:np] == prefix:
+        text = text[np:]
+    if text[-ns:] == suffix:
+        text = text[:-ns]
+    return text
+
+
 class MathInlineParser(InlineParser):
     r"""This interprets the content of LaTeX style math objects.
 
@@ -78,11 +94,19 @@ class MathInlineParser(InlineParser):
     RULE_NAMES = ("block_math", "inline_math", "latex_environment") + InlineParser.RULE_NAMES
 
     def parse_inline_math(self, m, state):
-        text = m.group(1) or m.group(2)
+        text = m.group(1)
+        if text:
+            text = _strip(text, prefix="$", suffix="$")
+        else:
+            text = _strip(m.group(2), prefix="\\\\(", suffix="\\\\)")
         return "inline_math", text
 
     def parse_block_math(self, m, state):
-        text = m.group(1) or m.group(2)
+        text = m.group(1)
+        if text:
+            text = _strip(text, prefix="$$", suffix="$$")
+        else:
+            text = _strip(m.group(2), prefix="\\\\[", suffix="\\\\]")
         return "block_math", text
 
     def parse_latex_environment(self, m, state):
@@ -107,7 +131,7 @@ class IPythonRenderer(HTMLRenderer):
     def __init__(
         self,
         escape=True,
-        allow_harmful_protocols=None,
+        allow_harmful_protocols=True,
         embed_images=False,
         exclude_anchor_links=False,
         anchor_link_text="¶",
@@ -206,7 +230,7 @@ def image(self, src, text, title):
             if base64_url is not None:
                 src = base64_url
 
-        return super().image(src, title, text)
+        return super().image(src, text, title)
 
     def _src_to_base64(self, src):
         """Turn the source file into a base64 url.

From 7f0959c8f2445ce5aabf8b05c1e62fb8fb231436 Mon Sep 17 00:00:00 2001
From: Tiago de Paula <tiagodepalves@gmail.com>
Date: Wed, 27 Apr 2022 02:09:42 -0300
Subject: [PATCH 4/6] Split BLOCK_MATH and INLINE_MATH into four regexes

This is important to avoid problems with the TeX-style block math, where the trailing '$$' is not removed.
---
 nbconvert/filters/markdown_mistune.py | 52 +++++++++++++--------------
 1 file changed, 25 insertions(+), 27 deletions(-)

diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py
index 0f3cd60ef..636e1e8cc 100644
--- a/nbconvert/filters/markdown_mistune.py
+++ b/nbconvert/filters/markdown_mistune.py
@@ -66,19 +66,6 @@ def _dotall(pattern):
     return f"(?s:{pattern})"
 
 
-def _strip(text, *, prefix, suffix):
-    """Remove prefix and suffix from text, if present.
-
-    `InlineParser` sometimes return these affixes, even though it shouldn't.
-    """
-    np, ns = len(prefix), len(suffix)
-    if text[:np] == prefix:
-        text = text[np:]
-    if text[-ns:] == suffix:
-        text = text[:-ns]
-    return text
-
-
 class MathInlineParser(InlineParser):
     r"""This interprets the content of LaTeX style math objects.
 
@@ -87,27 +74,38 @@ class MathInlineParser(InlineParser):
     delimiters from all these varieties, and extracts the type of environment
     in the last case (``foo`` in this example).
     """
-    INLINE_MATH = _dotall(r"(?<![\\$])[$](.+?)(?<!\\)[$]|[\\]{2}[(](.+?)[\\]{2}[)]")
-    BLOCK_MATH = _dotall(r"(?<!\\)[$]{2}(.*?)(?<!\\)[$]{2}|\\\\\[(.*?)\\\\\]")
+    BLOCK_MATH_TEX = _dotall(r"(?<!\\)\$\$(.*?)(?<!\\)\$\$")
+    BLOCK_MATH_LATEX = _dotall(r"(?<!\\)\\\\\[(.*?)(?<!\\)\\\\\]")
+    INLINE_MATH_TEX = _dotall(r"(?<![$\\])\$(.+?)(?<![$\\])\$")
+    INLINE_MATH_LATEX = _dotall(r"(?<!\\)\\\\\((.*?)(?<!\\)\\\\\)")
     LATEX_ENVIRONMENT = _dotall(r"\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}")
 
-    RULE_NAMES = ("block_math", "inline_math", "latex_environment") + InlineParser.RULE_NAMES
+    # The order is important here
+    RULE_NAMES = (
+        "block_math_tex",
+        "block_math_latex",
+        "inline_math_tex",
+        "inline_math_latex",
+        "latex_environment",
+    ) + InlineParser.RULE_NAMES
+
+    def parse_block_math_tex(self, m, state):
+        # sometimes the Scanner keeps the final '$$', so we use the
+        # full matched string and remove the math markers
+        text = m.group(0)[2:-2]
+        return "block_math", text
 
-    def parse_inline_math(self, m, state):
+    def parse_block_math_latex(self, m, state):
+        text = m.group(1)
+        return "block_math", text
+
+    def parse_inline_math_tex(self, m, state):
         text = m.group(1)
-        if text:
-            text = _strip(text, prefix="$", suffix="$")
-        else:
-            text = _strip(m.group(2), prefix="\\\\(", suffix="\\\\)")
         return "inline_math", text
 
-    def parse_block_math(self, m, state):
+    def parse_inline_math_latex(self, m, state):
         text = m.group(1)
-        if text:
-            text = _strip(text, prefix="$$", suffix="$$")
-        else:
-            text = _strip(m.group(2), prefix="\\\\[", suffix="\\\\]")
-        return "block_math", text
+        return "inline_math", text
 
     def parse_latex_environment(self, m, state):
         name, text = m.group(1), m.group(2)

From 672c612843d9931cfbefc9691ea76fc05d6060b8 Mon Sep 17 00:00:00 2001
From: Steven Silvester <steven.silvester@ieee.org>
Date: Mon, 9 May 2022 06:12:48 -0500
Subject: [PATCH 5/6] update mistune dep

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 060bda2c7..25f12e7c5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ dependencies = [
     "jupyter_core>=4.7",
     "jupyterlab_pygments",
     "MarkupSafe>=2.0",
-    "mistune>=0.8.1,<2",
+    "mistune>=2.02",
     "nbclient>=0.5.0",
     "nbformat>=5.1",
     "packaging",

From 4f65730003f599ae90b5cb54b177fa5711b44db9 Mon Sep 17 00:00:00 2001
From: Steven Silvester <steven.silvester@ieee.org>
Date: Mon, 9 May 2022 06:14:57 -0500
Subject: [PATCH 6/6] fix mistune version

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 25f12e7c5..ae51a7970 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ dependencies = [
     "jupyter_core>=4.7",
     "jupyterlab_pygments",
     "MarkupSafe>=2.0",
-    "mistune>=2.02",
+    "mistune>=2.0.2",
     "nbclient>=0.5.0",
     "nbformat>=5.1",
     "packaging",