Skip to content

Commit

Permalink
✨ NEW: Add myst_gfm_only configuration (#501)
Browse files Browse the repository at this point in the history
For parsing of only [GitHub-flavored Markdown](https://github.github.com/gfm/)
  • Loading branch information
chrisjsewell committed Jan 9, 2022
1 parent 05c0905 commit c3945bb
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 24 deletions.
5 changes: 4 additions & 1 deletion docs/sphinx/reference.md
Expand Up @@ -12,7 +12,10 @@ To do so, use the keywords beginning `myst_`.
- Description
* - `myst_commonmark_only`
- `False`
- If `True` convert text as strict CommonMark (all options below are then ignored). Note that strict CommonMark is unable to parse any directives, including the `toctree` directive, thus limiting MyST parser to single-page documentations. Use in conjunction with [sphinx-external-toc](https://github.com/executablebooks/sphinx-external-toc) Sphinx extension to counter this limitation.
- If `True` convert text as strict [CommonMark](https://spec.commonmark.org/) (all options below are then ignored). Note that strict CommonMark is unable to parse any directives, including the `toctree` directive, thus limiting the MyST parser to single-page documentation. Use in conjunction with the [sphinx-external-toc](https://github.com/executablebooks/sphinx-external-toc) Sphinx extension to counter this limitation.
* - `myst_gfm_only`
- `False`
- If `True` convert text as strict [GitHub-flavored Markdown](https://github.github.com/gfm/) (all options below are then ignored).
* - `myst_disable_syntax`
- ()
- List of markdown syntax elements to disable, see the [markdown-it parser guide](markdown_it:using).
Expand Down
48 changes: 26 additions & 22 deletions myst_parser/docutils_renderer.py
Expand Up @@ -571,27 +571,11 @@ def render_fence(self, token: SyntaxTreeNode) -> None:
info = token.info.strip() if token.info else token.info
language = info.split()[0] if info else ""

if not self.md_config.commonmark_only and language == "{eval-rst}":
# copy necessary elements (source, line no, env, reporter)
newdoc = make_document()
newdoc["source"] = self.document["source"]
newdoc.settings = self.document.settings
newdoc.reporter = self.reporter
# pad the line numbers artificially so they offset with the fence block
pseudosource = ("\n" * token_line(token)) + token.content
# actually parse the rst into our document
MockRSTParser().parse(pseudosource, newdoc)
for node in newdoc:
if node["names"]:
self.document.note_explicit_target(node, node)
self.current_node.extend(newdoc[:])
return
elif (
not self.md_config.commonmark_only
and language.startswith("{")
and language.endswith("}")
):
return self.render_directive(token)
if (not self.md_config.commonmark_only) and (not self.md_config.gfm_only):
if language == "{eval-rst}":
return self.render_restructuredtext(token)
if language.startswith("{") and language.endswith("}"):
return self.render_directive(token)

if not language and self.sphinx_env is not None:
# use the current highlight setting, via the ``highlight`` directive,
Expand Down Expand Up @@ -680,7 +664,11 @@ def render_link(self, token: SyntaxTreeNode) -> None:
if token.info == "auto": # handles both autolink and linkify
return self.render_autolink(token)

if self.md_config.all_links_external:
if (
self.md_config.commonmark_only
or self.md_config.gfm_only
or self.md_config.all_links_external
):
return self.render_external_url(token)

# Check for external URL
Expand Down Expand Up @@ -1150,6 +1138,22 @@ def render_field_list(self, token: SyntaxTreeNode) -> None:
with self.current_node_context(field_body):
self.render_children(child)

def render_restructuredtext(self, token: SyntaxTreeNode) -> None:
    """Parse the token's content as reStructuredText into the current node.

    The content is parsed into a scratch document that reuses the source,
    settings and reporter of the document being built; the nodes produced
    are then appended to ``self.current_node``.
    """
    # scratch document sharing source, settings and reporter with the real one
    rst_document = make_document()
    rst_document["source"] = self.document["source"]
    rst_document.settings = self.document.settings
    rst_document.reporter = self.reporter
    # prefix one newline per line preceding the token, so that line numbers
    # seen by the RST parser are offset to the position of the fence block
    line_offset = token_line(token)
    padded_source = "\n" * line_offset + token.content
    # parse the RST text into the scratch document
    MockRSTParser().parse(padded_source, rst_document)
    # register every named node as an explicit target on the real document
    for parsed_node in rst_document:
        if parsed_node["names"]:
            self.document.note_explicit_target(parsed_node, parsed_node)
    self.current_node.extend(rst_document.children)

def render_directive(self, token: SyntaxTreeNode) -> None:
"""Render special fenced code blocks as directives."""
first_line = token.info.split(maxsplit=1)
Expand Down
12 changes: 11 additions & 1 deletion myst_parser/html_to_nodes.py
@@ -1,3 +1,5 @@
"""Convert HTML to docutils nodes."""
import re
from typing import TYPE_CHECKING, List

from docutils import nodes
Expand All @@ -23,6 +25,12 @@ def make_error(

OPTION_KEYS_ADMONITION = {"class", "name"}

# Pattern for the start of an HTML tag that is filtered in ``gfm_only`` mode.
# See https://github.com/micromark/micromark-extension-gfm-tagfilter
# It matches ``<``, an optional ``/`` (group 1) and one of the listed tag
# names (group 2), case-insensitively, but only when the name is directly
# followed by a tab, newline, form feed, carriage return, space, ``/`` or ``>``
# (checked with a lookahead, so that character is not part of the match).
# ``html_to_nodes`` replaces the ``<`` of every match with ``&lt;``.
RE_FLOW = re.compile(
r"<(\/?)(iframe|noembed|noframes|plaintext|script|style|title|textarea|xmp)(?=[\t\n\f\r />])",
re.IGNORECASE,
)


def default_html(text: str, source: str, line_number: int) -> List[nodes.Element]:
raw_html = nodes.raw("", text, format="html")
Expand All @@ -35,9 +43,11 @@ def html_to_nodes(
text: str, line_number: int, renderer: "DocutilsRenderer"
) -> List[nodes.Element]:
"""Convert HTML to docutils nodes."""
if renderer.md_config.gfm_only:
text, _ = RE_FLOW.subn(lambda s: s.group(0).replace("<", "&lt;"), text)

enable_html_img = "html_image" in renderer.md_config.enable_extensions
enable_html_admonition = "html_admonition" in renderer.md_config.enable_extensions

if not (enable_html_img or enable_html_admonition):
return default_html(text, renderer.document["source"], line_number)

Expand Down
19 changes: 19 additions & 0 deletions myst_parser/main.py
Expand Up @@ -43,6 +43,11 @@ class MdParserConfig:
validator=instance_of(bool),
metadata={"help": "Use strict CommonMark parser"},
)
gfm_only: bool = attr.ib(
default=False,
validator=instance_of(bool),
metadata={"help": "Use strict Github Flavoured Markdown parser"},
)
enable_extensions: Sequence[str] = attr.ib(
factory=lambda: ["dollarmath"], metadata={"help": "Enable extensions"}
)
Expand Down Expand Up @@ -238,12 +243,26 @@ def create_md_parser(
"""Return a Markdown parser with the required MyST configuration."""

if config.commonmark_only:
# see https://spec.commonmark.org/
md = MarkdownIt("commonmark", renderer_cls=renderer).use(
wordcount_plugin, per_minute=config.words_per_minute
)
md.options.update({"myst_config": config})
return md

if config.gfm_only:
# see https://github.github.com/gfm/
# TODO strikethrough not currently supported in docutils
md = (
MarkdownIt("commonmark", renderer_cls=renderer)
.enable("table")
.use(tasklists_plugin)
.enable("linkify")
.use(wordcount_plugin, per_minute=config.words_per_minute)
)
md.options.update({"linkify": True, "myst_config": config})
return md

md = (
MarkdownIt("commonmark", renderer_cls=renderer)
.enable("table")
Expand Down
91 changes: 91 additions & 0 deletions tests/test_renderers/fixtures/myst-config.txt
Expand Up @@ -34,3 +34,94 @@ www.example.com
<reference refuri="http://www.example.com">
www.example.com
.

[gfm-disallowed-html] --myst-gfm-only="yes"
.
<strong> <title> <style> <em>

<blockquote>
<xmp> is disallowed. <XMP> is also disallowed.
</blockquote>
.
<document source="<string>">
<paragraph>
<raw format="html" xml:space="preserve">
<strong>

<raw format="html" xml:space="preserve">
&lt;title>

<raw format="html" xml:space="preserve">
&lt;style>

<raw format="html" xml:space="preserve">
<em>
<raw format="html" xml:space="preserve">
<blockquote>
&lt;xmp> is disallowed. &lt;XMP> is also disallowed.
</blockquote>
.

[gfm-autolink] --myst-gfm-only="yes"
.
www.commonmark.org

Visit www.commonmark.org/help for more information.

www.google.com/search?q=Markup+(business)

www.google.com/search?q=Markup+(business)))

(www.google.com/search?q=Markup+(business))

(www.google.com/search?q=Markup+(business)

www.google.com/search?q=(business))+ok

www.google.com/search?q=commonmark&hl=en

www.google.com/search?q=commonmark&hl;

www.commonmark.org/he<lp
.
<document source="<string>">
<paragraph>
<reference refuri="http://www.commonmark.org">
www.commonmark.org
<paragraph>
Visit
<reference refuri="http://www.commonmark.org/help">
www.commonmark.org/help
for more information.
<paragraph>
<reference refuri="http://www.google.com/search?q=Markup+(business)">
www.google.com/search?q=Markup+(business)
<paragraph>
<reference refuri="http://www.google.com/search?q=Markup+(business)">
www.google.com/search?q=Markup+(business)
))
<paragraph>
(
<reference refuri="http://www.google.com/search?q=Markup+(business)">
www.google.com/search?q=Markup+(business)
)
<paragraph>
(
<reference refuri="http://www.google.com/search?q=Markup+(business)">
www.google.com/search?q=Markup+(business)
<paragraph>
<reference refuri="http://www.google.com/search?q=(business)">
www.google.com/search?q=(business)
)+ok
<paragraph>
<reference refuri="http://www.google.com/search?q=commonmark&amp;hl=en">
www.google.com/search?q=commonmark&hl=en
<paragraph>
<reference refuri="http://www.google.com/search?q=commonmark&amp;hl">
www.google.com/search?q=commonmark&hl
;
<paragraph>
<reference refuri="http://www.commonmark.org/he">
www.commonmark.org/he
<lp
.

0 comments on commit c3945bb

Please sign in to comment.