Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ NEW: Add myst_gfm_only configuration #501

Merged
merged 6 commits into from Jan 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 4 additions & 1 deletion docs/sphinx/reference.md
Expand Up @@ -12,7 +12,10 @@ To do so, use the keywords beginning `myst_`.
- Description
* - `myst_commonmark_only`
- `False`
- If `True` convert text as strict CommonMark (all options below are then ignored). Note that strict CommonMark is unable to parse any directives, including the `toctree` directive, thus limiting MyST parser to single-page documentations. Use in conjunction with [sphinx-external-toc](https://github.com/executablebooks/sphinx-external-toc) Sphinx extension to counter this limitation.
- If `True`, convert text as strict [CommonMark](https://spec.commonmark.org/) (all options below are then ignored). Note that strict CommonMark is unable to parse any directives, including the `toctree` directive, thus limiting the MyST parser to single-page documentation. Use it in conjunction with the [sphinx-external-toc](https://github.com/executablebooks/sphinx-external-toc) Sphinx extension to counter this limitation.
* - `myst_gfm_only`
- `False`
- If `True`, convert text as strict [GitHub-flavored Markdown](https://github.github.com/gfm/) (all options below are then ignored).
* - `myst_disable_syntax`
- ()
- List of markdown syntax elements to disable, see the [markdown-it parser guide](markdown_it:using).
Expand Down
48 changes: 26 additions & 22 deletions myst_parser/docutils_renderer.py
Expand Up @@ -571,27 +571,11 @@ def render_fence(self, token: SyntaxTreeNode) -> None:
info = token.info.strip() if token.info else token.info
language = info.split()[0] if info else ""

if not self.md_config.commonmark_only and language == "{eval-rst}":
# copy necessary elements (source, line no, env, reporter)
newdoc = make_document()
newdoc["source"] = self.document["source"]
newdoc.settings = self.document.settings
newdoc.reporter = self.reporter
# pad the line numbers artificially so they offset with the fence block
pseudosource = ("\n" * token_line(token)) + token.content
# actually parse the rst into our document
MockRSTParser().parse(pseudosource, newdoc)
for node in newdoc:
if node["names"]:
self.document.note_explicit_target(node, node)
self.current_node.extend(newdoc[:])
return
elif (
not self.md_config.commonmark_only
and language.startswith("{")
and language.endswith("}")
):
return self.render_directive(token)
if (not self.md_config.commonmark_only) and (not self.md_config.gfm_only):
if language == "{eval-rst}":
return self.render_restructuredtext(token)
if language.startswith("{") and language.endswith("}"):
return self.render_directive(token)

if not language and self.sphinx_env is not None:
# use the current highlight setting, via the ``highlight`` directive,
Expand Down Expand Up @@ -680,7 +664,11 @@ def render_link(self, token: SyntaxTreeNode) -> None:
if token.info == "auto": # handles both autolink and linkify
return self.render_autolink(token)

if self.md_config.all_links_external:
if (
self.md_config.commonmark_only
or self.md_config.gfm_only
or self.md_config.all_links_external
):
return self.render_external_url(token)

# Check for external URL
Expand Down Expand Up @@ -1150,6 +1138,22 @@ def render_field_list(self, token: SyntaxTreeNode) -> None:
with self.current_node_context(field_body):
self.render_children(child)

def render_restructuredtext(self, token: SyntaxTreeNode) -> None:
    """Parse the token's content as reStructuredText and append the result.

    A fresh document is created that shares the source, settings and
    reporter of the document being rendered. The token content is parsed
    into it, and the parsed children are then added to the current node.

    :param token: the syntax tree node whose ``content`` holds the
        reStructuredText to parse.
    """
    # Build the scratch document, borrowing the pieces the parser needs
    # (source, settings, reporter) from the main document.
    rst_doc = make_document()
    rst_doc["source"] = self.document["source"]
    rst_doc.settings = self.document.settings
    rst_doc.reporter = self.reporter

    # Prefix the content with one newline per ``token_line(token)`` so the
    # line numbers seen by the parser are offset to match the fence block.
    leading_blank_lines = "\n" * token_line(token)
    MockRSTParser().parse(leading_blank_lines + token.content, rst_doc)

    # Register every parsed top-level node that carries names as an
    # explicit target on the main document.
    for child in rst_doc:
        if not child["names"]:
            continue
        self.document.note_explicit_target(child, child)

    self.current_node.extend(rst_doc.children)

def render_directive(self, token: SyntaxTreeNode) -> None:
"""Render special fenced code blocks as directives."""
first_line = token.info.split(maxsplit=1)
Expand Down
12 changes: 11 additions & 1 deletion myst_parser/html_to_nodes.py
@@ -1,3 +1,5 @@
"""Convert HTML to docutils nodes."""
import re
from typing import TYPE_CHECKING, List

from docutils import nodes
Expand All @@ -23,6 +25,12 @@ def make_error(

OPTION_KEYS_ADMONITION = {"class", "name"}

# See https://github.com/micromark/micromark-extension-gfm-tagfilter
# Pattern for the start of an HTML tag whose name is one of the listed ones
# (iframe, noembed, noframes, plaintext, script, style, title, textarea, xmp).
# Group 1 captures an optional leading "/" (closing tag), group 2 the tag name.
# The trailing lookahead requires the name to be followed by a tab, newline,
# form feed, carriage return, space, "/" or ">" without consuming it, so only
# the "<", optional "/" and the tag name are part of the match.
# Matching is case-insensitive (re.IGNORECASE).
RE_FLOW = re.compile(
r"<(\/?)(iframe|noembed|noframes|plaintext|script|style|title|textarea|xmp)(?=[\t\n\f\r />])",
re.IGNORECASE,
)


def default_html(text: str, source: str, line_number: int) -> List[nodes.Element]:
raw_html = nodes.raw("", text, format="html")
Expand All @@ -35,9 +43,11 @@ def html_to_nodes(
text: str, line_number: int, renderer: "DocutilsRenderer"
) -> List[nodes.Element]:
"""Convert HTML to docutils nodes."""
if renderer.md_config.gfm_only:
text, _ = RE_FLOW.subn(lambda s: s.group(0).replace("<", "&lt;"), text)

enable_html_img = "html_image" in renderer.md_config.enable_extensions
enable_html_admonition = "html_admonition" in renderer.md_config.enable_extensions

if not (enable_html_img or enable_html_admonition):
return default_html(text, renderer.document["source"], line_number)

Expand Down
19 changes: 19 additions & 0 deletions myst_parser/main.py
Expand Up @@ -43,6 +43,11 @@ class MdParserConfig:
validator=instance_of(bool),
metadata={"help": "Use strict CommonMark parser"},
)
gfm_only: bool = attr.ib(
default=False,
validator=instance_of(bool),
metadata={"help": "Use strict Github Flavoured Markdown parser"},
)
enable_extensions: Sequence[str] = attr.ib(
factory=lambda: ["dollarmath"], metadata={"help": "Enable extensions"}
)
Expand Down Expand Up @@ -238,12 +243,26 @@ def create_md_parser(
"""Return a Markdown parser with the required MyST configuration."""

if config.commonmark_only:
# see https://spec.commonmark.org/
md = MarkdownIt("commonmark", renderer_cls=renderer).use(
wordcount_plugin, per_minute=config.words_per_minute
)
md.options.update({"myst_config": config})
return md

if config.gfm_only:
# see https://github.github.com/gfm/
# TODO strikethrough not currently supported in docutils
md = (
MarkdownIt("commonmark", renderer_cls=renderer)
.enable("table")
.use(tasklists_plugin)
.enable("linkify")
.use(wordcount_plugin, per_minute=config.words_per_minute)
)
md.options.update({"linkify": True, "myst_config": config})
return md

md = (
MarkdownIt("commonmark", renderer_cls=renderer)
.enable("table")
Expand Down
91 changes: 91 additions & 0 deletions tests/test_renderers/fixtures/myst-config.txt
Expand Up @@ -34,3 +34,94 @@ www.example.com
<reference refuri="http://www.example.com">
www.example.com
.

[gfm-disallowed-html] --myst-gfm-only="yes"
.
<strong> <title> <style> <em>

<blockquote>
<xmp> is disallowed. <XMP> is also disallowed.
</blockquote>
.
<document source="<string>">
<paragraph>
<raw format="html" xml:space="preserve">
<strong>

<raw format="html" xml:space="preserve">
&lt;title>

<raw format="html" xml:space="preserve">
&lt;style>

<raw format="html" xml:space="preserve">
<em>
<raw format="html" xml:space="preserve">
<blockquote>
&lt;xmp> is disallowed. &lt;XMP> is also disallowed.
</blockquote>
.

[gfm-autolink] --myst-gfm-only="yes"
.
www.commonmark.org

Visit www.commonmark.org/help for more information.

www.google.com/search?q=Markup+(business)

www.google.com/search?q=Markup+(business)))

(www.google.com/search?q=Markup+(business))

(www.google.com/search?q=Markup+(business)

www.google.com/search?q=(business))+ok

www.google.com/search?q=commonmark&hl=en

www.google.com/search?q=commonmark&hl;

www.commonmark.org/he<lp
.
<document source="<string>">
<paragraph>
<reference refuri="http://www.commonmark.org">
www.commonmark.org
<paragraph>
Visit
<reference refuri="http://www.commonmark.org/help">
www.commonmark.org/help
for more information.
<paragraph>
<reference refuri="http://www.google.com/search?q=Markup+(business)">
www.google.com/search?q=Markup+(business)
<paragraph>
<reference refuri="http://www.google.com/search?q=Markup+(business)">
www.google.com/search?q=Markup+(business)
))
<paragraph>
(
<reference refuri="http://www.google.com/search?q=Markup+(business)">
www.google.com/search?q=Markup+(business)
)
<paragraph>
(
<reference refuri="http://www.google.com/search?q=Markup+(business)">
www.google.com/search?q=Markup+(business)
<paragraph>
<reference refuri="http://www.google.com/search?q=(business)">
www.google.com/search?q=(business)
)+ok
<paragraph>
<reference refuri="http://www.google.com/search?q=commonmark&amp;hl=en">
www.google.com/search?q=commonmark&hl=en
<paragraph>
<reference refuri="http://www.google.com/search?q=commonmark&amp;hl">
www.google.com/search?q=commonmark&hl
;
<paragraph>
<reference refuri="http://www.commonmark.org/he">
www.commonmark.org/he
<lp
.