From 59fe82a6e8e1386a77bf0ba03eff94518d8cb109 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Fri, 17 Dec 2021 16:20:35 +0100 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=91=8C=20IMPROVE:=20Parsing=20logic?= =?UTF-8?q?=20of=20links?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/api/reference.rst | 2 +- myst_parser/docutils_renderer.py | 87 +++++++++++++++++++------------- myst_parser/main.py | 7 +++ myst_parser/myst_refs.py | 2 + myst_parser/sphinx_renderer.py | 65 ++++++++++++++++++++---- myst_parser/utils.py | 4 +- 6 files changed, 120 insertions(+), 47 deletions(-) diff --git a/docs/api/reference.rst b/docs/api/reference.rst index ad395cd6..ffd17dd6 100644 --- a/docs/api/reference.rst +++ b/docs/api/reference.rst @@ -36,7 +36,7 @@ Sphinx .. autoclass:: myst_parser.sphinx_renderer.SphinxRenderer :special-members: __output__ - :members: handle_cross_reference, render_math_block_label + :members: render_internal_link, render_math_block_label :undoc-members: :member-order: alphabetical :show-inheritance: diff --git a/myst_parser/docutils_renderer.py b/myst_parser/docutils_renderer.py index ec94e4ac..5b89e2f1 100644 --- a/myst_parser/docutils_renderer.py +++ b/myst_parser/docutils_renderer.py @@ -526,51 +526,69 @@ def render_heading(self, token: SyntaxTreeNode) -> None: self.current_node = section def render_link(self, token: SyntaxTreeNode) -> None: + """Parse `` or `[text](link "title")` syntax to docutils AST: + + - If `<>` autolink, forward to `render_autolink` + - If `myst_all_links_external` is True, forward to `render_external_url` + - If link is an external URL, forward to `render_external_url` + - External URLs start with a scheme (e.g. `http:`) in `myst_url_schemes`, + or any scheme if `myst_url_schemes` is None. + - Otherwise, forward to `render_internal_link` + """ if token.markup == "autolink": return self.render_autolink(token) - ref_node = nodes.reference() - self.add_line_and_source_path(ref_node, token) + if self.config.get("myst_all_links_external", False): + return self.render_external_url(token) + destination = cast(str, token.attrGet("href") or "") + if is_external_url(destination, self.config.get("myst_url_schemes", None)): + return self.render_external_url(token) - if self.config.get( - "relative-docs", None - ) is not None and destination.startswith(self.config["relative-docs"][0]): - # make the path relative to an "including" document - source_dir, include_dir = self.config["relative-docs"][1:] - destination = os.path.relpath( - os.path.join(include_dir, os.path.normpath(destination)), source_dir - ) + # TODO previously, we also interpreted any link containing a `#` as external, + # e.g. `[alt](#fragment)` (only if `heading_anchors`` extension was not active) + # should we still do that? + + return self.render_internal_link(token) - ref_node["refuri"] = destination + def render_external_url(self, token: SyntaxTreeNode) -> None: + """Render link token `[text](link "title")`, + where the link has been identified as an external URL:: + + text + + `text` can contain nested syntax, e.g. `[**bold**](url "title")`. + """ + ref_node = nodes.reference() + self.add_line_and_source_path(ref_node, token) + ref_node["refuri"] = cast(str, token.attrGet("href") or "") title = token.attrGet("title") if title: ref_node["title"] = title - next_node = ref_node - - # TODO currently any reference with a fragment # is deemed external - # (if anchors are not enabled) - # This comes from recommonmark, but I am not sure of the rationale for it - if is_external_url( - destination, - self.config.get("myst_url_schemes", None), - "heading_anchors" not in self.config.get("myst_extensions", []), - ): - self.current_node.append(next_node) - with self.current_node_context(ref_node): - self.render_children(token) - else: - self.handle_cross_reference(token, destination) + with self.current_node_context(ref_node, append=True): + self.render_children(token) - def handle_cross_reference(self, token: SyntaxTreeNode, destination: str) -> None: - if not self.config.get("ignore_missing_refs", False): - self.create_warning( - f"Reference not found: {destination}", - line=token_line(token), - subtype="ref", - append_to=self.current_node, - ) + def render_internal_link(self, token: SyntaxTreeNode) -> None: + """Render link token `[text](link "title")`, + where the link has not been identified as an external URL:: + + + text + + `text` can contain nested syntax, e.g. `[**bold**](link "title")`. + + Note, this is overridden by `SphinxRenderer`, to use `pending_xref` nodes. + """ + # TODO is this too strict for docutils? + ref_node = nodes.reference() + self.add_line_and_source_path(ref_node, token) + ref_node["refname"] = cast(str, token.attrGet("href") or "") + title = token.attrGet("title") + if title: + ref_node["title"] = title + with self.current_node_context(ref_node, append=True): + self.render_children(token) def render_autolink(self, token: SyntaxTreeNode) -> None: refuri = target = escapeHtml(token.attrGet("href") or "") # type: ignore[arg-type] @@ -594,6 +612,7 @@ def render_image(self, token: SyntaxTreeNode) -> None: destination, None, True ): # make the path relative to an "including" document + # this is set when using the `relative-images` option of the MyST `include` directive destination = os.path.normpath( os.path.join( self.config.get("relative-images", ""), diff --git a/myst_parser/main.py b/myst_parser/main.py index 4b3c0f9c..d088b205 100644 --- a/myst_parser/main.py +++ b/myst_parser/main.py @@ -115,6 +115,12 @@ def check_extensions(self, attribute, value): metadata={"help": "Disable syntax elements"}, ) + all_links_external: bool = attr.ib( + default=False, + validator=instance_of(bool), + metadata={"help": "Parse all links as simple hyperlinks"}, + ) + # see https://en.wikipedia.org/wiki/List_of_URI_schemes url_schemes: Optional[Iterable[str]] = attr.ib( default=cast(Optional[Iterable[str]], ("http", "https", "mailto", "ftp")), @@ -273,6 +279,7 @@ def default_parser(config: MdParserConfig) -> MarkdownIt: list(config.enable_extensions) + (["heading_anchors"] if config.heading_anchors is not None else []) ), + "myst_all_links_external": config.all_links_external, "myst_url_schemes": config.url_schemes, "myst_substitutions": config.substitutions, "myst_html_meta": config.html_meta, diff --git a/myst_parser/myst_refs.py b/myst_parser/myst_refs.py index 96a41c49..11503931 100644 --- a/myst_parser/myst_refs.py +++ b/myst_parser/myst_refs.py @@ -70,6 +70,8 @@ def run(self, **kwargs: Any) -> None: # we are in nit-picky mode if newnode is None: node["refdomain"] = "" + # TODO ideally we would override the warning message here, + # to show the [ref.] for supressing warning self.warn_missing_reference(refdoc, typ, target, node, domain) except NoUri: newnode = contnode diff --git a/myst_parser/sphinx_renderer.py b/myst_parser/sphinx_renderer.py index 5bb385ba..ae4c6f87 100644 --- a/myst_parser/sphinx_renderer.py +++ b/myst_parser/sphinx_renderer.py @@ -1,7 +1,9 @@ import copy +import os import tempfile from contextlib import contextmanager from io import StringIO +from pathlib import Path from typing import Optional, cast from urllib.parse import unquote from uuid import uuid4 @@ -64,23 +66,64 @@ def create_warning( append_to.append(msg_node) return None - def handle_cross_reference(self, token: SyntaxTreeNode, destination: str) -> None: - """Create nodes for references that are not immediately resolvable.""" - wrap_node = addnodes.pending_xref( - refdoc=self.doc_env.docname, - reftarget=unquote(destination), - reftype="myst", - refdomain=None, # Added to enable cross-linking - refexplicit=len(token.children or []) > 0, - refwarn=True, + def render_internal_link(self, token: SyntaxTreeNode) -> None: + """Render link token `[text](link "title")`, + where the link has not been identified as an external URL. + """ + destination = unquote(cast(str, token.attrGet("href") or "")) + + # make the path relative to an "including" document + # this is set when using the `relative-docs` option of the MyST `include` directive + relative_include = self.config.get("relative-docs", None) + if relative_include is not None and destination.startswith(relative_include[0]): + source_dir, include_dir = relative_include[1:] + destination = os.path.relpath( + os.path.join(include_dir, os.path.normpath(destination)), source_dir + ) + + potential_path = ( + Path(self.doc_env.doc2path(self.doc_env.docname)).parent / destination + if self.doc_env.srcdir # not set in some test situations + else None ) + if ( + potential_path + and potential_path.is_file() + and not any( + destination.endswith(suffix) + for suffix in self.doc_env.config.source_suffix + ) + ): + wrap_node = addnodes.download_reference( + refdoc=self.doc_env.docname, + reftarget=destination, + reftype="myst", + refdomain=None, # Added to enable cross-linking + refexplicit=len(token.children or []) > 0, + refwarn=False, + ) + classes = ["xref", "download", "myst"] + text = destination if not token.children else "" + # TODO warn if there are other matching references + else: + wrap_node = addnodes.pending_xref( + refdoc=self.doc_env.docname, + reftarget=destination, + reftype="myst", + refdomain=None, # Added to enable cross-linking + refexplicit=len(token.children or []) > 0, + refwarn=True, + ) + classes = ["xref", "myst"] + text = "" + self.add_line_and_source_path(wrap_node, token) title = token.attrGet("title") if title: wrap_node["title"] = title self.current_node.append(wrap_node) - inner_node = nodes.inline("", "", classes=["xref", "myst"]) + inner_node = nodes.inline("", text, classes=classes) wrap_node.append(inner_node) with self.current_node_context(inner_node): self.render_children(token) @@ -88,7 +131,7 @@ def handle_cross_reference(self, token: SyntaxTreeNode, destination: str) -> Non def render_heading(self, token: SyntaxTreeNode) -> None: """This extends the docutils method, to allow for the addition of heading ids. These ids are computed by the ``markdown-it-py`` ``anchors_plugin`` - as "slugs" which are unique document. + as "slugs" which are unique to a document. The approach is similar to ``sphinx.ext.autosectionlabel`` """ diff --git a/myst_parser/utils.py b/myst_parser/utils.py index 7e9c8938..b31d8c75 100644 --- a/myst_parser/utils.py +++ b/myst_parser/utils.py @@ -11,7 +11,9 @@ def escape_url(raw: str) -> str: def is_external_url( - reference: str, known_url_schemes: Optional[Iterable[str]], match_fragment: bool + reference: str, + known_url_schemes: Optional[Iterable[str]], + match_fragment: bool = False, ) -> bool: """Return if a reference should be recognised as an external URL. From 86111091cc956225063b5dde573a3d0eff365ca6 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 28 Dec 2021 00:25:15 +0100 Subject: [PATCH 2/4] Fix tests --- myst_parser/docutils_renderer.py | 14 +++++++------- tests/test_renderers/fixtures/reporter_warnings.md | 2 +- tests/test_renderers/fixtures/syntax_elements.md | 5 +++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/myst_parser/docutils_renderer.py b/myst_parser/docutils_renderer.py index 5b89e2f1..ae77d4d9 100644 --- a/myst_parser/docutils_renderer.py +++ b/myst_parser/docutils_renderer.py @@ -19,6 +19,7 @@ Union, cast, ) +from urllib.parse import urlparse import jinja2 import yaml @@ -541,14 +542,14 @@ def render_link(self, token: SyntaxTreeNode) -> None: if self.config.get("myst_all_links_external", False): return self.render_external_url(token) - destination = cast(str, token.attrGet("href") or "") - if is_external_url(destination, self.config.get("myst_url_schemes", None)): + # Check for external URL + url_scheme = urlparse(cast(str, token.attrGet("href") or "")).scheme + allowed_url_schemes = self.config.get("myst_url_schemes", None) + if (allowed_url_schemes is None and url_scheme) or ( + url_scheme in allowed_url_schemes + ): return self.render_external_url(token) - # TODO previously, we also interpreted any link containing a `#` as external, - # e.g. `[alt](#fragment)` (only if `heading_anchors`` extension was not active) - # should we still do that? - return self.render_internal_link(token) def render_external_url(self, token: SyntaxTreeNode) -> None: @@ -580,7 +581,6 @@ def render_internal_link(self, token: SyntaxTreeNode) -> None: Note, this is overridden by `SphinxRenderer`, to use `pending_xref` nodes. """ - # TODO is this too strict for docutils? ref_node = nodes.reference() self.add_line_and_source_path(ref_node, token) ref_node["refname"] = cast(str, token.attrGet("href") or "") diff --git a/tests/test_renderers/fixtures/reporter_warnings.md b/tests/test_renderers/fixtures/reporter_warnings.md index 15e63630..bda330d7 100644 --- a/tests/test_renderers/fixtures/reporter_warnings.md +++ b/tests/test_renderers/fixtures/reporter_warnings.md @@ -10,7 +10,7 @@ Missing Reference: . [a](b) . -:1: (WARNING/2) Reference not found: b +:1: (ERROR/3) Unknown target name: "b". . Unknown role: diff --git a/tests/test_renderers/fixtures/syntax_elements.md b/tests/test_renderers/fixtures/syntax_elements.md index 0883c0e6..4e0ebf9d 100644 --- a/tests/test_renderers/fixtures/syntax_elements.md +++ b/tests/test_renderers/fixtures/syntax_elements.md @@ -430,8 +430,9 @@ Title alt2 - - alt3 + + + alt3 . -------------------------- From 82e3ef1b5a503de5c5f21b7c2e207b87bc9bc68f Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 28 Dec 2021 01:09:09 +0100 Subject: [PATCH 3/4] Add download file links --- myst_parser/__init__.py | 1 + myst_parser/main.py | 2 +- myst_parser/sphinx_renderer.py | 1 - tests/test_sphinx/conftest.py | 7 +--- .../sourcedirs/references/file_link.txt | 0 .../sourcedirs/references/index.md | 4 +++ .../references/subfolder/file_link2.txt | 0 tests/test_sphinx/test_sphinx_builds.py | 32 ++++++------------- .../test_references.resolved.xml | 10 ++++++ .../test_references.sphinx3.html | 17 ++++++++++ .../test_references.sphinx4.html | 17 ++++++++++ .../test_sphinx_builds/test_references.xml | 10 ++++++ 12 files changed, 70 insertions(+), 31 deletions(-) create mode 100644 tests/test_sphinx/sourcedirs/references/file_link.txt create mode 100644 tests/test_sphinx/sourcedirs/references/subfolder/file_link2.txt diff --git a/myst_parser/__init__.py b/myst_parser/__init__.py index ae43a54e..4dd24501 100644 --- a/myst_parser/__init__.py +++ b/myst_parser/__init__.py @@ -35,6 +35,7 @@ def setup_sphinx(app: "Sphinx"): for name, default in MdParserConfig().as_dict().items(): if not name == "renderer": + # TODO add types? app.add_config_value(f"myst_{name}", default, "env") app.connect("builder-inited", create_myst_config) diff --git a/myst_parser/main.py b/myst_parser/main.py index d088b205..f9221861 100644 --- a/myst_parser/main.py +++ b/myst_parser/main.py @@ -125,7 +125,7 @@ def check_extensions(self, attribute, value): url_schemes: Optional[Iterable[str]] = attr.ib( default=cast(Optional[Iterable[str]], ("http", "https", "mailto", "ftp")), validator=optional(deep_iterable(instance_of(str), instance_of((list, tuple)))), - metadata={"help": "URL schemes to allow in links"}, + metadata={"help": "URL scheme prefixes identified as external links"}, ) heading_anchors: Optional[int] = attr.ib( diff --git a/myst_parser/sphinx_renderer.py b/myst_parser/sphinx_renderer.py index ae4c6f87..52118921 100644 --- a/myst_parser/sphinx_renderer.py +++ b/myst_parser/sphinx_renderer.py @@ -104,7 +104,6 @@ def render_internal_link(self, token: SyntaxTreeNode) -> None: ) classes = ["xref", "download", "myst"] text = destination if not token.children else "" - # TODO warn if there are other matching references else: wrap_node = addnodes.pending_xref( refdoc=self.doc_env.docname, diff --git a/tests/test_sphinx/conftest.py b/tests/test_sphinx/conftest.py index 0f2b2877..fcb88558 100644 --- a/tests/test_sphinx/conftest.py +++ b/tests/test_sphinx/conftest.py @@ -42,10 +42,7 @@ def test_basic(app, status, warning, get_sphinx_app_output): SOURCE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "sourcedirs")) -# TODO autouse not working, may need to be in root conftest -# (ideally _build folder should be in tempdir) -# @pytest.fixture(scope="session", autouse=True) -@pytest.fixture() +@pytest.fixture(scope="session", autouse=True) def remove_sphinx_builds(): """remove all build directories from the test folder""" yield @@ -62,8 +59,6 @@ def read( buildername="html", filename="index.html", encoding="utf-8", - extract_body=False, - remove_scripts=False, regress_html=False, regress_ext=".html", replace=None, diff --git a/tests/test_sphinx/sourcedirs/references/file_link.txt b/tests/test_sphinx/sourcedirs/references/file_link.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_sphinx/sourcedirs/references/index.md b/tests/test_sphinx/sourcedirs/references/index.md index 29c611ef..c31bedbf 100644 --- a/tests/test_sphinx/sourcedirs/references/index.md +++ b/tests/test_sphinx/sourcedirs/references/index.md @@ -20,6 +20,10 @@ [nested *syntax*](index.md) +[download **link**](file_link.txt) + +[](subfolder/file_link2.txt) + ```{eval-rst} .. _insidecodeblock: diff --git a/tests/test_sphinx/sourcedirs/references/subfolder/file_link2.txt b/tests/test_sphinx/sourcedirs/references/subfolder/file_link2.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_sphinx/test_sphinx_builds.py b/tests/test_sphinx/test_sphinx_builds.py index 1c6aa287..f05ef992 100644 --- a/tests/test_sphinx/test_sphinx_builds.py +++ b/tests/test_sphinx/test_sphinx_builds.py @@ -26,7 +26,6 @@ def test_basic( warning, get_sphinx_app_doctree, get_sphinx_app_output, - remove_sphinx_builds, ): """basic test.""" app.build() @@ -82,7 +81,6 @@ def test_references( warning, get_sphinx_app_doctree, get_sphinx_app_output, - remove_sphinx_builds, ): """Test reference resolution.""" app.build() @@ -94,13 +92,15 @@ def test_references( try: get_sphinx_app_doctree(app, docname="index", regress=True) finally: - get_sphinx_app_doctree(app, docname="index", resolve=True, regress=True) - get_sphinx_app_output( - app, - filename="index.html", - regress_html=True, - regress_ext=f".sphinx{sphinx.version_info[0]}.html", - ) + try: + get_sphinx_app_doctree(app, docname="index", resolve=True, regress=True) + finally: + get_sphinx_app_output( + app, + filename="index.html", + regress_html=True, + regress_ext=f".sphinx{sphinx.version_info[0]}.html", + ) @pytest.mark.sphinx( @@ -115,7 +115,6 @@ def test_references_singlehtml( warning, get_sphinx_app_doctree, get_sphinx_app_output, - remove_sphinx_builds, ): """Test reference resolution for singlehtml builds.""" app.build() @@ -165,7 +164,6 @@ def test_heading_slug_func( warning, get_sphinx_app_doctree, get_sphinx_app_output, - remove_sphinx_builds, ): """Test heading_slug_func configuration.""" app.build() @@ -197,7 +195,6 @@ def test_extended_syntaxes( warning, get_sphinx_app_doctree, get_sphinx_app_output, - remove_sphinx_builds, monkeypatch, ): """test setting addition configuration values.""" @@ -234,7 +231,6 @@ def test_includes( warning, get_sphinx_app_doctree, get_sphinx_app_output, - remove_sphinx_builds, ): """Test of include directive.""" app.build() @@ -284,8 +280,6 @@ def test_include_from_rst( status, warning, get_sphinx_app_doctree, - get_sphinx_app_output, - remove_sphinx_builds, ): """Test of include directive inside RST file.""" app.build() @@ -311,7 +305,6 @@ def test_footnotes( warning, get_sphinx_app_doctree, get_sphinx_app_output, - remove_sphinx_builds, ): """Test of include directive.""" app.build() @@ -342,7 +335,6 @@ def test_commonmark_only( warning, get_sphinx_app_doctree, get_sphinx_app_output, - remove_sphinx_builds, ): """test setting addition configuration values.""" app.build() @@ -372,7 +364,6 @@ def test_substitutions( warning, get_sphinx_app_doctree, get_sphinx_app_output, - remove_sphinx_builds, file_regression, ): """test setting addition configuration values.""" @@ -399,7 +390,6 @@ def test_gettext( status, warning, get_sphinx_app_output, - remove_sphinx_builds, file_regression, ): """Test gettext message extraction.""" @@ -427,7 +417,6 @@ def test_gettext_html( warning, get_sphinx_app_doctree, get_sphinx_app_output, - remove_sphinx_builds, ): """Test gettext message extraction.""" app.build() @@ -477,7 +466,6 @@ def test_gettext_additional_targets( status, warning, get_sphinx_app_output, - remove_sphinx_builds, file_regression, ): """Test gettext message extraction.""" @@ -500,7 +488,6 @@ def test_mathjax_warning( app, status, warning, - remove_sphinx_builds, ): """Test mathjax config override warning.""" app.build() @@ -523,7 +510,6 @@ def test_fieldlist_extension( warning, get_sphinx_app_doctree, get_sphinx_app_output, - remove_sphinx_builds, ): """test setting addition configuration values.""" app.build() diff --git a/tests/test_sphinx/test_sphinx_builds/test_references.resolved.xml b/tests/test_sphinx/test_sphinx_builds/test_references.resolved.xml index 7232b7a6..9c6a4cac 100644 --- a/tests/test_sphinx/test_sphinx_builds/test_references.resolved.xml +++ b/tests/test_sphinx/test_sphinx_builds/test_references.resolved.xml @@ -46,6 +46,16 @@ nested syntax + + + + download + + link + + + + subfolder/file_link2.txt I am inside the eval-rst fence diff --git a/tests/test_sphinx/test_sphinx_builds/test_references.sphinx3.html b/tests/test_sphinx/test_sphinx_builds/test_references.sphinx3.html index 543bb308..acd217fd 100644 --- a/tests/test_sphinx/test_sphinx_builds/test_references.sphinx3.html +++ b/tests/test_sphinx/test_sphinx_builds/test_references.sphinx3.html @@ -81,6 +81,23 @@

+

+ + + download + + link + + + +

+

+ + + subfolder/file_link2.txt + + +

I am inside the eval-rst fence

diff --git a/tests/test_sphinx/test_sphinx_builds/test_references.sphinx4.html b/tests/test_sphinx/test_sphinx_builds/test_references.sphinx4.html index d02ba1cf..677256e3 100644 --- a/tests/test_sphinx/test_sphinx_builds/test_references.sphinx4.html +++ b/tests/test_sphinx/test_sphinx_builds/test_references.sphinx4.html @@ -81,6 +81,23 @@

+

+ + + download + + link + + + +

+

+ + + subfolder/file_link2.txt + + +

I am inside the eval-rst fence

diff --git a/tests/test_sphinx/test_sphinx_builds/test_references.xml b/tests/test_sphinx/test_sphinx_builds/test_references.xml index 253d363f..03bfb8d5 100644 --- a/tests/test_sphinx/test_sphinx_builds/test_references.xml +++ b/tests/test_sphinx/test_sphinx_builds/test_references.xml @@ -44,6 +44,16 @@ nested syntax + + + + download + + link + + + + subfolder/file_link2.txt I am inside the eval-rst fence From 35c3a064c77275f6c872684795ef7b50fb4e5f40 Mon Sep 17 00:00:00 2001 From: Chris Sewell Date: Tue, 28 Dec 2021 04:18:53 +0100 Subject: [PATCH 4/4] add documentation --- docs/sphinx/reference.md | 6 ++++ docs/syntax/example.txt | 1 + docs/syntax/reference.md | 2 +- docs/syntax/syntax.md | 33 ++++++++++++++++++++ myst_parser/docutils_.py | 4 +-- myst_parser/main.py | 6 ++++ myst_parser/myst_refs.py | 66 ++++++++++++++++++++++------------------ 7 files changed, 86 insertions(+), 32 deletions(-) create mode 100644 docs/syntax/example.txt diff --git a/docs/sphinx/reference.md b/docs/sphinx/reference.md index 79e98998..1ba6ee5a 100644 --- a/docs/sphinx/reference.md +++ b/docs/sphinx/reference.md @@ -19,10 +19,16 @@ To do so, use the keywords beginning `myst_`. * - `myst_enable_extensions` - `["dollarmath"]` - Enable Markdown extensions, [see here](../syntax/optional.md) for details. +* - `myst_all_links_external` + - `False` + - If `True`, all Markdown links `[text](link)` are treated as external. * - `myst_url_schemes` - `None` - [URI schemes](https://en.wikipedia.org/wiki/List_of_URI_schemes) that will be recognised as external URLs in `[](scheme:loc)` syntax, or set `None` to recognise all. Other links will be resolved as internal cross-references. +* - `myst_ref_domains` + - `None` + - If a list, then only these [sphinx domains](sphinx:domain) will be searched for when resolving Markdown links like `[text](reference)`. * - `myst_linkify_fuzzy_links` - `True` - If `False`, only links that contain a scheme (such as `http`) will be recognised as external links. diff --git a/docs/syntax/example.txt b/docs/syntax/example.txt new file mode 100644 index 00000000..e84df8e7 --- /dev/null +++ b/docs/syntax/example.txt @@ -0,0 +1 @@ +Hallo! diff --git a/docs/syntax/reference.md b/docs/syntax/reference.md index c3264a23..c03226ba 100644 --- a/docs/syntax/reference.md +++ b/docs/syntax/reference.md @@ -242,7 +242,7 @@ In addition to these summaries of inline syntax, see {ref}`extra-markdown-syntax ![alt](src "title") ``` * - Link - - Reference `LinkDefinitions` + - Reference `LinkDefinitions`. See {ref}`syntax/referencing` for more details. - ```md [text](target "title") or [text][key] ``` diff --git a/docs/syntax/syntax.md b/docs/syntax/syntax.md index 23e22d3d..e92b2ba5 100644 --- a/docs/syntax/syntax.md +++ b/docs/syntax/syntax.md @@ -518,6 +518,39 @@ Is below, but it won't be parsed into the document. +++ +(syntax/referencing)= + +## Markdown Links and Referencing + +Markdown links are of the form: `[text](link)`. + +If you set the configuration `myst_all_links_external = True` (`False` by default), +then all links will be treated simply as "external" links. +For example, in HTML outputs, `[text](link)` will be rendered as `text`. + +Otherwise, links will only be treated as "external" links if they are prefixed with a scheme, +configured with `myst_url_schemes` (by default, `http`, `https`, `ftp`, or `mailto`). +For example, `[example.com](https://example.com)` becomes [example.com](https://example.com). + +:::{note} +The `text` will be parsed as nested Markdown, for example `[here's some *emphasised text*](https://example.com)` will be parsed as [here's some *emphasised text*](https://example.com). +::: + +For "internal" links, myst-parser in Sphinx will attempt to resolve the reference to either a relative document path, or a cross-reference to a target (see [](syntax/targets)): + +- `[this doc](syntax.md)` will link to a rendered source document: [this doc](syntax.md) + - This is similar to `` {doc}`this doc ` ``; {doc}`this doc `, but allows for document extensions, and parses nested Markdown text. +- `[example text](example.txt)` will link to a non-source (downloadable) file: [example text](example.txt) + - The linked document itself will be copied to the build directory. + - This is similar to `` {download}`example text ` ``; {download}`example text `, but parses nested Markdown text. +- `[reference](syntax/referencing)` will link to an internal cross-reference: [reference](syntax/referencing) + - This is similar to `` {any}`reference ` ``; {any}`reference `, but parses nested Markdown text. + - You can limit the scope of the cross-reference to specific [sphinx domains](sphinx:domain), by using the `myst_ref_domains` configuration. + For example, `myst_ref_domains = ("std", "py")` will only allow cross-references to `std` and `py` domains. + +Additionally, only if [](syntax/header-anchors) are enabled, then internal links to document headers can be used. +For example `[a header](syntax.md#markdown-links-and-referencing)` will link to a header anchor: [a header](syntax.md#markdown-links-and-referencing). + (syntax/targets)= ## Targets and Cross-Referencing diff --git a/myst_parser/docutils_.py b/myst_parser/docutils_.py index 269a6ec2..73bc5a7c 100644 --- a/myst_parser/docutils_.py +++ b/myst_parser/docutils_.py @@ -58,9 +58,9 @@ def __repr__(self): "substitutions", # we can't add substitutions so not needed "sub_delimiters", - # heading anchors are currently sphinx only + # sphinx only options "heading_anchors", - # sphinx.ext.mathjax only options + "ref_domains", "update_mathjax", "mathjax_classes", # We don't want to set the renderer from docutils.conf diff --git a/myst_parser/main.py b/myst_parser/main.py index f9221861..7663250e 100644 --- a/myst_parser/main.py +++ b/myst_parser/main.py @@ -128,6 +128,12 @@ def check_extensions(self, attribute, value): metadata={"help": "URL scheme prefixes identified as external links"}, ) + ref_domains: Optional[Iterable[str]] = attr.ib( + default=None, + validator=optional(deep_iterable(instance_of(str), instance_of((list, tuple)))), + metadata={"help": "Sphinx domain names to search in for references"}, + ) + heading_anchors: Optional[int] = attr.ib( default=None, validator=optional(in_([1, 2, 3, 4, 5, 6, 7])), diff --git a/myst_parser/myst_refs.py b/myst_parser/myst_refs.py index 11503931..5ee2de04 100644 --- a/myst_parser/myst_refs.py +++ b/myst_parser/myst_refs.py @@ -42,7 +42,6 @@ def run(self, **kwargs: Any) -> None: contnode = cast(nodes.TextElement, node[0].deepcopy()) newnode = None - typ = node["reftype"] target = node["reftarget"] refdoc = node.get("refdoc", self.env.docname) domain = None @@ -54,25 +53,29 @@ def run(self, **kwargs: Any) -> None: # but first we change the the reftype to 'any' # this means it is picked up by extensions like intersphinx node["reftype"] = "any" - newnode = self.app.emit_firstresult( - "missing-reference", - self.env, - node, - contnode, - **( - {"allowed_exceptions": (NoUri,)} - if version_info[0] > 2 - else {} - ), - ) - node["reftype"] = "myst" + try: + newnode = self.app.emit_firstresult( + "missing-reference", + self.env, + node, + contnode, + **( + {"allowed_exceptions": (NoUri,)} + if version_info[0] > 2 + else {} + ), + ) + finally: + node["reftype"] = "myst" # still not found? warn if node wishes to be warned about or # we are in nit-picky mode if newnode is None: node["refdomain"] = "" # TODO ideally we would override the warning message here, - # to show the [ref.] for supressing warning - self.warn_missing_reference(refdoc, typ, target, node, domain) + # to show the [ref.myst] for supressing warning + self.warn_missing_reference( + refdoc, node["reftype"], target, node, domain + ) except NoUri: newnode = contnode @@ -111,25 +114,30 @@ def resolve_myst_ref( if res: results.append(("std:doc", res)) + # get allowed domains for referencing + ref_domains = self.env.config.myst_ref_domains + # next resolve for any other standard reference objects - stddomain = cast(StandardDomain, self.env.get_domain("std")) - for objtype in stddomain.object_types: - key = (objtype, target) - if objtype == "term": - key = (objtype, target.lower()) - if key in stddomain.objects: - docname, labelid = stddomain.objects[key] - domain_role = "std:" + stddomain.role_for_objtype(objtype) - ref_node = make_refnode( - self.app.builder, refdoc, docname, labelid, contnode - ) - results.append((domain_role, ref_node)) + if ref_domains is None or "std" in ref_domains: + stddomain = cast(StandardDomain, self.env.get_domain("std")) + for objtype in stddomain.object_types: + key = (objtype, target) + if objtype == "term": + key = (objtype, target.lower()) + if key in stddomain.objects: + docname, labelid = stddomain.objects[key] + domain_role = "std:" + stddomain.role_for_objtype(objtype) + ref_node = make_refnode( + self.app.builder, refdoc, docname, labelid, contnode + ) + results.append((domain_role, ref_node)) - # finally resolve for any other type of reference - # TODO do we want to restrict this at all? + # finally resolve for any other type of allowed reference domain for domain in self.env.domains.values(): if domain.name == "std": continue # we did this one already + if ref_domains is not None and domain.name not in ref_domains: + continue try: results.extend( domain.resolve_any_xref(