Skip to content

Commit

Permalink
Ensure linkcheck items are comparable
Browse files Browse the repository at this point in the history
Linkcheck organizes the URLs to check in a PriorityQueue. The items are
tuples (priority, url, docname, lineno).

Tuples where the lineno is `None` are not comparable with tuples that
have an integer lineno, and PriorityQueue items must be comparable (see
https://bugs.python.org/issue31145).

Fixes an issue when a document contains two links to the same URL, one
with an int line number and the other without line number metadata (such
as an image :target: attribute).

Using 0 instead of None to represent no line number should not lead to
observable changes, since the result logger only logs the line number when
it is truthy.

Close sphinx-doc#8565
  • Loading branch information
francoisfreitag committed Dec 22, 2020
1 parent a7e10c1 commit b469555
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 3 deletions.
14 changes: 11 additions & 3 deletions sphinx/builders/linkcheck.py
Expand Up @@ -22,7 +22,7 @@
from urllib.parse import unquote, urlparse

from docutils import nodes
from docutils.nodes import Node
from docutils.nodes import Element, Node
from requests import Response
from requests.exceptions import HTTPError, TooManyRedirects

Expand All @@ -47,6 +47,14 @@
DEFAULT_DELAY = 60.0


def node_line_or_0(node: Element) -> int:
    """Return the line number of *node*, falling back to ``0``.

    PriorityQueue items must be comparable, and the line number is part of
    the tuple placed on the PriorityQueue. Substituting ``0`` when no line
    number is available keeps that tuple element a homogeneous ``int``.
    """
    line = get_node_line(node)
    return line if line else 0


class AnchorCheckParser(HTMLParser):
"""Specialized HTML parser that looks for a specific anchor."""

Expand Down Expand Up @@ -406,7 +414,7 @@ def write_doc(self, docname: str, doctree: Node) -> None:
if 'refuri' not in refnode:
continue
uri = refnode['refuri']
lineno = get_node_line(refnode)
lineno = node_line_or_0(refnode)
uri_info = (CHECK_IMMEDIATELY, uri, docname, lineno)
self.wqueue.put(uri_info, False)
n += 1
Expand All @@ -415,7 +423,7 @@ def write_doc(self, docname: str, doctree: Node) -> None:
for imgnode in doctree.traverse(nodes.image):
uri = imgnode['candidates'].get('?')
if uri and '://' in uri:
lineno = get_node_line(imgnode)
lineno = node_line_or_0(imgnode)
uri_info = (CHECK_IMMEDIATELY, uri, docname, lineno)
self.wqueue.put(uri_info, False)
n += 1
Expand Down
1 change: 1 addition & 0 deletions tests/roots/test-linkcheck-localserver-two-links/conf.py
@@ -0,0 +1 @@
exclude_patterns = ['_build']
6 changes: 6 additions & 0 deletions tests/roots/test-linkcheck-localserver-two-links/index.rst
@@ -0,0 +1,6 @@
.. image:: http://localhost:7777/
:target: http://localhost:7777/

`weblate.org`_

.. _weblate.org: http://localhost:7777/
37 changes: 37 additions & 0 deletions tests/test_build_linkcheck.py
Expand Up @@ -573,3 +573,40 @@ def test_limit_rate_bails_out_after_waiting_max_time(app):
checker.rate_limits = {"localhost": RateLimit(90.0, 0.0)}
next_check = checker.limit_rate(FakeResponse())
assert next_check is None


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver-two-links', freshenv=True,
)
def test_priorityqueue_items_are_comparable(app):
    """Build a document linking the same URL three times and check the report.

    The build must complete and every entry must carry an integer
    ``lineno``: ``0`` (never ``None``) for two of the links and ``4`` for
    the third.
    """
    with http_server(OKHandler):
        app.builder.build_all()

    report = (app.outdir / 'output.json').read_text()
    rows = [json.loads(line) for line in sorted(report.splitlines())]

    # The first two line numbers should not be None.
    expected_linenos = (0, 0, 4)
    expected_rows = [
        {
            'filename': 'index.rst',
            'lineno': lineno,
            'status': 'working',
            'code': 0,
            'uri': 'http://localhost:7777/',
            'info': '',
        }
        for lineno in expected_linenos
    ]
    assert rows == expected_rows

0 comments on commit b469555

Please sign in to comment.