Skip to content

Commit

Permalink
Fix sphinx-doc#7301: Allow . and _ for node_id
Browse files Browse the repository at this point in the history
During the development of 3.0, Sphinx has followed the "Identifier
Normalization" rule of docutils.  This change extends that rule to allow
dots (".") and underscores ("_") in node identifiers.

It allows Sphinx to generate node identifiers that stay as close as
possible to the source string (because dots and underscores are commonly
used in many programming languages).

This change avoids breaking existing hyperlinks as far as possible.
  • Loading branch information
tk0miya committed Mar 22, 2020
1 parent dd85cb6 commit 6f63660
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 6 deletions.
2 changes: 1 addition & 1 deletion sphinx/domains/python.py
Expand Up @@ -370,7 +370,7 @@ def add_target_and_index(self, name_cls: Tuple[str, str], sig: str,
signode: desc_signature) -> None:
modname = self.options.get('module', self.env.ref_context.get('py:module'))
fullname = (modname + '.' if modname else '') + name_cls[0]
node_id = make_id(self.env, self.state.document, modname or '', name_cls[0])
node_id = make_id(self.env, self.state.document, '', fullname)
signode['ids'].append(node_id)

# Assign old styled node_id(fullname) not to break old hyperlinks (if possible)
Expand Down
78 changes: 76 additions & 2 deletions sphinx/util/nodes.py
Expand Up @@ -9,6 +9,7 @@
"""

import re
import unicodedata
import warnings
from typing import Any, Callable, Iterable, List, Set, Tuple
from typing import cast
Expand Down Expand Up @@ -436,6 +437,79 @@ def inline_all_toctrees(builder: "Builder", docnameset: Set[str], docname: str,
return tree


def _make_id(string: str) -> str:
"""Convert `string` into an identifier and return it.
This function is a modified version of ``docutils.nodes.make_id()`` of
docutils-0.16.
Changes:
* Allow to use dots (".") and underscores ("_") for an identifier
without a leading character.
# Author: David Goodger <goodger@python.org>
# Maintainer: docutils-develop@lists.sourceforge.net
# Copyright: This module has been placed in the public domain.
"""
id = string.lower()
id = id.translate(_non_id_translate_digraphs)
id = id.translate(_non_id_translate)
# get rid of non-ascii characters.
# 'ascii' lowercase to prevent problems with turkish locale.
id = unicodedata.normalize('NFKD', id).encode('ascii', 'ignore').decode('ascii')
# shrink runs of whitespace and replace by hyphen
id = _non_id_chars.sub('-', ' '.join(id.split()))
id = _non_id_at_ends.sub('', id)
return str(id)


_non_id_chars = re.compile('[^a-z0-9._]+')
_non_id_at_ends = re.compile('^[-0-9._]+|-+$')
_non_id_translate = {
0x00f8: u'o', # o with stroke
0x0111: u'd', # d with stroke
0x0127: u'h', # h with stroke
0x0131: u'i', # dotless i
0x0142: u'l', # l with stroke
0x0167: u't', # t with stroke
0x0180: u'b', # b with stroke
0x0183: u'b', # b with topbar
0x0188: u'c', # c with hook
0x018c: u'd', # d with topbar
0x0192: u'f', # f with hook
0x0199: u'k', # k with hook
0x019a: u'l', # l with bar
0x019e: u'n', # n with long right leg
0x01a5: u'p', # p with hook
0x01ab: u't', # t with palatal hook
0x01ad: u't', # t with hook
0x01b4: u'y', # y with hook
0x01b6: u'z', # z with stroke
0x01e5: u'g', # g with stroke
0x0225: u'z', # z with hook
0x0234: u'l', # l with curl
0x0235: u'n', # n with curl
0x0236: u't', # t with curl
0x0237: u'j', # dotless j
0x023c: u'c', # c with stroke
0x023f: u's', # s with swash tail
0x0240: u'z', # z with swash tail
0x0247: u'e', # e with stroke
0x0249: u'j', # j with stroke
0x024b: u'q', # q with hook tail
0x024d: u'r', # r with stroke
0x024f: u'y', # y with stroke
}
_non_id_translate_digraphs = {
0x00df: u'sz', # ligature sz
0x00e6: u'ae', # ae
0x0153: u'oe', # ligature oe
0x0238: u'db', # db digraph
0x0239: u'qp', # qp digraph
}


def make_id(env: "BuildEnvironment", document: nodes.document,
prefix: str = '', term: str = None) -> str:
"""Generate an appropriate node_id for given *prefix* and *term*."""
Expand All @@ -447,12 +521,12 @@ def make_id(env: "BuildEnvironment", document: nodes.document,

# try to generate node_id by *term*
if prefix and term:
node_id = nodes.make_id(idformat % term)
node_id = _make_id(idformat % term)
if node_id == prefix:
# *term* is not good to generate a node_id.
node_id = None
elif term:
node_id = nodes.make_id(term)
node_id = _make_id(term)
if node_id == '':
node_id = None # fallback to None

Expand Down
6 changes: 3 additions & 3 deletions tests/test_util_nodes.py
Expand Up @@ -189,9 +189,9 @@ def test_clean_astext():
('', '', 'id0'),
('term', '', 'term-0'),
('term', 'Sphinx', 'term-sphinx'),
('', 'io.StringIO', 'io-stringio'), # contains a dot
('', 'sphinx.setup_command', 'sphinx-setup-command'), # contains a dot
('', '_io.StringIO', 'io-stringio'), # starts with underscore
('', 'io.StringIO', 'io.stringio'), # contains a dot
('', 'sphinx.setup_command', 'sphinx.setup_command'), # contains a dot & underscore
('', '_io.StringIO', 'io.stringio'), # starts with underscore
('', 'sphinx', 'sphinx'), # alphabets in unicode fullwidth characters
('', '悠好', 'id0'), # multibytes text (in Chinese)
('', 'Hello=悠好=こんにちは', 'hello'), # alphabets and multibytes text
Expand Down

0 comments on commit 6f63660

Please sign in to comment.