diff --git a/doc/usage/extensions/napoleon.rst b/doc/usage/extensions/napoleon.rst
index 76c423dc0e8..438c3395089 100644
--- a/doc/usage/extensions/napoleon.rst
+++ b/doc/usage/extensions/napoleon.rst
@@ -274,11 +274,12 @@ sure that "sphinx.ext.napoleon" is enabled in `conf.py`::
     napoleon_use_ivar = False
     napoleon_use_param = True
     napoleon_use_rtype = True
+    napoleon_type_aliases = None

 .. _Google style:
    https://google.github.io/styleguide/pyguide.html
 .. _NumPy style:
-   https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt
+   https://numpydoc.readthedocs.io/en/latest/format.html#docstring-standard

 .. confval:: napoleon_google_docstring
@@ -435,7 +436,7 @@ sure that "sphinx.ext.napoleon" is enabled in `conf.py`::
       :param arg1: Description of `arg1`
       :type arg1: str
       :param arg2: Description of `arg2`, defaults to 0
-      :type arg2: int, optional
+      :type arg2: :class:`int`, *optional*

    **If False**::
@@ -480,3 +481,31 @@ sure that "sphinx.ext.napoleon" is enabled in `conf.py`::
    **If False**::

       :returns: *bool* -- True if successful, False otherwise
+
+.. confval:: napoleon_type_aliases
+
+   A mapping to translate type names to other names or references. Works
+   only when ``napoleon_use_param = True``. *Defaults to None.*
+
+   With::
+
+      napoleon_type_aliases = {
+          "CustomType": "mypackage.CustomType",
+          "dict-like": ":term:`dict-like <mapping>`",
+      }
+
+   This `NumPy style`_ snippet::
+
+      Parameters
+      ----------
+      arg1 : CustomType
+          Description of `arg1`
+      arg2 : dict-like
+          Description of `arg2`
+
+   becomes::
+
+      :param arg1: Description of `arg1`
+      :type arg1: mypackage.CustomType
+      :param arg2: Description of `arg2`
+      :type arg2: :term:`dict-like <mapping>`
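
With the documentation change above, a project opts in from its ``conf.py``.
A minimal sketch, reusing the alias mapping from the new confval
documentation (the ``mypackage`` name is illustrative, not part of this diff):

    # conf.py -- minimal sketch; requires this diff to be applied
    extensions = ["sphinx.ext.napoleon"]

    napoleon_use_param = True  # napoleon_type_aliases only applies when this is True
    napoleon_type_aliases = {
        "CustomType": "mypackage.CustomType",
        "dict-like": ":term:`dict-like <mapping>`",
    }
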
diff --git a/sphinx/ext/napoleon/__init__.py b/sphinx/ext/napoleon/__init__.py
index 2b1818425db..6d7406ead39 100644
--- a/sphinx/ext/napoleon/__init__.py
+++ b/sphinx/ext/napoleon/__init__.py
@@ -41,6 +41,7 @@ class Config:
         napoleon_use_param = True
         napoleon_use_rtype = True
         napoleon_use_keyword = True
+        napoleon_type_aliases = None
         napoleon_custom_sections = None

     .. _Google style:
@@ -236,6 +237,10 @@ def __unicode__(self):

             :returns: *bool* -- True if successful, False otherwise

+    napoleon_type_aliases : :obj:`dict` (Defaults to None)
+        Add a mapping of strings to strings, translating type names in NumPy
+        style docstrings. Only works when ``napoleon_use_param = True``.
+
     napoleon_custom_sections : :obj:`list` (Defaults to None)
         Add a list of custom sections to include, expanding the list of parsed sections.

@@ -263,6 +268,7 @@ def __unicode__(self):
         'napoleon_use_param': (True, 'env'),
         'napoleon_use_rtype': (True, 'env'),
         'napoleon_use_keyword': (True, 'env'),
+        'napoleon_type_aliases': (None, 'env'),
         'napoleon_custom_sections': (None, 'env')
     }
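
The core of the change is a small type-spec tokenizer added to docstring.py
below. To make its behaviour concrete before reading the implementation, a
sketch of the round trip (input/output pairs are taken from the new tests at
the end of this diff):

    # Sketch; values mirror the test cases in test_ext_napoleon_docstring.py.
    from sphinx.ext.napoleon.docstring import _recombine_set_tokens, _tokenize_type_spec

    _tokenize_type_spec("int or float or None, optional")
    # -> ["int", " or ", "float", " or ", "None", ", ", "optional"]

    # Set literals are first split at braces and commas, then recombined
    # into a single token so they can later be rendered verbatim:
    _recombine_set_tokens(["{", "1", ", ", "2", "}"])
    # -> ["{1, 2}"]
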
diff --git a/sphinx/ext/napoleon/docstring.py b/sphinx/ext/napoleon/docstring.py
index 5857fcf92fb..95fb1e538a0 100644
--- a/sphinx/ext/napoleon/docstring.py
+++ b/sphinx/ext/napoleon/docstring.py
@@ -10,6 +10,7 @@
     :license: BSD, see LICENSE for details.
 """

+import collections
 import inspect
 import re
 from functools import partial
@@ -18,13 +19,15 @@
 from sphinx.application import Sphinx
 from sphinx.config import Config as SphinxConfig
 from sphinx.ext.napoleon.iterators import modify_iter
-from sphinx.locale import _
+from sphinx.locale import _, __
+from sphinx.util import logging
+
+logger = logging.getLogger(__name__)

 if False:
     # For type annotation
     from typing import Type  # for python3.5.1

-
 _directive_regex = re.compile(r'\.\. \S+::')
 _google_section_regex = re.compile(r'^(\s|\w)+:\s*$')
 _google_typed_arg_regex = re.compile(r'\s*(.+?)\s*\(\s*(.*[^\s]+)\s*\)')
@@ -33,11 +36,19 @@
 _xref_or_code_regex = re.compile(
     r'((?::(?:[a-zA-Z0-9]+[\-_+:.])*[a-zA-Z0-9]+:`.+?`)|'
     r'(?:``.+``))')
+_xref_regex = re.compile(
+    r'(?::(?:[a-zA-Z0-9]+[\-_+:.])*[a-zA-Z0-9]+:`.+?`)'
+)
 _bullet_list_regex = re.compile(r'^(\*|\+|\-)(\s+\S|\s*$)')
 _enumerated_list_regex = re.compile(
     r'^(?P<paren>\()?'
     r'(\d+|#|[ivxlcdm]+|[IVXLCDM]+|[a-zA-Z])'
     r'(?(paren)\)|\.)(\s+\S|\s*$)')
+_token_regex = re.compile(
+    r"(\sor\s|\sof\s|:\s|,\s|[{]|[}]"
+    r'|"(?:\\"|[^"])*"'
+    r"|'(?:\\'|[^'])*')"
+)


 class GoogleDocstring:
@@ -780,6 +791,165 @@ def _strip_empty(self, lines: List[str]) -> List[str]:
         return lines


+def _recombine_set_tokens(tokens: List[str]) -> List[str]:
+    token_queue = collections.deque(tokens)
+    keywords = ("optional", "default")
+
+    def takewhile_set(tokens):
+        open_braces = 0
+        previous_token = None
+        while True:
+            try:
+                token = tokens.popleft()
+            except IndexError:
+                break
+
+            if token == ", ":
+                previous_token = token
+                continue
+
+            if token in keywords:
+                tokens.appendleft(token)
+                if previous_token is not None:
+                    tokens.appendleft(previous_token)
+                break
+
+            if previous_token is not None:
+                yield previous_token
+                previous_token = None
+
+            if token == "{":
+                open_braces += 1
+            elif token == "}":
+                open_braces -= 1
+
+            yield token
+
+            if open_braces == 0:
+                break
+
+    def combine_set(tokens):
+        while True:
+            try:
+                token = tokens.popleft()
+            except IndexError:
+                break
+
+            if token == "{":
+                tokens.appendleft("{")
+                yield "".join(takewhile_set(tokens))
+            else:
+                yield token
+
+    return list(combine_set(token_queue))
+
+
+def _tokenize_type_spec(spec: str) -> List[str]:
+    def postprocess(item):
+        if item.startswith("default"):
+            return [item[:7], item[7:]]
+        else:
+            return [item]
+
+    tokens = list(
+        item
+        for raw_token in _token_regex.split(spec)
+        for item in postprocess(raw_token)
+        if item
+    )
+    return tokens
+
+
+def _token_type(token: str, location: str = None) -> str:
+    if token.startswith(" ") or token.endswith(" "):
+        type_ = "delimiter"
+    elif (
+            token.isnumeric() or
+            (token.startswith("{") and token.endswith("}")) or
+            (token.startswith('"') and token.endswith('"')) or
+            (token.startswith("'") and token.endswith("'"))
+    ):
+        type_ = "literal"
+    elif token.startswith("{"):
+        logger.warning(
+            __("invalid value set (missing closing brace): %s"),
+            token,
+            location=location,
+        )
+        type_ = "literal"
+    elif token.endswith("}"):
+        logger.warning(
+            __("invalid value set (missing opening brace): %s"),
+            token,
+            location=location,
+        )
+        type_ = "literal"
+    elif token.startswith("'") or token.startswith('"'):
+        logger.warning(
+            __("malformed string literal (missing closing quote): %s"),
+            token,
+            location=location,
+        )
+        type_ = "literal"
+    elif token.endswith("'") or token.endswith('"'):
+        logger.warning(
+            __("malformed string literal (missing opening quote): %s"),
+            token,
+            location=location,
+        )
+        type_ = "literal"
+    elif token in ("optional", "default"):
+        # default is not an official keyword (yet) but supported by the
+        # reference implementation (numpydoc) and widely used
+        type_ = "control"
+    elif _xref_regex.match(token):
+        type_ = "reference"
+    else:
+        type_ = "obj"
+
+    return type_
+
+
+def _convert_numpy_type_spec(_type: str, location: str = None, translations: dict = {}) -> str:
+    def convert_obj(obj, translations, default_translation):
+        translation = translations.get(obj, obj)
+
+        # use :class: (the default) only if obj is not a standard singleton
+        # (None, True, False)
+        if translation in ("None", "True", "False") and default_translation == ":class:`%s`":
+            default_translation = ":obj:`%s`"
+
+        if _xref_regex.match(translation) is None:
+            translation = default_translation % translation
+
+        return translation
+
+    tokens = _tokenize_type_spec(_type)
+    combined_tokens = _recombine_set_tokens(tokens)
+    types = [
+        (token, _token_type(token, location))
+        for token in combined_tokens
+    ]
+
+    # don't use the object role if it's not necessary
+    default_translation = (
+        ":class:`%s`"
+        if not all(type_ == "obj" for _, type_ in types)
+        else "%s"
+    )
+
+    converters = {
+        "literal": lambda x: "``%s``" % x,
+        "obj": lambda x: convert_obj(x, translations, default_translation),
+        "control": lambda x: "*%s*" % x,
+        "delimiter": lambda x: x,
+        "reference": lambda x: x,
+    }
+
+    converted = "".join(converters.get(type_)(token) for token, type_ in types)
+
+    return converted
+
+
 class NumpyDocstring(GoogleDocstring):
     """Convert NumPy style docstrings to reStructuredText.

@@ -879,6 +1049,15 @@ def __init__(self, docstring: Union[str, List[str]], config: SphinxConfig = None
         self._directive_sections = ['.. index::']
         super().__init__(docstring, config, app, what, name, obj, options)

+    def _get_location(self) -> str:
+        filepath = inspect.getfile(self._obj) if self._obj is not None else ""
+        name = self._name
+
+        if filepath is None and name is None:
+            return None
+
+        return ":".join([filepath, "docstring of %s" % name])
+
     def _consume_field(self, parse_type: bool = True, prefer_type: bool = False
                        ) -> Tuple[str, str, List[str]]:
         line = next(self._line_iter)
@@ -888,6 +1067,12 @@ def _consume_field(self, parse_type: bool = True, prefer_type: bool = False
             _name, _type = line, ''
         _name, _type = _name.strip(), _type.strip()
         _name = self._escape_args_and_kwargs(_name)
+        if self._config.napoleon_use_param:
+            _type = _convert_numpy_type_spec(
+                _type,
+                location=self._get_location(),
+                translations=self._config.napoleon_type_aliases or {},
+            )

         if prefer_type and not _type:
             _type, _name = _name, _type
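
Taken together, ``_convert_numpy_type_spec`` drives the pipeline above:
tokenize, recombine set literals, classify each token, then convert it with
the matching role. A worked example, with expected outputs copied from
``test_convert_numpy_type_spec`` in the test diff below:

    # Sketch; expected outputs are copied from the tests below.
    from sphinx.ext.napoleon.docstring import _convert_numpy_type_spec

    _convert_numpy_type_spec("str, optional")
    # -> ":class:`str`, *optional*"

    _convert_numpy_type_spec("int or float or None, default: None")
    # -> ":class:`int` or :class:`float` or :obj:`None`, *default*: :obj:`None`"

    # An alias mapping rewrites the name before the role is applied:
    _convert_numpy_type_spec("DataFrame, optional",
                             translations={"DataFrame": "pandas.DataFrame"})
    # -> ":class:`pandas.DataFrame`, *optional*"
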
""" +import re from collections import namedtuple +from contextlib import contextmanager from inspect import cleandoc from textwrap import dedent from unittest import TestCase, mock from sphinx.ext.napoleon import Config from sphinx.ext.napoleon.docstring import GoogleDocstring, NumpyDocstring +from sphinx.ext.napoleon.docstring import ( + _tokenize_type_spec, + _recombine_set_tokens, + _convert_numpy_type_spec, + _token_type +) class NamedtupleSubclass(namedtuple('NamedtupleSubclass', ('attr1', 'attr2'))): @@ -1976,6 +1984,154 @@ def test_list_in_parameter_description(self): actual = str(NumpyDocstring(docstring, config)) self.assertEqual(expected, actual) + def test_token_type(self): + tokens = ( + ("1", "literal"), + ("'string'", "literal"), + ('"another_string"', "literal"), + ("{1, 2}", "literal"), + ("{'va{ue', 'set'}", "literal"), + ("optional", "control"), + ("default", "control"), + (", ", "delimiter"), + (" of ", "delimiter"), + (" or ", "delimiter"), + (": ", "delimiter"), + ("True", "obj"), + ("None", "obj"), + ("name", "obj"), + (":py:class:`Enum`", "reference"), + ) + + for token, expected in tokens: + actual = _token_type(token) + self.assertEqual(expected, actual) + + def test_tokenize_type_spec(self): + specs = ( + "str", + "int or float or None, optional", + '{"F", "C", "N"}', + "{'F', 'C', 'N'}, default: 'F'", + "{'F', 'C', 'N or C'}, default 'F'", + '"ma{icious"', + r"'with \'quotes\''", + ) + + tokens = ( + ["str"], + ["int", " or ", "float", " or ", "None", ", ", "optional"], + ["{", '"F"', ", ", '"C"', ", ", '"N"', "}"], + ["{", "'F'", ", ", "'C'", ", ", "'N'", "}", ", ", "default", ": ", "'F'"], + ["{", "'F'", ", ", "'C'", ", ", "'N or C'", "}", ", ", "default", " ", "'F'"], + ['"ma{icious"'], + [r"'with \'quotes\''"], + ) + + for spec, expected in zip(specs, tokens): + actual = _tokenize_type_spec(spec) + self.assertEqual(expected, actual) + + def test_recombine_set_tokens(self): + tokens = ( + ["{", "1", ", ", "2", "}"], + ["{", '"F"', ", ", '"C"', ", ", '"N"', "}", ", ", "optional"], + ["{", "'F'", ", ", "'C'", ", ", "'N'", "}", ", ", "default", ": ", "None"], + ) + + combined_tokens = ( + ["{1, 2}"], + ['{"F", "C", "N"}', ", ", "optional"], + ["{'F', 'C', 'N'}", ", ", "default", ": ", "None"], + ) + + for tokens_, expected in zip(tokens, combined_tokens): + actual = _recombine_set_tokens(tokens_) + self.assertEqual(expected, actual) + + def test_recombine_set_tokens_invalid(self): + tokens = ( + ["{", "1", ", ", "2"], + ['"F"', ", ", '"C"', ", ", '"N"', "}", ", ", "optional"], + ["{", "1", ", ", "2", ", ", "default", ": ", "None"], + ) + combined_tokens = ( + ["{1, 2"], + ['"F"', ", ", '"C"', ", ", '"N"', "}", ", ", "optional"], + ["{1, 2", ", ", "default", ": ", "None"], + ) + + for tokens_, expected in zip(tokens, combined_tokens): + actual = _recombine_set_tokens(tokens_) + self.assertEqual(expected, actual) + + def test_convert_numpy_type_spec(self): + translations = { + "DataFrame": "pandas.DataFrame", + } + + specs = ( + "", + "optional", + "str, optional", + "int or float or None, default: None", + '{"F", "C", "N"}', + "{'F', 'C', 'N'}, default: 'N'", + "DataFrame, optional", + ) + + converted = ( + "", + "*optional*", + ":class:`str`, *optional*", + ":class:`int` or :class:`float` or :obj:`None`, *default*: :obj:`None`", + '``{"F", "C", "N"}``', + "``{'F', 'C', 'N'}``, *default*: ``'N'``", + ":class:`pandas.DataFrame`, *optional*", + ) + + for spec, expected in zip(specs, converted): + actual = _convert_numpy_type_spec(spec, translations=translations) + 
@@ -1991,3 +2147,38 @@ def test_keywords_with_types(self):
 :kwtype gotham_is_yours: None
 """
         self.assertEqual(expected, actual)
+
+@contextmanager
+def warns(warning, match):
+    match_re = re.compile(match)
+    try:
+        yield warning
+    finally:
+        raw_warnings = warning.getvalue()
+        warnings = [w for w in raw_warnings.split("\n") if w.strip()]
+
+        assert len(warnings) == 1 and all(match_re.match(w) for w in warnings)
+        warning.truncate(0)
+
+
+class TestNumpyDocstring:
+    def test_token_type_invalid(self, warning):
+        tokens = (
+            "{1, 2",
+            "}",
+            "'abc",
+            "def'",
+            '"ghi',
+            'jkl"',
+        )
+        errors = (
+            r".+: invalid value set \(missing closing brace\):",
+            r".+: invalid value set \(missing opening brace\):",
+            r".+: malformed string literal \(missing closing quote\):",
+            r".+: malformed string literal \(missing opening quote\):",
+            r".+: malformed string literal \(missing closing quote\):",
+            r".+: malformed string literal \(missing opening quote\):",
+        )
+        for token, error in zip(tokens, errors):
+            with warns(warning, match=error):
+                _token_type(token)
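
Finally, the warnings asserted by ``TestNumpyDocstring`` above are emitted at
conversion time through Sphinx's logging API. A sketch of the runtime
behaviour (the location string is a hypothetical example of the form produced
by ``NumpyDocstring._get_location``):

    # Sketch; the warning text comes from _token_type in docstring.py above.
    from sphinx.ext.napoleon.docstring import _token_type

    _token_type("{1, 2", location="mypackage/module.py:docstring of mypackage.module.func")
    # logs "invalid value set (missing closing brace): {1, 2"
    # and still classifies the token as "literal" so conversion can continue.
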