From 25c0b465e44b9f7b599bfed3cc4eaa153fab59e3 Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Fri, 19 Aug 2022 18:07:18 -0400 Subject: [PATCH 1/7] refactor: move encoding functions to class --- docformatter.py | 269 +++++++++++++++++++++++++++--------------------- pyproject.toml | 3 + 2 files changed, 155 insertions(+), 117 deletions(-) diff --git a/docformatter.py b/docformatter.py index 0fcfffc..b050de1 100755 --- a/docformatter.py +++ b/docformatter.py @@ -46,10 +46,11 @@ import textwrap import tokenize from configparser import ConfigParser -from typing import List, TextIO, Tuple, Union +from typing import Dict, List, TextIO, Tuple, Union # Third Party Imports -import untokenize +import untokenize # type: ignore +from charset_normalizer import from_path try: # Third Party Imports @@ -61,9 +62,7 @@ __version__ = "1.5.0" - -if sys.version_info.major == 3: - unicode = str +unicode = str HEURISTIC_MIN_LIST_ASPECT_RATIO = 0.4 @@ -107,7 +106,7 @@ class Configurator: parser = None """Parser object.""" - flargs_dct = {} + flargs_dct: Dict[str, Union[bool, float, int, str]] = {} """Dictionary of configuration file arguments.""" configuration_file_lst = [ @@ -361,10 +360,10 @@ class Formator: def __init__( self, - args: argparse.Namespace, - stderror: TextIO, - stdin: TextIO, - stdout: TextIO, + args: argparse.Namespace, + stderror: TextIO, + stdin: TextIO, + stdout: TextIO, ) -> None: """Initialize a Formattor instance. @@ -389,6 +388,8 @@ def __init__( self.stdin: TextIO = stdin self.stdout: TextIO = stdout + self.encodor = Encodor() + def do_format_standard_in(self, parser: argparse.ArgumentParser): """Print formatted text to standard out. @@ -460,11 +461,12 @@ def _do_format_file(self, filename): Return ------ - code: int + result_code: int One of the FormatResult codes. """ - encoding = detect_encoding(filename) - with open_with_encoding(filename, encoding=encoding) as input_file: + self.encodor.do_detect_encoding(filename) + + with self.encodor.do_open_with_encoding(filename) as input_file: source = input_file.read() formatted_source = self._do_format_code(source) @@ -472,8 +474,9 @@ def _do_format_file(self, filename): if self.args.check: return FormatResult.check_failed elif self.args.in_place: - with open_with_encoding( - filename, mode="w", encoding=encoding + with self.encodor.do_open_with_encoding( + filename, + mode="w", ) as output_file: output_file.write(formatted_source) else: @@ -500,7 +503,9 @@ def _do_format_code(self, source): The text from the source file. """ try: - original_newline = find_newline(source.splitlines(True)) + original_newline = self.encodor.do_find_newline( + source.splitlines(True) + ) code = self._format_code(source) return normalize_line_endings( @@ -713,6 +718,78 @@ def _do_format_docstring( return f"{beginning}{summary_wrapped}{ending}" +class Encodor: + """Encoding and decoding of files.""" + + CR = "\r" + LF = "\n" + CRLF = "\r\n" + + def __init__(self): + """Initialize an Encodor instance.""" + self.encoding = "latin-1" + + def do_detect_encoding(self, filename: str) -> None: + """Return the detected file encoding. + + Parameters + ---------- + filename : str + The full path name of the file whose encoding is to be detected. + """ + try: + self.encoding = from_path(filename).best().encoding + + # Check for correctness of encoding. + with self.do_open_with_encoding(filename) as check_file: + check_file.read() + except (SyntaxError, LookupError, UnicodeDecodeError): + self.encoding = "latin-1" + + def do_find_newline(self, source: str) -> Dict[int, int]: + """Return type of newline used in source. + + Paramaters + ---------- + source : list + A list of lines. + + Returns + ------- + counter : dict + A dict with the count of new line types found. + """ + assert not isinstance(source, unicode) + + counter = collections.defaultdict(int) + for line in source: + if line.endswith(self.CRLF): + counter[self.CRLF] += 1 + elif line.endswith(self.CR): + counter[self.CR] += 1 + elif line.endswith(self.LF): + counter[self.LF] += 1 + + return (sorted(counter, key=counter.get, reverse=True) or [self.LF])[0] + + def do_open_with_encoding(self, filename: str, mode: str = "r"): + """Return opened file with a specific encoding. + + Parameters + ---------- + filename : str + The full path name of the file to open. + mode : str + The mode to open the file in. Defaults to read-only. + + Returns + ------- + """ + return io.open( + filename, mode=mode, encoding=self.encoding, newline="" + ) # Preserve line endings + + def has_correct_length(length_range, start, end): """Return True if docstring's length is in range.""" if length_range is None: @@ -733,6 +810,65 @@ def is_in_range(line_range, start, end): ) +def is_probably_beginning_of_sentence(line): + """Return True if this line begins a new sentence.""" + # Check heuristically for a parameter list. + for token in ["@", "-", r"\*"]: + if re.search(r"\s" + token + r"\s", line): + return True + + stripped_line = line.strip() + is_beginning_of_sentence = re.match(r'[^\w"\'`\(\)]', stripped_line) + is_pydoc_ref = re.match(r"^:\w+:", stripped_line) + + return is_beginning_of_sentence and not is_pydoc_ref + + +def is_some_sort_of_code(text): + """Return True if text looks like code.""" + return any(len(word) > 50 for word in text.split()) + + +def is_some_sort_of_list(text, strict): + """Return True if text looks like a list.""" + split_lines = text.rstrip().splitlines() + + # TODO: Find a better way of doing this. + # Very large number of lines but short columns probably means a list of + # items. + if ( + len(split_lines) + / max([len(line.strip()) for line in split_lines] + [1]) + > HEURISTIC_MIN_LIST_ASPECT_RATIO + ) and not strict: + return True + + return any( + ( + re.match(r"\s*$", line) + or + # "1. item" + re.match(r"\s*\d\.", line) + or + # "@parameter" + re.match(r"\s*[\-*:=@]", line) + or + # "parameter - description" + re.match(r".*\s+[\-*:=@]\s+", line) + or + # "parameter: description" + re.match(r"\s*\S+[\-*:=@]\s+", line) + or + # "parameter:\n description" + re.match(r"\s*\S+:\s*$", line) + or + # "parameter -- description" + re.match(r"\s*\S+\s+--\s+", line) + ) + for line in split_lines + ) + + def reindent(text, indentation): """Return reindented text that matches indentation.""" if "\t" not in indentation: @@ -764,20 +900,6 @@ def _find_shortest_indentation(lines): return indentation or "" -def is_probably_beginning_of_sentence(line): - """Return True if this line begins a new sentence.""" - # Check heuristically for a parameter list. - for token in ["@", "-", r"\*"]: - if re.search(r"\s" + token + r"\s", line): - return True - - stripped_line = line.strip() - is_beginning_of_sentence = re.match(r'[^\w"\'`\(\)]', stripped_line) - is_pydoc_ref = re.match(r"^:\w+:", stripped_line) - - return is_beginning_of_sentence and not is_pydoc_ref - - def split_summary_and_description(contents): """Split docstring into summary and description. @@ -852,69 +974,6 @@ def split_first_sentence(text): return sentence, delimiter + rest -def is_some_sort_of_list(text, strict): - """Return True if text looks like a list.""" - split_lines = text.rstrip().splitlines() - - # TODO: Find a better way of doing this. - # Very large number of lines but short columns probably means a list of - # items. - if ( - len(split_lines) - / max([len(line.strip()) for line in split_lines] + [1]) - > HEURISTIC_MIN_LIST_ASPECT_RATIO - ) and not strict: - return True - - return any( - ( - re.match(r"\s*$", line) - or - # "1. item" - re.match(r"\s*\d\.", line) - or - # "@parameter" - re.match(r"\s*[\-*:=@]", line) - or - # "parameter - description" - re.match(r".*\s+[\-*:=@]\s+", line) - or - # "parameter: description" - re.match(r"\s*\S+[\-*:=@]\s+", line) - or - # "parameter:\n description" - re.match(r"\s*\S+:\s*$", line) - or - # "parameter -- description" - re.match(r"\s*\S+\s+--\s+", line) - ) - for line in split_lines - ) - - -def is_some_sort_of_code(text): - """Return True if text looks like code.""" - return any(len(word) > 50 for word in text.split()) - - -def find_newline(source): - """Return type of newline used in source. - - Input is a list of lines. - """ - assert not isinstance(source, unicode) - - counter = collections.defaultdict(int) - for line in source: - if line.endswith(CRLF): - counter[CRLF] += 1 - elif line.endswith(CR): - counter[CR] += 1 - elif line.endswith(LF): - counter[LF] += 1 - return (sorted(counter, key=counter.get, reverse=True) or [LF])[0] - - def normalize_line(line, newline): """Return line with fixed ending, if ending was present in line. @@ -1074,30 +1133,6 @@ def strip_leading_blank_lines(text): return "\n".join(split[found:]) -def open_with_encoding(filename, encoding, mode="r"): - """Return opened file with a specific encoding.""" - return io.open( - filename, mode=mode, encoding=encoding, newline="" - ) # Preserve line endings - - -def detect_encoding(filename): - """Return file encoding.""" - try: - with open(filename, "rb") as input_file: - # Standard Library Imports - from lib2to3.pgen2 import tokenize as lib2to3_tokenize - - encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0] - - # Check for correctness of encoding. - with open_with_encoding(filename, encoding) as check_file: - check_file.read() - return encoding - except (SyntaxError, LookupError, UnicodeDecodeError): - return "latin-1" - - def _main(argv, standard_out, standard_error, standard_in): """Run internal main entry point.""" configurator = Configurator(argv) diff --git a/pyproject.toml b/pyproject.toml index ec068f7..799f8b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ include = ["LICENSE"] [tool.poetry.dependencies] python = "^3.6" +charset_normalizer = "^2.0.0" tomli = [ {version="<2.0.0", optional=true, python="<3.7"}, {version="^2.0.0", optional=true, python=">=3.7"}, @@ -39,12 +40,14 @@ tomli = [ untokenize = "^0.1.1" [tool.poetry.dev-dependencies] +autopep8 = "^1.7.0" black = [ {version = "^22.0.0", python = ">=3.6.2"}, ] coverage = {extras = ["toml"], version = "^6.2.0"} isort = "^5.7.0" mock = "^4.0.0" +mypy = "0.971" pycodestyle = "^2.8.0" pydocstyle = "^6.1.1" pylint = "^2.12.0" From 43ee993d61e6ff4270486efef9fe19393e0b4db7 Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Fri, 19 Aug 2022 18:10:25 -0400 Subject: [PATCH 2/7] test: update tests for new Encodor class --- tests/test_encoding_functions.py | 70 +++++++++++++++++++++++++------- tests/test_format_code.py | 3 -- 2 files changed, 55 insertions(+), 18 deletions(-) diff --git a/tests/test_encoding_functions.py b/tests/test_encoding_functions.py index 9a99f44..e8281b9 100644 --- a/tests/test_encoding_functions.py +++ b/tests/test_encoding_functions.py @@ -41,7 +41,7 @@ import pytest # docformatter Package Imports -import docformatter +from docformatter import Encodor SYSTEM_ENCODING = sys.getdefaultencoding() @@ -56,7 +56,10 @@ def test_detect_encoding_with_explicit_utf_8( self, temporary_file, contents ): """Return utf-8 when explicitely set in file.""" - assert "utf-8" == docformatter.detect_encoding(temporary_file) + uut = Encodor() + uut.do_detect_encoding(temporary_file) + + assert "utf_8" == uut.encoding @pytest.mark.unit @pytest.mark.parametrize( @@ -66,13 +69,19 @@ def test_detect_encoding_with_non_explicit_setting( self, temporary_file, contents ): """Return default system encoding when encoding not explicitly set.""" - assert SYSTEM_ENCODING == docformatter.detect_encoding(temporary_file) + uut = Encodor() + uut.do_detect_encoding(temporary_file) + + assert "ascii" == uut.encoding @pytest.mark.unit - @pytest.mark.parametrize("contents", ["# -*- coding: blah -*-\n"]) + @pytest.mark.parametrize("contents", ["# -*- coding: blah -*-"]) def test_detect_encoding_with_bad_encoding(self, temporary_file, contents): """Default to latin-1 when unknown encoding detected.""" - assert "latin-1" == docformatter.detect_encoding(temporary_file) + uut = Encodor() + uut.do_detect_encoding(temporary_file) + + assert "ascii" == uut.encoding class TestFindNewline: @@ -81,38 +90,64 @@ class TestFindNewline: @pytest.mark.unit def test_find_newline_only_cr(self): """Return carriage return as newline type.""" + uut = Encodor() source = ["print 1\r", "print 2\r", "print3\r"] - assert docformatter.CR == docformatter.find_newline(source) + + assert uut.CR == uut.do_find_newline(source) @pytest.mark.unit def test_find_newline_only_lf(self): """Return line feed as newline type.""" + uut = Encodor() source = ["print 1\n", "print 2\n", "print3\n"] - assert docformatter.LF == docformatter.find_newline(source) + + assert uut.LF == uut.do_find_newline(source) @pytest.mark.unit def test_find_newline_only_crlf(self): """Return carriage return, line feed as newline type.""" + uut = Encodor() source = ["print 1\r\n", "print 2\r\n", "print3\r\n"] - assert docformatter.CRLF == docformatter.find_newline(source) + + assert uut.CRLF == uut.do_find_newline(source) @pytest.mark.unit def test_find_newline_cr1_and_lf2(self): """Favor line feed over carriage return when both are found.""" + uut = Encodor() source = ["print 1\n", "print 2\r", "print3\n"] - assert docformatter.LF == docformatter.find_newline(source) + + assert uut.LF == uut.do_find_newline(source) @pytest.mark.unit def test_find_newline_cr1_and_crlf2(self): """Favor carriage return, line feed when mix of newline types.""" + uut = Encodor() source = ["print 1\r\n", "print 2\r", "print3\r\n"] - assert docformatter.CRLF == docformatter.find_newline(source) + + assert uut.CRLF == uut.do_find_newline(source) @pytest.mark.unit def test_find_newline_should_default_to_lf(self): """Default to line feed when no newline type found.""" - assert docformatter.LF == docformatter.find_newline([]) - assert docformatter.LF == docformatter.find_newline(["", ""]) + uut = Encodor() + + assert uut.LF == uut.do_find_newline([]) + assert uut.LF == uut.do_find_newline(["", ""]) + + @pytest.mark.unit + def test_find_dominant_newline(self): + """Should detect carriage return as the dominant line endings.""" + uut = Encodor() + + goes_in = '''\ +def foo():\r + """\r + Hello\r + foo. This is a docstring.\r + """\r +''' + assert uut.CRLF == uut.do_find_newline(goes_in.splitlines(True)) @pytest.mark.usefixtures("temporary_file") @@ -123,9 +158,11 @@ class TestOpenWithEncoding: @pytest.mark.parametrize("contents", ["# -*- coding: utf-8 -*-\n"]) def test_open_with_utf_8_encoding(self, temporary_file, contents): """Return TextIOWrapper object when opening file with encoding.""" - encoding = docformatter.detect_encoding(temporary_file) + uut = Encodor() + uut.do_detect_encoding(temporary_file) + assert isinstance( - docformatter.open_with_encoding(temporary_file, encoding=encoding), + uut.do_open_with_encoding(temporary_file), io.TextIOWrapper, ) @@ -133,5 +170,8 @@ def test_open_with_utf_8_encoding(self, temporary_file, contents): @pytest.mark.parametrize("contents", ["# -*- coding: utf-8 -*-\n"]) def test_open_with_wrong_encoding(self, temporary_file, contents): """Raise LookupError when passed unknown encoding.""" + uut = Encodor() + uut.encoding = "cr1252" + with pytest.raises(LookupError): - docformatter.open_with_encoding(temporary_file, encoding="cr1252") + uut.do_open_with_encoding(temporary_file) diff --git a/tests/test_format_code.py b/tests/test_format_code.py index a830e32..521303d 100644 --- a/tests/test_format_code.py +++ b/tests/test_format_code.py @@ -752,9 +752,6 @@ def foo():\r foo. This is a docstring.\r """\r ''' - assert docformatter.CRLF == docformatter.find_newline( - goes_in.splitlines(True) - ) assert '''\ def foo():\r """Hello foo.\r From 6da318b03dbda12d3928f866d22998966c1f2bad Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Fri, 19 Aug 2022 18:17:03 -0400 Subject: [PATCH 3/7] chore: update README.rst --- README.rst | 5 ++++- docformatter.py | 10 ++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 8861a37..7ec6b76 100644 --- a/README.rst +++ b/README.rst @@ -58,9 +58,12 @@ Features whitespace. Such trailing whitespace is visually indistinguishable and some editors (or more recently, reindent.py) will trim them. -See the the full documentation at `read-the-docs`_. +See the the full documentation at `read-the-docs`_, especially the +`requirements`_ section for a more detailed discussion of PEP 257 and other +requirements. .. _read-the-docs: https://docformatter.readthedocs.io +.. _requirements: https://docformatter.readthedocs.io/en/latest/requirements.html Installation ============ diff --git a/docformatter.py b/docformatter.py index b050de1..e063968 100755 --- a/docformatter.py +++ b/docformatter.py @@ -410,7 +410,7 @@ def do_format_standard_in(self, parser: argparse.ArgumentParser): encoding = None source = self.stdin.read() if not isinstance(source, unicode): - encoding = self.stdin.encoding or _get_encoding() + encoding = self.stdin.encoding or self.encodor.system_encoding source = source.decode(encoding) formatted_source = self._do_format_code(source) @@ -728,6 +728,9 @@ class Encodor: def __init__(self): """Initialize an Encodor instance.""" self.encoding = "latin-1" + self.system_encoding = ( + locale.getpreferredencoding() or sys.getdefaultencoding() + ) def do_detect_encoding(self, filename: str) -> None: """Return the detected file encoding. @@ -1153,11 +1156,6 @@ def _main(argv, standard_out, standard_error, standard_in): return formator.do_format_files() -def _get_encoding(): - """Return preferred encoding.""" - return locale.getpreferredencoding() or sys.getdefaultencoding() - - def find_py_files(sources, recursive, exclude=None): """Find Python source files. From 29001f73889aed6e5296fca21e3f6066e418f2ce Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Sat, 20 Aug 2022 10:49:21 -0400 Subject: [PATCH 4/7] chore: update configuration documentation --- docs/source/configuration.rst | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index 88f4e24..f98139a 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -15,8 +15,8 @@ If no configuration file is explicitly passed, ``docformatter`` will search the current directory for the supported files and use the first one found. The order of precedence is ``pyproject.toml``, ``setup.cfg``, then ``tox.ini``. -In any of the configuration files, add a section ``[tool.docformatter]`` with -options listed using the same name as command line options. For example: +In ``pyproject.toml`` or ``tox.ini``, add a section ``[tool.docformatter]`` with +options listed using the same name as command line argument. For example: .. code-block:: yaml @@ -25,5 +25,30 @@ options listed using the same name as command line options. For example: wrap-summaries = 82 blank = true -The ``setup.cfg`` and ``tox.ini`` files will also support the -``[tool:docformatter]`` syntax. +In ``setup.cfg``, add a ``[docformatter]`` section. + +.. code-block:: yaml + + [docformatter] + recursive = true + wrap-summaries = 82 + blank = true + +Command line arguments will take precedence over configuration file settings. +For example, if the following is in your ``pyproject.toml`` + +.. code-block:: yaml + + [tool.docformatter] + recursive = true + wrap-summaries = 82 + wrap-descriptions = 81 + blank = true + +And you invoke docformatter as follows: + +.. code-block:: console + + $ docformatter --config ~/.secret/path/to/pyproject.toml --wrap-summaries 68 + +Summaries will be wrapped at 68, not 82. From b173cb87465dc7acccf973fccf5e379b740f74aa Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Sat, 20 Aug 2022 10:49:47 -0400 Subject: [PATCH 5/7] fix: defaults no longer coming from config files --- docformatter.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/docformatter.py b/docformatter.py index e063968..61c0bf1 100755 --- a/docformatter.py +++ b/docformatter.py @@ -180,8 +180,7 @@ def do_parse_arguments(self) -> None: type=int, metavar="length", help="wrap long summary lines at this length; " - "set to 0 to disable wrapping " - "(default: %(default)s)", + "set to 0 to disable wrapping (default: 79)", ) self.parser.add_argument( "--wrap-descriptions", @@ -190,14 +189,14 @@ def do_parse_arguments(self) -> None: metavar="length", help="wrap descriptions at this length; " "set to 0 to disable wrapping " - "(default: %(default)s)", + "(default: 72)", ) self.parser.add_argument( "--force-wrap", action="store_true", default=bool(self.flargs_dct.get("force-wrap", False)), help="force descriptions to be wrapped even if it may " - "result in a mess (default: %(default)s)", + "result in a mess (default: False)", ) self.parser.add_argument( "--tab-width", @@ -206,28 +205,28 @@ def do_parse_arguments(self) -> None: metavar="width", default=int(self.flargs_dct.get("tab-width", 1)), help="tabs in indentation are this many characters when " - "wrapping lines (default: %(default)s)", + "wrapping lines (default: 1)", ) self.parser.add_argument( "--blank", dest="post_description_blank", action="store_true", default=bool(self.flargs_dct.get("blank", False)), - help="add blank line after description (default: %(default)s)", + help="add blank line after description (default: False)", ) self.parser.add_argument( "--pre-summary-newline", action="store_true", default=bool(self.flargs_dct.get("pre-summary-newline", False)), help="add a newline before the summary of a multi-line docstring " - "(default: %(default)s)", + "(default: False)", ) self.parser.add_argument( "--pre-summary-space", action="store_true", default=bool(self.flargs_dct.get("pre-summary-space", False)), help="add a space after the opening triple quotes " - "(default: %(default)s)", + "(default: False)", ) self.parser.add_argument( "--make-summary-multi-line", @@ -236,7 +235,7 @@ def do_parse_arguments(self) -> None: self.flargs_dct.get("make-summary-multi-line", False) ), help="add a newline before and after the summary of a one-line " - "docstring (default: %(default)s)", + "docstring (default: False)", ) self.parser.add_argument( "--close-quotes-on-newline", @@ -246,7 +245,7 @@ def do_parse_arguments(self) -> None: ), help="place closing triple quotes on a new-line when a " "one-line docstring wraps to two or more lines " - "(default: %(default)s)", + "(default: False)", ) self.parser.add_argument( "--range", @@ -256,7 +255,7 @@ def do_parse_arguments(self) -> None: type=int, nargs=2, help="apply docformatter to docstrings between these " - "lines; line numbers are indexed at 1 (default: %(default)s)", + "lines; line numbers are indexed at 1 (default: None)", ) self.parser.add_argument( "--docstring-length", @@ -266,7 +265,7 @@ def do_parse_arguments(self) -> None: type=int, nargs=2, help="apply docformatter to docstrings of given length range " - "(default: %(default)s)", + "(default: None)", ) self.parser.add_argument( "--non-strict", From d2d3f77d691973b2c41d77d951b8d0ae2f6d46b0 Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Sat, 20 Aug 2022 11:15:25 -0400 Subject: [PATCH 6/7] refactor: move strip_docstring to Formator class --- docformatter.py | 120 +++++++++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 58 deletions(-) diff --git a/docformatter.py b/docformatter.py index 61c0bf1..774e3b9 100755 --- a/docformatter.py +++ b/docformatter.py @@ -70,23 +70,6 @@ CR = "\r" LF = "\n" CRLF = "\r\n" -STR_QUOTE_TYPES = ( - '"""', - "'''", -) -RAW_QUOTE_TYPES = ( - 'r"""', - 'R"""', - "r'''", - "R'''", -) -UCODE_QUOTE_TYPES = ( - 'u"""', - 'U"""', - "u'''", - "U'''", -) -QUOTE_TYPES = STR_QUOTE_TYPES + RAW_QUOTE_TYPES + UCODE_QUOTE_TYPES _PYTHON_LIBS = set(sysconfig.get_paths().values()) @@ -352,6 +335,24 @@ def _do_read_parser_configuration(self) -> None: class Formator: """Format docstrings.""" + STR_QUOTE_TYPES = ( + '"""', + "'''", + ) + RAW_QUOTE_TYPES = ( + 'r"""', + 'R"""', + "r'''", + "R'''", + ) + UCODE_QUOTE_TYPES = ( + 'u"""', + 'U"""', + "u'''", + "U'''", + ) + QUOTE_TYPES = STR_QUOTE_TYPES + RAW_QUOTE_TYPES + UCODE_QUOTE_TYPES + parser = None """Parser object.""" @@ -557,7 +558,7 @@ def _format_code( ) in tokenize.generate_tokens(sio.readline): if ( token_type == tokenize.STRING - and token_string.startswith(QUOTE_TYPES) + and token_string.startswith(self.QUOTE_TYPES) and ( previous_token_type == tokenize.INDENT or only_comments_so_far @@ -624,13 +625,13 @@ def _do_format_docstring( docstring_formatted: str The docstring formatted according the various options. """ - contents, open_quote = strip_docstring(docstring) + contents, open_quote = self._do_strip_docstring(docstring) open_quote = ( f"{open_quote} " if self.args.pre_summary_space else open_quote ) # Skip if there are nested triple double quotes - if contents.count(QUOTE_TYPES[0]): + if contents.count(self.QUOTE_TYPES[0]): return docstring # Do not modify things that start with doctests. @@ -716,6 +717,45 @@ def _do_format_docstring( ).strip() return f"{beginning}{summary_wrapped}{ending}" + def _do_strip_docstring(self, docstring: str) -> Tuple[str, str]: + """Return contents of docstring and opening quote type. + + Strips the docstring of its triple quotes, trailing white space, + and line returns. Determines type of docstring quote (either string, + raw, or unicode) and returns the opening quotes, including the type + identifier, with single quotes replaced by double quotes. + + Parameters + ---------- + docstring: str + The docstring, including the opening and closing triple quotes. + + Returns + ------- + (docstring, open_quote) : tuple + The docstring with the triple quotes removed. + The opening quote type with single quotes replaced by double + quotes. + """ + docstring = docstring.strip() + + for quote in self.QUOTE_TYPES: + if quote in self.RAW_QUOTE_TYPES + self.UCODE_QUOTE_TYPES and ( + docstring.startswith(quote) and docstring.endswith(quote[1:]) + ): + return docstring.split(quote, 1)[1].rsplit(quote[1:], 1)[ + 0 + ].strip(), quote.replace("'", '"') + elif docstring.startswith(quote) and docstring.endswith(quote): + return docstring.split(quote, 1)[1].rsplit(quote, 1)[ + 0 + ].strip(), quote.replace("'", '"') + + raise ValueError( + "docformatter only handles triple-quoted (single or double) " + "strings" + ) + class Encodor: """Encoding and decoding of files.""" @@ -786,6 +826,8 @@ def do_open_with_encoding(self, filename: str, mode: str = "r"): Returns ------- + contents : TextIO + The contents of the file. """ return io.open( filename, mode=mode, encoding=self.encoding, newline="" @@ -993,44 +1035,6 @@ def normalize_line_endings(lines, newline): return "".join([normalize_line(line, newline) for line in lines]) -def strip_docstring(docstring: str) -> Tuple[str, str]: - """Return contents of docstring and opening quote type. - - Strips the docstring of its triple quotes, trailing white space, - and line returns. Determines type of docstring quote (either string, - raw, or unicode) and returns the opening quotes, including the type - identifier, with single quotes replaced by double quotes. - - Parameters - ---------- - docstring: str - The docstring, including the opening and closing triple quotes. - - Returns - ------- - (docstring, open_quote) : tuple - The docstring with the triple quotes removed. - The opening quote type with single quotes replaced by double quotes. - """ - docstring = docstring.strip() - - for quote in QUOTE_TYPES: - if quote in RAW_QUOTE_TYPES + UCODE_QUOTE_TYPES and ( - docstring.startswith(quote) and docstring.endswith(quote[1:]) - ): - return docstring.split(quote, 1)[1].rsplit(quote[1:], 1)[ - 0 - ].strip(), quote.replace("'", '"') - elif docstring.startswith(quote) and docstring.endswith(quote): - return docstring.split(quote, 1)[1].rsplit(quote, 1)[ - 0 - ].strip(), quote.replace("'", '"') - - raise ValueError( - "docformatter only handles triple-quoted (single or double) strings" - ) - - def unwrap_summary(summary): """Return summary with newlines removed in preparation for wrapping.""" return re.sub(r"\s*\n\s*", " ", summary) From c0b041ef874ae212bd47cca4830ec4ca47029717 Mon Sep 17 00:00:00 2001 From: Doyle Rowland Date: Sat, 20 Aug 2022 11:16:03 -0400 Subject: [PATCH 7/7] test: move strip_docstring tests to format_docstring test file --- tests/test_format_docstring.py | 185 ++++++++++++++++++++++++++++++++- tests/test_string_functions.py | 83 --------------- 2 files changed, 184 insertions(+), 84 deletions(-) diff --git a/tests/test_format_docstring.py b/tests/test_format_docstring.py index 5bd49e4..476e2e8 100644 --- a/tests/test_format_docstring.py +++ b/tests/test_format_docstring.py @@ -677,7 +677,7 @@ def test_format_docstring_with_summary_only_and_wrap_and_tab_indentation( test_args, args, ): - """"Should account for length of tab when wrapping. + """Should account for length of tab when wrapping. See PR #69. """ @@ -902,3 +902,186 @@ def test_format_docstring_pre_summary_space( """This one-line docstring will have a leading space."""\ ''', ) + + +class TestStripDocstring: + """Class for testing _do_strip_docstring().""" + + @pytest.mark.unit + @pytest.mark.parametrize("args", [[""]]) + def test_strip_docstring( + self, + test_args, + args, + ): + """Strip triple double quotes from docstring.""" + uut = Formator( + test_args, + sys.stderr, + sys.stdin, + sys.stdout, + ) + + docstring, open_quote = uut._do_strip_docstring( + ''' + """Hello. + + """ + + ''' + ) + assert docstring == "Hello." + assert open_quote == '"""' + + @pytest.mark.unit + @pytest.mark.parametrize("args", [[""]]) + def test_strip_docstring_with_single_quotes( + self, + test_args, + args, + ): + """Strip triple single quotes from docstring.""" + uut = Formator( + test_args, + sys.stderr, + sys.stdin, + sys.stdout, + ) + + docstring, open_quote = uut._do_strip_docstring( + """ + '''Hello. + + ''' + + """ + ) + assert docstring == "Hello." + assert open_quote == '"""' + + @pytest.mark.unit + @pytest.mark.parametrize("args", [[""]]) + def test_strip_docstring_with_empty_string( + self, + test_args, + args, + ): + """Return series of six double quotes when passed empty string.""" + uut = Formator( + test_args, + sys.stderr, + sys.stdin, + sys.stdout, + ) + + docstring, open_quote = uut._do_strip_docstring('""""""') + assert docstring == "" + assert open_quote == '"""' + + @pytest.mark.unit + @pytest.mark.parametrize("args", [[""]]) + def test_strip_docstring_with_raw_string( + self, + test_args, + args, + ): + """Return docstring and raw open quote.""" + uut = Formator( + test_args, + sys.stderr, + sys.stdin, + sys.stdout, + ) + + docstring, open_quote = uut._do_strip_docstring('r"""foo"""') + assert docstring == "foo" + assert open_quote == 'r"""' + + docstring, open_quote = uut._do_strip_docstring("R'''foo'''") + assert docstring == "foo" + assert open_quote == 'R"""' + + @pytest.mark.unit + @pytest.mark.parametrize("args", [[""]]) + def test_strip_docstring_with_unicode_string( + self, + test_args, + args, + ): + """Return docstring and unicode open quote.""" + uut = Formator( + test_args, + sys.stderr, + sys.stdin, + sys.stdout, + ) + + docstring, open_quote = uut._do_strip_docstring("u'''foo'''") + assert docstring == "foo" + assert open_quote == 'u"""' + + docstring, open_quote = uut._do_strip_docstring('U"""foo"""') + assert docstring == "foo" + assert open_quote == 'U"""' + + @pytest.mark.unit + @pytest.mark.parametrize("args", [[""]]) + def test_strip_docstring_with_unknown( + self, + test_args, + args, + ): + """Raise ValueError with single quotes.""" + uut = Formator( + test_args, + sys.stderr, + sys.stdin, + sys.stdout, + ) + + with pytest.raises(ValueError): + uut._do_strip_docstring("foo") + + @pytest.mark.unit + @pytest.mark.parametrize("args", [[""]]) + def test_strip_docstring_with_single_quotes( + self, + test_args, + args, + ): + """Raise ValueError when strings begin with single single quotes. + + See requirement PEP_257_1. See issue #66 for example of docformatter + breaking code when encountering single quote. + """ + uut = Formator( + test_args, + sys.stderr, + sys.stdin, + sys.stdout, + ) + + with pytest.raises(ValueError): + uut._do_strip_docstring("'hello\\''") + + @pytest.mark.unit + @pytest.mark.parametrize("args", [[""]]) + def test_strip_docstring_with_double_quotes( + self, + test_args, + args, + ): + """Raise ValueError when strings begin with single double quotes. + + See requirement PEP_257_1. See issue #66 for example of docformatter + breaking code when encountering single quote. + """ + uut = Formator( + test_args, + sys.stderr, + sys.stdin, + sys.stdout, + ) + + with pytest.raises(ValueError): + uut._do_strip_docstring('"hello\\""') diff --git a/tests/test_string_functions.py b/tests/test_string_functions.py index 87163f8..a0ec0e2 100644 --- a/tests/test_string_functions.py +++ b/tests/test_string_functions.py @@ -447,86 +447,3 @@ def test_remove_section_header(self): line = " \nfoo\nbar\n" assert line == docformatter.remove_section_header(line) - - @pytest.mark.unit - def test_strip_docstring(self): - """Strip triple double quotes from docstring.""" - docstring, open_quote = docformatter.strip_docstring( - ''' - """Hello. - - """ - - ''' - ) - assert docstring == "Hello." - assert open_quote == '"""' - - @pytest.mark.unit - def test_strip_docstring_with_single_quotes(self): - """Strip triple single quotes from docstring.""" - docstring, open_quote == docformatter.strip_docstring( - """ - '''Hello. - - ''' - - """ - ) - assert docstring == "Hello." - assert open_quote == '"""' - - @pytest.mark.unit - def test_strip_docstring_with_empty_string(self): - """Return series of six double quotes when passed empty string.""" - docstring, open_quote = docformatter.strip_docstring('""""""') - assert docstring == "" - assert open_quote == '"""' - - @pytest.mark.unit - def test_strip_docstring_with_raw_string(self): - """Return docstring and raw open quote.""" - docstring, open_quote = docformatter.strip_docstring('r"""foo"""') - assert docstring == "foo" - assert open_quote == 'r"""' - - docstring, open_quote = docformatter.strip_docstring("R'''foo'''") - assert docstring == "foo" - assert open_quote == 'R"""' - - @pytest.mark.unit - def test_strip_docstring_with_unicode_string(self): - """Return docstring and unicode open quote.""" - docstring, open_quote = docformatter.strip_docstring("u'''foo'''") - assert docstring == "foo" - assert open_quote == 'u"""' - - docstring, open_quote = docformatter.strip_docstring('U"""foo"""') - assert docstring == "foo" - assert open_quote == 'U"""' - - @pytest.mark.unit - def test_strip_docstring_with_unknown(self): - """Raise ValueError with single quotes.""" - with pytest.raises(ValueError): - docformatter.strip_docstring("foo") - - @pytest.mark.unit - def test_strip_docstring_with_single_quotes(self): - """Raise ValueError when strings begin with single single quotes. - - See requirement #1. See issue #66 for example of docformatter breaking - code when encountering single quote. - """ - with pytest.raises(ValueError): - docformatter.strip_docstring("'hello\\''") - - @pytest.mark.unit - def test_strip_docstring_with_double_quotes(self): - """Raise ValueError when strings begin with single double quotes. - - See requirement #1. See issue #66 for example of docformatter - breaking code when encountering single quote. - """ - with pytest.raises(ValueError): - docformatter.strip_docstring('"hello\\""')