From dbeaaa63611c19b942261ec0115c57290c8f8719 Mon Sep 17 00:00:00 2001 From: Timothy Crosley Date: Sat, 3 Oct 2020 01:01:49 -0700 Subject: [PATCH] Implemented #1487: Improved handling of encoding errors. --- CHANGELOG.md | 3 ++- isort/exceptions.py | 14 +++++++++++++- isort/io.py | 15 ++++++++++++--- isort/main.py | 29 +++++++++++++++++++++++++---- tests/unit/test_exceptions.py | 8 ++++++++ tests/unit/test_main.py | 31 +++++++++++++++++++++++++++++++ 6 files changed, 91 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 150256ea0..e7fb1aab1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ Find out more about isort's release policy [here](https://pycqa.github.io/isort/ - Implemented #1494: Default to sorting imports within `.pxd` files. - Implemented #1502: Improved float-to-top behavior when there is an existing import section present at top-of-file. - Implemented #1511: Support for easily seeing all files isort will be ran against using `isort . --show-files`. + - Implemented #1487: Improved handling of encoding errors. - Improved handling of unsupported configuration option errors (see #1475). - Fixed #1463: Better interactive documentation for future option. - Fixed #1461: Quiet config option not respected by file API in some circumstances. @@ -16,7 +17,7 @@ Find out more about isort's release policy [here](https://pycqa.github.io/isort/ - Fixed #1492: --check does not work with stdin source. - Fixed #1499: isort gets confused by single line, multi-line style comments when using float-to-top. Potentially breaking changes: - - Fixed #1486: "Google" profile is not quite Google style. + - Fixed #1486: "Google" profile is not quite Google style. Goal Zero: (Tickets related to aspirational goal of achieving 0 regressions for remaining 5.0.0 lifespan): - Implemented #1472: Full testing of stdin CLI Options diff --git a/isort/exceptions.py b/isort/exceptions.py index 265928e98..b98454a2c 100644 --- a/isort/exceptions.py +++ b/isort/exceptions.py @@ -1,5 +1,6 @@ """All isort specific exception classes should be defined here""" -from typing import Any, Dict +from pathlib import Path +from typing import Any, Dict, Union from .profiles import profiles @@ -157,3 +158,14 @@ def __init__(self, unsupported_settings: Dict[str, Dict[str, str]]): "https://pycqa.github.io/isort/docs/configuration/options/.\n" ) self.unsupported_settings = unsupported_settings + + +class UnsupportedEncoding(ISortError): + """Raised when isort encounters an encoding error while trying to read a file""" + + def __init__( + self, + filename: Union[str, Path], + ): + super().__init__(f"Unknown or unsupported encoding in {filename}") + self.filename = filename diff --git a/isort/io.py b/isort/io.py index a0357347b..7ff2807d2 100644 --- a/isort/io.py +++ b/isort/io.py @@ -4,7 +4,9 @@ from contextlib import contextmanager from io import BytesIO, StringIO, TextIOWrapper from pathlib import Path -from typing import Iterator, NamedTuple, TextIO, Union +from typing import Callable, Iterator, NamedTuple, TextIO, Union + +from isort.exceptions import UnsupportedEncoding _ENCODING_PATTERN = re.compile(br"^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)") @@ -14,9 +16,16 @@ class File(NamedTuple): path: Path encoding: str + @staticmethod + def detect_encoding(filename: str, readline: Callable[[], bytes]): + try: + return tokenize.detect_encoding(readline)[0] + except Exception: + raise UnsupportedEncoding(filename) + @staticmethod def from_contents(contents: str, filename: str) -> "File": - encoding, _ = tokenize.detect_encoding(BytesIO(contents.encode("utf-8")).readline) + encoding = File.detect_encoding(filename, BytesIO(contents.encode("utf-8")).readline) return File(StringIO(contents), path=Path(filename).resolve(), encoding=encoding) @property @@ -30,7 +39,7 @@ def _open(filename): """ buffer = open(filename, "rb") try: - encoding, _ = tokenize.detect_encoding(buffer.readline) + encoding = File.detect_encoding(filename, buffer.readline) buffer.seek(0) text = TextIOWrapper(buffer, encoding, line_buffering=True, newline="") text.mode = "r" # type: ignore diff --git a/isort/main.py b/isort/main.py index c518a92dc..841caca52 100644 --- a/isort/main.py +++ b/isort/main.py @@ -10,7 +10,7 @@ from warnings import warn from . import __version__, api, sections -from .exceptions import FileSkipped +from .exceptions import FileSkipped, UnsupportedEncoding from .format import create_terminal_printer from .logo import ASCII_ART from .profiles import profiles @@ -67,9 +67,10 @@ class SortAttempt: - def __init__(self, incorrectly_sorted: bool, skipped: bool) -> None: + def __init__(self, incorrectly_sorted: bool, skipped: bool, supported_encoding: bool) -> None: self.incorrectly_sorted = incorrectly_sorted self.skipped = skipped + self.supported_encoding = supported_encoding def sort_imports( @@ -88,7 +89,7 @@ def sort_imports( incorrectly_sorted = not api.check_file(file_name, config=config, **kwargs) except FileSkipped: skipped = True - return SortAttempt(incorrectly_sorted, skipped) + return SortAttempt(incorrectly_sorted, skipped, True) else: try: incorrectly_sorted = not api.sort_file( @@ -100,10 +101,14 @@ def sort_imports( ) except FileSkipped: skipped = True - return SortAttempt(incorrectly_sorted, skipped) + return SortAttempt(incorrectly_sorted, skipped, True) except (OSError, ValueError) as error: warn(f"Unable to parse file {file_name} due to {error}") return None + except UnsupportedEncoding: + if config.verbose: + warn(f"Encoding not supported for {file_name}") + return SortAttempt(incorrectly_sorted, skipped, False) except Exception: printer = create_terminal_printer(color=config.color_output) printer.error( @@ -852,6 +857,7 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] = remapped_deprecated_args = config_dict.pop("remapped_deprecated_args", False) wrong_sorted_files = False all_attempt_broken = False + no_valid_encodings = False if "src_paths" in config_dict: config_dict["src_paths"] = { @@ -901,6 +907,7 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] = return num_skipped = 0 num_broken = 0 + num_invalid_encoding = 0 if config.verbose: print(ASCII_ART) @@ -934,6 +941,7 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] = # If any files passed in are missing considered as error, should be removed is_no_attempt = True + any_encoding_valid = False for sort_attempt in attempt_iterator: if not sort_attempt: continue # pragma: no cover - shouldn't happen, satisfies type constraint @@ -944,6 +952,12 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] = num_skipped += ( 1 # pragma: no cover - shouldn't happen, due to skip in iter_source_code ) + + if not sort_attempt.supported_encoding: + num_invalid_encoding += 1 + else: + any_encoding_valid = True + is_no_attempt = False num_skipped += len(skipped) @@ -965,6 +979,8 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] = if num_broken > 0 and is_no_attempt: all_attempt_broken = True + if num_invalid_encoding > 0 and not any_encoding_valid: + no_valid_encodings = True if not config.quiet and (remapped_deprecated_args or deprecated_flags): if remapped_deprecated_args: @@ -988,6 +1004,11 @@ def main(argv: Optional[Sequence[str]] = None, stdin: Optional[TextIOWrapper] = if all_attempt_broken: sys.exit(1) + if no_valid_encodings: + printer = create_terminal_printer(color=config.color_output) + printer.error("No valid encodings.") + sys.exit(1) + if __name__ == "__main__": main() diff --git a/tests/unit/test_exceptions.py b/tests/unit/test_exceptions.py index bb6ee2a31..d9eae8bf8 100644 --- a/tests/unit/test_exceptions.py +++ b/tests/unit/test_exceptions.py @@ -90,3 +90,11 @@ def setup_class(self): def test_variables(self): assert self.instance.unsupported_settings == {"apply": {"value": "true", "source": "/"}} + + +class TestUnsupportedEncoding(TestISortError): + def setup_class(self): + self.instance = exceptions.UnsupportedEncoding("file.py") + + def test_variables(self): + assert self.instance.filename == "file.py" diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py index ef837b614..28da37eb8 100644 --- a/tests/unit/test_main.py +++ b/tests/unit/test_main.py @@ -719,3 +719,34 @@ def test_isort_with_stdin(capsys): import sys """ ) + + +def test_unsupported_encodings(tmpdir, capsys): + tmp_file = tmpdir.join("file.py") + # fmt: off + tmp_file.write( + u''' +# [syntax-error]\ +# -*- coding: IBO-8859-1 -*- +""" check correct unknown encoding declaration +""" +__revision__ = 'יייי' +''' + ) + # fmt: on + + # should throw an error if only unsupported encoding provided + with pytest.raises(SystemExit): + main.main([str(tmp_file)]) + out, error = capsys.readouterr() + + assert "No valid encodings." in error + + # should not throw an error if at least one valid encoding found + normal_file = tmpdir.join("file1.py") + normal_file.write("import os\nimport sys") + + main.main([str(tmp_file), str(normal_file), "--verbose"]) + out, error = capsys.readouterr() + + assert not error