From 9531e1b7789e74a6c93d7c62a97e8082cf8ad010 Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Sat, 5 Jun 2021 22:42:34 -0400 Subject: [PATCH 01/21] Initial mypyc support changes --- setup.py | 23 ++++++++++++- src/black/__init__.py | 2 +- src/black/brackets.py | 4 ++- src/black/linegen.py | 24 +++++++++----- src/black/mode.py | 3 +- src/black/nodes.py | 4 +-- src/black/parsing.py | 15 ++++----- src/black/trans.py | 59 +++++++++++++++++++++------------- src/blib2to3/pgen2/tokenize.py | 2 +- 9 files changed, 90 insertions(+), 46 deletions(-) diff --git a/setup.py b/setup.py index 5549ae35342..fb2b154a7a7 100644 --- a/setup.py +++ b/setup.py @@ -29,11 +29,32 @@ def get_long_description() -> str: if USE_MYPYC: mypyc_targets = [ "src/black/__init__.py", + "src/black/nodes.py", + "src/black/mode.py", + "src/black/files.py", + "src/black/concurrency.py", + "src/black/const.py", + "src/black/linegen.py", + "src/black/rusty.py", + "src/black/numerics.py", + "src/black/comments.py", + "src/black/report.py", + "src/black/cache.py", + "src/black/lines.py", + "src/black/debug.py", + "src/black/strings.py", + "src/black/parsing.py", + "src/black/output.py", + "src/black/brackets.py", + "src/black/trans.py", "src/blib2to3/pytree.py", "src/blib2to3/pygram.py", + "src/blib2to3/pgen2/conv.py", + "src/blib2to3/pgen2/literals.py", "src/blib2to3/pgen2/parse.py", "src/blib2to3/pgen2/grammar.py", "src/blib2to3/pgen2/token.py", + "src/blib2to3/pgen2/tokenize.py", "src/blib2to3/pgen2/driver.py", "src/blib2to3/pgen2/pgen.py", ] @@ -41,7 +62,7 @@ def get_long_description() -> str: from mypyc.build import mypycify opt_level = os.getenv("MYPYC_OPT_LEVEL", "3") - ext_modules = mypycify(mypyc_targets, opt_level=opt_level) + ext_modules = mypycify(mypyc_targets, opt_level=opt_level, verbose=True) else: ext_modules = [] diff --git a/src/black/__init__.py b/src/black/__init__.py index 8e2123d50cc..6998c1ea593 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -359,7 +359,7 @@ def main( experimental_string_processing: bool, quiet: bool, verbose: bool, - required_version: str, + required_version: Optional[str], include: Pattern, exclude: Optional[Pattern], extend_exclude: Optional[Pattern], diff --git a/src/black/brackets.py b/src/black/brackets.py index bb865a0d5b7..d92d7c11c81 100644 --- a/src/black/brackets.py +++ b/src/black/brackets.py @@ -49,7 +49,9 @@ DOT_PRIORITY: Final = 1 -class BracketMatchError(KeyError): +# Ideally this would be a subclass of KeyError, but mypyc doesn't like that. +# See also: https://mypyc.readthedocs.io/en/latest/native_classes.html#inheritance. +class BracketMatchError(Exception): """Raised when an opening bracket is unable to be matched to a closing bracket.""" diff --git a/src/black/linegen.py b/src/black/linegen.py index 76b553a959a..1691cc51ba8 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -5,8 +5,6 @@ import sys from typing import Collection, Iterator, List, Optional, Set, Union -from dataclasses import dataclass, field - from black.nodes import WHITESPACE, STATEMENT, STANDALONE_COMMENT from black.nodes import ASSIGNMENTS, OPENING_BRACKETS, CLOSING_BRACKETS from black.nodes import Visitor, syms, first_child_is_arith, ensure_visible @@ -40,7 +38,8 @@ class CannotSplit(CannotTransform): """A readable split that fits the allotted line length is impossible.""" -@dataclass +# This isn't a dataclass because @dataclass + Generic breaks mypyc. +# See also https://github.com/mypyc/mypyc/issues/827. 
class LineGenerator(Visitor[Line]): """Generates reformatted Line objects. Empty lines are not emitted. @@ -48,9 +47,11 @@ class LineGenerator(Visitor[Line]): in ways that will no longer stringify to valid Python code on the tree. """ - mode: Mode - remove_u_prefix: bool = False - current_line: Line = field(init=False) + def __init__(self, mode: Mode, remove_u_prefix: bool = False) -> None: + self.mode = mode + self.remove_u_prefix = remove_u_prefix + self.current_line: Line + self.__post_init__() def line(self, indent: int = 0) -> Iterator[Line]: """Generate a line. @@ -335,7 +336,9 @@ def transform_line( transformers = [left_hand_split] else: - def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]: + def _rhs( + self: object, line: Line, features: Collection[Feature] + ) -> Iterator[Line]: """Wraps calls to `right_hand_split`. The calls increasingly `omit` right-hand trailers (bracket pairs with @@ -362,6 +365,11 @@ def rhs(line: Line, features: Collection[Feature]) -> Iterator[Line]: line, line_length=mode.line_length, features=features ) + # HACK: functions (like rhs) compiled by mypyc don't retain their __name__ + # attribute which is needed in `run_transformer` further down. Unfortunately + # a nested class breaks mypyc too. So a class must be created via type ... + rhs = type("rhs", (), {"__call__": _rhs})() + if mode.experimental_string_processing: if line.inside_brackets: transformers = [ @@ -962,7 +970,7 @@ def run_transformer( result.extend(transform_line(transformed_line, mode=mode, features=features)) if ( - transform.__name__ != "rhs" + transform.__class__.__name__ != "rhs" or not line.bracket_tracker.invisible or any(bracket.value for bracket in line.bracket_tracker.invisible) or line.contains_multiline_strings() diff --git a/src/black/mode.py b/src/black/mode.py index e2ce322da5c..305ec0eb88a 100644 --- a/src/black/mode.py +++ b/src/black/mode.py @@ -6,6 +6,7 @@ from dataclasses import dataclass, field from enum import Enum +from operator import attrgetter from typing import Dict, Set from black.const import DEFAULT_LINE_LENGTH @@ -108,7 +109,7 @@ def get_cache_key(self) -> str: if self.target_versions: version_str = ",".join( str(version.value) - for version in sorted(self.target_versions, key=lambda v: v.value) + for version in sorted(self.target_versions, key=attrgetter("value")) ) else: version_str = "-" diff --git a/src/black/nodes.py b/src/black/nodes.py index e0db9a42426..41cd9c3cf8d 100644 --- a/src/black/nodes.py +++ b/src/black/nodes.py @@ -439,8 +439,8 @@ def prev_siblings_are(node: Optional[LN], tokens: List[Optional[NodeType]]) -> b def last_two_except(leaves: List[Leaf], omit: Collection[LeafID]) -> Tuple[Leaf, Leaf]: """Return (penultimate, last) leaves skipping brackets in `omit` and contents.""" - stop_after = None - last = None + stop_after: Optional[Leaf] = None + last: Optional[Leaf] = None for leaf in reversed(leaves): if stop_after: if leaf is stop_after: diff --git a/src/black/parsing.py b/src/black/parsing.py index 8e9feea9120..d1499ecb6e8 100644 --- a/src/black/parsing.py +++ b/src/black/parsing.py @@ -3,7 +3,7 @@ """ import ast import sys -from typing import Iterable, Iterator, List, Set, Union +from typing import Iterable, Iterator, List, Set, Tuple, Type, Union # lib2to3 fork from blib2to3.pytree import Node, Leaf @@ -16,6 +16,8 @@ from black.nodes import syms try: + # TODO: currently the code assumes ast3 and ast27 will be availabe all the time, + # unfortunately this isn't the case these days from typed_ast import ast3, ast27 
except ImportError: if sys.version_info < (3, 8): @@ -27,7 +29,7 @@ ) sys.exit(1) else: - ast3 = ast27 = ast + pass class InvalidInput(ValueError): @@ -121,12 +123,7 @@ def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]: return ast3.parse(src, filename, feature_version=feature_version) except SyntaxError: continue - if ast27.__name__ == "ast": - raise SyntaxError( - "The requested source code has invalid Python 3 syntax.\n" - "If you are trying to format Python 2 files please reinstall Black" - " with the 'python2' extra: `python3 -m pip install black[python2]`." - ) + return ast27.parse(src) @@ -141,7 +138,7 @@ def stringify_ast( for field in sorted(node._fields): # noqa: F402 # TypeIgnore has only one field 'lineno' which breaks this comparison - type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore) + type_ignore_classes: Tuple[Type, ...] = (ast3.TypeIgnore, ast27.TypeIgnore) if sys.version_info >= (3, 8): type_ignore_classes += (ast.TypeIgnore,) if isinstance(node, type_ignore_classes): diff --git a/src/black/trans.py b/src/black/trans.py index 023dcd3618a..d918ef111a2 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -8,6 +8,7 @@ from typing import ( Any, Callable, + ClassVar, Collection, Dict, Iterable, @@ -20,6 +21,14 @@ TypeVar, Union, ) +import sys + +if sys.version_info < (3, 8): + from typing_extensions import Final +else: + from typing import Final + +from mypy_extensions import trait from black.rusty import Result, Ok, Err @@ -62,7 +71,6 @@ def TErr(err_msg: str) -> Err[CannotTransform]: return Err(cant_transform) -@dataclass # type: ignore class StringTransformer(ABC): """ An implementation of the Transformer protocol that relies on its @@ -90,9 +98,13 @@ class StringTransformer(ABC): as much as possible. """ - line_length: int - normalize_strings: bool - __name__ = "StringTransformer" + __name__: Final = "StringTransformer" + + # Ideally this would be a dataclass, but unfortunately mypyc breaks when used with + # `abc.ABC`. + def __init__(self, line_length: int, normalize_strings: bool) -> None: + self.line_length = line_length + self.normalize_strings = normalize_strings @abstractmethod def do_match(self, line: Line) -> TMatchResult: @@ -184,6 +196,7 @@ class CustomSplit: break_idx: int +@trait class CustomSplitMapMixin: """ This mixin class is used to map merged strings to a sequence of @@ -191,8 +204,10 @@ class CustomSplitMapMixin: the resultant substrings go over the configured max line length. """ - _Key = Tuple[StringID, str] - _CUSTOM_SPLIT_MAP: Dict[_Key, Tuple[CustomSplit, ...]] = defaultdict(tuple) + _Key: ClassVar = Tuple[StringID, str] + _CUSTOM_SPLIT_MAP: ClassVar[Dict[_Key, Tuple[CustomSplit, ...]]] = defaultdict( + tuple + ) @staticmethod def _get_key(string: str) -> "CustomSplitMapMixin._Key": @@ -243,7 +258,7 @@ def has_custom_splits(self, string: str) -> bool: return key in self._CUSTOM_SPLIT_MAP -class StringMerger(CustomSplitMapMixin, StringTransformer): +class StringMerger(StringTransformer, CustomSplitMapMixin): """StringTransformer that merges strings together. Requirements: @@ -739,7 +754,7 @@ class BaseStringSplitter(StringTransformer): * The target string is not a multiline (i.e. triple-quote) string. 
""" - STRING_OPERATORS = [ + STRING_OPERATORS: Final = [ token.EQEQUAL, token.GREATER, token.GREATEREQUAL, @@ -927,7 +942,7 @@ def _get_max_string_length(self, line: Line, string_idx: int) -> int: return max_string_length -class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): +class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): """ StringTransformer that splits "atom" strings (i.e. strings which exist on lines by themselves). @@ -965,9 +980,9 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter): CustomSplit objects and add them to the custom split map. """ - MIN_SUBSTR_SIZE = 6 + MIN_SUBSTR_SIZE: Final = 6 # Matches an "f-expression" (e.g. {var}) that might be found in an f-string. - RE_FEXPR = r""" + RE_FEXPR: Final = r""" (? List[Leaf]: return string_op_leaves -class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter): +class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin): """ StringTransformer that splits non-"atom" strings (i.e. strings that do not exist on lines by themselves). @@ -1811,20 +1826,20 @@ class StringParser: ``` """ - DEFAULT_TOKEN = -1 + DEFAULT_TOKEN: Final = 20210605 # String Parser States - START = 1 - DOT = 2 - NAME = 3 - PERCENT = 4 - SINGLE_FMT_ARG = 5 - LPAR = 6 - RPAR = 7 - DONE = 8 + START: Final = 1 + DOT: Final = 2 + NAME: Final = 3 + PERCENT: Final = 4 + SINGLE_FMT_ARG: Final = 5 + LPAR: Final = 6 + RPAR: Final = 7 + DONE: Final = 8 # Lookup Table for Next State - _goto: Dict[Tuple[ParserState, NodeType], ParserState] = { + _goto: Final[Dict[Tuple[ParserState, NodeType], ParserState]] = { # A string trailer may start with '.' OR '%'. (START, token.DOT): DOT, (START, token.PERCENT): PERCENT, diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index bad79b2dc2c..dfd7d68ec2a 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -427,7 +427,7 @@ def generate_tokens( # `await` as keywords. async_keywords = False if grammar is None else grammar.async_keywords # 'stashed' and 'async_*' are used for async/await parsing - stashed = None + stashed: Optional[GoodTokenInfo] = None async_def = False async_def_indent = 0 async_def_nl = False From 6e9e0fb05cf84d7219d1202aaa65e2aef50b3616 Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Sun, 6 Jun 2021 19:29:02 -0400 Subject: [PATCH 02/21] Make the test suite usable for testing Pretty much marking a few tests as straight incompatible or marking certain functions with monkeypatching support. Also address the lack of 3.8 or higher support in src/black/parsing.py caused by my original changes. Finally tweak the compile configuration. --- setup.py | 6 ++++-- src/black/__init__.py | 5 ++++- src/black/files.py | 2 ++ src/black/output.py | 3 +++ src/black/parsing.py | 17 ++++++++++++----- tests/test_black.py | 15 +++++++++++++-- 6 files changed, 38 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index fb2b154a7a7..f0c934a606a 100644 --- a/setup.py +++ b/setup.py @@ -38,10 +38,12 @@ def get_long_description() -> str: "src/black/rusty.py", "src/black/numerics.py", "src/black/comments.py", - "src/black/report.py", + # Leave uncompiled to save bytes since this isn't performance sensitive at all. + # "src/black/report.py", "src/black/cache.py", "src/black/lines.py", - "src/black/debug.py", + # Kept uncompiled due being useless (and breaks tests) when compiled. 
+ # "src/black/debug.py", "src/black/strings.py", "src/black/parsing.py", "src/black/output.py", diff --git a/src/black/__init__.py b/src/black/__init__.py index 6998c1ea593..b9ece8e19e1 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -167,7 +167,10 @@ def validate_regex( raise click.BadParameter("Not a valid regular expression") -@click.command(context_settings=dict(help_option_names=["-h", "--help"])) +@click.command( + context_settings=dict(help_option_names=["-h", "--help"]), + help="The uncompromising code formatter.", +) @click.option("-c", "--code", type=str, help="Format the code passed in as a string.") @click.option( "-l", diff --git a/src/black/files.py b/src/black/files.py index b9cefd317e0..ce6ae95e0a2 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -17,6 +17,7 @@ TYPE_CHECKING, ) +from mypy_extensions import mypyc_attr from pathspec import PathSpec import toml @@ -86,6 +87,7 @@ def find_pyproject_toml(path_search_start: Tuple[str, ...]) -> Optional[str]: return None +@mypyc_attr(patchable=True) def parse_pyproject_toml(path_config: str) -> Dict[str, Any]: """Parse a pyproject toml file, pulling out relevant parts for Black diff --git a/src/black/output.py b/src/black/output.py index c253c85e90e..48644e0bbf7 100644 --- a/src/black/output.py +++ b/src/black/output.py @@ -10,6 +10,7 @@ from click import echo, style +@mypyc_attr(patchable=True) def _out(message: Optional[str] = None, nl: bool = True, **styles: Any) -> None: if message is not None: if "bold" not in styles: @@ -18,6 +19,7 @@ def _out(message: Optional[str] = None, nl: bool = True, **styles: Any) -> None: echo(message, nl=nl, err=True) +@mypyc_attr(patchable=True) def _err(message: Optional[str] = None, nl: bool = True, **styles: Any) -> None: if message is not None: if "fg" not in styles: @@ -26,6 +28,7 @@ def _err(message: Optional[str] = None, nl: bool = True, **styles: Any) -> None: echo(message, nl=nl, err=True) +@mypyc_attr(patchable=True) def out(message: Optional[str] = None, nl: bool = True, **styles: Any) -> None: _out(message, nl=nl, **styles) diff --git a/src/black/parsing.py b/src/black/parsing.py index d1499ecb6e8..c91234891f2 100644 --- a/src/black/parsing.py +++ b/src/black/parsing.py @@ -3,7 +3,7 @@ """ import ast import sys -from typing import Iterable, Iterator, List, Set, Tuple, Type, Union +from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union # lib2to3 fork from blib2to3.pytree import Node, Leaf @@ -15,9 +15,9 @@ from black.mode import TargetVersion, Feature, supports_feature from black.nodes import syms +ast3: Any +ast27: Any try: - # TODO: currently the code assumes ast3 and ast27 will be availabe all the time, - # unfortunately this isn't the case these days from typed_ast import ast3, ast27 except ImportError: if sys.version_info < (3, 8): @@ -29,7 +29,7 @@ ) sys.exit(1) else: - pass + ast3 = ast27 = ast class InvalidInput(ValueError): @@ -124,6 +124,13 @@ def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]: except SyntaxError: continue + if ast27.__name__ == "ast": + raise SyntaxError( + "The requested source code has invalid Python 3 syntax.\n" + "If you are trying to format Python 2 files please reinstall Black" + " with the 'python2' extra: `python3 -m pip install black[python2]`." + ) + return ast27.parse(src) @@ -185,7 +192,7 @@ def stringify_ast( # To normalize, we strip any leading and trailing space from # each line... 
stripped = [line.strip() for line in value.splitlines()] - normalized = lineend.join(stripped) # type: ignore[attr-defined] + normalized = lineend.join(stripped) # ...and remove any blank lines at the beginning and end of # the whole string normalized = normalized.strip() diff --git a/tests/test_black.py b/tests/test_black.py index 42ac119324c..b56a3f02033 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -1596,6 +1596,7 @@ def test_get_sources_with_stdin_filename_and_force_exclude(self) -> None: ) self.assertEqual([], sorted(sources)) + @pytest.mark.incompatible_with_mypyc def test_reformat_one_with_stdin(self) -> None: with patch( "black.format_stdin_to_stdout", @@ -1613,6 +1614,7 @@ def test_reformat_one_with_stdin(self) -> None: fsts.assert_called_once() report.done.assert_called_with(path, black.Changed.YES) + @pytest.mark.incompatible_with_mypyc def test_reformat_one_with_stdin_filename(self) -> None: with patch( "black.format_stdin_to_stdout", @@ -1635,6 +1637,7 @@ def test_reformat_one_with_stdin_filename(self) -> None: # __BLACK_STDIN_FILENAME__ should have been stripped report.done.assert_called_with(expected, black.Changed.YES) + @pytest.mark.incompatible_with_mypyc def test_reformat_one_with_stdin_filename_pyi(self) -> None: with patch( "black.format_stdin_to_stdout", @@ -1659,6 +1662,7 @@ def test_reformat_one_with_stdin_filename_pyi(self) -> None: # __BLACK_STDIN_FILENAME__ should have been stripped report.done.assert_called_with(expected, black.Changed.YES) + @pytest.mark.incompatible_with_mypyc def test_reformat_one_with_stdin_and_existing_path(self) -> None: with patch( "black.format_stdin_to_stdout", @@ -1853,6 +1857,7 @@ def test_assert_equivalent_different_asts(self) -> None: with self.assertRaises(AssertionError): black.assert_equivalent("{}", "None") + @pytest.mark.incompatible_with_mypyc def test_symlink_out_of_root_directory(self) -> None: path = MagicMock() root = THIS_DIR.resolve() @@ -1981,6 +1986,7 @@ def test_read_pyproject_toml(self) -> None: self.assertEqual(config["exclude"], r"\.pyi?$") self.assertEqual(config["include"], r"\.py?$") + @pytest.mark.incompatible_with_mypyc def test_find_project_root(self) -> None: with TemporaryDirectory() as workspace: root = Path(workspace) @@ -2173,6 +2179,7 @@ def test_code_option_color_diff(self) -> None: assert output == result_diff, "The output did not match the expected value." assert result.exit_code == 0, "The exit code is incorrect." + @pytest.mark.incompatible_with_mypyc def test_code_option_safe(self) -> None: """Test that the code option throws an error when the sanity checks fail.""" # Patch black.assert_equivalent to ensure the sanity checks fail @@ -2242,8 +2249,12 @@ def test_code_option_parent_config(self) -> None: ), "Incorrect config loaded." -with open(black.__file__, "r", encoding="utf-8") as _bf: - black_source_lines = _bf.readlines() +try: + with open(black.__file__, "r", encoding="utf-8") as _bf: + black_source_lines = _bf.readlines() +except UnicodeDecodeError: + # Probably due to Black being compiled; just ignore. + pass def tracefunc(frame: types.FrameType, event: str, arg: Any) -> Callable: From acb77f7951229b2670edc3fad366ec256fa0c86f Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Sat, 12 Jun 2021 14:13:26 -0400 Subject: [PATCH 03/21] Fix mypyc KeyError on src/black/parsing.py Typing the ast3 / ast27 variables as Any breaks mypyc. More details in the comment added in this commit. 
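In short: bind the typed-ast classes to concretely typed module-level names once and
isinstance() against those, instead of reaching through the Any-typed module objects.
A rough sketch of the pattern (simplified, helper name made up; assumes Python 3.8+
for typing.Final):

    from typing import Any, Final, Type
    import ast

    ast3: Any = ast  # stand-in for typed_ast.ast3 when it's importable

    # Early-bound, concretely typed alias. mypyc copes fine with isinstance()
    # against this, while `isinstance(x, ast3.AST)` through the Any-typed
    # module reference is what tripped up the compile.
    ast3_AST: Final[Type[ast.AST]] = ast3.AST

    def is_ast_like(value: object) -> bool:
        return isinstance(value, (ast.AST, ast3_AST))
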
Many thanks goes to Jelle for helping out, with their insight I was able to find a workaround. --- pyproject.toml | 3 +++ src/black/parsing.py | 19 +++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 79060fc422d..4e862b0bc6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,3 +32,6 @@ optional-tests = [ "no_python2: run when `python2` extra NOT installed", "no_blackd: run when `d` extra NOT installed", ] +markers = [ + "incompatible_with_mypyc: run when testing mypyc compiled black" +] diff --git a/src/black/parsing.py b/src/black/parsing.py index c91234891f2..2457570b0b0 100644 --- a/src/black/parsing.py +++ b/src/black/parsing.py @@ -5,6 +5,11 @@ import sys from typing import Any, Iterable, Iterator, List, Set, Tuple, Type, Union +if sys.version_info < (3, 8): + from typing_extensions import Final +else: + from typing import Final + # lib2to3 fork from blib2to3.pytree import Node, Leaf from blib2to3 import pygram, pytree @@ -134,6 +139,10 @@ def parse_ast(src: str) -> Union[ast.AST, ast3.AST, ast27.AST]: return ast27.parse(src) +ast3_AST: Final[Type] = ast3.AST +ast27_AST: Final[Type] = ast27.AST + + def stringify_ast( node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0 ) -> Iterator[str]: @@ -173,7 +182,13 @@ def stringify_ast( elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)): yield from stringify_ast(item, depth + 2) - elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)): + # Note that we are referencing the typed-ast ASTs via global variables and not + # direct module attribute accesses because that breaks mypyc. It's probably + # something to do with the ast3 / ast27 variables being marked as Any leading + # mypy to think this branch is always taken, leaving the rest of the code + # unanalyzed. Tighting up the types for the typed-ast AST types avoids the + # mypyc crash. + elif isinstance(value, (ast.AST, ast3_AST, ast27_AST)): yield from stringify_ast(value, depth + 2) else: @@ -192,7 +207,7 @@ def stringify_ast( # To normalize, we strip any leading and trailing space from # each line... stripped = [line.strip() for line in value.splitlines()] - normalized = lineend.join(stripped) + normalized = lineend.join(stripped) # type: ignore[attr-defined] # ...and remove any blank lines at the beginning and end of # the whole string normalized = normalized.strip() From a37fb7750d9065dfe7938390244bbb21de542f4f Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Wed, 30 Jun 2021 14:13:13 -0400 Subject: [PATCH 04/21] Saves ~100 kB on my Linux machine :) before .whl was 1.3M and now it is 1.2M --- setup.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index f0c934a606a..ea20f2ab988 100644 --- a/setup.py +++ b/setup.py @@ -31,8 +31,9 @@ def get_long_description() -> str: "src/black/__init__.py", "src/black/nodes.py", "src/black/mode.py", - "src/black/files.py", - "src/black/concurrency.py", + # Ditto bytes savings. + # "src/black/files.py", + # "src/black/concurrency.py", "src/black/const.py", "src/black/linegen.py", "src/black/rusty.py", @@ -46,7 +47,8 @@ def get_long_description() -> str: # "src/black/debug.py", "src/black/strings.py", "src/black/parsing.py", - "src/black/output.py", + # Ditto again about saving btes. 
+ # "src/black/output.py", "src/black/brackets.py", "src/black/trans.py", "src/blib2to3/pytree.py", From 2ccc774b32836ee2a22457dfff6184c8f7f3ede6 Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Wed, 30 Jun 2021 17:26:06 -0400 Subject: [PATCH 05/21] Strings specific micro-optimization (1-10% perf boost) --- src/black/strings.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/black/strings.py b/src/black/strings.py index 80f588f5119..8294f1ffc6d 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -4,10 +4,19 @@ import regex as re import sys +from functools import lru_cache from typing import List, Pattern +if sys.version_info < (3, 8): + from typing_extensions import Final +else: + from typing import Final -STRING_PREFIX_CHARS = "furbFURB" # All possible string prefix characters. + +STRING_PREFIX_CHARS: Final = "furbFURB" # All possible string prefix characters. +STRING_PREFIX_RE: Final = re.compile( + r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", re.DOTALL +) def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str: @@ -133,7 +142,7 @@ def normalize_string_prefix(s: str, remove_u_prefix: bool = False) -> str: If remove_u_prefix is given, also removes any u prefix from the string. """ - match = re.match(r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", s, re.DOTALL) + match = STRING_PREFIX_RE.match(s) assert match is not None, f"failed to match string {s!r}" orig_prefix = match.group(1) new_prefix = orig_prefix.replace("F", "f").replace("B", "b").replace("U", "u") @@ -142,6 +151,11 @@ def normalize_string_prefix(s: str, remove_u_prefix: bool = False) -> str: return f"{new_prefix}{match.group(2)}" +@lru_cache(maxsize=None) +def _cached_compile(pattern: str) -> re.Pattern: + return re.compile(pattern) + + def normalize_string_quotes(s: str) -> str: """Prefer double quotes but only if it doesn't cause more escaping. 
@@ -166,9 +180,9 @@ def normalize_string_quotes(s: str) -> str: return s # There's an internal error prefix = s[:first_quote_pos] - unescaped_new_quote = re.compile(rf"(([^\\]|^)(\\\\)*){new_quote}") - escaped_new_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}") - escaped_orig_quote = re.compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}") + unescaped_new_quote = _cached_compile(rf"(([^\\]|^)(\\\\)*){new_quote}") + escaped_new_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){new_quote}") + escaped_orig_quote = _cached_compile(rf"([^\\]|^)\\((?:\\\\)*){orig_quote}") body = s[first_quote_pos + len(orig_quote) : -len(orig_quote)] if "r" in prefix.casefold(): if unescaped_new_quote.search(body): From 6f60e6ea45beaf217e7e209d902779814891d41b Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Wed, 30 Jun 2021 21:28:17 -0400 Subject: [PATCH 06/21] Looks like I'll be marking more and more tests --- tests/test_black.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_black.py b/tests/test_black.py index b56a3f02033..5459cf5f9a8 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -985,6 +985,7 @@ def test_get_future_imports(self) -> None: ) self.assertEqual({"unicode_literals", "print"}, black.get_future_imports(node)) + @pytest.mark.incompatible_with_mypyc def test_debug_visitor(self) -> None: source, _ = read_data("debug_visitor.py") expected, _ = read_data("debug_visitor.out") @@ -1035,6 +1036,7 @@ def test_endmarker(self) -> None: self.assertEqual(len(n.children), 1) self.assertEqual(n.children[0].type, black.token.ENDMARKER) + @pytest.mark.incompatible_with_mypyc @unittest.skipIf(os.environ.get("SKIP_AST_PRINT"), "user set SKIP_AST_PRINT") def test_assertFormatEqual(self) -> None: out_lines = [] @@ -2204,6 +2206,7 @@ def test_code_option_fast(self) -> None: self.compare_results(result, formatted, 0) + @pytest.mark.incompatible_with_mypyc def test_code_option_config(self) -> None: """ Test that the code option finds the pyproject.toml in the current directory. @@ -2226,6 +2229,7 @@ def test_code_option_config(self) -> None: call_args[0].lower() == str(pyproject_path).lower() ), "Incorrect config loaded." + @pytest.mark.incompatible_with_mypyc def test_code_option_parent_config(self) -> None: """ Test that the code option finds the pyproject.toml in the parent directory. From f508be4ff60842760f0742a10d768bac068b8eb1 Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Sat, 10 Jul 2021 14:29:03 -0400 Subject: [PATCH 07/21] Fix mypyc + Black on Windows The `platform=linux` was causing mypy (and therefore mypyc) to believe any `if sys.platform == "win32":` branch would never be taken. That led to `Reached allegedly unreachable code!` crashes because of safety checks mypyc adds. There's not a strong reason for pinning the platform in `mypy.ini` so with the agreement of Jelle, it's gone now! I also disabled CliRunner's exception catching feature because while it does lead to nicer test failures when an exception goes unhandled, it also removes traceback information which makes debugging a pain. P.S. You can blame Windows Update for the slowness of this bugfix :p --- mypy.ini | 1 - src/black/__init__.py | 1 + tests/test_black.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mypy.ini b/mypy.ini index ae7be5e5106..5357fb872ab 100644 --- a/mypy.ini +++ b/mypy.ini @@ -3,7 +3,6 @@ # free to run mypy on Windows, Linux, or macOS and get consistent # results. 
python_version=3.6 -platform=linux show_column_numbers=True diff --git a/src/black/__init__.py b/src/black/__init__.py index b9ece8e19e1..bd10c8cc816 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -621,6 +621,7 @@ def reformat_many( worker_count = os.cpu_count() if sys.platform == "win32": # Work around https://bugs.python.org/issue26903 + assert worker_count is not None worker_count = min(worker_count, 60) try: executor = ProcessPoolExecutor(max_workers=worker_count) diff --git a/tests/test_black.py b/tests/test_black.py index 5459cf5f9a8..397450ffd7b 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -119,7 +119,7 @@ def invokeBlack( runner = BlackRunner() if ignore_config: args = ["--verbose", "--config", str(THIS_DIR / "empty.toml"), *args] - result = runner.invoke(black.main, args) + result = runner.invoke(black.main, args, catch_exceptions=False) assert result.stdout_bytes is not None assert result.stderr_bytes is not None self.assertEqual( From 94dcddb259b123cc34beda8230b4440b8cf365fb Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Sat, 10 Jul 2021 14:34:51 -0400 Subject: [PATCH 08/21] Ask mypy to warn on unreachable code Unreachable code has either led to mypyc being unable to analyze or compile Black, or has led to runtime failures (like the one the previous commit fixed - although mypy wouldn't be able to catch it). --- mypy.ini | 4 ++++ src/black_primer/cli.py | 3 +-- src/blib2to3/pytree.py | 7 +++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/mypy.ini b/mypy.ini index 5357fb872ab..2ed7d096cae 100644 --- a/mypy.ini +++ b/mypy.ini @@ -22,6 +22,10 @@ warn_unused_ignores=True # Until we're not supporting 3.6 primer needs this disallow_any_generics=False +# Unreachable blocks have been an issue when compiling mypyc, let's try to avoid 'em +# in the first place. +warn_unreachable=True + # The following are off by default. Flip them on if you feel # adventurous. disallow_untyped_defs=True diff --git a/src/black_primer/cli.py b/src/black_primer/cli.py index f0997049d21..e13b9f4b1f8 100644 --- a/src/black_primer/cli.py +++ b/src/black_primer/cli.py @@ -72,13 +72,12 @@ async def async_main( no_diff, ) return int(ret_val) + finally: if not keep and work_path.exists(): LOG.debug(f"Removing {work_path}") rmtree(work_path, onerror=lib.handle_PermissionError) - return -2 - @click.command(context_settings={"help_option_names": ["-h", "--help"]}) @click.option( diff --git a/src/blib2to3/pytree.py b/src/blib2to3/pytree.py index 7843467e012..010d6ea335c 100644 --- a/src/blib2to3/pytree.py +++ b/src/blib2to3/pytree.py @@ -669,8 +669,11 @@ def __init__( newcontent = list(content) for i, item in enumerate(newcontent): assert isinstance(item, BasePattern), (i, item) - if isinstance(item, WildcardPattern): - self.wildcards = True + # I don't even think this code is used anywhere, but it does cause + # unreachable errors from mypy. This function's signature does look + # odd though *shrug*. + if isinstance(item, WildcardPattern): # type: ignore[unreachable] + self.wildcards = True # type: ignore[unreachable] self.type = type self.content = newcontent self.name = name From 57a25eda356ca3983c54002885e511364155ed92 Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Sat, 10 Jul 2021 15:04:32 -0400 Subject: [PATCH 09/21] Clean up mypyc setup in setup.py Hardcoding the targets isn't great since we will probably forget to add paths to the list as new files are created. 
Inspired from mypy's setup for mypyc. --- setup.py | 72 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/setup.py b/setup.py index ea20f2ab988..62a94c1378f 100644 --- a/setup.py +++ b/setup.py @@ -5,6 +5,7 @@ assert sys.version_info >= (3, 6, 2), "black requires Python 3.6.2+" from pathlib import Path # noqa E402 +from typing import List # noqa: E402 CURRENT_DIR = Path(__file__).parent sys.path.insert(0, str(CURRENT_DIR)) # for setuptools.build_meta @@ -18,6 +19,17 @@ def get_long_description() -> str: ) +def find_python_files(base: Path) -> List[Path]: + files = [] + for entry in base.iterdir(): + if entry.is_file() and entry.suffix == ".py": + files.append(entry) + elif entry.is_dir(): + files.extend(find_python_files(entry)) + + return files + + USE_MYPYC = False # To compile with mypyc, a mypyc checkout must be present on the PYTHONPATH if len(sys.argv) > 1 and sys.argv[1] == "--use-mypyc": @@ -27,44 +39,34 @@ def get_long_description() -> str: USE_MYPYC = True if USE_MYPYC: + from mypyc.build import mypycify + + source_base = CURRENT_DIR / "src" + # TIP: filepaths are normalized to use forward slashes and are relative to ./src/ + # before being checked against. + blocklist = [ + # Not performance sensitive, so save bytes + compilation time: + "blib2to3/__init__.py", + "blib2to3/pgen2/__init__.py", + "black/output.py", + "black/concurrency.py", + "black/files.py", + "black/report.py", + # Breaks the test suite when compiled (and is also useless): + "black/debug.py", + # Compiled modules can't be run directly and that's a problem here: + "black/__main__.py", + ] + discovered = [] + # black-primer and blackd have no good reason to be compiled. + discovered.extend(find_python_files(source_base / "black")) + discovered.extend(find_python_files(source_base / "blib2to3")) mypyc_targets = [ - "src/black/__init__.py", - "src/black/nodes.py", - "src/black/mode.py", - # Ditto bytes savings. - # "src/black/files.py", - # "src/black/concurrency.py", - "src/black/const.py", - "src/black/linegen.py", - "src/black/rusty.py", - "src/black/numerics.py", - "src/black/comments.py", - # Leave uncompiled to save bytes since this isn't performance sensitive at all. - # "src/black/report.py", - "src/black/cache.py", - "src/black/lines.py", - # Kept uncompiled due being useless (and breaks tests) when compiled. - # "src/black/debug.py", - "src/black/strings.py", - "src/black/parsing.py", - # Ditto again about saving btes. 
- # "src/black/output.py", - "src/black/brackets.py", - "src/black/trans.py", - "src/blib2to3/pytree.py", - "src/blib2to3/pygram.py", - "src/blib2to3/pgen2/conv.py", - "src/blib2to3/pgen2/literals.py", - "src/blib2to3/pgen2/parse.py", - "src/blib2to3/pgen2/grammar.py", - "src/blib2to3/pgen2/token.py", - "src/blib2to3/pgen2/tokenize.py", - "src/blib2to3/pgen2/driver.py", - "src/blib2to3/pgen2/pgen.py", + str(p) + for p in discovered + if p.relative_to(source_base).as_posix() not in blocklist ] - from mypyc.build import mypycify - opt_level = os.getenv("MYPYC_OPT_LEVEL", "3") ext_modules = mypycify(mypyc_targets, opt_level=opt_level, verbose=True) else: From f6a3e788bb8714e41fe0a4cc1ee2058b0a7cb3ac Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Tue, 27 Jul 2021 15:43:15 -0400 Subject: [PATCH 10/21] Initial mypyc optimizations - 5% faster parsing --- src/blib2to3/README | 7 ++++++- src/blib2to3/pgen2/driver.py | 9 ++++++--- src/blib2to3/pgen2/parse.py | 9 +++------ src/blib2to3/pgen2/tokenize.py | 29 ++++++++++++++++++----------- 4 files changed, 33 insertions(+), 21 deletions(-) diff --git a/src/blib2to3/README b/src/blib2to3/README index a43f15cb37d..86eeeacfbac 100644 --- a/src/blib2to3/README +++ b/src/blib2to3/README @@ -13,4 +13,9 @@ Reasons for forking: - ability to Cythonize Change Log: -- Changes default logger used by Driver \ No newline at end of file +- Changes default logger used by Driver +- Backported https://github.com/python/cpython/commit/8565f6b6db0fa9f65449b532a5056a98bad3dc37 + and https://github.com/python/cpython/commit/62c35a8a8ff5854ed470b1c16a7a14f3bb80368c + to support all RHS expressions in annotated assignments as like in regular assignments +- Tweaks to help mypyc compile faster code (including inlining type information, + "Final-ing", etc.) diff --git a/src/blib2to3/pgen2/driver.py b/src/blib2to3/pgen2/driver.py index af1dc6b8aeb..615612c043f 100644 --- a/src/blib2to3/pgen2/driver.py +++ b/src/blib2to3/pgen2/driver.py @@ -23,6 +23,7 @@ import sys from typing import ( Any, + cast, IO, Iterable, List, @@ -37,6 +38,7 @@ from logging import Logger from blib2to3.pytree import _Convert, NL from blib2to3.pgen2.grammar import Grammar +from blib2to3.pgen2.tokenize import GoodTokenInfo Path = Union[str, "os.PathLike[str]"] @@ -54,14 +56,14 @@ def __init__( self.logger = logger self.convert = convert - def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL: + def parse_tokens(self, tokens: Iterable[GoodTokenInfo], debug: bool = False) -> NL: """Parse a series of tokens and return the syntax tree.""" # XXX Move the prefix computation into a wrapper around tokenize. 
p = parse.Parser(self.grammar, self.convert) p.setup() lineno = 1 column = 0 - indent_columns = [] + indent_columns: List[int] = [] type = value = start = end = line_text = None prefix = "" for quintuple in tokens: @@ -86,6 +88,7 @@ def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL: if type == token.OP: type = grammar.opmap[value] if debug: + assert type is not None self.logger.debug( "%s %r (prefix=%r)", token.tok_name[type], value, prefix ) @@ -97,7 +100,7 @@ def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL: elif type == token.DEDENT: _indent_col = indent_columns.pop() prefix, _prefix = self._partially_consume_prefix(prefix, _indent_col) - if p.addtoken(type, value, (prefix, start)): + if p.addtoken(cast(int, type), value, (prefix, start)): if debug: self.logger.debug("Stop.") break diff --git a/src/blib2to3/pgen2/parse.py b/src/blib2to3/pgen2/parse.py index 47c8f02b4f5..6b031889f4f 100644 --- a/src/blib2to3/pgen2/parse.py +++ b/src/blib2to3/pgen2/parse.py @@ -138,7 +138,7 @@ def setup(self, start: Optional[int] = None) -> None: self.rootnode: Optional[NL] = None self.used_names: Set[str] = set() - def addtoken(self, type: int, value: Optional[Text], context: Context) -> bool: + def addtoken(self, type: int, value: Text, context: Context) -> bool: """Add a token; return True iff this is the end of the program.""" # Map from token to label ilabel = self.classify(type, value, context) @@ -185,11 +185,10 @@ def addtoken(self, type: int, value: Optional[Text], context: Context) -> bool: # No success finding a transition raise ParseError("bad input", type, value, context) - def classify(self, type: int, value: Optional[Text], context: Context) -> int: + def classify(self, type: int, value: Text, context: Context) -> int: """Turn a token into a label. (Internal)""" if type == token.NAME: # Keep a listing of all used names - assert value is not None self.used_names.add(value) # Check for reserved words ilabel = self.grammar.keywords.get(value) @@ -201,12 +200,10 @@ def classify(self, type: int, value: Optional[Text], context: Context) -> int: return ilabel def shift( - self, type: int, value: Optional[Text], newstate: int, context: Context + self, type: int, value: Text, newstate: int, context: Context ) -> None: """Shift a token. 
(Internal)""" dfa, state, node = self.stack[-1] - assert value is not None - assert context is not None rawnode: RawNode = (type, value, context, None) newnode = self.convert(self.grammar, rawnode) if newnode is not None: diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index dfd7d68ec2a..e16aacbab2c 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -27,6 +27,7 @@ function to which the 5 fields described above are passed as 5 arguments, each time a new token is found.""" +import sys from typing import ( Callable, Iterable, @@ -39,6 +40,12 @@ Union, cast, ) + +if sys.version_info >= (3, 8): + from typing import Final +else: + from typing_extensions import Final + from blib2to3.pgen2.token import * from blib2to3.pgen2.grammar import Grammar @@ -139,7 +146,7 @@ def _combinations(*l): PseudoExtras = group(r"\\\r?\n", Comment, Triple) PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name) -pseudoprog = re.compile(PseudoToken, re.UNICODE) +pseudoprog: Final = re.compile(PseudoToken, re.UNICODE) single3prog = re.compile(Single3) double3prog = re.compile(Double3) @@ -149,7 +156,7 @@ def _combinations(*l): | {"u", "U", "ur", "uR", "Ur", "UR"} ) -endprogs = { +endprogs: Final = { "'": re.compile(Single), '"': re.compile(Double), "'''": single3prog, @@ -159,12 +166,12 @@ def _combinations(*l): **{prefix: None for prefix in _strprefixes}, } -triple_quoted = ( +triple_quoted: Final = ( {"'''", '"""'} | {f"{prefix}'''" for prefix in _strprefixes} | {f'{prefix}"""' for prefix in _strprefixes} ) -single_quoted = ( +single_quoted: Final = ( {"'", '"'} | {f"{prefix}'" for prefix in _strprefixes} | {f'{prefix}"' for prefix in _strprefixes} @@ -418,7 +425,7 @@ def generate_tokens( logical line; continuation lines are included. 
""" lnum = parenlev = continued = 0 - numchars = "0123456789" + numchars: Final = "0123456789" contstr, needcont = "", 0 contline: Optional[str] = None indents = [0] @@ -440,7 +447,7 @@ def generate_tokens( line = readline() except StopIteration: line = "" - lnum = lnum + 1 + lnum += 1 pos, max = 0, len(line) if contstr: # continued string @@ -481,14 +488,14 @@ def generate_tokens( column = 0 while pos < max: # measure leading whitespace if line[pos] == " ": - column = column + 1 + column += 1 elif line[pos] == "\t": column = (column // tabsize + 1) * tabsize elif line[pos] == "\f": column = 0 else: break - pos = pos + 1 + pos += 1 if pos == max: break @@ -652,16 +659,16 @@ def generate_tokens( continued = 1 else: if initial in "([{": - parenlev = parenlev + 1 + parenlev += 1 elif initial in ")]}": - parenlev = parenlev - 1 + parenlev -= 1 if stashed: yield stashed stashed = None yield (OP, token, spos, epos, line) else: yield (ERRORTOKEN, line[pos], (lnum, pos), (lnum, pos + 1), line) - pos = pos + 1 + pos += 1 if stashed: yield stashed From 911d0d8601318fcc04069f2af91a066e499f0db0 Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Tue, 27 Jul 2021 18:44:22 -0400 Subject: [PATCH 11/21] More parsing optimizations - 4% faster - early binding; plus - reordering of if checks to reduce work / hit the happy case more often; plus - a few more tiny mypyc-specific tweaks --- src/blib2to3/pgen2/parse.py | 59 +++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/src/blib2to3/pgen2/parse.py b/src/blib2to3/pgen2/parse.py index 6b031889f4f..b5da4faccef 100644 --- a/src/blib2to3/pgen2/parse.py +++ b/src/blib2to3/pgen2/parse.py @@ -23,7 +23,7 @@ Set, ) from blib2to3.pgen2.grammar import Grammar -from blib2to3.pytree import NL, Context, RawNode, Leaf, Node +from blib2to3.pytree import convert, NL, Context, RawNode, Leaf, Node Results = Dict[Text, NL] @@ -100,6 +100,11 @@ def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None: to be converted. The syntax tree is converted from the bottom up. + **post-note: the convert argument is ignored since for Black's + usage, convert will always be blib2to3.pytree.convert. Allowing + this to be dynamic hurts mypyc's ability to use early binding. + These docs are left for historical and informational value. + A concrete syntax tree node is a (type, value, context, nodes) tuple, where type is the node type (a token or symbol number), value is None for symbols and a string for tokens, context is @@ -112,6 +117,7 @@ def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None: """ self.grammar = grammar + # See note in docstring above. TL;DR this is ignored. 
self.convert = convert or lam_sub def setup(self, start: Optional[int] = None) -> None: @@ -149,10 +155,18 @@ def addtoken(self, type: int, value: Text, context: Context) -> bool: arcs = states[state] # Look for a state with this label for i, newstate in arcs: - t, v = self.grammar.labels[i] - if ilabel == i: + t = self.grammar.labels[i][0] + if t >= 256: + # See if it's a symbol and if we're in its first set + itsdfa = self.grammar.dfas[t] + itsstates, itsfirst = itsdfa + if ilabel in itsfirst: + # Push a symbol + self.push(t, itsdfa, newstate, context) + break # To continue the outer while loop + + elif ilabel == i: # Look it up in the list of labels - assert t < 256 # Shift a token; we're done with it self.shift(type, value, newstate, context) # Pop while we are in an accept-only state @@ -166,14 +180,7 @@ def addtoken(self, type: int, value: Text, context: Context) -> bool: states, first = dfa # Done with this token return False - elif t >= 256: - # See if it's a symbol and if we're in its first set - itsdfa = self.grammar.dfas[t] - itsstates, itsfirst = itsdfa - if ilabel in itsfirst: - # Push a symbol - self.push(t, self.grammar.dfas[t], newstate, context) - break # To continue the outer while loop + else: if (0, state) in arcs: # An accepting state, pop it and try something else @@ -199,16 +206,13 @@ def classify(self, type: int, value: Text, context: Context) -> int: raise ParseError("bad token", type, value, context) return ilabel - def shift( - self, type: int, value: Text, newstate: int, context: Context - ) -> None: + def shift(self, type: int, value: Text, newstate: int, context: Context) -> None: """Shift a token. (Internal)""" dfa, state, node = self.stack[-1] rawnode: RawNode = (type, value, context, None) - newnode = self.convert(self.grammar, rawnode) - if newnode is not None: - assert node[-1] is not None - node[-1].append(newnode) + newnode = convert(self.grammar, rawnode) + assert node[-1] is not None + node[-1].append(newnode) self.stack[-1] = (dfa, newstate, node) def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None: @@ -221,12 +225,11 @@ def push(self, type: int, newdfa: DFAS, newstate: int, context: Context) -> None def pop(self) -> None: """Pop a nonterminal. (Internal)""" popdfa, popstate, popnode = self.stack.pop() - newnode = self.convert(self.grammar, popnode) - if newnode is not None: - if self.stack: - dfa, state, node = self.stack[-1] - assert node[-1] is not None - node[-1].append(newnode) - else: - self.rootnode = newnode - self.rootnode.used_names = self.used_names + newnode = convert(self.grammar, popnode) + if self.stack: + dfa, state, node = self.stack[-1] + assert node[-1] is not None + node[-1].append(newnode) + else: + self.rootnode = newnode + self.rootnode.used_names = self.used_names From 1f0df056c3fa91516a0309a9f1f0c0c834902382 Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Sat, 31 Jul 2021 18:19:22 -0400 Subject: [PATCH 12/21] Just some cleanup --- setup.py | 10 ++++------ src/black/strings.py | 2 +- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index 637e92a4b37..b8b5dc1f945 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ def find_python_files(base: Path) -> List[Path]: if USE_MYPYC: from mypyc.build import mypycify - source_base = CURRENT_DIR / "src" + src = CURRENT_DIR / "src" # TIP: filepaths are normalized to use forward slashes and are relative to ./src/ # before being checked against. 
blocklist = [ @@ -59,12 +59,10 @@ def find_python_files(base: Path) -> List[Path]: ] discovered = [] # black-primer and blackd have no good reason to be compiled. - discovered.extend(find_python_files(source_base / "black")) - discovered.extend(find_python_files(source_base / "blib2to3")) + discovered.extend(find_python_files(src / "black")) + discovered.extend(find_python_files(src / "blib2to3")) mypyc_targets = [ - str(p) - for p in discovered - if p.relative_to(source_base).as_posix() not in blocklist + str(p) for p in discovered if p.relative_to(src).as_posix() not in blocklist ] opt_level = os.getenv("MYPYC_OPT_LEVEL", "3") diff --git a/src/black/strings.py b/src/black/strings.py index 8294f1ffc6d..ffc860be551 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -151,7 +151,7 @@ def normalize_string_prefix(s: str, remove_u_prefix: bool = False) -> str: return f"{new_prefix}{match.group(2)}" -@lru_cache(maxsize=None) +@lru_cache(maxsize=256) def _cached_compile(pattern: str) -> re.Pattern: return re.compile(pattern) From 58fbe9ca63381f78217ca1eaf1ae748f47ade318 Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Sun, 1 Aug 2021 19:21:05 -0400 Subject: [PATCH 13/21] --version now indicates whether black is compiled --- src/black/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index 3cd9958c4f7..b2d839b70fe 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -54,6 +54,8 @@ from _black_version import version as __version__ +COMPILED = Path(__file__).suffix in (".pyd", ".so") + # types FileContent = str Encoding = str @@ -323,7 +325,10 @@ def validate_regex( " due to exclusion patterns." ), ) -@click.version_option(version=__version__) +@click.version_option( + version=__version__, + message=f"%(prog)s, %(version)s (compiled: {'yes' if COMPILED else 'no'})", +) @click.argument( "src", nargs=-1, From c7de2eafb5d07033429ab3a18ce02ed093b645c5 Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Mon, 2 Aug 2021 20:36:42 -0400 Subject: [PATCH 14/21] Round 3 of optimizations - 95% black + 5% blib2to3 Honestly at this point, I was tired of optimizing and only attempted basic and safe optimizations. 
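Most of them follow the same "do the work once at import time and mark the result
Final" pattern; a tiny illustrative sketch (names invented, not taken from the diff):

    import re
    from typing import Final

    # Compiled a single time at import; Final tells mypy(c) the name is never
    # rebound, so compiled code doesn't have to treat it as a mutable global.
    WORD_RE: Final = re.compile(r"\w+")

    def count_words(line: str) -> int:
        # Hot path avoids per-call re.compile() and repeated global lookups.
        return len(WORD_RE.findall(line))
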
I hope this actually makes a performance impact :p --- src/black/comments.py | 15 ++++++++++----- src/black/nodes.py | 30 +++++++++++++++++------------- src/black/parsing.py | 10 ++++++---- src/black/strings.py | 3 ++- src/blib2to3/pgen2/tokenize.py | 2 +- src/blib2to3/pytree.py | 2 +- 6 files changed, 37 insertions(+), 25 deletions(-) diff --git a/src/black/comments.py b/src/black/comments.py index c7513c21ef5..a8152d687a3 100644 --- a/src/black/comments.py +++ b/src/black/comments.py @@ -1,8 +1,14 @@ +import sys from dataclasses import dataclass from functools import lru_cache import regex as re from typing import Iterator, List, Optional, Union +if sys.version_info >= (3, 8): + from typing import Final +else: + from typing_extensions import Final + from blib2to3.pytree import Node, Leaf from blib2to3.pgen2 import token @@ -12,11 +18,10 @@ # types LN = Union[Leaf, Node] - -FMT_OFF = {"# fmt: off", "# fmt:off", "# yapf: disable"} -FMT_SKIP = {"# fmt: skip", "# fmt:skip"} -FMT_PASS = {*FMT_OFF, *FMT_SKIP} -FMT_ON = {"# fmt: on", "# fmt:on", "# yapf: enable"} +FMT_OFF: Final = {"# fmt: off", "# fmt:off", "# yapf: disable"} +FMT_SKIP: Final = {"# fmt: skip", "# fmt:skip"} +FMT_PASS: Final = {*FMT_OFF, *FMT_SKIP} +FMT_ON: Final = {"# fmt: on", "# fmt:on", "# yapf: enable"} @dataclass diff --git a/src/black/nodes.py b/src/black/nodes.py index 41cd9c3cf8d..838b492c283 100644 --- a/src/black/nodes.py +++ b/src/black/nodes.py @@ -15,10 +15,10 @@ Union, ) -if sys.version_info < (3, 8): - from typing_extensions import Final -else: +if sys.version_info >= (3, 8): from typing import Final +else: + from typing_extensions import Final # lib2to3 fork from blib2to3.pytree import Node, Leaf, type_repr @@ -30,7 +30,7 @@ pygram.initialize(CACHE_DIR) -syms = pygram.python_symbols +syms: Final = pygram.python_symbols # types @@ -128,12 +128,16 @@ "//=", } -IMPLICIT_TUPLE = {syms.testlist, syms.testlist_star_expr, syms.exprlist} -BRACKET = {token.LPAR: token.RPAR, token.LSQB: token.RSQB, token.LBRACE: token.RBRACE} -OPENING_BRACKETS = set(BRACKET.keys()) -CLOSING_BRACKETS = set(BRACKET.values()) -BRACKETS = OPENING_BRACKETS | CLOSING_BRACKETS -ALWAYS_NO_SPACE = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT} +IMPLICIT_TUPLE: Final = {syms.testlist, syms.testlist_star_expr, syms.exprlist} +BRACKET: Final = { + token.LPAR: token.RPAR, + token.LSQB: token.RSQB, + token.LBRACE: token.RBRACE, +} +OPENING_BRACKETS: Final = set(BRACKET.keys()) +CLOSING_BRACKETS: Final = set(BRACKET.values()) +BRACKETS: Final = OPENING_BRACKETS | CLOSING_BRACKETS +ALWAYS_NO_SPACE: Final = CLOSING_BRACKETS | {token.COMMA, STANDALONE_COMMENT} class Visitor(Generic[T]): @@ -176,9 +180,9 @@ def whitespace(leaf: Leaf, *, complex_subscript: bool) -> str: # noqa: C901 `complex_subscript` signals whether the given leaf is part of a subscription which has non-trivial arguments, like arithmetic expressions or function calls. 
""" - NO = "" - SPACE = " " - DOUBLESPACE = " " + NO: Final = "" + SPACE: Final = " " + DOUBLESPACE: Final = " " t = leaf.type p = leaf.parent v = leaf.value diff --git a/src/black/parsing.py b/src/black/parsing.py index b2da38c1309..b861a44cc39 100644 --- a/src/black/parsing.py +++ b/src/black/parsing.py @@ -36,6 +36,11 @@ else: ast3 = ast27 = ast +if sys.version_info >= (3, 8): + TYPE_IGNORE_CLASSES: Final = (ast3.TypeIgnore, ast27.TypeIgnore, ast.TypeIgnore) +else: + TYPE_IGNORE_CLASSES: Final = (ast3.TypeIgnore, ast27.TypeIgnore) + class InvalidInput(ValueError): """Raised when input source code fails all parse attempts.""" @@ -160,10 +165,7 @@ def stringify_ast( for field in sorted(node._fields): # noqa: F402 # TypeIgnore has only one field 'lineno' which breaks this comparison - type_ignore_classes: Tuple[Type, ...] = (ast3.TypeIgnore, ast27.TypeIgnore) - if sys.version_info >= (3, 8): - type_ignore_classes += (ast.TypeIgnore,) - if isinstance(node, type_ignore_classes): + if isinstance(node, TYPE_IGNORE_CLASSES): break try: diff --git a/src/black/strings.py b/src/black/strings.py index ffc860be551..96917f8830a 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -17,6 +17,7 @@ STRING_PREFIX_RE: Final = re.compile( r"^([" + STRING_PREFIX_CHARS + r"]*)(.*)$", re.DOTALL ) +FIRST_NON_WHITESPACE_RE: Final = re.compile(r"\s*\t+\s*(\S)") def sub_twice(regex: Pattern[str], replacement: str, original: str) -> str: @@ -46,7 +47,7 @@ def lines_with_leading_tabs_expanded(s: str) -> List[str]: for line in s.splitlines(): # Find the index of the first non-whitespace character after a string of # whitespace that includes at least one tab - match = re.match(r"\s*\t+\s*(\S)", line) + match = FIRST_NON_WHITESPACE_RE.match(line) if match: first_non_whitespace_idx = match.start(1) diff --git a/src/blib2to3/pgen2/tokenize.py b/src/blib2to3/pgen2/tokenize.py index e16aacbab2c..283fac2d537 100644 --- a/src/blib2to3/pgen2/tokenize.py +++ b/src/blib2to3/pgen2/tokenize.py @@ -514,7 +514,7 @@ def generate_tokens( COMMENT, comment_token, (lnum, pos), - (lnum, pos + len(comment_token)), + (lnum, nl_pos), line, ) yield (NL, line[nl_pos:], (lnum, nl_pos), (lnum, len(line)), line) diff --git a/src/blib2to3/pytree.py b/src/blib2to3/pytree.py index 010d6ea335c..ee92c2a2936 100644 --- a/src/blib2to3/pytree.py +++ b/src/blib2to3/pytree.py @@ -434,7 +434,7 @@ def __str__(self) -> Text: This reproduces the input source exactly. """ - return self.prefix + str(self.value) + return self._prefix + str(self.value) def _eq(self, other) -> bool: """Compare two nodes for equality.""" From eaa4f6c37725edf799a28874419d3b50f7685e81 Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Sat, 7 Aug 2021 18:09:31 -0400 Subject: [PATCH 15/21] Fix crashes and errors since merge from main Mostly just dataclasses nonsense + more pain from the TOML bare CR thing ... ugh --- mypy.ini | 20 ++++++++++++++++++-- src/black/handle_ipynb_magics.py | 13 +++++++------ 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/mypy.ini b/mypy.ini index 2ed7d096cae..134a60897c7 100644 --- a/mypy.ini +++ b/mypy.ini @@ -22,8 +22,8 @@ warn_unused_ignores=True # Until we're not supporting 3.6 primer needs this disallow_any_generics=False -# Unreachable blocks have been an issue when compiling mypyc, let's try to avoid 'em -# in the first place. +# Unreachable blocks have been an issue when compiling mypyc, let's try +# to avoid 'em in the first place. 
warn_unreachable=True # The following are off by default. Flip them on if you feel @@ -36,7 +36,23 @@ cache_dir=/dev/null [mypy-aiohttp.*] follow_imports=skip + [mypy-black] # The following is because of `patch_click()`. Remove when # we drop Python 3.6 support. warn_unused_ignores=False + +[mypy-black.files] +# Unfortunately tomli has deprecated strings and changed the API type +# annotations to bytes all while strings still work. We still use strings +# since it's unclear whether this will last (depends on how much the +# TOML people like bare CR as a newline sequence I suppose). Anyway, +# some versions of tomli still specify str as the type so mypy complains +# that the type: ignore is unnecessary. Gosh I wish I wasn't telling +# this story. +# +# See also: https://github.com/psf/black/pull/2408 +# https://github.com/pypa/pip/pull/10238 (because black is +# following whatever they do) +# https://github.com/toml-lang/toml/issues/837 +warn_unused_ignores=False diff --git a/src/black/handle_ipynb_magics.py b/src/black/handle_ipynb_magics.py index ad93c444efc..c1d72404948 100644 --- a/src/black/handle_ipynb_magics.py +++ b/src/black/handle_ipynb_magics.py @@ -312,7 +312,7 @@ class CellMagic: body: str -@dataclasses.dataclass +# ast.NodeVisitor + dataclass = breakage under mypyc. class CellMagicFinder(ast.NodeVisitor): """Find cell magics. @@ -331,7 +331,8 @@ class CellMagicFinder(ast.NodeVisitor): and we look for instances of the latter. """ - cell_magic: Optional[CellMagic] = None + def __init__(self, cell_magic: Optional[CellMagic] = None) -> None: + self.cell_magic = cell_magic def visit_Expr(self, node: ast.Expr) -> None: """Find cell magic, extract header and body.""" @@ -357,7 +358,8 @@ class OffsetAndMagic: magic: str -@dataclasses.dataclass +# Unsurprisingly, dataclasses are Not. An. Option. Here. Due. To. Mypyc. As. Usual. +# > fyi it's due the ast.NodeVisitor parent type class MagicFinder(ast.NodeVisitor): """Visit cell to look for get_ipython calls. @@ -377,9 +379,8 @@ class MagicFinder(ast.NodeVisitor): types of magics). """ - magics: Dict[int, List[OffsetAndMagic]] = dataclasses.field( - default_factory=lambda: collections.defaultdict(list) - ) + def __init__(self) -> None: + self.magics: Dict[int, List[OffsetAndMagic]] = collections.defaultdict(list) def visit_Assign(self, node: ast.Assign) -> None: """Look for system assign magics. From e9834e0c4375803af29879a55be121d7b27241aa Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Wed, 11 Aug 2021 13:31:16 -0400 Subject: [PATCH 16/21] Mild hack so mypyc doesn't break diff-shades + cleanup diff-shades depends on reformat_many being monkeypatchable so it can calculate files and black.Mode without copying and pasting a bunch of parsing, file discovery, and configuration code. On the other hand, I've been working on blackbench and upcoming version 21.8a1 is now compatible with the driver convert changes. 
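To spell out what that monkeypatching looks like from the interpreted side, here is a rough sketch (the patched body below is made up; it is not diff-shades' actual code). Compiled callers bind their calls to reformat_many directly to the native function, so without the patchable attribute an assignment like this would be ignored by black.main; as I understand it, @mypyc_attr(patchable=True) keeps the call going through the module namespace so the patch is actually picked up:

    import black

    def fake_reformat_many(sources, fast, write_back, mode, report):
        # Hypothetical stand-in: record what Black resolved instead of reformatting.
        print(f"would reformat {len(sources)} file(s) with {mode}")

    black.reformat_many = fake_reformat_many  # only takes effect when reformat_many is patchable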
--- src/black/__init__.py | 4 +++- src/black/parsing.py | 4 ++-- src/blib2to3/pgen2/driver.py | 12 +++--------- src/blib2to3/pytree.py | 4 ---- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/black/__init__.py b/src/black/__init__.py index 19d892fc9bd..f28a8956c46 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -29,8 +29,9 @@ Union, ) -from dataclasses import replace import click +from dataclasses import replace +from mypy_extensions import mypyc_attr from black.const import DEFAULT_LINE_LENGTH, DEFAULT_INCLUDES, DEFAULT_EXCLUDES from black.const import STDIN_PLACEHOLDER @@ -647,6 +648,7 @@ def reformat_one( report.failed(src, str(exc)) +@mypyc_attr(patchable=True) def reformat_many( sources: Set[Path], fast: bool, write_back: WriteBack, mode: Mode, report: "Report" ) -> None: diff --git a/src/black/parsing.py b/src/black/parsing.py index b861a44cc39..7a47078a062 100644 --- a/src/black/parsing.py +++ b/src/black/parsing.py @@ -12,7 +12,7 @@ # lib2to3 fork from blib2to3.pytree import Node, Leaf -from blib2to3 import pygram, pytree +from blib2to3 import pygram from blib2to3.pgen2 import driver from blib2to3.pgen2.grammar import Grammar from blib2to3.pgen2.parse import ParseError @@ -91,7 +91,7 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) - src_txt += "\n" for grammar in get_grammars(set(target_versions)): - drv = driver.Driver(grammar, pytree.convert) + drv = driver.Driver(grammar) try: result = drv.parse_string(src_txt, True) break diff --git a/src/blib2to3/pgen2/driver.py b/src/blib2to3/pgen2/driver.py index 615612c043f..324a7f46f7c 100644 --- a/src/blib2to3/pgen2/driver.py +++ b/src/blib2to3/pgen2/driver.py @@ -36,7 +36,7 @@ # Pgen imports from . import grammar, parse, token, tokenize, pgen from logging import Logger -from blib2to3.pytree import _Convert, NL +from blib2to3.pytree import NL from blib2to3.pgen2.grammar import Grammar from blib2to3.pgen2.tokenize import GoodTokenInfo @@ -44,22 +44,16 @@ class Driver(object): - def __init__( - self, - grammar: Grammar, - convert: Optional[_Convert] = None, - logger: Optional[Logger] = None, - ) -> None: + def __init__(self, grammar: Grammar, logger: Optional[Logger] = None) -> None: self.grammar = grammar if logger is None: logger = logging.getLogger(__name__) self.logger = logger - self.convert = convert def parse_tokens(self, tokens: Iterable[GoodTokenInfo], debug: bool = False) -> NL: """Parse a series of tokens and return the syntax tree.""" # XXX Move the prefix computation into a wrapper around tokenize. - p = parse.Parser(self.grammar, self.convert) + p = parse.Parser(self.grammar) p.setup() lineno = 1 column = 0 diff --git a/src/blib2to3/pytree.py b/src/blib2to3/pytree.py index ee92c2a2936..3af283398bd 100644 --- a/src/blib2to3/pytree.py +++ b/src/blib2to3/pytree.py @@ -14,7 +14,6 @@ from typing import ( Any, - Callable, Dict, Iterator, List, @@ -978,6 +977,3 @@ def generate_matches( r.update(r0) r.update(r1) yield c0 + c1, r - - -_Convert = Callable[[Grammar, RawNode], Any] From f103dc03e4ada2b8978592641daa0255870c69cc Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Sat, 21 Aug 2021 15:25:17 -0400 Subject: [PATCH 17/21] Address feedback & cleanup comments - I don't actually have to go on a rant about tomli and bare CR it turns out. - Subclassing KeyError isn't actually *that* important for one custom exception. 
--- mypy.ini | 15 --------------- src/black/__init__.py | 2 ++ src/black/brackets.py | 2 -- src/black/files.py | 2 +- src/black/handle_ipynb_magics.py | 4 ++-- src/black/linegen.py | 7 ++++--- src/black/strings.py | 5 ++++- 7 files changed, 13 insertions(+), 24 deletions(-) diff --git a/mypy.ini b/mypy.ini index 134a60897c7..b3bb1f01425 100644 --- a/mypy.ini +++ b/mypy.ini @@ -41,18 +41,3 @@ follow_imports=skip # The following is because of `patch_click()`. Remove when # we drop Python 3.6 support. warn_unused_ignores=False - -[mypy-black.files] -# Unfortunately tomli has deprecated strings and changed the API type -# annotations to bytes all while strings still work. We still use strings -# since it's unclear whether this will last (depends on how much the -# TOML people like bare CR as a newline sequence I suppose). Anyway, -# some versions of tomli still specify str as the type so mypy complains -# that the type: ignore is unnecessary. Gosh I wish I wasn't telling -# this story. -# -# See also: https://github.com/psf/black/pull/2408 -# https://github.com/pypa/pip/pull/10238 (because black is -# following whatever they do) -# https://github.com/toml-lang/toml/issues/837 -warn_unused_ignores=False diff --git a/src/black/__init__.py b/src/black/__init__.py index f28a8956c46..4d63176abb9 100644 --- a/src/black/__init__.py +++ b/src/black/__init__.py @@ -179,6 +179,8 @@ def validate_regex( @click.command( context_settings=dict(help_option_names=["-h", "--help"]), + # While Click does set this field automatically using the docstring, mypyc + # (annoyingly) strips 'em so we need to set it here too. help="The uncompromising code formatter.", ) @click.option("-c", "--code", type=str, help="Format the code passed in as a string.") diff --git a/src/black/brackets.py b/src/black/brackets.py index d92d7c11c81..c5ed4bf5b9f 100644 --- a/src/black/brackets.py +++ b/src/black/brackets.py @@ -49,8 +49,6 @@ DOT_PRIORITY: Final = 1 -# Ideally this would be a subclass of KeyError, but mypyc doesn't like that. -# See also: https://mypyc.readthedocs.io/en/latest/native_classes.html#inheritance. class BracketMatchError(Exception): """Raised when an opening bracket is unable to be matched to a closing bracket.""" diff --git a/src/black/files.py b/src/black/files.py index 6cde8ba5999..ddea1bd7592 100644 --- a/src/black/files.py +++ b/src/black/files.py @@ -95,7 +95,7 @@ def parse_pyproject_toml(path_config: str) -> Dict[str, Any]: If parsing fails, will raise a tomli.TOMLDecodeError """ with open(path_config, encoding="utf8") as f: - pyproject_toml = tomli.load(f) # type: ignore # due to deprecated API usage + pyproject_toml = tomli.loads(f.read()) config = pyproject_toml.get("tool", {}).get("black", {}) return {k.replace("--", "").replace("-", "_"): v for k, v in config.items()} diff --git a/src/black/handle_ipynb_magics.py b/src/black/handle_ipynb_magics.py index c1d72404948..d530b5784c9 100644 --- a/src/black/handle_ipynb_magics.py +++ b/src/black/handle_ipynb_magics.py @@ -358,8 +358,8 @@ class OffsetAndMagic: magic: str -# Unsurprisingly, dataclasses are Not. An. Option. Here. Due. To. Mypyc. As. Usual. -# > fyi it's due the ast.NodeVisitor parent type +# Unsurprisingly, subclassing ast.NodeVisitor means we can't use dataclasses here +# as mypyc will generate broken code. class MagicFinder(ast.NodeVisitor): """Visit cell to look for get_ipython calls. 
diff --git a/src/black/linegen.py b/src/black/linegen.py index 1691cc51ba8..bfdcf102b72 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -365,9 +365,10 @@ def _rhs( line, line_length=mode.line_length, features=features ) - # HACK: functions (like rhs) compiled by mypyc don't retain their __name__ - # attribute which is needed in `run_transformer` further down. Unfortunately - # a nested class breaks mypyc too. So a class must be created via type ... + # HACK: nested functions (like _rhs) compiled by mypyc don't retain their + # __name__ attribute which is needed in `run_transformer` further down. + # Unfortunately a nested class breaks mypyc too. So a class must be created + # via type ... https://github.com/mypyc/mypyc/issues/884 rhs = type("rhs", (), {"__call__": _rhs})() if mode.experimental_string_processing: diff --git a/src/black/strings.py b/src/black/strings.py index 96917f8830a..0c96047e4aa 100644 --- a/src/black/strings.py +++ b/src/black/strings.py @@ -152,7 +152,10 @@ def normalize_string_prefix(s: str, remove_u_prefix: bool = False) -> str: return f"{new_prefix}{match.group(2)}" -@lru_cache(maxsize=256) +# Re(gex) does actually cache patterns internally but this still improves +# performance on a long list literal of strings by 5-9% since lru_cache's +# caching overhead is much lower. +@lru_cache(maxsize=64) def _cached_compile(pattern: str) -> re.Pattern: return re.compile(pattern) From 2f238ca66df9a5a7ac78b416591d347aece2618b Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Thu, 28 Oct 2021 18:54:28 -0400 Subject: [PATCH 18/21] Skip a few more monkeypatching tests --- tests/test_black.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/test_black.py b/tests/test_black.py index ef5d0a99ec1..9692f6193f6 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -1056,6 +1056,7 @@ def test_pipe_force_py36(self) -> None: actual = result.output self.assertFormatEqual(actual, expected) + @pytest.mark.incompatible_with_mypyc def test_reformat_one_with_stdin(self) -> None: with patch( "black.format_stdin_to_stdout", @@ -1073,6 +1074,7 @@ def test_reformat_one_with_stdin(self) -> None: fsts.assert_called_once() report.done.assert_called_with(path, black.Changed.YES) + @pytest.mark.incompatible_with_mypyc def test_reformat_one_with_stdin_filename(self) -> None: with patch( "black.format_stdin_to_stdout", @@ -1120,6 +1122,7 @@ def test_reformat_one_with_stdin_filename_pyi(self) -> None: # __BLACK_STDIN_FILENAME__ should have been stripped report.done.assert_called_with(expected, black.Changed.YES) + @pytest.mark.incompatible_with_mypyc def test_reformat_one_with_stdin_filename_ipynb(self) -> None: with patch( "black.format_stdin_to_stdout", From f561b0c664a6a8726759e21ef5c26afb3577159b Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Thu, 28 Oct 2021 19:14:31 -0400 Subject: [PATCH 19/21] Bring back ignore for unused type ignore --- mypy.ini | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mypy.ini b/mypy.ini index 5005d35db20..cfceaa3ee86 100644 --- a/mypy.ini +++ b/mypy.ini @@ -35,6 +35,11 @@ check_untyped_defs=True # No incremental mode cache_dir=/dev/null +[mypy-black] +# The following is because of `patch_click()`. Remove when +# we drop Python 3.6 support. 
+warn_unused_ignores=False + [mypy-black_primer.*] # Until we're not supporting 3.6 primer needs this disallow_any_generics=False From 5ef46f4c27132206c1230d5966a28821486bc50c Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Sat, 13 Nov 2021 22:43:42 -0500 Subject: [PATCH 20/21] Optimizations + compatiblity fixes --- src/black/nodes.py | 3 +++ src/blib2to3/pgen2/parse.py | 8 ++------ src/blib2to3/pytree.py | 2 -- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/black/nodes.py b/src/black/nodes.py index 6e5c235e512..36dd1890511 100644 --- a/src/black/nodes.py +++ b/src/black/nodes.py @@ -20,6 +20,8 @@ else: from typing_extensions import Final +from mypy_extensions import mypyc_attr + # lib2to3 fork from blib2to3.pytree import Node, Leaf, type_repr from blib2to3 import pygram @@ -142,6 +144,7 @@ RARROW = 55 +@mypyc_attr(allow_interpreted_subclasses=True) class Visitor(Generic[T]): """Basic lib2to3 visitor that yields things of type `T` on `visit()`.""" diff --git a/src/blib2to3/pgen2/parse.py b/src/blib2to3/pgen2/parse.py index 9c7fd115da7..792e8e66698 100644 --- a/src/blib2to3/pgen2/parse.py +++ b/src/blib2to3/pgen2/parse.py @@ -70,9 +70,7 @@ def switch_to(self, ilabel: int) -> Iterator[None]: finally: self.parser.stack = self._start_point - def add_token( - self, tok_type: int, tok_val: Text, raw: bool = False - ) -> None: + def add_token(self, tok_type: int, tok_val: Text, raw: bool = False) -> None: func: Callable[..., Any] if raw: func = self.parser._addtoken @@ -251,9 +249,7 @@ def addtoken(self, type: int, value: Text, context: Context) -> bool: return self._addtoken(ilabel, type, value, context) - def _addtoken( - self, ilabel: int, type: int, value: Text, context: Context - ) -> bool: + def _addtoken(self, ilabel: int, type: int, value: Text, context: Context) -> bool: # Loop until the token is shifted; may raise exceptions while True: dfa, state, node = self.stack[-1] diff --git a/src/blib2to3/pytree.py b/src/blib2to3/pytree.py index 3af283398bd..39ff481f6f7 100644 --- a/src/blib2to3/pytree.py +++ b/src/blib2to3/pytree.py @@ -91,8 +91,6 @@ def __eq__(self, other: Any) -> bool: return NotImplemented return self._eq(other) - __hash__ = None # type: Any # For Py3 compatibility. 
- @property def prefix(self) -> Text: raise NotImplementedError From f5f1099dd9043e7bd62c3fd6d39dee1fd8ba458d Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Mon, 15 Nov 2021 23:01:21 -0500 Subject: [PATCH 21/21] Fix crash on PyPy by deoptimizing :( --- src/black/parsing.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/black/parsing.py b/src/black/parsing.py index f52e17bc3d1..504e20be002 100644 --- a/src/black/parsing.py +++ b/src/black/parsing.py @@ -41,11 +41,6 @@ else: ast3 = ast27 = ast -if sys.version_info >= (3, 8): - TYPE_IGNORE_CLASSES: Final = (ast3.TypeIgnore, ast27.TypeIgnore, ast.TypeIgnore) -else: - TYPE_IGNORE_CLASSES: Final = (ast3.TypeIgnore, ast27.TypeIgnore) - class InvalidInput(ValueError): """Raised when input source code fails all parse attempts.""" @@ -175,10 +170,13 @@ def stringify_ast( yield f"{' ' * depth}{node.__class__.__name__}(" for field in sorted(node._fields): # noqa: F402 - # TypeIgnore will not be present using pypy < 3.8, so no need for this + # TypeIgnore will not be present using pypy < 3.8, so need for this if not (_IS_PYPY and sys.version_info < (3, 8)): # TypeIgnore has only one field 'lineno' which breaks this comparison - if isinstance(node, TYPE_IGNORE_CLASSES): + type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore) + if sys.version_info >= (3, 8): + type_ignore_classes += (ast.TypeIgnore,) + if isinstance(node, type_ignore_classes): break try: