Merge pull request #739 from hhatto/avoid-lib2to3

Avoid lib2to3 (second challenge)
hhatto · Mar 17, 2024 · commit 7c7c95e
2 parents: b0f2793 + bc21480

Showing 5 changed files with 107 additions and 156 deletions.
diff --git a/.gitignore b/.gitignore
@@ -18,3 +18,4 @@ coverage.xml
 dist
 htmlcov
 pep8.py
+test/suite/out/*.py.err
diff --git a/autopep8.py b/autopep8.py
@@ -106,6 +106,14 @@ class documentation for more information.
 DOCSTRING_START_REGEX = re.compile(r'^u?r?(?P<kind>["\']{3})')
 ENABLE_REGEX = re.compile(r'# *(fmt|autopep8): *on')
 DISABLE_REGEX = re.compile(r'# *(fmt|autopep8): *off')
+ENCODING_MAGIC_COMMENT = re.compile(
+    r'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)'
+)
+COMPARE_TYPE_REGEX = re.compile(
+    r'([=!]=)\s+type(?:\s*\(\s*([^)]*[^ )])\s*\))'
+    r'|\btype(?:\s*\(\s*([^)]*[^ )])\s*\))\s+([=!]=)'
+)
+TYPE_REGEX = re.compile(r'(type\s*\(\s*[^)]*?[^\s)]\s*\))')
 
 EXIT_CODE_OK = 0
 EXIT_CODE_ERROR = 1
@@ -129,25 +137,6 @@ class documentation for more information.
 # to be enabled, disable both of them
 CONFLICTING_CODES = ('W503', 'W504')
 
-# W602 is handled separately due to the need to avoid "with_traceback".
-CODE_TO_2TO3 = {
-    'E231': ['ws_comma'],
-    'E721': ['idioms'],
-    'W690': ['apply',
-             'except',
-             'exitfunc',
-             'numliterals',
-             'operator',
-             'paren',
-             'reduce',
-             'renames',
-             'standarderror',
-             'sys_exc',
-             'throw',
-             'tuple_params',
-             'xreadlines']}
-
-
 if sys.platform == 'win32':  # pragma: no cover
     DEFAULT_CONFIG = os.path.expanduser(r'~\.pycodestyle')
 else:
@@ -175,16 +164,31 @@ def open_with_encoding(filename, mode='r', encoding=None, limit_byte_check=-1):
                    newline='')  # Preserve line endings
 
 
+def _detect_encoding_from_file(filename: str):
+    try:
+        with open(filename) as input_file:
+            for idx, line in enumerate(input_file):
+                if idx == 0 and line[0] == '\ufeff':
+                    return "utf-8-sig"
+                if idx >= 2:
+                    break
+                match = ENCODING_MAGIC_COMMENT.search(line)
+                if match:
+                    return match.groups()[0]
+    except Exception:
+        pass
+    # Python3's default encoding
+    return 'utf-8'
+
+
 def detect_encoding(filename, limit_byte_check=-1):
     """Return file encoding."""
+    encoding = _detect_encoding_from_file(filename)
+    if encoding == "utf-8-sig":
+        return encoding
     try:
-        with open(filename, 'rb') as input_file:
-            from lib2to3.pgen2 import tokenize as lib2to3_tokenize
-            encoding = lib2to3_tokenize.detect_encoding(input_file.readline)[0]
-
         with open_with_encoding(filename, encoding=encoding) as test_file:
             test_file.read(limit_byte_check)
-
         return encoding
     except (LookupError, SyntaxError, UnicodeDecodeError):
         return 'latin-1'
@@ -449,7 +453,7 @@ class FixPEP8(object):
         - e502
         - e701,e702,e703,e704
         - e711,e712,e713,e714
-        - e722
+        - e721,e722
         - e731
         - w291
         - w503,504
@@ -1262,6 +1266,58 @@ def fix_e714(self, result):
                         new_target[:pos_start], 'is not', new_target[pos_end:])
                 self.source[line_index] = new_target
 
+    def fix_e721(self, result):
+        """fix comparison type"""
+        (line_index, _, target) = get_index_offset_contents(result,
+                                                            self.source)
+        match = COMPARE_TYPE_REGEX.search(target)
+        if match:
+            # NOTE: match objects
+            #  * type(a) == type(b)  -> (None, None, 'a', '==')
+            #  * str == type(b)      -> ('==', 'b', None, None)
+            #  * type("") != type(b) -> (None, None, '""', '!=')
+            start = match.start()
+            end = match.end()
+            _prefix = ""
+            _suffix = ""
+            first_match_type_obj = match.groups()[1]
+            if first_match_type_obj is None:
+                _target_obj = match.groups()[2]
+            else:
+                _target_obj = match.groups()[1]
+                _suffix = target[end:]
+
+            isinstance_stmt = " isinstance"
+            is_not_condition = (
+                match.groups()[0] == "!=" or match.groups()[3] == "!="
+            )
+            if is_not_condition:
+                isinstance_stmt = " not isinstance"
+
+            _type_comp = f"{_target_obj}, {target[:start]}"
+
+            _prefix_tmp = target[:start].split()
+            if len(_prefix_tmp) >= 1:
+                _type_comp = f"{_target_obj}, {target[:start]}"
+                if first_match_type_obj is not None:
+                    _prefix = " ".join(_prefix_tmp[:-1])
+                    _type_comp = f"{_target_obj}, {_prefix_tmp[-1]}"
+                else:
+                    _prefix = " ".join(_prefix_tmp)
+
+            _suffix_tmp = target[end:]
+            _suffix_type_match = TYPE_REGEX.search(_suffix_tmp)
+            if len(_suffix_tmp.split()) >= 1 and _suffix_type_match:
+                if _suffix_type_match:
+                    type_match_end = _suffix_type_match.end()
+                    _suffix = _suffix_tmp[type_match_end:]
+            if _suffix_type_match:
+                cmp_b = _suffix_type_match.groups()[0]
+                _type_comp = f"{_target_obj}, {cmp_b}"
+
+            fix_line = f"{_prefix}{isinstance_stmt}({_type_comp}){_suffix}"
+            self.source[line_index] = fix_line
+
     def fix_e722(self, result):
         """fix bare except"""
         (line_index, _, target) = get_index_offset_contents(result,
@@ -1717,69 +1773,6 @@ def split_and_strip_non_empty_lines(text):
     return [line.strip() for line in text.splitlines() if line.strip()]
 
 
-def refactor(source, fixer_names, ignore=None, filename=''):
-    """Return refactored code using lib2to3.
-
-    Skip if ignore string is produced in the refactored code.
-
-    """
-    not_found_end_of_file_newline = source and source.rstrip("\r\n") == source
-    if not_found_end_of_file_newline:
-        input_source = source + "\n"
-    else:
-        input_source = source
-
-    from lib2to3 import pgen2
-    try:
-        new_text = refactor_with_2to3(input_source,
-                                      fixer_names=fixer_names,
-                                      filename=filename)
-    except (pgen2.parse.ParseError,
-            SyntaxError,
-            UnicodeDecodeError,
-            UnicodeEncodeError):
-        return source
-
-    if ignore:
-        if ignore in new_text and ignore not in source:
-            return source
-
-    if not_found_end_of_file_newline:
-        return new_text.rstrip("\r\n")
-
-    return new_text
-
-
-def code_to_2to3(select, ignore, where='', verbose=False):
-    fixes = set()
-    for code, fix in CODE_TO_2TO3.items():
-        if code_match(code, select=select, ignore=ignore):
-            if verbose:
-                print('--->  Applying {} fix for {}'.format(where,
-                                                            code.upper()),
-                      file=sys.stderr)
-            fixes |= set(fix)
-    return fixes
-
-
-def fix_2to3(source,
-             aggressive=True, select=None, ignore=None, filename='',
-             where='global', verbose=False):
-    """Fix various deprecated code (via lib2to3)."""
-    if not aggressive:
-        return source
-
-    select = select or []
-    ignore = ignore or []
-
-    return refactor(source,
-                    code_to_2to3(select=select,
-                                 ignore=ignore,
-                                 where=where,
-                                 verbose=verbose),
-                    filename=filename)
-
-
 def find_newline(source):
     """Return type of newline used in source.
 
@@ -3175,24 +3168,6 @@ def _leading_space_count(line):
     return i
 
 
-def refactor_with_2to3(source_text, fixer_names, filename=''):
-    """Use lib2to3 to refactor the source.
-
-    Return the refactored source code.
-
-    """
-    from lib2to3.refactor import RefactoringTool
-    fixers = ['lib2to3.fixes.fix_' + name for name in fixer_names]
-    tool = RefactoringTool(fixer_names=fixers, explicit=fixers)
-
-    from lib2to3.pgen2 import tokenize as lib2to3_tokenize
-    try:
-        # The name parameter is necessary particularly for the "import" fixer.
-        return str(tool.refactor_string(source_text, name=filename))
-    except lib2to3_tokenize.TokenError:
-        return source_text
-
-
 def check_syntax(code):
     """Return True if syntax is okay."""
     try:
@@ -3685,14 +3660,6 @@ def apply_global_fixes(source, options, where='global', filename='',
             source = function(source,
                               aggressive=options.aggressive)
 
-    source = fix_2to3(source,
-                      aggressive=options.aggressive,
-                      select=options.select,
-                      ignore=options.ignore,
-                      filename=filename,
-                      where=where,
-                      verbose=options.verbose)
-
     return source
 
 
@@ -4127,10 +4094,6 @@ def supported_fixes():
         yield (code.upper() + (4 - len(code)) * ' ',
                re.sub(r'\s+', ' ', docstring_summary(function.__doc__)))
 
-    for code in sorted(CODE_TO_2TO3):
-        yield (code.upper() + (4 - len(code)) * ' ',
-               re.sub(r'\s+', ' ', docstring_summary(fix_2to3.__doc__)))
-
 
 def docstring_summary(docstring):
     """Return summary of docstring."""

diff --git a/test/suite/E72.py b/test/suite/E72.py
@@ -26,9 +26,9 @@
 assert type(res) == type((0))
 #: E721
 assert type(res) != type((1, ))
-#: E721
+#: Okay
 assert type(res) is type((1, ))
-#: E721
+#: Okay
 assert type(res) is not type((1, ))
 #: E211 E721
 assert type(res) == type ([2, ])

diff --git a/test/suite/out/E72.py b/test/suite/out/E72.py
@@ -12,7 +12,7 @@
 #: E721
 import types
 
-if not isinstance(res, types.ListType):
+if type(res) is not types.ListType:
     pass
 #: E721
 assert isinstance(res, type(False)) or isinstance(res, type(None))
@@ -26,10 +26,10 @@
 assert isinstance(res, type((0)))
 #: E721
 assert not isinstance(res, type((1, )))
-#: E721
-assert isinstance(res, type((1, )))
-#: E721
-assert not isinstance(res, type((1, )))
+#: Okay
+assert type(res) is type((1, ))
+#: Okay
+assert type(res) is not type((1, ))
 #: E211 E721
 assert isinstance(res, type([2, ]))
 #: E201 E201 E202 E721

diff --git a/test/test_autopep8.py b/test/test_autopep8.py
@@ -384,37 +384,6 @@ def test_split_at_offsets_with_out_of_order(self):
         self.assertEqual(['12', '3', '4'],
                          autopep8.split_at_offsets('1234', [3, 2]))
 
-    def test_fix_2to3(self):
-        self.assertEqual(
-            'try: pass\nexcept ValueError as e: pass\n',
-            autopep8.fix_2to3('try: pass\nexcept ValueError, e: pass\n'))
-
-        self.assertEqual(
-            'while True: pass\n',
-            autopep8.fix_2to3('while 1: pass\n'))
-
-        self.assertEqual(
-            """\
-import sys
-sys.maxsize
-""",
-            autopep8.fix_2to3("""\
-import sys
-sys.maxint
-"""))
-
-    def test_fix_2to3_subset(self):
-        line = 'type(res) == type(42)\n'
-        fixed = 'isinstance(res, type(42))\n'
-
-        self.assertEqual(fixed, autopep8.fix_2to3(line))
-        self.assertEqual(fixed, autopep8.fix_2to3(line, select=['E721']))
-        self.assertEqual(fixed, autopep8.fix_2to3(line, select=['E7']))
-
-        self.assertEqual(line, autopep8.fix_2to3(line, select=['W']))
-        self.assertEqual(line, autopep8.fix_2to3(line, select=['E999']))
-        self.assertEqual(line, autopep8.fix_2to3(line, ignore=['E721']))
-
     def test_is_python_file(self):
         self.assertTrue(autopep8.is_python_file(
             os.path.join(ROOT_DIR, 'autopep8.py')))
@@ -4406,6 +4375,24 @@ def test_e721_in_conditional(self):
         with autopep8_context(line, options=['--aggressive']) as result:
             self.assertEqual(fixed, result)
 
+    def test_e721_in_conditional_pat2(self):
+        line = "if type(res) == type(42):\n    pass\n"
+        fixed = "if isinstance(res, type(42)):\n    pass\n"
+        with autopep8_context(line, options=['--aggressive']) as result:
+            self.assertEqual(fixed, result)
+
+    def test_e721_in_not_conditional(self):
+        line = "if type(res) != type(''):\n    pass\n"
+        fixed = "if not isinstance(res, type('')):\n    pass\n"
+        with autopep8_context(line, options=['--aggressive']) as result:
+            self.assertEqual(fixed, result)
+
+    def test_e721_in_not_conditional_pat2(self):
+        line = "if type(a) != type(b) or type(a) == type(ccc):\n    pass\n"
+        fixed = "if not isinstance(a, type(b)) or isinstance(a, type(ccc)):\n    pass\n"
+        with autopep8_context(line, options=['--aggressive']) as result:
+            self.assertEqual(fixed, result)
+
     def test_e722(self):
         line = "try:\n    print(a)\nexcept:\n    pass\n"
         fixed = "try:\n    print(a)\nexcept BaseException:\n    pass\n"