switch to processing logical_lines instead of entire files

Logical lines that begin with a string and don't follow other docstrings are now considered to be docstrings (fixes zheller#100). Requires flake8>=3.0; drops support for Python 2.7 and 3.4. Added some type assertions. Updated tests for the new docstring rules and to call flake8 directly.
plinss · Jun 4, 2020 · 84a8733 · 84a8733
1 parent 4612549
commit 84a8733
Show file tree

Hide file tree

Showing 15 changed files with 293 additions and 502 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -2,8 +2,6 @@ sudo: false
 dist: xenial  # is required for python3.7+
 language: python
 python:
-  - "2.7"
-  - "3.4"
   - "3.5"
   - "3.6"
   - "3.7"

diff --git a/flake8_quotes/__about__.py b/flake8_quotes/__about__.py
@@ -1 +1 @@
-__version__ = '3.2.0'
+__version__ = '4.0.0'
diff --git a/flake8_quotes/__init__.py b/flake8_quotes/__init__.py
@@ -1,26 +1,19 @@
-import optparse
 import tokenize
 import warnings
+from typing import Iterator, List, Set, Tuple
 
-# Polyfill stdin loading/reading lines
-# https://gitlab.com/pycqa/flake8-polyfill/blob/1.0.1/src/flake8_polyfill/stdin.py#L52-57
-try:
-    from flake8.engine import pep8
-    stdin_get_value = pep8.stdin_get_value
-    readlines = pep8.readlines
-except ImportError:
-    from flake8 import utils
-    import pycodestyle
-    stdin_get_value = utils.stdin_get_value
-    readlines = pycodestyle.readlines
+from flake8.options.manager import OptionManager
 
 from flake8_quotes.__about__ import __version__
 from flake8_quotes.docstring_detection import get_docstring_tokens
 
 
-class QuoteChecker(object):
+class QuoteChecker:
     name = __name__
     version = __version__
+    line: str
+    tokens: List[tokenize.TokenInfo]
+    docstring_tokens: Set[tokenize.TokenInfo]
 
     INLINE_QUOTES = {
         # When user wants only single quotes
@@ -73,55 +66,30 @@ class QuoteChecker(object):
     DOCSTRING_QUOTES["'''"] = DOCSTRING_QUOTES["'"]
     DOCSTRING_QUOTES['"""'] = DOCSTRING_QUOTES['"']
 
-    def __init__(self, tree, lines=None, filename='(none)'):
-        self.filename = filename
-        self.lines = lines
-
-    @staticmethod
-    def _register_opt(parser, *args, **kwargs):
-        """
-        Handler to register an option for both Flake8 3.x and 2.x.
-
-        This is based on:
-        https://github.com/PyCQA/flake8/blob/3.0.0b2/docs/source/plugin-development/cross-compatibility.rst#option-handling-on-flake8-2-and-3
-
-        It only supports `parse_from_config` from the original function and it
-        uses the `Option` object returned to get the string.
-        """
-        try:
-            # Flake8 3.x registration
-            parser.add_option(*args, **kwargs)
-        except (optparse.OptionError, TypeError):
-            # Flake8 2.x registration
-            parse_from_config = kwargs.pop('parse_from_config', False)
-            option = parser.add_option(*args, **kwargs)
-            if parse_from_config:
-                parser.config_options.append(option.get_opt_string().lstrip('-'))
-
     @classmethod
-    def add_options(cls, parser):
-        cls._register_opt(parser, '--quotes', action='store',
-                          parse_from_config=True, type='choice',
-                          choices=sorted(cls.INLINE_QUOTES.keys()),
-                          help='Deprecated alias for `--inline-quotes`')
-        cls._register_opt(parser, '--inline-quotes', default="'",
-                          action='store', parse_from_config=True, type='choice',
-                          choices=sorted(cls.INLINE_QUOTES.keys()),
-                          help="Quote to expect in all files (default: ')")
-        cls._register_opt(parser, '--multiline-quotes', default=None, action='store',
-                          parse_from_config=True, type='choice',
-                          choices=sorted(cls.MULTILINE_QUOTES.keys()),
-                          help='Quote to expect in all files (default: """)')
-        cls._register_opt(parser, '--docstring-quotes', default=None, action='store',
-                          parse_from_config=True, type='choice',
-                          choices=sorted(cls.DOCSTRING_QUOTES.keys()),
-                          help='Quote to expect in all files (default: """)')
-        cls._register_opt(parser, '--avoid-escape', default=None, action='store_true',
-                          parse_from_config=True,
-                          help='Avoiding escaping same quotes in inline strings (enabled by default)')
-        cls._register_opt(parser, '--no-avoid-escape', dest='avoid_escape', default=None, action='store_false',
-                          parse_from_config=False,
-                          help='Disable avoiding escaping same quotes in inline strings')
+    def add_options(cls, option_manager: OptionManager) -> None:
+        option_manager.add_option('--quotes', action='store',
+                                  parse_from_config=True, type='choice',
+                                  choices=sorted(cls.INLINE_QUOTES.keys()),
+                                  help='Deprecated alias for `--inline-quotes`')
+        option_manager.add_option('--inline-quotes', default="'",
+                                  action='store', parse_from_config=True, type='choice',
+                                  choices=sorted(cls.INLINE_QUOTES.keys()),
+                                  help="Quote to expect in all files (default: ')")
+        option_manager.add_option('--multiline-quotes', default=None, action='store',
+                                  parse_from_config=True, type='choice',
+                                  choices=sorted(cls.MULTILINE_QUOTES.keys()),
+                                  help='Quote to expect in all files (default: """)')
+        option_manager.add_option('--docstring-quotes', default=None, action='store',
+                                  parse_from_config=True, type='choice',
+                                  choices=sorted(cls.DOCSTRING_QUOTES.keys()),
+                                  help='Quote to expect in all files (default: """)')
+        option_manager.add_option('--avoid-escape', default=None, action='store_true',
+                                  parse_from_config=True,
+                                  help='Avoiding escaping same quotes in inline strings (enabled by default)')
+        option_manager.add_option('--no-avoid-escape', dest='avoid_escape', default=None, action='store_false',
+                                  parse_from_config=False,
+                                  help='Disable avoiding escaping same quotes in inline strings')
 
     @classmethod
     def parse_options(cls, options):
@@ -160,37 +128,14 @@ def parse_options(cls, options):
         else:
             cls.config.update({'avoid_escape': True})
 
-    def get_file_contents(self):
-        if self.filename in ('stdin', '-', None):
-            return stdin_get_value().splitlines(True)
-        else:
-            if self.lines:
-                return self.lines
-            else:
-                return readlines(self.filename)
-
-    def run(self):
-        file_contents = self.get_file_contents()
-
-        noqa_line_numbers = self.get_noqa_lines(file_contents)
-        errors = self.get_quotes_errors(file_contents)
-
-        for error in errors:
-            if error.get('line') not in noqa_line_numbers:
-                yield (error.get('line'), error.get('col'), error.get('message'), type(self))
-
-    def get_noqa_lines(self, file_contents):
-        tokens = [Token(t) for t in tokenize.generate_tokens(lambda L=iter(file_contents): next(L))]
-        return [token.start_row
-                for token in tokens
-                if token.type == tokenize.COMMENT and token.string.endswith('noqa')]
-
-    def get_quotes_errors(self, file_contents):
-        tokens = [Token(t) for t in tokenize.generate_tokens(lambda L=iter(file_contents): next(L))]
-        docstring_tokens = get_docstring_tokens(tokens)
-
-        for token in tokens:
+    def __init__(self, logical_line: str, previous_logical: str, tokens: List[tokenize.TokenInfo]) -> None:
+        self.line = logical_line
+        self.tokens = tokens
+        prev_tokens = tokenize.tokenize(lambda L=iter([previous_logical.encode('utf-8')]): next(L))
+        self.docstring_tokens = get_docstring_tokens(prev_tokens, self.tokens)
 
+    def __iter__(self) -> Iterator[Tuple[Tuple[int, int], str]]:
+        for token in self.tokens:
             if token.type != tokenize.STRING:
                 # ignore non strings
                 continue
@@ -210,21 +155,16 @@ def get_quotes_errors(self, file_contents):
             #   "foo"[0] * 3 = " * 3 = """
             #   "foo"[0:3] = "fo
             #   """foo"""[0:3] = """
-            is_docstring = token in docstring_tokens
+            is_docstring = (token in self.docstring_tokens)
             is_multiline_string = unprefixed_string[0] * 3 == unprefixed_string[0:3]
-            start_row, start_col = token.start
 
             # If our string is a docstring
             # DEV: Docstring quotes must be checked before multiline quotes, as a docstring can also be a multiline string
             if is_docstring:
                 if self.config['good_docstring'] in unprefixed_string:
                     continue
 
-                yield {
-                    'message': 'Q002 Remove bad quotes from docstring',
-                    'line': start_row,
-                    'col': start_col,
-                }
+                yield (token.start, 'Q002 Remove bad quotes from docstring')
             # Otherwise if our string is multiline
             elif is_multiline_string:
                 # If our string is or containing a known good string, then ignore it
@@ -241,11 +181,7 @@ def get_quotes_errors(self, file_contents):
                     continue
 
                 # Output our error
-                yield {
-                    'message': 'Q001 Remove bad quotes from multiline string',
-                    'line': start_row,
-                    'col': start_col,
-                }
+                yield (token.start, 'Q001 Remove bad quotes from multiline string')
             # Otherwise (string is inline quote)
             else:
                 #   'This is a string'       -> Good
@@ -266,43 +202,9 @@ def get_quotes_errors(self, file_contents):
                         continue
                     if (self.config['good_single'] in string_contents and
                             not self.config['bad_single'] in string_contents):
-                        yield {
-                            'message': 'Q003 Change outer quotes to avoid escaping inner quotes',
-                            'line': start_row,
-                            'col': start_col,
-                        }
+                        yield (token.start, 'Q003 Change outer quotes to avoid escaping inner quotes')
                     continue
 
                 # If not preferred type, only allow use to avoid escapes.
                 if not self.config['good_single'] in string_contents:
-                    yield {
-                        'message': 'Q000 Remove bad quotes',
-                        'line': start_row,
-                        'col': start_col,
-                    }
-
-
-class Token:
-    """Python 2 and 3 compatible token"""
-    def __init__(self, token):
-        self.token = token
-
-    @property
-    def type(self):
-        return self.token[0]
-
-    @property
-    def string(self):
-        return self.token[1]
-
-    @property
-    def start(self):
-        return self.token[2]
-
-    @property
-    def start_row(self):
-        return self.token[2][0]
-
-    @property
-    def start_col(self):
-        return self.token[2][1]
+                    yield (token.start, 'Q000 Remove bad quotes')
diff --git a/flake8_quotes/docstring_detection.py b/flake8_quotes/docstring_detection.py
@@ -18,6 +18,7 @@
 
 # These tokens don't matter here - they don't get in the way of docstrings
 TOKENS_TO_IGNORE = [
+    tokenize.ENCODING,
     tokenize.NEWLINE,
     tokenize.INDENT,
     tokenize.DEDENT,
@@ -26,8 +27,14 @@
 ]
 
 
-def get_docstring_tokens(tokens):
-    state = STATE_EXPECT_MODULE_DOCSTRING
+def get_docstring_tokens(prev_tokens, tokens):
+    for token in prev_tokens:
+        if token.type in TOKENS_TO_IGNORE:
+            continue
+        state = STATE_EXPECT_MODULE_DOCSTRING if (token.type != tokenize.STRING) else STATE_OTHER
+        break
+    else:
+        state = STATE_EXPECT_MODULE_DOCSTRING
     # The number of currently open parentheses, square brackets, etc.
     # This doesn't check if they're properly balanced, i.e. there isn't ([)], but we shouldn't
     # need to - if they aren't, it shouldn't parse at all, so we ignore the bracket type

diff --git a/setup.py b/setup.py
@@ -28,7 +28,7 @@ def read(*filenames, **kwargs):
     author_email='zheller@gmail.com',
     version=about['__version__'],
     install_requires=[
-        'flake8',
+        'flake8>=3.0.0',
     ],
     url='http://github.com/zheller/flake8-quotes/',
     long_description=LONG_DESCRIPTION,
@@ -52,10 +52,7 @@ def read(*filenames, **kwargs):
         'Operating System :: OS Independent',
         'License :: OSI Approved :: MIT License',
         'Programming Language :: Python',
-        'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',

diff --git a/test/data/docstring_doubles.py b/test/data/docstring_doubles.py
@@ -28,11 +28,11 @@ def f(self, bar="""
         some_expression = 'hello world'
 
         """
-        this is not a docstring
+        this is a variable docstring
         """
 
         if l:
             """
-            Looks like a docstring, but in reality it isn't - only modules, classes and functions
+            this is a non-standard docstring
             """
             pass
diff --git a/test/data/docstring_doubles_module_multiline.py b/test/data/docstring_doubles_module_multiline.py
@@ -7,5 +7,5 @@
 def foo():
     pass
 """
-this is not a docstring
+this is a non-standard docstring
 """
diff --git a/test/data/docstring_doubles_module_singleline.py b/test/data/docstring_doubles_module_singleline.py
@@ -3,4 +3,4 @@
 
 def foo():
     pass
-""" this is not a docstring """
+""" this is a non-standard docstring """
diff --git a/test/data/docstring_singles.py b/test/data/docstring_singles.py
@@ -30,11 +30,11 @@ def f(self, bar='''
         some_expression = 'hello world'
 
         '''
-        this is not a docstring
+        this is a variable docstring
         '''
 
         if l:
             '''
-            Looks like a docstring, but in reality it isn't - only modules, classes and functions
+            this is a non-standard docstring
             '''
             pass
diff --git a/test/data/docstring_singles_module_multiline.py b/test/data/docstring_singles_module_multiline.py
@@ -7,5 +7,5 @@
 def foo():
     pass
 '''
-this is not a docstring
+this is a non-standard docstring
 '''
diff --git a/test/data/docstring_singles_module_singleline.py b/test/data/docstring_singles_module_singleline.py
@@ -3,4 +3,4 @@
 
 def foo():
     pass
-''' this is not a docstring '''
+''' this is a non-standard docstring '''
diff --git a/test/data/no_qa.py b/test/data/no_qa.py