Merge pull request #188 from davidhalter/line-ending

Fix line endings support at various locations
davidhalter · May 30, 2021 · ed47650
2 parents 60fed7b + 7000dd2
commit ed47650
Show file tree

Hide file tree

Showing 7 changed files with 22 additions and 13 deletions.
diff --git a/parso/python/pep8.py b/parso/python/pep8.py
@@ -74,7 +74,7 @@ def __init__(self, config, leaf, parent, in_suite_introducer=False):
         parent_indentation = n.indentation
 
         next_leaf = leaf.get_next_leaf()
-        if '\n' in next_leaf.prefix:
+        if '\n' in next_leaf.prefix or '\r' in next_leaf.prefix:
             # This implies code like:
             # foobarbaz(
             #     a,
@@ -116,7 +116,7 @@ def __init__(self, config, leaf, parent):
         self.type = IndentationTypes.IMPLICIT
 
         next_leaf = leaf.get_next_leaf()
-        if leaf == ':' and '\n' not in next_leaf.prefix:
+        if leaf == ':' and '\n' not in next_leaf.prefix and '\r' not in next_leaf.prefix:
             self.indentation += ' '
 
 
@@ -216,8 +216,8 @@ def _visit_node(self, node):
             endmarker = node.children[-1]
             prev = endmarker.get_previous_leaf()
             prefix = endmarker.prefix
-            if (not prefix.endswith('\n') and (
-                    prefix or prev is None or prev.value != '\n')):
+            if (not prefix.endswith('\n') and not prefix.endswith('\r') and (
+                    prefix or prev is None or prev.value not in {'\n', '\r\n', '\r'})):
                 self.add_issue(endmarker, 292, "No newline at end of file")
 
         if typ in _IMPORT_TYPES:
@@ -465,7 +465,8 @@ def _visit_part(self, part, spacing, leaf):
                             + self._config.indentation:
                         self.add_issue(part, 129, "Line with same indent as next logical block")
                     elif indentation != should_be_indentation:
-                        if not self._check_tabs_spaces(spacing) and part.value != '\n':
+                        if not self._check_tabs_spaces(spacing) and part.value not in \
+                                {'\n', '\r\n', '\r'}:
                             if value in '])}':
                                 if node.type == IndentationTypes.VERTICAL_BRACKET:
                                     self.add_issue(
@@ -652,7 +653,8 @@ def add_not_spaces(*args):
             else:
                 prev_spacing = self._previous_spacing
                 if prev in _ALLOW_SPACE and spaces != prev_spacing.value \
-                        and '\n' not in self._previous_leaf.prefix:
+                        and '\n' not in self._previous_leaf.prefix \
+                        and '\r' not in self._previous_leaf.prefix:
                     message = "Whitespace before operator doesn't match with whitespace after"
                     self.add_issue(spacing, 229, message)
 

diff --git a/parso/python/prefix.py b/parso/python/prefix.py
@@ -18,7 +18,7 @@ def __init__(self, leaf, typ, value, spacing='', start_pos=None):
 
     @property
     def end_pos(self) -> Tuple[int, int]:
-        if self.value.endswith('\n'):
+        if self.value.endswith('\n') or self.value.endswith('\r'):
             return self.start_pos[0] + 1, 0
         if self.value == unicode_bom:
             # The bom doesn't have a length at the start of a Python file.
@@ -50,8 +50,8 @@ def search_ancestor(self, *node_types):
 
 
 _comment = r'#[^\n\r\f]*'
-_backslash = r'\\\r?\n'
-_newline = r'\r?\n'
+_backslash = r'\\\r?\n|\\\r'
+_newline = r'\r?\n|\r'
 _form_feed = r'\f'
 _only_spacing = '$'
 _spacing = r'[ \t]*'
@@ -94,7 +94,7 @@ def split_prefix(leaf, start_pos):
             bom = True
 
         start = match.end(0)
-        if value.endswith('\n'):
+        if value.endswith('\n') or value.endswith('\r'):
             line += 1
             column = -start
 

diff --git a/parso/python/tokenize.py b/parso/python/tokenize.py
@@ -548,7 +548,7 @@ def dedent_if_necessary(start):
                     additional_prefix = prefix + token
                 new_line = True
             elif initial == '#':  # Comments
-                assert not token.endswith("\n")
+                assert not token.endswith("\n") and not token.endswith("\r")
                 if fstring_stack and fstring_stack[-1].is_in_expr():
                     # `#` is not allowed in f-string expressions
                     yield PythonToken(ERRORTOKEN, initial, spos, prefix)

diff --git a/parso/utils.py b/parso/utils.py
@@ -92,7 +92,7 @@ def detect_encoding():
             # UTF-8 byte-order mark
             return 'utf-8'
 
-        first_two_lines = re.match(br'(?:[^\n]*\n){0,2}', source).group(0)
+        first_two_lines = re.match(br'(?:[^\r\n]*(?:\r\n|\r|\n)){0,2}', source).group(0)
         possible_encoding = re.search(br"coding[=:]\s*([-\w.]+)",
                                       first_two_lines)
         if possible_encoding:

diff --git a/test/test_pep8.py b/test/test_pep8.py
@@ -15,6 +15,8 @@ def assert_issue(code):
         assert issue.code == 292
 
     assert not issues('asdf = 1\n')
+    assert not issues('asdf = 1\r\n')
+    assert not issues('asdf = 1\r')
     assert_issue('asdf = 1')
     assert_issue('asdf = 1\n# foo')
     assert_issue('# foobar')

diff --git a/test/test_prefix.py b/test/test_prefix.py
@@ -19,6 +19,7 @@
     (' \f ', ['\f', ' ']),
     (' \f ', ['\f', ' ']),
     (' \r\n', ['\r\n', '']),
+    (' \r', ['\r', '']),
     ('\\\n', ['\\\n', '']),
     ('\\\r\n', ['\\\r\n', '']),
     ('\t\t\n\t', ['\n', '\t']),
@@ -34,7 +35,7 @@ def test_simple_prefix_splitting(string, tokens):
         assert pt.value == expected
 
         # Calculate the estimated end_pos
-        if expected.endswith('\n'):
+        if expected.endswith('\n') or expected.endswith('\r'):
             end_pos = start_pos[0] + 1, 0
         else:
             end_pos = start_pos[0], start_pos[1] + len(expected) + len(pt.spacing)

diff --git a/test/test_utils.py b/test/test_utils.py
@@ -74,6 +74,10 @@ def test_utf8_bom():
     ('code', 'errors'), [
         (b'# coding: wtf-12\nfoo', 'strict'),
         (b'# coding: wtf-12\nfoo', 'replace'),
+        (b'# coding: wtf-12\r\nfoo', 'strict'),
+        (b'# coding: wtf-12\r\nfoo', 'replace'),
+        (b'# coding: wtf-12\rfoo', 'strict'),
+        (b'# coding: wtf-12\rfoo', 'replace'),
     ]
 )
 def test_bytes_to_unicode_failing_encoding(code, errors):