Skip to content

Commit

Permalink
fix: certain strange characters caused reporting to fail. #1512
Browse files (browse the repository at this point in the history)
It turns out that str.splitlines() will break text on some characters
that file.readline() does not!  Use readline() to read source files the
same way that Python does.
  • Loading branch information
nedbat committed Dec 23, 2022
1 parent 152cdc7 commit 35e249f
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 1 deletion.
4 changes: 4 additions & 0 deletions CHANGES.rst
Expand Up @@ -23,9 +23,13 @@ Unreleased
- File pattern rules were too strict, forbidding plus signs and curly braces in
directory and file names. This is now fixed, closing `issue 1513`_.

- Unusual Unicode or control characters in source files could prevent
reporting. This is now fixed, closing `issue 1512`_.

- The PyPy wheel now installs on PyPy 3.7, 3.8, and 3.9, closing `issue 1510`_.

.. _issue 1510: https://github.com/nedbat/coveragepy/issues/1510
.. _issue 1512: https://github.com/nedbat/coveragepy/issues/1512
.. _issue 1513: https://github.com/nedbat/coveragepy/issues/1513


Expand Down
3 changes: 2 additions & 1 deletion coverage/phystokens.py
Expand Up @@ -4,6 +4,7 @@
"""Better tokenizing for coverage.py."""

import ast
import io
import keyword
import re
import token
Expand Down Expand Up @@ -172,7 +173,7 @@ def generate_tokens(self, text):
"""A stand-in for `tokenize.generate_tokens`."""
if text != self.last_text:
self.last_text = text
readline = iter(text.splitlines(True)).__next__
readline = io.StringIO(text).readline
try:
self.last_tokens = list(tokenize.generate_tokens(readline))
except:
Expand Down
32 changes: 32 additions & 0 deletions tests/test_html.py
Expand Up @@ -469,6 +469,38 @@ def test_formfeeds(self):
formfeed_html = self.get_html_report_content("formfeed.py")
assert "line_two" in formfeed_html

def test_splitlines_special_chars(self):
# Regression test: a measured source file whose string literals contain
# characters that str.splitlines() treats as line boundaries must still
# produce an HTML report with the source lines numbered correctly.
# https://github.com/nedbat/coveragepy/issues/1512
# See https://docs.python.org/3/library/stdtypes.html#str.splitlines for
# the characters splitlines treats specially that readlines does not.

# I'm not exactly sure why we need the "a" strings here, but the old
# code wasn't failing without them.
#
# NOTE: the literal below is not a raw string, so the \x.. and \u....
# escapes are decoded when this test module is compiled. The text handed
# to make_file therefore contains the actual characters (VT, FF, FS, GS,
# RS, NEL, LINE SEPARATOR, PARAGRAPH SEPARATOR), not backslash escapes.
# The leading backslash after the opening quotes suppresses the first
# newline, so `test = {` is line 1 of the generated text and the eight
# dict entries are lines 2 through 9.
self.make_file("splitlines_is_weird.py", """\
test = {
"0b": ["\x0b0"], "a1": "this is line 2",
"0c": ["\x0c0"], "a2": "this is line 3",
"1c": ["\x1c0"], "a3": "this is line 4",
"1d": ["\x1d0"], "a4": "this is line 5",
"1e": ["\x1e0"], "a5": "this is line 6",
"85": ["\x850"], "a6": "this is line 7",
"2028": ["\u20280"], "a7": "this is line 8",
"2029": ["\u20290"], "a8": "this is line 9",
}
DONE = 1
""")
# Presumably start_import_stop starts measurement, imports the named
# module, and stops measurement -- confirm against the helper's definition.
cov = coverage.Coverage()
self.start_import_stop(cov, "splitlines_is_weird")
cov.html_report()

the_html = self.get_html_report_content("splitlines_is_weird.py")
# The final statement of the generated file must appear in the report.
assert "DONE" in the_html

# Check that the lines are properly decoded and reported...
# Each regex requires the id and the quoted text to sit on the same
# "\n"-delimited line of HTML. Presumably id="tN" marks source line N in
# the report -- confirm against the HTML template. Lines 2 and 9 are the
# first and last dict entries, so a spurious extra line break anywhere in
# between would shift at least the second match.
html_lines = the_html.split("\n")
assert any(re.search(r'id="t2".*"this is line 2"', line) for line in html_lines)
assert any(re.search(r'id="t9".*"this is line 9"', line) for line in html_lines)


class HtmlTest(HtmlTestHelpers, CoverageTest):
"""Moar HTML tests."""
Expand Down

0 comments on commit 35e249f

Please sign in to comment.