Skip to content

Commit

Permalink
fix: use glob matching instead of fnmatch. #1407
Browse files Browse the repository at this point in the history
I didn't understand that fnmatch considers the entire string to be a
filename, even if it has slashes in it. This led to incorrect matching.
Now we use our own implementation of glob matching to get the correct
behavior.
  • Loading branch information
nedbat committed Oct 30, 2022
1 parent b3a1d97 commit f93c620
Show file tree
Hide file tree
Showing 6 changed files with 231 additions and 94 deletions.
79 changes: 55 additions & 24 deletions coverage/files.py
Expand Up @@ -3,7 +3,6 @@

"""File wrangling."""

import fnmatch
import hashlib
import ntpath
import os
Expand Down Expand Up @@ -172,7 +171,7 @@ def isabs_anywhere(filename):


def prep_patterns(patterns):
"""Prepare the file patterns for use in a `FnmatchMatcher`.
"""Prepare the file patterns for use in a `GlobMatcher`.
If a pattern starts with a wildcard, it is used as a pattern
as-is. If it does not start with a wildcard, then it is made
Expand Down Expand Up @@ -253,15 +252,15 @@ def match(self, module_name):
return False


class FnmatchMatcher:
class GlobMatcher:
"""A matcher for files by file name pattern."""
def __init__(self, pats, name="unknown"):
self.pats = list(pats)
self.re = fnmatches_to_regex(self.pats, case_insensitive=env.WINDOWS)
self.re = globs_to_regex(self.pats, case_insensitive=env.WINDOWS)
self.name = name

def __repr__(self):
return f"<FnmatchMatcher {self.name} {self.pats!r}>"
return f"<GlobMatcher {self.name} {self.pats!r}>"

def info(self):
"""A list of strings for displaying when dumping state."""
Expand All @@ -282,37 +281,69 @@ def sep(s):
return the_sep


def fnmatches_to_regex(patterns, case_insensitive=False, partial=False):
"""Convert fnmatch patterns to a compiled regex that matches any of them.
# Tokenizer for _glob_to_regex.
# None as a sub means disallowed.
G2RX_TOKENS = [(re.compile(rx), sub) for rx, sub in [
(r"\*\*\*+", None), # Can't have ***
(r"[^/]+\*\*+", None), # Can't have x**
(r"\*\*+[^/]+", None), # Can't have **x
(r"\*\*/\*\*", None), # Can't have **/**
(r"^\*+/", r"(.*[/\\\\])?"), # ^*/ matches any prefix-slash, or nothing.
(r"/\*+$", r"[/\\\\].*"), # /*$ matches any slash-suffix.
(r"\*\*/", r"(.*[/\\\\])?"), # **/ matches any subdirs, including none
(r"/", r"[/\\\\]"), # / matches either slash or backslash
(r"\*", r"[^/\\\\]*"), # * matches any number of non slash-likes
(r"\?", r"[^/\\\\]"), # ? matches one non slash-like
(r"\[.*?\]", r"\g<0>"), # [a-f] matches [a-f]
(r"[a-zA-Z0-9_-]+", r"\g<0>"), # word chars match themselves
(r"[\[\]+{}]", None), # Can't have regex special chars
(r".", r"\\\g<0>"), # Anything else is escaped to be safe
]]

def _glob_to_regex(pattern):
"""Convert a file-path glob pattern into a regex."""
# Turn all backslashes into slashes to simplify the tokenizer.
pattern = pattern.replace("\\", "/")
if "/" not in pattern:
pattern = "**/" + pattern
path_rx = []
pos = 0
while pos < len(pattern):
for rx, sub in G2RX_TOKENS:
m = rx.match(pattern, pos=pos)
if m:
if sub is None:
raise ConfigError(f"File pattern can't include {m[0]!r}")
path_rx.append(m.expand(sub))
pos = m.end()
break
return "".join(path_rx)


def globs_to_regex(patterns, case_insensitive=False, partial=False):
"""Convert glob patterns to a compiled regex that matches any of them.
Slashes are always converted to match either slash or backslash, for
Windows support, even when running elsewhere.
If the pattern has no slash or backslash, then it is interpreted as
matching a file name anywhere it appears in the tree. Otherwise, the glob
pattern must match the whole file path.
If `partial` is true, then the pattern will match if the target string
starts with the pattern. Otherwise, it must match the entire string.
Returns: a compiled regex object. Use the .match method to compare target
strings.
"""
regexes = (fnmatch.translate(pattern) for pattern in patterns)
# */ at the start should also match nothing.
regexes = (re.sub(r"^\(\?s:\.\*(\\\\|/)", r"(?s:^(.*\1)?", regex) for regex in regexes)
# Be agnostic: / can mean backslash or slash.
regexes = (re.sub(r"/", r"[\\\\/]", regex) for regex in regexes)

if partial:
# fnmatch always adds a \Z to match the whole string, which we don't
# want, so we remove the \Z. While removing it, we only replace \Z if
# followed by paren (introducing flags), or at end, to keep from
# destroying a literal \Z in the pattern.
regexes = (re.sub(r'\\Z(\(\?|$)', r'\1', regex) for regex in regexes)

flags = 0
if case_insensitive:
flags |= re.IGNORECASE
compiled = re.compile(join_regex(regexes), flags=flags)

rx = join_regex(map(_glob_to_regex, patterns))
if not partial:
rx = rf"(?:{rx})\Z"
compiled = re.compile(rx, flags=flags)
return compiled


Expand Down Expand Up @@ -342,7 +373,7 @@ def pprint(self):
def add(self, pattern, result):
"""Add the `pattern`/`result` pair to the list of aliases.
`pattern` is an `fnmatch`-style pattern. `result` is a simple
`pattern` is an `glob`-style pattern. `result` is a simple
string. When mapping paths, if a path starts with a match against
`pattern`, then that match is replaced with `result`. This models
isomorphic source trees being rooted at different places on two
Expand Down Expand Up @@ -370,7 +401,7 @@ def add(self, pattern, result):
pattern += pattern_sep

# Make a regex from the pattern.
regex = fnmatches_to_regex([pattern], case_insensitive=True, partial=True)
regex = globs_to_regex([pattern], case_insensitive=True, partial=True)

# Normalize the result: it must end with a path separator.
result_sep = sep(result)
Expand Down
6 changes: 3 additions & 3 deletions coverage/inorout.py
Expand Up @@ -16,7 +16,7 @@
from coverage import env
from coverage.disposition import FileDisposition, disposition_init
from coverage.exceptions import CoverageException, PluginError
from coverage.files import TreeMatcher, FnmatchMatcher, ModuleMatcher
from coverage.files import TreeMatcher, GlobMatcher, ModuleMatcher
from coverage.files import prep_patterns, find_python_files, canonical_filename
from coverage.misc import sys_modules_saved
from coverage.python import source_for_file, source_for_morf
Expand Down Expand Up @@ -260,10 +260,10 @@ def debug(msg):
self.pylib_match = TreeMatcher(self.pylib_paths, "pylib")
debug(f"Python stdlib matching: {self.pylib_match!r}")
if self.include:
self.include_match = FnmatchMatcher(self.include, "include")
self.include_match = GlobMatcher(self.include, "include")
debug(f"Include matching: {self.include_match!r}")
if self.omit:
self.omit_match = FnmatchMatcher(self.omit, "omit")
self.omit_match = GlobMatcher(self.omit, "omit")
debug(f"Omit matching: {self.omit_match!r}")

self.cover_match = TreeMatcher(self.cover_paths, "coverage")
Expand Down
6 changes: 3 additions & 3 deletions coverage/report.py
Expand Up @@ -6,7 +6,7 @@
import sys

from coverage.exceptions import CoverageException, NoDataError, NotPython
from coverage.files import prep_patterns, FnmatchMatcher
from coverage.files import prep_patterns, GlobMatcher
from coverage.misc import ensure_dir_for_file, file_be_gone


Expand Down Expand Up @@ -57,11 +57,11 @@ def get_analysis_to_report(coverage, morfs):
config = coverage.config

if config.report_include:
matcher = FnmatchMatcher(prep_patterns(config.report_include), "report_include")
matcher = GlobMatcher(prep_patterns(config.report_include), "report_include")
file_reporters = [fr for fr in file_reporters if matcher.match(fr.filename)]

if config.report_omit:
matcher = FnmatchMatcher(prep_patterns(config.report_omit), "report_omit")
matcher = GlobMatcher(prep_patterns(config.report_omit), "report_omit")
file_reporters = [fr for fr in file_reporters if not matcher.match(fr.filename)]

if not file_reporters:
Expand Down
1 change: 0 additions & 1 deletion tests/test_api.py
Expand Up @@ -71,7 +71,6 @@ def test_unexecuted_file(self):
assert missing == [1]

def test_filenames(self):

self.make_file("mymain.py", """\
import mymod
a = 1
Expand Down

0 comments on commit f93c620

Please sign in to comment.