pytest-dev · bluetech · May 7, 2020 · Apr 25, 2020
diff --git a/changelog/7122.breaking.rst b/changelog/7122.breaking.rst
@@ -0,0 +1,3 @@
+Expressions given to the ``-m`` and ``-k`` options are no longer evaluated using Python's ``eval()``.
+The format supports ``or``, ``and``, ``not``, parenthesis and general identifiers to match against.
+Python constants, keywords or other operators are no longer evaluated differently.
diff --git a/doc/en/example/markers.rst b/doc/en/example/markers.rst
@@ -141,14 +141,14 @@ Or select multiple nodes:
 Using ``-k expr`` to select tests based on their name
 -------------------------------------------------------
 
-.. versionadded: 2.0/2.3.4
+.. versionadded:: 2.0/2.3.4
 
 You can use the ``-k`` command line option to specify an expression
 which implements a substring match on the test names instead of the
 exact match on markers that ``-m`` provides.  This makes it easy to
 select tests based on their names:
 
-.. versionadded: 5.4
+.. versionchanged:: 5.4
 
 The expression matching is now case-insensitive.
 
@@ -198,20 +198,8 @@ Or to select "http" and "quick" tests:
 
     ===================== 2 passed, 2 deselected in 0.12s ======================
 
-.. note::
-
-    If you are using expressions such as ``"X and Y"`` then both ``X`` and ``Y``
-    need to be simple non-keyword names. For example, ``"pass"`` or ``"from"``
-    will result in SyntaxErrors because ``"-k"`` evaluates the expression using
-    Python's `eval`_ function.
-
-.. _`eval`: https://docs.python.org/3.6/library/functions.html#eval
-
+You can use ``and``, ``or``, ``not`` and parentheses.
 
-    However, if the ``"-k"`` argument is a simple string, no such restrictions
-    apply. Also ``"-k 'not STRING'"`` has no restrictions.  You can also
-    specify numbers like ``"-k 1.3"`` to match tests which are parametrized
-    with the float ``"1.3"``.
 
 Registering markers
 -------------------------------------

diff --git a/src/_pytest/mark/expression.py b/src/_pytest/mark/expression.py
@@ -0,0 +1,173 @@
+r"""
+Evaluate match expressions, as used by `-k` and `-m`.
+
+The grammar is:
+
+expression: expr? EOF
+expr:       and_expr ('or' and_expr)*
+and_expr:   not_expr ('and' not_expr)*
+not_expr:   'not' not_expr | '(' expr ')' | ident
+ident:      (\w|:|\+|-|\.|\[|\])+
+
+The semantics are:
+
+- Empty expression evaluates to False.
+- ident evaluates to True of False according to a provided matcher function.
+- or/and/not evaluate according to the usual boolean semantics.
+"""
+import enum
+import re
+from typing import Callable
+from typing import Iterator
+from typing import Optional
+from typing import Sequence
+
+import attr
+
+from _pytest.compat import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from typing import NoReturn
+
+
+__all__ = [
+    "evaluate",
+    "ParseError",
+]
+
+
+class TokenType(enum.Enum):
+    LPAREN = "left parenthesis"
+    RPAREN = "right parenthesis"
+    OR = "or"
+    AND = "and"
+    NOT = "not"
+    IDENT = "identifier"
+    EOF = "end of input"
+
+
+@attr.s(frozen=True, slots=True)
+class Token:
+    type = attr.ib(type=TokenType)
+    value = attr.ib(type=str)
+    pos = attr.ib(type=int)
+
+
+class ParseError(Exception):
+    """The expression contains invalid syntax.
+
+    :param column: The column in the line where the error occurred (1-based).
+    :param message: A description of the error.
+    """
+
+    def __init__(self, column: int, message: str) -> None:
+        self.column = column
+        self.message = message
+
+    def __str__(self) -> str:
+        return "at column {}: {}".format(self.column, self.message)
+
+
+class Scanner:
+    __slots__ = ("tokens", "current")
+
+    def __init__(self, input: str) -> None:
+        self.tokens = self.lex(input)
+        self.current = next(self.tokens)
+
+    def lex(self, input: str) -> Iterator[Token]:
+        pos = 0
+        while pos < len(input):
+            if input[pos] in (" ", "\t"):
+                pos += 1
+            elif input[pos] == "(":
+                yield Token(TokenType.LPAREN, "(", pos)
+                pos += 1
+            elif input[pos] == ")":
+                yield Token(TokenType.RPAREN, ")", pos)
+                pos += 1
+            else:
+                match = re.match(r"(:?\w|:|\+|-|\.|\[|\])+", input[pos:])
+                if match:
+                    value = match.group(0)
+                    if value == "or":
+                        yield Token(TokenType.OR, value, pos)
+                    elif value == "and":
+                        yield Token(TokenType.AND, value, pos)
+                    elif value == "not":
+                        yield Token(TokenType.NOT, value, pos)
+                    else:
+                        yield Token(TokenType.IDENT, value, pos)
+                    pos += len(value)
+                else:
+                    raise ParseError(
+                        pos + 1, 'unexpected character "{}"'.format(input[pos]),
+                    )
+        yield Token(TokenType.EOF, "", pos)
+
+    def accept(self, type: TokenType, *, reject: bool = False) -> Optional[Token]:
+        if self.current.type is type:
+            token = self.current
+            if token.type is not TokenType.EOF:
+                self.current = next(self.tokens)
+            return token
+        if reject:
+            self.reject((type,))
+        return None
+
+    def reject(self, expected: Sequence[TokenType]) -> "NoReturn":
+        raise ParseError(
+            self.current.pos + 1,
+            "expected {}; got {}".format(
+                " OR ".join(type.value for type in expected), self.current.type.value,
+            ),
+        )
+
+
+def expression(s: Scanner, matcher: Callable[[str], bool]) -> bool:
+    if s.accept(TokenType.EOF):
+        return False
+    ret = expr(s, matcher)
+    s.accept(TokenType.EOF, reject=True)
+    return ret
+
+
+def expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
+    ret = and_expr(s, matcher)
+    while s.accept(TokenType.OR):
+        rhs = and_expr(s, matcher)
+        ret = ret or rhs
+    return ret
+
+
+def and_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
+    ret = not_expr(s, matcher)
+    while s.accept(TokenType.AND):
+        rhs = not_expr(s, matcher)
+        ret = ret and rhs
+    return ret
+
+
+def not_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
+    if s.accept(TokenType.NOT):
+        return not not_expr(s, matcher)
+    if s.accept(TokenType.LPAREN):
+        ret = expr(s, matcher)
+        s.accept(TokenType.RPAREN, reject=True)
+        return ret
+    ident = s.accept(TokenType.IDENT)
+    if ident:
+        return matcher(ident.value)
+    s.reject((TokenType.NOT, TokenType.LPAREN, TokenType.IDENT))
+
+
+def evaluate(input: str, matcher: Callable[[str], bool]) -> bool:
+    """Evaluate a match expression as used by -k and -m.
+
+    :param input: The input expression - one line.
+    :param matcher: Given an identifier, should return whether it matches or not.
+                    Should be prepared to handle arbitrary strings as input.
+
+    Returns whether the entire expression matches or not.
+    """
+    return expression(Scanner(input), matcher)
diff --git a/src/_pytest/mark/legacy.py b/src/_pytest/mark/legacy.py
@@ -2,44 +2,46 @@
 this is a place where we put datastructures used by legacy apis
 we hope to remove
 """
-import keyword
 from typing import Set
 
 import attr
 
 from _pytest.compat import TYPE_CHECKING
 from _pytest.config import UsageError
+from _pytest.mark.expression import evaluate
+from _pytest.mark.expression import ParseError
 
 if TYPE_CHECKING:
     from _pytest.nodes import Item  # noqa: F401 (used in type string)
 
 
 @attr.s
-class MarkMapping:
-    """Provides a local mapping for markers where item access
-    resolves to True if the marker is present. """
+class MarkMatcher:
+    """A matcher for markers which are present."""
 
     own_mark_names = attr.ib()
 
     @classmethod
-    def from_item(cls, item):
+    def from_item(cls, item) -> "MarkMatcher":
         mark_names = {mark.name for mark in item.iter_markers()}
         return cls(mark_names)
 
-    def __getitem__(self, name):
+    def __call__(self, name: str) -> bool:
         return name in self.own_mark_names
 
 
 @attr.s
-class KeywordMapping:
-    """Provides a local mapping for keywords.
-    Given a list of names, map any substring of one of these names to True.
+class KeywordMatcher:
+    """A matcher for keywords.
+
+    Given a list of names, matches any substring of one of these names. The
+    string inclusion check is case-insensitive.
     """
 
     _names = attr.ib(type=Set[str])
 
     @classmethod
-    def from_item(cls, item: "Item") -> "KeywordMapping":
+    def from_item(cls, item: "Item") -> "KeywordMatcher":
         mapped_names = set()
 
         # Add the names of the current item and any parent items
@@ -62,12 +64,7 @@ def from_item(cls, item: "Item") -> "KeywordMapping":
 
         return cls(mapped_names)
 
-    def __getitem__(self, subname: str) -> bool:
-        """Return whether subname is included within stored names.
-
-        The string inclusion check is case-insensitive.
-
-        """
+    def __call__(self, subname: str) -> bool:
         subname = subname.lower()
         names = (name.lower() for name in self._names)
 
@@ -77,18 +74,17 @@ def __getitem__(self, subname: str) -> bool:
         return False
 
 
-python_keywords_allowed_list = ["or", "and", "not"]
-
-
-def matchmark(colitem, markexpr):
+def matchmark(colitem, markexpr: str) -> bool:
     """Tries to match on any marker names, attached to the given colitem."""
     try:
-        return eval(markexpr, {}, MarkMapping.from_item(colitem))
-    except Exception:
-        raise UsageError("Wrong expression passed to '-m': {}".format(markexpr))
+        return evaluate(markexpr, MarkMatcher.from_item(colitem))
+    except ParseError as e:
+        raise UsageError(
+            "Wrong expression passed to '-m': {}: {}".format(markexpr, e)
+        ) from None
 
 
-def matchkeyword(colitem, keywordexpr):
+def matchkeyword(colitem, keywordexpr: str) -> bool:
     """Tries to match given keyword expression to given collector item.
 
     Will match on the name of colitem, including the names of its parents.
@@ -97,20 +93,9 @@ def matchkeyword(colitem, keywordexpr):
     Additionally, matches on names in the 'extra_keyword_matches' set of
     any item, as well as names directly assigned to test functions.
     """
-    mapping = KeywordMapping.from_item(colitem)
-    if " " not in keywordexpr:
-        # special case to allow for simple "-k pass" and "-k 1.3"
-        return mapping[keywordexpr]
-    elif keywordexpr.startswith("not ") and " " not in keywordexpr[4:]:
-        return not mapping[keywordexpr[4:]]
-    for kwd in keywordexpr.split():
-        if keyword.iskeyword(kwd) and kwd not in python_keywords_allowed_list:
-            raise UsageError(
-                "Python keyword '{}' not accepted in expressions passed to '-k'".format(
-                    kwd
-                )
-            )
     try:
-        return eval(keywordexpr, {}, mapping)
-    except Exception:
-        raise UsageError("Wrong expression passed to '-k': {}".format(keywordexpr))
+        return evaluate(keywordexpr, KeywordMatcher.from_item(colitem))
+    except ParseError as e:
+        raise UsageError(
+            "Wrong expression passed to '-k': {}: {}".format(keywordexpr, e)
+        ) from None