Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stop using Python's eval() for -m and -k #7122

Merged
merged 1 commit into from May 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions changelog/7122.breaking.rst
@@ -0,0 +1,3 @@
Expressions given to the ``-m`` and ``-k`` options are no longer evaluated using Python's ``eval()``.
The format supports ``or``, ``and``, ``not``, parenthesis and general identifiers to match against.
Python constants, keywords or other operators are no longer evaluated differently.
18 changes: 3 additions & 15 deletions doc/en/example/markers.rst
Expand Up @@ -141,14 +141,14 @@ Or select multiple nodes:
Using ``-k expr`` to select tests based on their name
-------------------------------------------------------

.. versionadded: 2.0/2.3.4
.. versionadded:: 2.0/2.3.4

You can use the ``-k`` command line option to specify an expression
which implements a substring match on the test names instead of the
exact match on markers that ``-m`` provides. This makes it easy to
select tests based on their names:

.. versionadded: 5.4
.. versionchanged:: 5.4

The expression matching is now case-insensitive.

Expand Down Expand Up @@ -198,20 +198,8 @@ Or to select "http" and "quick" tests:

===================== 2 passed, 2 deselected in 0.12s ======================

.. note::

If you are using expressions such as ``"X and Y"`` then both ``X`` and ``Y``
need to be simple non-keyword names. For example, ``"pass"`` or ``"from"``
will result in SyntaxErrors because ``"-k"`` evaluates the expression using
Python's `eval`_ function.

.. _`eval`: https://docs.python.org/3.6/library/functions.html#eval

You can use ``and``, ``or``, ``not`` and parentheses.

However, if the ``"-k"`` argument is a simple string, no such restrictions
apply. Also ``"-k 'not STRING'"`` has no restrictions. You can also
specify numbers like ``"-k 1.3"`` to match tests which are parametrized
with the float ``"1.3"``.

Registering markers
-------------------------------------
Expand Down
173 changes: 173 additions & 0 deletions src/_pytest/mark/expression.py
@@ -0,0 +1,173 @@
r"""
Evaluate match expressions, as used by `-k` and `-m`.

The grammar is:

expression: expr? EOF
expr: and_expr ('or' and_expr)*
and_expr: not_expr ('and' not_expr)*
not_expr: 'not' not_expr | '(' expr ')' | ident
ident: (\w|:|\+|-|\.|\[|\])+

The semantics are:

- Empty expression evaluates to False.
- ident evaluates to True of False according to a provided matcher function.
- or/and/not evaluate according to the usual boolean semantics.
"""
import enum
import re
from typing import Callable
from typing import Iterator
from typing import Optional
from typing import Sequence

import attr

from _pytest.compat import TYPE_CHECKING

if TYPE_CHECKING:
from typing import NoReturn


__all__ = [
"evaluate",
"ParseError",
]


class TokenType(enum.Enum):
LPAREN = "left parenthesis"
RPAREN = "right parenthesis"
OR = "or"
AND = "and"
NOT = "not"
IDENT = "identifier"
EOF = "end of input"


@attr.s(frozen=True, slots=True)
class Token:
type = attr.ib(type=TokenType)
value = attr.ib(type=str)
pos = attr.ib(type=int)


class ParseError(Exception):
"""The expression contains invalid syntax.

:param column: The column in the line where the error occurred (1-based).
:param message: A description of the error.
"""

def __init__(self, column: int, message: str) -> None:
self.column = column
self.message = message

def __str__(self) -> str:
return "at column {}: {}".format(self.column, self.message)


class Scanner:
__slots__ = ("tokens", "current")

def __init__(self, input: str) -> None:
self.tokens = self.lex(input)
self.current = next(self.tokens)

def lex(self, input: str) -> Iterator[Token]:
pos = 0
while pos < len(input):
if input[pos] in (" ", "\t"):
pos += 1
elif input[pos] == "(":
yield Token(TokenType.LPAREN, "(", pos)
pos += 1
elif input[pos] == ")":
yield Token(TokenType.RPAREN, ")", pos)
pos += 1
else:
match = re.match(r"(:?\w|:|\+|-|\.|\[|\])+", input[pos:])
if match:
value = match.group(0)
if value == "or":
yield Token(TokenType.OR, value, pos)
elif value == "and":
yield Token(TokenType.AND, value, pos)
elif value == "not":
yield Token(TokenType.NOT, value, pos)
else:
yield Token(TokenType.IDENT, value, pos)
pos += len(value)
else:
raise ParseError(
pos + 1, 'unexpected character "{}"'.format(input[pos]),
)
yield Token(TokenType.EOF, "", pos)

def accept(self, type: TokenType, *, reject: bool = False) -> Optional[Token]:
if self.current.type is type:
token = self.current
if token.type is not TokenType.EOF:
self.current = next(self.tokens)
return token
if reject:
self.reject((type,))
return None

def reject(self, expected: Sequence[TokenType]) -> "NoReturn":
raise ParseError(
self.current.pos + 1,
"expected {}; got {}".format(
" OR ".join(type.value for type in expected), self.current.type.value,
),
)


def expression(s: Scanner, matcher: Callable[[str], bool]) -> bool:
if s.accept(TokenType.EOF):
return False
ret = expr(s, matcher)
s.accept(TokenType.EOF, reject=True)
return ret


def expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
ret = and_expr(s, matcher)
while s.accept(TokenType.OR):
rhs = and_expr(s, matcher)
ret = ret or rhs
return ret


def and_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
ret = not_expr(s, matcher)
while s.accept(TokenType.AND):
rhs = not_expr(s, matcher)
ret = ret and rhs
return ret


def not_expr(s: Scanner, matcher: Callable[[str], bool]) -> bool:
if s.accept(TokenType.NOT):
return not not_expr(s, matcher)
if s.accept(TokenType.LPAREN):
ret = expr(s, matcher)
s.accept(TokenType.RPAREN, reject=True)
return ret
ident = s.accept(TokenType.IDENT)
if ident:
return matcher(ident.value)
s.reject((TokenType.NOT, TokenType.LPAREN, TokenType.IDENT))


def evaluate(input: str, matcher: Callable[[str], bool]) -> bool:
"""Evaluate a match expression as used by -k and -m.

:param input: The input expression - one line.
:param matcher: Given an identifier, should return whether it matches or not.
Should be prepared to handle arbitrary strings as input.

Returns whether the entire expression matches or not.
"""
return expression(Scanner(input), matcher)
65 changes: 25 additions & 40 deletions src/_pytest/mark/legacy.py
Expand Up @@ -2,44 +2,46 @@
this is a place where we put datastructures used by legacy apis
we hope to remove
"""
import keyword
from typing import Set

import attr

from _pytest.compat import TYPE_CHECKING
from _pytest.config import UsageError
from _pytest.mark.expression import evaluate
from _pytest.mark.expression import ParseError

if TYPE_CHECKING:
from _pytest.nodes import Item # noqa: F401 (used in type string)


@attr.s
class MarkMapping:
"""Provides a local mapping for markers where item access
resolves to True if the marker is present. """
class MarkMatcher:
"""A matcher for markers which are present."""

own_mark_names = attr.ib()

@classmethod
def from_item(cls, item):
def from_item(cls, item) -> "MarkMatcher":
mark_names = {mark.name for mark in item.iter_markers()}
return cls(mark_names)

def __getitem__(self, name):
def __call__(self, name: str) -> bool:
return name in self.own_mark_names


@attr.s
class KeywordMapping:
"""Provides a local mapping for keywords.
Given a list of names, map any substring of one of these names to True.
class KeywordMatcher:
"""A matcher for keywords.

Given a list of names, matches any substring of one of these names. The
string inclusion check is case-insensitive.
"""

_names = attr.ib(type=Set[str])

@classmethod
def from_item(cls, item: "Item") -> "KeywordMapping":
def from_item(cls, item: "Item") -> "KeywordMatcher":
mapped_names = set()

# Add the names of the current item and any parent items
Expand All @@ -62,12 +64,7 @@ def from_item(cls, item: "Item") -> "KeywordMapping":

return cls(mapped_names)

def __getitem__(self, subname: str) -> bool:
"""Return whether subname is included within stored names.

The string inclusion check is case-insensitive.

"""
def __call__(self, subname: str) -> bool:
subname = subname.lower()
names = (name.lower() for name in self._names)

Expand All @@ -77,18 +74,17 @@ def __getitem__(self, subname: str) -> bool:
return False


python_keywords_allowed_list = ["or", "and", "not"]


def matchmark(colitem, markexpr):
def matchmark(colitem, markexpr: str) -> bool:
"""Tries to match on any marker names, attached to the given colitem."""
try:
return eval(markexpr, {}, MarkMapping.from_item(colitem))
except Exception:
raise UsageError("Wrong expression passed to '-m': {}".format(markexpr))
return evaluate(markexpr, MarkMatcher.from_item(colitem))
except ParseError as e:
raise UsageError(
"Wrong expression passed to '-m': {}: {}".format(markexpr, e)
) from None


def matchkeyword(colitem, keywordexpr):
def matchkeyword(colitem, keywordexpr: str) -> bool:
"""Tries to match given keyword expression to given collector item.

Will match on the name of colitem, including the names of its parents.
Expand All @@ -97,20 +93,9 @@ def matchkeyword(colitem, keywordexpr):
Additionally, matches on names in the 'extra_keyword_matches' set of
any item, as well as names directly assigned to test functions.
"""
mapping = KeywordMapping.from_item(colitem)
if " " not in keywordexpr:
# special case to allow for simple "-k pass" and "-k 1.3"
return mapping[keywordexpr]
elif keywordexpr.startswith("not ") and " " not in keywordexpr[4:]:
return not mapping[keywordexpr[4:]]
for kwd in keywordexpr.split():
if keyword.iskeyword(kwd) and kwd not in python_keywords_allowed_list:
raise UsageError(
"Python keyword '{}' not accepted in expressions passed to '-k'".format(
kwd
)
)
try:
return eval(keywordexpr, {}, mapping)
except Exception:
raise UsageError("Wrong expression passed to '-k': {}".format(keywordexpr))
return evaluate(keywordexpr, KeywordMatcher.from_item(colitem))
except ParseError as e:
raise UsageError(
"Wrong expression passed to '-k': {}: {}".format(keywordexpr, e)
) from None