Skip to content

Commit

Permalink
Fix #14, ensure redos expressions are backtrackable
Browse files Browse the repository at this point in the history
  • Loading branch information
mschwager committed Mar 4, 2020
1 parent 331f034 commit 49a3a7b
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 14 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Expand Up @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Fixed
- False positive for `DUO138` when expressions aren't backtrackable ([#14](https://github.com/dlint-py/dlint/issues/14))

## [0.10.2] - 2020-02-19
### Changed
Expand Down
61 changes: 47 additions & 14 deletions dlint/redos/detect.py
Expand Up @@ -35,15 +35,20 @@

class OpNode(object):

def __init__(self, op, args):
def __init__(self, op, args, backtrackable=False):
self.op = op
self.args = args
self.children = []
self.backtrackable = backtrackable
self.children = collections.deque()

def __str__(self, level=0):
result = (
" " * level
+ "{}: {}".format(self.op, self.args)
+ "{}: args={} backtrackable={}".format(
self.op,
self.args,
self.backtrackable
)
+ "\n"
)

Expand All @@ -53,15 +58,22 @@ def __str__(self, level=0):
return result

def __repr__(self):
return "<{} - op={} args={}>".format(
return "<{} - op={} args={} backtrackable={}>".format(
self.__class__.__name__,
self.op,
self.args
self.args,
self.backtrackable
)


def build_op_tree(node, subpattern):
for op, av in subpattern.data:
def build_op_tree_helper(node, subpattern, parent_backtrackable):
prev_sibling_backtrackable = False

# Iterating in reverse helps with determining backtrackability. A
# subpattern's ability to backtrack depends on subsequent subpatterns, so
# it's easier to determine the last subpattern's backtrackability then
# propagate that information backwards.
for op, av in reversed(subpattern.data):
args = []
subpatterns = []

Expand All @@ -82,11 +94,23 @@ def build_op_tree(node, subpattern):
else:
args.append(av)

new_node = OpNode(op, tuple(args))
for sp in subpatterns:
build_op_tree(new_node, sp)
current_backtrackable = (
parent_backtrackable
or prev_sibling_backtrackable
)
new_node = OpNode(op, tuple(args), current_backtrackable)
for sp in reversed(subpatterns):
build_op_tree_helper(new_node, sp, current_backtrackable)

prev_sibling_backtrackable = (
prev_sibling_backtrackable
or not optional_repeat(new_node)
)
node.children.appendleft(new_node)


node.children.append(new_node)
def build_op_tree(node, subpattern):
build_op_tree_helper(node, subpattern, False)


class CharacterRange(object):
Expand Down Expand Up @@ -208,6 +232,15 @@ def __repr__(self):
)


def optional_repeat(node):
if node.op not in sre_parse._REPEATCODES:
return False

repeat_min, repeat_max = node.args

return repeat_min == 0


def large_repeat(node):
if node.op not in sre_parse._REPEATCODES:
return False
Expand Down Expand Up @@ -235,9 +268,9 @@ def max_nested_quantifiers(node):
max_nested_quantifiers(child)
for child in node.children
)
is_large_repeat = int(large_repeat(node))
is_catastrophic = int(large_repeat(node) and node.backtrackable)

return is_large_repeat + child_max
return is_catastrophic + child_max


def inclusive_alternation_branch(branch_node):
Expand All @@ -262,7 +295,7 @@ def mutually_inclusive_alternation_helper(node, nested_quantifier):
inclusive_alternation = inclusive_alternation_branch(node)

return any(
(nested_quantifier and inclusive_alternation)
(nested_quantifier and inclusive_alternation and node.backtrackable)
or mutually_inclusive_alternation_helper(child, nested_quantifier)
for child in node.children
)
Expand Down
47 changes: 47 additions & 0 deletions tests/test_bad_re_catastrophic_use.py
Expand Up @@ -268,5 +268,52 @@ def test_bad_re_catastrophic_not_literal_string(re_call):
assert result == expected


BACKTRACKABLE = [
(r"(a+)+z", True), # present subsequent subexpression
(r"([a-c]|[c-e])+z", True), # alternation present subsequent subexpression
(r"(a+)+", False), # missing subsequent subexpression
(r"(a+)+x?y?z?", False), # all subsequent are optional
(r"(a+)+z{0,5}", False), # all subsequent are optional
(r"(a+)+z{,5}", False), # all subsequent are optional
(r"(a+)+z*", False), # all subsequent are optional
(r"(a+)+b*c?d*?e", True), # long propagation
(r"(a+)+$", True), # at end propagation
(r"(a+)+\\Z", True), # at end string propagation
(r"(a|([b-c]|[c-e]))+z", True), # parent subsequent subexpression
(r"(a|([b-c]|[c-e])+z)", True), # sibling subsequent subexpression
(r"(a|([b-c]|[c-e]z)+)", False), # sibling expression parent quantifier
(r"(?://[^\n]*)*", False), # found in the wild
(r"(?:[ \t].*?(?:\n|$))*", False), # found in the wild
(r"<.*?>|((?:\\w[-\\w]*|&.*?;)+)", False), # found in the wild
(r"^(\\s+)\\w+=.*(\\n\\1\\w+=.*)+", False), # found in the wild
(r"([^*{}\\s]@|[^*{}@]|\\*(?!/))+", False), # found in the wild
]


@pytest.mark.parametrize("expression, catastrophic", BACKTRACKABLE)
def test_bad_re_catastrophic_backtrackable(expression, catastrophic):
python_node = dlint.test.base.get_ast_node(
"""
import re
re.search('{EXPRESSION}')
""".format(EXPRESSION=expression)
)

linter = dlint.linters.BadReCatastrophicUseLinter()
linter.visit(python_node)

result = linter.get_results()
expected = [] if not catastrophic else [
dlint.linters.base.Flake8Result(
lineno=4,
col_offset=0,
message=dlint.linters.BadReCatastrophicUseLinter._error_tmpl
)
]

assert result == expected


if __name__ == "__main__":
unittest.main()

0 comments on commit 49a3a7b

Please sign in to comment.