Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor: attempt to simplify the multiply operator. #547

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
81 changes: 30 additions & 51 deletions pyparsing/core.py
Expand Up @@ -1504,64 +1504,43 @@ def __mul__(self, other) -> "ParserElement":
occurrences. If this behavior is desired, then write
``expr*(None, n) + ~expr``
"""
if other is Ellipsis:
other = (0, None)
elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
other = ((0,) + other[1:] + (None,))[:2]

if not isinstance(other, (int, tuple)):
return NotImplemented

if isinstance(other, int):
minElements, optElements = other, 0
minElements, maxElements = other, other
elif isinstance(other, tuple):
minElements, maxElements = (other + (None, None))[:2]
ptmcg marked this conversation as resolved.
Show resolved Hide resolved
elif other is Ellipsis:
minElements, maxElements = other, None
else:
other = tuple(o if o is not Ellipsis else None for o in other)
other = (other + (None, None))[:2]
if other[0] is None:
other = (0, other[1])
if isinstance(other[0], int) and other[1] is None:
if other[0] == 0:
return ZeroOrMore(self)
if other[0] == 1:
return OneOrMore(self)
else:
return self * other[0] + ZeroOrMore(self)
elif isinstance(other[0], int) and isinstance(other[1], int):
minElements, optElements = other
optElements -= minElements
else:
return NotImplemented
return NotImplemented

if minElements < 0:
if minElements in (Ellipsis, None):
minElements = 0
elif type(minElements) != int:
return NotImplemented
elif minElements < 0:
raise ValueError("cannot multiply ParserElement by negative value")
if optElements < 0:
raise ValueError(
"second tuple value must be greater or equal to first tuple value"
)
if minElements == optElements == 0:
return And([])

if optElements:

def makeOptionalList(n):
if n > 1:
return Opt(self + makeOptionalList(n - 1))
else:
return Opt(self)

if minElements:
if minElements == 1:
ret = self + makeOptionalList(optElements)
else:
ret = And([self] * minElements) + makeOptionalList(optElements)
if maxElements in (Ellipsis, None):
if minElements == 0:
return ZeroOrMore(self)
elif minElements == 1:
return OneOrMore(self)
else:
ret = makeOptionalList(optElements)
else:
if minElements == 1:
ret = self
return self * minElements + ZeroOrMore(self)
elif type(maxElements) != int:
return NotImplemented
elif maxElements < minElements:
raise ValueError("second tuple value must be greater or equal to first tuple value")

if minElements == maxElements:
if minElements == 0:
return And([])
elif minElements == 1:
return self
else:
ret = And([self] * minElements)
return ret
return And([self] * minElements)

return And([self] * minElements) + (Opt(self) * (maxElements - minElements))
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no hurry on the code review, but after re-inspecting this myself, I'm beginning to wonder about this `Opt` construction and the effect it could have on the parse output.

In particular, it might be better to build a chain of nested `Opt` nodes (recursively, rather than iteratively as in this PR), because matching an input against the parse tree could then stop as soon as a single token fails to match -- whereas the `And([Opt(...), Opt(...), ...])` formulation here would be evaluated in full (I think?).


def __rmul__(self, other) -> "ParserElement":
# Reflected multiplication (``other * expr``): forward to __mul__ with the same operand, so both operand orders give the same result.
return self.__mul__(other)
Expand Down
13 changes: 9 additions & 4 deletions tests/test_unit.py
Expand Up @@ -3873,10 +3873,15 @@ def testMulWithNegativeNumber(self):
def testMulWithEllipsis(self):
"""multiply an expression with Ellipsis as ``expr * ...`` to match ZeroOrMore"""

expr = pp.Literal("A")("Achar") * ...
res = expr.parseString("A", parseAll=True)
self.assertEqual(["A"], res.asList(), "expected expr * ... to match ZeroOrMore")
print(res.dump())
for factor in (..., (...,), (..., 10,)):
expr = pp.Literal("A")("Achar") * factor
res = expr.parseString("A", parseAll=True)
self.assertEqual(
["A"],
res.asList(),
f"expected expr * {str(factor).replace('Ellipsis', '...')} to match ZeroOrMore",
)
print(res.dump())

def testUpcaseDowncaseUnicode(self):
import sys
Expand Down