Skip to content

Commit

Permalink
Revert strict LineStart interpretation in 3.0.0 to 2.4.x behavior (Is…
Browse files Browse the repository at this point in the history
…sue #317)
  • Loading branch information
ptmcg committed Oct 26, 2021
1 parent 8b3d958 commit 4ab17bb
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 40 deletions.
30 changes: 27 additions & 3 deletions CHANGES
Expand Up @@ -4,9 +4,33 @@ Change Log

Version 3.0.2 -
---------------
- Performance enhancement to `one_of` to always generate `regex`, even
if `caseless` or `as_keyword` args are given as `True` (unless explicitly
disabled by passing `use_regex=True`).
- Reverted change in behavior with LineStart and StringStart, which changed the
interpretation of when and how LineStart and StringStart should match when
a line starts with spaces. In 3.0.0, the xxxStart expressions were not
really treated like expressions in their own right, but as modifiers to the
following expression when used like `LineStart() + expr`, so that if there
were whitespace on the line before `expr` (which would match in versions prior
to 3.0.0), the match would fail.

3.0.0 implemented this by automatically promoting `LineStart() + expr` to
`AtLineStart(expr)`, which broke existing parsers that did not expect `expr` to
necessarily be right at the start of the line, but only to be the first token
found on the line. This was reported as a regression in Issue #317.

In 3.0.2, pyparsing reverts to the previous behavior, but will retain the new
`AtLineStart` and `AtStringStart` expression classes, so that parsers can choose
whichever behavior applies in their specific instance. Specifically:

# matches expr if it is the first token on the line
# (allows for leading whitespace)
LineStart() + expr

# matches only if expr is found in column 1
AtLineStart(expr)

- Performance enhancement to `one_of` to always generate an internal `Regex`,
even if `caseless` or `as_keyword` args are given as `True` (unless explicitly
disabled by passing `use_regex=False`).


Version 3.0.1 -
Expand Down
8 changes: 4 additions & 4 deletions examples/test_bibparse.py
Expand Up @@ -57,22 +57,22 @@ def test_parse_string(self):
self.assertEqual(obj.parseString("{}").asList(), [])
self.assertEqual(obj.parseString('{a "string}')[0], 'a "string')
self.assertEqual(
["a ", ["nested"], "string"],
["a ", ["nested"], " string"],
obj.parseString("{a {nested} string}").asList(),
)
self.assertEqual(
["a ", ["double ", ["nested"]], "string"],
["a ", ["double ", ["nested"]], " string"],
obj.parseString("{a {double {nested}} string}").asList(),
)
for obj in (bp.quoted_string, bp.string, bp.field_value):
self.assertEqual([], obj.parseString('""').asList())
self.assertEqual("a string", obj.parseString('"a string"')[0])
self.assertEqual(
["a ", ["nested"], "string"],
["a ", ["nested"], " string"],
obj.parseString('"a {nested} string"').asList(),
)
self.assertEqual(
["a ", ["double ", ["nested"]], "string"],
["a ", ["double ", ["nested"]], " string"],
obj.parseString('"a {double {nested}} string"').asList(),
)

Expand Down
2 changes: 1 addition & 1 deletion pyparsing/__init__.py
Expand Up @@ -105,7 +105,7 @@
),
"",
)[__version_info__.release_level == "final"]
__version_time__ = "26 October 2021 20:39 UTC"
__version_time__ = "26 October 2021 23:54 UTC"
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"

Expand Down
41 changes: 18 additions & 23 deletions pyparsing/core.py
Expand Up @@ -2009,6 +2009,8 @@ def run_tests(
(Note that this is a raw string literal, you must include the leading ``'r'``.)
"""
from .testing import pyparsing_test

parseAll = parseAll and parse_all
fullDump = fullDump and full_dump
printResults = printResults and print_results
Expand All @@ -2030,23 +2032,22 @@ def run_tests(
BOM = "\ufeff"
for t in tests:
if comment is not None and comment.matches(t, False) or comments and not t:
comments.append(t)
comments.append(pyparsing_test.with_line_numbers(t))
continue
if not t:
continue
out = ["\n" + "\n".join(comments) if comments else "", t]
out = [
"\n" + "\n".join(comments) if comments else "",
pyparsing_test.with_line_numbers(t),
]
comments = []
try:
# convert newline marks to actual newlines, and strip leading BOM if present
t = NL.transform_string(t.lstrip(BOM))
result = self.parse_string(t, parse_all=parseAll)
except ParseBaseException as pe:
fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
if "\n" in t:
out.append(line(pe.loc, t))
out.append(" " * (col(pe.loc, t) - 1) + "^" + fatal)
else:
out.append(" " * pe.loc + "^" + fatal)
out.append(pe.explain())
out.append("FAIL: " + str(pe))
success = success and failureTests
result = pe
Expand Down Expand Up @@ -3388,22 +3389,20 @@ class LineStart(_PositionToken):

def __init__(self):
super().__init__()
self.leave_whitespace()
self.orig_whiteChars = set() | self.whiteChars
self.whiteChars.discard("\n")
self.skipper = Empty().set_whitespace_chars(self.whiteChars)
self.errmsg = "Expected start of line"

def __add__(self, other):
return AtLineStart(other)

def __sub__(self, other):
return AtLineStart(other) - Empty()

def preParse(self, instring, loc):
if loc == 0:
return loc
else:
if instring[loc : loc + 1] == "\n" and "\n" in self.whiteChars:
ret = loc + 1
else:
ret = super().preParse(instring, loc)
ret = self.skipper.preParse(instring, loc)
if "\n" in self.orig_whiteChars:
while instring[ret : ret + 1] == "\n":
ret = self.skipper.preParse(instring, ret + 1)
return ret

def parseImpl(self, instring, loc, doActions=True):
Expand Down Expand Up @@ -3444,12 +3443,6 @@ def __init__(self):
super().__init__()
self.errmsg = "Expected start of text"

def __add__(self, other):
return AtStringStart(other)

def __sub__(self, other):
return AtStringStart(other) - Empty()

def parseImpl(self, instring, loc, doActions=True):
if loc != 0:
# see if entire string up to here is just whitespace and ignoreables
Expand Down Expand Up @@ -3835,6 +3828,7 @@ def __init__(self, exprs: IterableType[ParserElement], savelist: bool = False):
super().__init__(exprs, savelist)
if self.exprs:
self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
else:
self.mayReturnEmpty = True

Expand Down Expand Up @@ -3976,6 +3970,7 @@ def __init__(self, exprs: IterableType[ParserElement], savelist: bool = False):
if self.exprs:
self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
self.callPreparse = all(e.callPreparse for e in self.exprs)
self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
else:
self.mayReturnEmpty = True

Expand Down
61 changes: 52 additions & 9 deletions tests/test_unit.py
Expand Up @@ -3587,14 +3587,14 @@ def testLineStart2(self):
"""

test = dedent(test)
print(test)
print(pp.testing.with_line_numbers(test))

print("normal parsing")
for t, s, e in (pp.LineStart() + "AAA").scanString(test):
print(s, e, pp.lineno(s, test), pp.line(s, test), repr(test[s]))
print(s, e, pp.lineno(s, test), pp.line(s, test), repr(t))
print()
self.assertEqual(
"A", test[s], "failed LineStart with insignificant newlines"
"A", t[0][0], "failed LineStart with insignificant newlines"
)

print(r"parsing without \n in whitespace chars")
Expand All @@ -3604,10 +3604,10 @@ def testLineStart2(self):
print(s, e, pp.lineno(s, test), pp.line(s, test), repr(test[s]))
print()
self.assertEqual(
"A", test[s], "failed LineStart with insignificant newlines"
"A", t[0][0], "failed LineStart with insignificant newlines"
)

def testLineStart3(self):
def testLineStartWithLeadingSpaces(self):
# testing issue #272
instring = dedent(
"""
Expand All @@ -3634,16 +3634,21 @@ def testLineStart3(self):
alpha_line | pp.Word("_"),
alpha_line | alpha_line,
pp.MatchFirst([alpha_line, alpha_line]),
alpha_line ^ pp.Word("_"),
alpha_line ^ alpha_line,
pp.Or([alpha_line, pp.Word("_")]),
pp.LineStart() + pp.Word(pp.alphas) + pp.LineEnd().suppress(),
pp.And([pp.LineStart(), pp.Word(pp.alphas), pp.LineEnd().suppress()]),
]
fails = []
for test in tests:
print(test.searchString(instring))
self.assertEqual(
["a", "d", "e"], flatten(sum(test.search_string(instring)).as_list())
)
if ['a', 'b', 'c', 'd', 'e', 'f', 'g'] != flatten(sum(test.search_string(instring)).as_list()):
fails.append(test)
if fails:
self.fail("failed LineStart tests:\n{}".format("\n".join(str(expr) for expr in fails)))

def testLineStart4(self):
def testAtLineStart(self):
test = dedent(
"""\
AAA this line
Expand All @@ -3663,6 +3668,10 @@ def testLineStart4(self):
)

def testStringStart(self):
self.assertParseAndCheckList(pp.StringStart() + pp.Word(pp.nums), "123", ["123"])
self.assertParseAndCheckList(pp.StringStart() + pp.Word(pp.nums), " 123", ["123"])
self.assertParseAndCheckList(pp.StringStart() + "123", "123", ["123"])
self.assertParseAndCheckList(pp.StringStart() + "123", " 123", ["123"])
self.assertParseAndCheckList(pp.AtStringStart(pp.Word(pp.nums)), "123", ["123"])

self.assertParseAndCheckList(pp.AtStringStart("123"), "123", ["123"])
Expand All @@ -3673,6 +3682,40 @@ def testStringStart(self):
with self.assertRaisesParseException():
pp.AtStringStart("123").parse_string(" 123")

def testStringStartAndLineStartInsideAnd(self):
P_MTARG = (
pp.StringStart()
+ pp.Word("abcde")
+ pp.StringEnd()
)

P_MTARG2 = (
pp.LineStart()
+ pp.Word("abcde")
+ pp.StringEnd()
)

P_MTARG3 = (
pp.AtLineStart(pp.Word("abcde"))
+ pp.StringEnd()
)

def test(expr, string):
expr.streamline()
print(expr, repr(string), end=" ")
print(expr.parse_string(string))

test(P_MTARG, "aaa")
test(P_MTARG2, "aaa")
test(P_MTARG2, "\naaa")
test(P_MTARG2, " aaa")
test(P_MTARG2, "\n aaa")

with self.assertRaisesParseException():
test(P_MTARG3, " aaa")
with self.assertRaisesParseException():
test(P_MTARG3, "\n aaa")

def testLineAndStringEnd(self):

NLs = pp.OneOrMore(pp.lineEnd)
Expand Down

0 comments on commit 4ab17bb

Please sign in to comment.