Revert strict LineStart interpretation in 3.0.0 to 2.4.x behavior (Issue #317)
pyparsing · Oct 26, 2021 · 4ab17bb · 4ab17bb
1 parent 8b3d958
commit 4ab17bb
Show file tree

Hide file tree

Showing 5 changed files with 102 additions and 40 deletions.
diff --git a/CHANGES b/CHANGES
@@ -4,9 +4,33 @@ Change Log
 
 Version 3.0.2 -
 ---------------
-- Performance enhancement to `one_of` to always generate `regex`, even
-  if `caseless` or `as_keyword` args are given as `True` (unless explicitly
-  disabled by passing `use_regex=True`).
+- Reverted change in behavior with LineStart and StringStart, which changed the
+  interpretation of when and how LineStart and StringStart should match when
+  a line starts with spaces. In 3.0.0, the xxxStart expressions were not
+  really treated like expressions in their own right, but as modifiers to the
+  following expression when used like `LineStart() + expr`, so that if there
+  were whitespace on the line before `expr` (which would match in versions prior
+  to 3.0.0), the match would fail.
+
+  3.0.0 implemented this by automatically promoting `LineStart() + expr` to
+  `AtLineStart(expr)`, which broke existing parsers that did not expect `expr` to
+  necessarily be right at the start of the line, but only be the first token
+  found on the line. This was reported as a regression in Issue #317.
+
+  In 3.0.2, pyparsing reverts to the previous behavior, but will retain the new
+  `AtLineStart` and `AtStringStart` expression classes, so that parsers can choose
+  whichever behavior applies in their specific instance. Specifically:
+
+      # matches expr if it is the first token on the line
+      # (allows for leading whitespace)
+      LineStart() + expr
+
+      # matches only if expr is found in column 1
+      AtLineStart(expr)
+
+- Performance enhancement to `one_of` to always generate an internal `Regex`,
+  even if `caseless` or `as_keyword` args are given as `True` (unless explicitly
+  disabled by passing `use_regex=False`).
 
 
 Version 3.0.1 -

diff --git a/examples/test_bibparse.py b/examples/test_bibparse.py
@@ -57,22 +57,22 @@ def test_parse_string(self):
             self.assertEqual(obj.parseString("{}").asList(), [])
             self.assertEqual(obj.parseString('{a "string}')[0], 'a "string')
             self.assertEqual(
-                ["a ", ["nested"], "string"],
+                ["a ", ["nested"], " string"],
                 obj.parseString("{a {nested} string}").asList(),
             )
             self.assertEqual(
-                ["a ", ["double ", ["nested"]], "string"],
+                ["a ", ["double ", ["nested"]], " string"],
                 obj.parseString("{a {double {nested}} string}").asList(),
             )
         for obj in (bp.quoted_string, bp.string, bp.field_value):
             self.assertEqual([], obj.parseString('""').asList())
             self.assertEqual("a string", obj.parseString('"a string"')[0])
             self.assertEqual(
-                ["a ", ["nested"], "string"],
+                ["a ", ["nested"], " string"],
                 obj.parseString('"a {nested} string"').asList(),
             )
             self.assertEqual(
-                ["a ", ["double ", ["nested"]], "string"],
+                ["a ", ["double ", ["nested"]], " string"],
                 obj.parseString('"a {double {nested}} string"').asList(),
             )
 

diff --git a/pyparsing/__init__.py b/pyparsing/__init__.py
@@ -105,7 +105,7 @@
     ),
     "",
 )[__version_info__.release_level == "final"]
-__version_time__ = "26 October 2021 20:39 UTC"
+__version_time__ = "26 October 2021 23:54 UTC"
 __versionTime__ = __version_time__
 __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
 

diff --git a/pyparsing/core.py b/pyparsing/core.py
@@ -2009,6 +2009,8 @@ def run_tests(
 
         (Note that this is a raw string literal, you must include the leading ``'r'``.)
         """
+        from .testing import pyparsing_test
+
         parseAll = parseAll and parse_all
         fullDump = fullDump and full_dump
         printResults = printResults and print_results
@@ -2030,23 +2032,22 @@ def run_tests(
         BOM = "\ufeff"
         for t in tests:
             if comment is not None and comment.matches(t, False) or comments and not t:
-                comments.append(t)
+                comments.append(pyparsing_test.with_line_numbers(t))
                 continue
             if not t:
                 continue
-            out = ["\n" + "\n".join(comments) if comments else "", t]
+            out = [
+                "\n" + "\n".join(comments) if comments else "",
+                pyparsing_test.with_line_numbers(t),
+            ]
             comments = []
             try:
                 # convert newline marks to actual newlines, and strip leading BOM if present
                 t = NL.transform_string(t.lstrip(BOM))
                 result = self.parse_string(t, parse_all=parseAll)
             except ParseBaseException as pe:
                 fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
-                if "\n" in t:
-                    out.append(line(pe.loc, t))
-                    out.append(" " * (col(pe.loc, t) - 1) + "^" + fatal)
-                else:
-                    out.append(" " * pe.loc + "^" + fatal)
+                out.append(pe.explain())
                 out.append("FAIL: " + str(pe))
                 success = success and failureTests
                 result = pe
@@ -3388,22 +3389,20 @@ class LineStart(_PositionToken):
 
     def __init__(self):
         super().__init__()
+        self.leave_whitespace()
+        self.orig_whiteChars = set() | self.whiteChars
+        self.whiteChars.discard("\n")
+        self.skipper = Empty().set_whitespace_chars(self.whiteChars)
         self.errmsg = "Expected start of line"
 
-    def __add__(self, other):
-        return AtLineStart(other)
-
-    def __sub__(self, other):
-        return AtLineStart(other) - Empty()
-
     def preParse(self, instring, loc):
         if loc == 0:
             return loc
         else:
-            if instring[loc : loc + 1] == "\n" and "\n" in self.whiteChars:
-                ret = loc + 1
-            else:
-                ret = super().preParse(instring, loc)
+            ret = self.skipper.preParse(instring, loc)
+            if "\n" in self.orig_whiteChars:
+                while instring[ret : ret + 1] == "\n":
+                    ret = self.skipper.preParse(instring, ret + 1)
             return ret
 
     def parseImpl(self, instring, loc, doActions=True):
@@ -3444,12 +3443,6 @@ def __init__(self):
         super().__init__()
         self.errmsg = "Expected start of text"
 
-    def __add__(self, other):
-        return AtStringStart(other)
-
-    def __sub__(self, other):
-        return AtStringStart(other) - Empty()
-
     def parseImpl(self, instring, loc, doActions=True):
         if loc != 0:
             # see if entire string up to here is just whitespace and ignoreables
@@ -3835,6 +3828,7 @@ def __init__(self, exprs: IterableType[ParserElement], savelist: bool = False):
         super().__init__(exprs, savelist)
         if self.exprs:
             self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
+            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
         else:
             self.mayReturnEmpty = True
 
@@ -3976,6 +3970,7 @@ def __init__(self, exprs: IterableType[ParserElement], savelist: bool = False):
         if self.exprs:
             self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
             self.callPreparse = all(e.callPreparse for e in self.exprs)
+            self.skipWhitespace = all(e.skipWhitespace for e in self.exprs)
         else:
             self.mayReturnEmpty = True
 

diff --git a/tests/test_unit.py b/tests/test_unit.py
@@ -3587,14 +3587,14 @@ def testLineStart2(self):
         """
 
         test = dedent(test)
-        print(test)
+        print(pp.testing.with_line_numbers(test))
 
         print("normal parsing")
         for t, s, e in (pp.LineStart() + "AAA").scanString(test):
-            print(s, e, pp.lineno(s, test), pp.line(s, test), repr(test[s]))
+            print(s, e, pp.lineno(s, test), pp.line(s, test), repr(t))
             print()
             self.assertEqual(
-                "A", test[s], "failed LineStart with insignificant newlines"
+                "A", t[0][0], "failed LineStart with insignificant newlines"
             )
 
         print(r"parsing without \n in whitespace chars")
@@ -3604,10 +3604,10 @@ def testLineStart2(self):
                 print(s, e, pp.lineno(s, test), pp.line(s, test), repr(test[s]))
                 print()
                 self.assertEqual(
-                    "A", test[s], "failed LineStart with insignificant newlines"
+                    "A", t[0][0], "failed LineStart with insignificant newlines"
                 )
 
-    def testLineStart3(self):
+    def testLineStartWithLeadingSpaces(self):
         # testing issue #272
         instring = dedent(
             """
@@ -3634,16 +3634,21 @@ def testLineStart3(self):
             alpha_line | pp.Word("_"),
             alpha_line | alpha_line,
             pp.MatchFirst([alpha_line, alpha_line]),
+            alpha_line ^ pp.Word("_"),
+            alpha_line ^ alpha_line,
+            pp.Or([alpha_line, pp.Word("_")]),
             pp.LineStart() + pp.Word(pp.alphas) + pp.LineEnd().suppress(),
             pp.And([pp.LineStart(), pp.Word(pp.alphas), pp.LineEnd().suppress()]),
         ]
+        fails = []
         for test in tests:
             print(test.searchString(instring))
-            self.assertEqual(
-                ["a", "d", "e"], flatten(sum(test.search_string(instring)).as_list())
-            )
+            if ['a', 'b', 'c', 'd', 'e', 'f', 'g'] != flatten(sum(test.search_string(instring)).as_list()):
+                fails.append(test)
+        if fails:
+            self.fail("failed LineStart tests:\n{}".format("\n".join(str(expr) for expr in fails)))
 
-    def testLineStart4(self):
+    def testAtLineStart(self):
         test = dedent(
             """\
         AAA this line
@@ -3663,6 +3668,10 @@ def testLineStart4(self):
         )
 
     def testStringStart(self):
+        self.assertParseAndCheckList(pp.StringStart() + pp.Word(pp.nums), "123", ["123"])
+        self.assertParseAndCheckList(pp.StringStart() + pp.Word(pp.nums), "   123", ["123"])
+        self.assertParseAndCheckList(pp.StringStart() + "123", "123", ["123"])
+        self.assertParseAndCheckList(pp.StringStart() + "123", "   123", ["123"])
         self.assertParseAndCheckList(pp.AtStringStart(pp.Word(pp.nums)), "123", ["123"])
 
         self.assertParseAndCheckList(pp.AtStringStart("123"), "123", ["123"])
@@ -3673,6 +3682,40 @@ def testStringStart(self):
         with self.assertRaisesParseException():
             pp.AtStringStart("123").parse_string("    123")
 
+    def testStringStartAndLineStartInsideAnd(self):
+        P_MTARG = (
+                pp.StringStart()
+                + pp.Word("abcde")
+                + pp.StringEnd()
+        )
+
+        P_MTARG2 = (
+                pp.LineStart()
+                + pp.Word("abcde")
+                + pp.StringEnd()
+        )
+
+        P_MTARG3 = (
+                pp.AtLineStart(pp.Word("abcde"))
+                + pp.StringEnd()
+        )
+
+        def test(expr, string):
+            expr.streamline()
+            print(expr, repr(string), end=" ")
+            print(expr.parse_string(string))
+
+        test(P_MTARG, "aaa")
+        test(P_MTARG2, "aaa")
+        test(P_MTARG2, "\naaa")
+        test(P_MTARG2, "   aaa")
+        test(P_MTARG2, "\n   aaa")
+
+        with self.assertRaisesParseException():
+            test(P_MTARG3, "   aaa")
+        with self.assertRaisesParseException():
+            test(P_MTARG3, "\n   aaa")
+
     def testLineAndStringEnd(self):
 
         NLs = pp.OneOrMore(pp.lineEnd)