diff --git a/doc/build/unreleased/366.rst b/doc/build/unreleased/366.rst new file mode 100644 index 00000000..27b02784 --- /dev/null +++ b/doc/build/unreleased/366.rst @@ -0,0 +1,9 @@ +.. change:: + :tags: bug, lexer + :tickets: 366 + + Fixed issue in lexer where the regexp used to match tags would not + correctly interpret quoted sections individually. While this parsing issue + still produced the same expected tag structure later on, the mis-handling + of quoted sections was also subject to a regexp crash if a tag had a large + number of quotes within its quoted sections. \ No newline at end of file diff --git a/mako/lexer.py b/mako/lexer.py index bfcf2869..77a24832 100644 --- a/mako/lexer.py +++ b/mako/lexer.py @@ -272,20 +272,24 @@ def parse(self): return self.template def match_tag_start(self): - match = self.match( - r""" + reg = r""" \<% # opening tag ([\w\.\:]+) # keyword - ((?:\s+\w+|\s*=\s*|".*?"|'.*?')*) # attrname, = \ + ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*) # attrname, = \ # sign, string expression + # comma is for backwards compat + # identified in #366 \s* # more whitespace (/)?> # closing - """, + """ + + match = self.match( + reg, re.I | re.S | re.X, ) diff --git a/test/test_lexer.py b/test/test_lexer.py index 255c128f..a7b6fe36 100644 --- a/test/test_lexer.py +++ b/test/test_lexer.py @@ -1,5 +1,7 @@ import re +import pytest + from mako import compat from mako import exceptions from mako import parsetree @@ -146,6 +148,10 @@ def test_noexpr_allowed(self): """ assert_raises(exceptions.CompileException, Lexer(template).parse) + def test_tag_many_quotes(self): + template = "<%0" + '"' * 3000 + assert_raises(exceptions.SyntaxException, Lexer(template).parse) + def test_unmatched_tag(self): template = """ <%namespace name="bar"> @@ -432,9 +438,16 @@ def test_expr_in_attribute(self): ), ) - def test_pagetag(self): - template = """ - <%page cached="True", args="a, b"/> + @pytest.mark.parametrize("comma,numchars", [(",", 48), ("", 47)]) + def 
test_pagetag(self, comma, numchars): + # note that the comma here looks like: + # <%page cached="True", args="a, b"/> + # that's what this test has looked like for decades; however, the + # comma there is not actually the right syntax. When issue #366 + # was fixed, the regexp was altered to accommodate this comma for + # backwards compatibility + template = f""" + <%page cached="True"{comma} args="a, b"/> some template """ @@ -453,7 +466,7 @@ def test_pagetag(self): some template """, - (2, 48), + (2, numchars), ), ], ),