From 925760291d6efec64fda6e9dd1fd9cfbd5be068c Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Mon, 29 Aug 2022 12:28:52 -0400 Subject: [PATCH] fix tag regexp to match quoted groups correctly Fixed issue in lexer where the regexp used to match tags would not correctly interpret quoted sections individually. While this parsing issue still produced the same expected tag structure later on, the mis-handling of quoted sections was also subject to a regexp crash if a tag had a large number of quotes within its quoted sections. Fixes: #366 Change-Id: I74e0d71ff7f419970711a7cd51adcf1bb90a44c0 --- doc/build/unreleased/366.rst | 9 +++++++++ mako/lexer.py | 12 ++++++++---- test/test_lexer.py | 21 +++++++++++++++++---- 3 files changed, 34 insertions(+), 8 deletions(-) create mode 100644 doc/build/unreleased/366.rst diff --git a/doc/build/unreleased/366.rst b/doc/build/unreleased/366.rst new file mode 100644 index 00000000..27b02784 --- /dev/null +++ b/doc/build/unreleased/366.rst @@ -0,0 +1,9 @@ +.. change:: + :tags: bug, lexer + :tickets: 366 + + Fixed issue in lexer where the regexp used to match tags would not + correctly interpret quoted sections individually. While this parsing issue + still produced the same expected tag structure later on, the mis-handling + of quoted sections was also subject to a regexp crash if a tag had a large + number of quotes within its quoted sections. \ No newline at end of file diff --git a/mako/lexer.py b/mako/lexer.py index bfcf2869..77a24832 100644 --- a/mako/lexer.py +++ b/mako/lexer.py @@ -272,20 +272,24 @@ def parse(self): return self.template def match_tag_start(self): - match = self.match( - r""" + reg = r""" \<% # opening tag ([\w\.\:]+) # keyword - ((?:\s+\w+|\s*=\s*|".*?"|'.*?')*) # attrname, = \ + ((?:\s+\w+|\s*=\s*|"[^"]*?"|'[^']*?'|\s*,\s*)*) # attrname, = \ # sign, string expression + # comma is for backwards compat + # identified in #366 \s* # more whitespace (/)?> # closing - """, + """ + + match = self.match( + reg, re.I | re.S | re.X, ) diff --git a/test/test_lexer.py b/test/test_lexer.py index 255c128f..a7b6fe36 100644 --- a/test/test_lexer.py +++ b/test/test_lexer.py @@ -1,5 +1,7 @@ import re +import pytest + from mako import compat from mako import exceptions from mako import parsetree @@ -146,6 +148,10 @@ def test_noexpr_allowed(self): """ assert_raises(exceptions.CompileException, Lexer(template).parse) + def test_tag_many_quotes(self): + template = "<%0" + '"' * 3000 + assert_raises(exceptions.SyntaxException, Lexer(template).parse) + def test_unmatched_tag(self): template = """ <%namespace name="bar"> @@ -432,9 +438,16 @@ def test_expr_in_attribute(self): ), ) - def test_pagetag(self): - template = """ - <%page cached="True", args="a, b"/> + @pytest.mark.parametrize("comma,numchars", [(",", 48), ("", 47)]) + def test_pagetag(self, comma, numchars): + # note that the comma here looks like: + # <%page cached="True", args="a, b"/> + # that's what this test has looked like for decades, however, the + # comma there is not actually the right syntax. When issue #366 + # was fixed, the reg was altered to accommodate for this comma to allow + # backwards compat + template = f""" + <%page cached="True"{comma} args="a, b"/> some template """ @@ -453,7 +466,7 @@ def test_pagetag(self): some template """, - (2, 48), + (2, numchars), ), ], ),