From 4279e6538dc3ae2fa454f70a4fc6438cf306dff9 Mon Sep 17 00:00:00 2001 From: Takuto Ikuta Date: Thu, 30 Jun 2022 10:40:05 -0400 Subject: [PATCH] optimize some code paths Optimized some codepaths within the lexer/Python code generation process, improving performance for generation of templates prior to their being cached. Pull request courtesy Takuto Ikuta. This shows around 10% performance improvement in our use case (https://crbug.com/1214033#c32). Closes: #361 Pull-request: https://github.com/sqlalchemy/mako/pull/361 Pull-request-sha: bcdee5ccf57100490aa0e48baeda6f15b584ab32 Change-Id: If647f77a52d5745019dcc46f82fd7a928f990757 --- doc/build/unreleased/361.rst | 7 +++++++ mako/lexer.py | 7 +++---- mako/pygen.py | 26 ++++++++++++++++++-------- 3 files changed, 28 insertions(+), 12 deletions(-) create mode 100644 doc/build/unreleased/361.rst diff --git a/doc/build/unreleased/361.rst b/doc/build/unreleased/361.rst new file mode 100644 index 0000000..7d7ef4b --- /dev/null +++ b/doc/build/unreleased/361.rst @@ -0,0 +1,7 @@ +.. change:: + :tags: performance + :tickets: 361 + + Optimized some codepaths within the lexer/Python code generation process, + improving performance for generation of templates prior to their being + cached. Pull request courtesy Takuto Ikuta. diff --git a/mako/lexer.py b/mako/lexer.py index 527c4b5..c45e20e 100644 --- a/mako/lexer.py +++ b/mako/lexer.py @@ -74,12 +74,11 @@ def match_reg(self, reg): (start, end) = match.span() self.match_position = end + 1 if end == start else end self.matched_lineno = self.lineno - lines = re.findall(r"\n", self.text[mp : self.match_position]) cp = mp - 1 - while cp >= 0 and cp < self.textlength and self.text[cp] != "\n": - cp -= 1 + if cp >= 0 and cp < self.textlength: + cp = self.text[: cp + 1].rfind("\n") self.matched_charpos = mp - cp - self.lineno += len(lines) + self.lineno += self.text[mp : self.match_position].count("\n") return match def parse_until_text(self, watch_nesting, *text): diff --git a/mako/pygen.py b/mako/pygen.py index 46b0b52..7c46535 100644 --- a/mako/pygen.py +++ b/mako/pygen.py @@ -43,6 +43,15 @@ def __init__(self, stream): # source lines self.source_map = {} + self._re_space_comment = re.compile(r"^\s*#") + self._re_space = re.compile(r"^\s*$") + self._re_indent = re.compile(r":[ \t]*(?:#.*)?$") + self._re_compound = re.compile(r"^\s*(if|try|elif|while|for|with)") + self._re_indent_keyword = re.compile( + r"^\s*(def|class|else|elif|except|finally)" + ) + self._re_unindentor = re.compile(r"^\s*(else|elif|except|finally).*\:") + def _update_lineno(self, num): self.lineno += num @@ -86,8 +95,8 @@ def writeline(self, line): if ( line is None - or re.match(r"^\s*#", line) - or re.match(r"^\s*$", line) + or self._re_space_comment.match(line) + or self._re_space.match(line) ): hastext = False else: @@ -121,12 +130,12 @@ def writeline(self, line): # note that a line can both decrase (before printing) and # then increase (after printing) the indentation level. - if re.search(r":[ \t]*(?:#.*)?$", line): + if self._re_indent.search(line): # increment indentation count, and also # keep track of what the keyword was that indented us, # if it is a python compound statement keyword # where we might have to look for an "unindent" keyword - match = re.match(r"^\s*(if|try|elif|while|for|with)", line) + match = self._re_compound.match(line) if match: # its a "compound" keyword, so we will check for "unindentors" indentor = match.group(1) @@ -137,9 +146,7 @@ def writeline(self, line): # its not a "compound" keyword. but lets also # test for valid Python keywords that might be indenting us, # else assume its a non-indenting line - m2 = re.match( - r"^\s*(def|class|else|elif|except|finally)", line - ) + m2 = self._re_indent_keyword.match(line) if m2: self.indent += 1 self.indent_detail.append(indentor) @@ -167,7 +174,7 @@ def _is_unindentor(self, line): # if the current line doesnt have one of the "unindentor" keywords, # return False - match = re.match(r"^\s*(else|elif|except|finally).*\:", line) + match = self._re_unindentor.match(line) # if True, whitespace matches up, we have a compound indentor, # and this line has an unindentor, this # is probably good enough @@ -193,6 +200,9 @@ def _indent_line(self, line, stripspace=""): stripspace is a string of space that will be truncated from the start of the line before indenting.""" + if stripspace == "": + # Fast path optimization. + return self.indentstring * self.indent + line return re.sub( r"^%s" % stripspace, self.indentstring * self.indent, line