Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up JSON and reduce HTML formatter consumption #1569

Merged
merged 8 commits into from Oct 26, 2020
42 changes: 25 additions & 17 deletions pygments/formatters/html.py
Expand Up @@ -9,6 +9,7 @@
:license: BSD, see LICENSE for details.
"""

import functools
import os
import sys
import os.path
Expand Down Expand Up @@ -414,6 +415,7 @@ def __init__(self, **options):
self.tagurlformat = self._decodeifneeded(options.get('tagurlformat', ''))
self.filename = self._decodeifneeded(options.get('filename', ''))
self.wrapcode = get_bool_opt(options, 'wrapcode', False)
self.span_element_openers = {}

if self.tagsfile:
if not ctags:
Expand Down Expand Up @@ -455,13 +457,20 @@ def _get_css_class(self, ttype):
return ''

def _get_css_classes(self, ttype):
"""Return the css classes of this token type prefixed with
the classprefix option."""
"""Generate the opening <span> tag for a given token type using CSS classes."""
cls = self._get_css_class(ttype)
while ttype not in STANDARD_TYPES:
ttype = ttype.parent
cls = self._get_css_class(ttype) + ' ' + cls
return cls
return cls and '<span class="%s">' % cls or ''

def _get_css_inline_styles(self, ttype):
"""Generate the opening <span> tag for a given token type using inline CSS styles."""
cclass = self.ttype2class.get(ttype)
while cclass is None:
ttype = ttype.parent
cclass = self.ttype2class.get(ttype)
return cclass and '<span style="%s">' % self.class2style[cclass][0] or ''

def _create_stylesheet(self):
t2c = self.ttype2class = {Token: ''}
Expand Down Expand Up @@ -786,33 +795,32 @@ def _wrap_code(self, inner):
yield from inner
yield 0, '</code>'

@functools.lru_cache(maxsize=100)
def _translate_parts(self, value):
"""HTML-escape a value and split it by newlines."""
return value.translate(_escape_html_table).split('\n')

def _format_lines(self, tokensource):
"""
Just format the tokens, without any wrapping tags.
Yield individual lines.
"""
nocls = self.noclasses
lsep = self.lineseparator
# for <span style=""> lookup only
getcls = self.ttype2class.get
c2s = self.class2style
escape_table = _escape_html_table
tagsfile = self.tagsfile

lspan = ''
line = []
for ttype, value in tokensource:
if nocls:
cclass = getcls(ttype)
while cclass is None:
ttype = ttype.parent
cclass = getcls(ttype)
cspan = cclass and '<span style="%s">' % c2s[cclass][0] or ''
else:
cls = self._get_css_classes(ttype)
cspan = cls and '<span class="%s">' % cls or ''
try:
cspan = self.span_element_openers[ttype]
except KeyError:
if nocls:
cspan = self.span_element_openers[ttype] = self._get_css_inline_styles(ttype)
else:
cspan = self.span_element_openers[ttype] = self._get_css_classes(ttype)

parts = value.translate(escape_table).split('\n')
parts = self._translate_parts(value)

if tagsfile and ttype in Token.Name:
filename, linenumber = self._lookup_ctag(value)
Expand Down