Skip to content

Commit

Permalink
refactor chm_htmlescape()
Browse files Browse the repository at this point in the history
  • Loading branch information
tk0miya committed Dec 24, 2018
1 parent 503cf9c commit 6ffe549
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 19 deletions.
1 change: 1 addition & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Bugs fixed
* #5834: apidoc: wrong help for ``--tocfile``
* #5800: todo: crashed if todo is defined in TextElement
* #5846: htmlhelp: convert hex escaping to decimal escaping in .hhc/.hhk files
* htmlhelp: broken .hhk file generated when title contains a double quote

Testing
--------
Expand Down
34 changes: 16 additions & 18 deletions sphinx/builders/htmlhelp.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@

import codecs
import os
import re
from os import path

from docutils import nodes
from six import PY3

from sphinx import addnodes
from sphinx.builders.html import StandaloneHTMLBuilder
Expand Down Expand Up @@ -170,22 +170,21 @@
}


def chm_htmlescape(*args, **kwargs):
# type: (*Any, **Any) -> unicode
def chm_htmlescape(s, quote=None):
# type: (unicode, bool) -> unicode
"""
chm_htmlescape() is a wrapper of htmlescape().
chm_htmlescape() is a wrapper of html.escape().
.hhc/.hhk files don't recognize hex escaping, we need convert
hex escaping to decimal escaping. for example: `&#x27;` -> `&#39;`
htmlescape() may generates a hex escaping `&#x27;` for single
quote `'`, this wrapper fixes this.
hex escaping to decimal escaping. for example: ``&#x27;`` -> ``&#39;``
html.escape() may generates a hex escaping ``&#x27;`` for single
quote ``'``, this wrapper fixes this.
"""
def convert(matchobj):
# type: (Match[unicode]) -> unicode
codepoint = int(matchobj.group(1), 16)
return '&#%d;' % codepoint
return re.sub(r'&#[xX]([0-9a-fA-F]+);',
convert,
htmlescape(*args, **kwargs))
if quote is None:
quote = PY3 # True for py3, False for py2 (for compatibility)

s = htmlescape(s, quote)
s = s.replace('&#x27;', '&#39;') # re-escape as decimal
return s


class HTMLHelpBuilder(StandaloneHTMLBuilder):
Expand Down Expand Up @@ -297,7 +296,7 @@ def write_toc(node, ullevel=0):
write_toc(subnode, ullevel)
elif isinstance(node, nodes.reference):
link = node['refuri']
title = chm_htmlescape(node.astext()).replace('"', '&quot;')
title = chm_htmlescape(node.astext(), True)
f.write(object_sitemap % (title, link))
elif isinstance(node, nodes.bullet_list):
if ullevel != 0:
Expand Down Expand Up @@ -327,10 +326,9 @@ def write_index(title, refs, subitems):
# type: (unicode, List[Tuple[unicode, unicode]], List[Tuple[unicode, List[Tuple[unicode, unicode]]]]) -> None # NOQA
def write_param(name, value):
# type: (unicode, unicode) -> None
item = ' <param name="%s" value="%s">\n' % \
(name, value)
item = ' <param name="%s" value="%s">\n' % (name, value)
f.write(item)
title = chm_htmlescape(title)
title = chm_htmlescape(title, True)
f.write('<LI> <OBJECT type="text/sitemap">\n')
write_param('Keyword', title)
if len(refs) == 0:
Expand Down
30 changes: 29 additions & 1 deletion tests/test_build_htmlhelp.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
"""
test_build_htmlhelp
~~~~~~~~~~~~~~~~~~~
Expand All @@ -9,6 +10,9 @@
import re

import pytest
from six import PY2

from sphinx.builders.htmlhelp import chm_htmlescape


@pytest.mark.sphinx('htmlhelp', testroot='build-htmlhelp')
Expand All @@ -22,5 +26,29 @@ def test_chm(app):
with open(hhk_path, 'rb') as f:
data = f.read()
m = re.search(br'&#[xX][0-9a-fA-F]+;', data)
assert m == None, 'Hex escaping exists in .hhk file: ' + str(m.group(0))
assert m is None, 'Hex escaping exists in .hhk file: ' + str(m.group(0))


def test_chm_htmlescape():
    """Check chm_htmlescape() on plain text, ``&``, and both quote characters.

    Expected values are branched on ``PY2`` wherever the assertions below
    differ between Python 2 and Python 3.
    """
    # text without markup characters comes back unchanged
    assert chm_htmlescape('Hello world') == 'Hello world'
    assert chm_htmlescape(u'Unicode 文字') == u'Unicode 文字'
    # a bare '&' is escaped, so entity-like input is not passed through as-is
    assert chm_htmlescape('&#x45') == '&amp;#x45'

    # double quotes: only the result of the default (quote omitted) call
    # differs between the two branches
    if PY2:
        assert chm_htmlescape('<Hello> "world"') == '&lt;Hello&gt; "world"'
        assert chm_htmlescape('<Hello> "world"', True) == '&lt;Hello&gt; &quot;world&quot;'
        assert chm_htmlescape('<Hello> "world"', False) == '&lt;Hello&gt; "world"'
    else:
        assert chm_htmlescape('<Hello> "world"') == '&lt;Hello&gt; &quot;world&quot;'
        assert chm_htmlescape('<Hello> "world"', True) == '&lt;Hello&gt; &quot;world&quot;'
        assert chm_htmlescape('<Hello> "world"', False) == '&lt;Hello&gt; "world"'

    # single quotes: when escaped, the decimal form &#39; is expected
    if PY2:
        # single quotes are not escaped on py2 (following the behavior of cgi.escape())
        assert chm_htmlescape("Hello 'world'") == "Hello 'world'"
        assert chm_htmlescape("Hello 'world'", True) == "Hello 'world'"
        assert chm_htmlescape("Hello 'world'", False) == "Hello 'world'"
    else:
        assert chm_htmlescape("Hello 'world'") == "Hello &#39;world&#39;"
        assert chm_htmlescape("Hello 'world'", True) == "Hello &#39;world&#39;"
        assert chm_htmlescape("Hello 'world'", False) == "Hello 'world'"

0 comments on commit 6ffe549

Please sign in to comment.