Skip to content

Commit

Permalink
Replace lxml.html.clean_html with bleach; drop lxml dependency
Browse files Browse the repository at this point in the history
  • Loading branch information
akx committed Sep 6, 2022
1 parent 765285e commit 5ecd89d
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 4 deletions.
3 changes: 1 addition & 2 deletions nbconvert/exporters/templateexporter.py
Expand Up @@ -22,7 +22,6 @@
TemplateNotFound,
)
from jupyter_core.paths import jupyter_path
from lxml.html.clean import clean_html
from traitlets import Bool, Dict, HasTraits, List, Unicode, default, observe, validate
from traitlets.config import Config
from traitlets.utils.importstring import import_item
Expand Down Expand Up @@ -72,7 +71,7 @@
"escape_html": lambda s: html.escape(str(s)),
"escape_html_keep_quotes": lambda s: html.escape(str(s), quote=False),
# For sanitizing HTML for any XSS
"clean_html": clean_html,
"clean_html": filters.clean_html,
"strip_trailing_newline": filters.strip_trailing_newline,
"text_base64": filters.text_base64,
}
Expand Down
17 changes: 17 additions & 0 deletions nbconvert/filters/strings.py
Expand Up @@ -8,6 +8,7 @@
# Distributed under the terms of the Modified BSD License.

import base64
import bleach
import os
import re
import textwrap
Expand All @@ -21,6 +22,7 @@
__all__ = [
"wrap_text",
"html2text",
"clean_html",
"add_anchor",
"strip_dollars",
"strip_files_prefix",
Expand Down Expand Up @@ -75,6 +77,21 @@ def html2text(element):
return text


def clean_html(element):
    """Sanitize an HTML fragment with ``bleach`` and return the result.

    ``bytes`` input is decoded with the default codec; any other value is
    converted with ``str()``. The allow-list passed to ``bleach.clean`` is
    bleach's default tag list extended with ``div``, ``pre``, ``code`` and
    ``span``, and bleach's default attribute mapping with the ``"*"`` entry
    set to ``class`` and ``id``.
    """
    markup = element.decode() if isinstance(element, bytes) else str(element)

    # Start from bleach's defaults and widen them for notebook output markup.
    permitted_tags = list(bleach.ALLOWED_TAGS) + ["div", "pre", "code", "span"]
    permitted_attributes = dict(bleach.ALLOWED_ATTRIBUTES)
    permitted_attributes["*"] = ["class", "id"]

    return bleach.clean(
        markup,
        tags=permitted_tags,
        attributes=permitted_attributes,
    )


def _convert_header_id(header_contents):
"""Convert header contents to valid id value. Takes string as input, returns string.
Expand Down
2 changes: 1 addition & 1 deletion nbconvert/tests/test_nbconvertapp.py
Expand Up @@ -367,7 +367,7 @@ def test_no_input(self):
'<span class="o">=</span> '
'<span class="n">symbols</span>'
'<span class="p">(</span>'
"<span class=\"s1\">'x y z'</span>"
'<span class="s1">&#39;x y z&#39;</span>'
'<span class="p">)</span>'
)
for no_input_flag in (False, True):
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Expand Up @@ -20,7 +20,6 @@ classifiers = [
urls = {Homepage = "https://jupyter.org"}
requires-python = ">=3.7"
dependencies = [
"lxml",
"beautifulsoup4",
"bleach",
"defusedxml",
Expand Down

0 comments on commit 5ecd89d

Please sign in to comment.