From 9c7879fb49efc17cc0455554b560541726e78b07 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 31 Jul 2021 16:48:37 -0400 Subject: [PATCH 1/2] remove disallowed elements from VOID_ELEMENTS and remove elements that libxml2 doesn't consider void (like `col`) this list is descriptive, not prescriptive -- and is used only for testing tags in ACCEPTABLE_ELEMENTS. --- CHANGELOG.md | 8 ++++++++ lib/loofah/html5/safelist.rb | 8 ++------ test/html5/test_sanitizer.rb | 6 ++++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2227cc4..3ba8b07 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ # Changelog +## next / unreleased + +### Changes + +* Deprecating `Loofah::HTML5::SafeList::VOID_ELEMENTS` which is not a canonical list of void HTML4 or HTML5 elements. +* Removed some elements from `Loofah::HTML5::SafeList::VOID_ELEMENTS` that either are not acceptable elements or aren't considered "void" by libxml2. + + ## 2.10.0 / 2021-06-06 ### Features diff --git a/lib/loofah/html5/safelist.rb b/lib/loofah/html5/safelist.rb index 3acafac..9f1525f 100644 --- a/lib/loofah/html5/safelist.rb +++ b/lib/loofah/html5/safelist.rb @@ -788,18 +788,14 @@ module SafeList ALLOWED_PROTOCOLS = ACCEPTABLE_PROTOCOLS ALLOWED_URI_DATA_MEDIATYPES = ACCEPTABLE_URI_DATA_MEDIATYPES + # TODO: remove VOID_ELEMENTS in a future major release + # and put it in the tests (it is used only for testing, not for functional behavior) VOID_ELEMENTS = Set.new([ "area", - "base", "br", - "col", - "embed", "hr", "img", "input", - "link", - "meta", - "param", ]) # additional tags we should consider safe since we have libxml2 fixing up our documents. diff --git a/test/html5/test_sanitizer.rb b/test/html5/test_sanitizer.rb index 6610dd6..bacf267 100755 --- a/test/html5/test_sanitizer.rb +++ b/test/html5/test_sanitizer.rb @@ -68,6 +68,12 @@ def assert_completes_in_reasonable_time(&block) end end + HTML5::SafeList::VOID_ELEMENTS.each do |tag_name| + define_method "test_void_#{tag_name}_is_in_allowed_list" do + assert_includes(HTML5::SafeList::ALLOWED_ELEMENTS, tag_name) + end + end + ## ## libxml2 downcases elements, so this is moot. ## From c490da06bf7c0d2590d2d325082abf45626832a3 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 31 Jul 2021 17:02:55 -0400 Subject: [PATCH 2/2] feat: allow HTML5 element `wbr` --- CHANGELOG.md | 5 +++++ lib/loofah/html5/safelist.rb | 1 + 2 files changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ba8b07..c78bee0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ ## next / unreleased +### Features + +* Allow HTML5 element `wbr`. + + ### Changes * Deprecating `Loofah::HTML5::SafeList::VOID_ELEMENTS` which is not a canonical list of void HTML4 or HTML5 elements. diff --git a/lib/loofah/html5/safelist.rb b/lib/loofah/html5/safelist.rb index 9f1525f..583ea8c 100644 --- a/lib/loofah/html5/safelist.rb +++ b/lib/loofah/html5/safelist.rb @@ -140,6 +140,7 @@ module SafeList "ul", "var", "video", + "wbr", ]) MATHML_ELEMENTS = Set.new([