From a6922ce26eaddaa4d638b5df66c018d002d33b75 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 11 Aug 2021 13:24:38 -0400 Subject: [PATCH] feat: support empty HTML5 data attributes Closes #215 --- CHANGELOG.md | 7 +++++++ lib/loofah/html5/scrub.rb | 7 +++++-- test/html5/test_sanitizer.rb | 15 ++++++++++----- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44db4c3..d88fae2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## next / unreleased + +### Features + +* Support empty HTML5 data attributes. [[#215](https://github.com/flavorjones/loofah/issues/215)] + + ## 2.11.0 / 2021-07-31 ### Features diff --git a/lib/loofah/html5/scrub.rb b/lib/loofah/html5/scrub.rb index c541903..3924c47 100644 --- a/lib/loofah/html5/scrub.rb +++ b/lib/loofah/html5/scrub.rb @@ -10,6 +10,7 @@ module Scrub CRASS_SEMICOLON = { node: :semicolon, raw: ";" } CSS_IMPORTANT = '!important' CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES = /\A(["'])?[^"']+\1\z/ + DATA_ATTRIBUTE_NAME = /\Adata-[\w-]+\z/ class << self def allowed_element?(element_name) @@ -25,7 +26,7 @@ def scrub_attributes(node) attr_node.node_name end - if attr_name =~ /\Adata-[\w-]+\z/ + if attr_name =~ DATA_ATTRIBUTE_NAME next end @@ -62,7 +63,9 @@ def scrub_attributes(node) scrub_css_attribute(node) node.attribute_nodes.each do |attr_node| - node.remove_attribute(attr_node.name) if attr_node.value !~ /[^[:space:]]/ + if attr_node.value !~ /[^[:space:]]/ && attr_node.name !~ DATA_ATTRIBUTE_NAME + node.remove_attribute(attr_node.name) + end end force_correct_attribute_escaping!(node) diff --git a/test/html5/test_sanitizer.rb b/test/html5/test_sanitizer.rb index bacf267..c4e7f82 100755 --- a/test/html5/test_sanitizer.rb +++ b/test/html5/test_sanitizer.rb @@ -102,18 +102,23 @@ def assert_completes_in_reasonable_time(&block) def test_should_allow_data_attributes input = "

foo bar baz

" - output = "

foo <bad>bar</bad> baz

" - htmloutput = "

foo <bad>bar</bad> baz

" - check_sanitization(input, htmloutput, output, output) + check_sanitization(input, output, output, output) end def test_should_allow_multi_word_data_attributes input = "

foo bar baz

" - output = htmloutput = "

foo <bad>bar</bad> baz

" + output = "

foo <bad>bar</bad> baz

" - check_sanitization(input, htmloutput, output, output) + check_sanitization(input, output, output, output) + end + + def test_should_allow_empty_data_attributes + input = "

foo bar baz

" + output = "

foo <bad>bar</bad> baz

" + + check_sanitization(input, output, output, output) end def test_should_allow_contenteditable