# Patch summary (leading commentary; everything from the first "diff --git"
# header onward is unchanged).
#
# lib/sanitize/transformers/clean_element.rb, hunk inside `call(env)`:
#   Applies to attributes whose name is not in `attr_allowlist`.
#   Before: when `allow_data_attributes` was set and the name started with
#   'data-', a name matching REGEX_DATA_ATTR hit `next`, so none of the code
#   after the allowlist check ran for that attribute. Every other
#   non-allowlisted attribute was unlinked and skipped.
#   After: the attribute is unlinked and skipped unless all three conditions
#   hold (`allow_data_attributes`, 'data-' prefix, REGEX_DATA_ATTR match).
#   A matching data attribute no longer triggers `next`; it falls through to
#   the code under "# The attribute is allowed." instead.
#   NOTE(review): that follow-on code is outside this hunk; judging by the
#   new test's title it presumably includes the protocol checks -- confirm.
#
# test/test_clean_element.rb:
#   Adds 'should sanitize protocols in data attributes even if data
#   attributes are generically allowed'. It runs Sanitize.fragment twice with
#   :attributes => {'a' => [:data]} and a :protocols entry for 'data-url':
#   with ['https'] the result must equal "Text"; with ['mailto'] the result
#   must equal the input.
#
# NOTE(review): this copy of the patch has its line breaks collapsed, and the
# string literals 'Text' / "Text" and the empty backticks in the last test
# title look like HTML markup was stripped during extraction -- verify against
# the upstream commit before applying.
diff --git a/lib/sanitize/transformers/clean_element.rb b/lib/sanitize/transformers/clean_element.rb index d91850a..c1e4363 100644 --- a/lib/sanitize/transformers/clean_element.rb +++ b/lib/sanitize/transformers/clean_element.rb @@ -122,16 +122,15 @@ def call(env) unless attr_allowlist.include?(attr_name) # The attribute isn't allowed. - if allow_data_attributes && attr_name.start_with?('data-') - # Arbitrary data attributes are allowed. If this is a data - # attribute, continue. - next if attr_name =~ REGEX_DATA_ATTR + # Arbitrary data attributes are allowed. If this is a data + # attribute, continue. + unless allow_data_attributes && attr_name.start_with?('data-') && + attr_name =~ REGEX_DATA_ATTR + # Either the attribute isn't a data attribute or arbitrary data + # attributes aren't allowed. Remove the attribute. + attr.unlink + next end - - # Either the attribute isn't a data attribute or arbitrary data - # attributes aren't allowed. Remove the attribute. - attr.unlink - next end # The attribute is allowed. diff --git a/test/test_clean_element.rb b/test/test_clean_element.rb index 37a8af0..5b3299c 100644 --- a/test/test_clean_element.rb +++ b/test/test_clean_element.rb @@ -491,6 +491,22 @@ }).must_equal "Text" end + it 'should sanitize protocols in data attributes even if data attributes are generically allowed' do + input = 'Text' + + Sanitize.fragment(input, { + :elements => ['a'], + :attributes => {'a' => [:data]}, + :protocols => {'a' => {'data-url' => ['https']}} + }).must_equal "Text" + + Sanitize.fragment(input, { + :elements => ['a'], + :attributes => {'a' => [:data]}, + :protocols => {'a' => {'data-url' => ['mailto']}} + }).must_equal input + end + it 'should prevent `` tags from being used to set a non-UTF-8 charset' do Sanitize.document('Howdy!', :elements => %w[html head meta body],