Skip to content

Commit

Permalink
Merge pull request #260 from flavorjones/flavorjones-more-flexible-testing
Browse files Browse the repository at this point in the history

test: make the generated tests more flexible
  • Loading branch information
flavorjones committed Apr 1, 2023
2 parents 6944760 + 24dbde5 commit c8211c1
Showing 1 changed file with 37 additions and 38 deletions.
75 changes: 37 additions & 38 deletions test/html5/test_sanitizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,16 @@ def sanitize_html(stream)
Loofah.fragment(stream).scrub!(:escape).to_html
end

def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
## libxml uses double-quotes, so let's swappo-boppo our quotes before comparing.
def check_sanitization(input, *possible_answers)
# shotgun approach - if any of the possible answers match, we win

# libxml uses double-quotes, so let's swappo-boppo our quotes before comparing.
sane = sanitize_html(input).gsub('"', "'")
htmloutput = htmloutput.gsub('"', "'")
xhtmloutput = xhtmloutput.gsub('"', "'")
rexmloutput = rexmloutput.gsub('"', "'")
possible_output = possible_answers.compact.map do |possible_answer|
possible_answer.gsub('"', "'")
end

## HTML5's parsers are shit. there's so much inconsistency with what has closing tags, etc, that
## it would require a lot of manual hacking to make the tests match libxml's output.
## instead, I'm taking the shotgun approach, and trying to match any of the described outputs.
assert((htmloutput == sane) || (rexmloutput == sane) || (xhtmloutput == sane),
%Q{given: "#{input}"\nexpected: "#{htmloutput}"\ngot: "#{sane}"})
assert_includes(possible_output, sane)
end

def assert_completes_in_reasonable_time(&block)
Expand Down Expand Up @@ -81,7 +79,7 @@ def assert_completes_in_reasonable_time(&block)
# define_method "test_should_forbid_#{tag_name.upcase}_tag" do
# input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
# output = "&lt;#{tag_name.upcase} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;"
# check_sanitization(input, output, output, output)
# check_sanitization(input, output)
# end
# end

Expand All @@ -96,36 +94,36 @@ def assert_completes_in_reasonable_time(&block)
output = "<p #{attribute_name}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
htmloutput = "<p #{attribute_name.downcase}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
end
check_sanitization(input, htmloutput, output, output)
check_sanitization(input, htmloutput, output)
end
end

def test_should_allow_data_attributes
input = "<p data-foo='foo'>foo <bad>bar</bad> baz</p>"
output = "<p data-foo='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"

check_sanitization(input, output, output, output)
check_sanitization(input, output)
end

def test_should_allow_multi_word_data_attributes
input = "<p data-foo-bar-id='11'>foo <bad>bar</bad> baz</p>"
output = "<p data-foo-bar-id='11'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"

check_sanitization(input, output, output, output)
check_sanitization(input, output)
end

def test_should_allow_empty_data_attributes
input = "<p data-foo data-bar="">foo <bad>bar</bad> baz</p>"
output = "<p data-foo data-bar=''>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"

check_sanitization(input, output, output, output)
check_sanitization(input, output)
end

def test_should_allow_contenteditable
input = '<p contenteditable="false">Hi!</p>'
output = '<p contenteditable="false">Hi!</p>'

check_sanitization(input, output, output, output)
check_sanitization(input, output)
end

##
Expand All @@ -135,66 +133,68 @@ def test_should_allow_contenteditable
# define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
# input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
# output = "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
# check_sanitization(input, output, output, output)
# check_sanitization(input, output)
# end
# end

HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
define_method "test_should_allow_#{protocol}_uris" do
input = %(<a href="#{protocol}">foo</a>)
output = "<a href='#{protocol}'>foo</a>"
check_sanitization(input, output, output, output)
check_sanitization(input, output)
end
end

HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
define_method "test_should_allow_uppercase_#{protocol}_uris" do
input = %(<a href="#{protocol.upcase}">foo</a>)
output = "<a href='#{protocol.upcase}'>foo</a>"
check_sanitization(input, output, output, output)
check_sanitization(input, output)
end
end

["image/gif", "image/jpeg", "image/png", "text/css", "text/plain"].each do |data_uri_type|
define_method "test_should_allow_data_#{data_uri_type}_uris" do
input = %(<a href="data:#{data_uri_type}">foo</a>)
output = "<a href='data:#{data_uri_type}'>foo</a>"
check_sanitization(input, output, output, output)
check_sanitization(input, output)

input = %(<a href="data:#{data_uri_type};base64,R0lGODlhAQABA">foo</a>)
output = "<a href='data:#{data_uri_type};base64,R0lGODlhAQABA'>foo</a>"
check_sanitization(input, output, output, output)
check_sanitization(input, output)
end

define_method "test_should_allow_uppercase_data_#{data_uri_type}_uris" do
input = %(<a href="DATA:#{data_uri_type.upcase}">foo</a>)
output = "<a href='DATA:#{data_uri_type.upcase}'>foo</a>"
check_sanitization(input, output, output, output)
check_sanitization(input, output)
end
end

def test_should_disallow_other_uri_mediatypes
input = %(<a href="data:foo">foo</a>)
output = "<a>foo</a>"
check_sanitization(input, output, output, output)
check_sanitization(input, output)

input = %(<a href="data:image/xxx">foo</a>)
output = "<a>foo</a>"
check_sanitization(input, output, output, output)
check_sanitization(input, output)

input = %(<a href="data:image/xxx;base64,R0lGODlhAQABA">foo</a>)
output = "<a>foo</a>"
check_sanitization(input, output, output, output)

check_sanitization(input, output)

input = %(<a href="data:text/html;base64,R0lGODlhAQABA">foo</a>)
output = "<a>foo</a>"
check_sanitization(input, output, output, output)
check_sanitization(input, output)

# https://hackerone.com/bugs?report_id=1694173
# https://github.com/w3c/svgwg/issues/266
input = %(<svg><use href="data:image/svg+xml;base64,PHN2ZyBpZD0neCcgeG1s"/></svg>)
output = "<svg><use></use></svg>"
check_sanitization(input, output, output, output)

check_sanitization(input, output)
end

HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
Expand Down Expand Up @@ -239,18 +239,18 @@ def test_figure_element_is_valid
# def test_should_handle_astral_plane_characters
# input = "<p>&#x1d4b5; &#x1d538;</p>"
# output = "<p>\360\235\222\265 \360\235\224\270</p>"
# check_sanitization(input, output, output, output)
# check_sanitization(input, output)

# input = "<p><tspan>\360\235\224\270</tspan> a</p>"
# output = "<p><tspan>\360\235\224\270</tspan> a</p>"
# check_sanitization(input, output, output, output)
# check_sanitization(input, output)
# end

# This affects only NS4. Is it worth fixing?
# def test_javascript_includes
# input = %(<div size="&{alert('XSS')}">foo</div>)
# output = "<div>foo</div>"
# check_sanitization(input, output, output, output)
# check_sanitization(input, output)
# end

##
Expand All @@ -263,12 +263,11 @@ def test_figure_element_is_valid
Dir[File.join(File.dirname(__FILE__), "..", "assets", "testdata_sanitizer_tests1.dat")].each do |filename|
JSON::parse(open(filename).read).each do |test|
it "testdata sanitizer #{test["name"]}" do
check_sanitization(
test["input"],
test["output"],
test["xhtml"] || test["output"],
test["rexml"] || test["output"]
)
test.delete("name")
test.delete("commentary")
input = test.delete("input")
outputs = test.keys.sort.map { |k| test[k] }
check_sanitization(input, *outputs)
end
end
end
Expand All @@ -278,13 +277,13 @@ def test_figure_element_is_valid
define_method "test_allow_uri_refs_in_svg_attribute_#{attr_name}" do
input = "<rect #{attr_name}='url(#foo)' />"
output = "<rect #{attr_name}='url(#foo)'></rect>"
check_sanitization(input, output, output, output)
check_sanitization(input, output)
end

define_method "test_disallow_absolute_uri_refs_in_svg_attribute_#{attr_name}" do
input = "<rect #{attr_name}='yellow url(http://bad.com/) #fff \"blue\"' />"
output = "<rect #{attr_name}='yellow #fff \"blue\"'></rect>"
check_sanitization(input, output, output, output)
check_sanitization(input, output)
end
end

Expand Down

0 comments on commit c8211c1

Please sign in to comment.