Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clean up tests #2215

Merged
merged 3 commits into from Apr 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 18 additions & 1 deletion test/helper.rb
Expand Up @@ -127,7 +127,7 @@ def teardown

def stress_memory_while(&block)
# force the test to explicitly declare a skip
raise "JRuby doesn't do GC" if Nokogiri.jruby?
raise "memory stress tests shouldn't be run on JRuby" if Nokogiri.jruby?

old_stress = GC.stress
begin
Expand All @@ -138,6 +138,23 @@ def stress_memory_while(&block)
end
end

# Skip the calling test unless Nokogiri.uses_libxml? is truthy.
#
# msg - reason handed to `skip`; defaults to a generic libxml2-only notice.
#
# Returns nil without calling `skip` when Nokogiri.uses_libxml? is truthy.
def skip_unless_libxml2(msg = "this test should only run with libxml2")
  return if Nokogiri.uses_libxml?

  skip(msg)
end

# Skip the calling test unless Nokogiri.jruby? is truthy.
#
# msg - reason handed to `skip`; defaults to a generic jruby-only notice.
#
# Returns nil without calling `skip` when Nokogiri.jruby? is truthy.
def skip_unless_jruby(msg = "this test should only run with jruby")
  return if Nokogiri.jruby?

  skip(msg)
end

# Run the given block and return its value, triggering a full-mark GC pass
# afterwards when GC_LEVEL is "minor".
#
# Raises RuntimeError (before yielding) when Nokogiri.jruby? is truthy.
#
# NOTE(review): presumably the explicit GC is there so that objects created by
# the block are collected while the suite runs under valgrind -- confirm.
def refute_valgrind_errors
  # force the test to explicitly declare a skip
  if Nokogiri.jruby?
    raise "memory stress tests shouldn't be run on JRuby"
  end

  result = yield
  GC.start(full_mark: true) if GC_LEVEL == "minor"
  result
end

def assert_indent(amount, doc, message = nil)
nodes = []
doc.traverse do |node|
Expand Down
42 changes: 21 additions & 21 deletions test/html/sax/test_push_parser.rb
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
# frozen_string_literal: true

require "helper"

Expand All @@ -12,38 +13,37 @@ def setup
end

def test_end_document_called
@parser.<<(<<-eoxml)
@parser.<<(<<~eoxml)
<p id="asdfasdf">
<!-- This is a comment -->
Paragraph 1
</p>
eoxml
assert ! @parser.document.end_document_called
assert(!@parser.document.end_document_called)
@parser.finish
assert @parser.document.end_document_called
assert(@parser.document.end_document_called)
end

def test_start_element
@parser.<<(<<-eoxml)
@parser.<<(<<~eoxml)
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html><head><body><p id="asdfasdf">
eoxml

assert_equal [["html", []], ["head", []], ["body", []], ["p", [["id", "asdfasdf"]]]],
@parser.document.start_elements
@parser.<<(<<-eoxml)
<!-- This is a comment -->
Paragraph 1
assert_equal([["html", []], ["head", []], ["body", []], ["p", [["id", "asdfasdf"]]]],
@parser.document.start_elements)

@parser.<<(<<~eoxml)
<!-- This is a comment -->
Paragraph 1
</p></body></html>
eoxml
assert_equal [' This is a comment '], @parser.document.comments
assert_equal([' This is a comment '], @parser.document.comments)
@parser.finish
end


def test_chevron_partial_html
@parser.<<(<<-eoxml)
@parser.<<(<<~eoxml)
<p id="asdfasdf">
eoxml

Expand All @@ -52,34 +52,34 @@ def test_chevron_partial_html
Paragraph 1
</p>
eoxml
assert_equal [' This is a comment '], @parser.document.comments
assert_equal([' This is a comment '], @parser.document.comments)
@parser.finish
end

def test_chevron
@parser.<<(<<-eoxml)
@parser.<<(<<~eoxml)
<p id="asdfasdf">
<!-- This is a comment -->
Paragraph 1
</p>
eoxml
@parser.finish
assert_equal [' This is a comment '], @parser.document.comments
assert_equal([' This is a comment '], @parser.document.comments)
end

def test_default_options
assert_equal 0, @parser.options
assert_equal(0, @parser.options)
end

def test_broken_encoding
skip("ultra hard to fix for pure Java version") if Nokogiri.jruby?
skip_unless_libxml2("ultra hard to fix for pure Java version")
@parser.options |= XML::ParseOptions::RECOVER
# This is ISO_8859-1:
@parser.<< "<?xml version='1.0' encoding='UTF-8'?><r>Gau\337</r>"
@parser.finish
assert(@parser.document.errors.size >= 1)
assert_equal "Gau\337", @parser.document.data.join
assert_equal [["r"], ["body"], ["html"]], @parser.document.end_elements
assert_equal("Gau\337", @parser.document.data.join)
assert_equal([["r"], ["body"], ["html"]], @parser.document.end_elements)
end
end
end
Expand Down
124 changes: 62 additions & 62 deletions test/html/test_attributes.rb
Expand Up @@ -3,80 +3,80 @@
module Nokogiri
module HTML
class TestAttr < Nokogiri::TestCase
unless Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?
#
# libxml2 >= 2.9.2 fails to escape comments within some attributes. It
# wants to ensure these comments can be treated as "server-side includes",
# but as a result fails to ensure that serialization is well-formed,
# resulting in an opportunity for XSS injection of code into a final
# re-parsed document (presumably in a browser).
#
# the offending commit is:
#
# https://github.com/GNOME/libxml2/commit/960f0e2
#
# we'll test this by parsing the HTML, serializing it, then
# re-parsing it to ensure there isn't any ambiguity in the output
# that might allow code injection into a browser consuming
# "sanitized" output.
#
# complaints have been made upstream about this behavior, notably at
#
# https://bugzilla.gnome.org/show_bug.cgi?id=769760
#
# and multiple CVEs have been declared and fixed in downstream
# libraries as a result, a list is being kept up to date here:
#
# https://github.com/flavorjones/loofah/issues/144
#
[
#
# libxml2 >= 2.9.2 fails to escape comments within some attributes. It
# wants to ensure these comments can be treated as "server-side includes",
# but as a result fails to ensure that serialization is well-formed,
# resulting in an opportunity for XSS injection of code into a final
# re-parsed document (presumably in a browser).
# these tags and attributes are determined by the code at:
#
# the offending commit is:
# https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
#
# https://github.com/GNOME/libxml2/commit/960f0e2
{tag: "a", attr: "href"},
{tag: "div", attr: "href"},
{tag: "a", attr: "action"},
{tag: "div", attr: "action"},
{tag: "a", attr: "src"},
{tag: "div", attr: "src"},
{tag: "a", attr: "name"},
#
# we'll test this by parsing the HTML, serializing it, then
# re-parsing it to ensure there isn't any ambiguity in the output
# that might allow code injection into a browser consuming
# "sanitized" output.
# note that div+name is _not_ affected by the libxml2 issue.
# but we test it anyway to ensure our logic isn't modifying
# attributes that don't need modifying.
#
# complaints have been made upstream about this behavior, notably at
#
# https://bugzilla.gnome.org/show_bug.cgi?id=769760
#
# and multiple CVEs have been declared and fixed in downstream
# libraries as a result, a list is being kept up to date here:
#
# https://github.com/flavorjones/loofah/issues/144
#
[
#
# these tags and attributes are determined by the code at:
#
# https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
#
{tag: "a", attr: "href"},
{tag: "div", attr: "href"},
{tag: "a", attr: "action"},
{tag: "div", attr: "action"},
{tag: "a", attr: "src"},
{tag: "div", attr: "src"},
{tag: "a", attr: "name"},
#
# note that div+name is _not_ affected by the libxml2 issue.
# but we test it anyway to ensure our logic isn't modifying
# attributes that don't need modifying.
#
{tag: "div", attr: "name", unescaped: true},
].each do |config|
{tag: "div", attr: "name", unescaped: true},
].each do |config|

define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=unsafevalue()>-->le.com'>test</#{config[:tag]}>}
define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
skip if Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?

reparsed = HTML.fragment(HTML.fragment(html).to_html)
attributes = reparsed.at_css(config[:tag]).attribute_nodes
html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=unsafevalue()>-->le.com'>test</#{config[:tag]}>}

assert_equal [config[:attr]], attributes.collect(&:name)
if Nokogiri::VersionInfo.instance.libxml2?
if config[:unescaped]
#
# this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
# assert that this attribute's serialization is unaffected.
#
assert_equal %{examp<!--" unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
else
#
# let's match the behavior in libxml < 2.9.2.
# test that this attribute's serialization is well-formed and sanitized.
#
assert_equal %{examp<!--%22%20unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
end
reparsed = HTML.fragment(HTML.fragment(html).to_html)
attributes = reparsed.at_css(config[:tag]).attribute_nodes

assert_equal [config[:attr]], attributes.collect(&:name)
if Nokogiri::VersionInfo.instance.libxml2?
if config[:unescaped]
#
# this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
# assert that this attribute's serialization is unaffected.
#
assert_equal %{examp<!--" unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
else
#
# yay for consistency in javaland. move along, nothing to see here.
# let's match the behavior in libxml < 2.9.2.
# test that this attribute's serialization is well-formed and sanitized.
#
assert_equal %{examp<!--%22 unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
assert_equal %{examp<!--%22%20unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
end
else
#
# yay for consistency in javaland. move along, nothing to see here.
#
assert_equal %{examp<!--%22 unsafeattr=unsafevalue()>-->le.com}, attributes.first.value
end
end
end
Expand Down
42 changes: 23 additions & 19 deletions test/html/test_attributes_properly_escaped.rb
Expand Up @@ -3,31 +3,35 @@
module Nokogiri
module HTML
class TestAttributesProperlyEscaped < Nokogiri::TestCase
unless Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?

def test_attribute_macros_are_escaped
html = "<p><i for=\"&{<test>}\"></i></p>"
document = Nokogiri::HTML::Document.new
nodes = document.parse(html)
def test_attribute_macros_are_escaped
skip if Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?

assert_equal("<p><i for=\"&amp;{&lt;test&gt;}\"></i></p>", nodes[0].to_s)
end
html = "<p><i for=\"&{<test>}\"></i></p>"
document = Nokogiri::HTML::Document.new
nodes = document.parse(html)

def test_libxml_escapes_server_side_includes
original_html = %(<p><a href='<!--"><test>-->'></a></p>)
document = Nokogiri::HTML::Document.new
html = document.parse(original_html).to_s
assert_equal("<p><i for=\"&amp;{&lt;test&gt;}\"></i></p>", nodes[0].to_s)
end

def test_libxml_escapes_server_side_includes
skip if Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?

original_html = %(<p><a href='<!--"><test>-->'></a></p>)
document = Nokogiri::HTML::Document.new
html = document.parse(original_html).to_s

assert_match(/!--%22&gt;&lt;test&gt;/, html)
end

assert_match(/!--%22&gt;&lt;test&gt;/, html)
end
def test_libxml_escapes_server_side_includes_without_nested_quotes
skip if Nokogiri::VersionInfo.instance.libxml2? && Nokogiri::VersionInfo.instance.libxml2_using_system?

def test_libxml_escapes_server_side_includes_without_nested_quotes
original_html = %(<p><i for="<!--<test>-->"></i></p>)
document = Nokogiri::HTML::Document.new
html = document.parse(original_html).to_s
original_html = %(<p><i for="<!--<test>-->"></i></p>)
document = Nokogiri::HTML::Document.new
html = document.parse(original_html).to_s

assert_match(/&lt;!--&lt;test&gt;/, html)
end
assert_match(/&lt;!--&lt;test&gt;/, html)
end
end
end
Expand Down
17 changes: 9 additions & 8 deletions test/namespaces/test_namespaces_in_parsed_doc.rb
Expand Up @@ -63,16 +63,17 @@ def test_parsed_namespace_count
end

def test_namespaces_under_memory_pressure_issue1155
skip("JRuby doesn't do GC.") if Nokogiri.jruby?
skip_unless_libxml2("valgrind tests should only run with libxml2")

# this test is here to emit warnings when run under valgrind
# see https://github.com/sparklemotion/nokogiri/issues/1155 for background
filename = File.join ASSETS_DIR, 'namespace_pressure_test.xml'
doc = Nokogiri::XML File.open(filename)
refute_valgrind_errors do
# see https://github.com/sparklemotion/nokogiri/issues/1155 for background
filename = File.join ASSETS_DIR, 'namespace_pressure_test.xml'
doc = Nokogiri::XML File.open(filename)

# bizarrely, can't repro without the call to #to_a
doc.xpath('//namespace::*').to_a.each do |ns|
ns.inspect
# bizarrely, can't repro without the call to #to_a
doc.xpath('//namespace::*').to_a.each do |ns|
ns.inspect
end
end
end
end
Expand Down
2 changes: 1 addition & 1 deletion test/test_nokogiri.rb
Expand Up @@ -2,7 +2,7 @@

class TestNokogiri < Nokogiri::TestCase
def test_libxml_iconv
skip "this constant is only set in the C extension when libxml2 is used" if !Nokogiri.uses_libxml?
skip_unless_libxml2("this constant is only set in the C extension when libxml2 is used")
assert Nokogiri.const_defined?(:LIBXML_ICONV_ENABLED)
end

Expand Down
12 changes: 4 additions & 8 deletions test/test_version.rb
Expand Up @@ -28,18 +28,14 @@ def test_version_info_basics
assert_equal(::Gem::Platform.local.to_s, Nokogiri::VERSION_INFO["ruby"]["gem_platform"])
end

def test_version_info_for_xerces
skip("xerces is only used for JRuby") unless Nokogiri.jruby?
def test_version_info_for_xerces_and_nekohtml
skip_unless_jruby("xerces/nekohtml is only used for JRuby")
assert_equal(Nokogiri::XERCES_VERSION, version_info["other_libraries"]["xerces"])
end

def test_version_info_for_nekohtml
skip("nekohtml is only used for JRuby") unless Nokogiri.jruby?
assert_equal(Nokogiri::NEKO_VERSION, version_info["other_libraries"]["nekohtml"])
end

def test_version_info_for_libxml
skip("libxml2 is only used for CRuby") unless Nokogiri.uses_libxml?
skip_unless_libxml2("libxml2 is only used for CRuby")

if Nokogiri::VersionInfo.instance.libxml2_using_packaged?
assert_equal("packaged", version_info["libxml"]["source"])
Expand All @@ -66,7 +62,7 @@ def test_version_info_for_libxml
end

def test_version_info_for_libxslt
skip("libxslt is only used for CRuby") unless Nokogiri.uses_libxml?
skip_unless_libxml2("libxslt is only used for CRuby")

if Nokogiri::VersionInfo.instance.libxml2_using_packaged?
assert_equal("packaged", version_info["libxslt"]["source"])
Expand Down