Skip to content

Commit

Permalink
Merge pull request #2526 from sparklemotion/2525-update-libxml-2_9_14…
Browse files Browse the repository at this point in the history
…-main

dep: update libxml2 to v2.9.14 (main branch)

---

**What problem is this PR intended to solve?**

Update libxml2 to v2.9.14 from v2.9.13, see #2525 

> https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.9.14

Also ensure that tests pass against upstream libxml2 (#2468).

**Have you included adequate test coverage?**

This PR updates tests to reflect the difference in how incorrectly-opened comments are handled in this release.


**Does this change affect the behavior of either the C or the Java implementations?**

The C native implementation's handling of incorrectly-opened comments differs both from previous releases and from the JRuby implementation's handling. These differences are fully captured and explained in the test suite.
  • Loading branch information
flavorjones committed May 4, 2022
2 parents b8802bf + 3ed5bf2 commit c9b9201
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 61 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/upstream.yml
Expand Up @@ -12,6 +12,8 @@ jobs:
runs-on: ubuntu-latest
container:
image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1
env:
CI_UPSTREAM_XMLSOFT: t
steps:
- uses: actions/checkout@v2
with:
Expand All @@ -37,6 +39,8 @@ jobs:
runs-on: ubuntu-latest
container:
image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1
env:
CI_UPSTREAM_XMLSOFT: t
steps:
- uses: actions/checkout@v2
with:
Expand Down
6 changes: 3 additions & 3 deletions dependencies.yml
@@ -1,7 +1,7 @@
libxml2:
version: "2.9.13"
sha256: "276130602d12fe484ecc03447ee5e759d0465558fbc9d6bd144e3745306ebf0e"
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.13.sha256sum
version: "2.9.14"
sha256: "60d74a257d1ccec0475e749cba2f21559e48139efba6ff28224357c7c798dfee"
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.14.sha256sum

libxslt:
version: "1.1.35"
Expand Down

This file was deleted.

8 changes: 8 additions & 0 deletions test/helper.rb
Expand Up @@ -132,6 +132,14 @@ class TestCase < MiniTest::Spec
@@test_count = 0 # rubocop:disable Style/ClassVars
@@gc_level = nil # rubocop:disable Style/ClassVars

# Class-level predicate used to branch on "upstream libxml2" behavior.
#
# Returns the raw value of the CI_UPSTREAM_XMLSOFT environment variable when
# it is set (any value is truthy); otherwise returns true or false depending
# on whether Nokogiri::LIBXML_LOADED_VERSION contains the "-GIT" marker.
def self.upstream_xmlsoft?
  ci_flag = ENV["CI_UPSTREAM_XMLSOFT"]
  return ci_flag if ci_flag

  Nokogiri::LIBXML_LOADED_VERSION.include?("-GIT")
end

# Instance-level counterpart of the class predicate: asks the receiver's
# class and returns whatever its upstream_xmlsoft? returns.
def upstream_xmlsoft?
  klass = self.class
  klass.upstream_xmlsoft?
end

def initialize_nokogiri_test_gc_level
return if Nokogiri.jruby?
return if @@gc_level
Expand Down
29 changes: 26 additions & 3 deletions test/html4/test_comments.rb
Expand Up @@ -23,7 +23,7 @@ class TestComment < Nokogiri::TestCase
let(:html) { "<html><body><div id=under-test><!--></div><div id=also-here></div></body></html>" }

if Nokogiri.uses_libxml?
if Nokogiri.libxml2_patches.include?("0008-htmlParseComment-handle-abruptly-closed-comments.patch")
if Nokogiri.libxml2_patches.include?("0008-htmlParseComment-handle-abruptly-closed-comments.patch") || upstream_xmlsoft?
it "behaves as if the comment is closed correctly" do # COMPLIANT
assert_equal 1, subject.children.length
assert_predicate subject.children.first, :comment?
Expand Down Expand Up @@ -54,7 +54,7 @@ class TestComment < Nokogiri::TestCase
let(:html) { "<html><body><div id=under-test><!---></div><div id=also-here></div></body></html>" }

if Nokogiri.uses_libxml?
if Nokogiri.libxml2_patches.include?("0008-htmlParseComment-handle-abruptly-closed-comments.patch")
if Nokogiri.libxml2_patches.include?("0008-htmlParseComment-handle-abruptly-closed-comments.patch") || upstream_xmlsoft?
it "behaves as if the comment is closed correctly" do # COMPLIANT
assert_equal 1, subject.children.length
assert_predicate subject.children.first, :comment?
Expand Down Expand Up @@ -173,7 +173,7 @@ class TestComment < Nokogiri::TestCase
let(:body) { doc.at_css("body") }
let(:subject) { doc.at_css("div#under-test") }

if Nokogiri.uses_libxml?
if Nokogiri.uses_libxml?("<=2.9.13") && !upstream_xmlsoft?
it "ignores up to the next '>'" do # NON-COMPLIANT
assert_equal 2, body.children.length
assert_equal body.children[0], subject
Expand All @@ -183,10 +183,33 @@ class TestComment < Nokogiri::TestCase
assert_predicate body.children[1], :text?
assert_equal "-->hello", body.children[1].content
end
elsif Nokogiri.uses_libxml?
it "parses as pcdata" do # NON-COMPLIANT
  # The element under test is the only child of <body>.
  assert_equal 1, body.children.length
  assert_equal subject, body.children.first

  # Expected children of the element under test, in order:
  # text, a <div> element, then trailing text.
  assert_equal 3, subject.children.length

  leading_text = subject.children[0]
  assert_predicate(leading_text, :text?)
  assert_equal("<! comment ", leading_text.content)

  inner_div = subject.children[1]
  assert_predicate(inner_div, :element?)
  assert_equal("div", inner_div.name)
  assert_equal("inner content", inner_div.content)

  trailing_text = subject.children[2]
  assert_predicate(trailing_text, :text?)
  assert_equal("-->hello", trailing_text.content)
end
end

if Nokogiri.jruby?
it "ignores up to the next '-->'" do # NON-COMPLIANT
assert_equal 1, body.children.length
assert_equal subject, body.children.first

assert_equal 1, subject.children.length
assert_predicate subject.children[0], :text?
assert_equal "hello", subject.children[0].content
Expand Down
27 changes: 17 additions & 10 deletions test/html4/test_document.rb
Expand Up @@ -779,7 +779,7 @@ def test_leaking_dtd_nodes_after_internal_subset_removal
doc = Nokogiri::HTML4::Document.parse(html)
expected = if Nokogiri.jruby?
[Nokogiri::XML::Node::COMMENT_NODE, Nokogiri::XML::Node::PI_NODE]
elsif Nokogiri.libxml2_patches.include?("0008-htmlParseComment-handle-abruptly-closed-comments.patch")
elsif Nokogiri.libxml2_patches.include?("0008-htmlParseComment-handle-abruptly-closed-comments.patch") || upstream_xmlsoft?
[Nokogiri::XML::Node::COMMENT_NODE]
else
[]
Expand All @@ -801,18 +801,25 @@ def test_leaking_dtd_nodes_after_internal_subset_removal

it "skips to the next start tag" do
# see https://github.com/sparklemotion/nokogiri/issues/2461 for why we're testing this edge case
if Nokogiri.uses_libxml?(">= 2.9.13")
skip_unless_libxml2_patch("0010-Revert-Different-approach-to-fix-quadratic-behavior.patch")
end

doc = Nokogiri::HTML4.parse(input)
body = doc.at_xpath("//body")

expected_error_snippet = Nokogiri.uses_libxml? ? "invalid element name" : "Missing start element name"
assert_includes(doc.errors.first.to_s, expected_error_snippet)

assert_equal("this < that", body.children.first.text, body.to_html)
assert_equal(["div", "div"], body.children.map(&:name), body.to_html)
if Nokogiri.uses_libxml?("= 2.9.13") && !upstream_xmlsoft?
# <body><div>this <div>second element</div></div></body>
assert_equal(1, body.children.length)
body.children.first.tap do |div|
assert_equal(2, div.children.length)
assert_equal("this ", div.children[0].content)
assert_equal("div", div.children[1].name)
assert_equal("second element", div.children[1].content)
end
else
# <body><div>this &lt; that</div><div>second element</div></body>
assert_equal(2, body.children.length)
assert_equal(["div", "div"], body.children.map(&:name), body.to_html)
assert_equal("this < that", body.children[0].text, body.to_html)
assert_equal("second element", body.children[1].text, body.to_html)
end
end
end

Expand Down

0 comments on commit c9b9201

Please sign in to comment.