From 212a46e23451c5154defff39e1acf92bcbd445df Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 2 May 2022 18:04:39 -0400 Subject: [PATCH] dep: update libxml2 to v2.9.14 from v2.9.13 https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.9.14 --- dependencies.yml | 6 +-- ...t-approach-to-fix-quadratic-behavior.patch | 45 ------------------- test/html4/test_comments.rb | 25 ++++++++++- test/html4/test_document.rb | 25 +++++++---- 4 files changed, 43 insertions(+), 58 deletions(-) delete mode 100644 patches/libxml2/0010-Revert-Different-approach-to-fix-quadratic-behavior.patch diff --git a/dependencies.yml b/dependencies.yml index 5e9b83bbc6..66e926e318 100644 --- a/dependencies.yml +++ b/dependencies.yml @@ -1,7 +1,7 @@ libxml2: - version: "2.9.13" - sha256: "276130602d12fe484ecc03447ee5e759d0465558fbc9d6bd144e3745306ebf0e" - # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.13.sha256sum + version: "2.9.14" + sha256: "60d74a257d1ccec0475e749cba2f21559e48139efba6ff28224357c7c798dfee" + # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.14.sha256sum libxslt: version: "1.1.35" diff --git a/patches/libxml2/0010-Revert-Different-approach-to-fix-quadratic-behavior.patch b/patches/libxml2/0010-Revert-Different-approach-to-fix-quadratic-behavior.patch deleted file mode 100644 index 614e227269..0000000000 --- a/patches/libxml2/0010-Revert-Different-approach-to-fix-quadratic-behavior.patch +++ /dev/null @@ -1,45 +0,0 @@ -From ddc5f3d22644e0f6fbcc20541c86825757ffee62 Mon Sep 17 00:00:00 2001 -From: Mike Dalessio -Date: Mon, 21 Feb 2022 18:27:45 -0500 -Subject: [PATCH] Revert "Different approach to fix quadratic behavior in HTML - push parser" - -This reverts commit 798bdf13f6964a650b9a0b7b4b3a769f6f1d509a. 
---- - HTMLparser.c | 14 +------------- - 1 file changed, 1 insertion(+), 13 deletions(-) - -diff --git a/HTMLparser.c b/HTMLparser.c -index eba2d7c..c0b8119 100644 ---- a/HTMLparser.c -+++ b/HTMLparser.c -@@ -3960,25 +3960,13 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { - htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, - "htmlParseStartTag: invalid element name\n", - NULL, NULL); -- /* -- * The recovery code is disabled for now as it can result in -- * quadratic behavior with the push parser. htmlParseStartTag -- * must consume all content up to the final '>' in order to avoid -- * rescanning for this terminator. -- * -- * For a proper fix in line with HTML5, htmlParseStartTag and -- * htmlParseElement should only be called when there's an ASCII -- * alpha character following the initial '<'. Otherwise, the '<' -- * should be emitted as text (unless followed by '!', '/' or '?'). -- */ --#if 0 - /* if recover preserve text on classic misconstructs */ - if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) || (CUR == '<') || - (CUR == '=') || (CUR == '>') || (((CUR >= '0') && (CUR <= '9'))))) { - htmlParseCharDataInternal(ctxt, '<'); - return(-1); - } --#endif -+ - - /* Dump the bogus tag like browsers do */ - while ((CUR != 0) && (CUR != '>') && --- -2.31.0 - diff --git a/test/html4/test_comments.rb b/test/html4/test_comments.rb index 32d7a87855..56fd506827 100644 --- a/test/html4/test_comments.rb +++ b/test/html4/test_comments.rb @@ -173,7 +173,7 @@ class TestComment < Nokogiri::TestCase let(:body) { doc.at_css("body") } let(:subject) { doc.at_css("div#under-test") } - if Nokogiri.uses_libxml? + if Nokogiri.uses_libxml?("<=2.9.13") it "ignores up to the next '>'" do # NON-COMPLIANT assert_equal 2, body.children.length assert_equal body.children[0], subject @@ -183,10 +183,33 @@ class TestComment < Nokogiri::TestCase assert_predicate body.children[1], :text? assert_equal "-->hello", body.children[1].content end + elsif Nokogiri.uses_libxml? 
+ it "parses as pcdata" do # NON-COMPLIANT + assert_equal 1, body.children.length + assert_equal subject, body.children.first + + assert_equal 3, subject.children.length + subject.children[0].tap do |child| + assert_predicate(child, :text?) + assert_equal("hello", child.content) + end + end end if Nokogiri.jruby? it "ignores up to the next '-->'" do # NON-COMPLIANT + assert_equal 1, body.children.length + assert_equal subject, body.children.first + assert_equal 1, subject.children.length assert_predicate subject.children[0], :text? assert_equal "hello", subject.children[0].content diff --git a/test/html4/test_document.rb b/test/html4/test_document.rb index 7468ece78b..2885a60143 100644 --- a/test/html4/test_document.rb +++ b/test/html4/test_document.rb @@ -801,18 +801,25 @@ def test_leaking_dtd_nodes_after_internal_subset_removal it "skips to the next start tag" do # see https://github.com/sparklemotion/nokogiri/issues/2461 for why we're testing this edge case - if Nokogiri.uses_libxml?(">= 2.9.13") - skip_unless_libxml2_patch("0010-Revert-Different-approach-to-fix-quadratic-behavior.patch") - end - doc = Nokogiri::HTML4.parse(input) body = doc.at_xpath("//body") - expected_error_snippet = Nokogiri.uses_libxml? ? "invalid element name" : "Missing start element name" - assert_includes(doc.errors.first.to_s, expected_error_snippet) - - assert_equal("this < that", body.children.first.text, body.to_html) - assert_equal(["div", "div"], body.children.map(&:name), body.to_html) + if Nokogiri.uses_libxml?("= 2.9.13") + #
<div>this <div>second element</div></div>
+ assert_equal(1, body.children.length) + body.children.first.tap do |div| + assert_equal(2, div.children.length) + assert_equal("this ", div.children[0].content) + assert_equal("div", div.children[1].name) + assert_equal("second element", div.children[1].content) + end + else + #
<div>this < that</div><div>second element</div>
+ assert_equal(2, body.children.length) + assert_equal(["div", "div"], body.children.map(&:name), body.to_html) + assert_equal("this < that", body.children[0].text, body.to_html) + assert_equal("second element", body.children[1].text, body.to_html) + end end end