From 529f139b465408cda084f43d41f701787acf13d3 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 27 Apr 2022 08:03:10 -0400 Subject: [PATCH 1/5] style(rubocop): Style/RedundantInitialize is smarter now --- lib/nokogiri/xml/node.rb | 4 ++-- nokogiri.gemspec | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/nokogiri/xml/node.rb b/lib/nokogiri/xml/node.rb index 09b12788b4..c90dea6c18 100644 --- a/lib/nokogiri/xml/node.rb +++ b/lib/nokogiri/xml/node.rb @@ -123,8 +123,8 @@ class Node # [Yields] Nokogiri::XML::Node # [Returns] Nokogiri::XML::Node # - def initialize(name, document) # rubocop:disable Style/RedundantInitialize - # This is intentionally empty. + def initialize(name, document) + # This is intentionally empty, and sets the method signature for subclasses. end ### diff --git a/nokogiri.gemspec b/nokogiri.gemspec index 75036af269..369003ddd0 100644 --- a/nokogiri.gemspec +++ b/nokogiri.gemspec @@ -330,7 +330,7 @@ Gem::Specification.new do |spec| spec.add_development_dependency("rake-compiler-dock", "~> 1.2") spec.add_development_dependency("rdoc", "~> 6.3") spec.add_development_dependency("rexical", "~> 1.0.7") - spec.add_development_dependency("rubocop", "~> 1.23") + spec.add_development_dependency("rubocop", "~> 1.28", ">= 1.28.2") spec.add_development_dependency("rubocop-minitest", "~> 0.17") spec.add_development_dependency("rubocop-performance", "~> 1.12") spec.add_development_dependency("rubocop-rake", "~> 0.6") From f3521ba3d38922d76dd5ed59705eab3988213712 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 4 May 2022 11:23:36 -0400 Subject: [PATCH 2/5] style(rubocop): pend Style/FetchEnvVar for now and otherwise update the rubocop todo list --- .rubocop_todo.yml | 47 +++++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 799dc8330f..727df6f532 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -1,6 +1,6 @@ # This configuration was generated by # `rubocop --auto-gen-config` -# on 2022-01-03 20:31:05 UTC using RuboCop version 1.24.1. +# on 2022-05-04 15:17:01 UTC using RuboCop version 1.28.2. # The point is for the user to remove these configuration records # one by one as the offenses are removed from the code base. # Note that changes in the inspected code, or installation of new @@ -22,16 +22,21 @@ Lint/MissingSuper: - 'lib/nokogiri/xml/document_fragment.rb' - 'lib/nokogiri/xml/processing_instruction.rb' -# Offense count: 4 -# Cop supports --auto-correct. +# Offense count: 1 +# This cop supports safe auto-correction (--auto-correct). +Lint/RedundantCopDisableDirective: + Exclude: + - 'lib/nokogiri/xml/processing_instruction.rb' + +# Offense count: 2 +# This cop supports safe auto-correction (--auto-correct). # Configuration parameters: ContextCreatingMethods, MethodCreatingMethods. Lint/UselessAccessModifier: Exclude: - - 'lib/nokogiri/css/xpath_visitor.rb' - 'lib/nokogiri/html5.rb' - 'lib/nokogiri/html5/document.rb' -# Offense count: 18 +# Offense count: 17 # Configuration parameters: CountBlocks. Metrics/BlockNesting: Max: 5 @@ -50,8 +55,8 @@ Performance/CollectionLiteralInLoop: - 'test/xml/test_dtd_encoding.rb' - 'test/xml/test_node_reparenting.rb' -# Offense count: 11 -# Cop supports --auto-correct. +# Offense count: 12 +# This cop supports safe auto-correction (--auto-correct). Performance/StringIdentifierArgument: Exclude: - 'lib/nokogiri/html5.rb' @@ -60,20 +65,14 @@ Performance/StringIdentifierArgument: - 'test/test_memory_leak.rb' - 'test/xml/test_builder.rb' -# Offense count: 1 -# Cop supports --auto-correct. -Performance/StringInclude: - Exclude: - - 'lib/nokogiri/css/xpath_visitor.rb' - # Offense count: 2 -# Cop supports --auto-correct. +# This cop supports safe auto-correction (--auto-correct). Performance/TimesMap: Exclude: - 'test/html5/test_nokogumbo.rb' # Offense count: 6 -# Cop supports --auto-correct. +# This cop supports safe auto-correction (--auto-correct). # Configuration parameters: EnforcedStyle. # SupportedStyles: nested, compact Style/ClassAndModuleChildren: @@ -85,14 +84,26 @@ Style/ClassAndModuleChildren: - 'test/xml/sax/test_parser.rb' - 'test/xml/sax/test_push_parser.rb' +# Offense count: 17 +# This cop supports safe auto-correction (--auto-correct). +# Configuration parameters: AllowedVars. +Style/FetchEnvVar: + Exclude: + - 'ext/nokogiri/extconf.rb' + - 'rakelib/debug.rake' + - 'rakelib/extensions.rake' + - 'rakelib/rdoc.rake' + - 'test/helper.rb' + # Offense count: 2 Style/MissingRespondToMissing: Exclude: - 'lib/nokogiri/xml/builder.rb' -# Offense count: 78 -# Cop supports --auto-correct. -# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns. +# Offense count: 81 +# This cop supports safe auto-correction (--auto-correct). +# Configuration parameters: AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, AllowedPatterns, IgnoredPatterns. # URISchemes: http, https +# AllowedPatterns: \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z), \A\s*(remote_)?test(_\w+)?\s.*(do|->)(\s|\Z) Layout/LineLength: Max: 250 From eac793487183a5e72464e53cccd260971d5f29b5 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 18 Apr 2022 08:16:36 -0400 Subject: [PATCH 3/5] dev: require yaml which was incidentally being eager loaded by rake-compiler --- rakelib/extensions.rake | 1 + 1 file changed, 1 insertion(+) diff --git a/rakelib/extensions.rake b/rakelib/extensions.rake index 17004227cc..c41af2be3e 100644 --- a/rakelib/extensions.rake +++ b/rakelib/extensions.rake @@ -373,6 +373,7 @@ if java? end else require "rake/extensiontask" + require "yaml" dependencies = YAML.load_file("dependencies.yml") From b7c4cc35de38fcfdde4da1203d79ae38bc4324bf Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 27 Apr 2022 13:29:39 -0400 Subject: [PATCH 4/5] test: unpend the LIBXML_LOADED_VERSION test on freebsd essentially reverting 97be5d1 Closes #2506 --- test/test_version.rb | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/test/test_version.rb b/test/test_version.rb index 74f53d00c0..b1872d88bc 100644 --- a/test/test_version.rb +++ b/test/test_version.rb @@ -60,13 +60,7 @@ def test_version_info_for_libxml major = Regexp.last_match(1).to_i minor = Regexp.last_match(2).to_i bug = Regexp.last_match(3).to_i - if RbConfig::CONFIG["target_os"].include?("freebsd13") && Nokogiri::VersionInfo.instance.libxml2_using_system? - pending("https://github.com/sparklemotion/nokogiri/issues/2506") do - assert_equal("#{major}.#{minor}.#{bug}", Nokogiri::VERSION_INFO["libxml"]["loaded"]) - end - else - assert_equal("#{major}.#{minor}.#{bug}", Nokogiri::VERSION_INFO["libxml"]["loaded"]) - end + assert_equal("#{major}.#{minor}.#{bug}", Nokogiri::VERSION_INFO["libxml"]["loaded"]) assert(version_info["libxml"].key?("iconv_enabled")) assert(version_info["libxslt"].key?("datetime_enabled")) From 66c2886e78f6801def83a549c3e6581ac48e61e8 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 2 May 2022 18:04:39 -0400 Subject: [PATCH 5/5] dep: update libxml2 to v2.9.14 from v2.9.13 https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.9.14 --- dependencies.yml | 6 +-- ...t-approach-to-fix-quadratic-behavior.patch | 45 ------------------- test/html4/test_comments.rb | 25 ++++++++++- test/html4/test_document.rb | 25 +++++++---- 4 files changed, 43 insertions(+), 58 deletions(-) delete mode 100644 patches/libxml2/0010-Revert-Different-approach-to-fix-quadratic-behavior.patch diff --git a/dependencies.yml b/dependencies.yml index 5e9b83bbc6..66e926e318 100644 --- a/dependencies.yml +++ b/dependencies.yml @@ -1,7 +1,7 @@ libxml2: - version: "2.9.13" - sha256: "276130602d12fe484ecc03447ee5e759d0465558fbc9d6bd144e3745306ebf0e" - # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.13.sha256sum + version: "2.9.14" + sha256: "60d74a257d1ccec0475e749cba2f21559e48139efba6ff28224357c7c798dfee" + # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.9/libxml2-2.9.14.sha256sum libxslt: version: "1.1.35" diff --git a/patches/libxml2/0010-Revert-Different-approach-to-fix-quadratic-behavior.patch b/patches/libxml2/0010-Revert-Different-approach-to-fix-quadratic-behavior.patch deleted file mode 100644 index 614e227269..0000000000 --- a/patches/libxml2/0010-Revert-Different-approach-to-fix-quadratic-behavior.patch +++ /dev/null @@ -1,45 +0,0 @@ -From ddc5f3d22644e0f6fbcc20541c86825757ffee62 Mon Sep 17 00:00:00 2001 -From: Mike Dalessio -Date: Mon, 21 Feb 2022 18:27:45 -0500 -Subject: [PATCH] Revert "Different approach to fix quadratic behavior in HTML - push parser" - -This reverts commit 798bdf13f6964a650b9a0b7b4b3a769f6f1d509a. ---- - HTMLparser.c | 14 +------------- - 1 file changed, 1 insertion(+), 13 deletions(-) - -diff --git a/HTMLparser.c b/HTMLparser.c -index eba2d7c..c0b8119 100644 ---- a/HTMLparser.c -+++ b/HTMLparser.c -@@ -3960,25 +3960,13 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) { - htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, - "htmlParseStartTag: invalid element name\n", - NULL, NULL); -- /* -- * The recovery code is disabled for now as it can result in -- * quadratic behavior with the push parser. htmlParseStartTag -- * must consume all content up to the final '>' in order to avoid -- * rescanning for this terminator. -- * -- * For a proper fix in line with HTML5, htmlParseStartTag and -- * htmlParseElement should only be called when there's an ASCII -- * alpha character following the initial '<'. Otherwise, the '<' -- * should be emitted as text (unless followed by '!', '/' or '?'). -- */ --#if 0 - /* if recover preserve text on classic misconstructs */ - if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) || (CUR == '<') || - (CUR == '=') || (CUR == '>') || (((CUR >= '0') && (CUR <= '9'))))) { - htmlParseCharDataInternal(ctxt, '<'); - return(-1); - } --#endif -+ - - /* Dump the bogus tag like browsers do */ - while ((CUR != 0) && (CUR != '>') && --- -2.31.0 - diff --git a/test/html4/test_comments.rb b/test/html4/test_comments.rb index 32d7a87855..56fd506827 100644 --- a/test/html4/test_comments.rb +++ b/test/html4/test_comments.rb @@ -173,7 +173,7 @@ class TestComment < Nokogiri::TestCase let(:body) { doc.at_css("body") } let(:subject) { doc.at_css("div#under-test") } - if Nokogiri.uses_libxml? + if Nokogiri.uses_libxml?("<=2.9.13") it "ignores up to the next '>'" do # NON-COMPLIANT assert_equal 2, body.children.length assert_equal body.children[0], subject @@ -183,10 +183,33 @@ class TestComment < Nokogiri::TestCase assert_predicate body.children[1], :text? assert_equal "-->hello", body.children[1].content end + elsif Nokogiri.uses_libxml? + it "parses as pcdata" do # NON-COMPLIANT + assert_equal 1, body.children.length + assert_equal subject, body.children.first + + assert_equal 3, subject.children.length + subject.children[0].tap do |child| + assert_predicate(child, :text?) + assert_equal("hello", child.content) + end + end end if Nokogiri.jruby? it "ignores up to the next '-->'" do # NON-COMPLIANT + assert_equal 1, body.children.length + assert_equal subject, body.children.first + assert_equal 1, subject.children.length assert_predicate subject.children[0], :text? assert_equal "hello", subject.children[0].content diff --git a/test/html4/test_document.rb b/test/html4/test_document.rb index 7468ece78b..2885a60143 100644 --- a/test/html4/test_document.rb +++ b/test/html4/test_document.rb @@ -801,18 +801,25 @@ def test_leaking_dtd_nodes_after_internal_subset_removal it "skips to the next start tag" do # see https://github.com/sparklemotion/nokogiri/issues/2461 for why we're testing this edge case - if Nokogiri.uses_libxml?(">= 2.9.13") - skip_unless_libxml2_patch("0010-Revert-Different-approach-to-fix-quadratic-behavior.patch") - end - doc = Nokogiri::HTML4.parse(input) body = doc.at_xpath("//body") - expected_error_snippet = Nokogiri.uses_libxml? ? "invalid element name" : "Missing start element name" - assert_includes(doc.errors.first.to_s, expected_error_snippet) - - assert_equal("this < that", body.children.first.text, body.to_html) - assert_equal(["div", "div"], body.children.map(&:name), body.to_html) + if Nokogiri.uses_libxml?("= 2.9.13") + #
this
second element
+ assert_equal(1, body.children.length) + body.children.first.tap do |div| + assert_equal(2, div.children.length) + assert_equal("this ", div.children[0].content) + assert_equal("div", div.children[1].name) + assert_equal("second element", div.children[1].content) + end + else + #
this < that
second element
+ assert_equal(2, body.children.length) + assert_equal(["div", "div"], body.children.map(&:name), body.to_html) + assert_equal("this < that", body.children[0].text, body.to_html) + assert_equal("second element", body.children[1].text, body.to_html) + end end end