From 63aa9a0be73332d99e8ba41656d3ede5399f759e Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 4 Feb 2021 21:12:03 -0500 Subject: [PATCH 1/9] test: default to a full GC mark phase after each test in an attempt to more reliably trigger memory bugs when they exist. This behavior can be changed by setting the env var NOKOGIRI_TEST_GC_SETTING to "none", "minor", or "stress" --- test/helper.rb | 46 +++++++++++++++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/test/helper.rb b/test/helper.rb index bd08bccc4c..43d4ebd5b4 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true require 'simplecov' SimpleCov.start do add_filter "/test/" @@ -58,23 +59,42 @@ class TestCase < MiniTest::Spec XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt') XPATH_FILE = File.join(ASSETS_DIR, 'slow-xpath.xml') + unless Nokogiri.jruby? + GC_SETTING = if ["stress", "major", "minor", "none"].include?(ENV['NOKOGIRI_TEST_GC_SETTING']) + ENV['NOKOGIRI_TEST_GC_SETTING'] + else + "major" # the default + end + warn "#{__FILE__}:#{__LINE__}: suite GC setting: #{GC_SETTING}" + end + def setup - @fake_error_handler_called = false - Nokogiri::Test.__foreign_error_handler do - @fake_error_handler_called = true - end if Nokogiri.uses_libxml? + if Nokogiri.uses_libxml? + @fake_error_handler_called = false + Nokogiri::Test.__foreign_error_handler do + @fake_error_handler_called = true + end + end + + unless Nokogiri.jruby? + if GC_SETTING == "stress" + GC.stress = true + end + end end def teardown - refute(@fake_error_handler_called, "the fake error handler should never get called") if Nokogiri.uses_libxml? - - if ENV['NOKOGIRI_GC'] - STDOUT.putc '!' - if RUBY_PLATFORM =~ /java/ - require 'java' - java.lang.System.gc - else - GC.start + if Nokogiri.uses_libxml? + refute(@fake_error_handler_called, "the fake error handler should never get called") + end + + unless Nokogiri.jruby? + if GC_SETTING == "major" + GC.start(full_mark: true) + elsif GC_SETTING == "minor" + GC.start(full_mark: false) + elsif GC_SETTING == "stress" + GC.stress = false end end end From 9a01a7ff197fa269bc3f4499286648e99a9c22cc Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 5 Feb 2021 08:06:36 -0500 Subject: [PATCH 2/9] test: rake test:gdb is now available again after being removed when I ripped out Hoe and Hoe::Debugging --- rakelib/test.rake | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/rakelib/test.rake b/rakelib/test.rake index 89b9eb2645..dfbe186a18 100644 --- a/rakelib/test.rake +++ b/rakelib/test.rake @@ -57,6 +57,13 @@ class ValgrindTestTask < Rake::TestTask end end +class GdbTestTask < ValgrindTestTask + def ruby(*args, **options, &block) + command = "gdb --args #{RUBY} #{args.join(' ')}" + sh(command, **options, &block) + end +end + def nokogiri_test_task_configuration(t) t.libs << "test" t.test_files = FileList["test/**/test_*.rb"] @@ -71,4 +78,8 @@ namespace "test" do ValgrindTestTask.new("valgrind") do |t| nokogiri_test_task_configuration(t) end + + GdbTestTask.new("gdb") do |t| + nokogiri_test_task_configuration(t) + end end From be6fde4318aa25808d3f464c1da4a32ef00b9b76 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 5 Feb 2021 09:41:09 -0500 Subject: [PATCH 3/9] test: configure "fast fail" by setting NOKOGIRI_TEST_FAIL_FAST --- test/helper.rb | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/helper.rb b/test/helper.rb index 43d4ebd5b4..a89a4420d7 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -1,4 +1,14 @@ # frozen_string_literal: true +# +# Some environment variables that are used to configure the test suite: +# - NOKOGIRI_TEST_FAIL_FAST: if set to anything, emit test failure messages immediately upon failure +# - NOKOGIRI_TEST_GC_SETTING: +# - "stress" - run tests with GC.stress set to true +# - "major" (default) - force a major GC cycle after each test +# - "minor" - force a minor GC cycle after each test +# - "none" - normal GC functionality +# - NOKOGIRI_GC: read more in test/test_memory_leak.rb +# require 'simplecov' SimpleCov.start do add_filter "/test/" @@ -8,7 +18,10 @@ require 'minitest/autorun' require 'minitest/reporters' -Minitest::Reporters.use!(Minitest::Reporters::DefaultReporter.new({color: true, slow_count: 5, detailed_skip: false})) +NOKOGIRI_MINITEST_REPORTERS_OPTIONS = {color: true, slow_count: 5, detailed_skip: false} +NOKOGIRI_MINITEST_REPORTERS_OPTIONS[:fast_fail] = true if ENV["NOKOGIRI_TEST_FAIL_FAST"] +puts NOKOGIRI_MINITEST_REPORTERS_OPTIONS +Minitest::Reporters.use!(Minitest::Reporters::DefaultReporter.new(NOKOGIRI_MINITEST_REPORTERS_OPTIONS)) require 'fileutils' require 'tempfile' From 3d86c939764351146606476a79e9ba49f9a7b131 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 5 Feb 2021 10:52:15 -0500 Subject: [PATCH 4/9] dev: better output from files-modified-by-open-prs --- scripts/files-modified-by-open-prs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/files-modified-by-open-prs b/scripts/files-modified-by-open-prs index e2bbb38df4..2f958c0ab2 100755 --- a/scripts/files-modified-by-open-prs +++ b/scripts/files-modified-by-open-prs @@ -35,7 +35,8 @@ prs = get("gh pr list").split("\n") paths_to_prs = Hash.new { |h,k| h[k] = Set.new } prs.each do |pr| - pr_number = pr.split(/\s+/).first + pr_number, *pr_desc = pr.split(/\s+/) + pr_desc = pr_desc.join(" ") diff = get("gh pr diff #{pr_number}") diff_data = GitDiff::from_string(diff) @@ -43,7 +44,7 @@ prs.each do |pr| relevant_files = diff_data.files.map(&:b_path) if relevant_files.length > 0 - puts "PR #{pr_number}:" + puts "PR #{pr_number}: #{pr_desc}" relevant_files.sort.each do |file| puts "- #{file}" end From 91f97393dc3254c83d0772f6a142580541bb0702 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 1 Feb 2021 17:49:47 -0500 Subject: [PATCH 5/9] format: update xml_node_set.c set_union to rb_xml_node_set_union --- ext/nokogiri/xml_node_set.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/ext/nokogiri/xml_node_set.c b/ext/nokogiri/xml_node_set.c index 3fbe23a8b1..f5ceef09e9 100644 --- a/ext/nokogiri/xml_node_set.c +++ b/ext/nokogiri/xml_node_set.c @@ -176,21 +176,22 @@ static VALUE include_eh(VALUE self, VALUE rb_node) * Returns a new set built by merging the set and the elements of the given * set. */ -static VALUE set_union(VALUE self, VALUE rb_other) +static VALUE rb_xml_node_set_union(VALUE rb_node_set, VALUE rb_other) { - xmlNodeSetPtr node_set, other; - xmlNodeSetPtr new; + xmlNodeSetPtr c_node_set, c_other; + xmlNodeSetPtr c_new_node_set; - if(!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet)) + if (!rb_obj_is_kind_of(rb_other, cNokogiriXmlNodeSet)) { rb_raise(rb_eArgError, "node_set must be a Nokogiri::XML::NodeSet"); + } - Data_Get_Struct(self, xmlNodeSet, node_set); - Data_Get_Struct(rb_other, xmlNodeSet, other); + Data_Get_Struct(rb_node_set, xmlNodeSet, c_node_set); + Data_Get_Struct(rb_other, xmlNodeSet, c_other); - new = xmlXPathNodeSetMerge(NULL, node_set); - new = xmlXPathNodeSetMerge(new, other); + c_new_node_set = xmlXPathNodeSetMerge(NULL, c_node_set); + c_new_node_set = xmlXPathNodeSetMerge(c_new_node_set, c_other); - return Nokogiri_wrap_xml_node_set(new, rb_iv_get(self, "@document")); + return Nokogiri_wrap_xml_node_set(c_new_node_set, rb_iv_get(rb_node_set, "@document")); } /* @@ -473,7 +474,7 @@ void init_xml_node_set(void) rb_define_method(klass, "[]", slice, -1); rb_define_method(klass, "slice", slice, -1); rb_define_method(klass, "push", push, 1); - rb_define_method(klass, "|", set_union, 1); + rb_define_method(klass, "|", rb_xml_node_set_union, 1); rb_define_method(klass, "-", minus, 1); rb_define_method(klass, "unlink", unlink_nodeset, 0); rb_define_method(klass, "to_a", to_array, 0); From 3014d6dcbd374b3bce598eca3501d96ed79cf098 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 1 Feb 2021 17:55:01 -0500 Subject: [PATCH 6/9] format: overhaul test/xml/test_node_set.rb --- test/xml/test_node_set.rb | 1388 +++++++++++++++++++------------------ 1 file changed, 715 insertions(+), 673 deletions(-) diff --git a/test/xml/test_node_set.rb b/test/xml/test_node_set.rb index 59969a7832..65214fa751 100644 --- a/test/xml/test_node_set.rb +++ b/test/xml/test_node_set.rb @@ -1,153 +1,152 @@ +# frozen_string_literal: true require "helper" module Nokogiri module XML class TestNodeSet < Nokogiri::TestCase - class TestNodeSetNamespaces < Nokogiri::TestCase - def setup - super - @xml = Nokogiri.XML('') - @list = @xml.xpath('//namespace::*') + describe Nokogiri::XML::NodeSet do + describe "namespaces" do + let(:ns_xml) { Nokogiri.XML('') } + let(:ns_list) { ns_xml.xpath('//namespace::*') } + + specify "#include?" do + assert(ns_list.include?(ns_list.first), 'list should have item') + end + + specify "#push" do + expected_length = ns_list.length + 1 + ns_list.push(ns_list.first) + assert(expected_length, ns_list.length) + end + + specify "#delete" do + expected_length = ns_list.length + ns_list.push(ns_list.first) + ns_list.delete(ns_list.first) + assert(expected_length, ns_list.length) + end + + it "doesn't free namespace nodes when deleted" do + first = ns_list.first + ns_list.delete(first) + assert_equal('http://www.w3.org/XML/1998/namespace', first.href) + end end - def test_include? - assert @list.include?(@list.first), 'list should have item' + let(:xml) { Nokogiri::XML(File.read(XML_FILE), XML_FILE) } + let(:list) { xml.css('employee') } + + describe "#filter" do + it "finds all nodes that match the expression" do + list = xml.css('address').filter('*[domestic="Yes"]') + assert_equal(%w{Yes} * 4, list.map { |n| n['domestic'] }) + end end - def test_push - @list.push @list.first + specify "#remove_attr" do + list.each { |x| x['class'] = 'blah' } + assert_equal(list, list.remove_attr('class')) + list.each { |x| assert_nil(x['class']) } end - def test_delete - @list.push @list.first - @list.delete @list.first + specify "#remove_attribute" do + list.each { |x| x['class'] = 'blah' } + assert_equal(list, list.remove_attribute('class')) + list.each { |x| assert_nil(x['class']) } end - - def test_reference_after_delete - first = @list.first - @list.delete(first) - assert_equal 'http://www.w3.org/XML/1998/namespace', first.href - end - end - def setup - super - @xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE) - @list = @xml.css('employee') - end + specify "#add_class" do + assert_equal(list, list.add_class('bar')) + list.each { |x| assert_equal('bar', x['class']) } - def test_break_works - assert_equal 7, @xml.root.elements.each { |x| break 7 } - end + list.add_class('bar') + list.each { |x| assert_equal('bar', x['class']) } - def test_filter - list = @xml.css('address').filter('*[domestic="Yes"]') - assert_equal(%w{ Yes } * 4, list.map { |n| n['domestic'] }) - end - - def test_remove_attr - @list.each { |x| x['class'] = 'blah' } - assert_equal @list, @list.remove_attr('class') - @list.each { |x| assert_nil x['class'] } - end + list.add_class('baz') + list.each { |x| assert_equal('bar baz', x['class']) } + end - def test_remove_attribute - @list.each { |x| x['class'] = 'blah' } - assert_equal @list, @list.remove_attribute('class') - @list.each { |x| assert_nil x['class'] } - end + specify "#append_class" do + assert_equal(list, list.append_class('bar')) + list.each { |x| assert_equal('bar', x['class']) } - def test_add_class - assert_equal @list, @list.add_class('bar') - @list.each { |x| assert_equal 'bar', x['class'] } + list.append_class('bar') + list.each { |x| assert_equal('bar bar', x['class']) } - @list.add_class('bar') - @list.each { |x| assert_equal 'bar', x['class'] } + list.append_class('baz') + list.each { |x| assert_equal('bar bar baz', x['class']) } + end - @list.add_class('baz') - @list.each { |x| assert_equal 'bar baz', x['class'] } - end + describe "#remove_class" do + it "removes the attribute when no classes remain" do + assert_equal(list, list.remove_class('bar')) + list.each { |e| assert_nil(e['class']) } - def test_append_class - assert_equal @list, @list.append_class('bar') - @list.each { |x| assert_equal 'bar', x['class'] } + list.each { |e| e['class'] = '' } + assert_equal(list, list.remove_class('bar')) + list.each { |e| assert_nil(e['class']) } + end - @list.append_class('bar') - @list.each { |x| assert_equal 'bar bar', x['class'] } + it "leaves the remaining classes" do + list.each { |e| e['class'] = 'foo bar' } - @list.append_class('baz') - @list.each { |x| assert_equal 'bar bar baz', x['class'] } - end + assert_equal(list, list.remove_class('bar')) + list.each { |e| assert_equal('foo', e['class']) } + end - def test_remove_class_with_no_class - assert_equal @list, @list.remove_class('bar') - @list.each { |e| assert_nil e['class'] } + it "removes the class attribute when passed no arguments" do + list.each { |e| e['class'] = 'foo' } - @list.each { |e| e['class'] = '' } - assert_equal @list, @list.remove_class('bar') - @list.each { |e| assert_nil e['class'] } - end + assert_equal(list, list.remove_class) + list.each { |e| assert_nil(e['class']) } + end + end - def test_remove_class_single - @list.each { |e| e['class'] = 'foo bar' } + [:attribute, :attr, :set].each do |method| + describe "##{method}" do + it "sets the attribute value" do + list.each { |e| assert_nil(e['foo']) } - assert_equal @list, @list.remove_class('bar') - @list.each { |e| assert_equal 'foo', e['class'] } - end + list.send(method, 'foo', 'bar') + list.each { |e| assert_equal('bar', e['foo']) } + end - def test_remove_class_completely - @list.each { |e| e['class'] = 'foo' } + it "sets the attribute value given a block" do + list.each { |e| assert_nil(e['foo']) } - assert_equal @list, @list.remove_class - @list.each { |e| assert_nil e['class'] } - end + list.send(method, "foo") { |e| e.at_css("employeeId").text } + list.each { |e| assert_equal(e.at_css("employeeId").text, e['foo']) } + end - def test_attribute_set - @list.each { |e| assert_nil e['foo'] } + it "sets the attribute value given a hash" do + list.each { |e| assert_nil(e['foo']) } - [ ['attribute', 'bar'], ['attr', 'biz'], ['set', 'baz'] ].each do |t| - @list.send(t.first.to_sym, 'foo', t.last) - @list.each { |e| assert_equal t.last, e['foo'] } + list.send(method, { 'foo' => 'bar' }) + list.each { |e| assert_equal('bar', e['foo']) } + end + end end - end - - def test_attribute_set_with_block - @list.each { |e| assert_nil e['foo'] } - [ ['attribute', 'bar'], ['attr', 'biz'], ['set', 'baz'] ].each do |t| - @list.send(t.first.to_sym, 'foo') { |x| x.at_css("employeeId").text } - @list.each { |e| assert_equal e.at_css("employeeId").text, e['foo'] } + it "#attribute with no args gets attribute from first node" do + list.first['foo'] = 'bar' + assert_equal(list.first.attribute('foo'), list.attribute('foo')) end - end - - def test_attribute_set_with_hash - @list.each { |e| assert_nil e['foo'] } - [ ['attribute', 'bar'], ['attr', 'biz'], ['set', 'baz'] ].each do |t| - @list.send(t.first.to_sym, 'foo' => t.last) - @list.each { |e| assert_equal t.last, e['foo'] } + it "#attribute with no args on empty set" do + set = Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new) + assert_nil(set.attribute("foo")) end - end - def test_attribute_with_no_args_gets_attribute_from_first_node - @list.first['foo'] = 'bar' - assert_equal @list.first.attribute('foo'), @list.attribute('foo') - end - - def test_attribute_with_no_args_on_empty_set - set = Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new) - assert_nil set.attribute("foo") - end - - def test_search_empty_node_set - set = Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new) - assert_equal 0, set.css('foo').length - assert_equal 0, set.xpath('.//foo').length - assert_equal 0, set.search('foo').length - end + describe "searching" do + it "an empty node set returns no results" do + set = Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new) + assert_equal(0, set.css('foo').length) + assert_equal(0, set.xpath('.//foo').length) + assert_equal(0, set.search('foo').length) + end - def test_node_set_search_with_multiple_queries - xml = ' + it "with multiple queries" do + xml = '
important thing
@@ -158,683 +157,726 @@ def test_node_set_search_with_multiple_queries

more stuff ' - set = Nokogiri::XML(xml).xpath(".//thing") - assert_kind_of Nokogiri::XML::NodeSet, set + set = Nokogiri::XML(xml).xpath(".//thing") + assert_kind_of(Nokogiri::XML::NodeSet, set) - assert_equal 3, set.xpath('./div', './p').length - assert_equal 3, set.css('.title', '.content', 'p').length - assert_equal 3, set.search('./div', 'p.blah').length - end + assert_equal(3, set.xpath('./div', './p').length) + assert_equal(3, set.css('.title', '.content', 'p').length) + assert_equal(3, set.search('./div', 'p.blah').length) + end - def test_search_with_custom_selector - set = @xml.xpath('//staff') + it "with a custom selector" do + set = xml.xpath('//staff') + + [ + [:xpath, '//*[awesome(.)]'], + [:search, '//*[awesome(.)]'], + [:css, '*:awesome'], + [:search, '*:awesome'], + ].each do |method, query| + callback_handler = Class.new do + def awesome(ns) + ns.select { |n| n.name == 'employee' } + end + end.new + custom_employees = set.send(method, query, callback_handler) + + assert_equal(xml.xpath('//employee'), custom_employees, + "using #{method} with custom selector '#{query}'") + end + end - [ - [:xpath, '//*[awesome(.)]'], - [:search, '//*[awesome(.)]'], - [:css, '*:awesome'], - [:search, '*:awesome'] - ].each do |method, query| - custom_employees = set.send(method, query, Class.new { - def awesome ns - ns.select { |n| n.name == 'employee' } - end - }.new) + it "with variable bindings" do + set = xml.xpath('//staff') - assert_equal(@xml.xpath('//employee'), custom_employees, - "using #{method} with custom selector '#{query}'") - end - end + assert_equal(4, set.xpath('//address[@domestic=$value]', nil, value: 'Yes').length, + "using #xpath with variable binding") - def test_search_with_variable_bindings - set = @xml.xpath('//staff') + assert_equal(4, set.search('//address[@domestic=$value]', nil, value: 'Yes').length, + "using #search with variable binding") + end - assert_equal(4, set.xpath('//address[@domestic=$value]', nil, :value => 'Yes').length, - "using #xpath with variable binding") + it "context search returns itself" do + set = xml.xpath('//staff') + assert_equal(set.to_a, set.search('.').to_a) + end - assert_equal(4, set.search('//address[@domestic=$value]', nil, :value => 'Yes').length, - "using #search with variable binding") - end + it "css searches match self" do + html = Nokogiri::HTML("

") + set = html.xpath("/html/body/div") + assert_equal(set.first, set.css(".a").first) + assert_equal(set.first, set.search(".a").first) + end - def test_search_self - set = @xml.xpath('//staff') - assert_equal set.to_a, set.search('.').to_a - end + it "css search with namespace" do + fragment = Nokogiri::XML.fragment(<<~eoxml) + + + + + eoxml + assert(fragment.children.search('body', { 'xmlns' => 'http://www.w3.org/1999/xhtml' })) + end - def test_css_searches_match_self - html = Nokogiri::HTML("
") - set = html.xpath("/html/body/div") - assert_equal set.first, set.css(".a").first - assert_equal set.first, set.search(".a").first - end + it "xmlns is automatically registered" do + doc = Nokogiri::XML(<<~eoxml) + + + + + + eoxml + set = doc.css('foo') + assert_equal(1, set.css('xmlns|bar').length) + assert_equal(0, set.css('|bar').length) + assert_equal(1, set.xpath('//xmlns:bar').length) + assert_equal(1, set.search('xmlns|bar').length) + assert_equal(1, set.search('//xmlns:bar').length) + assert(set.at('//xmlns:bar')) + assert(set.at('xmlns|bar')) + assert(set.at('bar')) + end - def test_css_search_with_namespace - fragment = Nokogiri::XML.fragment(<<-eoxml) - - - - - eoxml - assert fragment.children.search( 'body', { 'xmlns' => 'http://www.w3.org/1999/xhtml' }) - end + it "#search accepts a namespace" do + xml = Nokogiri::XML.parse(<<~eoxml) + + + Michelin Model XGV + + + I'm a bicycle tire! + + + eoxml + set = xml / 'root' + assert_equal(1, set.length) + bike_tire = set.search('//bike:tire', 'bike' => "http://schwinn.com/") + assert_equal(1, bike_tire.length) + end - def test_double_equal - assert node_set_one = @xml.xpath('//employee') - assert node_set_two = @xml.xpath('//employee') + specify "#search" do + assert(node_set = xml.search('//employee')) + assert(sub_set = node_set.search('.//name')) + assert_equal(node_set.length, sub_set.length) + end - assert_not_equal node_set_one.object_id, node_set_two.object_id + it "returns an empty node set when no results were found" do + assert(node_set = xml.search('//asdkfjhasdlkfjhaldskfh')) + assert_equal(0, node_set.length) + end + end - assert_equal node_set_one, node_set_two - end + describe "#==" do + it "checks for equality of contents" do + assert(node_set_one = xml.xpath('//employee')) + assert(node_set_two = xml.xpath('//employee')) - def test_node_set_not_equal_to_string - node_set_one = @xml.xpath('//employee') - assert_not_equal node_set_one, "asdfadsf" - end + refute_equal(node_set_one.object_id, node_set_two.object_id) + refute_same(node_set_one, node_set_two) - def test_out_of_order_not_equal - one = @xml.xpath('//employee') - two = @xml.xpath('//employee') - two.push two.shift - assert_not_equal one, two - end + assert(node_set_one == node_set_two) + end - def test_shorter_is_not_equal - node_set_one = @xml.xpath('//employee') - node_set_two = @xml.xpath('//employee') - node_set_two.delete(node_set_two.first) + it "handles comparison to a string" do + node_set_one = xml.xpath('//employee') + refute(node_set_one == "asdfadsf") + end - assert_not_equal node_set_one, node_set_two - end + it "returns false if same elements are out of order" do + one = xml.xpath('//employee') + two = xml.xpath('//employee') + two.push(two.shift) + assert_not_equal(one, two) + end - def test_pop - set = @xml.xpath('//employee') - last = set.last - assert_equal last, set.pop - end + it "returns false if one is a subset of the other" do + node_set_one = xml.xpath('//employee') + node_set_two = xml.xpath('//employee') + node_set_two.delete(node_set_two.first) - def test_shift - set = @xml.xpath('//employee') - first = set.first - assert_equal first, set.shift - end + refute(node_set_one == node_set_two) + refute(node_set_two == node_set_one) + end + end - def test_shift_empty - set = Nokogiri::XML::NodeSet.new(@xml) - assert_nil set.shift - end + describe "#pop" do + it "returns the last element and mutates the set" do + set = xml.xpath('//employee') + last = set.last + assert_equal(last, set.pop) + end - def test_pop_empty - set = Nokogiri::XML::NodeSet.new(@xml) - assert_nil set.pop - end + it "returns nil for an empty set" do + set = Nokogiri::XML::NodeSet.new(xml) + assert_nil(set.pop) + end + end - def test_first_takes_arguments - assert node_set = @xml.xpath('//employee') - assert_equal 2, node_set.first(2).length - end - - def test_first_clamps_arguments - assert node_set = @xml.xpath('//employee[position() < 3]') - assert_equal 2, node_set.first(5).length - end + describe "#shift" do + it "returns the first element and mutates the set" do + set = xml.xpath('//employee') + first = set.first + assert_equal(first, set.shift) + end - [:dup, :clone].each do |method_name| - define_method "test_#{method_name}" do - assert node_set = @xml.xpath('//employee') - duplicate = node_set.send(method_name) - assert_equal node_set.length, duplicate.length - node_set.zip(duplicate).each do |a,b| - assert_equal a, b + it "returns nil for an empty set" do + set = Nokogiri::XML::NodeSet.new(xml) + assert_nil(set.shift) end end - end - def test_dup_on_empty_set - empty_set = Nokogiri::XML::NodeSet.new @xml, [] - assert_equal 0, empty_set.dup.length # this shouldn't raise null pointer exception - end + describe "#first" do + it "returns the first node" do + node_set = xml.xpath('//employee') + node = xml.at_xpath('//employee') + assert_equal(node, node_set.first) + end - def test_xmlns_is_automatically_registered - doc = Nokogiri::XML(<<-eoxml) - - - - - - eoxml - set = doc.css('foo') - assert_equal 1, set.css('xmlns|bar').length - assert_equal 0, set.css('|bar').length - assert_equal 1, set.xpath('//xmlns:bar').length - assert_equal 1, set.search('xmlns|bar').length - assert_equal 1, set.search('//xmlns:bar').length - assert set.at('//xmlns:bar') - assert set.at('xmlns|bar') - assert set.at('bar') - end + it "returns the first n nodes" do + assert(node_set = xml.xpath('//employee')) + assert_equal(2, node_set.first(2).length) + end - def test_children_has_document - set = @xml.root.children - assert_instance_of(NodeSet, set) - assert_equal @xml, set.document - end + it "returns all the nodes if arguments are longer than the set" do + assert(node_set = xml.xpath('//employee[position() < 3]')) + assert_equal(2, node_set.length) + assert_equal(2, node_set.first(5).length) + end + end - def test_length_size - assert node_set = @xml.search('//employee') - assert_equal node_set.length, node_set.size - end + [:dup, :clone].each do |method_name| + specify "##{method_name}" do + assert node_set = xml.xpath('//employee') + duplicate = node_set.send(method_name) + assert_equal node_set.length, duplicate.length + node_set.zip(duplicate).each do |a, b| + assert_equal a, b + end + end + end - def test_to_xml - assert node_set = @xml.search('//employee') - assert node_set.to_xml - end + specify "#dup on empty set" do + empty_set = Nokogiri::XML::NodeSet.new(xml, []) + assert_equal(0, empty_set.dup.length) # this shouldn't raise null pointer exception + end - def test_inner_html - doc = Nokogiri::HTML(<<-eohtml) - - -
- one -
-
- two -
- - - eohtml - assert html = doc.css('div').inner_html - assert_match '', html - end + it "Document#children node set has a document reference" do + set = xml.root.children + assert_instance_of(NodeSet, set) + assert_equal(xml, set.document) + end - def test_gt_string_arg - assert node_set = @xml.search('//employee') - assert_equal node_set.xpath('./employeeId'), (node_set > 'employeeId') - end + it "length and size are aliases" do + assert(node_set = xml.search('//employee')) + assert_equal(node_set.length, node_set.size) + end - def test_at_performs_a_search_with_css - assert node_set = @xml.search('//employee') - assert_equal node_set.first.first_element_child, node_set.at('employeeId') - assert_equal node_set.first.first_element_child, node_set.%('employeeId') - end + it "to_xml" do + assert(node_set = xml.search('//employee')) + assert(node_set.to_xml) + end - def test_at_performs_a_search_with_xpath - assert node_set = @xml.search('//employee') - assert_equal node_set.first.first_element_child, node_set.at('./employeeId') - assert_equal node_set.first.first_element_child, node_set.%('./employeeId') - end + it "inner_html" do + doc = Nokogiri::HTML(<<~eohtml) + + + +
+ two +
+ + + eohtml + assert(html = doc.css('div').inner_html) + assert_match('one', html) + assert_match('two', html) + end - def test_at_with_integer_index - assert node_set = @xml.search('//employee') - assert_equal node_set.first, node_set.at(0) - assert_equal node_set.first, node_set % 0 - end + it "gt_string_arg" do + assert(node_set = xml.search('//employee')) + assert_equal(node_set.xpath('./employeeId'), (node_set > 'employeeId')) + end - def test_at_xpath - assert node_set = @xml.search('//employee') - assert_equal node_set.first.first_element_child, node_set.at_xpath('./employeeId') - end + it "at_performs_a_search_with_css" do + assert(node_set = xml.search('//employee')) + assert_equal(node_set.first.first_element_child, node_set.at('employeeId')) + assert_equal(node_set.first.first_element_child, node_set.%('employeeId')) + end - def test_at_css - assert node_set = @xml.search('//employee') - assert_equal node_set.first.first_element_child, node_set.at_css('employeeId') - end + it "at_performs_a_search_with_xpath" do + assert(node_set = xml.search('//employee')) + assert_equal(node_set.first.first_element_child, node_set.at('./employeeId')) + assert_equal(node_set.first.first_element_child, node_set.%('./employeeId')) + end - def test_to_ary - assert node_set = @xml.search('//employee') - foo = [] - foo += node_set - assert_equal node_set.length, foo.length - end + it "at_with_integer_index" do + assert(node_set = xml.search('//employee')) + assert_equal(node_set.first, node_set.at(0)) + assert_equal(node_set.first, node_set % 0) + end - def test_push - node = Nokogiri::XML::Node.new('foo', @xml) - node.content = 'bar' + it "at_xpath" do + assert(node_set = xml.search('//employee')) + assert_equal(node_set.first.first_element_child, node_set.at_xpath('./employeeId')) + end - assert node_set = @xml.search('//employee') - node_set.push(node) + it "at_css" do + assert(node_set = xml.search('//employee')) + assert_equal(node_set.first.first_element_child, node_set.at_css('employeeId')) + end - assert node_set.include?(node) - end + it "to_ary" do + assert(node_set = xml.search('//employee')) + foo = [] + foo += node_set + assert_equal(node_set.length, foo.length) + end - def test_delete_with_invalid_argument - employees = @xml.search("//employee") - positions = @xml.search("//position") + specify "#push" do + node = Nokogiri::XML::Node.new('foo', xml) + node.content = 'bar' - assert_raises(ArgumentError) { employees.delete(positions) } - end + assert(node_set = xml.search('//employee')) + node_set.push(node) - def test_delete_when_present - employees = @xml.search("//employee") - wally = employees.first - assert employees.include?(wally) # testing setup - length = employees.length + assert(node_set.include?(node)) + end - result = employees.delete(wally) - assert_equal result, wally - assert ! employees.include?(wally) - assert length-1, employees.length - end + describe "#delete" do + it "raises ArgumentError when given an invalid argument" do + employees = xml.search("//employee") + positions = xml.search("//position") - def test_delete_when_not_present - employees = @xml.search("//employee") - phb = @xml.search("//position").first - assert ! employees.include?(phb) # testing setup - length = employees.length + assert_raises(ArgumentError) { employees.delete(positions) } + end - result = employees.delete(phb) - assert_nil result - assert length, employees.length - end + it "deletes the element when present and returns the deleted element" do + employees = xml.search("//employee") + wally = employees.first + assert(employees.include?(wally)) # testing setup + length = employees.length - def test_delete_on_empty_set - empty_set = Nokogiri::XML::NodeSet.new @xml, [] - employee = @xml.at_xpath("//employee") - assert_nil empty_set.delete(employee) - end + result = employees.delete(wally) + assert_equal(result, wally) + refute(employees.include?(wally)) + assert_equal(length - 1, employees.length) + end - def test_unlink - xml = Nokogiri::XML.parse(<<-eoxml) - - Bar - Bar - Bar - Hello world - Bar - Awesome - Awesome - - eoxml - set = xml.xpath('//a') - set.unlink - set.each do |node| - assert !node.parent - #assert !node.document - assert !node.previous_sibling - assert !node.next_sibling - end - assert_no_match(/Hello world/, xml.to_s) - end + it "does nothing and returns nil when not present" do + employees = xml.search("//employee") + phb = xml.search("//position").first + assert(!employees.include?(phb)) # testing setup + length = employees.length - def test_nodeset_search_takes_namespace - @xml = Nokogiri::XML.parse(<<-eoxml) - - - Michelin Model XGV - - - I'm a bicycle tire! - - - eoxml - set = @xml/'root' - assert_equal 1, set.length - bike_tire = set.search('//bike:tire', 'bike' => "http://schwinn.com/") - assert_equal 1, bike_tire.length - end + result = employees.delete(phb) + assert_nil(result) + assert_equal(length, employees.length) + end - def test_new_nodeset - node_set = Nokogiri::XML::NodeSet.new(@xml) - assert_equal(0, node_set.length) - node = Nokogiri::XML::Node.new('form', @xml) - node_set << node - assert_equal(1, node_set.length) - assert_equal(node, node_set.last) - end + it "does nothing and returns nil when sent to and empty set" do + empty_set = Nokogiri::XML::NodeSet.new(xml, []) + employee = xml.at_xpath("//employee") + assert_nil(empty_set.delete(employee)) + end + end - def test_search_on_nodeset - assert node_set = @xml.search('//employee') - assert sub_set = node_set.search('.//name') - assert_equal(node_set.length, sub_set.length) - end + specify "#unlink" do + xml = Nokogiri::XML.parse(<<~eoxml) + + Bar + Bar + Bar + Hello world + Bar + Awesome + Awesome + + eoxml + set = xml.xpath('//a') + set.unlink + set.each do |node| + assert(!node.parent) + # assert !node.document + assert(!node.previous_sibling) + assert(!node.next_sibling) + end + assert_no_match(/Hello world/, xml.to_s) + end - def test_negative_index_works - assert node_set = @xml.search('//employee') - assert_equal node_set.last, node_set[-1] - end + it "new_nodeset" do + node_set = Nokogiri::XML::NodeSet.new(xml) + assert_equal(0, node_set.length) + node = Nokogiri::XML::Node.new('form', xml) + node_set << node + assert_equal(1, node_set.length) + assert_equal(node, node_set.last) + end - def test_large_negative_index_returns_nil - assert node_set = @xml.search('//employee') - assert_nil(node_set[-1 * (node_set.length + 1)]) - end + describe "#wrap" do + it "wraps each node within a reified copy of the tag passed" do + employees = (xml / "//employee").wrap("") + assert_equal('wrapper', employees[0].parent.name) + assert_equal('employee', xml.search("//wrapper").first.children[0].name) + end - def test_node_set_fetches_private_data - assert node_set = @xml.search('//employee') + it "handles various node types and handles recursive reparenting" do + xml = 'contents' + doc = Nokogiri::XML(xml) + nodes = doc.at_css("root").xpath(".//* | .//*/text()") # foo and "contents" + nodes.wrap("") + wrappers = doc.css("wrapper") + assert_equal("root", wrappers.first.parent.name) + assert_equal("foo", wrappers.first.children.first.name) + assert_equal("foo", wrappers.last.parent.name) + assert(wrappers.last.children.first.text?) + assert_equal("contents", wrappers.last.children.first.text) + end - set = node_set - assert_equal(set[0], set[0]) - end + it "works for nodes in a fragment" do + frag = Nokogiri::XML::DocumentFragment.parse(<<~EOXML) + + hello + goodbye + + EOXML + employees = frag.xpath(".//employee") + employees.wrap("") + assert_equal('wrapper', employees[0].parent.name) + assert_equal('employee', frag.at(".//wrapper").children.first.name) + end - def test_node_set_returns_0 - assert node_set = @xml.search('//asdkfjhasdlkfjhaldskfh') - assert_equal(0, node_set.length) - end + it "preserves document structure" do + assert_equal("employeeId", + xml.at_xpath("//employee").children.detect { |j| !j.text? }.name) + xml.xpath("//employeeId[text()='EMP0001']").wrap("") + assert_equal("wrapper", + xml.at_xpath("//employee").children.detect { |j| !j.text? }.name) + end + end - def test_wrap - employees = (@xml/"//employee").wrap("") - assert_equal 'wrapper', employees[0].parent.name - assert_equal 'employee', @xml.search("//wrapper").first.children[0].name - end + [:+, :|].each do |method| + describe "##{method}" do + let(:names) { xml.search("name") } + let(:positions) { xml.search("position") } - def test_wrap_various_node_types - xml = 'contents' - doc = Nokogiri::XML xml - nodes = doc.at_css("root").xpath(".//* | .//*/text()") # foo and "contents" - nodes.wrap("") - wrappers = doc.css("wrapper") - assert_equal "root", wrappers.first.parent.name - assert_equal "foo", wrappers.first.children.first.name - assert_equal "foo", wrappers.last.parent.name - assert wrappers.last.children.first.text? - assert_equal "contents", wrappers.last.children.first.text - end + it "raises an exception when the rhs type isn't a NodeSet" do + assert_raises(ArgumentError) { names.send(method, positions.first) } + assert_raises(ArgumentError) { names.send(method, 3) } + end - def test_wrap_a_fragment - frag = Nokogiri::XML::DocumentFragment.parse <<-EOXML - - hello - goodbye - - EOXML - employees = frag.xpath ".//employee" - employees.wrap("") - assert_equal 'wrapper', employees[0].parent.name - assert_equal 'employee', frag.at(".//wrapper").children.first.name - end + it "returns the setwise union" do + names_len = names.length + positions_len = positions.length - def test_wrap_preserves_document_structure - assert_equal "employeeId", - @xml.at_xpath("//employee").children.detect{|j| ! j.text? }.name - @xml.xpath("//employeeId[text()='EMP0001']").wrap("") - assert_equal "wrapper", - @xml.at_xpath("//employee").children.detect{|j| ! j.text? }.name - end + result = names.send(method, positions) + assert_equal(names_len, names.length) + assert_equal(positions_len, positions.length) + assert_equal(names_len + positions_len, result.length) + end - def test_plus_operator - names = @xml.search("name") - positions = @xml.search("position") + it "ignores duplicates" do + names = xml.search("name") - names_len = names.length - positions_len = positions.length + assert_equal(names.length, (names + xml.search("name")).length) + end + end + end - assert_raises(ArgumentError) { names + positions.first } + it "#+=" do + names = xml.search("name") + positions = xml.search("positions") + expected_length = names.length + positions.length + names += positions + assert_equal(expected_length, names.length) + end - result = names + positions - assert_equal names_len, names.length - assert_equal positions_len, positions.length - assert_equal names.length + positions.length, result.length + it "#-" do + employees = xml.search("//employee") + women = xml.search("//employee[gender[text()='Female']]") - names += positions - assert_equal result.length, names.length - end + employees_len = employees.length + women_len = women.length - def test_union - names = @xml.search("name") + assert_raises(ArgumentError) { employees - women.first } - assert_equal(names.length, (names | @xml.search("name")).length) - end + result = employees - women + assert_equal(employees_len, employees.length) + assert_equal(women_len, women.length) + assert_equal(employees.length - women.length, result.length) - def test_minus_operator - employees = @xml.search("//employee") - females = @xml.search("//employee[gender[text()='Female']]") + employees -= women + assert_equal(result.length, employees.length) + end - employees_len = employees.length - females_len = females.length + describe "#[]" do + it "negative_index_works" do + assert(node_set = xml.search('//employee')) + assert_equal(node_set.last, node_set[-1]) + end - assert_raises(ArgumentError) { employees - females.first } + it "large_negative_index_returns_nil" do + assert(node_set = xml.search('//employee')) + assert_nil(node_set[-1 * (node_set.length + 1)]) + end - result = employees - females - assert_equal employees_len, employees.length - assert_equal females_len, females.length - assert_equal employees.length - females.length, result.length + it "array_index" do + employees = xml.search("//employee") + other = xml.search("//position").first - employees -= females - assert_equal result.length, employees.length - end + assert_equal(3, employees.index(employees[3])) + assert_nil(employees.index(other)) + assert_equal(3, employees.index { |employee| employee.search("employeeId/text()").to_s == "EMP0004" }) + assert_nil(employees.index { |employee| employee.search("employeeId/text()").to_s == "EMP0000" }) + end - def test_array_index - employees = @xml.search("//employee") - other = @xml.search("//position").first + it "slice_too_far" do + employees = xml.search("//employee") + assert_equal(employees.length, employees[0, employees.length + 1].length) + assert_equal(employees.length, employees[0, employees.length].length) + end - assert_equal 3, employees.index(employees[3]) - assert_nil employees.index(other) - assert_equal 3, employees.index {|employee| employee.search("employeeId/text()").to_s == "EMP0004" } - assert_nil employees.index {|employee| employee.search("employeeId/text()").to_s == "EMP0000" } - end + it "slice_on_empty_node_set" do + empty_set = Nokogiri::XML::NodeSet.new(xml, []) + assert_nil(empty_set[99]) + assert_nil(empty_set[99..101]) + assert_nil(empty_set[99, 2]) + end - def test_slice_too_far - employees = @xml.search("//employee") - assert_equal employees.length, employees[0, employees.length + 1].length - assert_equal employees.length, employees[0, employees.length].length - end + it "slice_waaaaaay_off_the_end" do + xml = Nokogiri::XML::Builder.new do + root { 100.times { div } } + end.doc + nodes = xml.css("div") + assert_equal(1, nodes.slice(99, 100_000).length) + assert_equal(0, nodes.slice(100, 100_000).length) + end - def test_slice_on_empty_node_set - empty_set = Nokogiri::XML::NodeSet.new @xml, [] - assert_nil empty_set[99] - assert_nil empty_set[99..101] - assert_nil empty_set[99,2] - end + it "array_slice_with_start_and_end" do + employees = xml.search("//employee") + assert_equal([employees[1], employees[2], employees[3]], employees[1, 3].to_a) + end - def test_slice_waaaaaay_off_the_end - xml = Nokogiri::XML::Builder.new { - root { 100.times { div } } - }.doc - nodes = xml.css "div" - assert_equal 1, nodes.slice(99, 100_000).length - assert_equal 0, nodes.slice(100, 100_000).length - end + it "array_index_bracket_equivalence" do + employees = xml.search("//employee") + assert_equal([employees[1], employees[2], employees[3]], employees[1, 3].to_a) + assert_equal([employees[1], employees[2], employees[3]], employees.slice(1, 3).to_a) + end - def test_array_slice_with_start_and_end - employees = @xml.search("//employee") - assert_equal [employees[1], employees[2], employees[3]], employees[1,3].to_a - end + it "array_slice_with_negative_start" do + employees = xml.search("//employee") + assert_equal([employees[2]], employees[-3, 1].to_a) + assert_equal([employees[2], employees[3]], employees[-3, 2].to_a) + end - def test_array_index_bracket_equivalence - employees = @xml.search("//employee") - assert_equal [employees[1], employees[2], employees[3]], employees[1,3].to_a - assert_equal [employees[1], employees[2], employees[3]], employees.slice(1,3).to_a - end + it "array_slice_with_invalid_args" do + employees = xml.search("//employee") + assert_nil(employees[99, 1]) # large start + assert_nil(employees[1, -1]) # negative len + assert_equal([], employees[1, 0].to_a) # zero len + end - def test_array_slice_with_negative_start - employees = @xml.search("//employee") - assert_equal [employees[2]], employees[-3,1].to_a - assert_equal [employees[2], employees[3]], employees[-3,2].to_a - end + it "array_slice_with_range" do + employees = xml.search("//employee") + assert_equal([employees[1], employees[2], employees[3]], employees[1..3].to_a) + assert_equal([employees[0], employees[1], employees[2], employees[3]], employees[0..3].to_a) + end + end - def test_array_slice_with_invalid_args - employees = @xml.search("//employee") - assert_nil employees[99, 1] # large start - assert_nil employees[1, -1] # negative len - assert_equal [], employees[1, 0].to_a # zero len - end + describe "#& intersection" do + it "with no overlap returns an empty set" do + employees = xml.search("//employee") + positions = xml.search("//position") - def test_array_slice_with_range - employees = @xml.search("//employee") - assert_equal [employees[1], employees[2], employees[3]], employees[1..3].to_a - assert_equal [employees[0], employees[1], employees[2], employees[3]], employees[0..3].to_a - end + assert_equal(0, (employees & positions).length) + end - def test_intersection_with_no_overlap - employees = @xml.search("//employee") - positions = @xml.search("//position") + it "with an empty set returns an empty set" do + empty_set = Nokogiri::XML::NodeSet.new(xml) + employees = xml.search("//employee") + assert_equal(0, (empty_set & employees).length) + assert_equal(0, (employees & empty_set).length) + end - assert_equal [], (employees & positions).to_a - end + it "returns the intersection" do + employees = xml.search("//employee") + first_set = employees[0..2] + second_set = employees[2..4] - def test_intersection - employees = @xml.search("//employee") - first_set = employees[0..2] - second_set = employees[2..4] + assert_equal([employees[2]], (first_set & second_set).to_a) + end + end - assert_equal [employees[2]], (first_set & second_set).to_a - end + describe "#include?" do + it "returns true or false appropriately" do + employees = xml.search("//employee") + yes = employees.first + no = xml.search("//position").first - def test_intersection_on_empty_set - empty_set = Nokogiri::XML::NodeSet.new @xml - employees = @xml.search("//employee") - assert_equal 0, (empty_set & employees).length - end + assert(employees.include?(yes)) + refute(employees.include?(no)) + end - def test_include? - employees = @xml.search("//employee") - yes = employees.first - no = @xml.search("//position").first + it "returns false on empty set" do + empty_set = Nokogiri::XML::NodeSet.new(xml, []) + employee = xml.at_xpath("//employee") + refute(empty_set.include?(employee)) + end + end - assert employees.include?(yes) - assert ! employees.include?(no) - end + describe "#each" do + it "supports break" do + assert_equal(7, xml.root.elements.each { |x| break 7 }) + end - def test_include_on_empty_node_set - empty_set = Nokogiri::XML::NodeSet.new @xml, [] - employee = @xml.at_xpath("//employee") - assert ! empty_set.include?(employee) - end + it "returns an enumerator given no block" do + if i_am_ruby_matching("~> 2.5.0") && i_am_in_a_systemd_container + # Ruby 2.5 Enumerators cause a segfault in systemd containers. Yes, really! + # + # https://bugs.ruby-lang.org/issues/14883 + # + # So let's skip this test if we think that's where we are. + skip("cannot use Ruby 2.5 Enumerator#each in a systemd container") + end + employees = xml.search("//employee") + enum = employees.each + assert_instance_of(Enumerator, enum) + assert_equal(enum.next, employees[0]) + assert_equal(enum.next, employees[1]) + end - def test_each - if i_am_ruby_matching("~> 2.5.0") && i_am_in_a_systemd_container - # Ruby 2.5 Enumerators cause a segfault in systemd containers. Yes, really! - # - # https://bugs.ruby-lang.org/issues/14883 - # - # So let's skip this test if we think that's where we are. - skip "cannot use Ruby 2.5 Enumerator#each in a systemd container" - end - employees = @xml.search("//employee") - enum = employees.each - assert_instance_of Enumerator, enum - assert_equal enum.next, employees[0] - assert_equal enum.next, employees[1] - end + it "returns self given a block" do + node_set1 = xml.css("address") + node_set2 = node_set1.each {} + assert_same(node_set1, node_set2) + end + end - def test_children - employees = @xml.search("//employee") - count = 0 - employees.each do |employee| - count += employee.children.length + specify "#children" do + employees = xml.search("//employee") + count = 0 + employees.each do |employee| + count += employee.children.length + end + set = employees.children + assert_equal(count, set.length) end - set = employees.children - assert_equal count, set.length - end - def test_inspect - employees = @xml.search("//employee") - inspected = employees.inspect + specify "#inspect" do + employees = xml.search("//employee") + inspected = employees.inspect - assert_equal "[#{employees.map(&:inspect).join(', ')}]", - inspected - end + assert_equal("[#{employees.map(&:inspect).join(', ')}]", + inspected) + end - def test_should_not_splode_when_accessing_namespace_declarations_in_a_node_set - 2.times do - xml = Nokogiri::XML "" - node_set = xml.xpath("//namespace::*") - assert_equal 1, node_set.size - node = node_set.first - node.to_s # segfaults in 1.4.0 and earlier + it "should_not_splode_when_accessing_namespace_declarations_in_a_node_set" do + 2.times do + xml = Nokogiri::XML("") + node_set = xml.xpath("//namespace::*") + assert_equal(1, node_set.size) + node = node_set.first + node.to_s # segfaults in 1.4.0 and earlier - # if we haven't segfaulted, let's make sure we handled it correctly - assert_instance_of Nokogiri::XML::Namespace, node + # if we haven't segfaulted, let's make sure we handled it correctly + assert_instance_of(Nokogiri::XML::Namespace, node) + end end - end - - def test_should_not_splode_when_arrayifying_node_set_containing_namespace_declarations - xml = Nokogiri::XML "" - node_set = xml.xpath("//namespace::*") - assert_equal 1, node_set.size - node_array = node_set.to_a - node = node_array.first - node.to_s # segfaults in 1.4.0 and earlier + it "should_not_splode_when_arrayifying_node_set_containing_namespace_declarations" do + xml = Nokogiri::XML("") + node_set = xml.xpath("//namespace::*") + assert_equal(1, node_set.size) - # if we haven't segfaulted, let's make sure we handled it correctly - assert_instance_of Nokogiri::XML::Namespace, node - end + node_array = node_set.to_a + node = node_array.first + node.to_s # segfaults in 1.4.0 and earlier - def test_should_not_splode_when_unlinking_node_set_containing_namespace_declarations - xml = Nokogiri::XML "" - node_set = xml.xpath("//namespace::*") - assert_equal 1, node_set.size + # if we haven't segfaulted, let's make sure we handled it correctly + assert_instance_of(Nokogiri::XML::Namespace, node) + end - node_set.unlink - end + it "should_not_splode_when_unlinking_node_set_containing_namespace_declarations" do + xml = Nokogiri::XML("") + node_set = xml.xpath("//namespace::*") + assert_equal(1, node_set.size) - def test_reverse - xml = Nokogiri::XML "bd" - children = xml.root.children - assert_instance_of Nokogiri::XML::NodeSet, children + node_set.unlink + end - reversed = children.reverse - assert_equal reversed[0], children[4] - assert_equal reversed[1], children[3] - assert_equal reversed[2], children[2] - assert_equal reversed[3], children[1] - assert_equal reversed[4], children[0] + specify "#reverse" do + xml = Nokogiri::XML("bd") + children = xml.root.children + assert_instance_of(Nokogiri::XML::NodeSet, children) - assert_equal children, children.reverse.reverse - end + reversed = children.reverse + assert_equal(reversed[0], children[4]) + assert_equal(reversed[1], children[3]) + assert_equal(reversed[2], children[2]) + assert_equal(reversed[3], children[1]) + assert_equal(reversed[4], children[0]) - def test_node_set_dup_result_has_document_and_is_decorated - x = Module.new do - def awesome! ; end + assert_equal(children, children.reverse.reverse) end - util_decorate(@xml, x) - node_set = @xml.css("address") - new_set = node_set.dup - assert_equal node_set.document, new_set.document - assert new_set.respond_to?(:awesome!) - end - def test_node_set_union_result_has_document_and_is_decorated - x = Module.new do - def awesome! ; end + it "node_set_dup_result_has_document_and_is_decorated" do + x = Module.new do + def awesome!; end + end + util_decorate(xml, x) + node_set = xml.css("address") + new_set = node_set.dup + assert_equal(node_set.document, new_set.document) + assert(new_set.respond_to?(:awesome!)) end - util_decorate(@xml, x) - node_set1 = @xml.css("address") - node_set2 = @xml.css("address") - new_set = node_set1 | node_set2 - assert_equal node_set1.document, new_set.document - assert new_set.respond_to?(:awesome!) - end - def test_node_set_intersection_result_has_document_and_is_decorated - x = Module.new do - def awesome! ; end + it "node_set_union_result_has_document_and_is_decorated" do + x = Module.new do + def awesome!; end + end + util_decorate(xml, x) + node_set1 = xml.css("address") + node_set2 = xml.css("address") + new_set = node_set1 | node_set2 + assert_equal(node_set1.document, new_set.document) + assert(new_set.respond_to?(:awesome!)) end - util_decorate(@xml, x) - node_set1 = @xml.css("address") - node_set2 = @xml.css("address") - new_set = node_set1 & node_set2 - assert_equal node_set1.document, new_set.document - assert new_set.respond_to?(:awesome!) - end - def test_node_set_difference_result_has_document_and_is_decorated - x = Module.new do - def awesome! ; end + it "node_set_intersection_result_has_document_and_is_decorated" do + x = Module.new do + def awesome!; end + end + util_decorate(xml, x) + node_set1 = xml.css("address") + node_set2 = xml.css("address") + new_set = node_set1 & node_set2 + assert_equal(node_set1.document, new_set.document) + assert(new_set.respond_to?(:awesome!)) end - util_decorate(@xml, x) - node_set1 = @xml.css("address") - node_set2 = @xml.css("address") - new_set = node_set1 - node_set2 - assert_equal node_set1.document, new_set.document - assert new_set.respond_to?(:awesome!) - end - def test_node_set_slice_result_has_document_and_is_decorated - x = Module.new do - def awesome! ; end + it "node_set_difference_result_has_document_and_is_decorated" do + x = Module.new do + def awesome!; end + end + util_decorate(xml, x) + node_set1 = xml.css("address") + node_set2 = xml.css("address") + new_set = node_set1 - node_set2 + assert_equal(node_set1.document, new_set.document) + assert(new_set.respond_to?(:awesome!)) end - util_decorate(@xml, x) - node_set = @xml.css("address") - new_set = node_set[0..-1] - assert_equal node_set.document, new_set.document - assert new_set.respond_to?(:awesome!) - end - def test_each_should_return_self - node_set1 = @xml.css("address") - node_set2 = node_set1.each {} - assert_equal node_set1, node_set2 + it "node_set_slice_result_has_document_and_is_decorated" do + x = Module.new do + def awesome!; end + end + util_decorate(xml, x) + node_set = xml.css("address") + new_set = node_set[0..-1] + assert_equal(node_set.document, new_set.document) + assert(new_set.respond_to?(:awesome!)) + end end end end From 0ee6c5bd81b5dd9d01d0a735c33acd956ca0e968 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 4 Feb 2021 21:10:37 -0500 Subject: [PATCH 7/9] fix: quash C90 compilation warning --- ext/nokogiri/xml_node.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index a9ebc2bef7..3aad6ae09c 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -1341,9 +1341,9 @@ static VALUE line(VALUE self) static VALUE set_line(VALUE self, VALUE num) { xmlNodePtr node; - Data_Get_Struct(self, xmlNode, node); - int value = NUM2INT(num); + + Data_Get_Struct(self, xmlNode, node); if (value < 65535) { node->line = value; } From 56c19dbe53dae79d7fdd1de58b6bafb16d87867d Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 1 Feb 2021 23:55:15 -0500 Subject: [PATCH 8/9] fix: allow NodeSets to contain Nodes from multiple Documents Specifically, this means we've added a mark callback for the NodeSet which marks each of the contained objects. Previously we skipped explicitly marking the contained objects due to the assumption that all the nodes would be from the same document as the NodeSet itself. Fixes #1952. --- CHANGELOG.md | 1 + ext/nokogiri/xml_node_set.c | 21 ++++++++++++++++++++- test/xml/test_node_set.rb | 30 ++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76f2586be3..02f4c831cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA ### Fixed +* [CRuby] `NodeSet` may now safely contain `Node` objects from multiple documents. Previously the GC lifecycle of the parent `Document` objects could lead to contained nodes being GCed while still in scope. [[#1952](https://github.com/sparklemotion/nokogiri/issues/1952)] * [CRuby] `{XML,HTML}::Document.parse` now invokes `#initialize` exactly once. Previously `#initialize` was invoked twice on each object. * [JRuby] `{XML,HTML}::Document.parse` now invokes `#initialize` exactly once. Previously `#initialize` was not called, which was a problem for subclassing such as done by `Loofah`. diff --git a/ext/nokogiri/xml_node_set.c b/ext/nokogiri/xml_node_set.c index f5ceef09e9..3cc368adea 100644 --- a/ext/nokogiri/xml_node_set.c +++ b/ext/nokogiri/xml_node_set.c @@ -15,6 +15,25 @@ static void Check_Node_Set_Node_Type(VALUE node) } +static void mark(xmlNodeSetPtr node_set) +{ + xmlNodePtr c_node; + VALUE rb_node; + int jnode; + + for (jnode = 0; jnode < node_set->nodeNr; jnode++) { + c_node = node_set->nodeTab[jnode]; + if (NOKOGIRI_NAMESPACE_EH(c_node)) { + rb_node = (VALUE)(((xmlNsPtr)c_node)->_private); + } else { + rb_node = (VALUE)(c_node->_private); + } + if (rb_node) { + rb_gc_mark(rb_node); + } + } +} + static void deallocate(xmlNodeSetPtr node_set) { /* @@ -405,7 +424,7 @@ VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document) node_set = xmlXPathNodeSetCreate(NULL); } - new_set = Data_Wrap_Struct(cNokogiriXmlNodeSet, 0, deallocate, node_set); + new_set = Data_Wrap_Struct(cNokogiriXmlNodeSet, mark, deallocate, node_set); if (!NIL_P(document)) { rb_iv_set(new_set, "@document", document); diff --git a/test/xml/test_node_set.rb b/test/xml/test_node_set.rb index 65214fa751..6911f447c3 100644 --- a/test/xml/test_node_set.rb +++ b/test/xml/test_node_set.rb @@ -877,6 +877,36 @@ def awesome!; end assert_equal(node_set.document, new_set.document) assert(new_set.respond_to?(:awesome!)) end + + describe "adding nodes from different documents to the same NodeSet" do + # see https://github.com/sparklemotion/nokogiri/issues/1952 + it "should not segfault" do + skip("this tests a libxml2-specific issue") if Nokogiri.jruby? + + xml = <<~EOF + + + EOF + scope = lambda do + Nokogiri::XML::Document.parse(xml).css("container") + Nokogiri::XML::Document.parse(xml).css("container") + end + stress_memory_while do + node_set = scope.call + node_set.to_s + end + end + + it "should handle this case just fine" do + doc1 = Nokogiri::XML::Document.parse("
") + doc2 = Nokogiri::XML::Document.parse("
") + node_set = doc1.css("div") + assert_equal(doc1, node_set.document) + node_set += doc2.css("div") + assert_equal(2, node_set.length) + assert_equal(doc1, node_set[0].document) + assert_equal(doc2, node_set[1].document) + end + end end end end From 26872c97a8cff393911e39b9907042621d958f63 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 4 Feb 2021 21:14:48 -0500 Subject: [PATCH 9/9] fix: simplify GC lifecycle for Nodes in a NodeSet Previously, we created Ruby objects for namespaces and used a @namespace_cache attribute (an Array) to maintain references to just those Ruby objects. Now that we have a real GC mark function for NodeSet, let's use that instead of the hacky array attribute. --- ext/nokogiri/xml_namespace.c | 22 +++++ ext/nokogiri/xml_node_set.c | 161 +++++++++++++++-------------------- 2 files changed, 90 insertions(+), 93 deletions(-) diff --git a/ext/nokogiri/xml_namespace.c b/ext/nokogiri/xml_namespace.c index 7aea6ce1ac..82aba102de 100644 --- a/ext/nokogiri/xml_namespace.c +++ b/ext/nokogiri/xml_namespace.c @@ -1,5 +1,27 @@ #include +/* + * The lifecycle of a Namespace node is more complicated than other Nodes, for two reasons: + * + * 1. the underlying C structure has a different layout than all the other node structs, with the + * `_private` member where we store a pointer to Ruby object data not being in first position. + * 2. xmlNs structures returned in an xmlNodeset from an XPath query are copies of the document's + * namespaces, and so do not share the same memory lifecycle as everything else in a document. + * + * As a result of 1, you may see special handling of XML_NAMESPACE_DECL node types throughout the + * Nokogiri C code, though I intend to wrap up that logic in ruby_object_{get,set} functions + * shortly. + * + * As a result of 2, you will see we have special handling in this file and in xml_node_set.c to + * carefully manage the memory lifecycle of xmlNs structs to match the Ruby object's GC + * lifecycle. In xml_node_set.c we have local versions of xmlXPathNodeSetDel() and + * xmlXPathFreeNodeSet() that avoid freeing xmlNs structs in the node set. In this file, we decide + * whether or not to call dealloc_namespace() depending on whether the xmlNs struct appears to be + * in an xmlNodeSet (and thus the result of an XPath query) or not. + * + * Yes, this is madness. + */ + VALUE cNokogiriXmlNamespace ; static void dealloc_namespace(xmlNsPtr ns) diff --git a/ext/nokogiri/xml_node_set.c b/ext/nokogiri/xml_node_set.c index 3cc368adea..f352e8e2e7 100644 --- a/ext/nokogiri/xml_node_set.c +++ b/ext/nokogiri/xml_node_set.c @@ -3,7 +3,6 @@ #include static ID decorate ; -static void xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val); static void Check_Node_Set_Node_Type(VALUE node) @@ -15,41 +14,75 @@ static void Check_Node_Set_Node_Type(VALUE node) } +static +VALUE +ruby_object_get(xmlNodePtr c_node) +{ + /* see xmlElementType in libxml2 tree.h */ + switch (c_node->type) { + case XML_NAMESPACE_DECL: + /* _private is later in the namespace struct */ + return (VALUE)(((xmlNsPtr)c_node)->_private); + + case XML_DOCUMENT_NODE: + case XML_HTML_DOCUMENT_NODE: + /* in documents we use _private to store a tuple */ + if (DOC_RUBY_OBJECT_TEST(((xmlDocPtr)c_node))) { + return DOC_RUBY_OBJECT((xmlDocPtr)c_node); + } + return (VALUE)NULL; + + default: + return (VALUE)(c_node->_private); + } +} + + static void mark(xmlNodeSetPtr node_set) { - xmlNodePtr c_node; VALUE rb_node; int jnode; for (jnode = 0; jnode < node_set->nodeNr; jnode++) { - c_node = node_set->nodeTab[jnode]; - if (NOKOGIRI_NAMESPACE_EH(c_node)) { - rb_node = (VALUE)(((xmlNsPtr)c_node)->_private); - } else { - rb_node = (VALUE)(c_node->_private); - } + rb_node = ruby_object_get(node_set->nodeTab[jnode]); if (rb_node) { rb_gc_mark(rb_node); } } } +static void xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val) +{ + /* + * For reasons outlined in xml_namespace.c, here we reproduce xmlXPathNodeSetDel() except for the + * offending call to xmlXPathNodeSetFreeNs(). + */ + int i; + + if (cur == NULL) return; + if (val == NULL) return; + + /* + * find node in nodeTab + */ + for (i = 0;i < cur->nodeNr;i++) + if (cur->nodeTab[i] == val) break; + + if (i >= cur->nodeNr) { /* not found */ + return; + } + cur->nodeNr--; + for (;i < cur->nodeNr;i++) + cur->nodeTab[i] = cur->nodeTab[i + 1]; + cur->nodeTab[cur->nodeNr] = NULL; +} + + static void deallocate(xmlNodeSetPtr node_set) { /* - * - * since xpath queries return copies of the xmlNs structs, - * xmlXPathFreeNodeSet() frees those xmlNs structs that are in the - * NodeSet. - * - * this is bad if someone is still trying to use the Namespace object wrapped - * around the xmlNs, so we need to avoid that. - * - * here we reproduce xmlXPathFreeNodeSet() without the xmlNs logic. - * - * this doesn't cause a leak because Namespace objects that are in an XPath - * query NodeSet are given their own lifecycle in - * Nokogiri_wrap_xml_namespace(). + * For reasons outlined in xml_namespace.c, here we reproduce xmlXPathFreeNodeSet() except for the + * offending call to xmlXPathNodeSetFreeNs(). */ NOKOGIRI_DEBUG_START(node_set) ; if (node_set->nodeTab != NULL) @@ -59,6 +92,7 @@ static void deallocate(xmlNodeSetPtr node_set) NOKOGIRI_DEBUG_END(node_set) ; } + static VALUE allocate(VALUE klass) { return Nokogiri_wrap_xml_node_set(xmlXPathNodeSetCreate(NULL), Qnil); @@ -384,57 +418,28 @@ static VALUE unlink_nodeset(VALUE self) } -static void reify_node_set_namespaces(VALUE self) -{ - /* - * as mentioned in deallocate() above, xmlNs structs returned in an XPath - * NodeSet are duplicates, and we don't clean them up at deallocate() time. - * - * as a result, we need to make sure the Ruby manages this memory. we do this - * by forcing the creation of a Ruby object wrapped around the xmlNs. - * - * we also have to make sure that the NodeSet has a reference to the - * Namespace object, otherwise GC will kick in and the Namespace won't be - * marked. - * - * we *could* do this safely with *all* the nodes in the NodeSet, but we only - * *need* to do it for xmlNs structs, and so you get the code we have here. - */ - int j ; - xmlNodeSetPtr node_set ; - VALUE namespace_cache ; - - Data_Get_Struct(self, xmlNodeSet, node_set); - - namespace_cache = rb_iv_get(self, "@namespace_cache"); - - for (j = 0 ; j < node_set->nodeNr ; j++) { - if (NOKOGIRI_NAMESPACE_EH(node_set->nodeTab[j])) { - rb_ary_push(namespace_cache, Nokogiri_wrap_xml_node_set_node(node_set->nodeTab[j], self)); - } - } -} - - -VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document) +VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr c_node_set, VALUE document) { - VALUE new_set ; + int j; + VALUE rb_node_set ; - if (node_set == NULL) { - node_set = xmlXPathNodeSetCreate(NULL); + if (c_node_set == NULL) { + c_node_set = xmlXPathNodeSetCreate(NULL); } - new_set = Data_Wrap_Struct(cNokogiriXmlNodeSet, mark, deallocate, node_set); + rb_node_set = Data_Wrap_Struct(cNokogiriXmlNodeSet, mark, deallocate, c_node_set); if (!NIL_P(document)) { - rb_iv_set(new_set, "@document", document); - rb_funcall(document, decorate, 1, new_set); + rb_iv_set(rb_node_set, "@document", document); + rb_funcall(document, decorate, 1, rb_node_set); } - rb_iv_set(new_set, "@namespace_cache", rb_ary_new()); - reify_node_set_namespaces(new_set); + /* make sure we create ruby objects for all the results, so they'll be marked during the GC mark phase */ + for (j = 0 ; j < c_node_set->nodeNr ; j++) { + Nokogiri_wrap_xml_node_set_node(c_node_set->nodeTab[j], rb_node_set); + } - return new_set ; + return rb_node_set ; } VALUE Nokogiri_wrap_xml_node_set_node(xmlNodePtr node, VALUE node_set) @@ -450,36 +455,6 @@ VALUE Nokogiri_wrap_xml_node_set_node(xmlNodePtr node, VALUE node_set) } -static void xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val) -{ - /* - * as mentioned a few times above, we do not want to free xmlNs structs - * outside of the Namespace lifecycle. - * - * xmlXPathNodeSetDel() frees xmlNs structs, and so here we reproduce that - * function with the xmlNs logic. - */ - int i; - - if (cur == NULL) return; - if (val == NULL) return; - - /* - * find node in nodeTab - */ - for (i = 0;i < cur->nodeNr;i++) - if (cur->nodeTab[i] == val) break; - - if (i >= cur->nodeNr) { /* not found */ - return; - } - cur->nodeNr--; - for (;i < cur->nodeNr;i++) - cur->nodeTab[i] = cur->nodeTab[i + 1]; - cur->nodeTab[cur->nodeNr] = NULL; -} - - VALUE cNokogiriXmlNodeSet ; void init_xml_node_set(void) {