From ff300a98cda17a960c3b099787f3c0b024f9a4fd Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 1 Feb 2021 23:55:15 -0500 Subject: [PATCH] fix: allow NodeSets to contain Nodes from multiple Documents Specifically, this means we've added a mark callback for the NodeSet which marks each of the contained objects. Previously we skipped explicitly marking the contained objects due to the assumption that all the nodes would be from the same document as the NodeSet itself. Fixes #1952. --- CHANGELOG.md | 1 + ext/nokogiri/xml_node_set.c | 21 ++++++++++++++++++++- test/xml/test_node_set.rb | 30 ++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76f2586be3..02f4c831cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA ### Fixed +* [CRuby] `NodeSet` may now safely contain `Node` objects from multiple documents. Previously the GC lifecycle of the parent `Document` objects could lead to contained nodes being GCed while still in scope. [[#1952](https://github.com/sparklemotion/nokogiri/issues/1952)] * [CRuby] `{XML,HTML}::Document.parse` now invokes `#initialize` exactly once. Previously `#initialize` was invoked twice on each object. * [JRuby] `{XML,HTML}::Document.parse` now invokes `#initialize` exactly once. Previously `#initialize` was not called, which was a problem for subclassing such as done by `Loofah`. diff --git a/ext/nokogiri/xml_node_set.c b/ext/nokogiri/xml_node_set.c index f5ceef09e9..3cc368adea 100644 --- a/ext/nokogiri/xml_node_set.c +++ b/ext/nokogiri/xml_node_set.c @@ -15,6 +15,25 @@ static void Check_Node_Set_Node_Type(VALUE node) } +static void mark(xmlNodeSetPtr node_set) +{ + xmlNodePtr c_node; + VALUE rb_node; + int jnode; + + for (jnode = 0; jnode < node_set->nodeNr; jnode++) { + c_node = node_set->nodeTab[jnode]; + if (NOKOGIRI_NAMESPACE_EH(c_node)) { + rb_node = (VALUE)(((xmlNsPtr)c_node)->_private); + } else { + rb_node = (VALUE)(c_node->_private); + } + if (rb_node) { + rb_gc_mark(rb_node); + } + } +} + static void deallocate(xmlNodeSetPtr node_set) { /* @@ -405,7 +424,7 @@ VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document) node_set = xmlXPathNodeSetCreate(NULL); } - new_set = Data_Wrap_Struct(cNokogiriXmlNodeSet, 0, deallocate, node_set); + new_set = Data_Wrap_Struct(cNokogiriXmlNodeSet, mark, deallocate, node_set); if (!NIL_P(document)) { rb_iv_set(new_set, "@document", document); diff --git a/test/xml/test_node_set.rb b/test/xml/test_node_set.rb index 65214fa751..6911f447c3 100644 --- a/test/xml/test_node_set.rb +++ b/test/xml/test_node_set.rb @@ -877,6 +877,36 @@ def awesome!; end assert_equal(node_set.document, new_set.document) assert(new_set.respond_to?(:awesome!)) end + + describe "adding nodes from different documents to the same NodeSet" do + # see https://github.com/sparklemotion/nokogiri/issues/1952 + it "should not segfault" do + skip("this tests a libxml2-specific issue") if Nokogiri.jruby? + + xml = <<~EOF + + + EOF + scope = lambda do + Nokogiri::XML::Document.parse(xml).css("container") + Nokogiri::XML::Document.parse(xml).css("container") + end + stress_memory_while do + node_set = scope.call + node_set.to_s + end + end + + it "should handle this case just fine" do + doc1 = Nokogiri::XML::Document.parse("
") + doc2 = Nokogiri::XML::Document.parse("
") + node_set = doc1.css("div") + assert_equal(doc1, node_set.document) + node_set += doc2.css("div") + assert_equal(2, node_set.length) + assert_equal(doc1, node_set[0].document) + assert_equal(doc2, node_set[1].document) + end + end end end end