Skip to content

Commit

Permalink
fix: allow NodeSets to contain Nodes from multiple Documents
Browse files Browse the repository at this point in the history
Specifically, this means we've added a mark callback for the NodeSet
which marks each of the contained objects.

Previously we skipped explicitly marking the contained objects due to
the assumption that all the nodes would be from the same document as
the NodeSet itself.

Fixes #1952.
  • Loading branch information
flavorjones committed Feb 5, 2021
1 parent 121437f commit ff300a9
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -8,6 +8,7 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA

### Fixed

* [CRuby] `NodeSet` may now safely contain `Node` objects from multiple documents. Previously the GC lifecycle of the parent `Document` objects could lead to contained nodes being GCed while still in scope. [[#1952](https://github.com/sparklemotion/nokogiri/issues/1952)]
* [CRuby] `{XML,HTML}::Document.parse` now invokes `#initialize` exactly once. Previously `#initialize` was invoked twice on each object.
* [JRuby] `{XML,HTML}::Document.parse` now invokes `#initialize` exactly once. Previously `#initialize` was not called, which was a problem for subclassing such as done by `Loofah`.

Expand Down
21 changes: 20 additions & 1 deletion ext/nokogiri/xml_node_set.c
Expand Up @@ -15,6 +15,25 @@ static void Check_Node_Set_Node_Type(VALUE node)
}


static void mark(xmlNodeSetPtr node_set)
{
xmlNodePtr c_node;
VALUE rb_node;
int jnode;

for (jnode = 0; jnode < node_set->nodeNr; jnode++) {
c_node = node_set->nodeTab[jnode];
if (NOKOGIRI_NAMESPACE_EH(c_node)) {
rb_node = (VALUE)(((xmlNsPtr)c_node)->_private);
} else {
rb_node = (VALUE)(c_node->_private);
}
if (rb_node) {
rb_gc_mark(rb_node);
}
}
}

static void deallocate(xmlNodeSetPtr node_set)
{
/*
Expand Down Expand Up @@ -405,7 +424,7 @@ VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document)
node_set = xmlXPathNodeSetCreate(NULL);
}

new_set = Data_Wrap_Struct(cNokogiriXmlNodeSet, 0, deallocate, node_set);
new_set = Data_Wrap_Struct(cNokogiriXmlNodeSet, mark, deallocate, node_set);

if (!NIL_P(document)) {
rb_iv_set(new_set, "@document", document);
Expand Down
30 changes: 30 additions & 0 deletions test/xml/test_node_set.rb
Expand Up @@ -877,6 +877,36 @@ def awesome!; end
assert_equal(node_set.document, new_set.document)
assert(new_set.respond_to?(:awesome!))
end

describe "adding nodes from different documents to the same NodeSet" do
# see https://github.com/sparklemotion/nokogiri/issues/1952
it "should not segfault" do
skip("this tests a libxml2-specific issue") if Nokogiri.jruby?

xml = <<~EOF
<?xml version="1.0" encoding="UTF-8"?>
<container></container>
EOF
scope = lambda do
Nokogiri::XML::Document.parse(xml).css("container") + Nokogiri::XML::Document.parse(xml).css("container")
end
stress_memory_while do
node_set = scope.call
node_set.to_s
end
end

it "should handle this case just fine" do
doc1 = Nokogiri::XML::Document.parse("<div class='doc1'></div>")
doc2 = Nokogiri::XML::Document.parse("<div class='doc2'></div>")
node_set = doc1.css("div")
assert_equal(doc1, node_set.document)
node_set += doc2.css("div")
assert_equal(2, node_set.length)
assert_equal(doc1, node_set[0].document)
assert_equal(doc2, node_set[1].document)
end
end
end
end
end
Expand Down

0 comments on commit ff300a9

Please sign in to comment.