From b08a8586c7c34831be0f13f9147b84016d17d94b Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sun, 16 Oct 2022 10:07:05 -0400 Subject: [PATCH 1/3] test: repro namespace_scopes compaction issue Co-Authored-By: Matt Valentine-House Co-Authored-By: Peter Zhu --- test/test_compaction.rb | 42 +++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/test/test_compaction.rb b/test/test_compaction.rb index 5b0720f670..e06d36d40d 100644 --- a/test/test_compaction.rb +++ b/test/test_compaction.rb @@ -3,19 +3,41 @@ require "helper" describe "compaction" do - it "https://github.com/sparklemotion/nokogiri/pull/2579" do - skip unless GC.respond_to?(:verify_compaction_references) + describe Nokogiri::XML::Node do + it "compacts safely" do # https://github.com/sparklemotion/nokogiri/pull/2579 + skip unless GC.respond_to?(:verify_compaction_references) - big_doc = "" + ("a".."zz").map { |x| "<#{x}>#{x}" }.join + "" - doc = Nokogiri.XML(big_doc) + big_doc = "" + ("a".."zz").map { |x| "<#{x}>#{x}" }.join + "" + doc = Nokogiri.XML(big_doc) - # ensure a bunch of node objects have been wrapped - doc.root.children.each(&:inspect) + # ensure a bunch of node objects have been wrapped + doc.root.children.each(&:inspect) - # compact the heap and try to get the node wrappers to move - GC.verify_compaction_references(double_heap: true, toward: :empty) + # compact the heap and try to get the node wrappers to move + GC.verify_compaction_references(double_heap: true, toward: :empty) - # access the node wrappers and make sure they didn't move - doc.root.children.each(&:inspect) + # access the node wrappers and make sure they didn't move + doc.root.children.each(&:inspect) + end + end + + describe Nokogiri::XML::Namespace do + it "namespace_scopes" do + skip unless GC.respond_to?(:verify_compaction_references) + + doc = Nokogiri::XML(<<~EOF) + + + + + + EOF + + doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes.inspect + + GC.verify_compaction_references(double_heap: true, toward: :empty) + + doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes.inspect + end end end From 5f58b34724a6e48c7c478cfda5fc9c4cac581e08 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sat, 15 Oct 2022 12:36:10 -0400 Subject: [PATCH 2/3] fix: namespace nodes behave properly when compacted --- ext/nokogiri/nokogiri.h | 1 + ext/nokogiri/xml_namespace.c | 46 ++++++++++++++++++++++++++++++++---- ext/nokogiri/xml_node.c | 2 +- 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h index 51ddbf8d1e..399967ebd0 100644 --- a/ext/nokogiri/nokogiri.h +++ b/ext/nokogiri/nokogiri.h @@ -171,6 +171,7 @@ int noko_io_write(void *ctx, char *buffer, int len); int noko_io_close(void *ctx); #define Noko_Node_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj)) +#define Noko_Namespace_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj)) VALUE noko_xml_node_wrap(VALUE klass, xmlNodePtr node) ; VALUE noko_xml_node_wrap_node_set_result(xmlNodePtr node, VALUE node_set) ; diff --git a/ext/nokogiri/xml_namespace.c b/ext/nokogiri/xml_namespace.c index 0b41acf1af..52b49c5207 100644 --- a/ext/nokogiri/xml_namespace.c +++ b/ext/nokogiri/xml_namespace.c @@ -25,13 +25,15 @@ VALUE cNokogiriXmlNamespace ; static void -dealloc_namespace(xmlNsPtr ns) +_xml_namespace_dealloc(void *ptr) { /* * this deallocator is only used for namespace nodes that are part of an xpath * node set. see noko_xml_namespace_wrap(). */ + xmlNsPtr ns = ptr; NOKOGIRI_DEBUG_START(ns) ; + if (ns->href) { xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href)); } @@ -42,6 +44,36 @@ dealloc_namespace(xmlNsPtr ns) NOKOGIRI_DEBUG_END(ns) ; } +#ifdef HAVE_RB_GC_LOCATION +static void +_xml_namespace_update_references(void *ptr) +{ + xmlNsPtr ns = ptr; + if (ns->_private) { + ns->_private = (void *)rb_gc_location((VALUE)ns->_private); + } +} +#else +# define _xml_namespace_update_references 0 +#endif + +static const rb_data_type_t nokogiri_xml_namespace_type_with_dealloc = { + "Nokogiri/XMLNamespace/WithDealloc", + {0, _xml_namespace_dealloc, 0, _xml_namespace_update_references}, + 0, 0, +#ifdef RUBY_TYPED_FREE_IMMEDIATELY + RUBY_TYPED_FREE_IMMEDIATELY, +#endif +}; + +static const rb_data_type_t nokogiri_xml_namespace_type_without_dealloc = { + "Nokogiri/XMLNamespace/WithoutDealloc", + {0, 0, 0, _xml_namespace_update_references}, + 0, 0, +#ifdef RUBY_TYPED_FREE_IMMEDIATELY + RUBY_TYPED_FREE_IMMEDIATELY, +#endif +}; /* * call-seq: @@ -54,7 +86,7 @@ prefix(VALUE self) { xmlNsPtr ns; - Data_Get_Struct(self, xmlNs, ns); + Noko_Namespace_Get_Struct(self, xmlNs, ns); if (!ns->prefix) { return Qnil; } return NOKOGIRI_STR_NEW2(ns->prefix); @@ -71,7 +103,7 @@ href(VALUE self) { xmlNsPtr ns; - Data_Get_Struct(self, xmlNs, ns); + Noko_Namespace_Get_Struct(self, xmlNs, ns); if (!ns->href) { return Qnil; } return NOKOGIRI_STR_NEW2(ns->href); @@ -87,14 +119,18 @@ noko_xml_namespace_wrap(xmlNsPtr c_namespace, xmlDocPtr c_document) } if (c_document) { - rb_namespace = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, c_namespace); + rb_namespace = TypedData_Wrap_Struct(cNokogiriXmlNamespace, + &nokogiri_xml_namespace_type_without_dealloc, + c_namespace); if (DOC_RUBY_OBJECT_TEST(c_document)) { rb_iv_set(rb_namespace, "@document", DOC_RUBY_OBJECT(c_document)); rb_ary_push(DOC_NODE_CACHE(c_document), rb_namespace); } } else { - rb_namespace = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, dealloc_namespace, c_namespace); + rb_namespace = TypedData_Wrap_Struct(cNokogiriXmlNamespace, + &nokogiri_xml_namespace_type_with_dealloc, + c_namespace); } c_namespace->_private = (void *)rb_namespace; diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index c80d2d4247..01b5602b65 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -1353,7 +1353,7 @@ set_namespace(VALUE self, VALUE namespace) Noko_Node_Get_Struct(self, xmlNode, node); if (!NIL_P(namespace)) { - Data_Get_Struct(namespace, xmlNs, ns); + Noko_Namespace_Get_Struct(namespace, xmlNs, ns); } xmlSetNs(node, ns); From 73d73d6e433f17f39e188f5c03ec176b60719416 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Sun, 16 Oct 2022 10:05:23 -0400 Subject: [PATCH 3/3] fix: Document#remove_namespaces! use-after-free bug --- ext/nokogiri/xml_document.c | 6 +++++- test/test_compaction.rb | 22 ++++++++++++++++++++ test/test_memory_leak.rb | 41 +++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index 216381bfd4..f79ee137bf 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -104,7 +104,11 @@ recursively_remove_namespaces_from_node(xmlNodePtr node) (node->type == XML_XINCLUDE_START) || (node->type == XML_XINCLUDE_END)) && node->nsDef) { - xmlFreeNsList(node->nsDef); + xmlNsPtr curr = node->nsDef; + while (curr) { + noko_xml_document_pin_namespace(curr, node->doc); + curr = curr->next; + } node->nsDef = NULL; } diff --git a/test/test_compaction.rb b/test/test_compaction.rb index e06d36d40d..9e32cf9a2f 100644 --- a/test/test_compaction.rb +++ b/test/test_compaction.rb @@ -39,5 +39,27 @@ doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes.inspect end + + it "remove_namespaces!" do + skip unless GC.respond_to?(:verify_compaction_references) + + doc = Nokogiri::XML(<<~XML) + + hello from a + hello from b + + hello from c + + + XML + + namespaces = doc.root.namespaces + namespaces.each(&:inspect) + doc.remove_namespaces! + + GC.verify_compaction_references(double_heap: true, toward: :empty) + + namespaces.each(&:inspect) + end end end diff --git a/test/test_memory_leak.rb b/test/test_memory_leak.rb index 8b546c561c..aad3480bb7 100644 --- a/test/test_memory_leak.rb +++ b/test/test_memory_leak.rb @@ -191,6 +191,47 @@ def test_leaking_namespace_node_strings_with_prefix end end + def test_document_remove_namespaces_with_ruby_objects + xml = <<~XML + + hello from a + hello from b + + hello from c + + + XML + + 20.times do + 10_000.times do + doc = Nokogiri::XML::Document.parse(xml) + doc.namespaces.each(&:inspect) + doc.remove_namespaces! + end + puts MemInfo.rss + end + end + + def test_document_remove_namespaces_without_ruby_objects + xml = <<~XML + + hello from a + hello from b + + hello from c + + + XML + + 20.times do + 20_000.times do + doc = Nokogiri::XML::Document.parse(xml) + doc.remove_namespaces! + end + puts MemInfo.rss + end + end + def test_leaking_dtd_nodes_after_internal_subset_removal # see https://github.com/sparklemotion/nokogiri/issues/1784 100_000.times do |i|