Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: XML::Reader XML::Attr garbage collection #2599

Merged
merged 4 commits into from Jul 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/downstream.yml
Expand Up @@ -42,6 +42,9 @@ jobs:
- url: https://github.com/rails/rails
name: xmlmini
command: "cd activesupport && bundle exec rake test TESTOPTS=-n/XmlMini/"
- url: https://github.com/pythonicrubyist/creek
name: creek
command: "bundle exec rake spec"
runs-on: ubuntu-latest
container:
image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1
Expand All @@ -63,5 +66,8 @@ jobs:
else
echo "gem 'nokogiri', path: '..'" >> Gemfile
fi
if egrep "add_development_dependency.*\bbundler\b" *gemspec ; then
sed -i 's/.*add_development_dependency.*\bbundler\b.*//' *gemspec
fi
bundle install --local || bundle install
${{matrix.command}}
8 changes: 8 additions & 0 deletions ext/java/nokogiri/XmlReader.java
Expand Up @@ -142,9 +142,17 @@ public class XmlReader extends RubyObject
public IRubyObject
attribute_nodes(ThreadContext context)
{
context.runtime.getWarnings().warn("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
return currentNode().getAttributesNodes();
}

@JRubyMethod
public IRubyObject
attribute_hash(ThreadContext context)
{
return currentNode().getAttributes(context);
}

@JRubyMethod(name = "attributes?")
public IRubyObject
attributes_p(ThreadContext context)
Expand Down
5 changes: 3 additions & 2 deletions ext/java/nokogiri/internals/ReaderNode.java
Expand Up @@ -112,9 +112,10 @@ public abstract class ReaderNode
getAttributes(ThreadContext context)
{
final Ruby runtime = context.runtime;
if (attributeList == null) { return runtime.getNil(); }
RubyHash hash = RubyHash.newHash(runtime);
if (attributeList == null) { return hash; }
for (int i = 0; i < attributeList.length; i++) {
if (isNamespace(attributeList.names.get(i))) { continue; }
IRubyObject k = stringOrBlank(runtime, attributeList.names.get(i));
IRubyObject v = stringOrBlank(runtime, attributeList.values.get(i));
hash.fastASetCheckString(runtime, k, v); // hash.op_aset(context, k, v)
Expand Down Expand Up @@ -150,8 +151,8 @@ public abstract class ReaderNode
getNamespaces(ThreadContext context)
{
final Ruby runtime = context.runtime;
if (namespaces == null) { return runtime.getNil(); }
RubyHash hash = RubyHash.newHash(runtime);
if (namespaces == null) { return hash; }
for (Map.Entry<String, String> entry : namespaces.entrySet()) {
IRubyObject k = stringOrBlank(runtime, entry.getKey());
IRubyObject v = stringOrBlank(runtime, entry.getValue());
Expand Down
1 change: 1 addition & 0 deletions ext/nokogiri/extconf.rb
Expand Up @@ -973,6 +973,7 @@ def compile
have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23
have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23
have_func("rb_gc_location") # introduced in Ruby 2.7
have_func("rb_category_warning") # introduced in Ruby 3.0

other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
append_cppflags(%[-DNOKOGIRI_OTHER_LIBRARY_VERSIONS="\\\"#{other_library_versions_string}\\\""])
Expand Down
6 changes: 6 additions & 0 deletions ext/nokogiri/nokogiri.h
Expand Up @@ -209,6 +209,12 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
#define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
#define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)

#if HAVE_RB_CATEGORY_WARNING
# define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
#else
# define NOKO_WARN_DEPRECATION(message) rb_warning(message)
#endif

void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
xmlStructuredErrorFunc handler);
Expand Down
2 changes: 1 addition & 1 deletion ext/nokogiri/xml_node.c
Expand Up @@ -2071,7 +2071,7 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
}
if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
// TODO: deprecate allowing Node
rb_warn("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri.");
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri.");
}
Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);

Expand Down
53 changes: 52 additions & 1 deletion ext/nokogiri/xml_reader.c
Expand Up @@ -31,6 +31,7 @@ has_attributes(xmlTextReaderPtr reader)
return (0);
}

// TODO: merge this function into the `namespaces` method implementation
static void
Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
{
Expand Down Expand Up @@ -150,7 +151,11 @@ namespaces(VALUE self)
/*
:call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>

Get the attributes of the current node as an Array of Attr
Get the attributes of the current node as an Array of XML:Attr

⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.

See related: #attribute_hash, #attributes
*/
static VALUE
rb_xml_reader_attribute_nodes(VALUE rb_reader)
Expand All @@ -160,6 +165,10 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader)
VALUE attr_nodes;
int j;

// TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
// After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");

Data_Get_Struct(rb_reader, xmlTextReader, c_reader);

if (! has_attributes(c_reader)) {
Expand All @@ -181,6 +190,47 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader)
return attr_nodes;
}

/*
:call-seq: attribute_hash() → Hash<String ⇒ String>

Get the attributes of the current node as a Hash of names and values.

See related: #attributes and #namespaces
*/
static VALUE
rb_xml_reader_attribute_hash(VALUE rb_reader)
{
VALUE rb_attributes = rb_hash_new();
xmlTextReaderPtr c_reader;
xmlNodePtr c_node;
xmlAttrPtr c_property;

Data_Get_Struct(rb_reader, xmlTextReader, c_reader);

if (!has_attributes(c_reader)) {
return rb_attributes;
}

c_node = xmlTextReaderExpand(c_reader);
c_property = c_node->properties;
while (c_property != NULL) {
VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
VALUE rb_value = Qnil;
xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property);

if (c_value) {
rb_value = NOKOGIRI_STR_NEW2(c_value);
xmlFree(c_value);
}

rb_hash_aset(rb_attributes, rb_name, rb_value);

c_property = c_property->next;
}

return rb_attributes;
}

/*
* call-seq:
* attribute_at(index)
Expand Down Expand Up @@ -696,6 +746,7 @@ noko_init_xml_reader()
rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0);
Expand Down
14 changes: 6 additions & 8 deletions lib/nokogiri/xml/reader.rb
Expand Up @@ -83,16 +83,14 @@ def initialize(source, url = nil, encoding = nil) # :nodoc:
end
private :initialize

# Get the attributes of the current node as a Hash
# Get the attributes and namespaces of the current node as a Hash.
#
# [Returns] (Hash<String, String>) Attribute names and values
# This is the union of Reader#attribute_hash and Reader#namespaces
#
# [Returns]
# (Hash<String, String>) Attribute names and values, and namespace prefixes and hrefs.
def attributes
attrs_hash = attribute_nodes.each_with_object({}) do |node, hash|
hash[node.name] = node.to_s
end
ns = namespaces
attrs_hash.merge!(ns) if ns
attrs_hash
attribute_hash.merge(namespaces)
end

###
Expand Down
81 changes: 63 additions & 18 deletions test/xml/test_reader.rb
Expand Up @@ -228,23 +228,61 @@ def test_attributes?
reader.map(&:attributes?))
end

def test_attributes
reader = Nokogiri::XML::Reader.from_memory(<<-eoxml)
<x xmlns:tenderlove='http://tenderlovemaking.com/'
xmlns='http://mothership.connection.com/'
>
<tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
</x>
eoxml
def test_reader_attributes
reader = Nokogiri::XML::Reader.from_memory(<<~XML)
<x xmlns:tenderlove='http://tenderlovemaking.com/' xmlns='http://mothership.connection.com/'>
<tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
</x>
XML
assert_empty(reader.attributes)
assert_equal([{ "xmlns:tenderlove" => "http://tenderlovemaking.com/",
"xmlns" => "http://mothership.connection.com/", },
{}, { "awesome" => "true" }, {}, { "awesome" => "true" }, {},
{},
{ "awesome" => "true" },
{},
{ "awesome" => "true" },
{},
{ "xmlns:tenderlove" => "http://tenderlovemaking.com/",
"xmlns" => "http://mothership.connection.com/", },],
reader.map(&:attributes))
end

def test_reader_attributes_hash
reader = Nokogiri::XML::Reader.from_memory(<<~XML)
<x xmlns:tenderlove='http://tenderlovemaking.com/' xmlns='http://mothership.connection.com/'>
<tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
</x>
XML
assert_empty(reader.attribute_hash)
assert_equal([{},
{},
{ "awesome" => "true" },
{},
{ "awesome" => "true" },
{},
{},],
reader.map(&:attribute_hash))
end

def test_reader_namespaces
reader = Nokogiri::XML::Reader.from_memory(<<~XML)
<x xmlns:tenderlove='http://tenderlovemaking.com/' xmlns='http://mothership.connection.com/'>
<tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
</x>
XML
assert_empty(reader.namespaces)
assert_equal([{ "xmlns:tenderlove" => "http://tenderlovemaking.com/",
"xmlns" => "http://mothership.connection.com/", },
{},
{},
{},
{},
{},
{ "xmlns:tenderlove" => "http://tenderlovemaking.com/",
"xmlns" => "http://mothership.connection.com/", },],
reader.map(&:namespaces))
end

def test_attribute_roundtrip
reader = Nokogiri::XML::Reader.from_memory(<<-eoxml)
<x xmlns:tenderlove='http://tenderlovemaking.com/'
Expand Down Expand Up @@ -434,7 +472,9 @@ def test_reader_node_attributes_keep_a_reference_to_the_reader

reader = Nokogiri::XML::Reader.from_memory(xml)
reader.each do |element|
attribute_nodes += element.attribute_nodes
assert_output(nil, /Reader#attribute_nodes is deprecated/) do
attribute_nodes += element.attribute_nodes
end
end
end

Expand All @@ -451,7 +491,9 @@ def test_namespaced_attributes
eoxml
attr_ns = []
while reader.read
if reader.node_type == Nokogiri::XML::Node::ELEMENT_NODE
next unless reader.node_type == Nokogiri::XML::Node::ELEMENT_NODE

assert_output(nil, /Reader#attribute_nodes is deprecated/) do
reader.attribute_nodes.each { |attr| attr_ns << (attr.namespace.nil? ? nil : attr.namespace.prefix) }
end
end
Expand Down Expand Up @@ -555,14 +597,17 @@ def test_read_from_memory
end

def test_large_document_smoke_test
# simply run on a large document to verify that there no GC issues
xml = []
xml << "<elements>"
10000.times { |j| xml << "<element id=\"#{j}\"/>" }
xml << "</elements>"
xml = xml.join("\n")
skip_unless_libxml2("valgrind tests should only run with libxml2")

Nokogiri::XML::Reader.from_memory(xml).each(&:attributes)
refute_valgrind_errors do
xml = []
xml << "<elements>"
10000.times { |j| xml << "<element id=\"#{j}\"/>" }
xml << "</elements>"
xml = xml.join("\n")

Nokogiri::XML::Reader.from_memory(xml).each(&:attributes)
end
end

def test_correct_outer_xml_inclusion
Expand Down