From 30a2e6738edbb57e433133eaa010d1c823a522b6 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 21 Jul 2022 12:36:05 -0400 Subject: [PATCH 1/4] ci: add creek to the downstream pipeline --- .github/workflows/downstream.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/downstream.yml b/.github/workflows/downstream.yml index 5ea5371b76..7278f3f546 100644 --- a/.github/workflows/downstream.yml +++ b/.github/workflows/downstream.yml @@ -42,6 +42,9 @@ jobs: - url: https://github.com/rails/rails name: xmlmini command: "cd activesupport && bundle exec rake test TESTOPTS=-n/XmlMini/" + - url: https://github.com/pythonicrubyist/creek + name: creek + command: "bundle exec rake spec" runs-on: ubuntu-latest container: image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 @@ -63,5 +66,8 @@ jobs: else echo "gem 'nokogiri', path: '..'" >> Gemfile fi + if egrep "add_development_dependency.*\bbundler\b" *gemspec ; then + sed -i 's/.*add_development_dependency.*\bbundler\b.*//' *gemspec + fi bundle install --local || bundle install ${{matrix.command}} From 5e65d09507b542bda856bb575e8488f05e9f0442 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 21 Jul 2022 12:21:45 -0400 Subject: [PATCH 2/4] feat: Reader#attribute_hash which is now being used to implement Reader#attributes fix: Reader#namespaces on JRuby now returns an empty hash when no attributes are present (previously returned nil) --- ext/java/nokogiri/XmlReader.java | 7 +++ ext/java/nokogiri/internals/ReaderNode.java | 5 +- ext/nokogiri/xml_reader.c | 43 ++++++++++++++++ lib/nokogiri/xml/reader.rb | 7 +-- test/xml/test_reader.rb | 56 +++++++++++++++++---- 5 files changed, 101 insertions(+), 17 deletions(-) diff --git a/ext/java/nokogiri/XmlReader.java b/ext/java/nokogiri/XmlReader.java index 21d171739d..0f8935dd49 100644 --- a/ext/java/nokogiri/XmlReader.java +++ b/ext/java/nokogiri/XmlReader.java @@ -145,6 +145,13 @@ public class XmlReader extends RubyObject return currentNode().getAttributesNodes(); } + @JRubyMethod + public IRubyObject + attribute_hash(ThreadContext context) + { + return currentNode().getAttributes(context); + } + @JRubyMethod(name = "attributes?") public IRubyObject attributes_p(ThreadContext context) diff --git a/ext/java/nokogiri/internals/ReaderNode.java b/ext/java/nokogiri/internals/ReaderNode.java index 15b7f3aac0..aef01a9681 100644 --- a/ext/java/nokogiri/internals/ReaderNode.java +++ b/ext/java/nokogiri/internals/ReaderNode.java @@ -112,9 +112,10 @@ public abstract class ReaderNode getAttributes(ThreadContext context) { final Ruby runtime = context.runtime; - if (attributeList == null) { return runtime.getNil(); } RubyHash hash = RubyHash.newHash(runtime); + if (attributeList == null) { return hash; } for (int i = 0; i < attributeList.length; i++) { + if (isNamespace(attributeList.names.get(i))) { continue; } IRubyObject k = stringOrBlank(runtime, attributeList.names.get(i)); IRubyObject v = stringOrBlank(runtime, attributeList.values.get(i)); hash.fastASetCheckString(runtime, k, v); // hash.op_aset(context, k, v) @@ -150,8 +151,8 @@ public abstract class ReaderNode getNamespaces(ThreadContext context) { final Ruby runtime = context.runtime; - if (namespaces == null) { return runtime.getNil(); } RubyHash hash = RubyHash.newHash(runtime); + if (namespaces == null) { return hash; } for (Map.Entry entry : namespaces.entrySet()) { IRubyObject k = stringOrBlank(runtime, entry.getKey()); IRubyObject v = stringOrBlank(runtime, entry.getValue()); diff --git a/ext/nokogiri/xml_reader.c b/ext/nokogiri/xml_reader.c index 4f87e18f14..81f4bdc275 100644 --- a/ext/nokogiri/xml_reader.c +++ b/ext/nokogiri/xml_reader.c @@ -31,6 +31,7 @@ has_attributes(xmlTextReaderPtr reader) return (0); } +// TODO: merge this function into the `namespaces` method implementation static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash) { @@ -181,6 +182,47 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader) return attr_nodes; } +/* + :call-seq: attribute_hash() → Hash + + Get the attributes of the current node as a Hash of names and values. + + See related: #attributes and #namespaces + */ +static VALUE +rb_xml_reader_attribute_hash(VALUE rb_reader) +{ + VALUE rb_attributes = rb_hash_new(); + xmlTextReaderPtr c_reader; + xmlNodePtr c_node; + xmlAttrPtr c_property; + + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); + + if (!has_attributes(c_reader)) { + return rb_attributes; + } + + c_node = xmlTextReaderExpand(c_reader); + c_property = c_node->properties; + while (c_property != NULL) { + VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name); + VALUE rb_value = Qnil; + xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property); + + if (c_value) { + rb_value = NOKOGIRI_STR_NEW2(c_value); + xmlFree(c_value); + } + + rb_hash_aset(rb_attributes, rb_name, rb_value); + + c_property = c_property->next; + } + + return rb_attributes; +} + /* * call-seq: * attribute_at(index) @@ -696,6 +738,7 @@ noko_init_xml_reader() rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1); rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0); rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0); + rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0); rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0); rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0); rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0); diff --git a/lib/nokogiri/xml/reader.rb b/lib/nokogiri/xml/reader.rb index df13216afe..1a00796359 100644 --- a/lib/nokogiri/xml/reader.rb +++ b/lib/nokogiri/xml/reader.rb @@ -87,12 +87,7 @@ def initialize(source, url = nil, encoding = nil) # :nodoc: # # [Returns] (Hash) Attribute names and values def attributes - attrs_hash = attribute_nodes.each_with_object({}) do |node, hash| - hash[node.name] = node.to_s - end - ns = namespaces - attrs_hash.merge!(ns) if ns - attrs_hash + attribute_hash.merge(namespaces) end ### diff --git a/test/xml/test_reader.rb b/test/xml/test_reader.rb index b3019691e6..5b7e888203 100644 --- a/test/xml/test_reader.rb +++ b/test/xml/test_reader.rb @@ -228,23 +228,61 @@ def test_attributes? reader.map(&:attributes?)) end - def test_attributes - reader = Nokogiri::XML::Reader.from_memory(<<-eoxml) - - snuggles! - - eoxml + def test_reader_attributes + reader = Nokogiri::XML::Reader.from_memory(<<~XML) + + snuggles! + + XML assert_empty(reader.attributes) assert_equal([{ "xmlns:tenderlove" => "http://tenderlovemaking.com/", "xmlns" => "http://mothership.connection.com/", }, - {}, { "awesome" => "true" }, {}, { "awesome" => "true" }, {}, + {}, + { "awesome" => "true" }, + {}, + { "awesome" => "true" }, + {}, { "xmlns:tenderlove" => "http://tenderlovemaking.com/", "xmlns" => "http://mothership.connection.com/", },], reader.map(&:attributes)) end + def test_reader_attributes_hash + reader = Nokogiri::XML::Reader.from_memory(<<~XML) + + snuggles! + + XML + assert_empty(reader.attribute_hash) + assert_equal([{}, + {}, + { "awesome" => "true" }, + {}, + { "awesome" => "true" }, + {}, + {},], + reader.map(&:attribute_hash)) + end + + def test_reader_namespaces + reader = Nokogiri::XML::Reader.from_memory(<<~XML) + + snuggles! + + XML + assert_empty(reader.namespaces) + assert_equal([{ "xmlns:tenderlove" => "http://tenderlovemaking.com/", + "xmlns" => "http://mothership.connection.com/", }, + {}, + {}, + {}, + {}, + {}, + { "xmlns:tenderlove" => "http://tenderlovemaking.com/", + "xmlns" => "http://mothership.connection.com/", },], + reader.map(&:namespaces)) + end + def test_attribute_roundtrip reader = Nokogiri::XML::Reader.from_memory(<<-eoxml) Date: Thu, 21 Jul 2022 10:38:32 -0400 Subject: [PATCH 3/4] dev: introduce NOKO_WARN_DEPRECATION macro which will use rb_category_warning if it's available, and rb_warning if it's not --- ext/nokogiri/extconf.rb | 1 + ext/nokogiri/nokogiri.h | 6 ++++++ ext/nokogiri/xml_node.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb index c2aeed2d21..6d05d03ae1 100644 --- a/ext/nokogiri/extconf.rb +++ b/ext/nokogiri/extconf.rb @@ -973,6 +973,7 @@ def compile have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23 have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23 have_func("rb_gc_location") # introduced in Ruby 2.7 +have_func("rb_category_warning") # introduced in Ruby 3.0 other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",") append_cppflags(%[-DNOKOGIRI_OTHER_LIBRARY_VERSIONS="\\\"#{other_library_versions_string}\\\""]) diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h index 42f4fd4a4d..63549ecec5 100644 --- a/ext/nokogiri/nokogiri.h +++ b/ext/nokogiri/nokogiri.h @@ -209,6 +209,12 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, #define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v)) #define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v) +#if HAVE_RB_CATEGORY_WARNING +# define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message) +#else +# define NOKO_WARN_DEPRECATION(message) rb_warning(message) +#endif + void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state); void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data, xmlStructuredErrorFunc handler); diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 0986051847..5b972eb65f 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -2071,7 +2071,7 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass) } if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) { // TODO: deprecate allowing Node - rb_warn("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri."); + NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri."); } Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node); From 113440c557055a51695d1233110ad41c93adfcde Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 21 Jul 2022 12:28:21 -0400 Subject: [PATCH 4/4] deprecate: Reader#attribute_nodes This method will be removed in a future version --- ext/java/nokogiri/XmlReader.java | 1 + ext/nokogiri/xml_reader.c | 10 +++++++++- lib/nokogiri/xml/reader.rb | 7 +++++-- test/xml/test_reader.rb | 25 ++++++++++++++++--------- 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/ext/java/nokogiri/XmlReader.java b/ext/java/nokogiri/XmlReader.java index 0f8935dd49..403b5e7d4c 100644 --- a/ext/java/nokogiri/XmlReader.java +++ b/ext/java/nokogiri/XmlReader.java @@ -142,6 +142,7 @@ public class XmlReader extends RubyObject public IRubyObject attribute_nodes(ThreadContext context) { + context.runtime.getWarnings().warn("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead."); return currentNode().getAttributesNodes(); } diff --git a/ext/nokogiri/xml_reader.c b/ext/nokogiri/xml_reader.c index 81f4bdc275..022f8ff531 100644 --- a/ext/nokogiri/xml_reader.c +++ b/ext/nokogiri/xml_reader.c @@ -151,7 +151,11 @@ namespaces(VALUE self) /* :call-seq: attribute_nodes() → Array - Get the attributes of the current node as an Array of Attr + Get the attributes of the current node as an Array of XML:Attr + + ⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri. + + See related: #attribute_hash, #attributes */ static VALUE rb_xml_reader_attribute_nodes(VALUE rb_reader) @@ -161,6 +165,10 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader) VALUE attr_nodes; int j; + // TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598 + // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c + NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead."); + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); if (! has_attributes(c_reader)) { diff --git a/lib/nokogiri/xml/reader.rb b/lib/nokogiri/xml/reader.rb index 1a00796359..5f637cf953 100644 --- a/lib/nokogiri/xml/reader.rb +++ b/lib/nokogiri/xml/reader.rb @@ -83,9 +83,12 @@ def initialize(source, url = nil, encoding = nil) # :nodoc: end private :initialize - # Get the attributes of the current node as a Hash + # Get the attributes and namespaces of the current node as a Hash. # - # [Returns] (Hash) Attribute names and values + # This is the union of Reader#attribute_hash and Reader#namespaces + # + # [Returns] + # (Hash) Attribute names and values, and namespace prefixes and hrefs. def attributes attribute_hash.merge(namespaces) end diff --git a/test/xml/test_reader.rb b/test/xml/test_reader.rb index 5b7e888203..ab119656c7 100644 --- a/test/xml/test_reader.rb +++ b/test/xml/test_reader.rb @@ -472,7 +472,9 @@ def test_reader_node_attributes_keep_a_reference_to_the_reader reader = Nokogiri::XML::Reader.from_memory(xml) reader.each do |element| - attribute_nodes += element.attribute_nodes + assert_output(nil, /Reader#attribute_nodes is deprecated/) do + attribute_nodes += element.attribute_nodes + end end end @@ -489,7 +491,9 @@ def test_namespaced_attributes eoxml attr_ns = [] while reader.read - if reader.node_type == Nokogiri::XML::Node::ELEMENT_NODE + next unless reader.node_type == Nokogiri::XML::Node::ELEMENT_NODE + + assert_output(nil, /Reader#attribute_nodes is deprecated/) do reader.attribute_nodes.each { |attr| attr_ns << (attr.namespace.nil? ? nil : attr.namespace.prefix) } end end @@ -593,14 +597,17 @@ def test_read_from_memory end def test_large_document_smoke_test - # simply run on a large document to verify that there no GC issues - xml = [] - xml << "" - 10000.times { |j| xml << "" } - xml << "" - xml = xml.join("\n") + skip_unless_libxml2("valgrind tests should only run with libxml2") - Nokogiri::XML::Reader.from_memory(xml).each(&:attributes) + refute_valgrind_errors do + xml = [] + xml << "" + 10000.times { |j| xml << "" } + xml << "" + xml = xml.join("\n") + + Nokogiri::XML::Reader.from_memory(xml).each(&:attributes) + end end def test_correct_outer_xml_inclusion