diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e992254fb..7e79eb3512 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,61 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA * [CRuby] If a cycle is introduced when reparenting a node (i.e., the node becomes its own ancestor), a `RuntimeError` is raised. libxml2 does no checking for this, which means cycles would otherwise result in infinite loops on subsequent operations. (Note: JRuby/Xerces already does this.) [[#1912](https://github.com/sparklemotion/nokogiri/issues/1912)] +## 1.12.4 / unreleased + +### Notable fix: Namespace inheritance + +Namespace behavior when reparenting nodes has historically been poorly specified and the behavior +diverged between CRuby and JRuby. As a result, making this behavior consistent in v1.12.0 introduced +a breaking change. + +This patch release reverts the Builder behavior present in v1.12.0..v1.12.3 but keeps the Document +behavior. This release also introduces a Document attribute to allow affected users to easily change +this behavior for their legacy code without invasive changes. + + +#### Compensating Feature in XML::Document + +This release of Nokogiri introduces a new `Document` boolean attribute, `namespace_inheritance`, +which controls whether children should inherit a namespace when they are reparented. +`Nokogiri::XML:Document` defaults this attribute to `false` meaning "do not inherit," thereby making +explicit the behavior change introduced in v1.12.0. + +CRuby users who desire the pre-v1.12.0 behavior may set `document.namespace_inheritance = true` before +reparenting nodes. + +See https://nokogiri.org/rdoc/Nokogiri/XML/Document.html#namespace_inheritance-instance_method for +example usage. + + +#### Fix for XML::Builder + +However, recognizing that we want `Builder`-created children to inherit namespaces, Builder now will +set `namespace_inheritance=true` on the underlying document for both JRuby and CRuby. This means that, on CRuby, the pre-v1.12.0 behavior is restored. + +Users who want to turn this behavior off may pass a keyword argument to the Builder constructor like +so: + +``` ruby +Nokogiri::XML::Builder.new(namespace_inheritance: false) +``` + +See https://nokogiri.org/rdoc/Nokogiri/XML/Builder.html#label-Namespace+inheritance for example +usage. + + +#### Downstream gem maintainers + +Note that any downstream gems may want to specifically omit Nokogiri v1.12.0--v1.12.3 from their dependency specification if they rely on child namespace inheritance: + +``` ruby +Gem::Specification.new do |gem| + # ... + gem.add_runtime_dependency 'nokogiri', '!=1.12.3', '!=1.12.2', '!=1.12.1', '!=1.12.0' + # ... +end +``` + ### Fixed * [JRuby] Fix NPE in Schema parsing when an imported resource doesn't have a `systemId`. [[#2296](https://github.com/sparklemotion/nokogiri/issues/2296)] (Thanks, [@pepijnve](https://github.com/pepijnve)!) diff --git a/ext/java/nokogiri/XmlDocumentFragment.java b/ext/java/nokogiri/XmlDocumentFragment.java index 17c1be9081..7d8572605f 100644 --- a/ext/java/nokogiri/XmlDocumentFragment.java +++ b/ext/java/nokogiri/XmlDocumentFragment.java @@ -34,8 +34,6 @@ public class XmlDocumentFragment extends XmlNode { - private XmlElement fragmentContext; - public XmlDocumentFragment(Ruby ruby) { @@ -75,10 +73,6 @@ public class XmlDocumentFragment extends XmlNode fragment.setDocument(context, doc); fragment.setNode(context.runtime, doc.getDocument().createDocumentFragment()); - //TODO: Get namespace definitions from doc. - if (args.length == 3 && args[2] != null && args[2] instanceof XmlElement) { - fragment.fragmentContext = (XmlElement)args[2]; - } Helpers.invoke(context, fragment, "initialize", args); return fragment; } @@ -158,12 +152,6 @@ public class XmlDocumentFragment extends XmlNode return null; } - public XmlElement - getFragmentContext() - { - return fragmentContext; - } - @Override public void relink_namespace(ThreadContext context) diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java index d1ced80f74..9475e4c561 100644 --- a/ext/java/nokogiri/XmlNode.java +++ b/ext/java/nokogiri/XmlNode.java @@ -12,6 +12,7 @@ import org.apache.xerces.dom.CoreDocumentImpl; import org.jruby.Ruby; import org.jruby.RubyArray; +import org.jruby.RubyBoolean; import org.jruby.RubyClass; import org.jruby.RubyFixnum; import org.jruby.RubyInteger; @@ -421,7 +422,14 @@ public class XmlNode extends RubyObject String nsURI = e.lookupNamespaceURI(prefix); this.node = NokogiriHelpers.renameNode(e, nsURI, e.getNodeName()); - if (nsURI == null || nsURI.isEmpty()) { return; } + if (nsURI == null || nsURI.isEmpty()) { + RubyBoolean ns_inherit = + (RubyBoolean)document(context.runtime).getInstanceVariable("@namespace_inheritance"); + if (ns_inherit.isTrue()) { + set_namespace(context, ((XmlNode)parent(context)).namespace(context)); + } + return; + } String currentPrefix = e.getParentNode().lookupPrefix(nsURI); String currentURI = e.getParentNode().lookupNamespaceURI(prefix); @@ -1743,24 +1751,11 @@ protected enum AdoptScheme { e.appendChild(otherNode); otherNode = e; } else { - addNamespaceURIIfNeeded(otherNode); parent.appendChild(otherNode); } return new Node[] { otherNode }; } - private void - addNamespaceURIIfNeeded(Node child) - { - if (this instanceof XmlDocumentFragment && ((XmlDocumentFragment) this).getFragmentContext() != null) { - XmlElement fragmentContext = ((XmlDocumentFragment) this).getFragmentContext(); - String namespace_uri = fragmentContext.node.getNamespaceURI(); - if (namespace_uri != null && namespace_uri.length() > 0) { - NokogiriHelpers.renameNode(child, namespace_uri, child.getNodeName()); - } - } - } - protected void adoptAsPrevSibling(ThreadContext context, Node parent, diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 6286bccbf7..3472946630 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -69,6 +69,13 @@ relink_namespace(xmlNodePtr reparented) /* Avoid segv when relinking against unlinked nodes. */ if (reparented->type != XML_ELEMENT_NODE || !reparented->parent) { return; } + /* Make sure that our reparented node has the correct namespaces */ + if (!reparented->ns && + (reparented->doc != (xmlDocPtr)reparented->parent) && + (rb_iv_get(DOC_RUBY_OBJECT(reparented->doc), "@namespace_inheritance") == Qtrue)) { + xmlSetNs(reparented, reparented->parent->ns); + } + /* Search our parents for an existing definition */ if (reparented->nsDef) { xmlNsPtr curr = reparented->nsDef; diff --git a/lib/nokogiri/xml/builder.rb b/lib/nokogiri/xml/builder.rb index 1c7ed087fe..d08ca83f31 100644 --- a/lib/nokogiri/xml/builder.rb +++ b/lib/nokogiri/xml/builder.rb @@ -196,6 +196,41 @@ module XML # # Note the "foo:object" tag. # + # === Namespace inheritance + # + # In the Builder context, children will inherit their parent's namespace. This is the same + # behavior as if the underlying {XML::Document} set +namespace_inheritance+ to +true+: + # + # result = Nokogiri::XML::Builder.new do |xml| + # xml["soapenv"].Envelope("xmlns:soapenv" => "http://schemas.xmlsoap.org/soap/envelope/") do + # xml.Header + # end + # end + # result.doc.to_xml + # # => + # # + # # + # # + # + # Users may turn this behavior off by passing a keyword argument +namespace_inheritance:false+ + # to the initializer: + # + # result = Nokogiri::XML::Builder.new(namespace_inheritance: false) do |xml| + # xml["soapenv"].Envelope("xmlns:soapenv" => "http://schemas.xmlsoap.org/soap/envelope/") do + # xml.Header + # xml["soapenv"].Body # users may explicitly opt into the namespace + # end + # end + # result.doc.to_xml + # # => + # # + # #
+ # # + # # + # + # For more information on namespace inheritance, please see {XML::Document#namespace_inheritance} + # + # # == Document Types # # To create a document type (DTD), access use the Builder#doc method to get @@ -226,6 +261,8 @@ module XML # # class Builder + DEFAULT_DOCUMENT_OPTIONS = {namespace_inheritance: true} + # The current Document object being built attr_accessor :doc @@ -282,6 +319,7 @@ def initialize(options = {}, root = nil, &block) @arity = nil @ns = nil + options = DEFAULT_DOCUMENT_OPTIONS.merge(options) options.each do |k, v| @doc.send(:"#{k}=", v) end diff --git a/lib/nokogiri/xml/document.rb b/lib/nokogiri/xml/document.rb index d09c0e41ad..4a4e5087a1 100644 --- a/lib/nokogiri/xml/document.rb +++ b/lib/nokogiri/xml/document.rb @@ -113,9 +113,55 @@ def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions:: # A list of Nokogiri::XML::SyntaxError found when parsing a document attr_accessor :errors + # When true, reparented elements without a namespace will inherit their new parent's + # namespace (if one exists). Defaults to +false+. + # + # @example Default behavior of namespace inheritance + # xml = <<~EOF + # + # + # + # + # EOF + # doc = Nokogiri::XML(xml) + # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo") + # parent.add_child("") + # doc.to_xml + # # => + # # + # # + # # + # # + # # + # + # @example Setting namespace inheritance to +true+ + # xml = <<~EOF + # + # + # + # + # EOF + # doc = Nokogiri::XML(xml) + # doc.namespace_inheritance = true + # parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo") + # parent.add_child("") + # doc.to_xml + # # => + # # + # # + # # + # # + # # + # + # @return [Boolean] + # + # @since v1.12.4 + attr_accessor :namespace_inheritance + def initialize *args # :nodoc: @errors = [] @decorators = nil + @namespace_inheritance = false end ## diff --git a/test/xml/test_builder.rb b/test/xml/test_builder.rb index 85d9b1e6b3..022a200970 100644 --- a/test/xml/test_builder.rb +++ b/test/xml/test_builder.rb @@ -107,9 +107,37 @@ def test_non_root_namespace assert_equal "one", b.doc.at("hello", "xmlns" => "one").namespace.href end - def test_builder_namespace_children_do_not_inherit - # see https://github.com/sparklemotion/nokogiri/issues/1712 + def test_builder_namespace_inheritance_true + # see https://github.com/sparklemotion/nokogiri/issues/2317 result = Nokogiri::XML::Builder.new(encoding: 'utf-8') do |xml| + xml["soapenv"].Envelope("xmlns:soapenv" => "http://schemas.xmlsoap.org/soap/envelope/") do + xml.Header + end + end + assert(result.doc.namespace_inheritance) + assert( + result.doc.at_xpath("//soapenv:Header", "soapenv" => "http://schemas.xmlsoap.org/soap/envelope/"), + "header element should have a namespace" + ) + end + + def test_builder_namespace_inheritance_false + # see https://github.com/sparklemotion/nokogiri/issues/2317 + result = Nokogiri::XML::Builder.new(encoding: 'utf-8', namespace_inheritance: false) do |xml| + xml["soapenv"].Envelope("xmlns:soapenv" => "http://schemas.xmlsoap.org/soap/envelope/") do + xml.Header + end + end + refute(result.doc.namespace_inheritance) + assert( + result.doc.at_xpath("//Header"), + "header element should not have a namespace" + ) + end + + def test_builder_namespace_inheritance_false_part_deux + # see https://github.com/sparklemotion/nokogiri/issues/1712 + result = Nokogiri::XML::Builder.new(encoding: 'utf-8', namespace_inheritance: false) do |xml| xml['soapenv'].Envelope('xmlns:soapenv' => 'http://schemas.xmlsoap.org/soap/envelope/', 'xmlns:emer' => 'http://dashcs.com/api/v1/emergency') do xml['soapenv'].Header xml['soapenv'].Body do @@ -197,12 +225,18 @@ def test_specified_namespace_undeclared } end + def test_set_namespace_inheritance + assert(Nokogiri::XML::Builder.new.doc.namespace_inheritance) + refute(Nokogiri::XML::Builder.new(namespace_inheritance: false).doc.namespace_inheritance) + end + def test_set_encoding builder = Nokogiri::XML::Builder.new(:encoding => "UTF-8") do |xml| xml.root do xml.bar "blah" end end + assert_equal "UTF-8", builder.doc.encoding assert_match "UTF-8", builder.to_xml end diff --git a/test/xml/test_document.rb b/test/xml/test_document.rb index e56ffba913..48eed1d10e 100644 --- a/test/xml/test_document.rb +++ b/test/xml/test_document.rb @@ -15,6 +15,13 @@ class TestDocument < Nokogiri::TestCase let(:xml) { Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) } + def test_default_namespace_inheritance + doc = Nokogiri::XML::Document.new + refute(doc.namespace_inheritance) + doc.namespace_inheritance = true + assert(doc.namespace_inheritance) + end + def test_dtd_with_empty_internal_subset doc = Nokogiri::XML(<<~eoxml) diff --git a/test/xml/test_node_reparenting.rb b/test/xml/test_node_reparenting.rb index 7ef7e30236..3027eeadaa 100644 --- a/test/xml/test_node_reparenting.rb +++ b/test/xml/test_node_reparenting.rb @@ -557,6 +557,35 @@ def coerce(data) end end end + + describe "given a parent node with a non-default namespace" do + let(:doc) do + Nokogiri::XML(<<~EOF) + + + + + EOF + end + let(:parent) { doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo") } + + describe "and namespace_inheritance is off" do + it "inserts a child node that does not inherit the parent's namespace" do + refute(doc.namespace_inheritance) + child = parent.add_child("").first + assert_nil(child.namespace) + end + end + + describe "and namespace_inheritance is on" do + it "inserts a child node that inherits the parent's namespace" do + doc.namespace_inheritance = true + child = parent.add_child("").first + assert_not_nil(child.namespace) + assert_equal("http://nokogiri.org/default_ns/test/foo", child.namespace.href) + end + end + end end describe "#add_previous_sibling" do