diff --git a/CHANGELOG.md b/CHANGELOG.md index 36c03eae40..cfda5db97b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,11 @@ This version of Nokogiri uses [`jar-dependencies`](https://github.com/mkristian/ * [JRuby] `NodeSet#[]` now raises a TypeError if passed an invalid parameter type. [[#2211](https://github.com/sparklemotion/nokogiri/issues/2211)] +### Fixed + +* [CRuby] When reparenting HTML nodes, do not attempt to relink namespaces. Previously, an HTML attribute with a colon might be interpreted as a prefixed/namespaced atttribute (for example, "xml:lang"). [[#1790](https://github.com/sparklemotion/nokogiri/issues/1790)] + + ### Improved * Serialization of HTML5 documents and fragments has been re-implemented and is ~10x faster than previous versions. [[#2596](https://github.com/sparklemotion/nokogiri/issues/2596), [#2569](https://github.com/sparklemotion/nokogiri/issues/2569)] diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java index 69e6772959..656e47aff8 100644 --- a/ext/java/nokogiri/XmlNode.java +++ b/ext/java/nokogiri/XmlNode.java @@ -440,24 +440,24 @@ public class XmlNode extends RubyObject set_namespace(context, ((XmlNode)parent(context)).namespace(context)); } return; - } - - String currentPrefix = e.getParentNode().lookupPrefix(nsURI); - String currentURI = e.getParentNode().lookupNamespaceURI(prefix); - boolean isDefault = e.getParentNode().isDefaultNamespace(nsURI); - - // add xmlns attribute if this is a new root node or if the node's - // namespace isn't a default namespace in the new document - if (e.getParentNode().getNodeType() == Node.DOCUMENT_NODE) { - // this is the root node, so we must set the namespaces attributes anyway - e.setAttribute(prefix == null ? "xmlns" : "xmlns:" + prefix, nsURI); - } else if (prefix == null) { - // this is a default namespace but isn't the default where this node is being added - if (!isDefault) { e.setAttribute("xmlns", nsURI); } - } else if (!prefix.equals(currentPrefix) || nsURI.equals(currentURI)) { - // this is a prefixed namespace - // but doesn't have the same prefix or the prefix is set to a different URI - e.setAttribute("xmlns:" + prefix, nsURI); + } else { + String currentPrefix = e.getParentNode().lookupPrefix(nsURI); + String currentURI = e.getParentNode().lookupNamespaceURI(prefix); + boolean isDefault = e.getParentNode().isDefaultNamespace(nsURI); + + // add xmlns attribute if this is a new root node or if the node's + // namespace isn't a default namespace in the new document + if (e.getParentNode().getNodeType() == Node.DOCUMENT_NODE) { + // this is the root node, so we must set the namespaces attributes anyway + e.setAttribute(prefix == null ? "xmlns" : "xmlns:" + prefix, nsURI); + } else if (prefix == null) { + // this is a default namespace but isn't the default where this node is being added + if (!isDefault) { e.setAttribute("xmlns", nsURI); } + } else if (!prefix.equals(currentPrefix) || nsURI.equals(currentURI)) { + // this is a prefixed namespace + // but doesn't have the same prefix or the prefix is set to a different URI + e.setAttribute("xmlns:" + prefix, nsURI); + } } if (e.hasAttributes()) { @@ -492,8 +492,10 @@ public class XmlNode extends RubyObject } } - if (this.node.hasChildNodes()) { - relink_namespace(context, getChildren()); + if (nsURI != null && !nsURI.isEmpty()) { + if (this.node.hasChildNodes()) { + relink_namespace(context, getChildren()); + } } } diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 1728c4d415..28c28b2aab 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -153,18 +153,6 @@ relink_namespace(xmlNodePtr reparented) } } - /* Only walk all children if there actually is a namespace we need to */ - /* reparent. */ - if (NULL == reparented->ns) { return; } - - /* When a node gets reparented, walk it's children to make sure that */ - /* their namespaces are reparented as well. */ - child = reparented->children; - while (NULL != child) { - relink_namespace(child); - child = child->next; - } - if (reparented->type == XML_ELEMENT_NODE) { attr = reparented->properties; while (NULL != attr) { @@ -172,6 +160,18 @@ relink_namespace(xmlNodePtr reparented) attr = attr->next; } } + + /* Only walk all children if there actually is a namespace we need to */ + /* reparent. */ + if (reparented->ns) { + /* When a node gets reparented, walk it's children to make sure that */ + /* their namespaces are reparented as well. */ + child = reparented->children; + while (NULL != child) { + relink_namespace(child); + child = child->next; + } + } } @@ -408,7 +408,10 @@ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func /* if we've created a cycle, raise an exception */ raise_if_ancestor_of_self(reparented); - relink_namespace(reparented); + /* HTML doesn't have namespaces */ + if (!rb_obj_is_kind_of(DOC_RUBY_OBJECT(reparented->doc), cNokogiriHtml4Document)) { + relink_namespace(reparented); + } return reparented_obj ; } diff --git a/lib/nokogiri/html5/node.rb b/lib/nokogiri/html5/node.rb index 91e9f43d44..b25fa233b8 100644 --- a/lib/nokogiri/html5/node.rb +++ b/lib/nokogiri/html5/node.rb @@ -67,29 +67,8 @@ def fragment(tags) DocumentFragment.new(document, tags, self) end - - private - - # HTML elements can have attributes that contain colons. - # Nokogiri::XML::Node#[]= treats names with colons as a prefixed QName - # and tries to create an attribute in a namespace. This is especially - # annoying with attribute names like xml:lang since libxml2 will - # actually create the xml namespace if it doesn't exist already. - def add_child_node_and_reparent_attrs(node) - return super(node) unless document.is_a?(HTML5::Document) - - # I'm not sure what this method is supposed to do. Reparenting - # namespaces is handled by libxml2, including child namespaces which - # this method wouldn't handle. - # https://github.com/sparklemotion/nokogiri/issues/1790 - add_child_node(node) - # node.attribute_nodes.find_all { |a| a.namespace }.each do |attr| - # attr.remove - # ns = attr.namespace - # a["#{ns.prefix}:#{attr.name}"] = attr.value - # end - end end + # Monkey patch XML::Node.prepend(HTML5::Node) end diff --git a/lib/nokogiri/xml/node.rb b/lib/nokogiri/xml/node.rb index 28da40db48..3e1e3525db 100644 --- a/lib/nokogiri/xml/node.rb +++ b/lib/nokogiri/xml/node.rb @@ -145,9 +145,9 @@ def decorate! def add_child(node_or_tags) node_or_tags = coerce(node_or_tags) if node_or_tags.is_a?(XML::NodeSet) - node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) } + node_or_tags.each { |n| add_child_node(n) } else - add_child_node_and_reparent_attrs(node_or_tags) + add_child_node(node_or_tags) end node_or_tags end @@ -263,9 +263,9 @@ def children=(node_or_tags) node_or_tags = coerce(node_or_tags) children.unlink if node_or_tags.is_a?(XML::NodeSet) - node_or_tags.each { |n| add_child_node_and_reparent_attrs(n) } + node_or_tags.each { |n| add_child_node(n) } else - add_child_node_and_reparent_attrs(node_or_tags) + add_child_node(node_or_tags) end end @@ -1392,14 +1392,6 @@ def inspect_attributes end IMPLIED_XPATH_CONTEXTS = [".//"].freeze - - def add_child_node_and_reparent_attrs(node) - add_child_node(node) - node.attribute_nodes.find_all { |a| a.name.include?(":") }.each do |attr_node| - attr_node.remove - node[attr_node.name] = attr_node.value - end - end end end end diff --git a/test/namespaces/test_namespaces_in_created_doc.rb b/test/namespaces/test_namespaces_in_created_doc.rb index 79a6d01225..ef4f6ce591 100644 --- a/test/namespaces/test_namespaces_in_created_doc.rb +++ b/test/namespaces/test_namespaces_in_created_doc.rb @@ -100,19 +100,19 @@ def test_created_namespaced_attribute_on_unparented_node_is_renamespaced_in_xml_ assert(child.attribute_nodes.first.namespace) end - # def test_created_namespaced_attribute_on_unparented_node_is_not_renamespaced_in_html4_doc - # doc1 = Nokogiri::HTML4("") - # doc2 = Nokogiri::HTML4("") - - # child = doc1.create_element("div") - # child["tempname"] = "en" - # attr = child.attribute("tempname") - # attr.name = "xml:lang" - # assert_nil(child.attribute_nodes.first.namespace) - - # doc2.at_css("body").add_child(child) - # assert_nil(child.attribute_nodes.first.namespace) - # end + def test_created_namespaced_attribute_on_unparented_node_is_not_renamespaced_in_html4_doc + doc1 = Nokogiri::HTML4("") + doc2 = Nokogiri::HTML4("") + + child = doc1.create_element("div") + child["tempname"] = "en" + attr = child.attribute("tempname") + attr.name = "xml:lang" + assert_nil(child.attribute_nodes.first.namespace) + + doc2.at_css("body").add_child(child) + assert_nil(child.attribute_nodes.first.namespace) + end def test_created_namespaced_attribute_on_unparented_node_is_not_renamespaced_in_html5_doc skip("HTML5 not supported on this platform yet") unless defined?(Nokogiri::HTML5)