diff --git a/CHANGELOG.md b/CHANGELOG.md index 05e16c9b28..c1da35d438 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ A related discussion about Trust exists at [#2357](https://github.com/sparklemot * [CRuby] `Node#line` is no longer capped at 65535. libxml v2.9.0 and later support a new parse option, exposed as `Nokogiri::XML::ParseOptions::PARSE_BIG_LINES` and set in `ParseOptions::DEFAULT_XML`, `::DEFAULT_XSLT`, `::DEFAULT_HTML`, and `::DEFAULT_SCHEMA`. (Note that JRuby never had this problem.) [[#1764](https://github.com/sparklemotion/nokogiri/issues/1764), [#1493](https://github.com/sparklemotion/nokogiri/issues/1493), [#1617](https://github.com/sparklemotion/nokogiri/issues/1617), [#1505](https://github.com/sparklemotion/nokogiri/issues/1505), [#1003](https://github.com/sparklemotion/nokogiri/issues/1003), [#533](https://github.com/sparklemotion/nokogiri/issues/533)] * [CRuby] If a cycle is introduced when reparenting a node (i.e., the node becomes its own ancestor), a `RuntimeError` is raised. libxml2 does no checking for this, which means cycles would otherwise result in infinite loops on subsequent operations. (Note: JRuby/Xerces already does this.) [[#1912](https://github.com/sparklemotion/nokogiri/issues/1912)] * [CRuby] Source builds will download zlib and libiconv via HTTPS. [[#2391](https://github.com/sparklemotion/nokogiri/issues/2391)] (Thanks, [@jmartin-r7](https://github.com/jmartin-r7)!) +* [JRuby] `Node#line` behavior has been modified to return the line number of the node in the _final DOM structure_ (the value returned in JRuby is increased by 1 to count the XML prolog). This behavior is different from CRuby, which returns the node's position in the _input string_. This difference is not ideal, but at least is now officially documented and tested. The real-world impact of this change is that the value returned in JRuby is greater by 1 to account for the XML prolog in the output. [[#2380](https://github.com/sparklemotion/nokogiri/issues/2380)] (Thanks, [@dabdine](https://github.com/dabdine)!) ## 1.12.5 / 2021-09-27 diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java index 9475e4c561..f7feb43b51 100644 --- a/ext/java/nokogiri/XmlNode.java +++ b/ext/java/nokogiri/XmlNode.java @@ -36,6 +36,7 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.Text; +import org.w3c.dom.Comment; import nokogiri.internals.HtmlDomParserContext; import nokogiri.internals.NokogiriHelpers; @@ -1579,6 +1580,10 @@ public class XmlNode extends RubyObject return getNokogiriClass(context.runtime, "Nokogiri::XML::Node").getConstant(type); } + /* + * NOTE that the behavior of this function is very difference from the CRuby implementation, see + * the docstring in ext/nokogiri/xml_node.c for details. + */ @JRubyMethod public IRubyObject line(ThreadContext context) @@ -1586,7 +1591,10 @@ public class XmlNode extends RubyObject Node root = getOwnerDocument(); int[] counter = new int[1]; count(root, counter); - return RubyFixnum.newFixnum(context.runtime, counter[0] + 1); + // offset of 2: + // - one because humans start counting at 1 not zero + // - one to account for the XML declaration present in the output + return RubyFixnum.newFixnum(context.runtime, counter[0] + 2); } private boolean @@ -1599,9 +1607,14 @@ public class XmlNode extends RubyObject NodeList list = node.getChildNodes(); for (int jchild = 0; jchild < list.getLength(); jchild++) { Node child = list.item(jchild); + String text = null; if (child instanceof Text) { - String text = ((Text)child).getData(); + text = ((Text)child).getData(); + } else if (child instanceof Comment) { + text = ((Comment)child).getData(); + } + if (text != null) { int textLength = text.length(); for (int jchar = 0; jchar < textLength; jchar++) { if (text.charAt(jchar) == '\n') { diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 459bbc44f2..ba9e60c85e 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -1698,10 +1698,31 @@ native_write_to( } /* - * call-seq: - * line + * :call-seq: + * line() → Integer + * + * [Returns] The line number of this Node. + * + * --- + * + * ⚠ The CRuby and JRuby implementations differ in important ways! + * + * Semantic differences: + * - The CRuby method reflects the node's line number in the parsed string + * - The JRuby method reflects the node's line number in the final DOM structure after + * corrections have been applied + * + * Performance differences: + * - The CRuby method is {O(1)}[https://en.wikipedia.org/wiki/Time_complexity#Constant_time] + * (constant time) + * - The JRuby method is {O(n)}[https://en.wikipedia.org/wiki/Time_complexity#Linear_time] (linear + * time, where n is the number of nodes before/above the element in the DOM) * - * Returns the line for this Node + * If you'd like to help improve the JRuby implementation, please review these issues and reach out + * to the maintainers: + * - https://github.com/sparklemotion/nokogiri/issues/1223 + * - https://github.com/sparklemotion/nokogiri/pull/2177 + * - https://github.com/sparklemotion/nokogiri/issues/2380 */ static VALUE rb_xml_node_line(VALUE rb_node) diff --git a/test/helper.rb b/test/helper.rb index 224525200b..0ec60b86c4 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -196,6 +196,20 @@ def assert_not_send(send_ary, m = nil) refute(recv.__send__(msg, *args), m) end unless method_defined?(:assert_not_send) + def pending(msg) + begin + yield + rescue MiniTest::Assertion + skip("pending #{msg}") + end + flunk("pending test unexpectedly passed: #{msg}") + end + + def pending_if(msg, pend_eh, &block) + return yield unless pend_eh + pending(msg, &block) + end + def i_am_ruby_matching(gem_version_requirement_string) Gem::Requirement.new(gem_version_requirement_string).satisfied_by?(Gem::Version.new(RUBY_VERSION)) end diff --git a/test/xml/test_node.rb b/test/xml/test_node.rb index 94302c766d..bc52da09be 100644 --- a/test/xml/test_node.rb +++ b/test/xml/test_node.rb @@ -299,11 +299,11 @@ def test_append_with_attr end def test_inspect_ns - xml = Nokogiri::XML(<<~eoxml, &:noblanks) + xml = Nokogiri::XML(<<~XML, &:noblanks) - eoxml + XML ins = xml.inspect xml.traverse do |node| @@ -323,30 +323,30 @@ def test_inspect_ns end def test_namespace_definitions_when_some_exist - xml = Nokogiri::XML(<<~eoxml) + xml = Nokogiri::XML(<<~XML) - eoxml + XML namespace_definitions = xml.root.namespace_definitions assert_equal(2, namespace_definitions.length) end def test_namespace_definitions_when_no_exist - xml = Nokogiri::XML(<<~eoxml) + xml = Nokogiri::XML(<<~XML) - eoxml + XML namespace_definitions = xml.at_xpath("//xmlns:awesome").namespace_definitions assert_equal(0, namespace_definitions.length) end def test_default_namespace_goes_to_children - fruits = Nokogiri::XML(<<~eoxml) + fruits = Nokogiri::XML(<<~XML) - eoxml + XML apple = Nokogiri::XML::Node.new("Apple", fruits) orange = Nokogiri::XML::Node.new("Orange", fruits) apple << orange @@ -356,11 +356,11 @@ def test_default_namespace_goes_to_children end def test_parent_namespace_is_not_inherited - fruits = Nokogiri::XML(<<-eoxml) + fruits = Nokogiri::XML(<<-XML) - eoxml + XML apple = fruits.at_xpath("//fruit:apple", { "fruit" => "http://fruits.org" }) assert(apple) @@ -412,11 +412,11 @@ def test_different_document_compare end def test_duplicate_node_removes_namespace - fruits = Nokogiri::XML(<<~eoxml) + fruits = Nokogiri::XML(<<~XML) - eoxml + XML apple = fruits.root.xpath("fruit:Apple", { "fruit" => "www.fruits.org" })[0] new_apple = apple.dup fruits.root << new_apple @@ -450,10 +450,10 @@ def test_fragment_creates_elements end def test_node_added_to_root_should_get_namespace - fruits = Nokogiri::XML(<<~eoxml) + fruits = Nokogiri::XML(<<~XML) - eoxml + XML apple = fruits.fragment("") fruits.root << apple assert_equal(1, fruits.xpath("//xmlns:Apple").length) @@ -466,15 +466,15 @@ def test_new_node_can_have_ancestors end def test_children - doc = Nokogiri::XML(<<~eoxml) + doc = Nokogiri::XML(<<~XML) #{"" * 9} - eoxml + XML assert_equal(9, doc.root.children.length) assert_equal(9, doc.root.children.to_a.length) - doc = Nokogiri::XML(<<~eoxml) + doc = Nokogiri::XML(<<~XML) #{"" * 15} - eoxml + XML assert_equal(15, doc.root.children.length) assert_equal(15, doc.root.children.to_a.length) end @@ -658,13 +658,13 @@ def test_serialize_with_block end def test_hold_refence_to_subnode - doc = Nokogiri::XML(<<~eoxml) + doc = Nokogiri::XML(<<~XML) - eoxml + XML assert(node_a = doc.css("a").first) assert(node_b = node_a.css("b").first) node_a.unlink @@ -730,7 +730,7 @@ def test_find_by_css_class_with_nonstandard_whitespace end def test_find_by_css_with_tilde_eql - xml = Nokogiri::XML.parse(<<~eoxml) + xml = Nokogiri::XML.parse(<<~XML) Hello world Bar @@ -740,14 +740,14 @@ def test_find_by_css_with_tilde_eql Awesome Awesome - eoxml + XML set = xml.css('a[@class~="bar"]') assert_equal(4, set.length) assert_equal(["Bar"], set.map(&:content).uniq) end def test_unlink - xml = Nokogiri::XML.parse(<<~eoxml) + xml = Nokogiri::XML.parse(<<~XML) Bar Bar @@ -757,7 +757,7 @@ def test_unlink Awesome Awesome - eoxml + XML node = xml.xpath("//a")[3] assert_equal("Hello world", node.text) assert_match(/Hello world/, xml.to_s) @@ -921,7 +921,7 @@ def test_node_equality end def test_namespace_search_with_xpath_and_hash - xml = Nokogiri::XML.parse(<<~eoxml) + xml = Nokogiri::XML.parse(<<~XML) Michelin Model XGV @@ -930,14 +930,14 @@ def test_namespace_search_with_xpath_and_hash I'm a bicycle tire! - eoxml + XML tires = xml.xpath("//bike:tire", { "bike" => "http://schwinn.com/" }) assert_equal(1, tires.length) end def test_namespace_search_with_xpath_and_hash_with_symbol_keys - xml = Nokogiri::XML.parse(<<~eoxml) + xml = Nokogiri::XML.parse(<<~XML) Michelin Model XGV @@ -946,14 +946,14 @@ def test_namespace_search_with_xpath_and_hash_with_symbol_keys I'm a bicycle tire! - eoxml + XML tires = xml.xpath("//bike:tire", bike: "http://schwinn.com/") assert_equal(1, tires.length) end def test_namespace_search_with_css - xml = Nokogiri::XML.parse(<<~eoxml) + xml = Nokogiri::XML.parse(<<~XML) Michelin Model XGV @@ -962,7 +962,7 @@ def test_namespace_search_with_css I'm a bicycle tire! - eoxml + XML tires = xml.css("bike|tire", "bike" => "http://schwinn.com/") assert_equal(1, tires.length) @@ -970,13 +970,13 @@ def test_namespace_search_with_css def test_namespaced_attribute_search_with_xpath # from #593 - xml_content = <<~EOXML + xml_content = <<~XML with namespace no namespace - EOXML + XML xml_doc = Nokogiri::XML(xml_content) no_ns = xml_doc.xpath("//*[@att]") @@ -990,13 +990,13 @@ def test_namespaced_attribute_search_with_xpath def test_namespaced_attribute_search_with_css # from #593 - xml_content = <<~EOXML + xml_content = <<~XML with namespace no namespace - EOXML + XML xml_doc = Nokogiri::XML(xml_content) no_ns = xml_doc.css("*[att]") @@ -1009,14 +1009,14 @@ def test_namespaced_attribute_search_with_css end def test_namespaces_should_include_all_namespace_definitions - xml = Nokogiri::XML.parse(<<~EOF) + xml = Nokogiri::XML.parse(<<~XML) hello - EOF + XML namespaces = xml.namespaces # Document#namespace assert_equal({ "xmlns" => "http://quux.com/", @@ -1048,7 +1048,7 @@ def test_namespaces_should_include_all_namespace_definitions end def test_namespace - xml = Nokogiri::XML.parse(<<~EOF) + xml = Nokogiri::XML.parse(<<~XML) hello a @@ -1058,7 +1058,7 @@ def test_namespace
hello moon
- EOF + XML set = xml.search("//y/*") assert_equal("a", set[0].namespace.prefix) assert_equal("http://foo.com/", set[0].namespace.href) @@ -1082,9 +1082,9 @@ def test_namespace_without_an_href_on_html_node # describe how we handle microsoft word's HTML formatting. # this test is descriptive, not prescriptive. # - html = Nokogiri::HTML.parse(<<~EOF) + html = Nokogiri::HTML.parse(<<~XML)
foo
- EOF + XML node = html.at("div").children.first refute_nil(node) @@ -1171,11 +1171,11 @@ def test_namespace_in_rendered_xml # issue 771 def test_format_noblank - content = <<~eoxml + content = <<~XML hello - eoxml + XML subject = Nokogiri::XML(content) do |conf| conf.default_xml.noblanks end @@ -1212,7 +1212,7 @@ def test_processing_instruction_eh end def test_node_lang - document = Nokogiri::XML(<<~EOXML) + document = Nokogiri::XML(<<~XML)
foo
@@ -1220,7 +1220,7 @@ def test_node_lang
bar
bar
- EOXML + XML assert_equal("en", document.at_css(".english").lang) assert_equal("en", document.at_css(".english_child").lang) assert_equal("jp", document.at_css(".japanese").lang) @@ -1262,8 +1262,73 @@ def test_wrap end describe "#line" do + it "counts lines" do + xml = Nokogiri::XML(<<~XML) +
+ Test + + XML + + if Nokogiri.jruby? + # in the output + assert_equal(3, xml.at_css("b").line) + else + # in the input + assert_equal(2, xml.at_css("b").line) + end + end + + it "properly numbers lines with documents containing XML prolog" do + xml = Nokogiri::XML(<<~XML) + + + Test + + XML + + assert_equal(3, xml.at_css("b").line) + end + + it "properly numbers lines with documents containing XML comments" do + xml = Nokogiri::XML(<<~XML) + + + + Test + + + XML + + if Nokogiri.jruby? + assert_equal(5, xml.at_css("c").line) + else + assert_equal(4, xml.at_css("c").line) + end + end + + it "properly numbers lines with documents containing XML multiline comments" do + xml = Nokogiri::XML(<<~XML) + + + + + Test + + + + XML + + if Nokogiri.jruby? + assert_equal(7, xml.at_css("c").line) + else + assert_equal(6, xml.at_css("c").line) + end + end + it "returns a sensible line number for each node" do - xml = Nokogiri::XML(<<~eoxml) + xml = Nokogiri::XML(<<~XML) Hello world @@ -1272,11 +1337,16 @@ def test_wrap Goodbye world - eoxml - - set = xml.search("//b") - assert_equal(2, set[0].line) - assert_equal(5, set[1].line) + XML + + set = xml.css("b") + if Nokogiri.jruby? + assert_equal(3, set[0].line) + assert_equal(6, set[1].line) + else + assert_equal(2, set[0].line) + assert_equal(5, set[1].line) + end end it "supports a line number greater than a short int" do