diff --git a/CHANGELOG.md b/CHANGELOG.md
index 05e16c9b28..c1da35d438 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -41,6 +41,7 @@ A related discussion about Trust exists at [#2357](https://github.com/sparklemot
* [CRuby] `Node#line` is no longer capped at 65535. libxml v2.9.0 and later support a new parse option, exposed as `Nokogiri::XML::ParseOptions::PARSE_BIG_LINES` and set in `ParseOptions::DEFAULT_XML`, `::DEFAULT_XSLT`, `::DEFAULT_HTML`, and `::DEFAULT_SCHEMA`. (Note that JRuby never had this problem.) [[#1764](https://github.com/sparklemotion/nokogiri/issues/1764), [#1493](https://github.com/sparklemotion/nokogiri/issues/1493), [#1617](https://github.com/sparklemotion/nokogiri/issues/1617), [#1505](https://github.com/sparklemotion/nokogiri/issues/1505), [#1003](https://github.com/sparklemotion/nokogiri/issues/1003), [#533](https://github.com/sparklemotion/nokogiri/issues/533)]
* [CRuby] If a cycle is introduced when reparenting a node (i.e., the node becomes its own ancestor), a `RuntimeError` is raised. libxml2 does no checking for this, which means cycles would otherwise result in infinite loops on subsequent operations. (Note: JRuby/Xerces already does this.) [[#1912](https://github.com/sparklemotion/nokogiri/issues/1912)]
* [CRuby] Source builds will download zlib and libiconv via HTTPS. [[#2391](https://github.com/sparklemotion/nokogiri/issues/2391)] (Thanks, [@jmartin-r7](https://github.com/jmartin-r7)!)
+* [JRuby] `Node#line` behavior has been modified to return the line number of the node in the _final DOM structure_ (the value returned in JRuby is increased by 1 to count the XML prolog). This behavior is different from CRuby, which returns the node's position in the _input string_. This difference is not ideal, but at least is now officially documented and tested. The real-world impact of this change is that the value returned in JRuby is greater by 1 to account for the XML prolog in the output. [[#2380](https://github.com/sparklemotion/nokogiri/issues/2380)] (Thanks, [@dabdine](https://github.com/dabdine)!)
## 1.12.5 / 2021-09-27
diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java
index 9475e4c561..f7feb43b51 100644
--- a/ext/java/nokogiri/XmlNode.java
+++ b/ext/java/nokogiri/XmlNode.java
@@ -36,6 +36,7 @@
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
+import org.w3c.dom.Comment;
import nokogiri.internals.HtmlDomParserContext;
import nokogiri.internals.NokogiriHelpers;
@@ -1579,6 +1580,10 @@ public class XmlNode extends RubyObject
return getNokogiriClass(context.runtime, "Nokogiri::XML::Node").getConstant(type);
}
+ /*
+ * NOTE that the behavior of this function is very difference from the CRuby implementation, see
+ * the docstring in ext/nokogiri/xml_node.c for details.
+ */
@JRubyMethod
public IRubyObject
line(ThreadContext context)
@@ -1586,7 +1591,10 @@ public class XmlNode extends RubyObject
Node root = getOwnerDocument();
int[] counter = new int[1];
count(root, counter);
- return RubyFixnum.newFixnum(context.runtime, counter[0] + 1);
+ // offset of 2:
+ // - one because humans start counting at 1 not zero
+ // - one to account for the XML declaration present in the output
+ return RubyFixnum.newFixnum(context.runtime, counter[0] + 2);
}
private boolean
@@ -1599,9 +1607,14 @@ public class XmlNode extends RubyObject
NodeList list = node.getChildNodes();
for (int jchild = 0; jchild < list.getLength(); jchild++) {
Node child = list.item(jchild);
+ String text = null;
if (child instanceof Text) {
- String text = ((Text)child).getData();
+ text = ((Text)child).getData();
+ } else if (child instanceof Comment) {
+ text = ((Comment)child).getData();
+ }
+ if (text != null) {
int textLength = text.length();
for (int jchar = 0; jchar < textLength; jchar++) {
if (text.charAt(jchar) == '\n') {
diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c
index 459bbc44f2..ba9e60c85e 100644
--- a/ext/nokogiri/xml_node.c
+++ b/ext/nokogiri/xml_node.c
@@ -1698,10 +1698,31 @@ native_write_to(
}
/*
- * call-seq:
- * line
+ * :call-seq:
+ * line() → Integer
+ *
+ * [Returns] The line number of this Node.
+ *
+ * ---
+ *
+ * ⚠ The CRuby and JRuby implementations differ in important ways!
+ *
+ * Semantic differences:
+ * - The CRuby method reflects the node's line number in the parsed string
+ * - The JRuby method reflects the node's line number in the final DOM structure after
+ * corrections have been applied
+ *
+ * Performance differences:
+ * - The CRuby method is {O(1)}[https://en.wikipedia.org/wiki/Time_complexity#Constant_time]
+ * (constant time)
+ * - The JRuby method is {O(n)}[https://en.wikipedia.org/wiki/Time_complexity#Linear_time] (linear
+ * time, where n is the number of nodes before/above the element in the DOM)
*
- * Returns the line for this Node
+ * If you'd like to help improve the JRuby implementation, please review these issues and reach out
+ * to the maintainers:
+ * - https://github.com/sparklemotion/nokogiri/issues/1223
+ * - https://github.com/sparklemotion/nokogiri/pull/2177
+ * - https://github.com/sparklemotion/nokogiri/issues/2380
*/
static VALUE
rb_xml_node_line(VALUE rb_node)
diff --git a/test/helper.rb b/test/helper.rb
index 224525200b..0ec60b86c4 100644
--- a/test/helper.rb
+++ b/test/helper.rb
@@ -196,6 +196,20 @@ def assert_not_send(send_ary, m = nil)
refute(recv.__send__(msg, *args), m)
end unless method_defined?(:assert_not_send)
+ def pending(msg)
+ begin
+ yield
+ rescue MiniTest::Assertion
+ skip("pending #{msg}")
+ end
+ flunk("pending test unexpectedly passed: #{msg}")
+ end
+
+ def pending_if(msg, pend_eh, &block)
+ return yield unless pend_eh
+ pending(msg, &block)
+ end
+
def i_am_ruby_matching(gem_version_requirement_string)
Gem::Requirement.new(gem_version_requirement_string).satisfied_by?(Gem::Version.new(RUBY_VERSION))
end
diff --git a/test/xml/test_node.rb b/test/xml/test_node.rb
index 94302c766d..bc52da09be 100644
--- a/test/xml/test_node.rb
+++ b/test/xml/test_node.rb
@@ -299,11 +299,11 @@ def test_append_with_attr
end
def test_inspect_ns
- xml = Nokogiri::XML(<<~eoxml, &:noblanks)
+ xml = Nokogiri::XML(<<~XML, &:noblanks)
- eoxml
+ XML
ins = xml.inspect
xml.traverse do |node|
@@ -323,30 +323,30 @@ def test_inspect_ns
end
def test_namespace_definitions_when_some_exist
- xml = Nokogiri::XML(<<~eoxml)
+ xml = Nokogiri::XML(<<~XML)
- eoxml
+ XML
namespace_definitions = xml.root.namespace_definitions
assert_equal(2, namespace_definitions.length)
end
def test_namespace_definitions_when_no_exist
- xml = Nokogiri::XML(<<~eoxml)
+ xml = Nokogiri::XML(<<~XML)
- eoxml
+ XML
namespace_definitions = xml.at_xpath("//xmlns:awesome").namespace_definitions
assert_equal(0, namespace_definitions.length)
end
def test_default_namespace_goes_to_children
- fruits = Nokogiri::XML(<<~eoxml)
+ fruits = Nokogiri::XML(<<~XML)
- eoxml
+ XML
apple = Nokogiri::XML::Node.new("Apple", fruits)
orange = Nokogiri::XML::Node.new("Orange", fruits)
apple << orange
@@ -356,11 +356,11 @@ def test_default_namespace_goes_to_children
end
def test_parent_namespace_is_not_inherited
- fruits = Nokogiri::XML(<<-eoxml)
+ fruits = Nokogiri::XML(<<-XML)
- eoxml
+ XML
apple = fruits.at_xpath("//fruit:apple", { "fruit" => "http://fruits.org" })
assert(apple)
@@ -412,11 +412,11 @@ def test_different_document_compare
end
def test_duplicate_node_removes_namespace
- fruits = Nokogiri::XML(<<~eoxml)
+ fruits = Nokogiri::XML(<<~XML)
- eoxml
+ XML
apple = fruits.root.xpath("fruit:Apple", { "fruit" => "www.fruits.org" })[0]
new_apple = apple.dup
fruits.root << new_apple
@@ -450,10 +450,10 @@ def test_fragment_creates_elements
end
def test_node_added_to_root_should_get_namespace
- fruits = Nokogiri::XML(<<~eoxml)
+ fruits = Nokogiri::XML(<<~XML)
- eoxml
+ XML
apple = fruits.fragment("")
fruits.root << apple
assert_equal(1, fruits.xpath("//xmlns:Apple").length)
@@ -466,15 +466,15 @@ def test_new_node_can_have_ancestors
end
def test_children
- doc = Nokogiri::XML(<<~eoxml)
+ doc = Nokogiri::XML(<<~XML)
#{"" * 9}
- eoxml
+ XML
assert_equal(9, doc.root.children.length)
assert_equal(9, doc.root.children.to_a.length)
- doc = Nokogiri::XML(<<~eoxml)
+ doc = Nokogiri::XML(<<~XML)
#{"" * 15}
- eoxml
+ XML
assert_equal(15, doc.root.children.length)
assert_equal(15, doc.root.children.to_a.length)
end
@@ -658,13 +658,13 @@ def test_serialize_with_block
end
def test_hold_refence_to_subnode
- doc = Nokogiri::XML(<<~eoxml)
+ doc = Nokogiri::XML(<<~XML)
- eoxml
+ XML
assert(node_a = doc.css("a").first)
assert(node_b = node_a.css("b").first)
node_a.unlink
@@ -730,7 +730,7 @@ def test_find_by_css_class_with_nonstandard_whitespace
end
def test_find_by_css_with_tilde_eql
- xml = Nokogiri::XML.parse(<<~eoxml)
+ xml = Nokogiri::XML.parse(<<~XML)
Hello world
Bar
@@ -740,14 +740,14 @@ def test_find_by_css_with_tilde_eql
Awesome
Awesome
- eoxml
+ XML
set = xml.css('a[@class~="bar"]')
assert_equal(4, set.length)
assert_equal(["Bar"], set.map(&:content).uniq)
end
def test_unlink
- xml = Nokogiri::XML.parse(<<~eoxml)
+ xml = Nokogiri::XML.parse(<<~XML)
Bar
Bar
@@ -757,7 +757,7 @@ def test_unlink
Awesome
Awesome
- eoxml
+ XML
node = xml.xpath("//a")[3]
assert_equal("Hello world", node.text)
assert_match(/Hello world/, xml.to_s)
@@ -921,7 +921,7 @@ def test_node_equality
end
def test_namespace_search_with_xpath_and_hash
- xml = Nokogiri::XML.parse(<<~eoxml)
+ xml = Nokogiri::XML.parse(<<~XML)
Michelin Model XGV
@@ -930,14 +930,14 @@ def test_namespace_search_with_xpath_and_hash
I'm a bicycle tire!
- eoxml
+ XML
tires = xml.xpath("//bike:tire", { "bike" => "http://schwinn.com/" })
assert_equal(1, tires.length)
end
def test_namespace_search_with_xpath_and_hash_with_symbol_keys
- xml = Nokogiri::XML.parse(<<~eoxml)
+ xml = Nokogiri::XML.parse(<<~XML)
Michelin Model XGV
@@ -946,14 +946,14 @@ def test_namespace_search_with_xpath_and_hash_with_symbol_keys
I'm a bicycle tire!
- eoxml
+ XML
tires = xml.xpath("//bike:tire", bike: "http://schwinn.com/")
assert_equal(1, tires.length)
end
def test_namespace_search_with_css
- xml = Nokogiri::XML.parse(<<~eoxml)
+ xml = Nokogiri::XML.parse(<<~XML)
Michelin Model XGV
@@ -962,7 +962,7 @@ def test_namespace_search_with_css
I'm a bicycle tire!
- eoxml
+ XML
tires = xml.css("bike|tire", "bike" => "http://schwinn.com/")
assert_equal(1, tires.length)
@@ -970,13 +970,13 @@ def test_namespace_search_with_css
def test_namespaced_attribute_search_with_xpath
# from #593
- xml_content = <<~EOXML
+ xml_content = <<~XML
with namespace
no namespace
- EOXML
+ XML
xml_doc = Nokogiri::XML(xml_content)
no_ns = xml_doc.xpath("//*[@att]")
@@ -990,13 +990,13 @@ def test_namespaced_attribute_search_with_xpath
def test_namespaced_attribute_search_with_css
# from #593
- xml_content = <<~EOXML
+ xml_content = <<~XML
with namespace
no namespace
- EOXML
+ XML
xml_doc = Nokogiri::XML(xml_content)
no_ns = xml_doc.css("*[att]")
@@ -1009,14 +1009,14 @@ def test_namespaced_attribute_search_with_css
end
def test_namespaces_should_include_all_namespace_definitions
- xml = Nokogiri::XML.parse(<<~EOF)
+ xml = Nokogiri::XML.parse(<<~XML)
hello
- EOF
+ XML
namespaces = xml.namespaces # Document#namespace
assert_equal({ "xmlns" => "http://quux.com/",
@@ -1048,7 +1048,7 @@ def test_namespaces_should_include_all_namespace_definitions
end
def test_namespace
- xml = Nokogiri::XML.parse(<<~EOF)
+ xml = Nokogiri::XML.parse(<<~XML)
hello a
@@ -1058,7 +1058,7 @@ def test_namespace
hello moon
- EOF
+ XML
set = xml.search("//y/*")
assert_equal("a", set[0].namespace.prefix)
assert_equal("http://foo.com/", set[0].namespace.href)
@@ -1082,9 +1082,9 @@ def test_namespace_without_an_href_on_html_node
# describe how we handle microsoft word's HTML formatting.
# this test is descriptive, not prescriptive.
#
- html = Nokogiri::HTML.parse(<<~EOF)
+ html = Nokogiri::HTML.parse(<<~XML)
foo
- EOF
+ XML
node = html.at("div").children.first
refute_nil(node)
@@ -1171,11 +1171,11 @@ def test_namespace_in_rendered_xml
# issue 771
def test_format_noblank
- content = <<~eoxml
+ content = <<~XML
hello
- eoxml
+ XML
subject = Nokogiri::XML(content) do |conf|
conf.default_xml.noblanks
end
@@ -1212,7 +1212,7 @@ def test_processing_instruction_eh
end
def test_node_lang
- document = Nokogiri::XML(<<~EOXML)
+ document = Nokogiri::XML(<<~XML)
foo
@@ -1220,7 +1220,7 @@ def test_node_lang
bar
bar
- EOXML
+ XML
assert_equal("en", document.at_css(".english").lang)
assert_equal("en", document.at_css(".english_child").lang)
assert_equal("jp", document.at_css(".japanese").lang)
@@ -1262,8 +1262,73 @@ def test_wrap
end
describe "#line" do
+ it "counts lines" do
+ xml = Nokogiri::XML(<<~XML)
+
+ Test
+
+ XML
+
+ if Nokogiri.jruby?
+ # in the output
+ assert_equal(3, xml.at_css("b").line)
+ else
+ # in the input
+ assert_equal(2, xml.at_css("b").line)
+ end
+ end
+
+ it "properly numbers lines with documents containing XML prolog" do
+ xml = Nokogiri::XML(<<~XML)
+
+
+ Test
+
+ XML
+
+ assert_equal(3, xml.at_css("b").line)
+ end
+
+ it "properly numbers lines with documents containing XML comments" do
+ xml = Nokogiri::XML(<<~XML)
+
+
+
+ Test
+
+
+ XML
+
+ if Nokogiri.jruby?
+ assert_equal(5, xml.at_css("c").line)
+ else
+ assert_equal(4, xml.at_css("c").line)
+ end
+ end
+
+ it "properly numbers lines with documents containing XML multiline comments" do
+ xml = Nokogiri::XML(<<~XML)
+
+
+
+
+ Test
+
+
+
+ XML
+
+ if Nokogiri.jruby?
+ assert_equal(7, xml.at_css("c").line)
+ else
+ assert_equal(6, xml.at_css("c").line)
+ end
+ end
+
it "returns a sensible line number for each node" do
- xml = Nokogiri::XML(<<~eoxml)
+ xml = Nokogiri::XML(<<~XML)
Hello world
@@ -1272,11 +1337,16 @@ def test_wrap
Goodbye world
- eoxml
-
- set = xml.search("//b")
- assert_equal(2, set[0].line)
- assert_equal(5, set[1].line)
+ XML
+
+ set = xml.css("b")
+ if Nokogiri.jruby?
+ assert_equal(3, set[0].line)
+ assert_equal(6, set[1].line)
+ else
+ assert_equal(2, set[0].line)
+ assert_equal(5, set[1].line)
+ end
end
it "supports a line number greater than a short int" do