diff --git a/CHANGELOG.md b/CHANGELOG.md
index 05e16c9b28..c1da35d438 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -41,6 +41,7 @@ A related discussion about Trust exists at [#2357](https://github.com/sparklemot
* [CRuby] `Node#line` is no longer capped at 65535. libxml v2.9.0 and later support a new parse option, exposed as `Nokogiri::XML::ParseOptions::PARSE_BIG_LINES` and set in `ParseOptions::DEFAULT_XML`, `::DEFAULT_XSLT`, `::DEFAULT_HTML`, and `::DEFAULT_SCHEMA`. (Note that JRuby never had this problem.) [[#1764](https://github.com/sparklemotion/nokogiri/issues/1764), [#1493](https://github.com/sparklemotion/nokogiri/issues/1493), [#1617](https://github.com/sparklemotion/nokogiri/issues/1617), [#1505](https://github.com/sparklemotion/nokogiri/issues/1505), [#1003](https://github.com/sparklemotion/nokogiri/issues/1003), [#533](https://github.com/sparklemotion/nokogiri/issues/533)]
* [CRuby] If a cycle is introduced when reparenting a node (i.e., the node becomes its own ancestor), a `RuntimeError` is raised. libxml2 does no checking for this, which means cycles would otherwise result in infinite loops on subsequent operations. (Note: JRuby/Xerces already does this.) [[#1912](https://github.com/sparklemotion/nokogiri/issues/1912)]
* [CRuby] Source builds will download zlib and libiconv via HTTPS. [[#2391](https://github.com/sparklemotion/nokogiri/issues/2391)] (Thanks, [@jmartin-r7](https://github.com/jmartin-r7)!)
+* [JRuby] `Node#line` behavior has been modified to return the line number of the node in the _final DOM structure_ (the value returned in JRuby is increased by 1 to count the XML prolog). This behavior is different from CRuby, which returns the node's position in the _input string_. This difference is not ideal, but at least is now officially documented and tested. The real-world impact of this change is that the value returned in JRuby is greater by 1 to account for the XML prolog in the output. [[#2380](https://github.com/sparklemotion/nokogiri/issues/2380)] (Thanks, [@dabdine](https://github.com/dabdine)!)
## 1.12.5 / 2021-09-27
diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java
index 234f8847d3..f7feb43b51 100644
--- a/ext/java/nokogiri/XmlNode.java
+++ b/ext/java/nokogiri/XmlNode.java
@@ -1580,6 +1580,10 @@ public class XmlNode extends RubyObject
return getNokogiriClass(context.runtime, "Nokogiri::XML::Node").getConstant(type);
}
+ /*
+ * NOTE that the behavior of this function is very difference from the CRuby implementation, see
+ * the docstring in ext/nokogiri/xml_node.c for details.
+ */
@JRubyMethod
public IRubyObject
line(ThreadContext context)
@@ -1587,7 +1591,10 @@ public class XmlNode extends RubyObject
Node root = getOwnerDocument();
int[] counter = new int[1];
count(root, counter);
- return RubyFixnum.newFixnum(context.runtime, counter[0] + 1);
+ // offset of 2:
+ // - one because humans start counting at 1 not zero
+ // - one to account for the XML declaration present in the output
+ return RubyFixnum.newFixnum(context.runtime, counter[0] + 2);
}
private boolean
diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c
index 459bbc44f2..ba9e60c85e 100644
--- a/ext/nokogiri/xml_node.c
+++ b/ext/nokogiri/xml_node.c
@@ -1698,10 +1698,31 @@ native_write_to(
}
/*
- * call-seq:
- * line
+ * :call-seq:
+ * line() → Integer
+ *
+ * [Returns] The line number of this Node.
+ *
+ * ---
+ *
+ * ⚠ The CRuby and JRuby implementations differ in important ways!
+ *
+ * Semantic differences:
+ * - The CRuby method reflects the node's line number in the parsed string
+ * - The JRuby method reflects the node's line number in the final DOM structure after
+ * corrections have been applied
+ *
+ * Performance differences:
+ * - The CRuby method is {O(1)}[https://en.wikipedia.org/wiki/Time_complexity#Constant_time]
+ * (constant time)
+ * - The JRuby method is {O(n)}[https://en.wikipedia.org/wiki/Time_complexity#Linear_time] (linear
+ * time, where n is the number of nodes before/above the element in the DOM)
*
- * Returns the line for this Node
+ * If you'd like to help improve the JRuby implementation, please review these issues and reach out
+ * to the maintainers:
+ * - https://github.com/sparklemotion/nokogiri/issues/1223
+ * - https://github.com/sparklemotion/nokogiri/pull/2177
+ * - https://github.com/sparklemotion/nokogiri/issues/2380
*/
static VALUE
rb_xml_node_line(VALUE rb_node)
diff --git a/test/xml/test_node.rb b/test/xml/test_node.rb
index 24e9947e79..bc52da09be 100644
--- a/test/xml/test_node.rb
+++ b/test/xml/test_node.rb
@@ -1262,19 +1262,33 @@ def test_wrap
end
describe "#line" do
- it "properly numbers lines with documents containing XML prolog" do
+ it "counts lines" do
xml = Nokogiri::XML(<<~XML)
-
Test
XML
- pending_if("https://github.com/sparklemotion/nokogiri/issues/2380", Nokogiri.jruby?) do
+ if Nokogiri.jruby?
+ # in the output
assert_equal(3, xml.at_css("b").line)
+ else
+ # in the input
+ assert_equal(2, xml.at_css("b").line)
end
end
+ it "properly numbers lines with documents containing XML prolog" do
+ xml = Nokogiri::XML(<<~XML)
+
+
+ Test
+
+ XML
+
+ assert_equal(3, xml.at_css("b").line)
+ end
+
it "properly numbers lines with documents containing XML comments" do
xml = Nokogiri::XML(<<~XML)
@@ -1285,7 +1299,11 @@ def test_wrap
XML
- assert_equal(4, xml.at_css("c").line)
+ if Nokogiri.jruby?
+ assert_equal(5, xml.at_css("c").line)
+ else
+ assert_equal(4, xml.at_css("c").line)
+ end
end
it "properly numbers lines with documents containing XML multiline comments" do
@@ -1302,7 +1320,11 @@ def test_wrap
XML
- assert_equal(6, xml.at_css("c").line)
+ if Nokogiri.jruby?
+ assert_equal(7, xml.at_css("c").line)
+ else
+ assert_equal(6, xml.at_css("c").line)
+ end
end
it "returns a sensible line number for each node" do
@@ -1318,8 +1340,13 @@ def test_wrap
XML
set = xml.css("b")
- assert_equal(2, set[0].line)
- assert_equal(5, set[1].line)
+ if Nokogiri.jruby?
+ assert_equal(3, set[0].line)
+ assert_equal(6, set[1].line)
+ else
+ assert_equal(2, set[0].line)
+ assert_equal(5, set[1].line)
+ end
end
it "supports a line number greater than a short int" do