From b32c875100a8e0f0b25c4254c9b9a497f97f4599 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 18 Jan 2021 16:26:59 -0500 Subject: [PATCH] fix(jruby): improve how Node#line is calculated Note that Java's w3c.dom.Node interface does not have any way to get (or set) the line number for that node. The original JRuby implementation counted newlines, but did it poorly -- see #1223. This commit improves the newline-counting approach. But the solution itself -- counting newlines! -- is still questionable IMHO and absolutely inefficient. I had played around with an approach that I wrote about at #1223, where the SAX Parser knows what line it's on when `startElement` is invoked (via the XMLLocator). But I couldn't figure out how to preserve that information in the final Document or Node. If you, like me, think this approach is terrible; and if you *also* understand how to set this metadata on the Node or the Document, then please help us out. --- CHANGELOG.md | 5 ++++ ext/java/nokogiri/XmlNode.java | 20 +++++++++---- test/xml/test_node.rb | 51 +++++++++++++++++++--------------- 3 files changed, 48 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4661cf2598..76f2586be3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,11 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA * [JRuby] `{XML,HTML}::Document.parse` now invokes `#initialize` exactly once. Previously `#initialize` was not called, which was a problem for subclassing such as done by `Loofah`. +### Improved + +* [JRuby] Update the algorithm used to calculate `Node#line` to be wrong less-often. The underlying parser, Xerces, does not track line numbers, and so we've always used a hacky solution for this method. [[#1223](https://github.com/sparklemotion/nokogiri/issues/1223)] + + ## v1.11.1 / 2021-01-06 ### Fixed diff --git a/ext/java/nokogiri/XmlNode.java b/ext/java/nokogiri/XmlNode.java index cfbea83dfa..790b849ec2 100644 --- a/ext/java/nokogiri/XmlNode.java +++ b/ext/java/nokogiri/XmlNode.java @@ -1406,14 +1406,22 @@ private boolean count(Node node, int[] counter) { if (node == this.node) { return true; } + NodeList list = node.getChildNodes(); - for (int i=0; i - - Hello world - - - eoxml - - set = xml.search("//a") - node = set.first - assert_equal(2, node.line) - end - - def test_set_line - skip("Only supported with libxml2") unless Nokogiri.uses_libxml? - document = Nokogiri::XML::Document.new - node = document.create_element('a') - node.line = 54321 - assert_equal(54321, node.line) - end - def test_xpath_results_have_document_and_are_decorated x = Module.new do def awesome!; end @@ -1230,6 +1208,35 @@ def test_wrap assert_equal('wrapper', thing.parent.name) assert_equal('thing', doc.at_css("wrapper").children.first.name) end + + describe "#line" do + it "returns a sensible line number for each node" do + xml = Nokogiri::XML(<<~eoxml) + + + Hello world + + + Goodbye world + + + eoxml + + set = xml.search("//b") + assert_equal(2, set[0].line) + assert_equal(5, set[1].line) + end + end + + describe "#line=" do + it "overrides the line number of a node" do + skip("Xerces does not have line numbers for nodes") unless Nokogiri.uses_libxml? + document = Nokogiri::XML::Document.new + node = document.create_element('a') + node.line = 54321 + assert_equal(54321, node.line) + end + end end end end