From c4a9de41dcbd15d21890c3b241a3ba0cf65294f9 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 23 Sep 2021 16:35:45 -0400 Subject: [PATCH 1/2] format: clean up html4/test_document.rb --- test/html4/test_document.rb | 46 ++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/test/html4/test_document.rb b/test/html4/test_document.rb index 29d87cf6d9..a1bfbd69ad 100644 --- a/test/html4/test_document.rb +++ b/test/html4/test_document.rb @@ -103,21 +103,21 @@ def test_empty_string_returns_empty_doc assert_nil(doc.root) end - unless Nokogiri.uses_libxml?("~> 2.6.0") - def test_to_xhtml_with_indent - doc = Nokogiri::HTML("foo") - doc = Nokogiri::HTML(doc.to_xhtml(indent: 2)) - assert_indent(2, doc) - end + def test_to_xhtml_with_indent + skip if Nokogiri.uses_libxml?("~> 2.6.0") + doc = Nokogiri::HTML("foo") + doc = Nokogiri::HTML(doc.to_xhtml(indent: 2)) + assert_indent(2, doc) + end - def test_write_to_xhtml_with_indent - io = StringIO.new - doc = Nokogiri::HTML("foo") - doc.write_xhtml_to(io, indent: 5) - io.rewind - doc = Nokogiri::HTML(io.read) - assert_indent(5, doc) - end + def test_write_to_xhtml_with_indent + skip if Nokogiri.uses_libxml?("~> 2.6.0") + io = StringIO.new + doc = Nokogiri::HTML("foo") + doc.write_xhtml_to(io, indent: 5) + io.rewind + doc = Nokogiri::HTML(io.read) + assert_indent(5, doc) end def test_swap_should_not_exist @@ -360,8 +360,10 @@ def test_parse_temp_file File.open(HTML_FILE, "rb") { |f| temp_html_file.write(f.read) } temp_html_file.close temp_html_file.open - assert_equal(Nokogiri::HTML.parse(File.read(HTML_FILE)).xpath("//div/a").length, - Nokogiri::HTML.parse(temp_html_file).xpath("//div/a").length) + assert_equal( + Nokogiri::HTML.parse(File.read(HTML_FILE)).xpath("//div/a").length, + Nokogiri::HTML.parse(temp_html_file).xpath("//div/a").length + ) end def test_to_xhtml @@ -454,8 +456,8 @@ def test_doc_type assert_equal("-//W3C//DTD XHTML 1.1//EN", html.internal_subset.external_id) 
 assert_equal("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", html.internal_subset.system_id) assert_equal( - "", html.to_s[0, -97] + "", + html.to_s[0, 97] ) end @@ -614,7 +616,7 @@ def test_find_classes EOHTML list = doc.css(".red") assert_equal(2, list.length) - assert_equal(%w{RED RED}, list.map(&:text)) + assert_equal(["RED", "RED"], list.map(&:text)) end def test_parse_can_take_io @@ -836,8 +838,10 @@ def initialize(*args) it "passes arguments to #initialize" do doc = klass.new("http://www.w3.org/TR/REC-html40/loose.dtd", "-//W3C//DTD HTML 4.0 Transitional//EN") - assert_equal(["http://www.w3.org/TR/REC-html40/loose.dtd", "-//W3C//DTD HTML 4.0 Transitional//EN"], - doc.initialized_with) + assert_equal( + ["http://www.w3.org/TR/REC-html40/loose.dtd", "-//W3C//DTD HTML 4.0 Transitional//EN"], + doc.initialized_with + ) end end From 7376cbd484bf0cf14850027984053713e3fe20e3 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 23 Sep 2021 16:36:40 -0400 Subject: [PATCH 2/2] fix: HTML4::Document.to_xhtml self-closing tags Commit 1d06b4f1 introduced NO_EMPTY_TAGS into SaveOptions::DEFAULT_XHTML which libxml2 ignored due to a long-standing bug in serialization. libxml2 v2.9.11 fixed that serialization bug (https://gitlab.gnome.org/GNOME/libxml2/-/commit/dc6f009) and started paying attention to the NO_EMPTY_TAGS save option, resulting in seeing output containing, e.g. `<br></br>` instead of `<br/>`. This commit updates the default XHTML save options to drop the NO_EMPTY_TAGS flag, restoring this behavior. 
Closes #2324 --- lib/nokogiri/xml/node/save_options.rb | 2 +- test/html4/test_document.rb | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/nokogiri/xml/node/save_options.rb b/lib/nokogiri/xml/node/save_options.rb index 6da00134bb..8afeef345a 100644 --- a/lib/nokogiri/xml/node/save_options.rb +++ b/lib/nokogiri/xml/node/save_options.rb @@ -34,7 +34,7 @@ class SaveOptions DEFAULT_HTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML end # the default for XHTML document - DEFAULT_XHTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_XHTML + DEFAULT_XHTML = FORMAT | NO_DECLARATION | AS_XHTML # Integer representation of the SaveOptions attr_reader :options diff --git a/test/html4/test_document.rb b/test/html4/test_document.rb index a1bfbd69ad..d6690026f2 100644 --- a/test/html4/test_document.rb +++ b/test/html4/test_document.rb @@ -372,6 +372,15 @@ def test_to_xhtml assert_match("UTF-8", html.to_xhtml(encoding: "UTF-8")) end + def test_to_xhtml_self_closing_tags + # https://github.com/sparklemotion/nokogiri/issues/2324 + html = "
" + doc = Nokogiri::HTML::Document.parse(html) + xhtml = doc.to_xhtml + assert_match(%r(
), xhtml) + assert_match(%r(), xhtml) + end + def test_no_xml_header html = Nokogiri::HTML(<<~EOHTML)