From a0180c72c55c44b8e0db3a98040bd5f115742817 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 23 Sep 2021 16:36:40 -0400 Subject: [PATCH] fix: HTML4::Document.to_xhtml self-closing tags Commit 1d06b4f1 introduced NO_EMPTY_TAGS into SaveOptions::DEFAULT_XHTML which libxml2 ignored due to a long-standing bug in serialization. libxml2 v2.9.11 fixed that serialization bug (https://gitlab.gnome.org/GNOME/libxml2/-/commit/dc6f009) and started paying attention to the NO_EMPTY_TAGS save option, resulting in seeing output containing, e.g. `<br></br>` instead of `<br/>`. This commit updates the default XHTML save options to drop the NO_EMPTY_TAGS flag, restoring the previous self-closing output. Closes #2324 --- lib/nokogiri/xml/node/save_options.rb | 2 +- test/html4/test_document.rb | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/nokogiri/xml/node/save_options.rb b/lib/nokogiri/xml/node/save_options.rb index 6da00134bb..8afeef345a 100644 --- a/lib/nokogiri/xml/node/save_options.rb +++ b/lib/nokogiri/xml/node/save_options.rb @@ -34,7 +34,7 @@ class SaveOptions DEFAULT_HTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML end # the default for XHTML document - DEFAULT_XHTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_XHTML + DEFAULT_XHTML = FORMAT | NO_DECLARATION | AS_XHTML # Integer representation of the SaveOptions attr_reader :options diff --git a/test/html4/test_document.rb b/test/html4/test_document.rb index 29d87cf6d9..c6f0bfe862 100644 --- a/test/html4/test_document.rb +++ b/test/html4/test_document.rb @@ -370,6 +370,15 @@ def test_to_xhtml assert_match("UTF-8", html.to_xhtml(encoding: "UTF-8")) end + def test_to_xhtml_self_closing_tags + # https://github.com/sparklemotion/nokogiri/issues/2324 + html = "
" + doc = Nokogiri::HTML::Document.parse(html) + xhtml = doc.to_xhtml + assert_match(%r(
), xhtml) + assert_match(%r(), xhtml) + end + def test_no_xml_header html = Nokogiri::HTML(<<~EOHTML)