From 4a08c25a603654f2fc505a7d2bf0c35a39870ad7 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 15 Mar 2018 15:24:30 -0400 Subject: [PATCH] tests and fix for CVE-2018-8048 libxml2 >= 2.9.2 fails to escape comments within some attributes. It wants to ensure these comments can be treated as "server-side includes", but as a result fails to ensure that serialization is well-formed, resulting in an opportunity for XSS injection of code into a final re-parsed document (presumably in a browser). See https://github.com/flavorjones/loofah/issues/144 for more details and history around this libxml2 issue, which goes back a few years. At this point it's not clear to your humble maintainer why this hasn't been addressed upstream by libxml2 maintainers, and so I'm working around it in Loofah to protect Loofah users, while simultaneously attempting to escalate the issue upstream. [fixes #144] --- lib/loofah.rb | 1 + lib/loofah/html5/libxml2_workarounds.rb | 26 ++++++++++ lib/loofah/html5/scrub.rb | 33 +++++++++++- test/integration/test_ad_hoc.rb | 67 ++++++++++++++++++++++++- 4 files changed, 124 insertions(+), 3 deletions(-) create mode 100644 lib/loofah/html5/libxml2_workarounds.rb diff --git a/lib/loofah.rb b/lib/loofah.rb index 099f183b..ce41c270 100644 --- a/lib/loofah.rb +++ b/lib/loofah.rb @@ -6,6 +6,7 @@ require 'loofah/elements' require 'loofah/html5/whitelist' +require 'loofah/html5/libxml2_workarounds' require 'loofah/html5/scrub' require 'loofah/scrubber' diff --git a/lib/loofah/html5/libxml2_workarounds.rb b/lib/loofah/html5/libxml2_workarounds.rb new file mode 100644 index 00000000..fc3fb244 --- /dev/null +++ b/lib/loofah/html5/libxml2_workarounds.rb @@ -0,0 +1,26 @@ +# coding: utf-8 +require 'set' + +module Loofah + # + # constants related to working around unhelpful libxml2 behavior + # + # ಠ_ಠ + # + module LibxmlWorkarounds + # + # these attributes and qualifying parent tags are determined by the code at: + # + # https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714 + # + # see comments about CVE-2018-8048 within the tests for more information + # + BROKEN_ESCAPING_ATTRIBUTES = Set.new %w[ + href + action + src + name + ] + BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = {"name" => "a"} + end +end diff --git a/lib/loofah/html5/scrub.rb b/lib/loofah/html5/scrub.rb index 4e2ca648..99622bfe 100644 --- a/lib/loofah/html5/scrub.rb +++ b/lib/loofah/html5/scrub.rb @@ -1,5 +1,3 @@ -#encoding: US-ASCII - require 'cgi' require 'crass' @@ -65,6 +63,8 @@ def scrub_attributes node node.attribute_nodes.each do |attr_node| node.remove_attribute(attr_node.name) if attr_node.value !~ /[^[:space:]]/ end + + force_correct_attribute_escaping! node end def scrub_css_attribute node @@ -100,6 +100,35 @@ def scrub_css style Crass::Parser.stringify sanitized_tree end + + private + + # + # libxml2 >= 2.9.2 fails to escape comments within some attributes. + # + # see comments about CVE-2018-8048 within the tests for more information + # + def force_correct_attribute_escaping! node + return unless Nokogiri::VersionInfo.instance.libxml2? + + node.attribute_nodes.each do |attr_node| + next unless LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES.include?(attr_node.name) + + tag_name = LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG[attr_node.name] + next unless tag_name.nil? || tag_name == node.name + + # + # this block is just like CGI.escape in Ruby 2.4, but + # only encodes space and double-quote, to mimic + # pre-2.9.2 behavior + # + encoding = attr_node.value.encoding + attr_node.value = attr_node.value.gsub(/[ "]/) do |m| + '%' + m.unpack('H2' * m.bytesize).join('%').upcase + end.force_encoding(encoding) + end + end + end end end diff --git a/test/integration/test_ad_hoc.rb b/test/integration/test_ad_hoc.rb index 13539668..71958fda 100644 --- a/test/integration/test_ad_hoc.rb +++ b/test/integration/test_ad_hoc.rb @@ -188,6 +188,71 @@ def test_dont_remove_whitespace_between_tags html = "

Foo

\n

Bar

" assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text end + + # + # tests for CVE-2018-8048 (see https://github.com/flavorjones/loofah/issues/144) + # + # libxml2 >= 2.9.2 fails to escape comments within some attributes. It + # wants to ensure these comments can be treated as "server-side includes", + # but as a result fails to ensure that serialization is well-formed, + # resulting in an opportunity for XSS injection of code into a final + # re-parsed document (presumably in a browser). + # + # we'll test this by parsing the HTML, serializing it, then + # re-parsing it to ensure there isn't any ambiguity in the output + # that might allow code injection into a browser consuming + # "sanitized" output. + # + [ + # + # these tags and attributes are determined by the code at: + # + # https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714 + # + {tag: "a", attr: "href"}, + {tag: "div", attr: "href"}, + {tag: "a", attr: "action"}, + {tag: "div", attr: "action"}, + {tag: "a", attr: "src"}, + {tag: "div", attr: "src"}, + {tag: "a", attr: "name"}, + # + # note that div+name is _not_ affected by the libxml2 issue. + # but we test it anyway to ensure our logic isn't modifying + # attributes that don't need modifying. + # + {tag: "div", attr: "name", unescaped: true}, + ].each do |config| + + define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do + html = %{<#{config[:tag]} #{config[:attr]}='example.com'>test} + + reparsed = Loofah.fragment(Loofah.fragment(html).scrub!(:prune).to_html) + attributes = reparsed.at_css(config[:tag]).attribute_nodes + + assert_equal [config[:attr]], attributes.collect(&:name) + if Nokogiri::VersionInfo.new.libxml2? + if config[:unescaped] + # + # this attribute was emitted wrapped in single-quotes, so a double quote is A-OK. + # assert that this attribute's serialization is unaffected. + # + assert_equal %{example.com}, attributes.first.value + else + # + # let's match the behavior in libxml < 2.9.2. + # test that this attribute's serialization is well-formed and sanitized. + # + assert_equal %{example.com}, attributes.first.value + end + else + # + # yay for consistency in javaland. move along, nothing to see here. + # + assert_equal %{example.com}, attributes.first.value + end + end + + end end end -