diff --git a/lib/loofah/html5/scrub.rb b/lib/loofah/html5/scrub.rb index e2e3d70e..222ffb96 100644 --- a/lib/loofah/html5/scrub.rb +++ b/lib/loofah/html5/scrub.rb @@ -7,22 +7,22 @@ module HTML5 # :nodoc: module Scrub CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/ CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/ - CRASS_SEMICOLON = { :node => :semicolon, :raw => ";" } + CRASS_SEMICOLON = { node: :semicolon, raw: ";" } CSS_IMPORTANT = '!important' class << self def allowed_element?(element_name) - ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name + ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include?(element_name) end # alternative implementation of the html5lib attribute scrubbing algorithm def scrub_attributes(node) node.attribute_nodes.each do |attr_node| attr_name = if attr_node.namespace - "#{attr_node.namespace.prefix}:#{attr_node.node_name}" - else - attr_node.node_name - end + "#{attr_node.namespace.prefix}:#{attr_node.node_name}" + else + attr_node.node_name + end if attr_name =~ /\Adata-[\w-]+\z/ next @@ -58,13 +58,13 @@ def scrub_attributes(node) end end - scrub_css_attribute node + scrub_css_attribute(node) node.attribute_nodes.each do |attr_node| node.remove_attribute(attr_node.name) if attr_node.value !~ /[^[:space:]]/ end - force_correct_attribute_escaping! node + force_correct_attribute_escaping!(node) end def scrub_css_attribute(node) @@ -73,33 +73,50 @@ def scrub_css_attribute(node) end def scrub_css(style) - style_tree = Crass.parse_properties style + style_tree = Crass.parse_properties(style) sanitized_tree = [] style_tree.each do |node| next unless node[:node] == :property next if node[:children].any? do |child| - [:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase)) + [:url, :bad_url].include?(child[:node]) end + name = node[:name].downcase - if SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name) - sanitized_tree << node << CRASS_SEMICOLON - elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) - value = node[:value].split.map do |keyword| - if SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH + next unless SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || + SafeList::ALLOWED_SVG_PROPERTIES.include?(name) || + SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) + + value = node[:children].map do |child| + case child[:node] + when :whitespace + nil + when :string + nil + when :function + if SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase) + Crass::Parser.stringify(child) + end + when :ident + keyword = child[:value] + if !SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) || + SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || + (keyword =~ CSS_KEYWORDISH) keyword end - end.compact - unless value.empty? - value << CSS_IMPORTANT if node[:important] - propstring = sprintf "%s:%s", name, value.join(" ") - sanitized_node = Crass.parse_properties(propstring).first - sanitized_tree << sanitized_node << CRASS_SEMICOLON + else + child[:raw] end - end + end.compact + + next if value.empty? + value << CSS_IMPORTANT if node[:important] + propstring = format("%s:%s", name, value.join(" ")) + sanitized_node = Crass.parse_properties(propstring).first + sanitized_tree << sanitized_node << CRASS_SEMICOLON end - Crass::Parser.stringify sanitized_tree + Crass::Parser.stringify(sanitized_tree) end # diff --git a/test/assets/testdata_sanitizer_tests1.dat b/test/assets/testdata_sanitizer_tests1.dat index a7298ac5..c8284dc3 100644 --- a/test/assets/testdata_sanitizer_tests1.dat +++ b/test/assets/testdata_sanitizer_tests1.dat @@ -458,7 +458,7 @@ "name": "style_attr_end_with_nothing", "input": "
", "output": "
", - "xhtml": "
", + "xhtml": "
", "rexml": "
" }, @@ -466,7 +466,7 @@ "name": "style_attr_end_with_space", "input": "
", "output": "
", - "xhtml": "
", + "xhtml": "
", "rexml": "
" }, @@ -474,7 +474,7 @@ "name": "style_attr_end_with_semicolon", "input": "
", "output": "
", - "xhtml": "
", + "xhtml": "
", "rexml": "
" }, @@ -482,7 +482,7 @@ "name": "style_attr_end_with_semicolon_space", "input": "
", "output": "
", - "xhtml": "
", + "xhtml": "
", "rexml": "
" }, diff --git a/test/html5/test_scrub.rb b/test/html5/test_scrub.rb deleted file mode 100644 index 84e17b69..00000000 --- a/test/html5/test_scrub.rb +++ /dev/null @@ -1,10 +0,0 @@ -require "helper" - -class UnitHTML5Scrub < Loofah::TestCase - include Loofah - - def test_scrub_css - assert_equal Loofah::HTML5::Scrub.scrub_css("background: #ABC012"), "background:#ABC012;" - assert_equal Loofah::HTML5::Scrub.scrub_css("background: #abc012"), "background:#abc012;" - end -end diff --git a/test/html5/test_scrub_css.rb b/test/html5/test_scrub_css.rb new file mode 100644 index 00000000..456f1a25 --- /dev/null +++ b/test/html5/test_scrub_css.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true +require "helper" + +class UnitHTML5Scrub < Loofah::TestCase + include Loofah + + describe "hex values" do + it "handles upper case" do + assert_equal "background:#ABC012;", Loofah::HTML5::Scrub.scrub_css("background: #ABC012") + end + it "handles lower case" do + assert_equal "background:#abc012;", Loofah::HTML5::Scrub.scrub_css("background: #abc012") + end + end + + describe "css functions" do + it "allows safe functions" do + assert_equal "background-color:linear-gradient(transparent 50%, #ffff66 50%);", + Loofah::HTML5::Scrub.scrub_css("background-color: linear-gradient(transparent 50%, #ffff66 50%);") + end + + it "disallows unsafe functions" do + assert_equal "", Loofah::HTML5::Scrub.scrub_css("background-color: haxxor-fun(transparent 50%, #ffff66 50%);") + end + + # see #199 for the bug we're testing here + it "allows safe functions in shorthand css properties" do + assert_equal "background:linear-gradient(transparent 50%, #ffff66 50%);", + Loofah::HTML5::Scrub.scrub_css("background: linear-gradient(transparent 50%, #ffff66 50%);") + end + end +end