diff --git a/CHANGELOG.md b/CHANGELOG.md index 7bbccfe2..46ab0bf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Features: * Support HTML5 `
` tag. #133 (Thanks, @MothOnMars!) +* Recognize HTML5 block elements. #136 (Thanks, @MothOnMars!) * Support SVG `` tag. #131 (Thanks, @baopham!) * Support for whitelisting CSS functions, initially just `calc`. #122/#123 (Thanks, @NikoRoberts!) diff --git a/lib/loofah/elements.rb b/lib/loofah/elements.rb index 229462d3..6e76c660 100644 --- a/lib/loofah/elements.rb +++ b/lib/loofah/elements.rb @@ -2,13 +2,88 @@ module Loofah module Elements - # Block elements in HTML4 - STRICT_BLOCK_LEVEL = Set.new %w[address blockquote center dir div dl - fieldset form h1 h2 h3 h4 h5 h6 hr isindex menu noframes - noscript ol p pre table ul] + STRICT_BLOCK_LEVEL_HTML4 = Set.new %w[ + address + blockquote + center + dir + div + dl + fieldset + form + h1 + h2 + h3 + h4 + h5 + h6 + hr + isindex + menu + noframes + noscript + ol + p + pre + table + ul + ] - # The following elements may also be considered block-level elements since they may contain block-level elements - LOOSE_BLOCK_LEVEL = Set.new %w[dd dt frameset li tbody td tfoot th thead tr] + # https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements + STRICT_BLOCK_LEVEL_HTML5 = Set.new %w[ + address + article + aside + blockquote + canvas + dd + div + dl + dt + fieldset + figcaption + figure + footer + form + h1 + h2 + h3 + h4 + h5 + h6 + header + hgroup + hr + li + main + nav + noscript + ol + output + p + pre + section + table + tfoot + ul + video + ] + + STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5 + + # The following elements may also be considered block-level + # elements since they may contain block-level elements + LOOSE_BLOCK_LEVEL = Set.new %w[dd + dt + frameset + li + tbody + td + tfoot + th + thead + tr + ] BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL end diff --git a/test/integration/test_html.rb b/test/integration/test_html.rb index a5537d38..b00d5ac8 100644 --- a/test/integration/test_html.rb +++ b/test/integration/test_html.rb @@ -19,11 +19,16 @@ class IntegrationTestHtml < Loofah::TestCase end context "#to_text" do - it "add newlines before and after block elements" do + it "add newlines before and after html4 block elements" do html = Loofah.fragment "
tweedle

beetle

bottlepuddlepaddle
battle
muddle
" assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text end + it "add newlines before and after html5 block elements" do + html = Loofah.fragment "
tweedle
beetle
bottlepuddlepaddle
battle
muddle
" + assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text + end + it "remove extraneous whitespace" do html = Loofah.fragment "
tweedle\n\n\t\n\s\nbeetle
" assert_equal "\ntweedle\n\nbeetle\n", html.to_text @@ -47,11 +52,16 @@ class IntegrationTestHtml < Loofah::TestCase end context "#to_text" do - it "add newlines before and after block elements" do + it "add newlines before and after html4 block elements" do html = Loofah.document "
tweedle

beetle

bottlepuddlepaddle
battle
muddle
" assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text end + it "add newlines before and after html5 block elements" do + html = Loofah.document "
tweedle
beetle
bottlepuddlepaddle
battle
muddle
" + assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text + end + it "remove extraneous whitespace" do html = Loofah.document "
tweedle\n\n\t\n\s\nbeetle
" assert_equal "\ntweedle\n\nbeetle\n", html.to_text