From 7baf3f1cb3d9ff7fc3d5ab4ad94270563f8ce039 Mon Sep 17 00:00:00 2001 From: LoveCany Date: Sun, 29 May 2022 01:26:32 +0900 Subject: [PATCH 1/2] Implement relaxedHTML5 into safelists based on original relaxed (partly implemented) --- src/main/java/org/jsoup/safety/Safelist.java | 24 ++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/main/java/org/jsoup/safety/Safelist.java b/src/main/java/org/jsoup/safety/Safelist.java index cc5604e5e3..caa5270e1e 100644 --- a/src/main/java/org/jsoup/safety/Safelist.java +++ b/src/main/java/org/jsoup/safety/Safelist.java @@ -29,6 +29,7 @@ Thank you to Ryan Grove (wonko.com) for the Ruby HTML cleaner http://github.com/
  • {@link #basic}
  • {@link #basicWithImages}
  • {@link #relaxed} +
  • {@link #relaxedHTML5()}

    If you need to allow more through (please be careful!), tweak a base safelist with: @@ -180,6 +181,28 @@ public static Safelist relaxed() { ; } + /** + This safelist allows a full range of text and structural body HTML, extending the relaxed safelist with HTML5 elements: + a, abbr, area, article, aside, b, blockquote, br, caption, cite, code, col, colgroup, dd, del, details, + div, dl, dt, em, footer, h1, h2, h3, h4, h5, h6, i, img, label, li, ol, p, pre, q, section, small, span, strike, + strong, sub, summary, sup, table, tbody, td, textarea, tfoot, th, thead, tr, u, ul +

    + Links do not have an enforced rel=nofollow attribute, but you can add that if desired. +

    + + @return safelist + */ + public static Safelist relaxedHTML5() { + return relaxed().addTags("abbr", "area", "article", "aside", "blockquote", "del", "details", "footer", + "label", "section", "summary", "textarea") + .addAttributes("abbr", "title") + .addAttributes("area", "alt") + .addAttributes("details", "open") + .addAttributes("label", "for", "form") + .addAttributes("section", "aria-labelledby", "aria-live", "aria-relevant") + .addAttributes("textarea", "id", "name", "rows", "cols"); + } + /** Create a new, empty safelist. Generally it will be better to start with a default prepared safelist instead. @@ -187,6 +210,7 @@ public static Safelist relaxed() { @see #basicWithImages() @see #simpleText() @see #relaxed() + @see #relaxedHTML5() */ public Safelist() { tagNames = new HashSet<>(); From 1a75b6221f80af4a4cb262a5a7c8835e64f005ff Mon Sep 17 00:00:00 2001 From: LoveCany Date: Sun, 29 May 2022 01:26:44 +0900 Subject: [PATCH 2/2] Add test of relaxedHTML5 --- src/test/java/org/jsoup/safety/CleanerTest.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/test/java/org/jsoup/safety/CleanerTest.java b/src/test/java/org/jsoup/safety/CleanerTest.java index f73e462ba4..0833829f40 100644 --- a/src/test/java/org/jsoup/safety/CleanerTest.java +++ b/src/test/java/org/jsoup/safety/CleanerTest.java @@ -50,6 +50,13 @@ public class CleanerTest { assertEquals("

    Head

    OneTwo
    ", TextUtil.stripNewlines(cleanHtml)); } + @Test public void testRelaxedHTML5() { + String h = "
    Requirements

    Text

    OneTwo
    "; + String cleanHtml = Jsoup.clean(h, Safelist.relaxedHTML5()); + assertEquals("
    Requirements

    Text

    OneTwo
    ", + TextUtil.stripNewlines(cleanHtml)); + } + @Test public void testRemoveTags() { String h = "

    Nice

    Hello
    "; String cleanHtml = Jsoup.clean(h, Safelist.basic().removeTags("a"));