From 1a21165a792e178e3d7ef3c8bb7a5df4a63b5396 Mon Sep 17 00:00:00 2001 From: Jeremy Landis Date: Sat, 16 Oct 2021 21:00:57 -0400 Subject: [PATCH 1/2] [enhance] Expose maxPaddingWidth in OutputSettings with default of 30 retained --- CHANGES | 4 +++ .../java/org/jsoup/internal/StringUtil.java | 21 +++++++++--- src/main/java/org/jsoup/nodes/Document.java | 24 ++++++++++++- src/main/java/org/jsoup/nodes/Node.java | 2 +- .../org/jsoup/internal/StringUtilTest.java | 34 ++++++++++++++++--- 5 files changed, 73 insertions(+), 12 deletions(-) diff --git a/CHANGES b/CHANGES index eda0d2bc46..51ad5ee147 100644 --- a/CHANGES +++ b/CHANGES @@ -21,6 +21,10 @@ jsoup changelog case. + * Improvement: allow maxPaddingWidth in OutputSettings for pretty printing. This will continue to default to 30 + so very deeply nested nodes don't get insane padding amounts but now allows for user decision. Using -1 will + result in original unlimited length. + *** Release 1.14.3 [2021-Sep-30] * Improvement: added native XPath support in Element#selectXpath(String) diff --git a/src/main/java/org/jsoup/internal/StringUtil.java b/src/main/java/org/jsoup/internal/StringUtil.java index eed2422062..d8913e5dbe 100644 --- a/src/main/java/org/jsoup/internal/StringUtil.java +++ b/src/main/java/org/jsoup/internal/StringUtil.java @@ -16,11 +16,10 @@ notice. */ public final class StringUtil { - // memoised padding up to 21 + // memoised padding up to 21 (blocks 0 to 20 spaces) static final String[] padding = {"", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " "}; - private static final int maxPaddingWidth = 30; // so very deeply nested nodes don't get insane padding amounts /** * Join a collection of strings by a separator @@ -115,17 +114,29 @@ public String complete() { } /** - * Returns space padding (up to a max of 30). + * Returns space padding (up to a max of 30). Use {@link #padding(int, int)} if you need + * more than 30 spaces padded. * @param width amount of padding desired * @return string of spaces * width - */ + */ public static String padding(int width) { + return padding(width, 30); + } + + /** + * Returns space padding (up to a max of maxPaddingWidth - default 30). + * @param width amount of padding desired + * @param maxPaddingWidth amount of max space padding with default set at 30 and -1 means no max. + * @return string of spaces * width + */ + public static String padding(int width, int maxPaddingWidth) { if (width < 0) throw new IllegalArgumentException("width must be > 0"); if (width < padding.length) return padding[width]; - width = Math.min(width, maxPaddingWidth); + if (maxPaddingWidth != -1) + width = Math.min(width, maxPaddingWidth); char[] out = new char[width]; for (int i = 0; i < width; i++) out[i] = ' '; diff --git a/src/main/java/org/jsoup/nodes/Document.java b/src/main/java/org/jsoup/nodes/Document.java index 1e1217bba7..6e5d8e3202 100644 --- a/src/main/java/org/jsoup/nodes/Document.java +++ b/src/main/java/org/jsoup/nodes/Document.java @@ -412,6 +412,7 @@ public enum Syntax {html, xml} private boolean prettyPrint = true; private boolean outline = false; private int indentAmount = 1; + private int maxPaddingWidth = 30; private Syntax syntax = Syntax.html; public OutputSettings() {} @@ -560,6 +561,27 @@ public OutputSettings indentAmount(int indentAmount) { return this; } + /** + * Get the current max padding amount, used when pretty printing + * so very deeply nested nodes don't get insane padding amounts. + * @return the current indent amount + */ + public int maxPaddingWidth() { + return maxPaddingWidth; + } + + /** + * Set the max padding amount for pretty printing so very deeply nested nodes don't get insane padding amounts. + * @param maxPaddingWidth number of spaces to use for indenting each level of nested nodes. Must be {@literal >=} -1. + * Default is 30 and -1 means unlimited. + * @return this, for chaining + */ + public OutputSettings maxPaddingWidth(int maxPaddingWidth) { + Validate.isTrue(maxPaddingWidth >= -1); + this.maxPaddingWidth = maxPaddingWidth; + return this; + } + @Override public OutputSettings clone() { OutputSettings clone; @@ -570,7 +592,7 @@ public OutputSettings clone() { } clone.charset(charset.name()); // new charset and charset encoder clone.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name()); - // indentAmount, prettyPrint are primitives so object.clone() will handle + // indentAmount, maxPaddingWidth, and prettyPrint are primitives so object.clone() will handle return clone; } } diff --git a/src/main/java/org/jsoup/nodes/Node.java b/src/main/java/org/jsoup/nodes/Node.java index 5d62478408..27886f46b0 100644 --- a/src/main/java/org/jsoup/nodes/Node.java +++ b/src/main/java/org/jsoup/nodes/Node.java @@ -683,7 +683,7 @@ public String toString() { } protected void indent(Appendable accum, int depth, Document.OutputSettings out) throws IOException { - accum.append('\n').append(StringUtil.padding(depth * out.indentAmount())); + accum.append('\n').append(StringUtil.padding(depth * out.indentAmount(), out.maxPaddingWidth())); } /** diff --git a/src/test/java/org/jsoup/internal/StringUtilTest.java b/src/test/java/org/jsoup/internal/StringUtilTest.java index 1956084bae..dc29bdde41 100644 --- a/src/test/java/org/jsoup/internal/StringUtilTest.java +++ b/src/test/java/org/jsoup/internal/StringUtilTest.java @@ -20,11 +20,35 @@ public void join() { } @Test public void padding() { - assertEquals("", StringUtil.padding(0)); - assertEquals(" ", StringUtil.padding(1)); - assertEquals(" ", StringUtil.padding(2)); - assertEquals(" ", StringUtil.padding(15)); - assertEquals(" ", StringUtil.padding(45)); // we tap out at 30 + // memoization is up to 21 blocks (0 to 20 spaces) and exits early before min checks making maxPaddingWidth unused + assertEquals("", StringUtil.padding(0, -1)); + assertEquals(" ", StringUtil.padding(20, -1)); + + // this test escapes memoization and continues through + assertEquals(" ", StringUtil.padding(21, -1)); + + // this test escapes memoization and using unlimited length (-1) will allow requested spaces + assertEquals(" ", StringUtil.padding(30, -1)); + assertEquals(" ", StringUtil.padding(45, -1)); + + // we tap out at 0 for this test + assertEquals("", StringUtil.padding(0, 0)); + + // we tap out at 5 for this test because we do not escape memoization + assertEquals(" ", StringUtil.padding(5, 0)); + + // as memoization is escaped, setting zero for max padding will not allow any requested width + assertEquals("", StringUtil.padding(21, 0)); + + // we tap out at 30 for these tests making > 30 use 30 + assertEquals("", StringUtil.padding(0, 30)); + assertEquals(" ", StringUtil.padding(1, 30)); + assertEquals(" ", StringUtil.padding(2, 30)); + assertEquals(" ", StringUtil.padding(15, 30)); + assertEquals(" ", StringUtil.padding(45, 30)); + + // Testing deprecated version capped at 30 + assertEquals(" ", StringUtil.padding(45)); } @Test public void paddingInACan() { From 5621250536efafb584938118a3dee195fd679ca6 Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Thu, 21 Oct 2021 13:44:07 +1100 Subject: [PATCH 2/2] Make sure the max indent is applied regardless of canned indents And added a test that the max level set in OutputSettings is applied. --- CHANGES | 8 ++--- .../java/org/jsoup/internal/StringUtil.java | 17 +++++----- .../org/jsoup/internal/StringUtilTest.java | 13 +++++--- .../java/org/jsoup/nodes/ElementTest.java | 33 +++++++++++++++++++ 4 files changed, 53 insertions(+), 18 deletions(-) diff --git a/CHANGES b/CHANGES index 51ad5ee147..45bffa40fb 100644 --- a/CHANGES +++ b/CHANGES @@ -17,14 +17,14 @@ jsoup changelog element, e.g. ancestor-or-self::*. + * Improvement: allow a maxPaddingWidth on the indent level in OutputSettings when pretty printing. This defaults to + 30 to limit the indent level for very deeply nested elements, and may be disabled by setting to -1. + + * Bugfix: boolean attribute names should be case-insensitive, but were not when the parser was configured to preserve case. - * Improvement: allow maxPaddingWidth in OutputSettings for pretty printing. This will continue to default to 30 - so very deeply nested nodes don't get insane padding amounts but now allows for user decision. Using -1 will - result in original unlimited length. - *** Release 1.14.3 [2021-Sep-30] * Improvement: added native XPath support in Element#selectXpath(String) diff --git a/src/main/java/org/jsoup/internal/StringUtil.java b/src/main/java/org/jsoup/internal/StringUtil.java index d8913e5dbe..1882f2a9f1 100644 --- a/src/main/java/org/jsoup/internal/StringUtil.java +++ b/src/main/java/org/jsoup/internal/StringUtil.java @@ -114,29 +114,28 @@ public String complete() { } /** - * Returns space padding (up to a max of 30). Use {@link #padding(int, int)} if you need - * more than 30 spaces padded. + * Returns space padding (up to the default max of 30). Use {@link #padding(int, int)} to specify a different limit. * @param width amount of padding desired * @return string of spaces * width + * @see #padding(int, int) */ public static String padding(int width) { return padding(width, 30); } /** - * Returns space padding (up to a max of maxPaddingWidth - default 30). + * Returns space padding, up to a max of maxPaddingWidth. * @param width amount of padding desired - * @param maxPaddingWidth amount of max space padding with default set at 30 and -1 means no max. + * @param maxPaddingWidth maximum padding to apply. Set to {@code -1} for unlimited. * @return string of spaces * width */ public static String padding(int width, int maxPaddingWidth) { - if (width < 0) - throw new IllegalArgumentException("width must be > 0"); - - if (width < padding.length) - return padding[width]; + Validate.isTrue(width >= 0, "width must be >= 0"); + Validate.isTrue(maxPaddingWidth >= -1); if (maxPaddingWidth != -1) width = Math.min(width, maxPaddingWidth); + if (width < padding.length) + return padding[width]; char[] out = new char[width]; for (int i = 0; i < width; i++) out[i] = ' '; diff --git a/src/test/java/org/jsoup/internal/StringUtilTest.java b/src/test/java/org/jsoup/internal/StringUtilTest.java index dc29bdde41..2f4fff5da0 100644 --- a/src/test/java/org/jsoup/internal/StringUtilTest.java +++ b/src/test/java/org/jsoup/internal/StringUtilTest.java @@ -20,6 +20,12 @@ public void join() { } @Test public void padding() { + assertEquals("", StringUtil.padding(0)); + assertEquals(" ", StringUtil.padding(1)); + assertEquals(" ", StringUtil.padding(2)); + assertEquals(" ", StringUtil.padding(15)); + assertEquals(" ", StringUtil.padding(45)); // we default to tap out at 30 + // memoization is up to 21 blocks (0 to 20 spaces) and exits early before min checks making maxPaddingWidth unused assertEquals("", StringUtil.padding(0, -1)); assertEquals(" ", StringUtil.padding(20, -1)); @@ -34,9 +40,6 @@ public void join() { // we tap out at 0 for this test assertEquals("", StringUtil.padding(0, 0)); - // we tap out at 5 for this test because we do not escape memoization - assertEquals(" ", StringUtil.padding(5, 0)); - // as memoization is escaped, setting zero for max padding will not allow any requested width assertEquals("", StringUtil.padding(21, 0)); @@ -47,8 +50,8 @@ public void join() { assertEquals(" ", StringUtil.padding(15, 30)); assertEquals(" ", StringUtil.padding(45, 30)); - // Testing deprecated version capped at 30 - assertEquals(" ", StringUtil.padding(45)); + // max applies regardless of memoized + assertEquals(5, StringUtil.padding(20, 5).length()); } @Test public void paddingInACan() { diff --git a/src/test/java/org/jsoup/nodes/ElementTest.java b/src/test/java/org/jsoup/nodes/ElementTest.java index 52afb83092..563442bd04 100644 --- a/src/test/java/org/jsoup/nodes/ElementTest.java +++ b/src/test/java/org/jsoup/nodes/ElementTest.java @@ -2,6 +2,7 @@ import org.jsoup.Jsoup; import org.jsoup.TextUtil; +import org.jsoup.internal.StringUtil; import org.jsoup.parser.ParseSettings; import org.jsoup.parser.Parser; import org.jsoup.parser.Tag; @@ -430,6 +431,38 @@ public void testSetIndent() { assertEquals("\n\n\n
\n

Hello there

\n
\n\n", doc.html()); } + @Test void testIndentLevel() { + // deep to test default and extended max + StringBuilder divs = new StringBuilder(); + for (int i = 0; i < 40; i++) { + divs.append("
"); + } + divs.append("Foo"); + Document doc = Jsoup.parse(divs.toString()); + Document.OutputSettings settings = doc.outputSettings(); + + int defaultMax = 30; + assertEquals(defaultMax, settings.maxPaddingWidth()); + String html = doc.html(); + assertTrue(html.contains("
\n" + + " Foo\n" + + "
")); + + settings.maxPaddingWidth(32); + assertEquals(32, settings.maxPaddingWidth()); + html = doc.html(); + assertTrue(html.contains("
\n" + + " Foo\n" + + "
")); + + settings.maxPaddingWidth(-1); + assertEquals(-1, settings.maxPaddingWidth()); + html = doc.html(); + assertTrue(html.contains("
\n" + + " Foo\n" + + "
")); + } + @Test public void testNotPretty() { Document doc = Jsoup.parse("
\n

Hello\n there\n

");