diff --git a/CHANGES b/CHANGES index eda0d2bc46..45bffa40fb 100644 --- a/CHANGES +++ b/CHANGES @@ -17,6 +17,10 @@ jsoup changelog element, e.g. ancestor-or-self::*. + * Improvement: allow a maxPaddingWidth on the indent level in OutputSettings when pretty printing. This defaults to + 30 to limit the indent level for very deeply nested elements, and may be disabled by setting to -1. + + * Bugfix: boolean attribute names should be case-insensitive, but were not when the parser was configured to preserve case. diff --git a/src/main/java/org/jsoup/internal/StringUtil.java b/src/main/java/org/jsoup/internal/StringUtil.java index eed2422062..1882f2a9f1 100644 --- a/src/main/java/org/jsoup/internal/StringUtil.java +++ b/src/main/java/org/jsoup/internal/StringUtil.java @@ -16,11 +16,10 @@ notice. */ public final class StringUtil { - // memoised padding up to 21 + // memoised padding up to 21 (blocks 0 to 20 spaces) static final String[] padding = {"", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " "}; - private static final int maxPaddingWidth = 30; // so very deeply nested nodes don't get insane padding amounts /** * Join a collection of strings by a separator @@ -115,17 +114,28 @@ public String complete() { } /** - * Returns space padding (up to a max of 30). + * Returns space padding (up to the default max of 30). Use {@link #padding(int, int)} to specify a different limit. * @param width amount of padding desired * @return string of spaces * width - */ + * @see #padding(int, int) + */ public static String padding(int width) { - if (width < 0) - throw new IllegalArgumentException("width must be > 0"); + return padding(width, 30); + } + /** + * Returns space padding, up to a max of maxPaddingWidth. + * @param width amount of padding desired + * @param maxPaddingWidth maximum padding to apply. Set to {@code -1} for unlimited. + * @return string of spaces * width + */ + public static String padding(int width, int maxPaddingWidth) { + Validate.isTrue(width >= 0, "width must be >= 0"); + Validate.isTrue(maxPaddingWidth >= -1); + if (maxPaddingWidth != -1) + width = Math.min(width, maxPaddingWidth); if (width < padding.length) - return padding[width]; - width = Math.min(width, maxPaddingWidth); + return padding[width]; char[] out = new char[width]; for (int i = 0; i < width; i++) out[i] = ' '; diff --git a/src/main/java/org/jsoup/nodes/Document.java b/src/main/java/org/jsoup/nodes/Document.java index 1e1217bba7..6e5d8e3202 100644 --- a/src/main/java/org/jsoup/nodes/Document.java +++ b/src/main/java/org/jsoup/nodes/Document.java @@ -412,6 +412,7 @@ public enum Syntax {html, xml} private boolean prettyPrint = true; private boolean outline = false; private int indentAmount = 1; + private int maxPaddingWidth = 30; private Syntax syntax = Syntax.html; public OutputSettings() {} @@ -560,6 +561,27 @@ public OutputSettings indentAmount(int indentAmount) { return this; } + /** + * Get the current max padding amount, used when pretty printing + * so very deeply nested nodes don't get insane padding amounts. + * @return the current indent amount + */ + public int maxPaddingWidth() { + return maxPaddingWidth; + } + + /** + * Set the max padding amount for pretty printing so very deeply nested nodes don't get insane padding amounts. + * @param maxPaddingWidth number of spaces to use for indenting each level of nested nodes. Must be {@literal >=} -1. + * Default is 30 and -1 means unlimited. + * @return this, for chaining + */ + public OutputSettings maxPaddingWidth(int maxPaddingWidth) { + Validate.isTrue(maxPaddingWidth >= -1); + this.maxPaddingWidth = maxPaddingWidth; + return this; + } + @Override public OutputSettings clone() { OutputSettings clone; @@ -570,7 +592,7 @@ public OutputSettings clone() { } clone.charset(charset.name()); // new charset and charset encoder clone.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name()); - // indentAmount, prettyPrint are primitives so object.clone() will handle + // indentAmount, maxPaddingWidth, and prettyPrint are primitives so object.clone() will handle return clone; } } diff --git a/src/main/java/org/jsoup/nodes/Node.java b/src/main/java/org/jsoup/nodes/Node.java index 5d62478408..27886f46b0 100644 --- a/src/main/java/org/jsoup/nodes/Node.java +++ b/src/main/java/org/jsoup/nodes/Node.java @@ -683,7 +683,7 @@ public String toString() { } protected void indent(Appendable accum, int depth, Document.OutputSettings out) throws IOException { - accum.append('\n').append(StringUtil.padding(depth * out.indentAmount())); + accum.append('\n').append(StringUtil.padding(depth * out.indentAmount(), out.maxPaddingWidth())); } /** diff --git a/src/test/java/org/jsoup/internal/StringUtilTest.java b/src/test/java/org/jsoup/internal/StringUtilTest.java index 1956084bae..2f4fff5da0 100644 --- a/src/test/java/org/jsoup/internal/StringUtilTest.java +++ b/src/test/java/org/jsoup/internal/StringUtilTest.java @@ -24,7 +24,34 @@ public void join() { assertEquals(" ", StringUtil.padding(1)); assertEquals(" ", StringUtil.padding(2)); assertEquals(" ", StringUtil.padding(15)); - assertEquals(" ", StringUtil.padding(45)); // we tap out at 30 + assertEquals(" ", StringUtil.padding(45)); // we default to tap out at 30 + + // memoization is up to 21 blocks (0 to 20 spaces) and exits early before min checks making maxPaddingWidth unused + assertEquals("", StringUtil.padding(0, -1)); + assertEquals(" ", StringUtil.padding(20, -1)); + + // this test escapes memoization and continues through + assertEquals(" ", StringUtil.padding(21, -1)); + + // this test escapes memoization and using unlimited length (-1) will allow requested spaces + assertEquals(" ", StringUtil.padding(30, -1)); + assertEquals(" ", StringUtil.padding(45, -1)); + + // we tap out at 0 for this test + assertEquals("", StringUtil.padding(0, 0)); + + // as memoization is escaped, setting zero for max padding will not allow any requested width + assertEquals("", StringUtil.padding(21, 0)); + + // we tap out at 30 for these tests making > 30 use 30 + assertEquals("", StringUtil.padding(0, 30)); + assertEquals(" ", StringUtil.padding(1, 30)); + assertEquals(" ", StringUtil.padding(2, 30)); + assertEquals(" ", StringUtil.padding(15, 30)); + assertEquals(" ", StringUtil.padding(45, 30)); + + // max applies regardless of memoized + assertEquals(5, StringUtil.padding(20, 5).length()); } @Test public void paddingInACan() { diff --git a/src/test/java/org/jsoup/nodes/ElementTest.java b/src/test/java/org/jsoup/nodes/ElementTest.java index 52afb83092..563442bd04 100644 --- a/src/test/java/org/jsoup/nodes/ElementTest.java +++ b/src/test/java/org/jsoup/nodes/ElementTest.java @@ -2,6 +2,7 @@ import org.jsoup.Jsoup; import org.jsoup.TextUtil; +import org.jsoup.internal.StringUtil; import org.jsoup.parser.ParseSettings; import org.jsoup.parser.Parser; import org.jsoup.parser.Tag; @@ -430,6 +431,38 @@ public void testSetIndent() { assertEquals("\n\n\n
\n

Hello there

\n
\n\n", doc.html()); } + @Test void testIndentLevel() { + // deep to test default and extended max + StringBuilder divs = new StringBuilder(); + for (int i = 0; i < 40; i++) { + divs.append("
"); + } + divs.append("Foo"); + Document doc = Jsoup.parse(divs.toString()); + Document.OutputSettings settings = doc.outputSettings(); + + int defaultMax = 30; + assertEquals(defaultMax, settings.maxPaddingWidth()); + String html = doc.html(); + assertTrue(html.contains("
\n" + + " Foo\n" + + "
")); + + settings.maxPaddingWidth(32); + assertEquals(32, settings.maxPaddingWidth()); + html = doc.html(); + assertTrue(html.contains("
\n" + + " Foo\n" + + "
")); + + settings.maxPaddingWidth(-1); + assertEquals(-1, settings.maxPaddingWidth()); + html = doc.html(); + assertTrue(html.contains("
\n" + + " Foo\n" + + "
")); + } + @Test public void testNotPretty() { Document doc = Jsoup.parse("
\n

Hello\n there\n

");