Skip to content

Commit

Permalink
Expose maxPaddingWidth in OutputSettings keeping default as 30 (#1655)
Browse files Browse the repository at this point in the history
Co-authored-by: Jonathan Hedley <jonathan@hedley.net>
  • Loading branch information
hazendaz and jhy committed Oct 21, 2021
1 parent 7eb5a74 commit 3bd4d79
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 11 deletions.
4 changes: 4 additions & 0 deletions CHANGES
Expand Up @@ -17,6 +17,10 @@ jsoup changelog
element, e.g. ancestor-or-self::*.
<https://github.com/jhy/jsoup/issues/1652>

* Improvement: allow a maxPaddingWidth on the indent level in OutputSettings when pretty printing. This defaults to
30 to limit the indent level for very deeply nested elements, and may be disabled by setting to -1.
<https://github.com/jhy/jsoup/pull/1655>

* Bugfix: boolean attribute names should be case-insensitive, but were not when the parser was configured to preserve
case.
<https://github.com/jhy/jsoup/issues/1656>
Expand Down
26 changes: 18 additions & 8 deletions src/main/java/org/jsoup/internal/StringUtil.java
Expand Up @@ -16,11 +16,10 @@
notice.
*/
public final class StringUtil {
// memoised padding up to 21
// memoised padding up to 21 (blocks 0 to 20 spaces)
static final String[] padding = {"", " ", " ", " ", " ", " ", " ", " ", " ",
" ", " ", " ", " ", " ", " ", " ",
" ", " ", " ", " ", " "};
private static final int maxPaddingWidth = 30; // so very deeply nested nodes don't get insane padding amounts

/**
* Join a collection of strings by a separator
Expand Down Expand Up @@ -115,17 +114,28 @@ public String complete() {
}

/**
* Returns space padding (up to a max of 30).
* Returns space padding (up to the default max of 30). Use {@link #padding(int, int)} to specify a different limit.
* @param width amount of padding desired
* @return string of spaces * width
*/
* @see #padding(int, int)
*/
public static String padding(int width) {
if (width < 0)
throw new IllegalArgumentException("width must be > 0");
return padding(width, 30);
}

/**
* Returns space padding, up to a max of maxPaddingWidth.
* @param width amount of padding desired
* @param maxPaddingWidth maximum padding to apply. Set to {@code -1} for unlimited.
* @return string of spaces * width
*/
public static String padding(int width, int maxPaddingWidth) {
Validate.isTrue(width >= 0, "width must be >= 0");
Validate.isTrue(maxPaddingWidth >= -1);
if (maxPaddingWidth != -1)
width = Math.min(width, maxPaddingWidth);
if (width < padding.length)
return padding[width];
width = Math.min(width, maxPaddingWidth);
return padding[width];
char[] out = new char[width];
for (int i = 0; i < width; i++)
out[i] = ' ';
Expand Down
24 changes: 23 additions & 1 deletion src/main/java/org/jsoup/nodes/Document.java
Expand Up @@ -412,6 +412,7 @@ public enum Syntax {html, xml}
private boolean prettyPrint = true;
private boolean outline = false;
private int indentAmount = 1;
private int maxPaddingWidth = 30;
private Syntax syntax = Syntax.html;

public OutputSettings() {}
Expand Down Expand Up @@ -560,6 +561,27 @@ public OutputSettings indentAmount(int indentAmount) {
return this;
}

/**
* Get the current max padding amount, used when pretty printing
* so very deeply nested nodes don't get insane padding amounts.
* @return the current indent amount
*/
public int maxPaddingWidth() {
return maxPaddingWidth;
}

/**
* Set the max padding amount for pretty printing so very deeply nested nodes don't get insane padding amounts.
* @param maxPaddingWidth number of spaces to use for indenting each level of nested nodes. Must be {@literal >=} -1.
* Default is 30 and -1 means unlimited.
* @return this, for chaining
*/
public OutputSettings maxPaddingWidth(int maxPaddingWidth) {
Validate.isTrue(maxPaddingWidth >= -1);
this.maxPaddingWidth = maxPaddingWidth;
return this;
}

@Override
public OutputSettings clone() {
OutputSettings clone;
Expand All @@ -570,7 +592,7 @@ public OutputSettings clone() {
}
clone.charset(charset.name()); // new charset and charset encoder
clone.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name());
// indentAmount, prettyPrint are primitives so object.clone() will handle
// indentAmount, maxPaddingWidth, and prettyPrint are primitives so object.clone() will handle
return clone;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jsoup/nodes/Node.java
Expand Up @@ -683,7 +683,7 @@ public String toString() {
}

protected void indent(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
accum.append('\n').append(StringUtil.padding(depth * out.indentAmount()));
accum.append('\n').append(StringUtil.padding(depth * out.indentAmount(), out.maxPaddingWidth()));
}

/**
Expand Down
29 changes: 28 additions & 1 deletion src/test/java/org/jsoup/internal/StringUtilTest.java
Expand Up @@ -24,7 +24,34 @@ public void join() {
assertEquals(" ", StringUtil.padding(1));
assertEquals(" ", StringUtil.padding(2));
assertEquals(" ", StringUtil.padding(15));
assertEquals(" ", StringUtil.padding(45)); // we tap out at 30
assertEquals(" ", StringUtil.padding(45)); // we default to tap out at 30

// memoization is up to 21 blocks (0 to 20 spaces) and exits early before min checks making maxPaddingWidth unused
assertEquals("", StringUtil.padding(0, -1));
assertEquals(" ", StringUtil.padding(20, -1));

// this test escapes memoization and continues through
assertEquals(" ", StringUtil.padding(21, -1));

// this test escapes memoization and using unlimited length (-1) will allow requested spaces
assertEquals(" ", StringUtil.padding(30, -1));
assertEquals(" ", StringUtil.padding(45, -1));

// we tap out at 0 for this test
assertEquals("", StringUtil.padding(0, 0));

// as memoization is escaped, setting zero for max padding will not allow any requested width
assertEquals("", StringUtil.padding(21, 0));

// we tap out at 30 for these tests making > 30 use 30
assertEquals("", StringUtil.padding(0, 30));
assertEquals(" ", StringUtil.padding(1, 30));
assertEquals(" ", StringUtil.padding(2, 30));
assertEquals(" ", StringUtil.padding(15, 30));
assertEquals(" ", StringUtil.padding(45, 30));

// max applies regardless of memoized
assertEquals(5, StringUtil.padding(20, 5).length());
}

@Test public void paddingInACan() {
Expand Down
33 changes: 33 additions & 0 deletions src/test/java/org/jsoup/nodes/ElementTest.java
Expand Up @@ -2,6 +2,7 @@

import org.jsoup.Jsoup;
import org.jsoup.TextUtil;
import org.jsoup.internal.StringUtil;
import org.jsoup.parser.ParseSettings;
import org.jsoup.parser.Parser;
import org.jsoup.parser.Tag;
Expand Down Expand Up @@ -430,6 +431,38 @@ public void testSetIndent() {
assertEquals("<html>\n<head></head>\n<body>\n<div>\n<p>Hello there</p>\n</div>\n</body>\n</html>", doc.html());
}

@Test void testIndentLevel() {
// deep to test default and extended max
StringBuilder divs = new StringBuilder();
for (int i = 0; i < 40; i++) {
divs.append("<div>");
}
divs.append("Foo");
Document doc = Jsoup.parse(divs.toString());
Document.OutputSettings settings = doc.outputSettings();

int defaultMax = 30;
assertEquals(defaultMax, settings.maxPaddingWidth());
String html = doc.html();
assertTrue(html.contains(" <div>\n" +
" Foo\n" +
" </div>"));

settings.maxPaddingWidth(32);
assertEquals(32, settings.maxPaddingWidth());
html = doc.html();
assertTrue(html.contains(" <div>\n" +
" Foo\n" +
" </div>"));

settings.maxPaddingWidth(-1);
assertEquals(-1, settings.maxPaddingWidth());
html = doc.html();
assertTrue(html.contains(" <div>\n" +
" Foo\n" +
" </div>"));
}

@Test
public void testNotPretty() {
Document doc = Jsoup.parse("<div> \n<p>Hello\n there\n</p></div>");
Expand Down

0 comments on commit 3bd4d79

Please sign in to comment.