Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose maxPaddingWidth in OutputSettings keeping default as 30 #1655

Merged
merged 2 commits into from Oct 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES
Expand Up @@ -17,6 +17,10 @@ jsoup changelog
element, e.g. ancestor-or-self::*.
<https://github.com/jhy/jsoup/issues/1652>

* Improvement: allow a maxPaddingWidth on the indent level in OutputSettings when pretty printing. This defaults to
30 to limit the indent level for very deeply nested elements, and may be disabled by setting to -1.
<https://github.com/jhy/jsoup/pull/1655>

* Bugfix: boolean attribute names should be case-insensitive, but were not when the parser was configured to preserve
case.
<https://github.com/jhy/jsoup/issues/1656>
Expand Down
26 changes: 18 additions & 8 deletions src/main/java/org/jsoup/internal/StringUtil.java
Expand Up @@ -16,11 +16,10 @@
notice.
*/
public final class StringUtil {
// memoised padding up to 21
// memoised padding up to 21 (blocks 0 to 20 spaces)
static final String[] padding = {"", " ", " ", " ", " ", " ", " ", " ", " ",
" ", " ", " ", " ", " ", " ", " ",
" ", " ", " ", " ", " "};
private static final int maxPaddingWidth = 30; // so very deeply nested nodes don't get insane padding amounts

/**
* Join a collection of strings by a separator
Expand Down Expand Up @@ -115,17 +114,28 @@ public String complete() {
}

/**
* Returns space padding (up to a max of 30).
* Returns space padding (up to the default max of 30). Use {@link #padding(int, int)} to specify a different limit.
* @param width amount of padding desired
* @return string of spaces * width
*/
* @see #padding(int, int)
*/
public static String padding(int width) {
if (width < 0)
throw new IllegalArgumentException("width must be > 0");
return padding(width, 30);
}

/**
* Returns space padding, up to a max of maxPaddingWidth.
* @param width amount of padding desired
* @param maxPaddingWidth maximum padding to apply. Set to {@code -1} for unlimited.
* @return string of spaces * width
*/
public static String padding(int width, int maxPaddingWidth) {
Validate.isTrue(width >= 0, "width must be >= 0");
Validate.isTrue(maxPaddingWidth >= -1);
if (maxPaddingWidth != -1)
width = Math.min(width, maxPaddingWidth);
if (width < padding.length)
return padding[width];
width = Math.min(width, maxPaddingWidth);
return padding[width];
char[] out = new char[width];
for (int i = 0; i < width; i++)
out[i] = ' ';
Expand Down
24 changes: 23 additions & 1 deletion src/main/java/org/jsoup/nodes/Document.java
Expand Up @@ -412,6 +412,7 @@ public enum Syntax {html, xml}
private boolean prettyPrint = true;
private boolean outline = false;
private int indentAmount = 1;
private int maxPaddingWidth = 30;
private Syntax syntax = Syntax.html;

public OutputSettings() {}
Expand Down Expand Up @@ -560,6 +561,27 @@ public OutputSettings indentAmount(int indentAmount) {
return this;
}

/**
* Get the current max padding amount, used when pretty printing
* so very deeply nested nodes don't get insane padding amounts.
* @return the current indent amount
*/
public int maxPaddingWidth() {
return maxPaddingWidth;
}

/**
* Set the max padding amount for pretty printing so very deeply nested nodes don't get insane padding amounts.
* @param maxPaddingWidth number of spaces to use for indenting each level of nested nodes. Must be {@literal >=} -1.
* Default is 30 and -1 means unlimited.
* @return this, for chaining
*/
public OutputSettings maxPaddingWidth(int maxPaddingWidth) {
Validate.isTrue(maxPaddingWidth >= -1);
this.maxPaddingWidth = maxPaddingWidth;
return this;
}

@Override
public OutputSettings clone() {
OutputSettings clone;
Expand All @@ -570,7 +592,7 @@ public OutputSettings clone() {
}
clone.charset(charset.name()); // new charset and charset encoder
clone.escapeMode = Entities.EscapeMode.valueOf(escapeMode.name());
// indentAmount, prettyPrint are primitives so object.clone() will handle
// indentAmount, maxPaddingWidth, and prettyPrint are primitives so object.clone() will handle
return clone;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jsoup/nodes/Node.java
Expand Up @@ -683,7 +683,7 @@ public String toString() {
}

protected void indent(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
accum.append('\n').append(StringUtil.padding(depth * out.indentAmount()));
accum.append('\n').append(StringUtil.padding(depth * out.indentAmount(), out.maxPaddingWidth()));
}

/**
Expand Down
29 changes: 28 additions & 1 deletion src/test/java/org/jsoup/internal/StringUtilTest.java
Expand Up @@ -24,7 +24,34 @@ public void join() {
assertEquals(" ", StringUtil.padding(1));
assertEquals(" ", StringUtil.padding(2));
assertEquals(" ", StringUtil.padding(15));
assertEquals(" ", StringUtil.padding(45)); // we tap out at 30
assertEquals(" ", StringUtil.padding(45)); // we default to tap out at 30

// memoization is up to 21 blocks (0 to 20 spaces) and exits early before min checks making maxPaddingWidth unused
assertEquals("", StringUtil.padding(0, -1));
assertEquals(" ", StringUtil.padding(20, -1));

// this test escapes memoization and continues through
assertEquals(" ", StringUtil.padding(21, -1));

// this test escapes memoization and using unlimited length (-1) will allow requested spaces
assertEquals(" ", StringUtil.padding(30, -1));
assertEquals(" ", StringUtil.padding(45, -1));

// we tap out at 0 for this test
assertEquals("", StringUtil.padding(0, 0));

// as memoization is escaped, setting zero for max padding will not allow any requested width
assertEquals("", StringUtil.padding(21, 0));

// we tap out at 30 for these tests making > 30 use 30
assertEquals("", StringUtil.padding(0, 30));
assertEquals(" ", StringUtil.padding(1, 30));
assertEquals(" ", StringUtil.padding(2, 30));
assertEquals(" ", StringUtil.padding(15, 30));
assertEquals(" ", StringUtil.padding(45, 30));

// max applies regardless of memoized
assertEquals(5, StringUtil.padding(20, 5).length());
}

@Test public void paddingInACan() {
Expand Down
33 changes: 33 additions & 0 deletions src/test/java/org/jsoup/nodes/ElementTest.java
Expand Up @@ -2,6 +2,7 @@

import org.jsoup.Jsoup;
import org.jsoup.TextUtil;
import org.jsoup.internal.StringUtil;
import org.jsoup.parser.ParseSettings;
import org.jsoup.parser.Parser;
import org.jsoup.parser.Tag;
Expand Down Expand Up @@ -430,6 +431,38 @@ public void testSetIndent() {
assertEquals("<html>\n<head></head>\n<body>\n<div>\n<p>Hello there</p>\n</div>\n</body>\n</html>", doc.html());
}

@Test void testIndentLevel() {
// deep to test default and extended max
StringBuilder divs = new StringBuilder();
for (int i = 0; i < 40; i++) {
divs.append("<div>");
}
divs.append("Foo");
Document doc = Jsoup.parse(divs.toString());
Document.OutputSettings settings = doc.outputSettings();

int defaultMax = 30;
assertEquals(defaultMax, settings.maxPaddingWidth());
String html = doc.html();
assertTrue(html.contains(" <div>\n" +
" Foo\n" +
" </div>"));

settings.maxPaddingWidth(32);
assertEquals(32, settings.maxPaddingWidth());
html = doc.html();
assertTrue(html.contains(" <div>\n" +
" Foo\n" +
" </div>"));

settings.maxPaddingWidth(-1);
assertEquals(-1, settings.maxPaddingWidth());
html = doc.html();
assertTrue(html.contains(" <div>\n" +
" Foo\n" +
" </div>"));
}

@Test
public void testNotPretty() {
Document doc = Jsoup.parse("<div> \n<p>Hello\n there\n</p></div>");
Expand Down