From fc41ec98eaf2a9f803602c4d2dea6703edbdf4c7 Mon Sep 17 00:00:00 2001
From: Jonathan Hedley <jonathan@hedley.net>
Date: Fri, 24 Jun 2022 16:32:10 +1000
Subject: [PATCH] Trim leading and trailing spaces in blocks when appropriate

Improved whitespace and indenting behaviour when pretty printing HTML. This corrects the indenting level of textnodes (that were indented in the original source), and improves the round-trip parse/output of pretty-printed HTML.

Trims leading and trailing whitespace for block elements when that whitespace is not part of the text flow.

As an implementation note: the pretty-printing code is getting too dispersed in the various node methods. Particularly, some aspects are not symmetric (e.g. elements and textnode indent, or head and tail). It would be a useful refactoring to make a pretty-print class and contain all this logic, and make the traversor use that and include a little more state (if last indented, if this is meaningful whitespace, etc).

Fixes #1798
---
 CHANGES                                       |  6 +++-
 src/main/java/org/jsoup/nodes/Attribute.java  |  2 +-
 src/main/java/org/jsoup/nodes/Entities.java   | 17 ++++++---
 src/main/java/org/jsoup/nodes/TextNode.java   | 36 +++++++++----------
 .../java/org/jsoup/nodes/XmlDeclaration.java  |  2 +-
 .../java/org/jsoup/nodes/DocumentTest.java    |  6 ++--
 .../java/org/jsoup/nodes/ElementTest.java     | 29 +++++++++++++++
 .../java/org/jsoup/parser/HtmlParserTest.java | 19 +++++-----
 .../parser/HtmlTreeBuilderStateTest.java      | 10 +++---
 .../java/org/jsoup/select/TraversorTest.java  |  2 +-
 src/test/resources/htmltests/medium.html      |  4 +--
 11 files changed, 86 insertions(+), 47 deletions(-)

diff --git a/CHANGES b/CHANGES
index 28d545582e..33ed77eb49 100644
--- a/CHANGES
+++ b/CHANGES
@@ -12,9 +12,13 @@ jsoup changelog
     a null if there is no match, will throw an IllegalArgumentException. This is useful if you want to simply abort
     processing if an expected match is not found.
 
-  * Improvement: when pretty-printing HTML, doctypes are emitted on a newline if there is a preceeding comment.
+  * Improvement: when pretty-printing HTML, doctypes are emitted on a newline if there is a preceding comment.
     <https://github.com/jhy/jsoup/pull/1664>
 
+  * Improvement: when pretty-printing, trim the leading and trailing spaces of textnodes in block tags when possible,
+    so that they are indented correctly.
+    <https://github.com/jhy/jsoup/issues/1798>
+
   * Bugfix: when using the readToByteBuffer method, such as in Connection.Response.body(), if the document has not
     already been parsed and must be read fully, and there is any maximum buffer size being applied, only the default
     internal buffer size is read.
diff --git a/src/main/java/org/jsoup/nodes/Attribute.java b/src/main/java/org/jsoup/nodes/Attribute.java
index 4106bd8a0f..56a34b49bf 100644
--- a/src/main/java/org/jsoup/nodes/Attribute.java
+++ b/src/main/java/org/jsoup/nodes/Attribute.java
@@ -142,7 +142,7 @@ static void htmlNoValidate(String key, @Nullable String val, Appendable accum, D
         accum.append(key);
         if (!shouldCollapseAttribute(key, val, out)) {
             accum.append("=\"");
-            Entities.escape(accum, Attributes.checkNotNull(val) , out, true, false, false);
+            Entities.escape(accum, Attributes.checkNotNull(val) , out, true, false, false, false);
             accum.append('"');
         }
     }
diff --git a/src/main/java/org/jsoup/nodes/Entities.java b/src/main/java/org/jsoup/nodes/Entities.java
index 732bf34d1e..d330005d79 100644
--- a/src/main/java/org/jsoup/nodes/Entities.java
+++ b/src/main/java/org/jsoup/nodes/Entities.java
@@ -142,7 +142,7 @@ public static String escape(String string, OutputSettings out) {
             return "";
         StringBuilder accum = StringUtil.borrowBuilder();
         try {
-            escape(accum, string, out, false, false, false);
+            escape(accum, string, out, false, false, false, false);
         } catch (IOException e) {
             throw new SerializationException(e); // doesn't happen
         }
@@ -160,9 +160,9 @@ public static String escape(String string) {
         return escape(string, DefaultOutput);
     }
 
-    // this method is ugly, and does a lot. but other breakups cause rescanning and stringbuilder generations
+    // this method does a lot, but other breakups cause rescanning and stringbuilder generations
     static void escape(Appendable accum, String string, OutputSettings out,
-                       boolean inAttribute, boolean normaliseWhite, boolean stripLeadingWhite) throws IOException {
+                       boolean inAttribute, boolean normaliseWhite, boolean stripLeadingWhite, boolean trimTrailing) throws IOException {
 
         boolean lastWasWhite = false;
         boolean reachedNonWhite = false;
@@ -172,19 +172,28 @@ static void escape(Appendable accum, String string, OutputSettings out,
         final int length = string.length();
 
         int codePoint;
+        boolean skipped = false;
         for (int offset = 0; offset < length; offset += Character.charCount(codePoint)) {
             codePoint = string.codePointAt(offset);
 
             if (normaliseWhite) {
                 if (StringUtil.isWhitespace(codePoint)) {
-                    if ((stripLeadingWhite && !reachedNonWhite) || lastWasWhite)
+                    if (stripLeadingWhite && !reachedNonWhite) continue;
+                    if (lastWasWhite) continue;
+                    if (trimTrailing) {
+                        skipped = true;
                         continue;
+                    }
                     accum.append(' ');
                     lastWasWhite = true;
                     continue;
                 } else {
                     lastWasWhite = false;
                     reachedNonWhite = true;
+                    if (skipped) {
+                        accum.append(' '); // wasn't the end, so need to place a normalized space
+                        skipped = false;
+                    }
                 }
             }
             // surrogate pairs, split implementation for efficiency on single char common case (saves creating strings, char[]):
diff --git a/src/main/java/org/jsoup/nodes/TextNode.java b/src/main/java/org/jsoup/nodes/TextNode.java
index e32a0dbf13..4dbd116818 100644
--- a/src/main/java/org/jsoup/nodes/TextNode.java
+++ b/src/main/java/org/jsoup/nodes/TextNode.java
@@ -83,31 +83,27 @@ public TextNode splitText(int offset) {
     void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
         final boolean prettyPrint = out.prettyPrint();
         final Element parent = parentNode instanceof Element ? ((Element) parentNode) : null;
-        final boolean blank = isBlank();
         final boolean normaliseWhite = prettyPrint && !Element.preserveWhitespace(parentNode);
 
-        // if this text is just whitespace, and the next node will cause an indent, skip this text:
-        if (normaliseWhite && blank) {
-            boolean canSkip = false;
+        boolean trimLeading = false;
+        boolean trimTrailing = false;
+        if (normaliseWhite) {
+            trimLeading = (siblingIndex == 0 && parent != null && parent.tag().isBlock()) ||
+                parentNode instanceof Document;
+            trimTrailing = nextSibling() == null && parent != null && parent.tag().isBlock();
+
+            // if this text is just whitespace, and the next node will cause an indent, skip this text:
             Node next = this.nextSibling();
-            if (next instanceof Element) {
-                Element nextEl = (Element) next;
-                canSkip = nextEl.shouldIndent(out);
-            } else if (next == null && parent != null) { // we are the last child, check parent
-                canSkip = parent.shouldIndent(out);
-            } else if (next instanceof TextNode && (((TextNode) next).isBlank())) {
-                // sometimes get a run of textnodes from parser if nodes are re-parented
-                canSkip = true;
-            }
-            if (canSkip)
-                return;
-        }
+            boolean couldSkip = (next instanceof Element && ((Element) next).shouldIndent(out)) // next will indent
+                || (next instanceof TextNode && (((TextNode) next).isBlank())); // next is blank text, from re-parenting
+            if (couldSkip && isBlank()) return;
 
-        if (prettyPrint && ((siblingIndex == 0 && parent != null && parent.tag().formatAsBlock() && !blank) || (out.outline() && siblingNodes().size() > 0 && !blank)))
-            indent(accum, depth, out);
+            if ((siblingIndex == 0 && parent != null && parent.tag().formatAsBlock() && !isBlank()) ||
+                (out.outline() && siblingNodes().size() > 0 && !isBlank()))
+                indent(accum, depth, out);
+        }
 
-        final boolean stripWhite = prettyPrint && parentNode instanceof Document;
-        Entities.escape(accum, coreValue(), out, false, normaliseWhite, stripWhite);
+        Entities.escape(accum, coreValue(), out, false, normaliseWhite, trimLeading, trimTrailing);
     }
 
 	void outerHtmlTail(Appendable accum, int depth, Document.OutputSettings out) {}
diff --git a/src/main/java/org/jsoup/nodes/XmlDeclaration.java b/src/main/java/org/jsoup/nodes/XmlDeclaration.java
index 6ab9b16241..ca22eb7ff7 100644
--- a/src/main/java/org/jsoup/nodes/XmlDeclaration.java
+++ b/src/main/java/org/jsoup/nodes/XmlDeclaration.java
@@ -60,7 +60,7 @@ private void getWholeDeclaration(Appendable accum, Document.OutputSettings out)
                 accum.append(key);
                 if (!val.isEmpty()) {
                     accum.append("=\"");
-                    Entities.escape(accum, val, out, true, false, false);
+                    Entities.escape(accum, val, out, true, false, false, false);
                     accum.append('"');
                 }
             }
diff --git a/src/test/java/org/jsoup/nodes/DocumentTest.java b/src/test/java/org/jsoup/nodes/DocumentTest.java
index a490b8ea16..77a140aad7 100644
--- a/src/test/java/org/jsoup/nodes/DocumentTest.java
+++ b/src/test/java/org/jsoup/nodes/DocumentTest.java
@@ -55,15 +55,15 @@ public class DocumentTest {
     @Test public void testOutputEncoding() {
         Document doc = Jsoup.parse("<p title=π>π & < > </p>");
         // default is utf-8
-        assertEquals("<p title=\"π\">π &amp; &lt; &gt; </p>", doc.body().html());
+        assertEquals("<p title=\"π\">π &amp; &lt; &gt;</p>", doc.body().html());
         assertEquals("UTF-8", doc.outputSettings().charset().name());
 
         doc.outputSettings().charset("ascii");
         assertEquals(Entities.EscapeMode.base, doc.outputSettings().escapeMode());
-        assertEquals("<p title=\"&#x3c0;\">&#x3c0; &amp; &lt; &gt; </p>", doc.body().html());
+        assertEquals("<p title=\"&#x3c0;\">&#x3c0; &amp; &lt; &gt;</p>", doc.body().html());
 
         doc.outputSettings().escapeMode(Entities.EscapeMode.extended);
-        assertEquals("<p title=\"&pi;\">&pi; &amp; &lt; &gt; </p>", doc.body().html());
+        assertEquals("<p title=\"&pi;\">&pi; &amp; &lt; &gt;</p>", doc.body().html());
     }
 
     @Test public void testXhtmlReferences() {
diff --git a/src/test/java/org/jsoup/nodes/ElementTest.java b/src/test/java/org/jsoup/nodes/ElementTest.java
index b2701e8441..1f92eb11d1 100644
--- a/src/test/java/org/jsoup/nodes/ElementTest.java
+++ b/src/test/java/org/jsoup/nodes/ElementTest.java
@@ -15,6 +15,7 @@
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.MethodSource;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashSet;
@@ -625,6 +626,7 @@ public void testAddNewText() {
         Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>");
         Element div = doc.getElementById("1");
         div.appendText(" there & now >");
+        assertEquals ("Hello there & now >", div.text());
         assertEquals("<p>Hello</p> there &amp; now &gt;", TextUtil.stripNewlines(div.html()));
     }
 
@@ -2305,4 +2307,31 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) {
         assertEquals("<!--\n comment \n -->\n<!doctype html>\n<html>\n <head></head>\n <body></body>\n</html>", doc5.html());
         assertEquals("<!--\n comment \n -->\n<!doctype html>\n<html>\n <head></head>\n <body></body>\n</html>", doc6.html());
     }
+
+    @Test void textnodeInBlockIndent() {
+        String html ="<div>\n{{ msg }} \n </div>\n<div>\n{{ msg }} \n </div>";
+        Document doc = Jsoup.parse(html);
+        assertEquals("<div>\n {{ msg }}\n</div>\n<div>\n {{ msg }}\n</div>", doc.body().html());
+    }
+
+    @Test void stripTrailing() {
+        String html = "<p> This <span>is </span>fine. </p>";
+        Document doc = Jsoup.parse(html);
+        assertEquals("<p>This <span>is </span>fine.</p>", doc.body().html());
+    }
+
+    @Test void elementIndentAndSpaceTrims() {
+        String html = "<body><div> <p> One Two </p> <a>  Hello </a><p>\nSome text \n</p>\n </div>";
+        Document doc = Jsoup.parse(html);
+        assertEquals("<div>\n" +
+            " <p>One Two</p> <a> Hello </a>\n" +
+            " <p>Some text</p>\n" +
+            "</div>", doc.body().html());
+    }
+
+    @Test void divAInlineable() {
+        String html = "<body><div> <a>Text</a>";
+        Document doc = Jsoup.parse(html);
+        assertEquals("<div><a>Text</a>\n</div>", doc.body().html());
+    }
 }
\ No newline at end of file
diff --git a/src/test/java/org/jsoup/parser/HtmlParserTest.java b/src/test/java/org/jsoup/parser/HtmlParserTest.java
index dd1ecc36e4..48663a6371 100644
--- a/src/test/java/org/jsoup/parser/HtmlParserTest.java
+++ b/src/test/java/org/jsoup/parser/HtmlParserTest.java
@@ -158,7 +158,7 @@ public class HtmlParserTest {
 
     @Test public void testSpaceAfterTag() {
         Document doc = Jsoup.parse("<div > <a name=\"top\"></a ><p id=1 >Hello</p></div>");
-        assertEquals("<div> <a name=\"top\"></a><p id=\"1\">Hello</p></div>", TextUtil.stripNewlines(doc.body().html()));
+        assertEquals("<div><a name=\"top\"></a><p id=\"1\">Hello</p></div>", TextUtil.stripNewlines(doc.body().html()));
     }
 
     @Test public void createsDocumentStructure() {
@@ -279,7 +279,7 @@ public class HtmlParserTest {
     @Test public void handlesWhatWgExpensesTableExample() {
         // http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#examples-0
         Document doc = Jsoup.parse("<table> <colgroup> <col> <colgroup> <col> <col> <col> <thead> <tr> <th> <th>2008 <th>2007 <th>2006 <tbody> <tr> <th scope=rowgroup> Research and development <td> $ 1,109 <td> $ 782 <td> $ 712 <tr> <th scope=row> Percentage of net sales <td> 3.4% <td> 3.3% <td> 3.7% <tbody> <tr> <th scope=rowgroup> Selling, general, and administrative <td> $ 3,761 <td> $ 2,963 <td> $ 2,433 <tr> <th scope=row> Percentage of net sales <td> 11.6% <td> 12.3% <td> 12.6% </table>");
-        assertEquals("<table><colgroup><col></colgroup><colgroup><col><col><col></colgroup><thead><tr><th></th><th>2008 </th><th>2007 </th><th>2006 </th></tr></thead><tbody><tr><th scope=\"rowgroup\"> Research and development </th><td> $ 1,109 </td><td> $ 782 </td><td> $ 712 </td></tr><tr><th scope=\"row\"> Percentage of net sales </th><td> 3.4% </td><td> 3.3% </td><td> 3.7% </td></tr></tbody><tbody><tr><th scope=\"rowgroup\"> Selling, general, and administrative </th><td> $ 3,761 </td><td> $ 2,963 </td><td> $ 2,433 </td></tr><tr><th scope=\"row\"> Percentage of net sales </th><td> 11.6% </td><td> 12.3% </td><td> 12.6% </td></tr></tbody></table>", TextUtil.stripNewlines(doc.body().html()));
+        assertEquals("<table><colgroup><col></colgroup><colgroup><col><col><col></colgroup><thead><tr><th></th><th>2008</th><th>2007</th><th>2006</th></tr></thead><tbody><tr><th scope=\"rowgroup\">Research and development</th><td>$ 1,109</td><td>$ 782</td><td>$ 712</td></tr><tr><th scope=\"row\">Percentage of net sales</th><td>3.4%</td><td>3.3%</td><td>3.7%</td></tr></tbody><tbody><tr><th scope=\"rowgroup\">Selling, general, and administrative</th><td>$ 3,761</td><td>$ 2,963</td><td>$ 2,433</td></tr><tr><th scope=\"row\">Percentage of net sales</th><td>11.6%</td><td>12.3%</td><td>12.6%</td></tr></tbody></table>", TextUtil.stripNewlines(doc.body().html()));
     }
 
     @Test public void handlesTbodyTable() {
@@ -294,7 +294,7 @@ public class HtmlParserTest {
 
     @Test public void noTableDirectInTable() {
         Document doc = Jsoup.parse("<table> <td>One <td><table><td>Two</table> <table><td>Three");
-        assertEquals("<table><tbody><tr><td>One </td><td><table><tbody><tr><td>Two</td></tr></tbody></table><table><tbody><tr><td>Three</td></tr></tbody></table></td></tr></tbody></table>",
+        assertEquals("<table><tbody><tr><td>One</td><td><table><tbody><tr><td>Two</td></tr></tbody></table><table><tbody><tr><td>Three</td></tr></tbody></table></td></tr></tbody></table>",
             TextUtil.stripNewlines(doc.body().html()));
     }
 
@@ -568,7 +568,7 @@ public class HtmlParserTest {
     @Test public void normalisesDocument() {
         String h = "<!doctype html>One<html>Two<head>Three<link></head>Four<body>Five </body>Six </html>Seven ";
         Document doc = Jsoup.parse(h);
-        assertEquals("<!doctype html><html><head></head><body>OneTwoThree<link>FourFive Six Seven </body></html>",
+        assertEquals("<!doctype html><html><head></head><body>OneTwoThree<link>FourFive Six Seven</body></html>",
             TextUtil.stripNewlines(doc.html()));
     }
 
@@ -599,7 +599,7 @@ public class HtmlParserTest {
     @Test public void testHgroup() {
         // jsoup used to not allow hgroup in h{n}, but that's not in spec, and browsers are OK
         Document doc = Jsoup.parse("<h1>Hello <h2>There <hgroup><h1>Another<h2>headline</hgroup> <hgroup><h1>More</h1><p>stuff</p></hgroup>");
-        assertEquals("<h1>Hello </h1><h2>There <hgroup><h1>Another</h1><h2>headline</h2></hgroup><hgroup><h1>More</h1><p>stuff</p></hgroup></h2>", TextUtil.stripNewlines(doc.body().html()));
+        assertEquals("<h1>Hello</h1><h2>There <hgroup><h1>Another</h1><h2>headline</h2></hgroup><hgroup><h1>More</h1><p>stuff</p></hgroup></h2>", TextUtil.stripNewlines(doc.body().html()));
     }
 
     @Test public void testRelaxedTags() {
@@ -611,7 +611,7 @@ public class HtmlParserTest {
         // h* tags (h1 .. h9) in browsers can handle any internal content other than other h*. which is not per any
         // spec, which defines them as containing phrasing content only. so, reality over theory.
         Document doc = Jsoup.parse("<h1>Hello <div>There</div> now</h1> <h2>More <h3>Content</h3></h2>");
-        assertEquals("<h1>Hello <div>There</div> now</h1><h2>More </h2><h3>Content</h3>", TextUtil.stripNewlines(doc.body().html()));
+        assertEquals("<h1>Hello <div>There</div> now</h1><h2>More</h2><h3>Content</h3>", TextUtil.stripNewlines(doc.body().html()));
     }
 
     @Test public void testSpanContents() {
@@ -1175,7 +1175,8 @@ public void testInvalidTableContents() throws IOException {
         Document doc = Parser.htmlParser()
             .settings(preserveCase)
             .parseInput(html, "");
-        assertEquals("<A>ONE </A><A>Two</A>", StringUtil.normaliseWhitespace(doc.body().html()));
+        //assertEquals("<A>ONE </A><A>Two</A>", StringUtil.normaliseWhitespace(doc.body().html()));
+        assertEquals("<A>ONE </A><A>Two</A>", doc.body().html());
     }
 
     @Test public void normalizesDiscordantTags() {
@@ -1246,7 +1247,7 @@ public void testInvalidTableContents() throws IOException {
         File in = ParseTest.getFile("/htmltests/comments.html");
         Document doc = Jsoup.parse(in, "UTF-8");
 
-        assertEquals("<!--?xml version=\"1.0\" encoding=\"utf-8\"?--><!-- so --> <!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><!-- what --> <html xml:lang=\"en\" lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\"> <!-- now --> <head> <!-- then --> <meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\"> <title>A Certain Kind of Test</title> </head> <body> <h1>Hello</h1>h1&gt; (There is a UTF8 hidden BOM at the top of this file.) </body> </html>",
+        assertEquals("<!--?xml version=\"1.0\" encoding=\"utf-8\"?--><!-- so --> <!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><!-- what --> <html xml:lang=\"en\" lang=\"en\" xmlns=\"http://www.w3.org/1999/xhtml\"><!-- now --> <head><!-- then --> <meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\"> <title>A Certain Kind of Test</title> </head> <body> <h1>Hello</h1>h1&gt; (There is a UTF8 hidden BOM at the top of this file.) </body> </html>",
             StringUtil.normaliseWhitespace(doc.html()));
 
         assertEquals("A Certain Kind of Test", doc.head().select("title").text());
@@ -1486,7 +1487,7 @@ private boolean didAddElements(String input) {
         String html = "<a>\n<b>\n<div>\n<a>test</a>\n</div>\n</b>\n</a>";
         Document doc = Jsoup.parse(html);
         assertNotNull(doc);
-        assertEquals("<a> <b> </b></a><b><div><a></a><a>test</a></div> </b>", TextUtil.stripNewlines(doc.body().html()));
+        assertEquals("<a> <b> </b></a><b><div><a> </a><a>test</a></div> </b>", TextUtil.stripNewlines(doc.body().html()));
     }
 
     @Test public void tagsMustStartWithAscii() {
diff --git a/src/test/java/org/jsoup/parser/HtmlTreeBuilderStateTest.java b/src/test/java/org/jsoup/parser/HtmlTreeBuilderStateTest.java
index c5dbbfc169..f05ee26897 100644
--- a/src/test/java/org/jsoup/parser/HtmlTreeBuilderStateTest.java
+++ b/src/test/java/org/jsoup/parser/HtmlTreeBuilderStateTest.java
@@ -75,9 +75,9 @@ public void nestedAnchorElements01() {
         String s = Jsoup.parse(html).toString();
         assertEquals("<html>\n" +
             " <head></head>\n" +
-            " <body> <a href=\"#1\"> </a>\n" +
+            " <body><a href=\"#1\"> </a>\n" +
             "  <div>\n" +
-            "   <a href=\"#1\"></a><a href=\"#2\">child</a>\n" +
+            "   <a href=\"#1\"> </a><a href=\"#2\">child</a>\n" +
             "  </div>\n" +
             " </body>\n" +
             "</html>", s);
@@ -99,11 +99,11 @@ public void nestedAnchorElements02() {
         String s = Jsoup.parse(html).toString();
         assertEquals("<html>\n" +
             " <head></head>\n" +
-            " <body> <a href=\"#1\"> </a>\n" +
+            " <body><a href=\"#1\"> </a>\n" +
             "  <div>\n" +
-            "   <a href=\"#1\"></a>\n" +
+            "   <a href=\"#1\"> </a>\n" +
             "   <div>\n" +
-            "    <a href=\"#1\"></a><a href=\"#2\">child</a>\n" +
+            "    <a href=\"#1\"> </a><a href=\"#2\">child</a>\n" +
             "   </div>\n" +
             "  </div>\n" +
             " </body>\n" +
diff --git a/src/test/java/org/jsoup/select/TraversorTest.java b/src/test/java/org/jsoup/select/TraversorTest.java
index 8d0667e648..11a5167d61 100644
--- a/src/test/java/org/jsoup/select/TraversorTest.java
+++ b/src/test/java/org/jsoup/select/TraversorTest.java
@@ -95,7 +95,7 @@ public FilterResult tail(Node node, int depth) {
                 return ("b".equals(node.nodeName())) ? FilterResult.REMOVE : FilterResult.CONTINUE;
             }
         }, doc.select("div"));
-        assertEquals("<div></div>\n<div>\n There be \n</div>", doc.select("body").html());
+        assertEquals("<div></div>\n<div>\n There be\n</div>", doc.select("body").html());
     }
 
     @Test
diff --git a/src/test/resources/htmltests/medium.html b/src/test/resources/htmltests/medium.html
index 1f90df242e..728961cb52 100644
--- a/src/test/resources/htmltests/medium.html
+++ b/src/test/resources/htmltests/medium.html
@@ -1,10 +1,10 @@
 <html>
  <head>
-  <title>Large HTML</title>
+  <title>Medium HTML</title>
  </head>
  <body>
   <p>Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer nec odio. Praesent libero. Sed cursus ante dapibus diam. Sed nisi. Nulla quis sem at nibh elementum imperdiet. Duis sagittis ipsum. Praesent mauris. Fusce nec tellus sed augue semper porta. Mauris massa. Vestibulum lacinia arcu eget nulla. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Curabitur sodales ligula in libero.</p>
-  <p>Sed dignissim lacinia nunc. Curabitur tortor. Pellentesque nibh. Aenean quam. In scelerisque sem at dolor. Maecenas mattis. Sed convallis tristique sem. Proin ut ligula vel nunc egestas porttitor. <i>Lorem ipsum dolor sit amet, consectetur adipiscing elit</i>. Morbi lectus risus, iaculis vel, suscipit quis, luctus non, massa. Fusce ac turpis quis ligula lacinia aliquet. Mauris ipsum. <b>Aenean quam</b>. Nulla metus metus, ullamcorper vel, tincidunt sed, euismod in, nibh. </p>
+  <p>Sed dignissim lacinia nunc. Curabitur tortor. Pellentesque nibh. Aenean quam. In scelerisque sem at dolor. Maecenas mattis. Sed convallis tristique sem. Proin ut ligula vel nunc egestas porttitor. <i>Lorem ipsum dolor sit amet, consectetur adipiscing elit</i>. Morbi lectus risus, iaculis vel, suscipit quis, luctus non, massa. Fusce ac turpis quis ligula lacinia aliquet. Mauris ipsum. <b>Aenean quam</b>. Nulla metus metus, ullamcorper vel, tincidunt sed, euismod in, nibh.</p>
   <p>Quisque volutpat condimentum velit. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nam nec ante. Sed lacinia, urna non tincidunt mattis, tortor neque adipiscing diam, a cursus ipsum ante quis turpis. Nulla facilisi. Ut fringilla. Suspendisse potenti. Nunc feugiat mi a tellus consequat imperdiet. Vestibulum sapien. Proin quam. <b>Sed lacinia, urna non tincidunt mattis, tortor neque adipiscing diam, a cursus ipsum ante quis turpis</b>. Etiam ultrices. Suspendisse in justo eu magna luctus suscipit. Sed lectus. Integer euismod lacus luctus magna.</p>
   <p>Quisque cursus, metus vitae pharetra auctor, sem massa mattis sem, at interdum magna augue eget diam. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Morbi lacinia molestie dui. Praesent blandit dolor. Sed non quam. In vel mi sit amet augue congue elementum. Morbi in ipsum sit amet pede facilisis laoreet. <b>Suspendisse in justo eu magna luctus suscipit</b>. Donec lacus nunc, viverra nec, blandit vel, egestas et, augue. Vestibulum tincidunt malesuada tellus. Ut ultrices ultrices enim. Curabitur sit amet mauris. Morbi in dui quis est pulvinar ullamcorper. Nulla facilisi.</p>
   <p>Integer lacinia sollicitudin massa. <b>Quisque cursus, metus vitae pharetra auctor, sem massa mattis sem, at interdum magna augue eget diam</b>. Cras metus. Sed aliquet risus a tortor. Integer id quam. Morbi mi. <b>Morbi in dui quis est pulvinar ullamcorper</b>. Quisque nisl felis, venenatis tristique, dignissim in, ultrices sit amet, augue. Proin sodales libero eget ante. Nulla quam. Aenean laoreet. Vestibulum nisi lectus, commodo ac, facilisis ac, ultricies eu, pede. Ut orci risus, accumsan porttitor, cursus quis, aliquet eget, justo. Sed pretium blandit orci. <b>Quisque nisl felis, venenatis tristique, dignissim in, ultrices sit amet, augue</b>. Ut eu diam at pede suscipit sodales. Aenean lectus elit, fermentum non, convallis id, sagittis at, neque.</p>