Make CurrentElement not null; return root document if nothing on stack

Fixes #1601
jhy · Aug 4, 2021 · dd2536b · dd2536b
1 parent 26d3c14
commit dd2536b
Show file tree

Hide file tree

Showing 5 changed files with 25 additions and 9 deletions.
diff --git a/CHANGES b/CHANGES
@@ -17,9 +17,12 @@ jsoup changelog
   * Bugfix: updated the HtmlTreeParser resetInsertionMode to the current spec for supported elements.
     <https://github.com/jhy/jsoup/issues/1491>
 
-  * Bugfix: fixed an NPE when parsing fragment HTML into a table element.
+  * Bugfix: fixed an NPE when parsing fragment HTML into a standalone table element.
     <https://github.com/jhy/jsoup/issues/1603>
 
+  * Bugfix: fixed an NPE when parsing fragment heading HTML into a standalone p element.
+    <https://github.com/jhy/jsoup/issues/1601>
+
   * Bugfix [Fuzz]: fixed a slow parse when a tag or an attribute name has thousands of null characters in it.
     <https://github.com/jhy/jsoup/issues/1580>
 

diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java
@@ -274,9 +274,7 @@ void insert(Token.Comment commentToken) {
 
     void insert(Token.Character characterToken) {
         final Node node;
-        Element el = currentElement();
-        if (el == null)
-            el = doc; // allows for whitespace to be inserted into the doc root object (not on the stack)
+        Element el = currentElement(); // will be doc if no current element; allows for whitespace to be inserted into the doc root object (not on the stack)
         final String tagName = el.normalName();
         final String data = characterToken.getData();
 
@@ -604,7 +602,7 @@ List<String> getPendingTableCharacters() {
      process, then the UA must perform the above steps as if that element was not in the above list.
      */
     void generateImpliedEndTags(String excludeTag) {
-        while ((excludeTag != null && !currentElement().normalName().equals(excludeTag)) &&
+        while ((excludeTag != null && !currentElementIs(excludeTag)) &&
                 inSorted(currentElement().normalName(), TagSearchEndTags))
             pop();
     }

diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java
@@ -774,7 +774,7 @@ boolean anyOtherEndTag(Token t, HtmlTreeBuilder tb) {
                 Element node = stack.get(pos);
                 if (node.normalName().equals(name)) {
                     tb.generateImpliedEndTags(name);
-                    if (!name.equals(tb.currentElement().normalName()))
+                    if (!tb.currentElementIs(name))
                         tb.error(this);
                     tb.popStackToClose(name);
                     break;
@@ -1000,7 +1000,6 @@ boolean process(Token t, HtmlTreeBuilder tb) {
                 }
                 return true; // todo: as above todo
             } else if (t.isEOF()) {
-                Element el = tb.currentElement();
                 if (tb.currentElementIs("html"))
                     tb.error(this);
                 return true; // stops parsing

diff --git a/src/main/java/org/jsoup/parser/TreeBuilder.java b/src/main/java/org/jsoup/parser/TreeBuilder.java
@@ -110,9 +110,14 @@ protected boolean processEndTag(String name) {
     }
 
 
-    @Nullable protected Element currentElement() {
+    /**
+     Get the current element (last on the stack). If all items have been removed, returns the document instead
+     (which might not actually be on the stack; use stack.size() == 0 to test if required).
+     @return the last element on the stack, if any; or the root document
+     */
+    protected Element currentElement() {
         int size = stack.size();
-        return size > 0 ? stack.get(size-1) : null;
+        return size > 0 ? stack.get(size-1) : doc;
     }
 
     /**
@@ -121,6 +126,8 @@ protected boolean processEndTag(String name) {
      @return true if there is a current element on the stack, and its name equals the supplied name
      */
     protected boolean currentElementIs(String normalName) {
+        if (stack.size() == 0)
+            return false;
         Element current = currentElement();
         return current != null && current.normalName().equals(normalName);
     }

diff --git a/src/test/java/org/jsoup/parser/HtmlParserTest.java b/src/test/java/org/jsoup/parser/HtmlParserTest.java
@@ -1431,4 +1431,13 @@ private boolean didAddElements(String input) {
         element.html("<tr><td>One</td></tr>");
         assertEquals("<tr>\n <tr>\n  <td>One</td>\n </tr>\n</tr>", element.outerHtml());
     }
+
+    @Test public void parseFragmentOnCreatedDocument() {
+        String bareFragment = "<h2>text</h2>";
+        List<Node> nodes = new Document("").parser().parseFragmentInput(bareFragment, new Element("p"), "");
+        assertEquals(1, nodes.size());
+        Node node = nodes.get(0);
+        assertEquals("h2", node.nodeName());
+        assertEquals("<p><h2>text</h2></p>", node.parent().outerHtml());
+    }
 }