Skip to content

Commit

Permalink
Fixed an NPE hit on Current Element if there was nothing left on the …
Browse files Browse the repository at this point in the history
…stack

And guarded other calls to Current Element when used in the same manner

Fixes #1603
  • Loading branch information
jhy committed Aug 4, 2021
1 parent 436d119 commit 26d3c14
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 26 deletions.
5 changes: 4 additions & 1 deletion CHANGES
Expand Up @@ -14,9 +14,12 @@ jsoup changelog
* Bugfix: in the XML parser, XML processing instructions without attributes would be serialized as if they did.
<https://github.com/jhy/jsoup/issues/770>

* Bugfix: updated the HtmlTreeParser resetInsertionMode to the current spec for supported elements
* Bugfix: updated the HtmlTreeParser resetInsertionMode to the current spec for supported elements.
<https://github.com/jhy/jsoup/issues/1491>

* Bugfix: fixed an NPE when parsing fragment HTML into a table element.
<https://github.com/jhy/jsoup/issues/1603>

* Bugfix [Fuzz]: fixed a slow parse when a tag or an attribute name has thousands of null characters in it.
<https://github.com/jhy/jsoup/issues/1580>

Expand Down
49 changes: 25 additions & 24 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java
Expand Up @@ -580,7 +580,7 @@ private boolean inBodyStartTag(Token t, HtmlTreeBuilder tb) {
// static final String[] InBodyStartOptions = new String[]{"optgroup", "option"};
case "optgroup":
case "option":
if (tb.currentElement().normalName().equals("option"))
if (tb.currentElementIs("option"))
tb.processEndTag("option");
tb.reconstructFormattingElements();
tb.insert(startTag);
Expand All @@ -590,7 +590,7 @@ private boolean inBodyStartTag(Token t, HtmlTreeBuilder tb) {
case "rt":
if (tb.inScope("ruby")) {
tb.generateImpliedEndTags();
if (!tb.currentElement().normalName().equals("ruby")) {
if (!tb.currentElementIs("ruby")) {
tb.error(this);
tb.popStackToBefore("ruby"); // i.e. close up to but not include name
}
Expand Down Expand Up @@ -648,7 +648,7 @@ private boolean inBodyEndTag(Token t, HtmlTreeBuilder tb) {
return false;
} else {
tb.generateImpliedEndTags(name);
if (!tb.currentElement().normalName().equals(name))
if (!tb.currentElementIs(name))
tb.error(this);
tb.popStackToClose(name);
}
Expand All @@ -675,7 +675,7 @@ private boolean inBodyEndTag(Token t, HtmlTreeBuilder tb) {
return false;
} else {
tb.generateImpliedEndTags();
if (!tb.currentElement().normalName().equals(name))
if (!tb.currentElementIs(name))
tb.error(this);
// remove currentForm from stack. will shift anything under up.
tb.removeFromStack(currentForm);
Expand All @@ -688,7 +688,7 @@ private boolean inBodyEndTag(Token t, HtmlTreeBuilder tb) {
return tb.process(endTag);
} else {
tb.generateImpliedEndTags(name);
if (!tb.currentElement().normalName().equals(name))
if (!tb.currentElementIs(name))
tb.error(this);
tb.popStackToClose(name);
}
Expand All @@ -700,7 +700,7 @@ private boolean inBodyEndTag(Token t, HtmlTreeBuilder tb) {
return false;
} else {
tb.generateImpliedEndTags(name);
if (!tb.currentElement().normalName().equals(name))
if (!tb.currentElementIs(name))
tb.error(this);
tb.popStackToClose(name);
}
Expand All @@ -716,7 +716,7 @@ private boolean inBodyEndTag(Token t, HtmlTreeBuilder tb) {
return false;
} else {
tb.generateImpliedEndTags(name);
if (!tb.currentElement().normalName().equals(name))
if (!tb.currentElementIs(name))
tb.error(this);
tb.popStackToClose(Constants.Headings);
}
Expand All @@ -736,7 +736,7 @@ private boolean inBodyEndTag(Token t, HtmlTreeBuilder tb) {
return false;
} else {
tb.generateImpliedEndTags();
if (!tb.currentElement().normalName().equals(name))
if (!tb.currentElementIs(name))
tb.error(this);
tb.popStackToClose(name);
}
Expand All @@ -747,7 +747,7 @@ private boolean inBodyEndTag(Token t, HtmlTreeBuilder tb) {
return false;
}
tb.generateImpliedEndTags();
if (!tb.currentElement().normalName().equals(name))
if (!tb.currentElementIs(name))
tb.error(this);
tb.popStackToClose(name);
tb.clearFormattingElementsToLastMarker();
Expand Down Expand Up @@ -1000,7 +1000,8 @@ boolean process(Token t, HtmlTreeBuilder tb) {
}
return true; // todo: as above todo
} else if (t.isEOF()) {
if (tb.currentElement().normalName().equals("html"))
Element el = tb.currentElement();
if (tb.currentElementIs("html"))
tb.error(this);
return true; // stops parsing
}
Expand Down Expand Up @@ -1059,7 +1060,7 @@ boolean process(Token t, HtmlTreeBuilder tb) {
return false;
} else {
tb.generateImpliedEndTags();
if (!tb.currentElement().normalName().equals("caption"))
if (!tb.currentElementIs("caption"))
tb.error(this);
tb.popStackToClose("caption");
tb.clearFormattingElementsToLastMarker();
Expand Down Expand Up @@ -1110,7 +1111,7 @@ boolean process(Token t, HtmlTreeBuilder tb) {
case EndTag:
Token.EndTag endTag = t.asEndTag();
if (endTag.normalName.equals("colgroup")) {
if (tb.currentElement().normalName().equals("html")) { // frag case
if (tb.currentElementIs("html")) { // frag case
tb.error(this);
return false;
} else {
Expand All @@ -1121,7 +1122,7 @@ boolean process(Token t, HtmlTreeBuilder tb) {
return anythingElse(t, tb);
break;
case EOF:
if (tb.currentElement().normalName().equals("html"))
if (tb.currentElementIs("html"))
return true; // stop parsing; frag case
else
return anythingElse(t, tb);
Expand Down Expand Up @@ -1277,7 +1278,7 @@ boolean process(Token t, HtmlTreeBuilder tb) {
return false;
}
tb.generateImpliedEndTags();
if (!tb.currentElement().normalName().equals(name))
if (!tb.currentElementIs(name))
tb.error(this);
tb.popStackToClose(name);
tb.clearFormattingElementsToLastMarker();
Expand Down Expand Up @@ -1344,13 +1345,13 @@ boolean process(Token t, HtmlTreeBuilder tb) {
if (name.equals("html"))
return tb.process(start, InBody);
else if (name.equals("option")) {
if (tb.currentElement().normalName().equals("option"))
if (tb.currentElementIs("option"))
tb.processEndTag("option");
tb.insert(start);
} else if (name.equals("optgroup")) {
if (tb.currentElement().normalName().equals("option"))
if (tb.currentElementIs("option"))
tb.processEndTag("option"); // pop option and flow to pop optgroup
if (tb.currentElement().normalName().equals("optgroup"))
if (tb.currentElementIs("optgroup"))
tb.processEndTag("optgroup");
tb.insert(start);
} else if (name.equals("select")) {
Expand All @@ -1373,15 +1374,15 @@ else if (name.equals("option")) {
name = end.normalName();
switch (name) {
case "optgroup":
if (tb.currentElement().normalName().equals("option") && tb.aboveOnStack(tb.currentElement()) != null && tb.aboveOnStack(tb.currentElement()).normalName().equals("optgroup"))
if (tb.currentElementIs("option") && tb.aboveOnStack(tb.currentElement()) != null && tb.aboveOnStack(tb.currentElement()).normalName().equals("optgroup"))
tb.processEndTag("option");
if (tb.currentElement().normalName().equals("optgroup"))
if (tb.currentElementIs("optgroup"))
tb.pop();
else
tb.error(this);
break;
case "option":
if (tb.currentElement().normalName().equals("option"))
if (tb.currentElementIs("option"))
tb.pop();
else
tb.error(this);
Expand All @@ -1400,7 +1401,7 @@ else if (name.equals("option")) {
}
break;
case EOF:
if (!tb.currentElement().normalName().equals("html"))
if (!tb.currentElementIs("html"))
tb.error(this);
break;
default:
Expand Down Expand Up @@ -1487,17 +1488,17 @@ boolean process(Token t, HtmlTreeBuilder tb) {
return false;
}
} else if (t.isEndTag() && t.asEndTag().normalName().equals("frameset")) {
if (tb.currentElement().normalName().equals("html")) { // frag
if (tb.currentElementIs("html")) { // frag
tb.error(this);
return false;
} else {
tb.pop();
if (!tb.isFragmentParsing() && !tb.currentElement().normalName().equals("frameset")) {
if (!tb.isFragmentParsing() && !tb.currentElementIs("frameset")) {
tb.transition(AfterFrameset);
}
}
} else if (t.isEOF()) {
if (!tb.currentElement().normalName().equals("html")) {
if (!tb.currentElementIs("html")) {
tb.error(this);
return true;
}
Expand Down
13 changes: 12 additions & 1 deletion src/main/java/org/jsoup/parser/TreeBuilder.java
Expand Up @@ -6,6 +6,7 @@
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;

import javax.annotation.Nullable;
import javax.annotation.ParametersAreNonnullByDefault;
import java.io.Reader;
import java.util.ArrayList;
Expand Down Expand Up @@ -109,11 +110,21 @@ protected boolean processEndTag(String name) {
}


protected Element currentElement() {
@Nullable protected Element currentElement() {
int size = stack.size();
return size > 0 ? stack.get(size-1) : null;
}

/**
Checks if the Current Element's normal name equals the supplied name.
@param normalName name to check
@return true if there is a current element on the stack, and its name equals the supplied
*/
protected boolean currentElementIs(String normalName) {
Element current = currentElement();
return current != null && current.normalName().equals(normalName);
}

/**
* If the parser is tracking errors, add an error at the current position.
* @param msg error message
Expand Down
7 changes: 7 additions & 0 deletions src/test/java/org/jsoup/parser/HtmlParserTest.java
Expand Up @@ -1424,4 +1424,11 @@ private boolean didAddElements(String input) {
int xmlElementCount = xml.getAllElements().size();
return htmlElementCount > xmlElementCount;
}

@Test public void canSetHtmlOnCreatedTableElements() {
// https://github.com/jhy/jsoup/issues/1603
Element element = new Element("tr");
element.html("<tr><td>One</td></tr>");
assertEquals("<tr>\n <tr>\n <td>One</td>\n </tr>\n</tr>", element.outerHtml());
}
}

0 comments on commit 26d3c14

Please sign in to comment.