diff --git a/CHANGES b/CHANGES index 18ac10cfb6..1717108966 100644 --- a/CHANGES +++ b/CHANGES @@ -17,9 +17,12 @@ jsoup changelog * Bugfix: updated the HtmlTreeParser resetInsertionMode to the current spec for supported elements. - * Bugfix: fixed an NPE when parsing fragment HTML into a table element. + * Bugfix: fixed an NPE when parsing fragment HTML into a standalone table element. + * Bugfix: fixed an NPE when parsing fragment heading HTML into a standalone p element. + + * Bugfix [Fuzz]: fixed a slow parse when a tag or an attribute name has thousands of null characters in it. diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java index 43e2c394a6..8d9d8fa90b 100644 --- a/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java +++ b/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java @@ -274,9 +274,7 @@ void insert(Token.Comment commentToken) { void insert(Token.Character characterToken) { final Node node; - Element el = currentElement(); - if (el == null) - el = doc; // allows for whitespace to be inserted into the doc root object (not on the stack) + Element el = currentElement(); // will be doc if no current element; allows for whitespace to be inserted into the doc root object (not on the stack) final String tagName = el.normalName(); final String data = characterToken.getData(); @@ -604,7 +602,7 @@ List getPendingTableCharacters() { process, then the UA must perform the above steps as if that element was not in the above list. 
*/ void generateImpliedEndTags(String excludeTag) { - while ((excludeTag != null && !currentElement().normalName().equals(excludeTag)) && + while ((excludeTag != null && !currentElementIs(excludeTag)) && inSorted(currentElement().normalName(), TagSearchEndTags)) pop(); } diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java index f5b65ba770..d2e017dafe 100644 --- a/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java +++ b/src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java @@ -774,7 +774,7 @@ boolean anyOtherEndTag(Token t, HtmlTreeBuilder tb) { Element node = stack.get(pos); if (node.normalName().equals(name)) { tb.generateImpliedEndTags(name); - if (!name.equals(tb.currentElement().normalName())) + if (!tb.currentElementIs(name)) tb.error(this); tb.popStackToClose(name); break; @@ -1000,7 +1000,6 @@ boolean process(Token t, HtmlTreeBuilder tb) { } return true; // todo: as above todo } else if (t.isEOF()) { - Element el = tb.currentElement(); if (tb.currentElementIs("html")) tb.error(this); return true; // stops parsing diff --git a/src/main/java/org/jsoup/parser/TreeBuilder.java b/src/main/java/org/jsoup/parser/TreeBuilder.java index 6aa5b7999c..42617f5201 100644 --- a/src/main/java/org/jsoup/parser/TreeBuilder.java +++ b/src/main/java/org/jsoup/parser/TreeBuilder.java @@ -110,9 +110,14 @@ protected boolean processEndTag(String name) { } - @Nullable protected Element currentElement() { + /** + Get the current element (last on the stack). If all items have been removed, returns the document instead + (which might not actually be on the stack; use stack.size() == 0 to test if required). + @return the last element on the stack, if any; or the root document + */ + protected Element currentElement() { int size = stack.size(); - return size > 0 ? stack.get(size-1) : null; + return size > 0 ? 
stack.get(size-1) : doc; } /** @@ -121,6 +126,8 @@ protected boolean processEndTag(String name) { @return true if there is a current element on the stack, and its name equals the supplied */ protected boolean currentElementIs(String normalName) { + if (stack.size() == 0) + return false; Element current = currentElement(); return current != null && current.normalName().equals(normalName); } diff --git a/src/test/java/org/jsoup/parser/HtmlParserTest.java b/src/test/java/org/jsoup/parser/HtmlParserTest.java index ae9e014bfa..35749059e5 100644 --- a/src/test/java/org/jsoup/parser/HtmlParserTest.java +++ b/src/test/java/org/jsoup/parser/HtmlParserTest.java @@ -1431,4 +1431,13 @@ private boolean didAddElements(String input) { element.html("One"); assertEquals("\n \n One\n \n", element.outerHtml()); } + + @Test public void parseFragmentOnCreatedDocument() { + String bareFragment = "<h2>text</h2>"; + List<Node> nodes = new Document("").parser().parseFragmentInput(bareFragment, new Element("p"), ""); + assertEquals(1, nodes.size()); + Node node = nodes.get(0); + assertEquals("h2", node.nodeName()); + assertEquals("<p><h2>text</h2></p>", node.parent().outerHtml()); + } }