diff --git a/CHANGES b/CHANGES index f103efe76e..479e93130d 100644 --- a/CHANGES +++ b/CHANGES @@ -47,6 +47,10 @@ jsoup changelog vs being able to read in one hit. + * Bugfix [Fuzz]: Speed improvement when closing missing empty tags (in XML comment processed as HTML) when thousands + deep in stack. + + *** Release 1.14.1 [2021-Jul-10] * Change: updated the minimum supported Java version from Java 7 to Java 8. diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java index 40419f7a67..43e2c394a6 100644 --- a/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java +++ b/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java @@ -443,7 +443,10 @@ private void replaceInQueue(ArrayList<Element> queue, Element out, Element in) { void resetInsertionMode() { // https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode boolean last = false; - for (int pos = stack.size() -1; pos >= 0; pos--) { + final int bottom = stack.size() - 1; + final int upper = bottom >= maxQueueDepth ? 
bottom - maxQueueDepth : 0; + + for (int pos = bottom; pos >= upper; pos--) { Element node = stack.get(pos); if (pos == 0) { last = true; diff --git a/src/test/java/org/jsoup/integration/FuzzFixesTest.java b/src/test/java/org/jsoup/integration/FuzzFixesTest.java index a831fbb288..7c0fda17c6 100644 --- a/src/test/java/org/jsoup/integration/FuzzFixesTest.java +++ b/src/test/java/org/jsoup/integration/FuzzFixesTest.java @@ -171,4 +171,14 @@ public void parseTimeout1605() throws IOException { Document docXml = Jsoup.parse(new FileInputStream(in), "UTF-8", "https://example.com", Parser.xmlParser()); assertNotNull(docXml); } + + @Test + public void parseTimeout1606() throws IOException { + // https://github.com/jhy/jsoup/issues/1606 + // Timesink when closing missing empty tag (in XML comment processed as HTML) when thousands deep + File in = ParseTest.getFile("/fuzztests/1606.html.gz"); + + Document docXml = Jsoup.parse(new FileInputStream(in), "UTF-8", "https://example.com", Parser.xmlParser()); + assertNotNull(docXml); + } } diff --git a/src/test/resources/fuzztests/1606.html.gz b/src/test/resources/fuzztests/1606.html.gz new file mode 100644 index 0000000000..e0fd39d1e2 Binary files /dev/null and b/src/test/resources/fuzztests/1606.html.gz differ