From 436d119977bd44376e9dac92d82b21848a69732e Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Wed, 4 Aug 2021 16:05:14 +1000 Subject: [PATCH] Limit stack depth when missing tags hit resetInsertionMode() Fixes #1606 --- CHANGES | 4 ++++ .../java/org/jsoup/parser/HtmlTreeBuilder.java | 5 ++++- .../org/jsoup/integration/FuzzFixesTest.java | 10 ++++++++++ src/test/resources/fuzztests/1606.html.gz | Bin 0 -> 1446 bytes 4 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 src/test/resources/fuzztests/1606.html.gz diff --git a/CHANGES b/CHANGES index f103efe76e..479e93130d 100644 --- a/CHANGES +++ b/CHANGES @@ -47,6 +47,10 @@ jsoup changelog vs being able to read in one hit. + * Bugfix [Fuzz]: Speed improvement when closing missing empty tags (in XML comment processed as HTML) when thousands + deep in stack. + + *** Release 1.14.1 [2021-Jul-10] * Change: updated the minimum supported Java version from Java 7 to Java 8. diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java index 40419f7a67..43e2c394a6 100644 --- a/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java +++ b/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java @@ -443,7 +443,10 @@ private void replaceInQueue(ArrayList queue, Element out, Element in) { void resetInsertionMode() { // https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode boolean last = false; - for (int pos = stack.size() -1; pos >= 0; pos--) { + final int bottom = stack.size() - 1; + final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0; + + for (int pos = bottom; pos >= upper; pos--) { Element node = stack.get(pos); if (pos == 0) { last = true; diff --git a/src/test/java/org/jsoup/integration/FuzzFixesTest.java b/src/test/java/org/jsoup/integration/FuzzFixesTest.java index a831fbb288..7c0fda17c6 100644 --- a/src/test/java/org/jsoup/integration/FuzzFixesTest.java +++ b/src/test/java/org/jsoup/integration/FuzzFixesTest.java @@ -171,4 +171,14 @@ public void parseTimeout1605() throws IOException { Document docXml = Jsoup.parse(new FileInputStream(in), "UTF-8", "https://example.com", Parser.xmlParser()); assertNotNull(docXml); } + + @Test + public void parseTimeout1606() throws IOException { + // https://github.com/jhy/jsoup/issues/1606 + // Timesink when closing missing empty tag (in XML comment processed as HTML) when thousands deep + File in = ParseTest.getFile("/fuzztests/1606.html.gz"); + + Document docXml = Jsoup.parse(new FileInputStream(in), "UTF-8", "https://example.com", Parser.xmlParser()); + assertNotNull(docXml); + } } diff --git a/src/test/resources/fuzztests/1606.html.gz b/src/test/resources/fuzztests/1606.html.gz new file mode 100644 index 0000000000000000000000000000000000000000..e0fd39d1e272729e387d490bad9d53776bc809f2 GIT binary patch literal 1446 zcmb2|=HNJ|&6UW&ZD?j-rk7EYo5S$-nq%H&2NAXhGL6a$yjHx@+R!WKbZ^s1$xA1+ zx;0mQ(Agoz{$f(`!4Sve+ z^f-O>k&M{6{M`{pRy_T2|E|}T)E)ceMDNw@*XiE$tLo#&Cx=fis|-)xC-tjnvw8f{ zJ$2o;jz{)xGn~_RD)+zt&&F%ppKhxx70uoM?P=ZZm&RMZuIW3sd42ow6T3D4IN!Tn z)meLd&bCk6Rx74$zBzk#*=||6OCL9^tBrpbckD3dx8=LV-{o(ZTW#`7|C#EqQu`~* z`EK{kvpcDLx23uL@R3%#w+enDt@ZzYKA%rMabB9PCJ6s3Pxk3ia}!qn|L600dn7{h zqH5P6WC1jJVz|$H$uQwR_eXyY#g+=^XD-t$j%