From 2676f413ede8139d58bf6ea43aae8c82d0b301ff Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Sun, 11 Jul 2021 17:00:05 +1000 Subject: [PATCH] Updated resetInsertionMode Fixes #1491 Need to add support for template tags --- CHANGES | 3 +++ .../org/jsoup/parser/HtmlTreeBuilder.java | 24 +++++++++++-------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/CHANGES b/CHANGES index 02bd724a30..411e6b8299 100644 --- a/CHANGES +++ b/CHANGES @@ -5,6 +5,9 @@ jsoup changelog * Bugfix: when making a HTTP POST, if the request write fails, make sure the connection is immediately cleaned up. + * Bugfix: updated the HtmlTreeBuilder resetInsertionMode to the current spec for supported elements + + * Bugfix [Fuzz]: fixed a slow parse when a tag has thousands of null characters in it. diff --git a/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java b/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java index 835fa323f0..ed533b8d92 100644 --- a/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java +++ b/src/main/java/org/jsoup/parser/HtmlTreeBuilder.java @@ -438,17 +438,20 @@ private void replaceInQueue(ArrayList queue, Element out, Element in) { } void resetInsertionMode() { + // https://html.spec.whatwg.org/multipage/parsing.html#the-insertion-mode boolean last = false; for (int pos = stack.size() -1; pos >= 0; pos--) { Element node = stack.get(pos); if (pos == 0) { last = true; - node = contextElement; + if (fragmentParsing) + node = contextElement; } String name = node != null ? 
node.normalName() : ""; if ("select".equals(name)) { transition(HtmlTreeBuilderState.InSelect); - break; // frag + // todo - should loop up (with some limit) and check for table or template hits + break; } else if (("td".equals(name) || "th".equals(name) && !last)) { transition(HtmlTreeBuilderState.InCell); break; @@ -463,25 +466,26 @@ void resetInsertionMode() { break; } else if ("colgroup".equals(name)) { transition(HtmlTreeBuilderState.InColumnGroup); - break; // frag + break; } else if ("table".equals(name)) { transition(HtmlTreeBuilderState.InTable); break; - } else if ("head".equals(name)) { - transition(HtmlTreeBuilderState.InBody); - break; // frag + // todo - template + } else if ("head".equals(name) && !last) { + transition(HtmlTreeBuilderState.InHead); + break; } else if ("body".equals(name)) { transition(HtmlTreeBuilderState.InBody); break; } else if ("frameset".equals(name)) { transition(HtmlTreeBuilderState.InFrameset); - break; // frag + break; } else if ("html".equals(name)) { - transition(HtmlTreeBuilderState.BeforeHead); - break; // frag + transition(headElement == null ? HtmlTreeBuilderState.BeforeHead : HtmlTreeBuilderState.AfterHead); + break; } else if (last) { transition(HtmlTreeBuilderState.InBody); - break; // frag + break; } } }