From d4f91c5eba9be87cd7d6ca90275e2200a4a6ff65 Mon Sep 17 00:00:00 2001 From: Hulmes <60814832+suarez12138@users.noreply.github.com> Date: Thu, 15 Jul 2021 14:51:42 +0800 Subject: [PATCH] Support for match with quoted pattern (#1536) * Fix #986 * Doc and variable name tweak For #1536 Co-authored-by: Jonathan Hedley --- src/main/java/org/jsoup/parser/TokenQueue.java | 9 ++++++++- src/main/java/org/jsoup/select/Selector.java | 2 ++ src/test/java/org/jsoup/parser/TokenQueueTest.java | 11 +++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/jsoup/parser/TokenQueue.java b/src/main/java/org/jsoup/parser/TokenQueue.java index f269051708..42c30668fa 100644 --- a/src/main/java/org/jsoup/parser/TokenQueue.java +++ b/src/main/java/org/jsoup/parser/TokenQueue.java @@ -264,6 +264,7 @@ public String chompBalanced(char open, char close) { char last = 0; boolean inSingleQuote = false; boolean inDoubleQuote = false; + boolean inRegexQE = false; // regex \Q .. \E escapes from Pattern.quote() do { if (isEmpty()) break; @@ -273,8 +274,10 @@ public String chompBalanced(char open, char close) { inSingleQuote = !inSingleQuote; else if (c == '"' && c != open && !inSingleQuote) inDoubleQuote = !inDoubleQuote; - if (inSingleQuote || inDoubleQuote) + if (inSingleQuote || inDoubleQuote || inRegexQE){ + last = c; continue; + } if (c == open) { depth++; @@ -283,6 +286,10 @@ else if (c == '"' && c != open && !inSingleQuote) } else if (c == close) depth--; + } else if (c == 'Q') { + inRegexQE = true; + } else if (c == 'E') { + inRegexQE = false; } if (depth > 0 && last != 0) diff --git a/src/main/java/org/jsoup/select/Selector.java b/src/main/java/org/jsoup/select/Selector.java index 1fafbdb39f..0afd8c77df 100644 --- a/src/main/java/org/jsoup/select/Selector.java +++ b/src/main/java/org/jsoup/select/Selector.java @@ -73,6 +73,8 @@ * :emptyelements that have no children at all * * + *

A word on using regular expressions in these selectors: depending on the content of the regex, you will need to quote the pattern using Pattern.quote("regex") for it to parse correctly through both the selector parser and the regex parser. E.g. String query = "div:matches(" + Pattern.quote(regex) + ")";

+ * * @author Jonathan Hedley, jonathan@hedley.net * @see Element#select(String) */ diff --git a/src/test/java/org/jsoup/parser/TokenQueueTest.java b/src/test/java/org/jsoup/parser/TokenQueueTest.java index 2690c4bf97..463a19e4f4 100644 --- a/src/test/java/org/jsoup/parser/TokenQueueTest.java +++ b/src/test/java/org/jsoup/parser/TokenQueueTest.java @@ -1,8 +1,11 @@ package org.jsoup.parser; import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; import org.junit.jupiter.api.Test; +import java.util.regex.Pattern; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.fail; @@ -95,4 +98,12 @@ public void chompBalancedThrowIllegalArgumentException() { assertEquals("Did not find balanced marker at 'something(or another)) else'", expected.getMessage()); } } + + @Test + public void testQuotedPattern() { + final Document doc = Jsoup.parse("
\\) foo1
( foo2
1) foo3
"); + assertEquals("\n\\) foo1",doc.select("div:matches(" + Pattern.quote("\\)") + ")").get(0).childNode(0).toString()); + assertEquals("\n( foo2",doc.select("div:matches(" + Pattern.quote("(") + ")").get(0).childNode(0).toString()); + assertEquals("\n1) foo3",doc.select("div:matches(" + Pattern.quote("1)") + ")").get(0).childNode(0).toString()); + } }