Skip to content

Commit

Permalink
Support for match with quoted pattern (#1536)
Browse files Browse the repository at this point in the history
* Fix #986

* Doc and variable name tweak

For #1536

Co-authored-by: Jonathan Hedley <jonathan@hedley.net>
  • Loading branch information
suarez12138 and jhy committed Jul 15, 2021
1 parent 009dbb1 commit d4f91c5
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 1 deletion.
9 changes: 8 additions & 1 deletion src/main/java/org/jsoup/parser/TokenQueue.java
Expand Up @@ -264,6 +264,7 @@ public String chompBalanced(char open, char close) {
char last = 0;
boolean inSingleQuote = false;
boolean inDoubleQuote = false;
boolean inRegexQE = false; // regex \Q .. \E escapes from Pattern.quote()

do {
if (isEmpty()) break;
Expand All @@ -273,8 +274,10 @@ public String chompBalanced(char open, char close) {
inSingleQuote = !inSingleQuote;
else if (c == '"' && c != open && !inSingleQuote)
inDoubleQuote = !inDoubleQuote;
if (inSingleQuote || inDoubleQuote)
if (inSingleQuote || inDoubleQuote || inRegexQE){
last = c;
continue;
}

if (c == open) {
depth++;
Expand All @@ -283,6 +286,10 @@ else if (c == '"' && c != open && !inSingleQuote)
}
else if (c == close)
depth--;
} else if (c == 'Q') {
inRegexQE = true;
} else if (c == 'E') {
inRegexQE = false;
}

if (depth > 0 && last != 0)
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/jsoup/select/Selector.java
Expand Up @@ -73,6 +73,8 @@
* <tr><td><code>:empty</code></td><td>elements that have no children at all</td><td></td></tr>
* </table>
*
* <p>A word on using regular expressions in these selectors: depending on the content of the regex, you will need to quote the pattern using <b><code>Pattern.quote("regex")</code></b> for it to parse correctly through both the selector parser and the regex parser. E.g. <code>String query = "div:matches(" + Pattern.quote(regex) + ")";</code>.</p>
*
* @author Jonathan Hedley, jonathan@hedley.net
* @see Element#select(String)
*/
Expand Down
11 changes: 11 additions & 0 deletions src/test/java/org/jsoup/parser/TokenQueueTest.java
@@ -1,8 +1,11 @@
package org.jsoup.parser;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.junit.jupiter.api.Test;

import java.util.regex.Pattern;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;

Expand Down Expand Up @@ -95,4 +98,12 @@ public void chompBalancedThrowIllegalArgumentException() {
assertEquals("Did not find balanced marker at 'something(or another)) else'", expected.getMessage());
}
}

/**
 * Checks that {@code :matches(...)} selectors whose regex has been wrapped by
 * {@link Pattern#quote(String)} (i.e. in {@code \Q .. \E}) are parsed correctly
 * even when the quoted text contains unbalanced parentheses.
 */
@Test
public void testQuotedPattern() {
    // Three divs whose text contains a backslash-paren, a lone "(", and a lone ")".
    final Document parsed = Jsoup.parse("<div>\\) foo1</div><div>( foo2</div><div>1) foo3</div>");

    // Literal backslash followed by a closing paren.
    final String escapedCloseQuery = "div:matches(" + Pattern.quote("\\)") + ")";
    final String escapedCloseText = parsed.select(escapedCloseQuery).get(0).childNode(0).toString();
    assertEquals("\n\\) foo1", escapedCloseText);

    // Unbalanced opening paren.
    final String openParenQuery = "div:matches(" + Pattern.quote("(") + ")";
    final String openParenText = parsed.select(openParenQuery).get(0).childNode(0).toString();
    assertEquals("\n( foo2", openParenText);

    // Unbalanced closing paren preceded by ordinary text.
    final String closeParenQuery = "div:matches(" + Pattern.quote("1)") + ")";
    final String closeParenText = parsed.select(closeParenQuery).get(0).childNode(0).toString();
    assertEquals("\n1) foo3", closeParenText);
}
}

0 comments on commit d4f91c5

Please sign in to comment.