Skip to content

Commit

Permalink
absUrl now supports already abs URLs, even when there's no defined st…
Browse files Browse the repository at this point in the history
…ream handler

Fixes #1610
  • Loading branch information
jhy committed Aug 14, 2021
1 parent e6b11b0 commit 50ff710
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 2 deletions.
4 changes: 4 additions & 0 deletions CHANGES
Expand Up @@ -4,6 +4,10 @@ jsoup changelog
* Improvement: support Pattern.quote \Q and \E escapes in the selector regex matchers.
<https://github.com/jhy/jsoup/pull/1536>

* Improvement: Element.absUrl() now supports tel: URLs, and other URLs that are already absolute but that Java does
not have input stream handlers for.
<https://github.com/jhy/jsoup/issues/1610>

* Bugfix: when serializing output, escape characters that are in the < 0x20 range. This improves XML output
compatibility, and makes HTML output with these characters easier to read (as they're otherwise invisible).
<https://github.com/jhy/jsoup/issues/1556>
Expand Down
4 changes: 3 additions & 1 deletion src/main/java/org/jsoup/internal/StringUtil.java
Expand Up @@ -299,9 +299,11 @@ public static String resolve(final String baseUrl, final String relUrl) {
}
return resolve(base, relUrl).toExternalForm();
} catch (MalformedURLException e) {
return "";
// it may still be valid, just that Java doesn't have a registered stream handler for it, e.g. tel:
return validUriScheme.matcher(relUrl).find() ? relUrl : "";
}
}
// Matches a leading URI scheme followed by ':' (RFC 3986 section 3.1: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )).
// The '-' sits last in the character class so it is a literal hyphen; written as "+-." it would form a range
// from '+' to '.', which also admits ',' and so would accept strings like "a,b:" as having a valid scheme.
private static final Pattern validUriScheme = Pattern.compile("^[a-zA-Z][a-zA-Z0-9+.-]*:");

private static final ThreadLocal<Stack<StringBuilder>> threadLocalBuilders = new ThreadLocal<Stack<StringBuilder>>() {
@Override
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jsoup/nodes/Element.java
Expand Up @@ -82,7 +82,7 @@ public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attribute
* @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any.
* @see Tag#valueOf(String, ParseSettings)
*/
public Element(Tag tag, String baseUri) {
public Element(Tag tag, @Nullable String baseUri) {
this(tag, baseUri, null);
}

Expand Down
16 changes: 16 additions & 0 deletions src/test/java/org/jsoup/nodes/NodeTest.java
Expand Up @@ -3,6 +3,7 @@
import org.jsoup.Jsoup;
import org.jsoup.TextUtil;
import org.jsoup.parser.Tag;
import org.jsoup.select.Elements;
import org.jsoup.select.NodeVisitor;
import org.junit.jupiter.api.Test;

Expand Down Expand Up @@ -132,6 +133,21 @@ public void handlesAbsOnProtocolessAbsoluteUris() {
assertEquals("http://example.com/one/two.html", a1.absUrl("href"));
}

@Test public void handlesAbsOnUnknownProtocols() {
    // Regression check for https://github.com/jhy/jsoup/issues/1610:
    // an href that is already absolute should come back from absUrl unchanged, with or without a base URI.
    // mailto: has a registered stream handler; tel: does not, so URL would throw on it.
    final String[] absoluteHrefs = {"mailto:example@example.com", "tel:867-5309"};

    for (int i = 0; i < absoluteHrefs.length; i++) {
        final String href = absoluteHrefs[i];
        // the same attribute set is shared by both elements below
        final Attributes attributes = new Attributes().put("href", href);

        // no base URI at all
        final Element withoutBase = new Element(Tag.valueOf("a"), null, attributes);
        assertEquals(href, withoutBase.absUrl("href"));

        // an http base URI must not be applied to the already-absolute href
        final Element withBase = new Element(Tag.valueOf("a"), "http://example.com/", attributes);
        assertEquals(href, withBase.absUrl("href"));
    }
}

@Test public void testRemove() {
Document doc = Jsoup.parse("<p>One <span>two</span> three</p>");
Element p = doc.select("p").first();
Expand Down

0 comments on commit 50ff710

Please sign in to comment.