diff --git a/CHANGES b/CHANGES index 5f3b13b186..638ca4a369 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,10 @@ jsoup changelog * Improvement: support Pattern.quote \Q and \E escapes in the selector regex matchers. + * Improvement: Element.absUrl() now supports tel: URLs, and other URLs that are already absolute but that Java does + not have input stream handlers for. + + * Bugfix: when serializing output, escape characters that are in the < 0x20 range. This improves XML output compatibility, and makes HTML output with these characters easier to read (as they're otherwise invisible). diff --git a/src/main/java/org/jsoup/internal/StringUtil.java b/src/main/java/org/jsoup/internal/StringUtil.java index 53be1f9570..7917f80433 100644 --- a/src/main/java/org/jsoup/internal/StringUtil.java +++ b/src/main/java/org/jsoup/internal/StringUtil.java @@ -299,9 +299,11 @@ public static String resolve(final String baseUrl, final String relUrl) { } return resolve(base, relUrl).toExternalForm(); } catch (MalformedURLException e) { - return ""; + // it may still be valid, just that Java doesn't have a registered stream handler for it, e.g. tel: (relUrl is passed through unchanged only when it starts with an RFC 3986 scheme; the '-' is kept last in the character class so it is a literal and not a '+'..'.' range that would also admit ',') + return validUriScheme.matcher(relUrl).find() ? relUrl : ""; } } + private static final Pattern validUriScheme = Pattern.compile("^[a-zA-Z][a-zA-Z0-9+.-]*:"); private static final ThreadLocal> threadLocalBuilders = new ThreadLocal>() { @Override diff --git a/src/main/java/org/jsoup/nodes/Element.java b/src/main/java/org/jsoup/nodes/Element.java index 79839b68a8..0a0180bcc4 100644 --- a/src/main/java/org/jsoup/nodes/Element.java +++ b/src/main/java/org/jsoup/nodes/Element.java @@ -82,7 +82,7 @@ public Element(Tag tag, @Nullable String baseUri, @Nullable Attributes attribute * @param baseUri the base URI of this element. Optional, and will inherit from its parent, if any. 
* @see Tag#valueOf(String, ParseSettings) */ - public Element(Tag tag, String baseUri) { + public Element(Tag tag, @Nullable String baseUri) { this(tag, baseUri, null); } diff --git a/src/test/java/org/jsoup/nodes/NodeTest.java b/src/test/java/org/jsoup/nodes/NodeTest.java index 1dd66f11ed..47d8fc7886 100644 --- a/src/test/java/org/jsoup/nodes/NodeTest.java +++ b/src/test/java/org/jsoup/nodes/NodeTest.java @@ -3,6 +3,7 @@ import org.jsoup.Jsoup; import org.jsoup.TextUtil; import org.jsoup.parser.Tag; +import org.jsoup.select.Elements; import org.jsoup.select.NodeVisitor; import org.junit.jupiter.api.Test; @@ -132,6 +133,21 @@ public void handlesAbsOnProtocolessAbsoluteUris() { assertEquals("http://example.com/one/two.html", a1.absUrl("href")); } + @Test public void handlesAbsOnUnknownProtocols() { + // https://github.com/jhy/jsoup/issues/1610 + // URL would throw on unknown protocol tel: as no stream handler is registered + + String[] urls = {"mailto:example@example.com", "tel:867-5309"}; // mail has a handler, tel doesn't + for (String url : urls) { + Attributes attr = new Attributes().put("href", url); + Element noBase = new Element(Tag.valueOf("a"), null, attr); + assertEquals(url, noBase.absUrl("href")); + + Element withBase = new Element(Tag.valueOf("a"), "http://example.com/", attr); + assertEquals(url, withBase.absUrl("href")); + } + } + @Test public void testRemove() { Document doc = Jsoup.parse("

One two three

"); Element p = doc.select("p").first();