Skip to content

Commit

Permalink
Issue #6473 - canonicalPath refactor & fix alias check in PathResource
Browse files Browse the repository at this point in the history
Signed-off-by: Lachlan Roberts <lachlan@webtide.com>
  • Loading branch information
lachlan-roberts committed Jun 29, 2021
1 parent bc0fbbb commit 92a74d6
Show file tree
Hide file tree
Showing 27 changed files with 608 additions and 351 deletions.
154 changes: 130 additions & 24 deletions jetty-http/src/main/java/org/eclipse/jetty/http/HttpURI.java
Expand Up @@ -15,7 +15,10 @@

import java.net.URI;
import java.net.URISyntaxException;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Set;

import org.eclipse.jetty.util.HostPort;
import org.eclipse.jetty.util.Index;
Expand All @@ -31,54 +34,92 @@
* via the static methods such as {@link #build()} and {@link #from(String)}.
*
* A URI such as
* <code>http://user@host:port/path;ignored/info;param?query#ignored</code>
* is split into the following undecoded elements:<ul>
* <code>http://user@host:port/path;param1/%2e/info;param2?query#fragment</code>
* this class will split it into the following optional elements:<ul>
* <li>{@link #getScheme()} - http:</li>
* <li>{@link #getAuthority()} - //name@host:port</li>
* <li>{@link #getHost()} - host</li>
* <li>{@link #getPort()} - port</li>
* <li>{@link #getPath()} - /path/info</li>
* <li>{@link #getParam()} - param</li>
* <li>{@link #getPath()} - /path;param1/%2e/info;param2</li>
* <li>{@link #getDecodedPath()} - /path/info</li>
* <li>{@link #getParam()} - param2</li>
* <li>{@link #getQuery()} - query</li>
* <li>{@link #getFragment()} - fragment</li>
* </ul>
* <p>Any parameters will be returned from {@link #getPath()}, but are excluded from the
* return value of {@link #getDecodedPath()}. If there are multiple parameters, the
* {@link #getParam()} method returns only the last one.
*/
* <p>The path part of the URI is provided in both raw form ({@link #getPath()}) and
* decoded form ({@link #getDecodedPath}), which has: path parameters removed,
* percent encoded characters expanded and relative segments resolved. This approach
* is somewhat contrary to <a href="https://tools.ietf.org/html/rfc3986#section-3.3">RFC3986</a>
* which no longer defines path parameters (removed after
* <a href="https://tools.ietf.org/html/rfc2396#section-3.3">RFC2396</a>) and specifies
* that relative segment normalization should take place before percent encoded character
* expansion. A literal interpretation of the RFC can result in URI paths with ambiguities
* when viewed as strings. For example, a URI of {@code /foo%2f..%2fbar} is technically a single
* segment of "/foo/../bar", but could easily be misinterpreted as 3 segments resolving to "/bar"
* by a file system.
* </p>
* <p>
* Thus this class avoid and/or detects such ambiguities. Furthermore, by decoding characters and
* removing parameters before relative path normalization, ambiguous paths will be resolved in such
* a way to be non-standard-but-non-ambiguous to down stream interpretation of the decoded path string.
* The violations are recorded and available by API such as {@link #hasViolation(Violation)} so that requests
* containing them may be rejected in case the non-standard-but-non-ambiguous interpretations
* are not satisfactory for a given compliance configuration. Implementations that wish to
* process ambiguous URI paths must configure the compliance modes to accept them and then perform
* their own decoding of {@link #getPath()}.
* </p>
* <p>
* If there are multiple path parameters, only the last one is returned by {@link #getParam()}.
* </p>
**/
public interface HttpURI
{
/**
* Violations of safe URI interpretations
*/
enum Violation
{
/**
* URI contains ambiguous path segments e.g. {@code /foo/%2e%2e/bar}
* Ambiguous path segments e.g. <code>/foo/%2E%2E/bar</code>
*/
SEGMENT,
SEGMENT("Ambiguous path segments"),

/**
* URI contains ambiguous empty segments e.g. {@code /foo//bar} or {@code /foo/;param/}, but not {@code /foo/}
* Ambiguous path separator within a URI segment e.g. <code>/foo%2Fbar</code>
*/
EMPTY,
SEPARATOR("Ambiguous path separator"),

/**
* URI contains ambiguous path separator within a URI segment e.g. {@code /foo/b%2fr}
* Ambiguous path parameters within a URI segment e.g. <code>/foo/..;/bar</code>
*/
SEPARATOR,
PARAM("Ambiguous path parameters"),

/**
* URI contains ambiguous path encoding within a URI segment e.g. {@code /%2557EB-INF}
* Ambiguous double encoding within a URI segment e.g. <code>/%2557EB-INF</code>
*/
ENCODING,
ENCODING("Ambiguous double encoding"),

/**
* URI contains ambiguous path parameters within a URI segment e.g. {@code /foo/..;/bar}
* Ambiguous empty segments e.g. <code>/foo//bar</code>
*/
PARAM,
EMPTY("Ambiguous empty segments"),

/**
* Contains UTF16 encodings
* Non standard UTF-16 encoding eg <code>/foo%u2192bar</code>.
*/
UTF16
UTF16("Non standard UTF-16 encoding");

private final String _message;

Violation(String message)
{
_message = message;
}

String getMessage()
{
return _message;
}
}

static Mutable build()
Expand Down Expand Up @@ -135,6 +176,11 @@ static Immutable from(String scheme, String host, int port, String pathQuery)
return new Mutable(scheme, host, port, pathQuery).asImmutable();
}

static Immutable build(HttpURI schemeHostPort, HttpURI uri)
{
return new Immutable(schemeHostPort, uri);
}

Immutable asImmutable();

String asString();
Expand All @@ -147,6 +193,11 @@ static Immutable from(String scheme, String host, int port, String pathQuery)

String getHost();

/**
* Get a URI path parameter. Multiple and in segment parameters are ignored and only
* the last trailing parameter is returned.
* @return The last path parameter or null
*/
String getParam();

String getPath();
Expand Down Expand Up @@ -175,6 +226,17 @@ static Immutable from(String scheme, String host, int port, String pathQuery)
*/
boolean hasViolations();

/**
* @param violation the violation to check.
* @return true if the URI has the passed violation.
*/
boolean hasViolation(Violation violation);

/**
* @return Set of violations in the URI.
*/
Collection<Violation> getViolations();

/**
* @return True if the URI has a possibly ambiguous segment like '..;' or '%2e%2e'
*/
Expand Down Expand Up @@ -258,6 +320,22 @@ private Immutable(String uri)
_decodedPath = null;
}

private Immutable(HttpURI schemeHostPort, HttpURI uri)
{
_scheme = schemeHostPort.getScheme();
_user = schemeHostPort.getUser();
_host = schemeHostPort.getHost();
_port = schemeHostPort.getPort();
_path = uri.getPath();
_param = uri.getParam();
_query = uri.getQuery();
_fragment = uri.getFragment();
_uri = null;
_decodedPath = uri.getDecodedPath();
if (uri.hasViolations())
_violations.addAll(uri.getViolations());
}

@Override
public Immutable asImmutable()
{
Expand Down Expand Up @@ -417,6 +495,18 @@ public boolean hasViolations()
return !_violations.isEmpty();
}

@Override
public boolean hasViolation(Violation violation)
{
return _violations.contains(violation);
}

@Override
public Collection<Violation> getViolations()
{
return Collections.unmodifiableSet(_violations);
}

@Override
public boolean hasAmbiguousSegment()
{
Expand Down Expand Up @@ -790,6 +880,18 @@ public boolean hasViolations()
return !_violations.isEmpty();
}

@Override
public boolean hasViolation(Violation violation)
{
return _violations.contains(violation);
}

@Override
public Collection<Violation> getViolations()
{
return Collections.unmodifiableSet(_violations);
}

@Override
public boolean hasAmbiguousSegment()
{
Expand Down Expand Up @@ -1216,6 +1318,8 @@ else if (c == '/')
{
switch (encodedValue)
{
case 0:
throw new IllegalArgumentException("Illegal character in path");
case '/':
_violations.add(Violation.SEPARATOR);
break;
Expand Down Expand Up @@ -1387,10 +1491,12 @@ else if (c == '/')
}
else if (_path != null)
{
String canonical = URIUtil.canonicalPath(_path);
if (canonical == null)
throw new BadMessageException("Bad URI");
_decodedPath = URIUtil.decodePath(canonical);
// The RFC requires this to be canonical before decoding, but this can leave dot segments and dot dot segments
// which are not canonicalized and could be used in an attempt to bypass security checks.
String decodeNonCanonical = URIUtil.decodePath(_path);
_decodedPath = URIUtil.canonicalPath(decodeNonCanonical);
if (_decodedPath == null)
throw new IllegalArgumentException("Bad URI");
}
}

Expand Down
58 changes: 51 additions & 7 deletions jetty-http/src/main/java/org/eclipse/jetty/http/UriCompliance.java
Expand Up @@ -14,6 +14,7 @@
package org.eclipse.jetty.http;

import java.util.Arrays;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.Objects;
import java.util.Set;
Expand Down Expand Up @@ -67,10 +68,6 @@ public enum Violation implements ComplianceViolation
* Allow ambiguous path encoding within a URI segment e.g. <code>/%2557EB-INF</code>
*/
AMBIGUOUS_PATH_ENCODING("https://tools.ietf.org/html/rfc3986#section-3.3", "Ambiguous URI path encoding"),
/**
* Allow Non canonical ambiguous paths. eg <code>/foo/x%2f%2e%2e%/bar</code> provided to applications as <code>/foo/x/../bar</code>
*/
NON_CANONICAL_AMBIGUOUS_PATHS("https://tools.ietf.org/html/rfc3986#section-3.3", "Non canonical ambiguous paths"),
/**
* Allow UTF-16 encoding eg <code>/foo%u2192bar</code>.
*/
Expand Down Expand Up @@ -125,10 +122,9 @@ public String getDescription()

/**
* Compliance mode that exactly follows <a href="https://tools.ietf.org/html/rfc3986">RFC3986</a>,
* including allowing all additional ambiguous URI Violations,
* except {@link Violation#NON_CANONICAL_AMBIGUOUS_PATHS}, thus ambiguous paths are canonicalized for safety.
* including allowing all additional ambiguous URI Violations.
*/
public static final UriCompliance RFC3986 = new UriCompliance("RFC3986", complementOf(of(Violation.NON_CANONICAL_AMBIGUOUS_PATHS)));
public static final UriCompliance RFC3986 = new UriCompliance("RFC3986", allOf(Violation.class));

/**
* Compliance mode that follows <a href="https://tools.ietf.org/html/rfc3986">RFC3986</a>
Expand Down Expand Up @@ -231,6 +227,11 @@ public static UriCompliance from(String spec)
boolean exclude = element.startsWith("-");
if (exclude)
element = element.substring(1);

// Ignore removed name. TODO: remove in future release.
if (element.equals("NON_CANONICAL_AMBIGUOUS_PATHS"))
continue;

Violation section = Violation.valueOf(element);
if (exclude)
violations.remove(section);
Expand Down Expand Up @@ -330,4 +331,47 @@ private static Set<Violation> copyOf(Set<Violation> violations)
return EnumSet.noneOf(Violation.class);
return EnumSet.copyOf(violations);
}

private static final EnumMap<HttpURI.Violation, Violation> __uriViolations = new EnumMap<>(HttpURI.Violation.class);

static
{
// create a map from Violation to compliance in a loop, so that any new violations added are detected with ISE
for (HttpURI.Violation violation : HttpURI.Violation.values())
{
switch (violation)
{
case SEPARATOR:
__uriViolations.put(violation, Violation.AMBIGUOUS_PATH_SEPARATOR);
break;
case SEGMENT:
__uriViolations.put(violation, Violation.AMBIGUOUS_PATH_SEGMENT);
break;
case PARAM:
__uriViolations.put(violation, Violation.AMBIGUOUS_PATH_PARAMETER);
break;
case ENCODING:
__uriViolations.put(violation, Violation.AMBIGUOUS_PATH_ENCODING);
break;
case EMPTY:
__uriViolations.put(violation, Violation.AMBIGUOUS_EMPTY_SEGMENT);
break;
case UTF16:
__uriViolations.put(violation, Violation.UTF16_ENCODINGS);
break;
default:
throw new IllegalStateException();
}
}
}

public static String checkUriCompliance(UriCompliance compliance, HttpURI uri)
{
for (HttpURI.Violation violation : HttpURI.Violation.values())
{
if (uri.hasViolation(violation) && (compliance == null || !compliance.allows(__uriViolations.get(violation))))
return violation.getMessage();
}
return null;
}
}

0 comments on commit 92a74d6

Please sign in to comment.