Skip to content

Commit

Permalink
Issue #6473 - canonicalPath refactor & fix alias check in PathResource
Browse files Browse the repository at this point in the history
Signed-off-by: Lachlan Roberts <lachlan@webtide.com>
  • Loading branch information
lachlan-roberts committed Jun 28, 2021
1 parent a02ade7 commit c2d5a82
Show file tree
Hide file tree
Showing 29 changed files with 602 additions and 382 deletions.
Expand Up @@ -18,6 +18,7 @@

package org.eclipse.jetty.http;

import java.util.EnumMap;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
Expand Down Expand Up @@ -214,4 +215,46 @@ public EnumSet<HttpComplianceSection> sections()
{
return _sections;
}

private static final EnumMap<HttpURI.Violation, HttpComplianceSection> __uriViolations = new EnumMap<>(HttpURI.Violation.class);
static
{
// create a map from Violation to compliance in a loop, so that any new violations added are detected with ISE
for (HttpURI.Violation violation : HttpURI.Violation.values())
{
switch (violation)
{
case SEPARATOR:
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_PATH_SEPARATORS);
break;
case SEGMENT:
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_PATH_SEGMENTS);
break;
case PARAM:
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_PATH_PARAMETERS);
break;
case ENCODING:
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_PATH_ENCODING);
break;
case EMPTY:
__uriViolations.put(violation, HttpComplianceSection.NO_AMBIGUOUS_EMPTY_SEGMENT);
break;
case UTF16:
__uriViolations.put(violation, HttpComplianceSection.NO_UTF16_ENCODINGS);
break;
default:
throw new IllegalStateException();
}
}
}

public static String checkUriCompliance(HttpCompliance compliance, HttpURI uri)
{
for (HttpURI.Violation violation : HttpURI.Violation.values())
{
if (uri.hasViolation(violation) && (compliance == null || compliance.sections().contains(__uriViolations.get(violation))))
return violation.getMessage();
}
return null;
}
}
136 changes: 105 additions & 31 deletions jetty-http/src/main/java/org/eclipse/jetty/http/HttpURI.java
Expand Up @@ -36,22 +36,45 @@
/**
* Http URI.
* Parse an HTTP URI from a string or byte array. Given a URI
* <code>http://user@host:port/path/info;param?query#fragment</code>
* this class will split it into the following undecoded optional elements:<ul>
* <code>http://user@host:port/path;param1/%2e/info;param2?query#fragment</code>
* this class will split it into the following optional elements:<ul>
* <li>{@link #getScheme()} - http:</li>
* <li>{@link #getAuthority()} - //name@host:port</li>
* <li>{@link #getHost()} - host</li>
* <li>{@link #getPort()} - port</li>
* <li>{@link #getPath()} - /path/info</li>
* <li>{@link #getParam()} - param</li>
* <li>{@link #getPath()} - /path;param1/%2e/info;param2</li>
* <li>{@link #getDecodedPath()} - /path/info</li>
* <li>{@link #getParam()} - param2</li>
* <li>{@link #getQuery()} - query</li>
* <li>{@link #getFragment()} - fragment</li>
* </ul>
*
* <p>Any parameters will be returned from {@link #getPath()}, but are excluded from the
* return value of {@link #getDecodedPath()}. If there are multiple parameters, the
* {@link #getParam()} method returns only the last one.
*/
* <p>The path part of the URI is provided in both raw form ({@link #getPath()}) and
* decoded form ({@link #getDecodedPath}), which has: path parameters removed,
* percent encoded characters expanded and relative segments resolved. This approach
* is somewhat contrary to <a href="https://tools.ietf.org/html/rfc3986#section-3.3">RFC3986</a>
* which no longer defines path parameters (removed after
* <a href="https://tools.ietf.org/html/rfc2396#section-3.3">RFC2396</a>) and specifies
* that relative segment normalization should take place before percent encoded character
* expansion. A literal interpretation of the RFC can result in URI paths with ambiguities
* when viewed as strings. For example, a URI of {@code /foo%2f..%2fbar} is technically a single
* segment of "/foo/../bar", but could easily be misinterpreted as 3 segments resolving to "/bar"
* by a file system.
* </p>
* <p>
* Thus this class avoid and/or detects such ambiguities. Furthermore, by decoding characters and
* removing parameters before relative path normalization, ambiguous paths will be resolved in such
* a way to be non-standard-but-non-ambiguous to down stream interpretation of the decoded path string.
* The violations are recorded and available by API such as {@link #hasViolation(Violation)} so that requests
* containing them may be rejected in case the non-standard-but-non-ambiguous interpretations
* are not satisfactory for a given compliance configuration. Implementations that wish to
* process ambiguous URI paths must configure the compliance modes to accept them and then perform
* their own decoding of {@link #getPath()}.
* </p>
* <p>
* If there are multiple path parameters, only the last one is returned by {@link #getParam()}.
* </p>
**/
public class HttpURI
{
private enum State
Expand All @@ -69,28 +92,49 @@ private enum State
ASTERISK
}

enum Violation
/**
* Violations of safe URI interpretations
*/
public enum Violation
{
SEGMENT,
SEPARATOR,
PARAM,
ENCODING,
EMPTY,
UTF16
/**
* Ambiguous path segments e.g. <code>/foo/%2E%2E/bar</code>
*/
SEGMENT("Ambiguous path segments"),
/**
* Ambiguous path separator within a URI segment e.g. <code>/foo%2Fbar</code>
*/
SEPARATOR("Ambiguous path separator"),
/**
* Ambiguous path parameters within a URI segment e.g. <code>/foo/..;/bar</code>
*/
PARAM("Ambiguous path parameters"),
/**
* Ambiguous double encoding within a URI segment e.g. <code>/%2557EB-INF</code>
*/
ENCODING("Ambiguous double encoding"),
/**
* Ambiguous empty segments e.g. <code>/foo//bar</code>
*/
EMPTY("Ambiguous empty segments"),
/**
* Non standard UTF-16 encoding eg <code>/foo%u2192bar</code>.
*/
UTF16("Non standard UTF-16 encoding");

private final String _message;

Violation(String message)
{
_message = message;
}

String getMessage()
{
return _message;
}
}

/**
* The concept of URI path parameters was originally specified in
* <a href="https://tools.ietf.org/html/rfc2396#section-3.3">RFC2396</a>, but that was
* obsoleted by
* <a href="https://tools.ietf.org/html/rfc3986#section-3.3">RFC3986</a> which removed
* a normative definition of path parameters. Specifically it excluded them from the
* <a href="https://tools.ietf.org/html/rfc3986#section-5.2.4">Remove Dot Segments</a>
* algorithm. This results in some ambiguity as dot segments can result from later
* parameter removal or % encoding expansion, that are not removed from the URI
* by {@link URIUtil#canonicalPath(String)}. Thus this class flags such ambiguous
* path segments, so that they may be rejected by the server if so configured.
*/
private static final Trie<Boolean> __ambiguousSegments = new ArrayTrie<>();

static
Expand Down Expand Up @@ -179,6 +223,22 @@ public HttpURI(HttpURI uri)
_emptySegment = false;
}

public HttpURI(HttpURI schemeHostPort, HttpURI uri)
{
_scheme = schemeHostPort._scheme;
_user = schemeHostPort._user;
_host = schemeHostPort._host;
_port = schemeHostPort._port;
_path = uri._path;
_param = uri._param;
_query = uri._query;
_fragment = uri._fragment;
_uri = uri._uri;
_decodedPath = uri._decodedPath;
_violations.addAll(uri._violations);
_emptySegment = false;
}

public HttpURI(String uri)
{
_port = -1;
Expand Down Expand Up @@ -506,6 +566,8 @@ else if (c == '/')
{
switch (encodedValue)
{
case 0:
throw new IllegalArgumentException("Illegal character in path");
case '/':
_violations.add(Violation.SEPARATOR);
break;
Expand Down Expand Up @@ -677,10 +739,12 @@ else if (c == '/')
}
else if (_path != null)
{
String canonical = URIUtil.canonicalPath(_path);
if (canonical == null)
throw new BadMessageException("Bad URI");
_decodedPath = URIUtil.decodePath(canonical);
// The RFC requires this to be canonical before decoding, but this can leave dot segments and dot dot segments
// which are not canonicalized and could be used in an attempt to bypass security checks.
String decodeNonCanonical = URIUtil.decodePath(_path);
_decodedPath = URIUtil.canonicalPath(decodeNonCanonical);
if (_decodedPath == null)
throw new IllegalArgumentException("Bad URI");
}
}

Expand Down Expand Up @@ -794,6 +858,11 @@ public boolean hasViolations()
return !_violations.isEmpty();
}

public boolean hasViolation(Violation violation)
{
return _violations.contains(violation);
}

/**
* @return True if the URI encodes UTF-16 characters with '%u'.
*/
Expand Down Expand Up @@ -839,6 +908,11 @@ public String getDecodedPath()
return _decodedPath;
}

/**
* Get a URI path parameter. Multiple and in segment parameters are ignored and only
* the last trailing parameter is returned.
* @return The last path parameter or null
*/
public String getParam()
{
return _param;
Expand Down

0 comments on commit c2d5a82

Please sign in to comment.