jetty · sbordet · Jun 29, 2021 · Jun 28, 2021 · Jun 28, 2021 · Jun 29, 2021
diff --git a/jetty-http/src/main/java/org/eclipse/jetty/http/HttpURI.java b/jetty-http/src/main/java/org/eclipse/jetty/http/HttpURI.java
@@ -15,6 +15,8 @@
 
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.util.Collection;
+import java.util.Collections;
 import java.util.EnumSet;
 
 import org.eclipse.jetty.util.HostPort;
@@ -31,54 +33,94 @@
  * via the static methods such as {@link #build()} and {@link #from(String)}.
  *
  * A URI such as
- * <code>http://user@host:port/path;ignored/info;param?query#ignored</code>
- * is split into the following undecoded elements:<ul>
+ * {@code http://user@host:port/path;param1/%2e/info;param2?query#fragment}
+ * this class will split it into the following optional elements:<ul>
  * <li>{@link #getScheme()} - http:</li>
  * <li>{@link #getAuthority()} - //name@host:port</li>
  * <li>{@link #getHost()} - host</li>
  * <li>{@link #getPort()} - port</li>
- * <li>{@link #getPath()} - /path/info</li>
- * <li>{@link #getParam()} - param</li>
+ * <li>{@link #getPath()} - /path;param1/%2e/info;param2</li>
+ * <li>{@link #getDecodedPath()} - /path/info</li>
+ * <li>{@link #getParam()} - param2</li>
  * <li>{@link #getQuery()} - query</li>
  * <li>{@link #getFragment()} - fragment</li>
  * </ul>
- * <p>Any parameters will be returned from {@link #getPath()}, but are excluded from the
- * return value of {@link #getDecodedPath()}.   If there are multiple parameters, the
- * {@link #getParam()} method returns only the last one.
- */
+ * <p>The path part of the URI is provided in both raw form ({@link #getPath()}) and
+ * decoded form ({@link #getDecodedPath}), which has: path parameters removed,
+ * percent encoded characters expanded and relative segments resolved.  This approach
+ * is somewhat contrary to <a href="https://tools.ietf.org/html/rfc3986#section-3.3">RFC3986</a>
+ * which no longer defines path parameters (removed after
+ * <a href="https://tools.ietf.org/html/rfc2396#section-3.3">RFC2396</a>) and specifies
+ * that relative segment normalization should take place before percent encoded character
+ * expansion. A literal interpretation of the RFC can result in URI paths with ambiguities
+ * when viewed as strings. For example, a URI of {@code /foo%2f..%2fbar} is technically a single
+ * segment of "/foo/../bar", but could easily be misinterpreted as 3 segments resolving to "/bar"
+ * by a file system.
+ * </p>
+ * <p>
+ * Thus this class avoid and/or detects such ambiguities. Furthermore, by decoding characters and
+ * removing parameters before relative path normalization, ambiguous paths will be resolved in such
+ * a way to be non-standard-but-non-ambiguous to down stream interpretation of the decoded path string.
+ * The violations are recorded and available by API such as {@link #hasAmbiguousSegment()} so that requests
+ * containing them may be rejected in case the non-standard-but-non-ambiguous interpretations
+ * are not satisfactory for a given compliance configuration.
+ * </p>
+ * <p>
+ * Implementations that wish to process ambiguous URI paths must configure the compliance modes
+ * to accept them and then perform their own decoding of {@link #getPath()}.
+ * </p>
+ * <p>
+ * If there are multiple path parameters, only the last one is returned by {@link #getParam()}.
+ * </p>
+ **/
 public interface HttpURI
 {
+    /**
+     * Violations of safe URI interpretations
+     */
     enum Violation
     {
         /**
-         * URI contains ambiguous path segments e.g. {@code /foo/%2e%2e/bar}
+         * Ambiguous path segments e.g. {@code /foo/%2E%2E/bar}
          */
-        SEGMENT,
+        SEGMENT("Ambiguous path segments"),
 
         /**
-         * URI contains ambiguous empty segments e.g. {@code /foo//bar} or {@code /foo/;param/}, but not {@code /foo/}
+         * Ambiguous path separator within a URI segment e.g. {@code /foo%2Fbar}
          */
-        EMPTY,
+        SEPARATOR("Ambiguous path separator"),
 
         /**
-         * URI contains ambiguous path separator within a URI segment e.g. {@code /foo/b%2fr}
+         * Ambiguous path parameters within a URI segment e.g. {@code /foo/..;/bar}
          */
-        SEPARATOR,
+        PARAM("Ambiguous path parameters"),
 
         /**
-         * URI contains ambiguous path encoding within a URI segment e.g. {@code /%2557EB-INF}
+         * Ambiguous double encoding within a URI segment e.g. {@code /%2557EB-INF}
          */
-        ENCODING,
+        ENCODING("Ambiguous double encoding"),
 
         /**
-         * URI contains ambiguous path parameters within a URI segment e.g. {@code /foo/..;/bar}
+         * Ambiguous empty segments e.g. {@code /foo//bar}
          */
-        PARAM,
+        EMPTY("Ambiguous empty segments"),
 
         /**
-         * Contains UTF16 encodings
+         * Non standard UTF-16 encoding eg {@code /foo%u2192bar}.
          */
-        UTF16
+        UTF16("Non standard UTF-16 encoding");
+
+        private final String _message;
+
+        Violation(String message)
+        {
+            _message = message;
+        }
+
+        String getMessage()
+        {
+            return _message;
+        }
     }
 
     static Mutable build()
@@ -147,6 +189,11 @@ static Immutable from(String scheme, String host, int port, String pathQuery)
 
     String getHost();
 
+    /**
+     * Get a URI path parameter. Multiple and in segment parameters are ignored and only
+     * the last trailing parameter is returned.
+     * @return The last path parameter or null
+     */
     String getParam();
 
     String getPath();
@@ -166,15 +213,26 @@ static Immutable from(String scheme, String host, int port, String pathQuery)
     boolean isAbsolute();
 
     /**
-     * @return True if the URI has either an {@link #hasAmbiguousParameter()}, {@link #hasAmbiguousSegment()} or {@link #hasAmbiguousSeparator()}.
+     * @return True if the URI has any ambiguous {@link Violation}s.
      */
     boolean isAmbiguous();
 
     /**
-     * @return True if the URI has any Violations.
+     * @return True if the URI has any {@link Violation}s.
      */
     boolean hasViolations();
 
+    /**
+     * @param violation the violation to check.
+     * @return true if the URI has the passed violation.
+     */
+    boolean hasViolation(Violation violation);
+
+    /**
+     * @return Set of violations in the URI.
+     */
+    Collection<Violation> getViolations();
+
     /**
      * @return True if the URI has a possibly ambiguous segment like '..;' or '%2e%2e'
      */
@@ -417,6 +475,18 @@ public boolean hasViolations()
             return !_violations.isEmpty();
         }
 
+        @Override
+        public boolean hasViolation(Violation violation)
+        {
+            return _violations.contains(violation);
+        }
+
+        @Override
+        public Collection<Violation> getViolations()
+        {
+            return Collections.unmodifiableSet(_violations);
+        }
+
         @Override
         public boolean hasAmbiguousSegment()
         {
@@ -790,6 +860,18 @@ public boolean hasViolations()
             return !_violations.isEmpty();
         }
 
+        @Override
+        public boolean hasViolation(Violation violation)
+        {
+            return _violations.contains(violation);
+        }
+
+        @Override
+        public Collection<Violation> getViolations()
+        {
+            return Collections.unmodifiableSet(_violations);
+        }
+
         @Override
         public boolean hasAmbiguousSegment()
         {
@@ -1216,6 +1298,10 @@ else if (c == '/')
                             {
                                 switch (encodedValue)
                                 {
+                                    case 0:
+                                    // Byte 0 cannot be present in a UTF-8 sequence in any position
+                                    // other than as the NUL ASCII byte which we do not wish to allow.
+                                        throw new IllegalArgumentException("Illegal character in path");
                                     case '/':
                                         _violations.add(Violation.SEPARATOR);
                                         break;
@@ -1301,26 +1387,11 @@ else if (c == '/')
                     }
                     case QUERY:
                     {
-                        switch (c)
+                        if (c == '#')
                         {
-                            case '%':
-                                encodedCharacters = 2;
-                                break;
-                            case 'u':
-                            case 'U':
-                                if (encodedCharacters == 1)
-                                    _violations.add(Violation.UTF16);
-                                encodedCharacters = 0;
-                                break;
-                            case '#':
-                                _query = uri.substring(mark, i);
-                                mark = i + 1;
-                                state = State.FRAGMENT;
-                                encodedCharacters = 0;
-                                break;
-                            default:
-                                encodedCharacters = 0;
-                                break;
+                            _query = uri.substring(mark, i);
+                            mark = i + 1;
+                            state = State.FRAGMENT;
                         }
                         break;
                     }
@@ -1335,7 +1406,9 @@ else if (c == '/')
                         break;
                     }
                     default:
+                    {
                         throw new IllegalStateException(state.toString());
+                    }
                 }
             }
 
@@ -1387,10 +1460,12 @@ else if (c == '/')
             }
             else if (_path != null)
             {
-                String canonical = URIUtil.canonicalPath(_path);
-                if (canonical == null)
-                    throw new BadMessageException("Bad URI");
-                _decodedPath = URIUtil.decodePath(canonical);
+                // The RFC requires this to be canonical before decoding, but this can leave dot segments and dot dot segments
+                // which are not canonicalized and could be used in an attempt to bypass security checks.
+                String decodedNonCanonical = URIUtil.decodePath(_path);
+                _decodedPath = URIUtil.canonicalPath(decodedNonCanonical);
+                if (_decodedPath == null)
+                    throw new IllegalArgumentException("Bad URI");
             }
         }
 
@@ -1409,7 +1484,8 @@ private void checkSegment(String uri, int segment, int end, boolean param)
             // This method is called once for every segment parsed.
             // A URI like "/foo/" has two segments: "foo" and an empty segment.
             // Empty segments are only ambiguous if they are not the last segment
-            // So if this method is called for any segment and we have previously seen an empty segment, then it was ambiguous 
+            // So if this method is called for any segment and we have previously
+            // seen an empty segment, then it was ambiguous.
             if (_emptySegment)
                 _violations.add(Violation.EMPTY);
 

diff --git a/jetty-http/src/main/java/org/eclipse/jetty/http/UriCompliance.java b/jetty-http/src/main/java/org/eclipse/jetty/http/UriCompliance.java
@@ -14,6 +14,7 @@
 package org.eclipse.jetty.http;
 
 import java.util.Arrays;
+import java.util.EnumMap;
 import java.util.EnumSet;
 import java.util.Objects;
 import java.util.Set;
@@ -67,10 +68,6 @@ public enum Violation implements ComplianceViolation
          * Allow ambiguous path encoding within a URI segment e.g. <code>/%2557EB-INF</code>
          */
         AMBIGUOUS_PATH_ENCODING("https://tools.ietf.org/html/rfc3986#section-3.3", "Ambiguous URI path encoding"),
-        /**
-         * Allow Non canonical ambiguous paths. eg <code>/foo/x%2f%2e%2e%/bar</code> provided to applications as <code>/foo/x/../bar</code>
-         */
-        NON_CANONICAL_AMBIGUOUS_PATHS("https://tools.ietf.org/html/rfc3986#section-3.3", "Non canonical ambiguous paths"),
         /**
          * Allow UTF-16 encoding eg <code>/foo%u2192bar</code>.
          */
@@ -125,10 +122,9 @@ public String getDescription()
 
     /**
      * Compliance mode that exactly follows <a href="https://tools.ietf.org/html/rfc3986">RFC3986</a>,
-     * including allowing all additional ambiguous URI Violations,
-     * except {@link Violation#NON_CANONICAL_AMBIGUOUS_PATHS}, thus ambiguous paths are canonicalized for safety.
+     * including allowing all additional ambiguous URI Violations.
      */
-    public static final UriCompliance RFC3986 = new UriCompliance("RFC3986", complementOf(of(Violation.NON_CANONICAL_AMBIGUOUS_PATHS)));
+    public static final UriCompliance RFC3986 = new UriCompliance("RFC3986", allOf(Violation.class));
 
     /**
      * Compliance mode that follows <a href="https://tools.ietf.org/html/rfc3986">RFC3986</a>
@@ -231,6 +227,11 @@ public static UriCompliance from(String spec)
             boolean exclude = element.startsWith("-");
             if (exclude)
                 element = element.substring(1);
+
+            // Ignore removed name. TODO: remove in future release.
+            if (element.equals("NON_CANONICAL_AMBIGUOUS_PATHS"))
+                continue;
+
             Violation section = Violation.valueOf(element);
             if (exclude)
                 violations.remove(section);
@@ -330,4 +331,47 @@ private static Set<Violation> copyOf(Set<Violation> violations)
             return EnumSet.noneOf(Violation.class);
         return EnumSet.copyOf(violations);
     }
+
+    private static final EnumMap<HttpURI.Violation, Violation> __uriViolations = new EnumMap<>(HttpURI.Violation.class);
+
+    static
+    {
+        // create a map from Violation to compliance in a loop, so that any new violations added are detected with ISE
+        for (HttpURI.Violation violation : HttpURI.Violation.values())
+        {
+            switch (violation)
+            {
+                case SEPARATOR:
+                    __uriViolations.put(violation, Violation.AMBIGUOUS_PATH_SEPARATOR);
+                    break;
+                case SEGMENT:
+                    __uriViolations.put(violation, Violation.AMBIGUOUS_PATH_SEGMENT);
+                    break;
+                case PARAM:
+                    __uriViolations.put(violation, Violation.AMBIGUOUS_PATH_PARAMETER);
+                    break;
+                case ENCODING:
+                    __uriViolations.put(violation, Violation.AMBIGUOUS_PATH_ENCODING);
+                    break;
+                case EMPTY:
+                    __uriViolations.put(violation, Violation.AMBIGUOUS_EMPTY_SEGMENT);
+                    break;
+                case UTF16:
+                    __uriViolations.put(violation, Violation.UTF16_ENCODINGS);
+                    break;
+                default:
+                    throw new IllegalStateException();
+            }
+        }
+    }
+
+    public static String checkUriCompliance(UriCompliance compliance, HttpURI uri)
+    {
+        for (HttpURI.Violation violation : HttpURI.Violation.values())
+        {
+            if (uri.hasViolation(violation) && (compliance == null || !compliance.allows(__uriViolations.get(violation))))
+                return violation.getMessage();
+        }
+        return null;
+    }
 }