From 9d7583d3bf778afdf857303b1cd383961c3a0167 Mon Sep 17 00:00:00 2001 From: Sean McArthur Date: Mon, 18 Feb 2019 12:44:59 -0800 Subject: [PATCH] Permit more characters in URI paths --- src/uri/path.rs | 73 +++++++++++++++++++++++++++++++++--------------- src/uri/tests.rs | 15 +++++++++- 2 files changed, 64 insertions(+), 24 deletions(-) diff --git a/src/uri/path.rs b/src/uri/path.rs index 5013626c..fe34804e 100644 --- a/src/uri/path.rs +++ b/src/uri/path.rs @@ -5,7 +5,7 @@ use bytes::Bytes; use byte_str::ByteStr; use convert::HttpTryFrom; -use super::{ErrorKind, InvalidUri, InvalidUriBytes, URI_CHARS}; +use super::{ErrorKind, InvalidUri, InvalidUriBytes}; /// Represents the path component of a URI #[derive(Clone)] @@ -44,42 +44,69 @@ impl PathAndQuery { let mut i = 0; + // path ... while i < src.len() { let b = src[i]; - match URI_CHARS[b as usize] { - 0 if b == b'%' => {} - 0 if query != NONE => { - // While queries *should* be percent-encoded, most - // bytes are actually allowed... - // See https://url.spec.whatwg.org/#query-state - // - // Allowed: 0x21 / 0x24 - 0x3B / 0x3D / 0x3F - 0x7E - match b { - 0x21 | - 0x24...0x3B | - 0x3D | - 0x3F...0x7E => {}, - _ => return Err(ErrorKind::InvalidUriChar.into()), - } - } - 0 => return Err(ErrorKind::InvalidUriChar.into()), + // See https://url.spec.whatwg.org/#path-state + match b { b'?' => { - if query == NONE { - query = i as u16; - } + debug_assert_eq!(query, NONE); + query = i as u16; + i += 1; + break; } b'#' => { // TODO: truncate src.split_off(i); break; - } - _ => {} + }, + + // This is the range of bytes that don't need to be + // percent-encoded in the path. If it should have been + // percent-encoded, then error. + 0x21 | + 0x24...0x3B | + 0x3D | + 0x40...0x5F | + 0x61...0x7A | + 0x7C | + 0x7E => {}, + + _ => return Err(ErrorKind::InvalidUriChar.into()), } i += 1; } + // query ... + if query != NONE { + while i < src.len() { + let b = src[i]; + match b { + // While queries *should* be percent-encoded, most + // bytes are actually allowed... + // See https://url.spec.whatwg.org/#query-state + // + // Allowed: 0x21 / 0x24 - 0x3B / 0x3D / 0x3F - 0x7E + 0x21 | + 0x24...0x3B | + 0x3D | + 0x3F...0x7E => {}, + + b'#' => { + // TODO: truncate + src.split_off(i); + break; + }, + + _ => return Err(ErrorKind::InvalidUriChar.into()), + } + + i += 1; + } + } + Ok(PathAndQuery { data: unsafe { ByteStr::from_utf8_unchecked(src) }, query: query, diff --git a/src/uri/tests.rs b/src/uri/tests.rs index f580e89a..cfc6737f 100644 --- a/src/uri/tests.rs +++ b/src/uri/tests.rs @@ -27,7 +27,12 @@ macro_rules! test_parse { #[test] fn $test_name() { let orig_str = $str; - let uri = Uri::from_str(orig_str).unwrap(); + let uri = match Uri::from_str(orig_str) { + Ok(uri) => uri, + Err(err) => { + panic!("parse error {:?} from {:?}", err, orig_str); + }, + }; $( assert_eq!(uri.$method(), $value, "{}: uri = {:?}", stringify!($method), uri); )+ @@ -369,6 +374,14 @@ test_parse! { port_part = None, } +test_parse! { + test_path_permissive, + "/foo=bar|baz\\^~%", + [], + + path = "/foo=bar|baz\\^~%", +} + test_parse! { test_query_permissive, "/?foo={bar|baz}\\^`",