diff --git a/url/src/lib.rs b/url/src/lib.rs index 967282e56..1486f16a0 100644 --- a/url/src/lib.rs +++ b/url/src/lib.rs @@ -2243,7 +2243,7 @@ impl Url { #[allow(clippy::result_unit_err, clippy::suspicious_operation_groupings)] pub fn set_scheme(&mut self, scheme: &str) -> Result<(), ()> { let mut parser = Parser::for_setter(String::new()); - let remaining = parser.parse_scheme(parser::Input::new(scheme))?; + let remaining = parser::Input::new(parser.parse_scheme(scheme, None)?); let new_scheme_type = SchemeType::from(&parser.serialization); let old_scheme_type = SchemeType::from(self.scheme()); // If url’s scheme is a special scheme and buffer is not a special scheme, then return. diff --git a/url/src/parser.rs b/url/src/parser.rs index b83bdd4f6..041f8c659 100644 --- a/url/src/parser.rs +++ b/url/src/parser.rs @@ -361,11 +361,14 @@ impl<'a> Parser<'a> { /// https://url.spec.whatwg.org/#concept-basic-url-parser pub fn parse_url(mut self, input: &str) -> ParseResult<Url> { - let input = Input::with_log(input, self.violation_fn); - if let Ok(remaining) = self.parse_scheme(input.clone()) { - return self.parse_with_scheme(remaining); + if let Ok(input) = self.parse_scheme(input, self.violation_fn) { + let input = Input { + chars: input.chars(), + }; + return self.parse_with_scheme(input); } + let input = Input::with_log(input, self.violation_fn); // No-scheme state if let Some(base_url) = self.base_url { if input.starts_with('#') { @@ -385,28 +388,68 @@ impl<'a> Parser<'a> { } } - pub fn parse_scheme<'i>(&mut self, mut input: Input<'i>) -> Result<Input<'i>, ()> { + pub fn parse_scheme<'i>( + &mut self, + original_input: &'i str, + vfn: Option<&dyn Fn(SyntaxViolation)>, + ) -> Result<&'i str, ()> { + let input = original_input.trim_matches(c0_control_or_space); + if let Some(vfn) = vfn { + if input.len() < original_input.len() { + vfn(SyntaxViolation::C0SpaceIgnored) + } + if input.chars().any(|c| matches!(c, '\t' | '\n' | '\r')) { + vfn(SyntaxViolation::TabOrNewlineIgnored) + } + }
+ if input.is_empty() || !input.starts_with(ascii_alpha) { return Err(()); } debug_assert!(self.serialization.is_empty()); - while let Some(c) = input.next() { + let mut i = 0; + let mut v: Option<Vec<usize>> = None; + for c in input.chars() { match c { + '\t' | '\n' | '\r' => { + if v.is_none() { + v = Some(vec![]); + } + + v.as_mut().unwrap().push(i); + i += 1; + } 'a'..='z' | 'A'..='Z' | '0'..='9' | '+' | '-' | '.' => { - self.serialization.push(c.to_ascii_lowercase()) + i += 1; + } + ':' => { + if let Some(ref mut v) = v { + let bytes = input.as_bytes(); + for i in 0..i { + if !v.contains(&i) { + self.serialization + .push((bytes[i] as char).to_ascii_lowercase()); + } + } + } else { + self.serialization + .push_str(&input[..i].to_ascii_lowercase()); + } + return Ok(&input[i + 1..]); } - ':' => return Ok(input), _ => { self.serialization.clear(); return Err(()); } } } + // EOF before ':' if self.context == Context::Setter { - Ok(input) + self.serialization + .push_str(&input[..i].to_ascii_lowercase()); + Ok(&input[i..]) } else { - self.serialization.clear(); Err(()) } }