diff --git a/src/format/parse.rs b/src/format/parse.rs index f84a515dc5..0e2db8fae9 100644 --- a/src/format/parse.rs +++ b/src/format/parse.rs @@ -150,11 +150,8 @@ fn parse_rfc2822<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a st } // optional comments - s = s.trim_left(); while let Ok((s_out, ())) = scan::comment_2822(s) { - // Trim left after every found comment, as comments are allowed to have whitespace - // between them - s = s_out.trim_left(); + s = s_out; } Ok((s, ())) @@ -831,10 +828,15 @@ fn test_rfc2822() { ("Fri, 02 Jan 2015 17:35:20 -0800", Ok("Fri, 02 Jan 2015 17:35:20 -0800")), // leading zero ("Tue, 20 Jan 2015 17:35:20 -0800 (UTC)", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // trailing comment ( - "Tue, 20 Jan 2015 17:35:20 -0800 ( (UTC ) (\\( (a)\\(( \\t ) ) \\\\( \\) ))", + r"Tue, 20 Jan 2015 17:35:20 -0800 ( (UTC ) (\( (a)\(( \t ) ) \\( \) ))", Ok("Tue, 20 Jan 2015 17:35:20 -0800"), ), // complex trailing comment - ("Tue, 20 Jan 2015 17:35:20 -0800 (UTC\\)", Err(TOO_LONG)), // incorrect comment, not enough closing parentheses + (r"Tue, 20 Jan 2015 17:35:20 -0800 (UTC\)", Err(TOO_LONG)), // incorrect comment, not enough closing parentheses + ( + "Tue, 20 Jan 2015 17:35:20 -0800 (UTC)\t \r\n(Anothercomment)", + Ok("Tue, 20 Jan 2015 17:35:20 -0800"), + ), // multiple comments + ("Tue, 20 Jan 2015 17:35:20 -0800 (UTC) ", Err(TOO_LONG)), // trailing whitespace after comment ("20 Jan 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // no day of week ("20 JAN 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // upper case month ("Tue, 20 Jan 2015 17:35 -0800", Ok("Tue, 20 Jan 2015 17:35:00 -0800")), // no second diff --git a/src/format/scan.rs b/src/format/scan.rs index 674ac33ba7..9a40903436 100644 --- a/src/format/scan.rs +++ b/src/format/scan.rs @@ -349,49 +349,34 @@ pub(super) fn timezone_name_skip(s: &str) -> ParseResult<(&str, ())> { Ok((s.trim_left_matches(|c: char| !c.is_whitespace()), ())) } -/// Tries to consume an RFC2822 comment -pub(super) fn comment_2822(mut s: &str) -> ParseResult<(&str, ())> { - macro_rules! next_char { - () => {{ - let c = s.bytes().nth(0).ok_or(TOO_SHORT)?; - s = &s[1..]; - c - }}; - } - - // Make sure the first letter is a `(` - if b'(' != next_char!() { - Err(INVALID)?; - } - - let mut depth = 1; // start with 1 as we already encountered a '(' - loop { - match next_char!() { - // If we encounter `\`, ignore the next character as it is escaped. - b'\\' => { - next_char!(); - } - - // If we encounter `(`, open a parantheses context. - b'(' => { - depth += 1; - } - - // If we encounter `)`, close a parentheses context. - // If all are closed, we found the end of the comment. - b')' => { - depth -= 1; - if depth == 0 { - break; - } - } - - // Ignore all other characters - _ => (), +/// Tries to consume an RFC2822 comment including preceding ` `. +/// +/// Returns the remaining string after the closing parenthesis. +pub(super) fn comment_2822(s: &str) -> ParseResult<(&str, ())> { + use CommentState::*; + + let s = s.trim_start(); + + let mut state = Start; + for (i, c) in s.bytes().enumerate() { + state = match (state, c) { + (Start, b'(') => Next(1), + (Next(1), b')') => return Ok((&s[i + 1..], ())), + (Next(depth), b'\\') => Escape(depth), + (Next(depth), b'(') => Next(depth + 1), + (Next(depth), b')') => Next(depth - 1), + (Next(depth), _) | (Escape(depth), _) => Next(depth), + _ => return Err(INVALID), }; } - Ok((s, ())) + Err(TOO_SHORT) +} + +enum CommentState { + Start, + Next(usize), + Escape(usize), } #[cfg(test)] @@ -399,25 +384,32 @@ pub(super) fn comment_2822(mut s: &str) -> ParseResult<(&str, ())> { fn test_rfc2822_comments() { let testdata = [ ("", Err(TOO_SHORT)), + (" ", Err(TOO_SHORT)), ("x", Err(INVALID)), ("(", Err(TOO_SHORT)), ("()", Ok("")), + (" \r\n\t()", Ok("")), + ("() ", Ok(" ")), ("()z", Ok("z")), ("(x)", Ok("")), ("(())", Ok("")), ("((()))", Ok("")), ("(x(x(x)x)x)", Ok("")), ("( x ( x ( x ) x ) x )", Ok("")), - ("(\\)", Err(TOO_SHORT)), - ("(\\()", Ok("")), - ("(\\))", Ok("")), - ("(\\\\)", Ok("")), + (r"(\)", Err(TOO_SHORT)), + (r"(\()", Ok("")), + (r"(\))", Ok("")), + (r"(\\)", Ok("")), ("(()())", Ok("")), ("( x ( x ) x ( x ) x )", Ok("")), ]; for (test_in, expected) in testdata { let actual = comment_2822(test_in).map(|(s, _)| s); - assert_eq!(expected, actual); + assert_eq!( + expected, actual, + "{:?} expected to produce {:?}, but produced {:?}.", + test_in, expected, actual + ); } }