diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ac5ebcb85..768011d77d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Versions with only mechanical changes will be omitted from the following list. * Remove libc dependency from Cargo.toml. * Add the `and_local_timezone` method to `NaiveDateTime` * Fix the behavior of `Duration::abs()` for negative durations with non-zero nanos +* Add compatibility with rfc2822 comments (#733) ## 0.4.19 diff --git a/src/format/parse.rs b/src/format/parse.rs index e57fb8a9d2..f84a515dc5 100644 --- a/src/format/parse.rs +++ b/src/format/parse.rs @@ -53,7 +53,10 @@ fn parse_rfc2822<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a st // an adapted RFC 2822 syntax from Section 3.3 and 4.3: // - // date-time = [ day-of-week "," ] date 1*S time *S + // c-char = <any char except "(", ")" and "\"> + // c-escape = "\" <any char> + // comment = "(" *(comment / c-char / c-escape) ")" *S + // date-time = [ day-of-week "," ] date 1*S time *S *comment // day-of-week = *S day-name *S // day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun" // date = day month year @@ -79,9 +82,10 @@ fn parse_rfc2822<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a st // // - we do not recognize a folding white space (FWS) or comment (CFWS). // for our purposes, instead, we accept any sequence of Unicode - // white space characters (denoted here to `S`). any actual RFC 2822 - // parser is expected to parse FWS and/or CFWS themselves and replace - // it with a single SP (`%x20`); this is legitimate. + // white space characters (denoted here as `S`). For comments, we accept + // any text within parentheses while respecting escaped parentheses. + // Any actual RFC 2822 parser is expected to parse FWS and/or CFWS themselves + // and replace it with a single SP (`%x20`); this is legitimate. // // - two-digit year < 50 should be interpreted by adding 2000. 
// two-digit year >= 50 or three-digit year should be interpreted @@ -145,6 +149,14 @@ fn parse_rfc2822<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a st parsed.set_offset(i64::from(offset))?; } + // optional comments + s = s.trim_left(); + while let Ok((s_out, ())) = scan::comment_2822(s) { + // Trim left after every found comment, as comments are allowed to have whitespace + // between them + s = s_out.trim_left(); + } + Ok((s, ())) } @@ -817,6 +829,12 @@ fn test_rfc2822() { ("Tue, 20 Jan 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // normal case ("Fri, 2 Jan 2015 17:35:20 -0800", Ok("Fri, 02 Jan 2015 17:35:20 -0800")), // folding whitespace ("Fri, 02 Jan 2015 17:35:20 -0800", Ok("Fri, 02 Jan 2015 17:35:20 -0800")), // leading zero + ("Tue, 20 Jan 2015 17:35:20 -0800 (UTC)", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // trailing comment + ( + "Tue, 20 Jan 2015 17:35:20 -0800 ( (UTC ) (\\( (a)\\(( \\t ) ) \\\\( \\) ))", + Ok("Tue, 20 Jan 2015 17:35:20 -0800"), + ), // complex trailing comment + ("Tue, 20 Jan 2015 17:35:20 -0800 (UTC\\)", Err(TOO_LONG)), // incorrect comment, not enough closing parentheses ("20 Jan 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // no day of week ("20 JAN 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // upper case month ("Tue, 20 Jan 2015 17:35 -0800", Ok("Tue, 20 Jan 2015 17:35:00 -0800")), // no second diff --git a/src/format/scan.rs b/src/format/scan.rs index 856fa2f567..674ac33ba7 100644 --- a/src/format/scan.rs +++ b/src/format/scan.rs @@ -348,3 +348,76 @@ pub(super) fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option)> pub(super) fn timezone_name_skip(s: &str) -> ParseResult<(&str, ())> { Ok((s.trim_left_matches(|c: char| !c.is_whitespace()), ())) } + +/// Tries to consume an RFC2822 comment +pub(super) fn comment_2822(mut s: &str) -> ParseResult<(&str, ())> { + macro_rules! 
next_char { + () => {{ + let c = s.bytes().nth(0).ok_or(TOO_SHORT)?; + s = &s[1..]; + c + }}; + } + + // Make sure the first letter is a `(` + if b'(' != next_char!() { + Err(INVALID)?; + } + + let mut depth = 1; // start with 1 as we already encountered a '(' + loop { + match next_char!() { + // If we encounter `\`, ignore the next character as it is escaped. + b'\\' => { + next_char!(); + } + + // If we encounter `(`, open a parantheses context. + b'(' => { + depth += 1; + } + + // If we encounter `)`, close a parentheses context. + // If all are closed, we found the end of the comment. + b')' => { + depth -= 1; + if depth == 0 { + break; + } + } + + // Ignore all other characters + _ => (), + }; + } + + Ok((s, ())) +} + +#[cfg(test)] +#[test] +fn test_rfc2822_comments() { + let testdata = [ + ("", Err(TOO_SHORT)), + ("x", Err(INVALID)), + ("(", Err(TOO_SHORT)), + ("()", Ok("")), + ("()z", Ok("z")), + ("(x)", Ok("")), + ("(())", Ok("")), + ("((()))", Ok("")), + ("(x(x(x)x)x)", Ok("")), + ("( x ( x ( x ) x ) x )", Ok("")), + ("(\\)", Err(TOO_SHORT)), + ("(\\()", Ok("")), + ("(\\))", Ok("")), + ("(\\\\)", Ok("")), + ("(()())", Ok("")), + ("( x ( x ) x ( x ) x )", Ok("")), + ]; + + for (test_in, expected) in testdata { + let actual = comment_2822(test_in).map(|(s, _)| s); + assert_eq!(expected, actual); + } +}