Skip to content

Commit

Permalink
Add compatibility with rfc2822 comments (#733)
Browse files Browse the repository at this point in the history
* Add compatibility with rfc2822 comments

* Refactor rfc2822 comment parser
  • Loading branch information
Finomnis committed Jul 24, 2022
1 parent 187819f commit acd4ecf
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -34,6 +34,7 @@ Versions with only mechanical changes will be omitted from the following list.
* Remove libc dependency from Cargo.toml.
* Add the `and_local_timezone` method to `NaiveDateTime`
* Fix the behavior of `Duration::abs()` for negative durations with non-zero nanos
* Add compatibility with rfc2822 comments (#733)

## 0.4.19

Expand Down
26 changes: 22 additions & 4 deletions src/format/parse.rs
Expand Up @@ -53,7 +53,10 @@ fn parse_rfc2822<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a st

// an adapted RFC 2822 syntax from Section 3.3 and 4.3:
//
// date-time = [ day-of-week "," ] date 1*S time *S
// c-char = <any char except '(', ')' and '\\'>
// c-escape = "\" <any char>
// comment = "(" *(comment / c-char / c-escape) ")" *S
// date-time = [ day-of-week "," ] date 1*S time *S *comment
// day-of-week = *S day-name *S
// day-name = "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
// date = day month year
Expand All @@ -79,9 +82,10 @@ fn parse_rfc2822<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a st
//
// - we do not recognize a folding white space (FWS) or comment (CFWS).
// for our purposes, instead, we accept any sequence of Unicode
// white space characters (denoted here to `S`). any actual RFC 2822
// parser is expected to parse FWS and/or CFWS themselves and replace
// it with a single SP (`%x20`); this is legitimate.
// white space characters (denoted here as `S`). For comments, we accept
// any text within parentheses while respecting escaped parentheses.
// Any actual RFC 2822 parser is expected to parse FWS and/or CFWS themselves
// and replace it with a single SP (`%x20`); this is legitimate.
//
// - two-digit year < 50 should be interpreted by adding 2000.
// two-digit year >= 50 or three-digit year should be interpreted
Expand Down Expand Up @@ -145,6 +149,14 @@ fn parse_rfc2822<'a>(parsed: &mut Parsed, mut s: &'a str) -> ParseResult<(&'a st
parsed.set_offset(i64::from(offset))?;
}

// optional comments
s = s.trim_left();
while let Ok((s_out, ())) = scan::comment_2822(s) {
// Trim left after every found comment, as comments are allowed to have whitespace
// between them
s = s_out.trim_left();
}

Ok((s, ()))
}

Expand Down Expand Up @@ -817,6 +829,12 @@ fn test_rfc2822() {
("Tue, 20 Jan 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // normal case
("Fri, 2 Jan 2015 17:35:20 -0800", Ok("Fri, 02 Jan 2015 17:35:20 -0800")), // folding whitespace
("Fri, 02 Jan 2015 17:35:20 -0800", Ok("Fri, 02 Jan 2015 17:35:20 -0800")), // leading zero
("Tue, 20 Jan 2015 17:35:20 -0800 (UTC)", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // trailing comment
(
"Tue, 20 Jan 2015 17:35:20 -0800 ( (UTC ) (\\( (a)\\(( \\t ) ) \\\\( \\) ))",
Ok("Tue, 20 Jan 2015 17:35:20 -0800"),
), // complex trailing comment
("Tue, 20 Jan 2015 17:35:20 -0800 (UTC\\)", Err(TOO_LONG)), // incorrect comment, not enough closing parentheses
("20 Jan 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // no day of week
("20 JAN 2015 17:35:20 -0800", Ok("Tue, 20 Jan 2015 17:35:20 -0800")), // upper case month
("Tue, 20 Jan 2015 17:35 -0800", Ok("Tue, 20 Jan 2015 17:35:00 -0800")), // no second
Expand Down
73 changes: 73 additions & 0 deletions src/format/scan.rs
Expand Up @@ -348,3 +348,76 @@ pub(super) fn timezone_offset_2822(s: &str) -> ParseResult<(&str, Option<i32>)>
pub(super) fn timezone_name_skip(s: &str) -> ParseResult<(&str, ())> {
Ok((s.trim_left_matches(|c: char| !c.is_whitespace()), ()))
}

/// Tries to consume an RFC 2822 comment from the start of `s`.
///
/// A comment starts with `(` and runs up to the matching `)`. Comments may be
/// nested, and a backslash escapes the character that follows it, so escaped
/// parentheses do not open or close a nesting level.
///
/// On success, returns the remainder of `s` following the closing `)`.
///
/// # Errors
///
/// - `TOO_SHORT` if `s` is empty or ends before the comment is closed.
/// - `INVALID` if `s` does not start with `(`.
pub(super) fn comment_2822(s: &str) -> ParseResult<(&str, ())> {
    // Scan raw bytes together with their offsets and slice `s` only once, at
    // the end. Every byte that matters here (`(`, `)`, `\`) is ASCII and the
    // bytes of a multi-byte UTF-8 sequence are never ASCII, so this is safe
    // for arbitrary text. Re-slicing `s` one byte at a time would panic on
    // the first non-ASCII character because the cut would not fall on a
    // character boundary.
    let mut bytes = s.bytes().enumerate();

    // Make sure the first letter is a `(`.
    match bytes.next() {
        Some((_, b'(')) => {}
        Some(_) => return Err(INVALID),
        None => return Err(TOO_SHORT),
    }

    let mut depth = 1usize; // start with 1 as we already encountered a '('
    while let Some((idx, byte)) = bytes.next() {
        match byte {
            // If we encounter `\`, ignore the next byte as it is escaped.
            // When the escaped character is multi-byte, its remaining bytes
            // are non-ASCII and fall through to the catch-all arm below.
            b'\\' => {
                if bytes.next().is_none() {
                    return Err(TOO_SHORT);
                }
            }

            // If we encounter `(`, open a parentheses context.
            b'(' => depth += 1,

            // If we encounter `)`, close a parentheses context.
            // If all are closed, we found the end of the comment.
            b')' => {
                depth -= 1;
                if depth == 0 {
                    // `)` is a single ASCII byte, so `idx + 1` is always a
                    // valid character boundary.
                    return Ok((&s[idx + 1..], ()));
                }
            }

            // Ignore all other characters.
            _ => {}
        }
    }

    // The input ended while at least one parenthesis was still open.
    Err(TOO_SHORT)
}

// Table-driven check of `comment_2822`: each entry pairs an input with either
// the expected unparsed remainder or the expected error.
#[cfg(test)]
#[test]
fn test_rfc2822_comments() {
    let cases = [
        // not a comment at all
        ("", Err(TOO_SHORT)),
        ("x", Err(INVALID)),
        // unterminated comment
        ("(", Err(TOO_SHORT)),
        // flat comments
        ("()", Ok("")),
        ("()z", Ok("z")),
        ("(x)", Ok("")),
        // nested comments
        ("(())", Ok("")),
        ("((()))", Ok("")),
        ("(x(x(x)x)x)", Ok("")),
        ("( x ( x ( x ) x ) x )", Ok("")),
        // escapes
        ("(\\)", Err(TOO_SHORT)),
        ("(\\()", Ok("")),
        ("(\\))", Ok("")),
        ("(\\\\)", Ok("")),
        // sibling nested comments
        ("(()())", Ok("")),
        ("( x ( x ) x ( x ) x )", Ok("")),
    ];

    for case in cases.iter() {
        let input = case.0;
        // Only the remaining input is of interest; drop the unit payload.
        let remainder = comment_2822(input).map(|(rest, ())| rest);
        assert_eq!(case.1, remainder);
    }
}

0 comments on commit acd4ecf

Please sign in to comment.