Skip to content

Commit

Permalink
Fix parsing of escape sequences after lone surrogates
Browse files (browse the repository at this point in the history)
  • Loading branch information
lucacasonato committed Nov 24, 2021
1 parent 4c28c57 commit 07c740c
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 7 deletions.
29 changes: 23 additions & 6 deletions src/read.rs
Expand Up @@ -859,8 +859,20 @@ fn parse_escape<'de, R: Read<'de>>(
b't' => scratch.push(b'\t'),
b'u' => {
let c = match tri!(read.decode_hex_escape()) {
0xDC00..=0xDFFF => {
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
n @ 0xDC00..=0xDFFF => {
if validate {
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
}

let utf8_bytes = [
(n >> 12 & 0x0F) as u8 | 0b1110_0000,
(n >> 6 & 0x3F) as u8 | 0b1000_0000,
(n & 0x3F) as u8 | 0b1000_0000,
];

scratch.extend_from_slice(&utf8_bytes);

return Ok(());
}

// Non-BMP characters are encoded as a sequence of
Expand All @@ -871,7 +883,7 @@ fn parse_escape<'de, R: Read<'de>>(
n1 @ 0xD800..=0xDBFF => {
if tri!(peek_or_eof(read)) != b'\\' {
if validate {
tri!(next_or_eof(read));
read.discard();
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
}

Expand All @@ -885,10 +897,11 @@ fn parse_escape<'de, R: Read<'de>>(

return Ok(());
}
tri!(next_or_eof(read));
read.discard();

if tri!(peek_or_eof(read)) != b'u' {
if validate {
tri!(next_or_eof(read));
read.discard();
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
}

Expand All @@ -900,9 +913,13 @@ fn parse_escape<'de, R: Read<'de>>(

scratch.extend_from_slice(&utf8_bytes);

// The \ prior to this byte started an escape sequence,
// so we need to parse that now.
parse_escape(read, validate, scratch)?;

return Ok(());
}
tri!(next_or_eof(read));
read.discard();

let n2 = tri!(read.decode_hex_escape());

Expand Down
23 changes: 22 additions & 1 deletion tests/test.rs
Expand Up @@ -1715,10 +1715,31 @@ fn test_byte_buf_de() {
}

#[test]
fn test_byte_buf_de_invalid_escape_sequence() {
fn test_byte_buf_de_lone_surrogate() {
let bytes = ByteBuf::from(vec![237, 160, 188]);
let v: ByteBuf = from_str(r#""\ud83c""#).unwrap();
assert_eq!(v, bytes);

let bytes = ByteBuf::from(vec![237, 160, 188, 10]);
let v: ByteBuf = from_str(r#""\ud83c\n""#).unwrap();
assert_eq!(v, bytes);

let bytes = ByteBuf::from(vec![237, 160, 188, 32]);
let v: ByteBuf = from_str(r#""\ud83c ""#).unwrap();
assert_eq!(v, bytes);

let bytes = ByteBuf::from(vec![237, 176, 129]);
let v: ByteBuf = from_str(r#""\udc01""#).unwrap();
assert_eq!(v, bytes);

let res = from_str::<ByteBuf>(r#""\ud83c\!""#);
assert!(res.is_err());

let res = from_str::<ByteBuf>(r#""\ud83c\u""#);
assert!(res.is_err());

let res = from_str::<ByteBuf>(r#""\ud83c\ud83c""#);
assert!(res.is_err());
}

#[test]
Expand Down

0 comments on commit 07c740c

Please sign in to comment.