Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow lone surrogates in raw values #830

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
35 changes: 8 additions & 27 deletions src/read.rs
Expand Up @@ -951,34 +951,15 @@ where

match ch {
b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
b'u' => match tri!(read.decode_hex_escape()) {
0xDC00..=0xDFFF => {
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
}

// Non-BMP characters are encoded as a sequence of
// two hex escapes, representing UTF-16 surrogates.
n1 @ 0xD800..=0xDBFF => {
if tri!(next_or_eof(read)) != b'\\' {
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
}
if tri!(next_or_eof(read)) != b'u' {
return error(read, ErrorCode::UnexpectedEndOfHexEscape);
}

let n2 = tri!(read.decode_hex_escape());
if n2 < 0xDC00 || n2 > 0xDFFF {
return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
}

let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
if char::from_u32(n).is_none() {
return error(read, ErrorCode::InvalidUnicodeCodePoint);
}
}
b'u' => {
// We don't care here whether the code point is valid; we just want to
// consume it. Validity can't be decided yet, because it depends on
// whether this string will ultimately be parsed into a string or a
// byte buffer in the "real" parse.

_ => {}
},
tri!(read.decode_hex_escape());
}
_ => {
return error(read, ErrorCode::InvalidEscape);
}
Expand Down
14 changes: 14 additions & 0 deletions tests/test.rs
Expand Up @@ -1742,6 +1742,20 @@ fn test_byte_buf_de_lone_surrogate() {
assert!(res.is_err());
}

#[cfg(feature = "raw_value")]
#[test]
fn test_raw_de_lone_surrogate() {
    use serde_json::value::RawValue;

    // Reports whether `json` deserializes successfully into a boxed `RawValue`.
    let parses = |json: &str| from_str::<Box<RawValue>>(json).is_ok();

    // `\u` escapes in the surrogate range are accepted in raw values even when
    // they are not part of a well-formed surrogate pair.
    assert!(parses(r#""\ud83c""#));
    assert!(parses(r#""\ud83c\n""#));
    assert!(parses(r#""\ud83c ""#));
    assert!(parses(r#""\udc01 ""#));
    assert!(parses(r#""\ud83c\ud83c""#));

    // Escapes that are malformed for other reasons are still rejected: `\!` is
    // not a recognised escape, and the trailing `\u` has no hex digits after it.
    assert!(!parses(r#""\udc01\!""#));
    assert!(!parses(r#""\udc01\u""#));
}

#[test]
fn test_byte_buf_de_multiple() {
let s: Vec<ByteBuf> = from_str(r#"["ab\nc", "cd\ne"]"#).unwrap();
Expand Down