From 4cdad7329628e336f1c9a889952cac3766cf6f60 Mon Sep 17 00:00:00 2001 From: Ashley Mannix Date: Sat, 8 Jan 2022 11:19:04 +1000 Subject: [PATCH 1/2] add try_parse_ascii to avoid string conversions when parsing from bytes --- src/error.rs | 25 ++++++++++++++++++------- src/lib.rs | 8 ++++---- src/parser.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 71 insertions(+), 14 deletions(-) diff --git a/src/error.rs b/src/error.rs index bfbc02a0..7335e42c 100644 --- a/src/error.rs +++ b/src/error.rs @@ -28,6 +28,8 @@ pub(crate) enum ErrorKind { len: usize, index: usize, }, + /// The input was not a valid UTF8 string + InvalidUTF8, /// Some other error occurred. Other, } @@ -40,12 +42,18 @@ pub(crate) enum ErrorKind { /// /// [`Uuid`]: ../struct.Uuid.html #[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub struct InvalidUuid<'a>(pub(crate) &'a str); +pub struct InvalidUuid<'a>(pub(crate) &'a [u8]); impl<'a> InvalidUuid<'a> { /// Converts the lightweight error type into detailed diagnostics. pub fn into_err(self) -> Error { - let (s, offset, simple) = match self.0.as_bytes() { + // Check whether or not the input was ever actually a valid UTF8 string + let input_str = match std::str::from_utf8(self.0) { + Ok(s) => s, + Err(_) => return Error(ErrorKind::InvalidUTF8), + }; + + let (uuid_str, offset, simple) = match input_str.as_bytes() { [b'{', s @ .., b'}'] => (s, 1, false), [b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..] => { (s, "urn:uuid:".len(), false) @@ -57,10 +65,10 @@ impl<'a> InvalidUuid<'a> { let mut group_bounds = [0; 4]; // SAFETY: the byte array came from a valid utf8 string, - // and is aligned along char boundries. - let string = unsafe { std::str::from_utf8_unchecked(s) }; + // and is aligned along char boundaries. + let uuid_str = unsafe { std::str::from_utf8_unchecked(uuid_str) }; - for (index, character) in string.char_indices() { + for (index, character) in uuid_str.char_indices() { let byte = character as u8; if character as u32 - byte as u32 > 0 { // Multibyte char @@ -87,7 +95,9 @@ impl<'a> InvalidUuid<'a> { // This means that we tried and failed to parse a simple uuid. // Since we verified that all the characters are valid, this means // that it MUST have an invalid length. - Error(ErrorKind::SimpleLength { len: s.len() }) + Error(ErrorKind::SimpleLength { + len: input_str.len(), + }) } else if hyphen_count != 4 { // We tried to parse a hyphenated variant, but there weren't // 5 groups (4 hyphen splits). @@ -110,7 +120,7 @@ impl<'a> InvalidUuid<'a> { // The last group must be too long Error(ErrorKind::GroupLength { group: 4, - len: s.len() - BLOCK_STARTS[4], + len: input_str.len() - BLOCK_STARTS[4], index: offset + BLOCK_STARTS[4] + 1, }) } @@ -146,6 +156,7 @@ impl fmt::Display for Error { group, expected, len ) } + ErrorKind::InvalidUTF8 => write!(f, "non-UTF8 input"), ErrorKind::Other => write!(f, "failed to parse a UUID"), } } diff --git a/src/lib.rs b/src/lib.rs index 417cc6ce..e350b520 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -491,8 +491,8 @@ impl Uuid { /// If the version field doesn't contain a recognized version then `None` /// is returned. If you're trying to read the version for a future extension /// you can also use [`Uuid::get_version_num`] to unconditionally return a - /// number. Future extensions may start to return `Some` once they're standardized - /// and supported. + /// number. Future extensions may start to return `Some` once they're + /// standardized and supported. /// /// # Examples /// @@ -525,8 +525,8 @@ impl Uuid { /// Returns the four field values of the UUID. /// - /// These values can be passed to the [`Uuid::from_fields`] method to get the - /// original `Uuid` back. + /// These values can be passed to the [`Uuid::from_fields`] method to get + /// the original `Uuid` back. /// /// * The first field value represents the first group of (eight) hex /// digits, taken as a big-endian `u32` value. For V1 UUIDs, this field diff --git a/src/parser.rs b/src/parser.rs index 52d00a0c..68d6bbca 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -42,6 +42,9 @@ impl Uuid { /// Any of the formats generated by this module (simple, hyphenated, urn, /// Microsoft GUID) are supported by this parsing function. /// + /// Prefer [`try_parse`] unless you need detailed user-facing diagnostics. + /// This method will be eventually deprecated in favor of `try_parse`. + /// /// # Examples /// /// Parse a hyphenated UUID: @@ -56,8 +59,10 @@ impl Uuid { /// # Ok(()) /// # } /// ``` + /// + /// [`try_parse`]: #method.try_parse pub fn parse_str(input: &str) -> Result { - try_parse(input) + try_parse(input.as_bytes()) .map(Uuid::from_bytes) .map_err(InvalidUuid::into_err) } @@ -70,6 +75,9 @@ impl Uuid { /// fails, it won't generate very useful error messages. The `parse_str` /// function will eventually be deprecated in favor or `try_parse`. /// + /// To parse a UUID from a byte stream instead of a UTF8 string, see + /// [`try_parse_ascii`]. + /// /// # Examples /// /// Parse a hyphenated UUID: @@ -86,16 +94,46 @@ impl Uuid { /// ``` /// /// [`parse_str`]: #method.parse_str + /// [`try_parse_ascii`]: #method.try_parse_ascii pub const fn try_parse(input: &str) -> Result { + Self::try_parse_ascii(input.as_bytes()) + } + + /// Parses a `Uuid` from a string of hexadecimal digits with optional + /// hyphens. + /// + /// The input is expected to be a string of ASCII characters. This method + /// can be more convenient than [`try_parse`] if the UUID is being + /// parsed from a byte stream instead of from a UTF8 string. + /// + /// # Examples + /// + /// Parse a hyphenated UUID: + /// + /// ``` + /// # use uuid::{Uuid, Version, Variant}; + /// # fn main() -> Result<(), Box> { + /// let uuid = Uuid::try_parse_ascii(b"550e8400-e29b-41d4-a716-446655440000")?; + /// + /// assert_eq!(Some(Version::Random), uuid.get_version()); + /// assert_eq!(Variant::RFC4122, uuid.get_variant()); + /// # Ok(()) + /// # } + /// ``` + /// + /// [`try_parse`]: #method.try_parse + pub const fn try_parse_ascii(input: &[u8]) -> Result { match try_parse(input) { Ok(bytes) => Ok(Uuid::from_bytes(bytes)), + // If parsing fails then we don't know exactly what went wrong + // In this case, we just return a generic error Err(_) => Err(Error(ErrorKind::Other)), } } } -const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> { - let result = match (input.len(), input.as_bytes()) { +const fn try_parse(input: &[u8]) -> Result<[u8; 16], InvalidUuid> { + let result = match (input.len(), input) { // Inputs of 32 bytes must be a non-hyphenated UUID (32, s) => parse_simple(s), // Hyphenated UUIDs may be wrapped in various ways: @@ -486,4 +524,12 @@ mod tests { let uuid_out = Uuid::parse_str(&orig_str).unwrap(); assert_eq!(uuid_orig, uuid_out); } + + #[test] + fn test_try_parse_ascii_non_utf8() { + assert!(Uuid::try_parse_ascii( + b"67e55044-10b1-426f-9247-bb680e5\0e0c8" + ) + .is_err()); + } } From 36db5c04d232a4e68c7fc8d916d4133df5c1a781 Mon Sep 17 00:00:00 2001 From: Ashley Mannix Date: Sat, 8 Jan 2022 20:07:52 +1000 Subject: [PATCH 2/2] only pull in wasm deps in wasm builds --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index f36dc3da..f528a616 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,11 +135,11 @@ version = "1.0" [dev-dependencies.serde_test] version = "1.0.56" -[dev-dependencies.wasm-bindgen-lib] +[target.'cfg(target_arch = "wasm32")'.dev-dependencies.wasm-bindgen-lib] package = "wasm-bindgen" version = "0.2" -[dev-dependencies.wasm-bindgen-test] +[target.'cfg(target_arch = "wasm32")'.dev-dependencies.wasm-bindgen-test] version = "0.3" [dev-dependencies.trybuild]