Skip to content

Commit

Permalink
Merge pull request #579 from uuid-rs/feat/try_parse_ascii
Browse files Browse the repository at this point in the history
add try_parse_ascii to avoid string conversions when parsing from bytes
  • Loading branch information
KodrAus committed Jan 8, 2022
2 parents 2d6c147 + 36db5c0 commit 077f263
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 16 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Expand Up @@ -135,11 +135,11 @@ version = "1.0"
[dev-dependencies.serde_test]
version = "1.0.56"

[dev-dependencies.wasm-bindgen-lib]
[target.'cfg(target_arch = "wasm32")'.dev-dependencies.wasm-bindgen-lib]
package = "wasm-bindgen"
version = "0.2"

[dev-dependencies.wasm-bindgen-test]
[target.'cfg(target_arch = "wasm32")'.dev-dependencies.wasm-bindgen-test]
version = "0.3"

[dev-dependencies.trybuild]
Expand Down
25 changes: 18 additions & 7 deletions src/error.rs
Expand Up @@ -28,6 +28,8 @@ pub(crate) enum ErrorKind {
len: usize,
index: usize,
},
/// The input was not a valid UTF8 string
InvalidUTF8,
/// Some other error occurred.
Other,
}
Expand All @@ -40,12 +42,18 @@ pub(crate) enum ErrorKind {
///
/// [`Uuid`]: ../struct.Uuid.html
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct InvalidUuid<'a>(pub(crate) &'a str);
pub struct InvalidUuid<'a>(pub(crate) &'a [u8]);

impl<'a> InvalidUuid<'a> {
/// Converts the lightweight error type into detailed diagnostics.
pub fn into_err(self) -> Error {
let (s, offset, simple) = match self.0.as_bytes() {
// Check whether or not the input was ever actually a valid UTF8 string
let input_str = match std::str::from_utf8(self.0) {
Ok(s) => s,
Err(_) => return Error(ErrorKind::InvalidUTF8),
};

let (uuid_str, offset, simple) = match input_str.as_bytes() {
[b'{', s @ .., b'}'] => (s, 1, false),
[b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..] => {
(s, "urn:uuid:".len(), false)
Expand All @@ -57,10 +65,10 @@ impl<'a> InvalidUuid<'a> {
let mut group_bounds = [0; 4];

// SAFETY: the byte array came from a valid utf8 string,
// and is aligned along char boundries.
let string = unsafe { std::str::from_utf8_unchecked(s) };
// and is aligned along char boundaries.
let uuid_str = unsafe { std::str::from_utf8_unchecked(uuid_str) };

for (index, character) in string.char_indices() {
for (index, character) in uuid_str.char_indices() {
let byte = character as u8;
if character as u32 - byte as u32 > 0 {
// Multibyte char
Expand All @@ -87,7 +95,9 @@ impl<'a> InvalidUuid<'a> {
// This means that we tried and failed to parse a simple uuid.
// Since we verified that all the characters are valid, this means
// that it MUST have an invalid length.
Error(ErrorKind::SimpleLength { len: s.len() })
Error(ErrorKind::SimpleLength {
len: input_str.len(),
})
} else if hyphen_count != 4 {
// We tried to parse a hyphenated variant, but there weren't
// 5 groups (4 hyphen splits).
Expand All @@ -110,7 +120,7 @@ impl<'a> InvalidUuid<'a> {
// The last group must be too long
Error(ErrorKind::GroupLength {
group: 4,
len: s.len() - BLOCK_STARTS[4],
len: input_str.len() - BLOCK_STARTS[4],
index: offset + BLOCK_STARTS[4] + 1,
})
}
Expand Down Expand Up @@ -146,6 +156,7 @@ impl fmt::Display for Error {
group, expected, len
)
}
ErrorKind::InvalidUTF8 => write!(f, "non-UTF8 input"),
ErrorKind::Other => write!(f, "failed to parse a UUID"),
}
}
Expand Down
8 changes: 4 additions & 4 deletions src/lib.rs
Expand Up @@ -491,8 +491,8 @@ impl Uuid {
/// If the version field doesn't contain a recognized version then `None`
/// is returned. If you're trying to read the version for a future extension
/// you can also use [`Uuid::get_version_num`] to unconditionally return a
/// number. Future extensions may start to return `Some` once they're standardized
/// and supported.
/// number. Future extensions may start to return `Some` once they're
/// standardized and supported.
///
/// # Examples
///
Expand Down Expand Up @@ -525,8 +525,8 @@ impl Uuid {

/// Returns the four field values of the UUID.
///
/// These values can be passed to the [`Uuid::from_fields`] method to get the
/// original `Uuid` back.
/// These values can be passed to the [`Uuid::from_fields`] method to get
/// the original `Uuid` back.
///
/// * The first field value represents the first group of (eight) hex
/// digits, taken as a big-endian `u32` value. For V1 UUIDs, this field
Expand Down
52 changes: 49 additions & 3 deletions src/parser.rs
Expand Up @@ -42,6 +42,9 @@ impl Uuid {
/// Any of the formats generated by this module (simple, hyphenated, urn,
/// Microsoft GUID) are supported by this parsing function.
///
/// Prefer [`try_parse`] unless you need detailed user-facing diagnostics.
/// This method will eventually be deprecated in favor of `try_parse`.
///
/// # Examples
///
/// Parse a hyphenated UUID:
Expand All @@ -56,8 +59,10 @@ impl Uuid {
/// # Ok(())
/// # }
/// ```
///
/// [`try_parse`]: #method.try_parse
pub fn parse_str(input: &str) -> Result<Uuid, Error> {
try_parse(input)
try_parse(input.as_bytes())
.map(Uuid::from_bytes)
.map_err(InvalidUuid::into_err)
}
Expand All @@ -70,6 +75,9 @@ impl Uuid {
/// fails, it won't generate very useful error messages. The `parse_str`
/// function will eventually be deprecated in favor of `try_parse`.
///
/// To parse a UUID from a byte stream instead of a UTF8 string, see
/// [`try_parse_ascii`].
///
/// # Examples
///
/// Parse a hyphenated UUID:
Expand All @@ -86,16 +94,46 @@ impl Uuid {
/// ```
///
/// [`parse_str`]: #method.parse_str
/// [`try_parse_ascii`]: #method.try_parse_ascii
pub const fn try_parse(input: &str) -> Result<Uuid, Error> {
// Delegate to the byte-oriented entry point using the string's
// underlying bytes; the result is returned unchanged.
Self::try_parse_ascii(input.as_bytes())
}

/// Parses a `Uuid` from a string of hexadecimal digits with optional
/// hyphens.
///
/// The input is expected to be a string of ASCII characters. This method
/// can be more convenient than [`try_parse`] if the UUID is being
/// parsed from a byte stream instead of from a UTF8 string.
///
/// # Examples
///
/// Parse a hyphenated UUID:
///
/// ```
/// # use uuid::{Uuid, Version, Variant};
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let uuid = Uuid::try_parse_ascii(b"550e8400-e29b-41d4-a716-446655440000")?;
///
/// assert_eq!(Some(Version::Random), uuid.get_version());
/// assert_eq!(Variant::RFC4122, uuid.get_variant());
/// # Ok(())
/// # }
/// ```
///
/// [`try_parse`]: #method.try_parse
pub const fn try_parse_ascii(input: &[u8]) -> Result<Uuid, Error> {
// `try_parse` here is the module-level helper (not `Self::try_parse`);
// on success it yields the 16 raw bytes of the UUID.
match try_parse(input) {
Ok(bytes) => Ok(Uuid::from_bytes(bytes)),
// If parsing fails then we don't know exactly what went wrong.
// In this case, we just return a generic error; the `InvalidUuid`
// payload is discarded rather than converted into detailed
// diagnostics (contrast with `parse_str`, which calls `into_err`).
Err(_) => Err(Error(ErrorKind::Other)),
}
}
}

const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> {
let result = match (input.len(), input.as_bytes()) {
const fn try_parse(input: &[u8]) -> Result<[u8; 16], InvalidUuid> {
let result = match (input.len(), input) {
// Inputs of 32 bytes must be a non-hyphenated UUID
(32, s) => parse_simple(s),
// Hyphenated UUIDs may be wrapped in various ways:
Expand Down Expand Up @@ -486,4 +524,12 @@ mod tests {
let uuid_out = Uuid::parse_str(&orig_str).unwrap();
assert_eq!(uuid_orig, uuid_out);
}

#[test]
fn test_try_parse_ascii_non_utf8() {
// A NUL byte is still valid UTF-8, so this case only covers a
// non-hexadecimal ASCII byte inside an otherwise well-formed UUID.
assert!(Uuid::try_parse_ascii(
b"67e55044-10b1-426f-9247-bb680e5\0e0c8"
)
.is_err());

// 0xFF can never appear in a UTF-8 sequence, so this input is
// genuinely non-UTF-8 and must be rejected as well.
assert!(Uuid::try_parse_ascii(
b"67e55044-10b1-426f-9247-bb680e5\xffe0c8"
)
.is_err());
}
}

0 comments on commit 077f263

Please sign in to comment.