Skip to content

Commit

Permalink
Merge pull request #563 from KodrAus/chore/parser-docs
Browse files Browse the repository at this point in the history
Add some inline comments to the new parser impl
  • Loading branch information
KodrAus committed Nov 15, 2021
2 parents 3a6aeb1 + 708315e commit f362d7d
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 20 deletions.
2 changes: 2 additions & 0 deletions shared/error.rs
Expand Up @@ -51,9 +51,11 @@ impl<'a> InvalidUuid<'a> {

let mut hyphen_count = 0;
let mut group_bounds = [0; 4];

// SAFETY: the byte array came from a valid utf8 string,
// and is aligned along char boundaries.
let string = unsafe { std::str::from_utf8_unchecked(s) };

for (index, character) in string.char_indices() {
let byte = character as u8;
if character as u32 - byte as u32 > 0 {
Expand Down
43 changes: 43 additions & 0 deletions shared/parser.rs
Expand Up @@ -14,15 +14,22 @@ use crate::error::InvalidUuid;
#[inline]
pub const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> {
let result = match (input.len(), input.as_bytes()) {
// Inputs of 32 bytes must be a non-hyphenated UUID
(32, s) => parse_simple(s),
// Hyphenated UUIDs may be wrapped in various ways:
// - `{UUID}` for braced UUIDs
// - `urn:uuid:UUID` for URNs
// - `UUID` for a regular hyphenated UUID
(36, s)
| (38, [b'{', s @ .., b'}'])
| (
45,
[b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..],
) => parse_hyphenated(s),
// Any other shaped input is immediately invalid
_ => Err(()),
};

match result {
Ok(b) => Ok(b),
Err(()) => Err(InvalidUuid(input)),
Expand All @@ -31,30 +38,54 @@ pub const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> {

/// Decodes a non-hyphenated UUID written as exactly 32 ASCII hex digits.
///
/// Returns the 16 decoded bytes on success. Returns `Err(())` when `s` is
/// not 32 bytes long or when any byte is not a hex digit.
#[inline]
const fn parse_simple(s: &[u8]) -> Result<[u8; 16], ()> {
    // Every index used below is at most 31, so validating the length
    // once up front covers all of the slice accesses in this function.
    if s.len() != 32 {
        return Err(());
    }

    let mut bytes = [0u8; 16];
    let mut n: usize = 0;

    loop {
        if n == 16 {
            return Ok(bytes);
        }

        // Each output byte is spelled by two adjacent hex digits
        // (for example `A8` encodes `0b1010_1000`).
        let at = n * 2;
        let hi = HEX_TABLE[s[at] as usize];
        let lo = HEX_TABLE[s[at + 1] as usize];

        // `HEX_TABLE` yields `0xff` for anything that is not a hex digit
        // and `0..=15` otherwise, so the OR of the two lookups is `0xff`
        // exactly when at least one of them is invalid.
        if (hi | lo) == 0xff {
            return Err(());
        }

        // Move the first digit into the upper nibble and merge in the
        // second digit as the lower nibble.
        bytes[n] = SHL4_TABLE[hi as usize] | lo;
        n += 1;
    }
}

#[inline]
const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
// This length check here removes all other bounds
// checks in this function
if s.len() != 36 {
return Err(());
}

// We look at two hex-encoded values (4 chars) at a time because
// that's the size of the smallest group in a hyphenated UUID.
// The indexes we're interested in are:
//
// uuid : 936da01f-9abd-4d9d-80c7-02af85c822a8
// | | || || || || | |
// hyphens : | | 8| 13| 18| 23| | |
// positions: 0 4 9 14 19 24 28 32

// First, ensure the hyphens appear in the right places
match [s[8], s[13], s[18], s[23]] {
[b'-', b'-', b'-', b'-'] => {}
_ => return Err(()),
Expand All @@ -63,15 +94,21 @@ const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32];
let mut buf: [u8; 16] = [0; 16];
let mut j = 0;

while j < 8 {
let i = positions[j];

// The decoding here is the same as the simple case
// We're just dealing with two values instead of one
let h1 = HEX_TABLE[s[i as usize] as usize];
let h2 = HEX_TABLE[s[(i + 1) as usize] as usize];
let h3 = HEX_TABLE[s[(i + 2) as usize] as usize];
let h4 = HEX_TABLE[s[(i + 3) as usize] as usize];

if h1 | h2 | h3 | h4 == 0xff {
return Err(());
}

buf[j * 2] = SHL4_TABLE[h1 as usize] | h2;
buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4;
j += 1;
Expand All @@ -83,28 +120,34 @@ const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
/// Lookup table from a byte to the value of the ASCII hex digit it encodes.
///
/// The digits `0-9`, `a-f` and `A-F` map to `0..=15`; every other byte
/// maps to the sentinel `0xff`.
const HEX_TABLE: &[u8; 256] = &{
    let mut table = [0u8; 256];
    let mut idx: usize = 0;

    // Walk every possible byte value once, filling in its entry.
    while idx < 256 {
        let byte = idx as u8;

        table[idx] = match byte {
            b'0'..=b'9' => byte - b'0',
            b'a'..=b'f' => byte - b'a' + 10,
            b'A'..=b'F' => byte - b'A' + 10,
            // Not a hex digit
            _ => 0xff,
        };

        idx += 1;
    }

    table
};

/// Lookup table from a byte to that byte shifted left by four bits.
///
/// Bits shifted past the top of the `u8` are discarded, so only the low
/// nibble of the index survives, moved into the high nibble.
const SHL4_TABLE: &[u8; 256] = &{
    let mut table = [0u8; 256];
    let mut idx: usize = 0;

    // Walk every possible byte value once, filling in its entry.
    while idx < 256 {
        table[idx] = (idx as u8).wrapping_shl(4);
        idx += 1;
    }

    table
};
50 changes: 30 additions & 20 deletions src/fmt.rs
Expand Up @@ -12,7 +12,7 @@
//! Adapters for various formats for UUIDs

use crate::{
std::{borrow::Borrow, fmt, str},
std::{borrow::Borrow, fmt, str, ptr},
Uuid, Variant,
};

Expand Down Expand Up @@ -227,12 +227,14 @@ fn encode_simple<'b>(
buffer: &'b mut [u8],
upper: bool,
) -> &'b mut str {
const LEN: usize = 32;
let buf = &mut buffer[..LEN];
let buf = &mut buffer[..Simple::LENGTH];
let dst = buf.as_mut_ptr();

// SAFETY: `buf` is guaranteed to be at least `LEN` bytes
// SAFETY: The encoded buffer is ASCII encoded
unsafe {
let dst = buf.as_mut_ptr();
core::ptr::write(dst.cast(), format_simple(src, upper));
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
ptr::write(dst.cast(), format_simple(src, upper));
str::from_utf8_unchecked_mut(buf)
}
}

Expand All @@ -242,12 +244,14 @@ fn encode_hyphenated<'b>(
buffer: &'b mut [u8],
upper: bool,
) -> &'b mut str {
const LEN: usize = 36;
let buf = &mut buffer[..LEN];
let buf = &mut buffer[..Hyphenated::LENGTH];
let dst = buf.as_mut_ptr();

// SAFETY: `buf` is guaranteed to be at least `LEN` bytes
// SAFETY: The encoded buffer is ASCII encoded
unsafe {
let dst = buf.as_mut_ptr();
core::ptr::write(dst.cast(), format_hyphenated(src, upper));
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
ptr::write(dst.cast(), format_hyphenated(src, upper));
str::from_utf8_unchecked_mut(buf)
}
}

Expand All @@ -257,14 +261,17 @@ fn encode_braced<'b>(
buffer: &'b mut [u8],
upper: bool,
) -> &'b mut str {
const LEN: usize = 38;
let buf = &mut buffer[..LEN];
let buf = &mut buffer[..Braced::LENGTH];
buf[0] = b'{';
buf[LEN - 1] = b'}';
buf[Braced::LENGTH - 1] = b'}';

// SAFETY: `buf` is guaranteed to be at least `LEN` bytes
// SAFETY: The encoded buffer is ASCII encoded
unsafe {
let dst = buf.as_mut_ptr().add(1);
core::ptr::write(dst.cast(), format_hyphenated(src, upper));
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding

ptr::write(dst.cast(), format_hyphenated(src, upper));
str::from_utf8_unchecked_mut(buf)
}
}

Expand All @@ -274,13 +281,16 @@ fn encode_urn<'b>(
buffer: &'b mut [u8],
upper: bool,
) -> &'b mut str {
const LEN: usize = 45;
let buf = &mut buffer[..LEN];
let buf = &mut buffer[..Urn::LENGTH];
buf[..9].copy_from_slice(b"urn:uuid:");

// SAFETY: `buf` is guaranteed to be at least `LEN` bytes
// SAFETY: The encoded buffer is ASCII encoded
unsafe {
let dst = buf.as_mut_ptr().add(9);
core::ptr::write(dst.cast(), format_hyphenated(src, upper));
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding

ptr::write(dst.cast(), format_hyphenated(src, upper));
str::from_utf8_unchecked_mut(buf)
}
}

Expand Down

0 comments on commit f362d7d

Please sign in to comment.