Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add some inline comments to the new parser impl #563

Merged
merged 2 commits into from Nov 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions shared/error.rs
Expand Up @@ -51,9 +51,11 @@ impl<'a> InvalidUuid<'a> {

let mut hyphen_count = 0;
let mut group_bounds = [0; 4];

// SAFETY: the byte array came from a valid UTF-8 string,
// and is aligned along char boundaries.
let string = unsafe { std::str::from_utf8_unchecked(s) };

for (index, character) in string.char_indices() {
let byte = character as u8;
if character as u32 - byte as u32 > 0 {
Expand Down
43 changes: 43 additions & 0 deletions shared/parser.rs
Expand Up @@ -14,15 +14,22 @@ use crate::error::InvalidUuid;
#[inline]
pub const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> {
let result = match (input.len(), input.as_bytes()) {
// Inputs of 32 bytes must be a non-hyphenated UUID
(32, s) => parse_simple(s),
// Hyphenated UUIDs may be wrapped in various ways:
// - `{UUID}` for braced UUIDs
// - `urn:uuid:UUID` for URNs
// - `UUID` for a regular hyphenated UUID
(36, s)
| (38, [b'{', s @ .., b'}'])
| (
45,
[b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..],
) => parse_hyphenated(s),
// Any other shaped input is immediately invalid
_ => Err(()),
};

match result {
Ok(b) => Ok(b),
Err(()) => Err(InvalidUuid(input)),
Expand All @@ -31,30 +38,54 @@ pub const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> {

/// Decodes a non-hyphenated UUID given as 32 ASCII hex digits into its
/// 16 raw bytes.
///
/// Returns `Err(())` when `s` is not exactly 32 bytes long, or when any
/// byte maps to the `0xff` "not a hex digit" sentinel in `HEX_TABLE`.
#[inline]
const fn parse_simple(s: &[u8]) -> Result<[u8; 16], ()> {
// This length check here removes all other bounds
// checks in this function
// (every index used below is at most `15 * 2 + 1 == 31`)
if s.len() != 32 {
return Err(());
}
KodrAus marked this conversation as resolved.
Show resolved Hide resolved

// `buf` receives one decoded byte per pair of input characters;
// `i` indexes the output byte, so the input pair is `s[2i]`, `s[2i + 1]`
let mut buf: [u8; 16] = [0; 16];
let mut i = 0;

while i < 16 {
// Convert a two-char hex value (like `A8`)
// into a byte (like `10101000`)
let h1 = HEX_TABLE[s[i * 2] as usize];
let h2 = HEX_TABLE[s[i * 2 + 1] as usize];

// We use `0xff` as a sentinel value to indicate
// an invalid hex character sequence (like the letter `G`).
// Valid digits are at most `0x0f`, so the OR of the two lookups
// can only equal `0xff` when at least one of them is the sentinel.
if h1 | h2 == 0xff {
return Err(());
}

// The upper nibble needs to be shifted into position
// to produce the final byte value
buf[i] = SHL4_TABLE[h1 as usize] | h2;
i += 1;
}

Ok(buf)
}

#[inline]
const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
// This length check here removes all other bounds
// checks in this function
if s.len() != 36 {
return Err(());
}
KodrAus marked this conversation as resolved.
Show resolved Hide resolved

// We look at two hex-encoded values (4 chars) at a time because
// that's the size of the smallest group in a hyphenated UUID.
// The indexes we're interested in are:
//
// uuid : 936da01f-9abd-4d9d-80c7-02af85c822a8
// | | || || || || | |
// hyphens : | | 8| 13| 18| 23| | |
// positions: 0 4 9 14 19 24 28 32

// First, ensure the hyphens appear in the right places
match [s[8], s[13], s[18], s[23]] {
[b'-', b'-', b'-', b'-'] => {}
_ => return Err(()),
Expand All @@ -63,15 +94,21 @@ const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32];
let mut buf: [u8; 16] = [0; 16];
let mut j = 0;

while j < 8 {
let i = positions[j];

// The decoding here is the same as the simple case
// We're just dealing with two values instead of one
let h1 = HEX_TABLE[s[i as usize] as usize];
let h2 = HEX_TABLE[s[(i + 1) as usize] as usize];
let h3 = HEX_TABLE[s[(i + 2) as usize] as usize];
let h4 = HEX_TABLE[s[(i + 3) as usize] as usize];

if h1 | h2 | h3 | h4 == 0xff {
return Err(());
}

buf[j * 2] = SHL4_TABLE[h1 as usize] | h2;
buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4;
j += 1;
Expand All @@ -83,28 +120,34 @@ const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
/// Lookup table from a raw byte to the value of the ASCII hex digit it
/// spells (`0..=15`). Every byte that is not `0-9`, `a-f` or `A-F` maps
/// to the sentinel `0xff`.
const HEX_TABLE: &[u8; 256] = &{
    // Start with every entry marked invalid, then overwrite the hex digits.
    let mut table = [0xff_u8; 256];
    let mut code = 0usize;

    while code < 256 {
        let byte = code as u8;

        match byte {
            b'0'..=b'9' => table[code] = byte - b'0',
            b'a'..=b'f' => table[code] = byte - b'a' + 10,
            b'A'..=b'F' => table[code] = byte - b'A' + 10,
            // Not a hex digit: keep the sentinel.
            _ => {}
        }

        code += 1;
    }

    table
};

/// Lookup table from a byte to that byte shifted left by four bits
/// (truncated to `u8`), used to move a decoded hex digit into the upper
/// nibble of an output byte.
const SHL4_TABLE: &[u8; 256] = &{
    let mut table = [0_u8; 256];
    let mut code = 0usize;

    while code < 256 {
        // A shift by 4 is always in range for `u8`; the high bits that
        // fall off the end are simply discarded.
        table[code] = (code as u8) << 4;
        code += 1;
    }

    table
};
50 changes: 30 additions & 20 deletions src/fmt.rs
Expand Up @@ -12,7 +12,7 @@
//! Adapters for various formats for UUIDs

use crate::{
std::{borrow::Borrow, fmt, str},
std::{borrow::Borrow, fmt, str, ptr},
Uuid, Variant,
};

Expand Down Expand Up @@ -227,12 +227,14 @@ fn encode_simple<'b>(
buffer: &'b mut [u8],
upper: bool,
) -> &'b mut str {
const LEN: usize = 32;
let buf = &mut buffer[..LEN];
let buf = &mut buffer[..Simple::LENGTH];
let dst = buf.as_mut_ptr();

// SAFETY: `buf` is guaranteed to be at least `LEN` bytes
// SAFETY: The encoded buffer is ASCII encoded
unsafe {
let dst = buf.as_mut_ptr();
core::ptr::write(dst.cast(), format_simple(src, upper));
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
ptr::write(dst.cast(), format_simple(src, upper));
str::from_utf8_unchecked_mut(buf)
}
}

Expand All @@ -242,12 +244,14 @@ fn encode_hyphenated<'b>(
buffer: &'b mut [u8],
upper: bool,
) -> &'b mut str {
const LEN: usize = 36;
let buf = &mut buffer[..LEN];
let buf = &mut buffer[..Hyphenated::LENGTH];
let dst = buf.as_mut_ptr();

// SAFETY: `buf` is guaranteed to be at least `LEN` bytes
// SAFETY: The encoded buffer is ASCII encoded
unsafe {
let dst = buf.as_mut_ptr();
core::ptr::write(dst.cast(), format_hyphenated(src, upper));
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
ptr::write(dst.cast(), format_hyphenated(src, upper));
str::from_utf8_unchecked_mut(buf)
}
}

Expand All @@ -257,14 +261,17 @@ fn encode_braced<'b>(
buffer: &'b mut [u8],
upper: bool,
) -> &'b mut str {
const LEN: usize = 38;
let buf = &mut buffer[..LEN];
let buf = &mut buffer[..Braced::LENGTH];
buf[0] = b'{';
buf[LEN - 1] = b'}';
buf[Braced::LENGTH - 1] = b'}';

// SAFETY: `buf` is guaranteed to be at least `LEN` bytes
// SAFETY: The encoded buffer is ASCII encoded
unsafe {
let dst = buf.as_mut_ptr().add(1);
core::ptr::write(dst.cast(), format_hyphenated(src, upper));
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding

ptr::write(dst.cast(), format_hyphenated(src, upper));
str::from_utf8_unchecked_mut(buf)
}
}

Expand All @@ -274,13 +281,16 @@ fn encode_urn<'b>(
buffer: &'b mut [u8],
upper: bool,
) -> &'b mut str {
const LEN: usize = 45;
let buf = &mut buffer[..LEN];
let buf = &mut buffer[..Urn::LENGTH];
buf[..9].copy_from_slice(b"urn:uuid:");

// SAFETY: `buf` is guaranteed to be at least `LEN` bytes
// SAFETY: The encoded buffer is ASCII encoded
unsafe {
let dst = buf.as_mut_ptr().add(9);
core::ptr::write(dst.cast(), format_hyphenated(src, upper));
core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding

ptr::write(dst.cast(), format_hyphenated(src, upper));
str::from_utf8_unchecked_mut(buf)
}
}

Expand Down