From beac70a80a1c65f39ebadfea2d17ed280d23e6a1 Mon Sep 17 00:00:00 2001
From: KodrAus <kodraus@hey.com>
Date: Mon, 15 Nov 2021 09:51:33 +1000
Subject: [PATCH 1/2] add some inline comments to the new parser impl

---
 shared/error.rs  |  2 ++
 shared/parser.rs | 39 +++++++++++++++++++++++++++++++++++++
 src/fmt.rs       | 50 +++++++++++++++++++++++++++++-------------------
 3 files changed, 71 insertions(+), 20 deletions(-)

diff --git a/shared/error.rs b/shared/error.rs
index 030a313d..9c0bce74 100644
--- a/shared/error.rs
+++ b/shared/error.rs
@@ -51,9 +51,11 @@ impl<'a> InvalidUuid<'a> {
 
         let mut hyphen_count = 0;
         let mut group_bounds = [0; 4];
+
         // SAFETY: the byte array came from a valid utf8 string,
         // and is aligned along char boundries.
         let string = unsafe { std::str::from_utf8_unchecked(s) };
+
         for (index, character) in string.char_indices() {
             let byte = character as u8;
             if character as u32 - byte as u32 > 0 {
diff --git a/shared/parser.rs b/shared/parser.rs
index 336d9461..e46bd1e4 100644
--- a/shared/parser.rs
+++ b/shared/parser.rs
@@ -14,15 +14,22 @@ use crate::error::InvalidUuid;
 #[inline]
 pub const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> {
     let result = match (input.len(), input.as_bytes()) {
+        // Inputs of 32 bytes must be a non-hyphenated UUID
         (32, s) => parse_simple(s),
+        // Hyphenated UUIDs may be wrapped in various ways:
+        // - `{UUID}` for braced UUIDs
+        // - `urn:uuid:UUID` for URNs
+        // - `UUID` for a regular hyphenated UUID
         (36, s)
         | (38, [b'{', s @ .., b'}'])
         | (
             45,
             [b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..],
         ) => parse_hyphenated(s),
+        // Input of any other shape is immediately invalid
         _ => Err(()),
     };
+
     match result {
         Ok(b) => Ok(b),
         Err(()) => Err(InvalidUuid(input)),
@@ -31,47 +38,73 @@ pub const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> {
 
 #[inline]
 const fn parse_simple(s: &[u8]) -> Result<[u8; 16], ()> {
+    // Should be optimized away as redundant
     if s.len() != 32 {
         return Err(());
     }
 
     let mut buf: [u8; 16] = [0; 16];
     let mut i = 0;
+
     while i < 16 {
+        // Convert a two-char hex value (like `A8`)
+        // into a byte (like `10101000`)
         let h1 = HEX_TABLE[s[i * 2] as usize];
         let h2 = HEX_TABLE[s[i * 2 + 1] as usize];
+
+        // We use `0xff` as a sentinel value to indicate
+        // an invalid hex character sequence (like the letter `G`)
         if h1 | h2 == 0xff {
             return Err(());
         }
+
+        // The upper nibble needs to be shifted into position
+        // to produce the final byte value
         buf[i] = SHL4_TABLE[h1 as usize] | h2;
         i += 1;
     }
+
     Ok(buf)
 }
 
 #[inline]
 const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
+    // Should be optimized away as redundant
     if s.len() != 36 {
         return Err(());
     }
 
+    // First, ensure the hyphens appear in the right places
     match [s[8], s[13], s[18], s[23]] {
         [b'-', b'-', b'-', b'-'] => {}
         _ => return Err(()),
     }
 
+    // We look at two hex-encoded values (4 chars) at a time because
+    // that's the size of the smallest group in a hyphenated UUID:
+    // 
+    // uuid     : 936da01f-9abd-4d9d-80c7-02af85c822a8
+    //            |   |   ||   ||   ||   ||   |   |
+    // hyphens  : |   |   8|  13|  18|  23|   |   |
+    // positions: 0   4    9   14   19   24  28  32
     let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32];
     let mut buf: [u8; 16] = [0; 16];
     let mut j = 0;
+
     while j < 8 {
         let i = positions[j];
+
+        // The decoding here is the same as in the simple case;
+        // we're just handling two byte values per step instead of one
         let h1 = HEX_TABLE[s[i as usize] as usize];
         let h2 = HEX_TABLE[s[(i + 1) as usize] as usize];
         let h3 = HEX_TABLE[s[(i + 2) as usize] as usize];
         let h4 = HEX_TABLE[s[(i + 3) as usize] as usize];
+
         if h1 | h2 | h3 | h4 == 0xff {
             return Err(());
         }
+
         buf[j * 2] = SHL4_TABLE[h1 as usize] | h2;
         buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4;
         j += 1;
@@ -83,6 +116,7 @@ const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
 const HEX_TABLE: &[u8; 256] = &{
     let mut buf = [0; 256];
     let mut i: u8 = 0;
+
     loop {
         buf[i as usize] = match i {
             b'0'..=b'9' => i - b'0',
@@ -90,9 +124,11 @@ const HEX_TABLE: &[u8; 256] = &{
             b'A'..=b'F' => i - b'A' + 10,
             _ => 0xff,
         };
+
         if i == 255 {
             break buf;
         }
+
         i += 1
     }
 };
@@ -100,11 +136,14 @@ const HEX_TABLE: &[u8; 256] = &{
 const SHL4_TABLE: &[u8; 256] = &{
     let mut buf = [0; 256];
     let mut i: u8 = 0;
+
     loop {
         buf[i as usize] = i.wrapping_shl(4);
+
         if i == 255 {
             break buf;
         }
+
         i += 1;
     }
 };
diff --git a/src/fmt.rs b/src/fmt.rs
index c4e8589e..5bef01c7 100644
--- a/src/fmt.rs
+++ b/src/fmt.rs
@@ -12,7 +12,7 @@
 //! Adapters for various formats for UUIDs
 
 use crate::{
-    std::{borrow::Borrow, fmt, str},
+    std::{borrow::Borrow, fmt, str, ptr},
     Uuid, Variant,
 };
 
@@ -227,12 +227,14 @@ fn encode_simple<'b>(
     buffer: &'b mut [u8],
     upper: bool,
 ) -> &'b mut str {
-    const LEN: usize = 32;
-    let buf = &mut buffer[..LEN];
+    let buf = &mut buffer[..Simple::LENGTH];
+    let dst = buf.as_mut_ptr();
+
+    // SAFETY: `buf` is guaranteed to be at least `Simple::LENGTH` bytes
+    // SAFETY: The encoded buffer is ASCII encoded
     unsafe {
-        let dst = buf.as_mut_ptr();
-        core::ptr::write(dst.cast(), format_simple(src, upper));
-        core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
+        ptr::write(dst.cast(), format_simple(src, upper));
+        str::from_utf8_unchecked_mut(buf)
     }
 }
 
@@ -242,12 +244,14 @@ fn encode_hyphenated<'b>(
     buffer: &'b mut [u8],
     upper: bool,
 ) -> &'b mut str {
-    const LEN: usize = 36;
-    let buf = &mut buffer[..LEN];
+    let buf = &mut buffer[..Hyphenated::LENGTH];
+    let dst = buf.as_mut_ptr();
+
+    // SAFETY: `buf` is guaranteed to be at least `Hyphenated::LENGTH` bytes
+    // SAFETY: The encoded buffer is ASCII encoded
     unsafe {
-        let dst = buf.as_mut_ptr();
-        core::ptr::write(dst.cast(), format_hyphenated(src, upper));
-        core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
+        ptr::write(dst.cast(), format_hyphenated(src, upper));
+        str::from_utf8_unchecked_mut(buf)
     }
 }
 
@@ -257,14 +261,17 @@ fn encode_braced<'b>(
     buffer: &'b mut [u8],
     upper: bool,
 ) -> &'b mut str {
-    const LEN: usize = 38;
-    let buf = &mut buffer[..LEN];
+    let buf = &mut buffer[..Braced::LENGTH];
     buf[0] = b'{';
-    buf[LEN - 1] = b'}';
+    buf[Braced::LENGTH - 1] = b'}';
+
+    // SAFETY: `buf` is guaranteed to be at least `Braced::LENGTH` bytes
+    // SAFETY: The encoded buffer is ASCII encoded
     unsafe {
         let dst = buf.as_mut_ptr().add(1);
-        core::ptr::write(dst.cast(), format_hyphenated(src, upper));
-        core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
+
+        ptr::write(dst.cast(), format_hyphenated(src, upper));
+        str::from_utf8_unchecked_mut(buf)
     }
 }
 
@@ -274,13 +281,16 @@ fn encode_urn<'b>(
     buffer: &'b mut [u8],
     upper: bool,
 ) -> &'b mut str {
-    const LEN: usize = 45;
-    let buf = &mut buffer[..LEN];
+    let buf = &mut buffer[..Urn::LENGTH];
     buf[..9].copy_from_slice(b"urn:uuid:");
+
+    // SAFETY: `buf` is guaranteed to be at least `Urn::LENGTH` bytes
+    // SAFETY: The encoded buffer is ASCII encoded
     unsafe {
         let dst = buf.as_mut_ptr().add(9);
-        core::ptr::write(dst.cast(), format_hyphenated(src, upper));
-        core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding
+
+        ptr::write(dst.cast(), format_hyphenated(src, upper));
+        str::from_utf8_unchecked_mut(buf)
     }
 }
 

From 708315e597030bf072b3cb053e6ec30a4e5ed25f Mon Sep 17 00:00:00 2001
From: KodrAus <kodraus@hey.com>
Date: Tue, 16 Nov 2021 07:40:59 +1000
Subject: [PATCH 2/2] update comment on bounds checks

---
 shared/parser.rs | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/shared/parser.rs b/shared/parser.rs
index e46bd1e4..0ddf9dab 100644
--- a/shared/parser.rs
+++ b/shared/parser.rs
@@ -38,7 +38,8 @@ pub const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> {
 
 #[inline]
 const fn parse_simple(s: &[u8]) -> Result<[u8; 16], ()> {
-    // Should be optimized away as redundant
+    // Checking the length up front lets the compiler drop
+    // all other bounds checks in this function
     if s.len() != 32 {
         return Err(());
     }
@@ -69,24 +70,27 @@ const fn parse_simple(s: &[u8]) -> Result<[u8; 16], ()> {
 
 #[inline]
 const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> {
-    // Should be optimized away as redundant
+    // Checking the length up front lets the compiler drop
+    // all other bounds checks in this function
     if s.len() != 36 {
         return Err(());
     }
 
-    // First, ensure the hyphens appear in the right places
-    match [s[8], s[13], s[18], s[23]] {
-        [b'-', b'-', b'-', b'-'] => {}
-        _ => return Err(()),
-    }
-
     // We look at two hex-encoded values (4 chars) at a time because
-    // that's the size of the smallest group in a hyphenated UUID:
+    // that's the size of the smallest group in a hyphenated UUID.
+    // The indexes we're interested in are:
     // 
     // uuid     : 936da01f-9abd-4d9d-80c7-02af85c822a8
     //            |   |   ||   ||   ||   ||   |   |
     // hyphens  : |   |   8|  13|  18|  23|   |   |
     // positions: 0   4    9   14   19   24  28  32
+
+    // First, ensure the hyphens appear in the right places
+    match [s[8], s[13], s[18], s[23]] {
+        [b'-', b'-', b'-', b'-'] => {}
+        _ => return Err(()),
+    }
+
     let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32];
     let mut buf: [u8; 16] = [0; 16];
     let mut j = 0;