From c98c7ff5dd566f6a9da55012115847ef2e5deb3d Mon Sep 17 00:00:00 2001 From: Nugine Date: Sun, 14 Nov 2021 10:45:30 +0800 Subject: [PATCH 1/2] Improve try_parse --- shared/parser.rs | 125 ++++++++++++++++++++++++++--------------------- src/parser.rs | 1 + 2 files changed, 69 insertions(+), 57 deletions(-) diff --git a/shared/parser.rs b/shared/parser.rs index c47c93e9..336d9461 100644 --- a/shared/parser.rs +++ b/shared/parser.rs @@ -11,63 +11,78 @@ use crate::error::InvalidUuid; +#[inline] pub const fn try_parse(input: &str) -> Result<[u8; 16], InvalidUuid> { - const fn parse_blocks<'a>( - s: &'a [u8], - hyphenated: bool, - ) -> Option<[u8; 16]> { - let block_table = if hyphenated { - match (s[8], s[13], s[18], s[23]) { - (b'-', b'-', b'-', b'-') => [0, 4, 9, 14, 19, 24, 28, 32], - _ => return None, - } - } else { - [0, 4, 8, 12, 16, 20, 24, 28] - }; - - let mut buf = [0; 16]; - let mut j = 0; - while j < 8 { - let i = block_table[j]; - // Check 4 bytes at a time - let h1 = HEX_TABLE[s[i] as usize]; - let h2 = HEX_TABLE[s[i + 1] as usize]; - let h3 = HEX_TABLE[s[i + 2] as usize]; - let h4 = HEX_TABLE[s[i + 3] as usize]; - // If any of the bytes aren't valid, they will be 0xff, making this - // fail - if h1 | h2 | h3 | h4 == 0xff { - return None; - } - buf[j * 2] = SHL4_TABLE[h1 as usize] | h2; - buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4; - j += 1; - } - Some(buf) - } - - let b = input.as_bytes(); - let maybe_parsed = match (b.len(), b) { - (32, s) => parse_blocks(s, false), + let result = match (input.len(), input.as_bytes()) { + (32, s) => parse_simple(s), (36, s) | (38, [b'{', s @ .., b'}']) | ( 45, [b'u', b'r', b'n', b':', b'u', b'u', b'i', b'd', b':', s @ ..], - ) => parse_blocks(s, true), - _ => None, + ) => parse_hyphenated(s), + _ => Err(()), }; - match maybe_parsed { - Some(b) => Ok(b), - None => Err(InvalidUuid(input)), + match result { + Ok(b) => Ok(b), + Err(()) => Err(InvalidUuid(input)), + } +} + +#[inline] +const fn parse_simple(s: &[u8]) -> Result<[u8; 16], ()> { + if s.len() != 32 { + return Err(()); + } + + let mut buf: [u8; 16] = [0; 16]; + let mut i = 0; + while i < 16 { + let h1 = HEX_TABLE[s[i * 2] as usize]; + let h2 = HEX_TABLE[s[i * 2 + 1] as usize]; + if h1 | h2 == 0xff { + return Err(()); + } + buf[i] = SHL4_TABLE[h1 as usize] | h2; + i += 1; } + Ok(buf) } -type Table = [u8; 256]; +#[inline] +const fn parse_hyphenated(s: &[u8]) -> Result<[u8; 16], ()> { + if s.len() != 36 { + return Err(()); + } + + match [s[8], s[13], s[18], s[23]] { + [b'-', b'-', b'-', b'-'] => {} + _ => return Err(()), + } + + let positions: [u8; 8] = [0, 4, 9, 14, 19, 24, 28, 32]; + let mut buf: [u8; 16] = [0; 16]; + let mut j = 0; + while j < 8 { + let i = positions[j]; + let h1 = HEX_TABLE[s[i as usize] as usize]; + let h2 = HEX_TABLE[s[(i + 1) as usize] as usize]; + let h3 = HEX_TABLE[s[(i + 2) as usize] as usize]; + let h4 = HEX_TABLE[s[(i + 3) as usize] as usize]; + if h1 | h2 | h3 | h4 == 0xff { + return Err(()); + } + buf[j * 2] = SHL4_TABLE[h1 as usize] | h2; + buf[j * 2 + 1] = SHL4_TABLE[h3 as usize] | h4; + j += 1; + } -const fn generate_lookup_table() -> Table { - let mut buf = [0u8; 256]; - let mut i = 0u8; + Ok(buf) +} + +const HEX_TABLE: &[u8; 256] = &{ + let mut buf = [0; 256]; + let mut i: u8 = 0; loop { buf[i as usize] = match i { b'0'..=b'9' => i - b'0', @@ -76,24 +91,20 @@ const fn generate_lookup_table() -> Table { _ => 0xff, }; if i == 255 { - return buf; + break buf; } i += 1 } -} +}; -const HEX_TABLE: Table = generate_lookup_table(); - -const fn generate_shl4_table() -> Table { - let mut buf = [0u8; 256]; - let mut i = 0u8; +const SHL4_TABLE: &[u8; 256] = &{ + let mut buf = [0; 256]; + let mut i: u8 = 0; loop { buf[i as usize] = i.wrapping_shl(4); if i == 255 { - return buf; + break buf; } i += 1; } -} - -const SHL4_TABLE: Table = generate_shl4_table(); +}; diff --git a/src/parser.rs b/src/parser.rs index 2f23aacd..641b979a 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -67,6 +67,7 @@ impl Uuid { } /// Intended to replace `Uuid::parse_str` + #[inline] pub const fn try_parse(input: &str) -> Result { match imp::try_parse(input) { Ok(bytes) => Ok(Uuid::from_bytes(bytes)), From 19b838619ca14f211448f2f95f997cf0c73b43fb Mon Sep 17 00:00:00 2001 From: Nugine Date: Sun, 14 Nov 2021 11:37:40 +0800 Subject: [PATCH 2/2] Improve encode --- src/fmt.rs | 193 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 119 insertions(+), 74 deletions(-) diff --git a/src/fmt.rs b/src/fmt.rs index be90bafc..c4e8589e 100644 --- a/src/fmt.rs +++ b/src/fmt.rs @@ -112,9 +112,7 @@ impl Uuid { #[inline] pub fn as_hyphenated(&self) -> &Hyphenated { // SAFETY: `Uuid` and `Hyphenated` have the same ABI - unsafe { - &*(self as *const Uuid as *const Hyphenated) - } + unsafe { &*(self as *const Uuid as *const Hyphenated) } } /// Get a [`Simple`] formatter. @@ -131,9 +129,7 @@ impl Uuid { #[inline] pub fn as_simple(&self) -> &Simple { // SAFETY: `Uuid` and `Simple` have the same ABI - unsafe { - &*(self as *const Uuid as *const Simple) - } + unsafe { &*(self as *const Uuid as *const Simple) } } /// Get a [`Urn`] formatter. @@ -152,9 +148,7 @@ impl Uuid { #[inline] pub fn as_urn(&self) -> &Urn { // SAFETY: `Uuid` and `Urn` have the same ABI - unsafe { - &*(self as *const Uuid as *const Urn) - } + unsafe { &*(self as *const Uuid as *const Urn) } } /// Get a [`Braced`] formatter. @@ -173,9 +167,7 @@ impl Uuid { #[inline] pub fn as_braced(&self) -> &Braced { // SAFETY: `Uuid` and `Braced` have the same ABI - unsafe { - &*(self as *const Uuid as *const Braced) - } + unsafe { &*(self as *const Uuid as *const Braced) } } } @@ -187,58 +179,109 @@ const LOWER: [u8; 16] = [ b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'a', b'b', b'c', b'd', b'e', b'f', ]; -/// The segments of a UUID's [u8; 16] corresponding to each group. -const BYTE_POSITIONS: [usize; 6] = [0, 4, 6, 8, 10, 16]; -/// The locations that hyphens are written into the buffer, after each -/// group. -const HYPHEN_POSITIONS: [usize; 4] = [8, 13, 18, 23]; - -/// Encodes the `uuid` possibly with hyphens, and possibly in upper -/// case, to full_buffer[start..] and returns the str sliced from -/// full_buffer[..start + encoded_length]. -/// -/// The `start` parameter allows writing a prefix (such as -/// "urn:uuid:") to the buffer that's included in the final encoded -/// UUID. -#[allow(clippy::needless_range_loop)] -fn encode<'a>( - full_buffer: &'a mut [u8], - start: usize, - trailing: usize, - uuid: &Uuid, - hyphens: bool, - upper: bool, -) -> &'a mut str { - let len = if hyphens { 36 } else { 32 }; - - { - let buffer = &mut full_buffer[start..start + len]; - let bytes = uuid.as_bytes(); - - let hex = if upper { &UPPER } else { &LOWER }; - - for group in 0..5 { - // If we're writing hyphens, we need to shift the output - // location along by how many of them have been written - // before this point. That's exactly the (0-indexed) group - // number. - let hyphens_before = if hyphens { group } else { 0 }; - for idx in BYTE_POSITIONS[group]..BYTE_POSITIONS[group + 1] { - let b = bytes[idx]; - let out_idx = hyphens_before + 2 * idx; - - buffer[out_idx] = hex[(b >> 4) as usize]; - buffer[out_idx + 1] = hex[(b & 0b1111) as usize]; - } - if group != 4 && hyphens { - buffer[HYPHEN_POSITIONS[group]] = b'-'; - } +#[inline] +const fn format_simple(src: &[u8; 16], upper: bool) -> [u8; 32] { + let lut = if upper { &UPPER } else { &LOWER }; + let mut dst = [0; 32]; + let mut i = 0; + while i < 16 { + let x = src[i]; + dst[i * 2] = lut[(x >> 4) as usize]; + dst[i * 2 + 1] = lut[(x & 0x0f) as usize]; + i += 1; + } + dst +} + +#[inline] +const fn format_hyphenated(src: &[u8; 16], upper: bool) -> [u8; 36] { + let lut = if upper { &UPPER } else { &LOWER }; + let groups = [(0, 8), (9, 13), (14, 18), (19, 23), (24, 36)]; + let mut dst = [0; 36]; + + let mut group_idx = 0; + let mut i = 0; + while group_idx < 5 { + let (start, end) = groups[group_idx]; + let mut j = start; + while j < end { + let x = src[i]; + i += 1; + + dst[j] = lut[(x >> 4) as usize]; + dst[j + 1] = lut[(x & 0x0f) as usize]; + j += 2; + } + if group_idx < 4 { + dst[end] = b'-'; } + group_idx += 1; } + dst +} - str::from_utf8_mut(&mut full_buffer[..start + len + trailing]) - .expect("found non-ASCII output characters while encoding a UUID") +#[inline] +fn encode_simple<'b>( + src: &[u8; 16], + buffer: &'b mut [u8], + upper: bool, +) -> &'b mut str { + const LEN: usize = 32; + let buf = &mut buffer[..LEN]; + unsafe { + let dst = buf.as_mut_ptr(); + core::ptr::write(dst.cast(), format_simple(src, upper)); + core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding + } +} + +#[inline] +fn encode_hyphenated<'b>( + src: &[u8; 16], + buffer: &'b mut [u8], + upper: bool, +) -> &'b mut str { + const LEN: usize = 36; + let buf = &mut buffer[..LEN]; + unsafe { + let dst = buf.as_mut_ptr(); + core::ptr::write(dst.cast(), format_hyphenated(src, upper)); + core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding + } +} + +#[inline] +fn encode_braced<'b>( + src: &[u8; 16], + buffer: &'b mut [u8], + upper: bool, +) -> &'b mut str { + const LEN: usize = 38; + let buf = &mut buffer[..LEN]; + buf[0] = b'{'; + buf[LEN - 1] = b'}'; + unsafe { + let dst = buf.as_mut_ptr().add(1); + core::ptr::write(dst.cast(), format_hyphenated(src, upper)); + core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding + } +} + +#[inline] +fn encode_urn<'b>( + src: &[u8; 16], + buffer: &'b mut [u8], + upper: bool, +) -> &'b mut str { + const LEN: usize = 45; + let buf = &mut buffer[..LEN]; + buf[..9].copy_from_slice(b"urn:uuid:"); + unsafe { + let dst = buf.as_mut_ptr().add(9); + core::ptr::write(dst.cast(), format_hyphenated(src, upper)); + core::str::from_utf8_unchecked_mut(buf) // SAFETY: ascii encoding + } } impl Hyphenated { @@ -301,8 +344,9 @@ impl Hyphenated { /// } /// ``` /// */ + #[inline] pub fn encode_lower<'buf>(&self, buffer: &'buf mut [u8]) -> &'buf mut str { - encode(buffer, 0, 0, &self.0, true, false) + encode_hyphenated(self.0.as_bytes(), buffer, false) } /// Writes the [`Uuid`] as an upper-case hyphenated string to @@ -351,8 +395,9 @@ impl Hyphenated { /// } /// ``` /// */ + #[inline] pub fn encode_upper<'buf>(&self, buffer: &'buf mut [u8]) -> &'buf mut str { - encode(buffer, 0, 0, &self.0, true, true) + encode_hyphenated(self.0.as_bytes(), buffer, true) } /// Get a reference to the underlying [`Uuid`]. @@ -444,10 +489,9 @@ impl Braced { /// } /// ``` /// */ + #[inline] pub fn encode_lower<'buf>(&self, buffer: &'buf mut [u8]) -> &'buf mut str { - buffer[0] = b'{'; - buffer[37] = b'}'; - encode(buffer, 1, 1, &self.0, true, false) + encode_braced(self.0.as_bytes(), buffer, false) } /// Writes the [`Uuid`] as an upper-case hyphenated string surrounded by @@ -496,10 +540,9 @@ impl Braced { /// } /// ``` /// */ + #[inline] pub fn encode_upper<'buf>(&self, buffer: &'buf mut [u8]) -> &'buf mut str { - buffer[0] = b'{'; - buffer[37] = b'}'; - encode(buffer, 1, 1, &self.0, true, true) + encode_braced(self.0.as_bytes(), buffer, true) } /// Get a reference to the underlying [`Uuid`]. @@ -592,8 +635,9 @@ impl Simple { /// } /// ``` /// */ + #[inline] pub fn encode_lower<'buf>(&self, buffer: &'buf mut [u8]) -> &'buf mut str { - encode(buffer, 0, 0, &self.0, false, false) + encode_simple(self.0.as_bytes(), buffer, false) } /// Writes the [`Uuid`] as an upper-case simple string to `buffer`, @@ -639,8 +683,9 @@ impl Simple { /// } /// ``` /// */ + #[inline] pub fn encode_upper<'buf>(&self, buffer: &'buf mut [u8]) -> &'buf mut str { - encode(buffer, 0, 0, &self.0, false, true) + encode_simple(self.0.as_bytes(), buffer, true) } /// Get a reference to the underlying [`Uuid`]. @@ -735,9 +780,9 @@ impl Urn { /// } /// ``` /// */ + #[inline] pub fn encode_lower<'buf>(&self, buffer: &'buf mut [u8]) -> &'buf mut str { - buffer[..9].copy_from_slice(b"urn:uuid:"); - encode(buffer, 9, 0, &self.0, true, false) + encode_urn(self.0.as_bytes(), buffer, false) } /// Writes the [`Uuid`] as an upper-case URN string to @@ -788,9 +833,9 @@ impl Urn { /// } /// ``` /// */ + #[inline] pub fn encode_upper<'buf>(&self, buffer: &'buf mut [u8]) -> &'buf mut str { - buffer[..9].copy_from_slice(b"urn:uuid:"); - encode(buffer, 9, 0, &self.0, true, true) + encode_urn(self.0.as_bytes(), buffer, true) } /// Get a reference to the underlying [`Uuid`].