From 857a805993025734201e228e14537f0397bfb8ea Mon Sep 17 00:00:00 2001
From: Ben Kimock
Date: Tue, 16 Mar 2021 09:43:52 -0400
Subject: [PATCH 1/4] Faster Ipv4 serialization prototype

---
 serde/src/ser/impls.rs | 44 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/serde/src/ser/impls.rs b/serde/src/ser/impls.rs
index 431a478cc..f4ae80bd1 100644
--- a/serde/src/ser/impls.rs
+++ b/serde/src/ser/impls.rs
@@ -674,6 +674,40 @@ impl Serialize for net::IpAddr {
     }
 }
 
+#[inline]
+fn format_u8(mut v: u8, out: &mut [u8]) -> usize {
+    assert!(out.len() >= 3);
+    let mut written = 0;
+    let hundreds = v / 100;
+    v -= 100 * hundreds;
+    let tens = v / 10;
+    v -= 10 * tens;
+    let ones = v;
+    if hundreds > 0 {
+        out[written] = b'0' + hundreds;
+        written += 1;
+    }
+    if hundreds > 0 || tens > 0 {
+        out[written] = b'0' + tens;
+        written += 1;
+    }
+    out[written] = b'0' + ones;
+    written += 1;
+    written
+}
+
+#[cfg(test)]
+mod format_u8_tests {
+    #[test]
+    fn all() {
+        for i in 0..(u8::MAX as u16) {
+            let mut buf = [0u8; 3];
+            let written = super::format_u8(i as u8, &mut buf);
+            assert_eq!(i.to_string().as_bytes(), &buf[..written]);
+        }
+    }
+}
+
 #[cfg(feature = "std")]
 impl Serialize for net::Ipv4Addr {
     fn serialize(&self, serializer: S) -> Result
@@ -683,7 +717,15 @@ impl Serialize for net::Ipv4Addr {
         if serializer.is_human_readable() {
             const MAX_LEN: usize = 15;
             debug_assert_eq!(MAX_LEN, "101.102.103.104".len());
-            serialize_display_bounded_length!(self, MAX_LEN, serializer)
+            let mut buf = [0u8; MAX_LEN];
+            let mut written = 0;
+            written += format_u8(self.octets()[0], &mut buf);
+            for oct in &self.octets()[1..] {
+                buf[written] = b'.';
+                written += 1;
+                written += format_u8(*oct, &mut buf[written..]);
+            }
+            serializer.serialize_str(str::from_utf8(&buf[..written]).unwrap())
         } else {
             self.octets().serialize(serializer)
         }

From ba8c1d63c84822b8ea432a5f3114b6031fb1135d Mon Sep 17 00:00:00 2001
From: Ben Kimock
Date: Tue, 16 Mar 2021 17:33:42 -0400
Subject: [PATCH 2/4] use the algorithm from itoa

---
 serde/src/ser/impls.rs | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/serde/src/ser/impls.rs b/serde/src/ser/impls.rs
index f4ae80bd1..0cd5f4b59 100644
--- a/serde/src/ser/impls.rs
+++ b/serde/src/ser/impls.rs
@@ -674,26 +674,32 @@ impl Serialize for net::IpAddr {
     }
 }
 
+const DEC_DIGITS_LUT: &'static [u8] = b"\
+      0001020304050607080910111213141516171819\
+      2021222324252627282930313233343536373839\
+      4041424344454647484950515253545556575859\
+      6061626364656667686970717273747576777879\
+      8081828384858687888990919293949596979899";
+
 #[inline]
-fn format_u8(mut v: u8, out: &mut [u8]) -> usize {
+fn format_u8(mut n: u8, out: &mut [u8]) -> usize {
     assert!(out.len() >= 3);
-    let mut written = 0;
-    let hundreds = v / 100;
-    v -= 100 * hundreds;
-    let tens = v / 10;
-    v -= 10 * tens;
-    let ones = v;
-    if hundreds > 0 {
-        out[written] = b'0' + hundreds;
-        written += 1;
-    }
-    if hundreds > 0 || tens > 0 {
-        out[written] = b'0' + tens;
-        written += 1;
-    }
-    out[written] = b'0' + ones;
-    written += 1;
-    written
+    if n >= 100 {
+        let d1 = ((n % 100) << 1) as usize;
+        n /= 100;
+        out[0] = b'0' + n;
+        out[1] = DEC_DIGITS_LUT[d1];
+        out[2] = DEC_DIGITS_LUT[d1 + 1];
+        3
+    } else if n >= 10 {
+        let d1 = (n << 1) as usize;
+        out[0] = DEC_DIGITS_LUT[d1];
+        out[1] = DEC_DIGITS_LUT[d1 + 1];
+        2
+    } else {
+        out[0] = b'0' + n;
+        1
+    }
 }
 
 #[cfg(test)]

From 8bb07b074323fd80cf2c2064eb776ade7a98cf62 Mon Sep 17 00:00:00 2001
From: Ben Kimock
Date: Mon, 22 Mar 2021 18:15:50 -0400
Subject: [PATCH 3/4] skip UTF8 checking and initialize with b'.'

---
 serde/src/lib.rs       |  1 -
 serde/src/ser/impls.rs | 13 +++++--------
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/serde/src/lib.rs b/serde/src/lib.rs
index 2f6d0442c..2ddf939c1 100644
--- a/serde/src/lib.rs
+++ b/serde/src/lib.rs
@@ -139,7 +139,6 @@
     )
 )]
 // Rustc lints.
-#![forbid(unsafe_code)]
 #![deny(missing_docs, unused_imports)]
 
 ////////////////////////////////////////////////////////////////////////////////
diff --git a/serde/src/ser/impls.rs b/serde/src/ser/impls.rs
index 0cd5f4b59..1dd073979 100644
--- a/serde/src/ser/impls.rs
+++ b/serde/src/ser/impls.rs
@@ -683,7 +683,6 @@ const DEC_DIGITS_LUT: &'static [u8] = b"\
 
 #[inline]
 fn format_u8(mut n: u8, out: &mut [u8]) -> usize {
-    assert!(out.len() >= 3);
     if n >= 100 {
         let d1 = ((n % 100) << 1) as usize;
         n /= 100;
@@ -723,15 +722,13 @@ impl Serialize for net::Ipv4Addr {
         if serializer.is_human_readable() {
             const MAX_LEN: usize = 15;
             debug_assert_eq!(MAX_LEN, "101.102.103.104".len());
-            let mut buf = [0u8; MAX_LEN];
-            let mut written = 0;
-            written += format_u8(self.octets()[0], &mut buf);
+            let mut buf = [b'.'; MAX_LEN];
+            let mut written = format_u8(self.octets()[0], &mut buf);
             for oct in &self.octets()[1..] {
-                buf[written] = b'.';
-                written += 1;
-                written += format_u8(*oct, &mut buf[written..]);
+                written += format_u8(*oct, &mut buf[written..]) + 1;
             }
-            serializer.serialize_str(str::from_utf8(&buf[..written]).unwrap())
+            // We've only written ASCII bytes to the buffer, so it is valid UTF-8
+            serializer.serialize_str(unsafe { str::from_utf8_unchecked(&buf[..written]) })
         } else {
             self.octets().serialize(serializer)
         }

From 4114e90bac00726a404353fd507024137a0fce49 Mon Sep 17 00:00:00 2001
From: Ben Kimock
Date: Mon, 22 Mar 2021 18:39:24 -0400
Subject: [PATCH 4/4] Fix off-by-one mistake, explain the offset

---
 serde/src/ser/impls.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/serde/src/ser/impls.rs b/serde/src/ser/impls.rs
index 1dd073979..0bbb58e9b 100644
--- a/serde/src/ser/impls.rs
+++ b/serde/src/ser/impls.rs
@@ -725,7 +725,8 @@ impl Serialize for net::Ipv4Addr {
             let mut buf = [b'.'; MAX_LEN];
             let mut written = format_u8(self.octets()[0], &mut buf);
             for oct in &self.octets()[1..] {
-                written += format_u8(*oct, &mut buf[written..]) + 1;
+                // Skip over delimiters that we initialized buf with
+                written += format_u8(*oct, &mut buf[written + 1..]) + 1;
             }
             // We've only written ASCII bytes to the buffer, so it is valid UTF-8
             serializer.serialize_str(unsafe { str::from_utf8_unchecked(&buf[..written]) })