diff --git a/CHANGELOG.md b/CHANGELOG.md index 0510c0c3..9f8e8e83 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,21 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. }, ``` +* The `Bytes` type is heavily inspired by `serde_bytes` and ports it to the `serde_as` system. + + ```rust + #[serde_as(as = "Bytes")] + value: Vec, + ``` + + Compared to `serde_bytes` these improvements are available + + 1. Integration with the `serde_as` annotation (see [serde-bytes#14][serde-bytes-complex]). + 2. Implementation for arrays of arbitrary size (Rust 1.51+) (see [serde-bytes#26][serde-bytes-arrays]). + +[serde-bytes-complex]: https://github.com/serde-rs/bytes/issues/14 +[serde-bytes-arrays]: https://github.com/serde-rs/bytes/issues/26 + ## [1.6.4] - 2021-02-16 ### Fixed diff --git a/Cargo.toml b/Cargo.toml index 4b5d875d..405c9517 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,6 +56,7 @@ ron = "0.6" serde-xml-rs = "0.4.1" serde_derive = "1.0.75" serde_json = {version = "1.0.25", features = ["preserve_order"]} +serde_test = "1.0.124" version-sync = "0.9.1" [[test]] diff --git a/src/de/const_arrays.rs b/src/de/const_arrays.rs index 6ff4e143..3bd5eb65 100644 --- a/src/de/const_arrays.rs +++ b/src/de/const_arrays.rs @@ -2,6 +2,7 @@ use super::*; use crate::utils::{MapIter, SeqIter}; use serde::de::*; use std::collections::{BTreeMap, HashMap}; +use std::convert::TryInto; use std::fmt; use std::mem::MaybeUninit; @@ -146,3 +147,55 @@ macro_rules! 
tuple_seq_as_map_impl_intern { } tuple_seq_as_map_impl_intern!([(K, V); N], BTreeMap); tuple_seq_as_map_impl_intern!([(K, V); N], HashMap); + +impl<'de, const N: usize> DeserializeAs<'de, [u8; N]> for Bytes { + fn deserialize_as(deserializer: D) -> Result<[u8; N], D::Error> + where + D: Deserializer<'de>, + { + struct ArrayVisitor; + + impl<'de, const M: usize> Visitor<'de> for ArrayVisitor { + type Value = [u8; M]; + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_fmt(format_args!("an byte array of size {}", M)) + } + + fn visit_seq(self, seq: A) -> Result + where + A: SeqAccess<'de>, + { + array_from_iterator(SeqIter::new(seq), &self) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + v.try_into() + .map_err(|_| Error::invalid_length(v.len(), &self)) + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + v.as_bytes() + .try_into() + .map_err(|_| Error::invalid_length(v.len(), &self)) + } + } + + deserializer.deserialize_bytes(ArrayVisitor::) + } +} + +impl<'de, const N: usize> DeserializeAs<'de, Box<[u8; N]>> for Bytes { + fn deserialize_as(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + >::deserialize_as(deserializer).map(Box::new) + } +} diff --git a/src/de/impls.rs b/src/de/impls.rs index 85811afa..e1a3c80b 100644 --- a/src/de/impls.rs +++ b/src/de/impls.rs @@ -4,6 +4,7 @@ use crate::rust::StringWithSeparator; use crate::utils; use crate::utils::duration::DurationSigned; use serde::de::*; +use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, BinaryHeap, HashMap, HashSet, LinkedList, VecDeque}; use std::convert::From; use std::fmt::{self, Display}; @@ -702,3 +703,166 @@ where Ok(Option::::deserialize_as(deserializer)?.unwrap_or_default()) } } + +impl<'de> DeserializeAs<'de, &'de [u8]> for Bytes { + fn deserialize_as(deserializer: D) -> Result<&'de [u8], D::Error> + where + D: Deserializer<'de>, + { + <&'de 
[u8]>::deserialize(deserializer) + } +} + +// serde_bytes implementation for ByteBuf +// https://github.com/serde-rs/bytes/blob/cbae606b9dc225fc094b031cc84eac9493da2058/src/bytebuf.rs#L196 +// +// Implements: +// * visit_seq +// * visit_bytes +// * visit_byte_buf +// * visit_str +// * visit_string +impl<'de> DeserializeAs<'de, Vec> for Bytes { + fn deserialize_as(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + struct VecVisitor; + + impl<'de> Visitor<'de> for VecVisitor { + type Value = Vec; + + fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str("a byte array") + } + + fn visit_seq(self, seq: A) -> Result + where + A: SeqAccess<'de>, + { + utils::SeqIter::new(seq).collect::>() + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + Ok(v.to_vec()) + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: Error, + { + Ok(v) + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + Ok(v.as_bytes().to_vec()) + } + + fn visit_string(self, v: String) -> Result + where + E: Error, + { + Ok(v.into_bytes()) + } + } + + deserializer.deserialize_byte_buf(VecVisitor) + } +} + +impl<'de> DeserializeAs<'de, Box<[u8]>> for Bytes { + fn deserialize_as(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + >>::deserialize_as(deserializer) + .map(|vec| vec.into_boxed_slice()) + } +} + +// serde_bytes implementation for Cow<'a, [u8]> +// https://github.com/serde-rs/bytes/blob/cbae606b9dc225fc094b031cc84eac9493da2058/src/de.rs#L77 +// +// Implements: +// * visit_borrowed_bytes +// * visit_borrowed_str +// * visit_bytes +// * visit_str +// * visit_byte_buf +// * visit_string +// * visit_seq +impl<'de> DeserializeAs<'de, Cow<'de, [u8]>> for Bytes { + fn deserialize_as(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + struct CowVisitor; + + impl<'de> Visitor<'de> for CowVisitor { + type Value = Cow<'de, [u8]>; + + fn expecting(&self, 
formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str("a byte array") + } + + fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result + where + E: Error, + { + Ok(Cow::Borrowed(v)) + } + + fn visit_borrowed_str(self, v: &'de str) -> Result + where + E: Error, + { + Ok(Cow::Borrowed(v.as_bytes())) + } + + fn visit_bytes(self, v: &[u8]) -> Result + where + E: Error, + { + Ok(Cow::Owned(v.to_vec())) + } + + fn visit_str(self, v: &str) -> Result + where + E: Error, + { + Ok(Cow::Owned(v.as_bytes().to_vec())) + } + + fn visit_byte_buf(self, v: Vec) -> Result + where + E: Error, + { + Ok(Cow::Owned(v)) + } + + fn visit_string(self, v: String) -> Result + where + E: Error, + { + Ok(Cow::Owned(v.into_bytes())) + } + + fn visit_seq(self, seq: V) -> Result + where + V: SeqAccess<'de>, + { + Ok(Cow::Owned( + utils::SeqIter::new(seq).collect::>()?, + )) + } + } + + deserializer.deserialize_bytes(CowVisitor) + } +} diff --git a/src/guide/serde_as.md b/src/guide/serde_as.md index 13916b19..397abe72 100644 --- a/src/guide/serde_as.md +++ b/src/guide/serde_as.md @@ -17,17 +17,18 @@ The basic design of the system was done by [@markazmierczak](https://github.com/ 5. [Re-exporting `serde_as`](#re-exporting-serde_as) 2. [De/Serialize Implementations Available](#deserialize-implementations-available) 1. [Big Array support (Rust 1.51+)](#big-array-support-rust-151) - 2. [Bytes / `Vec` to hex string](#bytes--vecu8-to-hex-string) - 3. [`Default` from `null`](#default-from-null) - 4. [De/Serialize with `FromStr` and `Display`](#deserialize-with-fromstr-and-display) - 5. [`Duration` as seconds](#duration-as-seconds) - 6. [Ignore deserialization errors](#ignore-deserialization-errors) - 7. [`Maps` to `Vec` of tuples](#maps-to-vec-of-tuples) - 8. [`NaiveDateTime` like UTC timestamp](#naivedatetime-like-utc-timestamp) - 9. [`None` as empty `String`](#none-as-empty-string) - 10. [Timestamps as seconds since UNIX epoch](#timestamps-as-seconds-since-unix-epoch) - 11. 
[Value into JSON String](#value-into-json-string) - 12. [`Vec` of tuples to `Maps`](#vec-of-tuples-to-maps) + 2. [`Bytes` with more efficiency](#bytes-with-more-efficiency) + 3. [Bytes / `Vec` to hex string](#bytes--vecu8-to-hex-string) + 4. [`Default` from `null`](#default-from-null) + 5. [De/Serialize with `FromStr` and `Display`](#deserialize-with-fromstr-and-display) + 6. [`Duration` as seconds](#duration-as-seconds) + 7. [Ignore deserialization errors](#ignore-deserialization-errors) + 8. [`Maps` to `Vec` of tuples](#maps-to-vec-of-tuples) + 9. [`NaiveDateTime` like UTC timestamp](#naivedatetime-like-utc-timestamp) + 10. [`None` as empty `String`](#none-as-empty-string) + 11. [Timestamps as seconds since UNIX epoch](#timestamps-as-seconds-since-unix-epoch) + 12. [Value into JSON String](#value-into-json-string) + 13. [`Vec` of tuples to `Maps`](#vec-of-tuples-to-maps) ## Switching from serde's with to `serde_as` @@ -300,6 +301,21 @@ value: [[u8; 64]; 33], "value": [[0,0,0,0,0,...], [0,0,0,...], ...], ``` +### `Bytes` with more efficiency + +[`Bytes`] + +More efficient serialization for byte slices and similar. + +```ignore +// Rust +#[serde_as(as = "Bytes")] +value: Vec, + +// JSON +"value": [0, 1, 2, 3, ...], +``` + ### Bytes / `Vec` to hex string [`Hex`] @@ -516,6 +532,7 @@ This includes `BinaryHeap<(K, V)>`, `BTreeSet<(K, V)>`, `HashSet<(K, V)>`, `Link The [inverse operation](#maps-to-vec-of-tuples) is also available. 
+[`Bytes`]: crate::Bytes [`chrono::DateTime`]: chrono_crate::DateTime [`chrono::DateTime`]: chrono_crate::DateTime [`chrono::Duration`]: https://docs.rs/chrono/latest/chrono/struct.Duration.html diff --git a/src/lib.rs b/src/lib.rs index 6b15dd51..6ddea3e8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1320,3 +1320,118 @@ pub struct TimestampNanoSecondsWithFrac< FORMAT: formats::Format = f64, STRICTNESS: formats::Strictness = formats::Strict, >(PhantomData<(FORMAT, STRICTNESS)>); + +/// Optimized handling of owned and borrowed byte representations. +/// +/// Serialization of byte sequences like `&[u8]` or `Vec` is quite inefficient since each value will be serialized individually. +/// This converter type optimizes the serialization and deserialization. +/// +/// This is a port of the `serde_bytes` crate making it compatible with the `serde_as`-annotation, which allows it to be used in more cases than provided by `serde_bytes`. +/// +/// The type provides de-/serialization for these types: +/// +/// * `[u8; N]`, Rust 1.51+, not possible using `serde_bytes` +/// * `&[u8]` +/// * `Box<[u8; N]>`, Rust 1.51+, not possible using `serde_bytes` +/// * `Box<[u8]>` +/// * `Vec` +/// * `Cow<'_, [u8]>` +/// +/// # Examples +/// +/// ``` +/// # #[cfg(feature = "macros")] { +/// # use serde::{Deserialize, Serialize}; +/// # use serde_with::{serde_as, Bytes}; +/// # use std::borrow::Cow; +/// # +/// #[serde_as] +/// # #[derive(Debug, PartialEq)] +/// #[derive(Deserialize, Serialize)] +/// struct Test<'a> { +/// # #[cfg(FALSE)] +/// #[serde_as(as = "Bytes")] +/// array: [u8; 15], +/// #[serde_as(as = "Bytes")] +/// boxed: Box<[u8]>, +/// #[serde_as(as = "Bytes")] +/// #[serde(borrow)] +/// cow: Cow<'a, [u8]>, +/// #[serde_as(as = "Bytes")] +/// vec: Vec, +/// } +/// +/// let value = Test { +/// # #[cfg(FALSE)] +/// array: b"0123456789ABCDE".clone(), +/// boxed: b"...".to_vec().into_boxed_slice(), +/// cow: Cow::Borrowed(b"FooBar"), +/// vec: vec![0x41, 0x61, 0x21], +/// }; +/// let 
expected = r#"( +/// array: "MDEyMzQ1Njc4OUFCQ0RF", +/// boxed: "Li4u", +/// cow: "Rm9vQmFy", +/// vec: "QWEh", +/// )"#; +/// # drop(expected); +/// # // Create a fake expected value without the array to make the test compile without const generics +/// # let expected = r#"( +/// # boxed: "Li4u", +/// # cow: "Rm9vQmFy", +/// # vec: "QWEh", +/// # )"#; +/// +/// # let pretty_config = ron::ser::PrettyConfig::new() +/// # .with_new_line("\n".into()); +/// assert_eq!(expected, ron::ser::to_string_pretty(&value, pretty_config).unwrap()); +/// assert_eq!(value, ron::from_str(&expected).unwrap()); +/// # } +/// ``` +/// +/// ## Alternative to [`BytesOrString`] +/// +/// The [`Bytes`] can replace [`BytesOrString`]. +/// [`Bytes`] is implemented for more types, which makes it better. +/// The serialization behavior of [`Bytes`] differs from [`BytesOrString`], therefore only `deserialize_as` should be used. +/// +/// ```rust +/// # #[cfg(feature = "macros")] { +/// # use serde::Deserialize; +/// # use serde_json::json; +/// # use serde_with::{serde_as, Bytes}; +/// # +/// #[serde_as] +/// # #[derive(Debug, PartialEq)] +/// #[derive(Deserialize, serde::Serialize)] +/// struct Test { +/// #[serde_as(deserialize_as = "Bytes")] +/// from_bytes: Vec, +/// #[serde_as(deserialize_as = "Bytes")] +/// from_str: Vec, +/// } +/// +/// // Different serialized values ... +/// let j = json!({ +/// "from_bytes": [70,111,111,45,66,97,114], +/// "from_str": "Foo-Bar", +/// }); +/// +/// // can be deserialized ... 
+/// let test = Test { +/// from_bytes: b"Foo-Bar".to_vec(), +/// from_str: b"Foo-Bar".to_vec(), +/// }; +/// assert_eq!(test, serde_json::from_value(j).unwrap()); +/// +/// // and serialization will always be a byte sequence +/// # assert_eq!(json!( +/// { +/// "from_bytes": [70,111,111,45,66,97,114], +/// "from_str": [70,111,111,45,66,97,114], +/// } +/// # ), serde_json::to_value(&test).unwrap()); +/// # } +/// ``` +#[derive(Copy, Clone, Debug, Default)] +pub struct Bytes; diff --git a/src/ser/const_arrays.rs b/src/ser/const_arrays.rs index 06d0d4ec..f7d4daed 100644 --- a/src/ser/const_arrays.rs +++ b/src/ser/const_arrays.rs @@ -42,3 +42,21 @@ macro_rules! tuple_seq_as_map_impl_intern { } tuple_seq_as_map_impl_intern!([(K, V); N], BTreeMap); tuple_seq_as_map_impl_intern!([(K, V); N], HashMap); + +impl<'a, const N: usize> SerializeAs<[u8; N]> for Bytes { + fn serialize_as(bytes: &[u8; N], serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(bytes) + } +} + +impl<'a, const N: usize> SerializeAs> for Bytes { + fn serialize_as(bytes: &Box<[u8; N]>, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(&**bytes) + } +} diff --git a/src/ser/impls.rs b/src/ser/impls.rs index 17b8b546..db8bcbd7 100644 --- a/src/ser/impls.rs +++ b/src/ser/impls.rs @@ -3,6 +3,7 @@ use crate::formats::Strictness; use crate::rust::StringWithSeparator; use crate::utils::duration::DurationSigned; use crate::Separator; +use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, BinaryHeap, HashMap, HashSet, LinkedList, VecDeque}; use std::fmt::Display; use std::hash::{BuildHasher, Hash}; @@ -376,3 +377,39 @@ where serializer.serialize_some(&SerializeAsWrap::::new(source)) } } + +impl SerializeAs<&[u8]> for Bytes { + fn serialize_as(bytes: &&[u8], serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(bytes) + } +} + +impl SerializeAs> for Bytes { + fn serialize_as(bytes: &Vec, serializer: S) -> Result 
+ where + S: Serializer, + { + serializer.serialize_bytes(bytes) + } +} + +impl SerializeAs> for Bytes { + fn serialize_as(bytes: &Box<[u8]>, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(bytes) + } +} + +impl<'a> SerializeAs> for Bytes { + fn serialize_as(bytes: &Cow<'a, [u8]>, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(bytes) + } +} diff --git a/tests/serde_as.rs b/tests/serde_as.rs index eacb181c..3d22b334 100644 --- a/tests/serde_as.rs +++ b/tests/serde_as.rs @@ -1387,3 +1387,132 @@ fn test_big_arrays() { ]], ); } + +// The test requires const-generics to work +#[rustversion::since(1.51)] +#[test] +fn test_bytes() { + // The test case is copied from + // https://github.com/serde-rs/bytes/blob/cbae606b9dc225fc094b031cc84eac9493da2058/tests/test_derive.rs + // Original code by @dtolnay + + use serde_test::{assert_de_tokens, assert_tokens, Token}; + use serde_with::Bytes; + use std::borrow::Cow; + + #[serde_as] + #[derive(Serialize, Deserialize, PartialEq, Debug)] + struct Test<'a> { + #[serde_as(as = "Bytes")] + array: [u8; 52], + + #[serde_as(as = "Bytes")] + slice: &'a [u8], + + #[serde_as(as = "Bytes")] + vec: Vec, + + #[serde_as(as = "Bytes")] + cow_slice: Cow<'a, [u8]>, + + #[serde_as(as = "Box")] + boxed_array: Box<[u8; 52]>, + + #[serde_as(as = "Bytes")] + boxed_array2: Box<[u8; 52]>, + + #[serde_as(as = "Bytes")] + boxed_slice: Box<[u8]>, + + #[serde_as(as = "Option")] + opt_slice: Option<&'a [u8]>, + + #[serde_as(as = "Option")] + opt_vec: Option>, + + #[serde_as(as = "Option")] + opt_cow_slice: Option>, + } + + let test = Test { + array: *b"ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz", + slice: b"...", + vec: b"...".to_vec(), + cow_slice: Cow::Borrowed(b"..."), + boxed_array: Box::new(*b"ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz"), + boxed_array2: Box::new(*b"ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz"), + boxed_slice: 
b"...".to_vec().into_boxed_slice(), + opt_slice: Some(b"..."), + opt_vec: Some(b"...".to_vec()), + opt_cow_slice: Some(Cow::Borrowed(b"...")), + }; + + assert_tokens( + &test, + &[ + Token::Struct { + name: "Test", + len: 10, + }, + Token::Str("array"), + Token::BorrowedBytes(b"ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz"), + Token::Str("slice"), + Token::BorrowedBytes(b"..."), + Token::Str("vec"), + Token::Bytes(b"..."), + Token::Str("cow_slice"), + Token::BorrowedBytes(b"..."), + Token::Str("boxed_array"), + Token::BorrowedBytes(b"ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz"), + Token::Str("boxed_array2"), + Token::BorrowedBytes(b"ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz"), + Token::Str("boxed_slice"), + Token::Bytes(b"..."), + Token::Str("opt_slice"), + Token::Some, + Token::BorrowedBytes(b"..."), + Token::Str("opt_vec"), + Token::Some, + Token::Bytes(b"..."), + Token::Str("opt_cow_slice"), + Token::Some, + Token::BorrowedBytes(b"..."), + Token::StructEnd, + ], + ); + + // Test string deserialization + assert_de_tokens( + &test, + &[ + Token::Struct { + name: "Test", + len: 10, + }, + Token::Str("array"), + Token::BorrowedStr("ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz"), + Token::Str("slice"), + Token::BorrowedStr("..."), + Token::Str("vec"), + Token::Bytes(b"..."), + Token::Str("cow_slice"), + Token::BorrowedStr("..."), + Token::Str("boxed_array"), + Token::BorrowedStr("ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz"), + Token::Str("boxed_array2"), + Token::BorrowedStr("ABCDEFGHIJKLMNOPQRSTUVWXZYabcdefghijklmnopqrstuvwxyz"), + Token::Str("boxed_slice"), + Token::Bytes(b"..."), + Token::Str("opt_slice"), + Token::Some, + Token::BorrowedStr("..."), + Token::Str("opt_vec"), + Token::Some, + Token::Bytes(b"..."), + Token::Str("opt_cow_slice"), + Token::Some, + Token::BorrowedStr("..."), + Token::StructEnd, + ], + ); +}