From 006072e7fe16c0aa52cd07e3f9eca428b30e1a23 Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 30 Aug 2022 18:23:59 +0500 Subject: [PATCH] Ensure, that map keys are non-complex types that would not break XML markup --- Changelog.md | 1 + src/errors.rs | 7 ++ src/se/key.rs | 297 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/se/mod.rs | 78 +++++++++++++ src/se/var.rs | 21 ++-- src/utils.rs | 22 ++++ 6 files changed, 418 insertions(+), 8 deletions(-) create mode 100644 src/se/key.rs diff --git a/Changelog.md b/Changelog.md index f20f4bdc..6358cb32 100644 --- a/Changelog.md +++ b/Changelog.md @@ -17,6 +17,7 @@ ### Misc Changes - [#468]: Content of `DeError::Unsupported` changed from `&'static str` to `Cow<'static, str>` +- [#468]: Ensure that map keys can only be types that are serialized as primitives [#468]: https://github.com/tafia/quick-xml/pull/468 diff --git a/src/errors.rs b/src/errors.rs index 746912a5..57f8f3b1 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -294,4 +294,11 @@ pub mod serialize { Self::InvalidFloat(e) } } + + impl From for DeError { + #[inline] + fn from(e: fmt::Error) -> Self { + Self::Custom(e.to_string()) + } + } } diff --git a/src/se/key.rs b/src/se/key.rs new file mode 100644 index 00000000..0adf84a1 --- /dev/null +++ b/src/se/key.rs @@ -0,0 +1,297 @@ +use crate::errors::serialize::DeError; +use serde::ser::{Impossible, Serialize, Serializer}; +use serde::serde_if_integer128; +use std::fmt::Write; + +/// A serializer that ensures that only plain types can be serialized, +/// so the result can be used as an XML tag or attribute name. +/// +/// This serializer does not check that the name does not contain characters that +/// are [not allowed] in XML names, because in some cases it should pass names +/// that would be filtered on a higher level. 
+/// +/// [not allowed]: https://www.w3.org/TR/REC-xml/#sec-common-syn +pub struct XmlNameSerializer { + /// Writer to which this serializer writes content + pub writer: W, +} + +impl XmlNameSerializer { + #[inline] + fn write_str(&mut self, value: &str) -> Result<(), DeError> { + Ok(self.writer.write_str(value)?) + } +} + +impl Serializer for XmlNameSerializer { + type Ok = W; + type Error = DeError; + + type SerializeSeq = Impossible; + type SerializeTuple = Impossible; + type SerializeTupleStruct = Impossible; + type SerializeTupleVariant = Impossible; + type SerializeMap = Impossible; + type SerializeStruct = Impossible; + type SerializeStructVariant = Impossible; + + write_primitive!(); + + fn serialize_str(mut self, value: &str) -> Result { + self.write_str(value)?; + Ok(self.writer) + } + + /// We cannot store anything, so the absence of a unit and presence of it + /// does not differ, so serialization of unit returns `Err(Unsupported)` + fn serialize_unit(self) -> Result { + Err(DeError::Unsupported( + "unit type `()` cannot be serialized as an XML tag name".into(), + )) + } + + /// We cannot store both a variant discriminant and a variant value, + /// so serialization of enum newtype variant returns `Err(Unsupported)` + fn serialize_newtype_variant( + self, + name: &'static str, + _variant_index: u32, + variant: &'static str, + _value: &T, + ) -> Result { + Err(DeError::Unsupported( + format!( + "enum newtype variant `{}::{}` cannot be serialized as an XML tag name", + name, variant + ) + .into(), + )) + } + + fn serialize_seq(self, _len: Option) -> Result { + Err(DeError::Unsupported( + "sequence cannot be serialized as an XML tag name".into(), + )) + } + + fn serialize_tuple(self, _len: usize) -> Result { + Err(DeError::Unsupported( + "tuple cannot be serialized as an XML tag name".into(), + )) + } + + fn serialize_tuple_struct( + self, + name: &'static str, + _len: usize, + ) -> Result { + Err(DeError::Unsupported( + format!( + "tuple struct `{}` cannot 
be serialized as an XML tag name", + name + ) + .into(), + )) + } + + fn serialize_tuple_variant( + self, + name: &'static str, + _variant_index: u32, + variant: &'static str, + _len: usize, + ) -> Result { + Err(DeError::Unsupported( + format!( + "enum tuple variant `{}::{}` cannot be serialized as an XML tag name", + name, variant + ) + .into(), + )) + } + + fn serialize_map(self, _len: Option) -> Result { + Err(DeError::Unsupported( + "map cannot be serialized as an XML tag name".into(), + )) + } + + fn serialize_struct( + self, + name: &'static str, + _len: usize, + ) -> Result { + Err(DeError::Unsupported( + format!("struct `{}` cannot be serialized as an XML tag name", name).into(), + )) + } + + fn serialize_struct_variant( + self, + name: &'static str, + _variant_index: u32, + variant: &'static str, + _len: usize, + ) -> Result { + Err(DeError::Unsupported( + format!( + "enum struct variant `{}::{}` cannot be serialized as an XML tag name", + name, variant + ) + .into(), + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::utils::Bytes; + use pretty_assertions::assert_eq; + use serde::Serialize; + use std::collections::BTreeMap; + + #[derive(Debug, Serialize, PartialEq)] + struct Unit; + + #[derive(Debug, Serialize, PartialEq)] + #[serde(rename = "<\"&'>")] + struct UnitEscaped; + + #[derive(Debug, Serialize, PartialEq)] + struct Newtype(bool); + + #[derive(Debug, Serialize, PartialEq)] + struct Tuple(&'static str, usize); + + #[derive(Debug, Serialize, PartialEq)] + struct Struct { + key: &'static str, + val: usize, + } + + #[derive(Debug, Serialize, PartialEq)] + enum Enum { + Unit, + #[serde(rename = "<\"&'>")] + UnitEscaped, + Newtype(bool), + Tuple(&'static str, usize), + Struct { + key: &'static str, + val: usize, + }, + } + + /// Checks that given `$data` successfully serialized as `$expected` + macro_rules! 
serialize_as { + ($name:ident: $data:expr => $expected:literal) => { + #[test] + fn $name() { + let ser = XmlNameSerializer { + writer: String::new(), + }; + + let buffer = $data.serialize(ser).unwrap(); + assert_eq!(buffer, $expected); + } + }; + } + + /// Checks that attempt to serialize given `$data` results to a + /// serialization error `$kind` with `$reason` + macro_rules! err { + ($name:ident: $data:expr => $kind:ident($reason:literal)) => { + #[test] + fn $name() { + let mut buffer = String::new(); + let ser = XmlNameSerializer { + writer: &mut buffer, + }; + + match $data.serialize(ser).unwrap_err() { + DeError::$kind(e) => assert_eq!(e, $reason), + e => panic!( + "Expected `{}({})`, found `{:?}`", + stringify!($kind), + $reason, + e + ), + } + assert_eq!(buffer, ""); + } + }; + } + + serialize_as!(false_: false => "false"); + serialize_as!(true_: true => "true"); + + serialize_as!(i8_: -42i8 => "-42"); + serialize_as!(i16_: -4200i16 => "-4200"); + serialize_as!(i32_: -42000000i32 => "-42000000"); + serialize_as!(i64_: -42000000000000i64 => "-42000000000000"); + serialize_as!(isize_: -42000000000000isize => "-42000000000000"); + + serialize_as!(u8_: 42u8 => "42"); + serialize_as!(u16_: 4200u16 => "4200"); + serialize_as!(u32_: 42000000u32 => "42000000"); + serialize_as!(u64_: 42000000000000u64 => "42000000000000"); + serialize_as!(usize_: 42000000000000usize => "42000000000000"); + + serde_if_integer128! 
{ + serialize_as!(i128_: -420000000000000000000000000000i128 => "-420000000000000000000000000000"); + serialize_as!(u128_: 420000000000000000000000000000u128 => "420000000000000000000000000000"); + } + + serialize_as!(f32_: 4.2f32 => "4.2"); + serialize_as!(f64_: 4.2f64 => "4.2"); + + serialize_as!(char_non_escaped: 'h' => "h"); + serialize_as!(char_lt: '<' => "<"); + serialize_as!(char_gt: '>' => ">"); + serialize_as!(char_amp: '&' => "&"); + serialize_as!(char_apos: '\'' => "'"); + serialize_as!(char_quot: '"' => "\""); + + serialize_as!(str_valid_name: "valid-name" => "valid-name"); + serialize_as!(str_space: "string with spaces" => "string with spaces"); + serialize_as!(str_lt: "string<" => "string<"); + serialize_as!(str_gt: "string>" => "string>"); + serialize_as!(str_amp: "string&" => "string&"); + serialize_as!(str_apos: "string'" => "string'"); + serialize_as!(str_quot: "string\"" => "string\""); + + err!(bytes: Bytes(b"<\"escaped & bytes'>") + => Unsupported("`serialize_bytes` not supported yet")); + + serialize_as!(option_none: Option::<&str>::None => ""); + serialize_as!(option_some: Some("non-escaped-string") => "non-escaped-string"); + + err!(unit: () + => Unsupported("unit type `()` cannot be serialized as an XML tag name")); + serialize_as!(unit_struct: Unit => "Unit"); + serialize_as!(unit_struct_escaped: UnitEscaped => "<\"&'>"); + + serialize_as!(enum_unit: Enum::Unit => "Unit"); + serialize_as!(enum_unit_escaped: Enum::UnitEscaped => "<\"&'>"); + + serialize_as!(newtype: Newtype(true) => "true"); + err!(enum_newtype: Enum::Newtype(false) + => Unsupported("enum newtype variant `Enum::Newtype` cannot be serialized as an XML tag name")); + + err!(seq: vec![1, 2, 3] + => Unsupported("sequence cannot be serialized as an XML tag name")); + err!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize) + => Unsupported("tuple cannot be serialized as an XML tag name")); + err!(tuple_struct: Tuple("first", 42) + => Unsupported("tuple struct `Tuple` cannot be 
serialized as an XML tag name")); + err!(enum_tuple: Enum::Tuple("first", 42) + => Unsupported("enum tuple variant `Enum::Tuple` cannot be serialized as an XML tag name")); + + err!(map: BTreeMap::from([("_1", 2), ("_3", 4)]) + => Unsupported("map cannot be serialized as an XML tag name")); + err!(struct_: Struct { key: "answer", val: 42 } + => Unsupported("struct `Struct` cannot be serialized as an XML tag name")); + err!(enum_struct: Enum::Struct { key: "answer", val: 42 } + => Unsupported("enum struct variant `Enum::Struct` cannot be serialized as an XML tag name")); +} diff --git a/src/se/mod.rs b/src/se/mod.rs index 4f6d95a0..42fed721 100644 --- a/src/se/mod.rs +++ b/src/se/mod.rs @@ -1,5 +1,83 @@ //! Module to handle custom serde `Serializer` +/// Implements writing primitives to the underlying writer, which is returned from the `writer` field. +/// Implementor must provide a `write_str(&mut self, &str) -> Result<(), DeError>` method +macro_rules! write_primitive { + ($method:ident ( $ty:ty )) => { + fn $method(mut self, value: $ty) -> Result { + self.write_str(&value.to_string())?; + Ok(self.writer) + } + }; + () => { + fn serialize_bool(mut self, value: bool) -> Result { + self.write_str(if value { "true" } else { "false" })?; + Ok(self.writer) + } + + write_primitive!(serialize_i8(i8)); + write_primitive!(serialize_i16(i16)); + write_primitive!(serialize_i32(i32)); + write_primitive!(serialize_i64(i64)); + + write_primitive!(serialize_u8(u8)); + write_primitive!(serialize_u16(u16)); + write_primitive!(serialize_u32(u32)); + write_primitive!(serialize_u64(u64)); + + serde_if_integer128! 
{ + write_primitive!(serialize_i128(i128)); + write_primitive!(serialize_u128(u128)); + } + + write_primitive!(serialize_f32(f32)); + write_primitive!(serialize_f64(f64)); + + fn serialize_char(self, value: char) -> Result { + self.serialize_str(&value.to_string()) + } + + fn serialize_bytes(self, _value: &[u8]) -> Result { + //TODO: customization point - allow user to decide how to encode bytes + Err(DeError::Unsupported( + "`serialize_bytes` not supported yet".into(), + )) + } + + fn serialize_none(self) -> Result { + Ok(self.writer) + } + + fn serialize_some(self, value: &T) -> Result { + value.serialize(self) + } + + fn serialize_unit_struct(self, name: &'static str) -> Result { + self.serialize_str(name) + } + + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + ) -> Result { + self.serialize_str(variant) + } + + fn serialize_newtype_struct( + self, + _name: &'static str, + value: &T, + ) -> Result { + value.serialize(self) + } + }; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +mod key; mod var; use self::var::{Map, Seq, Struct, Tuple}; diff --git a/src/se/var.rs b/src/se/var.rs index 76e09a56..eb154406 100644 --- a/src/se/var.rs +++ b/src/se/var.rs @@ -2,6 +2,7 @@ use crate::{ de::{INNER_VALUE, UNFLATTEN_PREFIX}, errors::{serialize::DeError, Error}, events::{BytesEnd, BytesStart, Event}, + se::key::XmlNameSerializer, se::Serializer, writer::Writer, }; @@ -64,17 +65,21 @@ where key: &K, value: &V, ) -> Result<(), DeError> { - // TODO: Is it possible to ensure our key is never a composite type? - // Anything which isn't a "primitive" would lead to malformed XML here... 
- write!(self.parent.writer.inner(), "<").map_err(Error::Io)?; - key.serialize(&mut *self.parent)?; - write!(self.parent.writer.inner(), ">").map_err(Error::Io)?; + let key = key.serialize(XmlNameSerializer { + writer: String::new(), + })?; + + let writer = self.parent.writer.inner(); + writer.write_all(b"<").map_err(Error::Io)?; + writer.write_all(key.as_bytes()).map_err(Error::Io)?; + writer.write_all(b">").map_err(Error::Io)?; value.serialize(&mut *self.parent)?; - write!(self.parent.writer.inner(), "").map_err(Error::Io)?; + let writer = self.parent.writer.inner(); + writer.write_all(b"").map_err(Error::Io)?; Ok(()) } } diff --git a/src/utils.rs b/src/utils.rs index 130532a3..185cff40 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,6 +3,8 @@ use std::fmt::{self, Debug, Formatter}; #[cfg(feature = "serialize")] use serde::de::{Deserialize, Deserializer, Error, Visitor}; +#[cfg(feature = "serialize")] +use serde::ser::{Serialize, Serializer}; pub fn write_cow_string(f: &mut Formatter, cow_string: &Cow<[u8]>) -> fmt::Result { match cow_string { @@ -76,6 +78,16 @@ impl<'de> Deserialize<'de> for ByteBuf { } } +#[cfg(feature = "serialize")] +impl Serialize for ByteBuf { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(&self.0) + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// /// Wrapper around `&[u8]` that has a human-readable debug representation: @@ -117,6 +129,16 @@ impl<'de> Deserialize<'de> for Bytes<'de> { } } +#[cfg(feature = "serialize")] +impl<'de> Serialize for Bytes<'de> { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_bytes(self.0) + } +} + //////////////////////////////////////////////////////////////////////////////////////////////////// #[cfg(test)]