Skip to content

Commit

Permalink
Ensure, that map keys serialized as valid xml names
Browse files Browse the repository at this point in the history
  • Loading branch information
Mingun committed Sep 2, 2022
1 parent 0336dcb commit 54159d0
Show file tree
Hide file tree
Showing 6 changed files with 488 additions and 8 deletions.
6 changes: 6 additions & 0 deletions Changelog.md
Expand Up @@ -14,11 +14,17 @@

### Bug Fixes

- [#468]: Ensure that serialization of map keys always produces valid XML names.
In particular, this means that maps with numeric and numeric-like keys (for
example, `"42"`) can no longer be serialized, because an [XML name] cannot start
with a digit

### Misc Changes

- [#468]: Content of `DeError::Unsupported` changed from `&'static str` to `Cow<'static, str>`

[#468]: https://github.com/tafia/quick-xml/pull/468
[XML name]: https://www.w3.org/TR/REC-xml/#NT-Name

## 0.24.0 -- 2022-08-28

Expand Down
16 changes: 16 additions & 0 deletions src/errors.rs
Expand Up @@ -188,6 +188,15 @@ pub mod serialize {
/// An attempt to deserialize to a type, that is not supported by the XML
/// store at current position, for example, attempt to deserialize `struct`
/// from attribute or attempt to deserialize binary data.
///
/// This error is also returned when the serialized type cannot be represented
/// in XML without violating the XML rules in the final document. For example,
/// an attempt to serialize a `HashMap<{integer}, ...>` causes this error because
/// an [XML name] cannot start with a digit or a hyphen (minus sign). The same
/// happens if a map key is a complex type that is not serialized as
/// a primitive type (i.e. string, char, bool, unit struct or unit variant).
///
/// [XML name]: https://www.w3.org/TR/REC-xml/#sec-common-syn
Unsupported(Cow<'static, str>),
/// Too many events were skipped while deserializing a sequence, event limit
/// exceeded. The limit was provided as an argument
Expand Down Expand Up @@ -294,4 +303,11 @@ pub mod serialize {
Self::InvalidFloat(e)
}
}

impl From<fmt::Error> for DeError {
#[inline]
fn from(e: fmt::Error) -> Self {
Self::Custom(e.to_string())
}
}
}
353 changes: 353 additions & 0 deletions src/se/key.rs
@@ -0,0 +1,353 @@
use crate::errors::serialize::DeError;
use serde::ser::{Impossible, Serialize, Serializer};
use serde::serde_if_integer128;
use std::fmt::Write;

/// Returns `true` if `ch` may appear as the first character of an XML 1.1 name.
///
/// XML 1.1 is deliberately permissive here. Citation from
/// <https://www.w3.org/TR/xml11/#sec-xml11>:
///
/// > The overall philosophy of names has changed since XML 1.0. Whereas XML 1.0
/// > provided a rigid definition of names, wherein everything that was not permitted
/// > was forbidden, XML 1.1 names are designed so that everything that is not
/// > forbidden (for a specific reason) is permitted. Since Unicode will continue
/// > to grow past version 4.0, further changes to XML can be avoided by allowing
/// > almost any character, including those not yet assigned, in names.
///
/// The accepted ranges follow the `NameStartChar` production:
/// <https://www.w3.org/TR/xml11/#NT-NameStartChar>
const fn is_xml11_name_start_char(ch: char) -> bool {
    matches!(
        ch,
        ':' | '_'
            | 'A'..='Z'
            | 'a'..='z'
            | '\u{00C0}'..='\u{00D6}'
            | '\u{00D8}'..='\u{00F6}'
            | '\u{00F8}'..='\u{02FF}'
            | '\u{0370}'..='\u{037D}'
            | '\u{037F}'..='\u{1FFF}'
            | '\u{200C}'..='\u{200D}'
            | '\u{2070}'..='\u{218F}'
            | '\u{2C00}'..='\u{2FEF}'
            | '\u{3001}'..='\u{D7FF}'
            | '\u{F900}'..='\u{FDCF}'
            | '\u{FDF0}'..='\u{FFFD}'
            | '\u{10000}'..='\u{EFFFF}'
    )
}
/// Returns `true` if `ch` may appear anywhere in an XML 1.1 name.
///
/// Every name start character qualifies; in addition `-`, `.`, ASCII digits,
/// the middle dot and the ranges listed below are accepted, as described by
/// the `NameChar` production:
/// <https://www.w3.org/TR/xml11/#NT-NameChar>
const fn is_xml11_name_char(ch: char) -> bool {
    let continuation_only = matches!(
        ch,
        '-' | '.'
            | '0'..='9'
            | '\u{00B7}'
            | '\u{0300}'..='\u{036F}'
            | '\u{203F}'..='\u{2040}'
    );
    continuation_only || is_xml11_name_start_char(ch)
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A serializer that accepts only plain types, so that its output can be used
/// as an XML tag or attribute name.
///
/// This serializer checks that the name does not contain characters that are
/// [not allowed] in XML names, and returns an error instead of writing such
/// a name.
///
/// [not allowed]: https://www.w3.org/TR/REC-xml/#sec-common-syn
pub struct XmlNameSerializer<W: Write> {
/// Writer to which this serializer writes content
pub writer: W,
}

impl<W: Write> XmlNameSerializer<W> {
    /// Validates `value` as an XML 1.1 name and, if it is acceptable, writes
    /// it to the underlying writer unchanged.
    ///
    /// Returns `DeError::Unsupported` naming the offending character when the
    /// first character is not a name start character, or when any character
    /// is not a name character. Nothing is written in that case. An empty
    /// `value` has no characters to reject, so it passes both checks.
    //TODO: customization point - allow user to decide if he want to reject or encode the name
    fn write_str(&mut self, value: &str) -> Result<(), DeError> {
        // The first character is subject to the stricter "name start" rule
        if let Some(first) = value.chars().next() {
            if !is_xml11_name_start_char(first) {
                let reason = format!(
                    "character `{}` is not allowed at the start of an XML name",
                    first
                );
                return Err(DeError::Unsupported(reason.into()));
            }
        }

        // Report the first character that cannot be part of a name at all
        if let Some(invalid) = value.chars().find(|&ch| !is_xml11_name_char(ch)) {
            let reason = format!("character `{}` is not allowed in an XML name", invalid);
            return Err(DeError::Unsupported(reason.into()));
        }

        self.writer.write_str(value)?;
        Ok(())
    }
}

/// Serializes a value as a single XML name. On success the writer is handed
/// back to the caller; every compound value (sequence, tuple, map, struct and
/// non-unit enum variants) is rejected with `DeError::Unsupported`.
impl<W: Write> Serializer for XmlNameSerializer<W> {
    type Ok = W;
    type Error = DeError;

    // Compound values are always rejected before a sub-serializer is needed,
    // so all of them use the uninhabited `Impossible` type
    type SerializeSeq = Impossible<Self::Ok, Self::Error>;
    type SerializeTuple = Impossible<Self::Ok, Self::Error>;
    type SerializeTupleStruct = Impossible<Self::Ok, Self::Error>;
    type SerializeTupleVariant = Impossible<Self::Ok, Self::Error>;
    type SerializeMap = Impossible<Self::Ok, Self::Error>;
    type SerializeStruct = Impossible<Self::Ok, Self::Error>;
    type SerializeStructVariant = Impossible<Self::Ok, Self::Error>;

    // NOTE(review): macro is defined outside this file; presumably it supplies
    // the remaining `serialize_*` methods for primitive types -- confirm
    write_primitive!();

    /// Validates the string as an XML name, writes it and returns the writer
    fn serialize_str(mut self, value: &str) -> Result<Self::Ok, Self::Error> {
        self.write_str(value).map(|()| self.writer)
    }

    /// A unit carries no data from which a name could be produced, so its
    /// presence cannot be told apart from its absence; serialization of unit
    /// therefore returns `Err(Unsupported)`
    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
        let reason = "unit type `()` cannot be serialized as an XML tag name";
        Err(DeError::Unsupported(reason.into()))
    }

    /// A name cannot hold both a variant discriminant and a variant value,
    /// so serialization of enum newtype variant returns `Err(Unsupported)`
    fn serialize_newtype_variant<T: ?Sized + Serialize>(
        self,
        name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        _value: &T,
    ) -> Result<Self::Ok, Self::Error> {
        let reason = format!(
            "enum newtype variant `{}::{}` cannot be serialized as an XML tag name",
            name, variant
        );
        Err(DeError::Unsupported(reason.into()))
    }

    /// Sequences are never valid names; always returns `Err(Unsupported)`
    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        let reason = "sequence cannot be serialized as an XML tag name";
        Err(DeError::Unsupported(reason.into()))
    }

    /// Tuples are never valid names; always returns `Err(Unsupported)`
    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
        let reason = "tuple cannot be serialized as an XML tag name";
        Err(DeError::Unsupported(reason.into()))
    }

    /// Tuple structs are never valid names; always returns `Err(Unsupported)`
    fn serialize_tuple_struct(
        self,
        name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        let reason = format!(
            "tuple struct `{}` cannot be serialized as an XML tag name",
            name
        );
        Err(DeError::Unsupported(reason.into()))
    }

    /// Enum tuple variants are never valid names; always returns `Err(Unsupported)`
    fn serialize_tuple_variant(
        self,
        name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        let reason = format!(
            "enum tuple variant `{}::{}` cannot be serialized as an XML tag name",
            name, variant
        );
        Err(DeError::Unsupported(reason.into()))
    }

    /// Maps are never valid names; always returns `Err(Unsupported)`
    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
        let reason = "map cannot be serialized as an XML tag name";
        Err(DeError::Unsupported(reason.into()))
    }

    /// Structs are never valid names; always returns `Err(Unsupported)`
    fn serialize_struct(
        self,
        name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        let reason = format!("struct `{}` cannot be serialized as an XML tag name", name);
        Err(DeError::Unsupported(reason.into()))
    }

    /// Enum struct variants are never valid names; always returns `Err(Unsupported)`
    fn serialize_struct_variant(
        self,
        name: &'static str,
        _variant_index: u32,
        variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStructVariant, Self::Error> {
        let reason = format!(
            "enum struct variant `{}::{}` cannot be serialized as an XML tag name",
            name, variant
        );
        Err(DeError::Unsupported(reason.into()))
    }
}

#[cfg(test)]
mod tests {
use super::*;
use crate::utils::Bytes;
use pretty_assertions::assert_eq;
use serde::Serialize;
use std::collections::BTreeMap;

/// Unit struct whose own name (`Unit`) is used as the serialized value
#[derive(Debug, Serialize, PartialEq)]
struct Unit;

/// Unit struct renamed to a string made only of characters that are special
/// in XML, used to check that such a name is rejected
#[derive(Debug, Serialize, PartialEq)]
#[serde(rename = "<\"&'>")]
struct UnitEscaped;

/// Newtype struct wrapping a primitive value
#[derive(Debug, Serialize, PartialEq)]
struct Newtype(bool);

/// Tuple struct; a compound type that cannot become a name
#[derive(Debug, Serialize, PartialEq)]
struct Tuple(&'static str, usize);

/// Struct with named fields; a compound type that cannot become a name
#[derive(Debug, Serialize, PartialEq)]
struct Struct {
key: &'static str,
val: usize,
}

/// Enum with one variant of each kind, mirroring the standalone types above
#[derive(Debug, Serialize, PartialEq)]
enum Enum {
// Serialized using the variant name
Unit,
// Variant name consists of characters that are special in XML
#[serde(rename = "<\"&'>")]
UnitEscaped,
Newtype(bool),
Tuple(&'static str, usize),
Struct {
key: &'static str,
val: usize,
},
}

/// Generates a test named `$name` which checks that the given `$data` is
/// successfully serialized by `XmlNameSerializer` and that the returned
/// writer contains exactly `$expected`
macro_rules! serialize_as {
($name:ident: $data:expr => $expected:literal) => {
#[test]
fn $name() {
// The serializer owns the `String` and gives it back on success
let ser = XmlNameSerializer {
writer: String::new(),
};

let buffer = $data.serialize(ser).unwrap();
assert_eq!(buffer, $expected);
}
};
}

/// Generates a test named `$name` which checks that an attempt to serialize
/// the given `$data` results in the serialization error `DeError::$kind`
/// carrying `$reason`, and that nothing was written to the output
macro_rules! err {
($name:ident: $data:expr => $kind:ident($reason:literal)) => {
#[test]
fn $name() {
// The serializer only borrows the buffer, so it can be inspected
// after the failed serialization
let mut buffer = String::new();
let ser = XmlNameSerializer {
writer: &mut buffer,
};

match $data.serialize(ser).unwrap_err() {
DeError::$kind(e) => assert_eq!(e, $reason),
// Any other error variant is a test failure
e => panic!(
"Expected `{}({})`, found `{:?}`",
stringify!($kind),
$reason,
e
),
}
// A rejected value must not leave partial output behind
assert_eq!(buffer, "");
}
};
}

// Booleans are written in their textual form, which is a valid name
serialize_as!(false_: false => "false");
serialize_as!(true_: true => "true");

// Negative integers are rejected because of the leading `-`
err!(i8_: -42i8 => Unsupported("character `-` is not allowed at the start of an XML name"));
err!(i16_: -4200i16 => Unsupported("character `-` is not allowed at the start of an XML name"));
err!(i32_: -42000000i32 => Unsupported("character `-` is not allowed at the start of an XML name"));
err!(i64_: -42000000000000i64 => Unsupported("character `-` is not allowed at the start of an XML name"));
err!(isize_: -42000000000000isize => Unsupported("character `-` is not allowed at the start of an XML name"));

// Unsigned integers are rejected because a name cannot start with a digit
err!(u8_: 42u8 => Unsupported("character `4` is not allowed at the start of an XML name"));
err!(u16_: 4200u16 => Unsupported("character `4` is not allowed at the start of an XML name"));
err!(u32_: 42000000u32 => Unsupported("character `4` is not allowed at the start of an XML name"));
err!(u64_: 42000000000000u64 => Unsupported("character `4` is not allowed at the start of an XML name"));
err!(usize_: 42000000000000usize => Unsupported("character `4` is not allowed at the start of an XML name"));

serde_if_integer128! {
err!(i128_: -420000000000000000000000000000i128 => Unsupported("character `-` is not allowed at the start of an XML name"));
err!(u128_: 420000000000000000000000000000u128 => Unsupported("character `4` is not allowed at the start of an XML name"));
}

// Floats are rejected for the same reason as unsigned integers
err!(f32_: 4.2f32 => Unsupported("character `4` is not allowed at the start of an XML name"));
err!(f64_: 4.2f64 => Unsupported("character `4` is not allowed at the start of an XML name"));

// Single characters: only name start characters are accepted
serialize_as!(char_non_escaped: 'h' => "h");
err!(char_lt: '<' => Unsupported("character `<` is not allowed at the start of an XML name"));
err!(char_gt: '>' => Unsupported("character `>` is not allowed at the start of an XML name"));
err!(char_amp: '&' => Unsupported("character `&` is not allowed at the start of an XML name"));
err!(char_apos: '\'' => Unsupported("character `'` is not allowed at the start of an XML name"));
err!(char_quot: '"' => Unsupported("character `\"` is not allowed at the start of an XML name"));

// Strings: a valid start followed by a forbidden character is reported
// with the "in an XML name" form of the message
serialize_as!(str_valid_name: "valid-name" => "valid-name");
err!(str_space: "string with spaces" => Unsupported("character ` ` is not allowed in an XML name"));
err!(str_lt: "string<" => Unsupported("character `<` is not allowed in an XML name"));
err!(str_gt: "string>" => Unsupported("character `>` is not allowed in an XML name"));
err!(str_amp: "string&" => Unsupported("character `&` is not allowed in an XML name"));
err!(str_apos: "string'" => Unsupported("character `'` is not allowed in an XML name"));
err!(str_quot: "string\"" => Unsupported("character `\"` is not allowed in an XML name"));

// NOTE(review): this message is not produced in this file; presumably it
// comes from the `write_primitive!` macro -- confirm
err!(bytes: Bytes(b"<\"escaped & bytes'>")
=> Unsupported("`serialize_bytes` not supported yet"));

// `None` is expected to produce empty output, `Some` to be transparent
serialize_as!(option_none: Option::<&str>::None => "");
serialize_as!(option_some: Some("non-escaped-string") => "non-escaped-string");

// Unit and unit-like types
err!(unit: ()
=> Unsupported("unit type `()` cannot be serialized as an XML tag name"));
serialize_as!(unit_struct: Unit => "Unit");
err!(unit_struct_escaped: UnitEscaped => Unsupported("character `<` is not allowed at the start of an XML name"));

serialize_as!(enum_unit: Enum::Unit => "Unit");
err!(enum_unit_escaped: Enum::UnitEscaped => Unsupported("character `<` is not allowed at the start of an XML name"));

// A newtype struct is expected to serialize as its inner value, while
// an enum newtype variant is rejected
serialize_as!(newtype: Newtype(true) => "true");
err!(enum_newtype: Enum::Newtype(false)
=> Unsupported("enum newtype variant `Enum::Newtype` cannot be serialized as an XML tag name"));

// Compound types are always rejected
err!(seq: vec![1, 2, 3]
=> Unsupported("sequence cannot be serialized as an XML tag name"));
err!(tuple: ("<\"&'>", "with\t\r\n spaces", 3usize)
=> Unsupported("tuple cannot be serialized as an XML tag name"));
err!(tuple_struct: Tuple("first", 42)
=> Unsupported("tuple struct `Tuple` cannot be serialized as an XML tag name"));
err!(enum_tuple: Enum::Tuple("first", 42)
=> Unsupported("enum tuple variant `Enum::Tuple` cannot be serialized as an XML tag name"));

err!(map: BTreeMap::from([("_1", 2), ("_3", 4)])
=> Unsupported("map cannot be serialized as an XML tag name"));
err!(struct_: Struct { key: "answer", val: 42 }
=> Unsupported("struct `Struct` cannot be serialized as an XML tag name"));
err!(enum_struct: Enum::Struct { key: "answer", val: 42 }
=> Unsupported("enum struct variant `Enum::Struct` cannot be serialized as an XML tag name"));
}

0 comments on commit 54159d0

Please sign in to comment.