diff --git a/Changelog.md b/Changelog.md index 64bb395f..6854dfc6 100644 --- a/Changelog.md +++ b/Changelog.md @@ -22,6 +22,7 @@ way to access decoding functionality is via this struct - [#191]: New event variant `StartText` emitted for bytes before the XML declaration or a start comment or a tag. For streams with BOM this event will contain a BOM +- [#395]: Add support for XML Schema `xs:list` ### Bug Fixes @@ -104,6 +105,7 @@ [#387]: https://github.com/tafia/quick-xml/pull/387 [#391]: https://github.com/tafia/quick-xml/pull/391 [#393]: https://github.com/tafia/quick-xml/pull/393 +[#395]: https://github.com/tafia/quick-xml/pull/395 ## 0.23.0 -- 2022-05-08 diff --git a/src/de/map.rs b/src/de/map.rs index 2ee697d2..efe98ab1 100644 --- a/src/de/map.rs +++ b/src/de/map.rs @@ -3,6 +3,7 @@ use crate::{ de::escape::EscapedDeserializer, de::seq::{not_in, TagFilter}, + de::simple_type::SimpleTypeDeserializer, de::{deserialize_bool, DeEvent, Deserializer, XmlRead, INNER_VALUE, UNFLATTEN_PREFIX}, errors::serialize::DeError, events::attributes::IterState, @@ -35,7 +36,10 @@ enum ValueSource { /// represented or by an ordinary text node, or by a CDATA node: /// /// ```xml - /// <...>text content for field value<...> + /// + /// text content + /// + /// /// ``` /// ```xml /// @@ -200,8 +204,8 @@ where ) -> Result { Ok(MapAccess { de, + iter: IterState::new(start.name().as_ref().len(), false), start, - iter: IterState::new(0, false), source: ValueSource::Unknown, fields, has_value_field: fields.contains(&INNER_VALUE), @@ -226,8 +230,8 @@ where ) -> Result, Self::Error> { debug_assert_eq!(self.source, ValueSource::Unknown); - // FIXME: There error positions counted from end of tag name - need global position - let slice = self.start.attributes_raw(); + // FIXME: There error positions counted from the start of tag name - need global position + let slice = &self.start.buf; let decoder = self.de.reader.decoder(); if let Some(a) = self.iter.next(slice).transpose()? 
{ @@ -305,16 +309,12 @@ where seed: K, ) -> Result { match std::mem::replace(&mut self.source, ValueSource::Unknown) { - ValueSource::Attribute(value) => { - let slice = self.start.attributes_raw(); - let decoder = self.de.reader.decoder(); - - seed.deserialize(EscapedDeserializer::new( - Cow::Borrowed(&slice[value]), - decoder, - true, - )) - } + ValueSource::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part( + &self.start.buf, + value, + true, + self.de.reader.decoder(), + )), // This arm processes the following XML shape: // // text value @@ -323,10 +323,21 @@ where // is implicit and equals to the `INNER_VALUE` constant, and the value // is a `Text` or a `CData` event (the value deserializer will see one // of that events) - ValueSource::Text => seed.deserialize(MapValueDeserializer { - map: self, - allow_start: false, - }), + // This case are checked by "xml_schema_lists::element" tests in tests/serde-de.rs + ValueSource::Text => match self.de.next()? { + DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_cow( + e.into_inner(), + true, + self.de.reader.decoder(), + )), + DeEvent::CData(e) => seed.deserialize(SimpleTypeDeserializer::from_cow( + e.into_inner(), + false, + self.de.reader.decoder(), + )), + // SAFETY: We set `Text` only when we seen `Text` or `CData` + _ => unreachable!(), + }, // This arm processes the following XML shape: // // ... @@ -612,8 +623,140 @@ where DeEvent::Eof => Err(DeError::UnexpectedEof), // Start(tag), Text, CData - _ => seed.deserialize(&mut *self.map.de).map(Some), + _ => seed + .deserialize(SeqValueDeserializer { map: self.map }) + .map(Some), }; } } } + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// A deserializer for a value of sequence. +struct SeqValueDeserializer<'de, 'a, 'm, R> +where + R: XmlRead<'de>, +{ + /// Access to the map that created this deserializer. 
Gives access to the + /// context, such as list of fields, that current map known about. + map: &'m mut MapAccess<'de, 'a, R>, +} + +impl<'de, 'a, 'm, R> SeqValueDeserializer<'de, 'a, 'm, R> +where + R: XmlRead<'de>, +{ + /// Returns a text event, used inside [`deserialize_primitives!()`] + #[inline] + fn next_text(&mut self, unescape: bool) -> Result, DeError> { + self.map.de.next_text_impl(unescape, true) + } + + /// Returns a decoder, used inside [`deserialize_primitives!()`] + #[inline] + fn decoder(&self) -> Decoder { + self.map.de.reader.decoder() + } +} + +impl<'de, 'a, 'm, R> de::Deserializer<'de> for SeqValueDeserializer<'de, 'a, 'm, R> +where + R: XmlRead<'de>, +{ + type Error = DeError; + + deserialize_primitives!(mut); + + forward!(deserialize_option); + forward!(deserialize_unit); + forward!(deserialize_unit_struct(name: &'static str)); + forward!(deserialize_newtype_struct(name: &'static str)); + + forward!(deserialize_map); + forward!(deserialize_struct( + name: &'static str, + fields: &'static [&'static str] + )); + + forward!(deserialize_enum( + name: &'static str, + variants: &'static [&'static str] + )); + + forward!(deserialize_any); + forward!(deserialize_ignored_any); + + /// Representation of tuples the same as [sequences](#method.deserialize_seq). + fn deserialize_tuple(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + /// Representation of named tuples the same as [unnamed tuples](#method.deserialize_tuple). + fn deserialize_tuple_struct( + self, + _name: &'static str, + len: usize, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_tuple(len, visitor) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self.map.de.next()? 
{ + DeEvent::Text(e) => SimpleTypeDeserializer::from_cow( + // Comment to prevent auto-formatting and keep Text and Cdata similar + e.into_inner(), + true, + self.map.de.reader.decoder(), + ) + .deserialize_seq(visitor), + DeEvent::CData(e) => SimpleTypeDeserializer::from_cow( + e.into_inner(), + false, + self.map.de.reader.decoder(), + ) + .deserialize_seq(visitor), + // This is a sequence element. We cannot treat it as another flatten + // sequence if type will require `deserialize_seq` We instead forward + // it to `xs:simpleType` implementation + DeEvent::Start(e) => { + let value = match self.map.de.next()? { + DeEvent::Text(e) => SimpleTypeDeserializer::from_cow( + e.into_inner(), + true, + self.map.de.reader.decoder(), + ) + .deserialize_seq(visitor), + DeEvent::CData(e) => SimpleTypeDeserializer::from_cow( + e.into_inner(), + false, + self.map.de.reader.decoder(), + ) + .deserialize_seq(visitor), + e => Err(DeError::Custom(format!("Unsupported event {:?}", e))), + }; + // TODO: May be assert that here we expect only matching closing tag? + self.map.de.read_to_end(e.name())?; + value + } + // SAFETY: we use that deserializer only when Start(element), Text, + // or CData event Start(tag), Text, CData was peeked already + _ => unreachable!(), + } + } + + #[inline] + fn is_human_readable(&self) -> bool { + self.map.de.is_human_readable() + } +} diff --git a/src/de/mod.rs b/src/de/mod.rs index e252ada7..426c82fd 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -215,6 +215,7 @@ macro_rules! 
deserialize_primitives { mod escape; mod map; mod seq; +mod simple_type; mod var; pub use crate::errors::serialize::DeError; @@ -331,6 +332,21 @@ where // TODO: According to the https://www.w3.org/TR/xmlschema-2/#boolean, // valid boolean representations are only "true", "false", "1", and "0" +fn str2bool<'de, V>(value: &str, visitor: V) -> Result +where + V: de::Visitor<'de>, +{ + match value { + "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => { + visitor.visit_bool(true) + } + "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => { + visitor.visit_bool(false) + } + _ => Err(DeError::InvalidBoolean(value.into())), + } +} + fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result where V: Visitor<'de>, @@ -339,15 +355,7 @@ where { let value = decoder.decode(value)?; // No need to unescape because valid boolean representations cannot be escaped - match value.as_ref() { - "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => { - visitor.visit_bool(true) - } - "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => { - visitor.visit_bool(false) - } - _ => Err(DeError::InvalidBoolean(value.into())), - } + str2bool(value.as_ref(), visitor) } #[cfg(not(feature = "encoding"))] diff --git a/src/de/simple_type.rs b/src/de/simple_type.rs new file mode 100644 index 00000000..b9a865b6 --- /dev/null +++ b/src/de/simple_type.rs @@ -0,0 +1,1293 @@ +//! Contains Serde `Deserializer` for XML [simple types] [as defined] in the XML Schema. +//! +//! [simple types]: https://www.w3schools.com/xml/el_simpletype.asp +//! 
[as defined]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition + +use crate::de::{deserialize_bool, str2bool}; +use crate::errors::serialize::DeError; +use crate::escape::unescape; +use crate::reader::Decoder; +use memchr::memchr; +use serde::de::{DeserializeSeed, Deserializer, EnumAccess, SeqAccess, VariantAccess, Visitor}; +use serde::{self, serde_if_integer128}; +use std::borrow::Cow; +use std::ops::{Deref, Range}; + +macro_rules! deserialize_num { + ($method:ident, $visit:ident) => { + fn $method(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.$visit(self.content.as_str().parse()?) + } + }; + ($method:ident => $visit:ident) => { + fn $method(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let string = self.decode()?; + visitor.$visit(string.as_str().parse()?) + } + }; +} + +macro_rules! unsupported { + ( + $deserialize:ident + $( + ($($type:ty),*) + )? + => $message:literal + ) => { + #[inline] + fn $deserialize>( + self, + $($(_: $type,)*)? + _visitor: V + ) -> Result { + Err(DeError::Unsupported($message)) + } + }; +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// A version of [`Cow`] that can borrow from two different buffers, one of them +/// is a deserializer input, and conceptually contains only part of owned data. +/// +/// # Lifetimes +/// - `'de` -- lifetime of the data that deserializer borrow from the parsed input +/// - `'a` -- lifetime of the data that owned by a deserializer +enum Content<'de, 'a> { + /// An input borrowed from the parsed data + Input(&'de str), + /// An input borrowed from the buffer owned by another deserializer + Slice(&'a str), + /// An input taken from an external deserializer, owned by that deserializer. 
+ /// Only part of this data, located after offset represented by `usize`, used + /// to deserialize data, the other is a garbage that can't be dropped because + /// we do not want to make reallocations if they will not required. + Owned(String, usize), +} +impl<'de, 'a> Content<'de, 'a> { + /// Returns string representation of the content + fn as_str(&self) -> &str { + match self { + Content::Input(s) => s, + Content::Slice(s) => s, + Content::Owned(s, offset) => s.split_at(*offset).1, + } + } + + /// Supply to the visitor a borrowed string, a string slice, or an owned + /// string depending on the kind of input. Unlike [`Self::deserialize_item`], + /// the whole [`Self::Owned`] string will be passed to the visitor. + /// + /// Calls + /// - `visitor.visit_borrowed_str` if data borrowed from the input + /// - `visitor.visit_str` if data borrowed from another source + /// - `visitor.visit_string` if data owned by this type + #[inline] + fn deserialize_all(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + Content::Input(s) => visitor.visit_borrowed_str(s), + Content::Slice(s) => visitor.visit_str(s), + Content::Owned(s, _) => visitor.visit_string(s), + } + } + + /// Supply to the visitor a borrowed string, a string slice, or an owned + /// string depending on the kind of input. Unlike [`Self::deserialize_all`], + /// only part of [`Self::Owned`] string will be passed to the visitor. 
+ /// + /// Calls + /// - `visitor.visit_borrowed_str` if data borrowed from the input + /// - `visitor.visit_str` if data borrowed from another source + /// - `visitor.visit_string` if data owned by this type + #[inline] + fn deserialize_item(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self { + Content::Input(s) => visitor.visit_borrowed_str(s), + Content::Slice(s) => visitor.visit_str(s), + Content::Owned(s, 0) => visitor.visit_string(s), + Content::Owned(s, offset) => visitor.visit_str(s.split_at(offset).1), + } + } +} + +/// A deserializer that handles ordinary [simple type definition][item] with +/// `{variety} = atomic`, or an ordinary [simple type] definition with +/// `{variety} = union` whose basic members are all atomic. +/// +/// This deserializer can deserialize only primitive types: +/// - numbers +/// - booleans +/// - strings +/// - units +/// - options +/// - unit variants of enums +/// +/// Identifiers represented as strings and deserialized accordingly. +/// +/// Deserialization of all other types returns [`Unsupported`][DeError::Unsupported] error. +/// +/// The `Owned` variant of the content acts as a storage for data, allocated by +/// an external deserializer that pass it via [`ListIter`]. 
+/// +/// [item]: https://www.w3.org/TR/xmlschema11-1/#std-item_type_definition +/// [simple type]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition +struct AtomicDeserializer<'de, 'a> { + /// Content of the attribute value, text content or CDATA content + content: Content<'de, 'a>, + /// If `true`, `content` in an escaped form and should be unescaped before use + escaped: bool, +} + +impl<'de, 'a> Deserializer<'de> for AtomicDeserializer<'de, 'a> { + type Error = DeError; + + /// Forwards deserialization to the [`Self::deserialize_str`] + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + /// According to the , + /// valid boolean representations are only `"true"`, `"false"`, `"1"`, + /// and `"0"`. But this method also handles following: + /// + /// |`bool` |XML content + /// |-------|------------------------------------------------------------- + /// |`true` |`"True"`, `"TRUE"`, `"t"`, `"Yes"`, `"YES"`, `"yes"`, `"y"` + /// |`false`|`"False"`, `"FALSE"`, `"f"`, `"No"`, `"NO"`, `"no"`, `"n"` + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + str2bool(self.content.as_str(), visitor) + } + + deserialize_num!(deserialize_i8, visit_i8); + deserialize_num!(deserialize_i16, visit_i16); + deserialize_num!(deserialize_i32, visit_i32); + deserialize_num!(deserialize_i64, visit_i64); + + deserialize_num!(deserialize_u8, visit_u8); + deserialize_num!(deserialize_u16, visit_u16); + deserialize_num!(deserialize_u32, visit_u32); + deserialize_num!(deserialize_u64, visit_u64); + + serde_if_integer128! 
{ + deserialize_num!(deserialize_i128, visit_i128); + deserialize_num!(deserialize_u128, visit_u128); + } + + deserialize_num!(deserialize_f32, visit_f32); + deserialize_num!(deserialize_f64, visit_f64); + + /// Forwards deserialization to the [`Self::deserialize_str`] + fn deserialize_char(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + /// Supply to the visitor borrowed string, string slice, or owned string + /// depending on the kind of input and presence of the escaped data. + /// + /// If string requires unescaping, then calls [`Visitor::visit_string`] with + /// new allocated buffer with unescaped data. + /// + /// Otherwise calls + /// - [`Visitor::visit_borrowed_str`] if data borrowed from the input + /// - [`Visitor::visit_str`] if data borrowed from other deserializer + /// - [`Visitor::visit_string`] if data owned by this deserializer + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + if self.escaped { + match unescape(self.content.as_str().as_bytes())? 
{ + Cow::Borrowed(_) => self.content.deserialize_item(visitor), + Cow::Owned(buf) => visitor.visit_string(String::from_utf8(buf)?), + } + } else { + self.content.deserialize_item(visitor) + } + } + + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + /// If `content` is an empty string then calls [`Visitor::visit_none`], + /// otherwise calls [`Visitor::visit_some`] with itself + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + if self.content.as_str().is_empty() { + visitor.visit_none() + } else { + visitor.visit_some(self) + } + } + + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } + + /// Forwards deserialization to the [`Self::deserialize_unit`] + fn deserialize_unit_struct( + self, + _name: &'static str, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_unit(visitor) + } + + fn deserialize_newtype_struct( + self, + _name: &'static str, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + visitor.visit_enum(self) + } + + /// Forwards deserialization to the [`Self::deserialize_str`] + fn deserialize_identifier(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } + + unsupported!(deserialize_bytes => "byte arrays are not supported as `xs:list` items"); + unsupported!(deserialize_byte_buf => "byte arrays are not supported as `xs:list` items"); + unsupported!(deserialize_seq => "sequences are not supported as `xs:list` items"); + unsupported!(deserialize_tuple(usize) => "tuples are not supported as `xs:list` items"); + 
unsupported!(deserialize_tuple_struct(&'static str, usize) => "tuples are not supported as `xs:list` items"); + unsupported!(deserialize_map => "maps are not supported as `xs:list` items"); + unsupported!(deserialize_struct(&'static str, &'static [&'static str]) => "structures are not supported as `xs:list` items"); +} + +impl<'de, 'a> EnumAccess<'de> for AtomicDeserializer<'de, 'a> { + type Error = DeError; + type Variant = AtomicUnitOnly; + + fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), DeError> + where + V: DeserializeSeed<'de>, + { + let name = seed.deserialize(self)?; + Ok((name, AtomicUnitOnly)) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Deserializer of variant data, that supports only unit variants. +/// Attempt to deserialize newtype, tuple or struct variant will return a +/// [`DeError::Unsupported`] error. +pub struct AtomicUnitOnly; +impl<'de> VariantAccess<'de> for AtomicUnitOnly { + type Error = DeError; + + #[inline] + fn unit_variant(self) -> Result<(), DeError> { + Ok(()) + } + + fn newtype_variant_seed(self, _seed: T) -> Result + where + T: DeserializeSeed<'de>, + { + Err(DeError::Unsupported( + "enum newtype variants are not supported as `xs:list` items", + )) + } + + fn tuple_variant(self, _len: usize, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(DeError::Unsupported( + "enum tuple variants are not supported as `xs:list` items", + )) + } + + fn struct_variant( + self, + _fields: &'static [&'static str], + _visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + Err(DeError::Unsupported( + "enum struct variants are not supported as `xs:list` items", + )) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Iterator over string sub-slices delimited by one or several spaces. +/// Contains decoded value of the `simpleType`. +/// Iteration ends when list contains `None`. 
+struct ListIter<'de, 'a> {
+    /// If `Some`, contains the not yet consumed data of the list
+    content: Option<Content<'de, 'a>>,
+    /// If `true`, `content` is in escaped form and should be unescaped before use
+    escaped: bool,
+}
+impl<'de, 'a> SeqAccess<'de> for ListIter<'de, 'a> {
+    type Error = DeError;
+
+    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>, DeError>
+    where
+        T: DeserializeSeed<'de>,
+    {
+        if let Some(mut content) = self.content.take() {
+            const DELIMITER: u8 = b' ';
+
+            loop {
+                let string = content.as_str();
+                if string.is_empty() {
+                    return Ok(None);
+                }
+                return match memchr(DELIMITER, string.as_bytes()) {
+                    // No delimiters in the `content`, deserialize it as a whole atomic
+                    None => seed.deserialize(AtomicDeserializer {
+                        content,
+                        escaped: self.escaped,
+                    }),
+                    // `content` starts with a space, skip the whole run of spaces
+                    Some(0) => {
+                        // Position of the first byte that is not a delimiter
+                        let start = string.as_bytes().iter().position(|ch| *ch != DELIMITER);
+                        content = match (start, content) {
+                            // We cannot find any non-space character, so string contains only spaces
+                            (None, _) => return Ok(None),
+                            // Borrow result from input or deserializer depending on the initial borrowing
+                            (Some(start), Content::Input(s)) => Content::Input(s.split_at(start).1),
+                            (Some(start), Content::Slice(s)) => Content::Slice(s.split_at(start).1),
+                            // Owned data is never re-sliced, only the offset of the live part moves
+                            (Some(start), Content::Owned(s, skip)) => {
+                                Content::Owned(s, skip + start)
+                            }
+                        };
+                        continue;
+                    }
+                    // `content` starts with an atomic that ends at `end` (relative to `string`)
+                    Some(end) => match content {
+                        // Borrow for the next iteration from input or deserializer depending on
+                        // the initial borrowing
+                        Content::Input(s) => {
+                            let (item, rest) = s.split_at(end);
+                            self.content = Some(Content::Input(rest));
+
+                            seed.deserialize(AtomicDeserializer {
+                                content: Content::Input(item),
+                                escaped: self.escaped,
+                            })
+                        }
+                        Content::Slice(s) => {
+                            let (item, rest) = s.split_at(end);
+                            self.content = Some(Content::Slice(rest));
+
+                            seed.deserialize(AtomicDeserializer {
+                                content: Content::Slice(item),
+                                escaped: self.escaped,
+                            })
+                        }
+                        // The buffer is owned, so the item can only be lent to the seed. Lend just
+                        // `s[skip..skip + end]`: bytes before `skip` were consumed by earlier calls
+                        Content::Owned(s, skip) => {
+                            let item = &s[skip..skip + end];
+                            let result = seed.deserialize(AtomicDeserializer {
+                                content: Content::Slice(item),
+                                escaped: self.escaped,
+                            });
+
+                            self.content = Some(Content::Owned(s, skip + end));
+
+                            result
+                        }
+                    },
+                }
+                .map(Some);
+            }
+        }
+        Ok(None)
+    }
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/// A version of [`Cow`] that can borrow from two different buffers, one of them
+/// is a deserializer input.
+///
+/// # Lifetimes
+/// - `'de` -- lifetime of the data that deserializer borrow from the parsed input
+/// - `'a` -- lifetime of the data that owned by a deserializer
+enum CowRef<'de, 'a> {
+    /// An input borrowed from the parsed data
+    Input(&'de [u8]),
+    /// An input borrowed from the buffer owned by another deserializer
+    Slice(&'a [u8]),
+    /// An input taken from an external deserializer, owned by that deserializer
+    Owned(Vec<u8>),
+}
+impl<'de, 'a> Deref for CowRef<'de, 'a> {
+    type Target = [u8];
+
+    fn deref(&self) -> &[u8] {
+        match self {
+            Self::Input(slice) => slice,
+            Self::Slice(slice) => slice,
+            Self::Owned(ref v) => v,
+        }
+    }
+}
+
+/// A deserializer for an xml probably escaped and encoded value of XSD [simple types].
+/// This deserializer will borrow from the input as much as possible.
+///
+/// `deserialize_any()` returns the whole string that deserializer contains.
+///
+/// Escaping the value is actually not always necessary, for instance when
+/// converting to a float, we don't expect any escapable character anyway.
+/// In that cases deserializer skips unescaping step.
+///
+/// Used for deserialize values from:
+/// - attribute values (`<... 
...="value" ...>`) +/// - text content (`<...>text`) +/// - CDATA content (`<...>`) +/// +/// [simple types]: https://www.w3.org/TR/xmlschema11-1/#Simple_Type_Definition +pub struct SimpleTypeDeserializer<'de, 'a> { + /// - In case of attribute contains escaped attribute value + /// - In case of text contains escaped text value + /// - In case of CData contains unescaped cdata value + content: CowRef<'de, 'a>, + /// If `true`, `content` in escaped form and should be unescaped before use + escaped: bool, + /// Decoder used to deserialize string data, numeric and boolean data. + /// Not used for deserializing raw byte buffers + decoder: Decoder, +} + +impl<'de, 'a> SimpleTypeDeserializer<'de, 'a> { + /// Creates a deserializer from a value, that possible borrowed from input + pub fn from_cow(value: Cow<'de, [u8]>, escaped: bool, decoder: Decoder) -> Self { + let content = match value { + Cow::Borrowed(slice) => CowRef::Input(slice), + Cow::Owned(content) => CowRef::Owned(content), + }; + Self::new(content, escaped, decoder) + } + + /// Creates a deserializer from a part of value at specified range + pub fn from_part( + value: &'a Cow<'de, [u8]>, + range: Range, + escaped: bool, + decoder: Decoder, + ) -> Self { + let content = match value { + Cow::Borrowed(slice) => CowRef::Input(&slice[range]), + Cow::Owned(slice) => CowRef::Slice(&slice[range]), + }; + Self::new(content, escaped, decoder) + } + + /// Constructor for tests + #[inline] + fn new(content: CowRef<'de, 'a>, escaped: bool, decoder: Decoder) -> Self { + Self { + content, + escaped, + decoder, + } + } + + /// Decodes raw bytes using the encoding specified. + /// The method will borrow if has the UTF-8 compatible representation. + #[inline] + fn decode<'b>(&'b self) -> Result, DeError> { + Ok(match self.content { + CowRef::Input(content) => match self.decoder.decode(content)? 
{ + Cow::Borrowed(content) => Content::Input(content), + Cow::Owned(content) => Content::Owned(content, 0), + }, + CowRef::Slice(content) => match self.decoder.decode(content)? { + Cow::Borrowed(content) => Content::Slice(content), + Cow::Owned(content) => Content::Owned(content, 0), + }, + CowRef::Owned(ref content) => match self.decoder.decode(content)? { + Cow::Borrowed(content) => Content::Slice(content), + Cow::Owned(content) => Content::Owned(content, 0), + }, + }) + } +} + +impl<'de, 'a> Deserializer<'de> for SimpleTypeDeserializer<'de, 'a> { + type Error = DeError; + + /// Forwards deserialization to the [`Self::deserialize_str`] + fn deserialize_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_bool(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + deserialize_bool(&self.content, self.decoder, visitor) + } + + deserialize_num!(deserialize_i8 => visit_i8); + deserialize_num!(deserialize_i16 => visit_i16); + deserialize_num!(deserialize_i32 => visit_i32); + deserialize_num!(deserialize_i64 => visit_i64); + + deserialize_num!(deserialize_u8 => visit_u8); + deserialize_num!(deserialize_u16 => visit_u16); + deserialize_num!(deserialize_u32 => visit_u32); + deserialize_num!(deserialize_u64 => visit_u64); + + serde_if_integer128! { + deserialize_num!(deserialize_i128 => visit_i128); + deserialize_num!(deserialize_u128 => visit_u128); + } + + deserialize_num!(deserialize_f32 => visit_f32); + deserialize_num!(deserialize_f64 => visit_f64); + + /// Forwards deserialization to the [`Self::deserialize_str`] + fn deserialize_char(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_str(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + let content = self.decode()?; + if self.escaped { + match unescape(content.as_str().as_bytes())? 
{ + Cow::Borrowed(_) => content.deserialize_all(visitor), + Cow::Owned(buf) => visitor.visit_string(String::from_utf8(buf)?), + } + } else { + content.deserialize_all(visitor) + } + } + + /// Forwards deserialization to the [`Self::deserialize_str`] + fn deserialize_string(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_bytes(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + match self.content { + CowRef::Input(content) => visitor.visit_borrowed_bytes(content), + CowRef::Slice(content) => visitor.visit_bytes(content), + CowRef::Owned(content) => visitor.visit_byte_buf(content), + } + } + + /// Forwards deserialization to the [`Self::deserialize_bytes`] + fn deserialize_byte_buf(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_bytes(visitor) + } + + fn deserialize_option(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + if self.content.is_empty() { + visitor.visit_none() + } else { + visitor.visit_some(self) + } + } + + fn deserialize_unit(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } + + /// Forwards deserialization to the [`Self::deserialize_unit`] + fn deserialize_unit_struct( + self, + _name: &'static str, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_unit(visitor) + } + + fn deserialize_newtype_struct( + self, + _name: &'static str, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + visitor.visit_newtype_struct(self) + } + + fn deserialize_seq(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_seq(ListIter { + content: Some(self.decode()?), + escaped: self.escaped, + }) + } + + /// Representation of tuples the same as [sequences][Self::deserialize_seq]. 
+ fn deserialize_tuple(self, _len: usize, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_seq(visitor) + } + + /// Representation of named tuples the same as [unnamed tuples][Self::deserialize_tuple]. + fn deserialize_tuple_struct( + self, + _name: &'static str, + len: usize, + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + self.deserialize_tuple(len, visitor) + } + + unsupported!(deserialize_map => "maps are not supported for XSD `simpleType`s"); + unsupported!(deserialize_struct(&'static str, &'static [&'static str]) + => "structures are not supported for XSD `simpleType`s"); + + fn deserialize_enum( + self, + _name: &'static str, + _variants: &'static [&'static str], + visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + visitor.visit_enum(self) + } + + /// Forwards deserialization to the [`Self::deserialize_str`] + fn deserialize_identifier(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + self.deserialize_str(visitor) + } + + fn deserialize_ignored_any(self, visitor: V) -> Result + where + V: Visitor<'de>, + { + visitor.visit_unit() + } +} + +impl<'de, 'a> EnumAccess<'de> for SimpleTypeDeserializer<'de, 'a> { + type Error = DeError; + type Variant = SimpleTypeUnitOnly; + + fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), DeError> + where + V: DeserializeSeed<'de>, + { + let name = seed.deserialize(self)?; + Ok((name, SimpleTypeUnitOnly)) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Deserializer of variant data, that supports only unit variants. +/// Attempt to deserialize newtype, tuple or struct variant will return a +/// [`DeError::Unsupported`] error. 
+pub struct SimpleTypeUnitOnly; +impl<'de> VariantAccess<'de> for SimpleTypeUnitOnly { + type Error = DeError; + + #[inline] + fn unit_variant(self) -> Result<(), DeError> { + Ok(()) + } + + fn newtype_variant_seed(self, _seed: T) -> Result + where + T: DeserializeSeed<'de>, + { + Err(DeError::Unsupported( + "enum newtype variants are not supported for XSD `simpleType`s", + )) + } + + fn tuple_variant(self, _len: usize, _visitor: V) -> Result + where + V: Visitor<'de>, + { + Err(DeError::Unsupported( + "enum tuple variants are not supported for XSD `simpleType`s", + )) + } + + fn struct_variant( + self, + _fields: &'static [&'static str], + _visitor: V, + ) -> Result + where + V: Visitor<'de>, + { + Err(DeError::Unsupported( + "enum struct variants are not supported for XSD `simpleType`s", + )) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +#[cfg(test)] +mod tests { + use super::*; + use crate::utils::{ByteBuf, Bytes}; + use serde::de::IgnoredAny; + use serde::Deserialize; + use std::collections::HashMap; + + macro_rules! simple { + ($encoding:ident, $name:ident: $type:ty = $xml:expr => $result:expr) => { + #[test] + fn $name() { + let decoder = Decoder::$encoding(); + let xml = $xml; + let de = SimpleTypeDeserializer::new(CowRef::Input(xml.as_ref()), true, decoder); + let data: $type = Deserialize::deserialize(de).unwrap(); + + assert_eq!(data, $result); + } + }; + } + + macro_rules! 
err { + ($encoding:ident, $name:ident: $type:ty = $xml:expr => $kind:ident($reason:literal)) => { + #[test] + fn $name() { + let decoder = Decoder::$encoding(); + let xml = $xml; + let de = SimpleTypeDeserializer::new(CowRef::Input(xml.as_ref()), true, decoder); + let err = <$type as Deserialize>::deserialize(de).unwrap_err(); + + match err { + DeError::$kind(e) => assert_eq!(e, $reason), + _ => panic!( + "Expected `{}({})`, found `{:?}`", + stringify!($kind), + $reason, + err + ), + } + } + }; + } + + #[derive(Debug, Deserialize, PartialEq)] + struct Unit; + + #[derive(Debug, Deserialize, PartialEq)] + struct Newtype(String); + + #[derive(Debug, Deserialize, PartialEq)] + struct BorrowedNewtype<'a>(&'a str); + + #[derive(Debug, Deserialize, PartialEq)] + struct Struct { + key: String, + val: usize, + } + + #[derive(Debug, Deserialize, PartialEq)] + enum Enum { + Unit, + Newtype(String), + Tuple(String, usize), + Struct { key: String, val: usize }, + } + + #[derive(Debug, Deserialize, PartialEq)] + #[serde(field_identifier)] + enum Id { + Field, + } + + #[derive(Debug, Deserialize)] + struct Any(IgnoredAny); + impl PartialEq for Any { + fn eq(&self, _other: &Any) -> bool { + true + } + } + + /// Tests for deserialize atomic and union values, as defined in XSD specification + mod atomic { + use super::*; + use pretty_assertions::assert_eq; + + /// Checks that given `$input` successfully deserializing into given `$result` + macro_rules! deserialized_to { + ($name:ident: $type:ty = $input:literal => $result:expr) => { + #[test] + fn $name() { + let de = AtomicDeserializer { + content: Content::Input($input), + escaped: true, + }; + let data: $type = Deserialize::deserialize(de).unwrap(); + + assert_eq!(data, $result); + } + }; + } + + /// Checks that attempt to deserialize given `$input` as a `$type` results to a + /// deserialization error `$kind` with `$reason` + macro_rules! 
err { + ($name:ident: $type:ty = $input:literal => $kind:ident($reason:literal)) => { + #[test] + fn $name() { + let de = AtomicDeserializer { + content: Content::Input($input), + escaped: true, + }; + let err = <$type as Deserialize>::deserialize(de).unwrap_err(); + + match err { + DeError::$kind(e) => assert_eq!(e, $reason), + _ => panic!( + "Expected `{}({})`, found `{:?}`", + stringify!($kind), + $reason, + err + ), + } + } + }; + } + + deserialized_to!(any_owned: String = "<escaped string" => " "non-escaped string"); + + deserialized_to!(false_: bool = "false" => false); + deserialized_to!(true_: bool = "true" => true); + + deserialized_to!(i8_: i8 = "-2" => -2); + deserialized_to!(i16_: i16 = "-2" => -2); + deserialized_to!(i32_: i32 = "-2" => -2); + deserialized_to!(i64_: i64 = "-2" => -2); + + deserialized_to!(u8_: u8 = "3" => 3); + deserialized_to!(u16_: u16 = "3" => 3); + deserialized_to!(u32_: u32 = "3" => 3); + deserialized_to!(u64_: u64 = "3" => 3); + + serde_if_integer128! 
{ + deserialized_to!(i128_: i128 = "-2" => -2); + deserialized_to!(u128_: u128 = "2" => 2); + } + + deserialized_to!(f32_: f32 = "1.23" => 1.23); + deserialized_to!(f64_: f64 = "1.23" => 1.23); + + deserialized_to!(char_unescaped: char = "h" => 'h'); + deserialized_to!(char_escaped: char = "<" => '<'); + + deserialized_to!(string: String = "<escaped string" => " "non-escaped string"); + err!(escaped_str: &str = "escaped string" + => Custom("invalid type: string \"escaped string\", expected a borrowed string")); + + err!(byte_buf: ByteBuf = "<escaped string" + => Unsupported("byte arrays are not supported as `xs:list` items")); + err!(borrowed_bytes: Bytes = "non-escaped string" + => Unsupported("byte arrays are not supported as `xs:list` items")); + + deserialized_to!(option_none: Option<&str> = "" => None); + deserialized_to!(option_some: Option<&str> = "non-escaped string" => Some("non-escaped string")); + + deserialized_to!(unit: () = "anything" => ()); + deserialized_to!(unit_struct: Unit = "anything" => Unit); + + deserialized_to!(newtype_owned: Newtype = "<escaped string" => Newtype(" BorrowedNewtype("non-escaped string")); + + err!(seq: Vec<()> = "non-escaped string" + => Unsupported("sequences are not supported as `xs:list` items")); + err!(tuple: ((), ()) = "non-escaped string" + => Unsupported("tuples are not supported as `xs:list` items")); + err!(tuple_struct: ((), ()) = "non-escaped string" + => Unsupported("tuples are not supported as `xs:list` items")); + + err!(map: HashMap<(), ()> = "non-escaped string" + => Unsupported("maps are not supported as `xs:list` items")); + err!(struct_: Struct = "non-escaped string" + => Unsupported("structures are not supported as `xs:list` items")); + + deserialized_to!(enum_unit: Enum = "Unit" => Enum::Unit); + err!(enum_newtype: Enum = "Newtype" + => Unsupported("enum newtype variants are not supported as `xs:list` items")); + err!(enum_tuple: Enum = "Tuple" + => Unsupported("enum tuple variants are not supported as 
`xs:list` items")); + err!(enum_struct: Enum = "Struct" + => Unsupported("enum struct variants are not supported as `xs:list` items")); + err!(enum_other: Enum = "any data" + => Custom("unknown variant `any data`, expected one of `Unit`, `Newtype`, `Tuple`, `Struct`")); + + deserialized_to!(identifier: Id = "Field" => Id::Field); + deserialized_to!(ignored_any: Any = "any data" => Any(IgnoredAny)); + + /// Checks that deserialization from an owned content is working + #[test] + #[cfg(feature = "encoding")] + fn owned_data() { + let de = AtomicDeserializer { + content: Content::Owned("string slice".into(), 7), + escaped: true, + }; + assert_eq!(de.content.as_str(), "slice"); + + let data: String = Deserialize::deserialize(de).unwrap(); + assert_eq!(data, "slice"); + } + + /// Checks that deserialization from a content borrowed from some + /// buffer other that input is working + #[test] + fn borrowed_from_deserializer() { + let de = AtomicDeserializer { + content: Content::Slice("string slice"), + escaped: true, + }; + assert_eq!(de.content.as_str(), "string slice"); + + let data: String = Deserialize::deserialize(de).unwrap(); + assert_eq!(data, "string slice"); + } + } + + /// Module for testing list accessor + mod list { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn empty() { + let mut seq = ListIter { + content: Some(Content::Input("")), + escaped: true, + }; + + assert_eq!(seq.next_element::<&str>().unwrap(), None); + assert_eq!(seq.next_element::<&str>().unwrap(), None); + } + + #[test] + fn only_spaces() { + let mut seq = ListIter { + content: Some(Content::Input(" ")), + escaped: true, + }; + + assert_eq!(seq.next_element::<&str>().unwrap(), None); + assert_eq!(seq.next_element::<&str>().unwrap(), None); + } + + #[test] + fn one_item() { + let mut seq = ListIter { + content: Some(Content::Input("abc")), + escaped: true, + }; + + assert_eq!(seq.next_element::<&str>().unwrap(), Some("abc")); + 
assert_eq!(seq.next_element::<&str>().unwrap(), None); + assert_eq!(seq.next_element::<&str>().unwrap(), None); + } + + #[test] + fn two_items() { + let mut seq = ListIter { + content: Some(Content::Input("abc def")), + escaped: true, + }; + + assert_eq!(seq.next_element::<&str>().unwrap(), Some("abc")); + assert_eq!(seq.next_element::<&str>().unwrap(), Some("def")); + assert_eq!(seq.next_element::<&str>().unwrap(), None); + assert_eq!(seq.next_element::<&str>().unwrap(), None); + } + + #[test] + fn leading_spaces() { + let mut seq = ListIter { + content: Some(Content::Input(" def")), + escaped: true, + }; + + assert_eq!(seq.next_element::<&str>().unwrap(), Some("def")); + assert_eq!(seq.next_element::<&str>().unwrap(), None); + assert_eq!(seq.next_element::<&str>().unwrap(), None); + } + + #[test] + fn trailing_spaces() { + let mut seq = ListIter { + content: Some(Content::Input("abc ")), + escaped: true, + }; + + assert_eq!(seq.next_element::<&str>().unwrap(), Some("abc")); + assert_eq!(seq.next_element::<&str>().unwrap(), None); + assert_eq!(seq.next_element::<&str>().unwrap(), None); + } + + #[test] + fn mixed_types() { + let mut seq = ListIter { + content: Some(Content::Input("string 1.23 42 true false h Unit")), + escaped: true, + }; + + assert_eq!(seq.next_element::<&str>().unwrap(), Some("string")); + assert_eq!(seq.next_element::().unwrap(), Some(1.23)); + assert_eq!(seq.next_element::().unwrap(), Some(42)); + assert_eq!(seq.next_element::().unwrap(), Some(true)); + assert_eq!(seq.next_element::().unwrap(), Some(false)); + assert_eq!(seq.next_element::().unwrap(), Some('h')); + assert_eq!(seq.next_element::().unwrap(), Some(Enum::Unit)); + assert_eq!(seq.next_element::<()>().unwrap(), None); + assert_eq!(seq.next_element::<()>().unwrap(), None); + } + } + + mod utf8 { + use super::*; + use pretty_assertions::assert_eq; + + simple!(utf8, i8_: i8 = "-2" => -2); + simple!(utf8, i16_: i16 = "-2" => -2); + simple!(utf8, i32_: i32 = "-2" => -2); + simple!(utf8, 
i64_: i64 = "-2" => -2); + + simple!(utf8, u8_: u8 = "3" => 3); + simple!(utf8, u16_: u16 = "3" => 3); + simple!(utf8, u32_: u32 = "3" => 3); + simple!(utf8, u64_: u64 = "3" => 3); + + serde_if_integer128! { + simple!(utf8, i128_: i128 = "-2" => -2); + simple!(utf8, u128_: u128 = "2" => 2); + } + + simple!(utf8, f32_: f32 = "1.23" => 1.23); + simple!(utf8, f64_: f64 = "1.23" => 1.23); + + simple!(utf8, false_: bool = "false" => false); + simple!(utf8, true_: bool = "true" => true); + simple!(utf8, char_unescaped: char = "h" => 'h'); + simple!(utf8, char_escaped: char = "<" => '<'); + + simple!(utf8, string: String = "<escaped string" => " ByteBuf(b"<escaped string".to_vec())); + + simple!(utf8, borrowed_str: &str = "non-escaped string" => "non-escaped string"); + simple!(utf8, borrowed_bytes: Bytes = "<escaped string" => Bytes(b"<escaped string")); + + simple!(utf8, option_none: Option<&str> = "" => None); + simple!(utf8, option_some: Option<&str> = "non-escaped string" => Some("non-escaped string")); + + simple!(utf8, unit: () = "any data" => ()); + simple!(utf8, unit_struct: Unit = "any data" => Unit); + + simple!(utf8, newtype_owned: Newtype = "<escaped string" => Newtype(" BorrowedNewtype("non-escaped string")); + + err!(utf8, map: HashMap<(), ()> = "any data" + => Unsupported("maps are not supported for XSD `simpleType`s")); + err!(utf8, struct_: Struct = "any data" + => Unsupported("structures are not supported for XSD `simpleType`s")); + + simple!(utf8, enum_unit: Enum = "Unit" => Enum::Unit); + err!(utf8, enum_newtype: Enum = "Newtype" + => Unsupported("enum newtype variants are not supported for XSD `simpleType`s")); + err!(utf8, enum_tuple: Enum = "Tuple" + => Unsupported("enum tuple variants are not supported for XSD `simpleType`s")); + err!(utf8, enum_struct: Enum = "Struct" + => Unsupported("enum struct variants are not supported for XSD `simpleType`s")); + err!(utf8, enum_other: Enum = "any data" + => Custom("unknown variant `any data`, expected one 
of `Unit`, `Newtype`, `Tuple`, `Struct`")); + + simple!(utf8, identifier: Id = "Field" => Id::Field); + simple!(utf8, ignored_any: Any = "any data" => Any(IgnoredAny)); + } + + #[cfg(feature = "encoding")] + mod utf16 { + use super::*; + use pretty_assertions::assert_eq; + + fn to_utf16(string: &str) -> Vec { + let mut bytes = Vec::new(); + for ch in string.encode_utf16() { + bytes.extend(&ch.to_le_bytes()); + } + bytes + } + + macro_rules! utf16 { + ($name:ident: $type:ty = $xml:literal => $result:expr) => { + simple!(utf16, $name: $type = to_utf16($xml) => $result); + }; + } + + macro_rules! unsupported { + ($name:ident: $type:ty = $xml:literal => $err:literal) => { + err!(utf16, $name: $type = to_utf16($xml) => Unsupported($err)); + }; + } + + utf16!(i8_: i8 = "-2" => -2); + utf16!(i16_: i16 = "-2" => -2); + utf16!(i32_: i32 = "-2" => -2); + utf16!(i64_: i64 = "-2" => -2); + + utf16!(u8_: u8 = "3" => 3); + utf16!(u16_: u16 = "3" => 3); + utf16!(u32_: u32 = "3" => 3); + utf16!(u64_: u64 = "3" => 3); + + serde_if_integer128! 
{ + utf16!(i128_: i128 = "-2" => -2); + utf16!(u128_: u128 = "2" => 2); + } + + utf16!(f32_: f32 = "1.23" => 1.23); + utf16!(f64_: f64 = "1.23" => 1.23); + + utf16!(false_: bool = "false" => false); + utf16!(true_: bool = "true" => true); + utf16!(char_unescaped: char = "h" => 'h'); + utf16!(char_escaped: char = "<" => '<'); + + utf16!(string: String = "<escaped string" => " ByteBuf(to_utf16("<escaped string"))); + + utf16!(option_none: Option<()> = "" => None); + utf16!(option_some: Option<()> = "any data" => Some(())); + + utf16!(unit: () = "any data" => ()); + utf16!(unit_struct: Unit = "any data" => Unit); + + utf16!(newtype_owned: Newtype = "<escaped string" => Newtype(" Custom("invalid type: string \"non-escaped string\", expected a borrowed string")); + + unsupported!(map: HashMap<(), ()> = "any data" + => "maps are not supported for XSD `simpleType`s"); + unsupported!(struct_: Struct = "any data" + => "structures are not supported for XSD `simpleType`s"); + + utf16!(enum_unit: Enum = "Unit" => Enum::Unit); + err!(utf16, enum_newtype: Enum = to_utf16("Newtype") + => Unsupported("enum newtype variants are not supported for XSD `simpleType`s")); + err!(utf16, enum_tuple: Enum = to_utf16("Tuple") + => Unsupported("enum tuple variants are not supported for XSD `simpleType`s")); + err!(utf16, enum_struct: Enum = to_utf16("Struct") + => Unsupported("enum struct variants are not supported for XSD `simpleType`s")); + err!(utf16, enum_other: Enum = to_utf16("any data") + => Custom("unknown variant `any data`, expected one of `Unit`, `Newtype`, `Tuple`, `Struct`")); + + utf16!(identifier: Id = "Field" => Id::Field); + utf16!(ignored_any: Any = "any data" => Any(IgnoredAny)); + } +} diff --git a/src/events/mod.rs b/src/events/mod.rs index 0d799e7a..ea321017 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -121,9 +121,9 @@ impl<'a> From> for BytesStartText<'a> { #[derive(Clone, Eq, PartialEq)] pub struct BytesStart<'a> { /// content of the element, before any 
utf8 conversion - buf: Cow<'a, [u8]>, + pub(crate) buf: Cow<'a, [u8]>, /// end of the element name, the name starts at that the start of `buf` - name_len: usize, + pub(crate) name_len: usize, } impl<'a> BytesStart<'a> { diff --git a/src/reader.rs b/src/reader.rs index 4c556a8b..c3d0bbf3 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -1549,16 +1549,23 @@ impl Decoder { } } +/// This implementation is required for tests of other parts of the library +#[cfg(test)] +#[cfg(feature = "serialize")] impl Decoder { - /// This implementation is required for tests of other parts of the library - #[cfg(test)] - #[cfg(feature = "serialize")] pub(crate) fn utf8() -> Self { Decoder { #[cfg(feature = "encoding")] encoding: encoding_rs::UTF_8, } } + + #[cfg(feature = "encoding")] + pub(crate) fn utf16() -> Self { + Decoder { + encoding: encoding_rs::UTF_16LE, + } + } } /// Automatic encoding detection of XML files based using the [recommended algorithm] diff --git a/tests/serde-de.rs b/tests/serde-de.rs index 1b100ff1..be9e153e 100644 --- a/tests/serde-de.rs +++ b/tests/serde-de.rs @@ -1039,6 +1039,134 @@ mod seq { ) .unwrap_err(); } + + /// Checks that sequences represented by elements can contain sequences, + /// represented by [`xs:list`s](https://www.w3schools.com/xml/el_list.asp) + mod xs_list { + use super::*; + use pretty_assertions::assert_eq; + + /// Special case: zero elements + #[test] + fn zero() { + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements, inner -- to `xs:list`. + /// + /// `#[serde(default)]` is required to correctly deserialize + /// empty sequence, because without elements the field + /// also is missing and derived `Deserialize` implementation + /// would complain about that unless field is marked as + /// `default`. 
+ #[serde(default)] + item: [Vec; 0], + } + + let data: List = from_str( + r#" + + + "#, + ) + .unwrap(); + + assert_eq!(data, List { item: [] }); + } + + /// Special case: one element + #[test] + fn one() { + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements, inner -- to `xs:list`. + /// + /// `#[serde(default)]` is not required, because correct + /// XML will always contains at least 1 element. + item: [Vec; 1], + } + + let data: List = from_str( + r#" + + first list + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + item: [vec!["first".to_string(), "list".to_string()]] + } + ); + } + + /// Special case: outer list is always mapped to an elements sequence, + /// not to an `xs:list` + #[test] + fn element() { + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements, inner -- to `xs:list`. + /// + /// `#[serde(default)]` is not required, because correct + /// XML will always contains at least 1 element. + item: [String; 1], + } + + let data: List = from_str( + r#" + + first item + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + item: ["first item".to_string()] + } + ); + } + + /// This tests demonstrates, that for `$value` field (`list`) actual + /// name of XML element (`item`) does not matter. That allows list + /// item to be an enum, where tag name determines enum variant + #[test] + fn many() { + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements, inner -- to `xs:list`. + /// + /// `#[serde(default)]` is not required, because correct + /// XML will always contains at least 1 element. 
+ item: [Vec; 2], + } + + let data: List = from_str( + r#" + + first list + second list + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + item: [ + vec!["first".to_string(), "list".to_string()], + vec!["second".to_string(), "list".to_string()], + ] + } + ); + } + } } /// This module contains tests where size of the list have an unspecified size @@ -1505,6 +1633,115 @@ mod seq { ) .unwrap_err(); } + + /// Checks that sequences represented by elements can contain sequences, + /// represented by `xs:list`s + mod xs_list { + use super::*; + use pretty_assertions::assert_eq; + + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements, inner -- to `xs:list`. + /// `#[serde(default)]` is required to correctly deserialize + /// empty sequence, because without elements the field + /// also is missing and derived `Deserialize` implementation + /// would complain about that unless field is marked as + /// `default`. + #[serde(default)] + item: Vec>, + } + + /// Special case: zero elements + #[test] + fn zero() { + let data: List = from_str( + r#" + + + "#, + ) + .unwrap(); + + assert_eq!(data, List { item: vec![] }); + } + + /// Special case: one element + #[test] + fn one() { + let data: List = from_str( + r#" + + first list + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + item: vec![vec!["first".to_string(), "list".to_string()]] + } + ); + } + + /// Special case: outer list is always mapped to an elements sequence, + /// not to an `xs:list` + #[test] + fn element() { + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements, inner -- to `xs:list`. + /// + /// `#[serde(default)]` is not required, because correct + /// XML will always contains at least 1 element. 
+ item: Vec, + } + + let data: List = from_str( + r#" + + first item + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + item: vec!["first item".to_string()] + } + ); + } + + /// This tests demonstrates, that for `$value` field (`list`) actual + /// name of XML element (`item`) does not matter. That allows list + /// item to be an enum, where tag name determines enum variant + #[test] + fn many() { + let data: List = from_str( + r#" + + first list + second list + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + item: vec![ + vec!["first".to_string(), "list".to_string()], + vec!["second".to_string(), "list".to_string()], + ] + } + ); + } + } } } @@ -2349,6 +2586,132 @@ mod seq { ) .unwrap_err(); } + + /// Checks that sequences represented by elements can contain sequences, + /// represented by `xs:list`s + mod xs_list { + use super::*; + use pretty_assertions::assert_eq; + + /// Special case: zero elements + #[test] + fn zero() { + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements, inner -- to `xs:list`. + /// + /// `#[serde(default)]` is required to correctly deserialize + /// empty sequence, because without elements the field + /// also is missing and derived `Deserialize` implementation + /// would complain about that unless field is marked as + /// `default`. + #[serde(default)] + #[serde(rename = "$value")] + element: [Vec; 0], + } + + let data: List = from_str( + r#" + + + "#, + ) + .unwrap(); + + assert_eq!(data, List { element: [] }); + } + + /// Special case: one element + #[test] + fn one() { + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements, inner -- to `xs:list`. + /// + /// `#[serde(default)]` is not required, because correct + /// XML will always contains at least 1 element. 
+ #[serde(rename = "$value")] + element: [Vec; 1], + } + + let data: List = from_str( + r#" + + first list + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + element: [vec!["first".to_string(), "list".to_string()]] + } + ); + } + + /// Special case: outer list is always mapped to an elements sequence, + /// not to an `xs:list` + #[test] + fn element() { + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements, inner -- to `xs:list`. + /// + /// `#[serde(default)]` is not required, because correct + /// XML will always contains at least 1 element. + #[serde(rename = "$value")] + element: [String; 1], + } + + let data: List = from_str( + r#" + + first item + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + element: ["first item".to_string()] + } + ); + } + + #[test] + fn many() { + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements, inner -- to `xs:list` + #[serde(rename = "$value")] + element: [Vec; 2], + } + + let data: List = from_str( + r#" + + first list + second list + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + element: [ + vec!["first".to_string(), "list".to_string()], + vec!["second".to_string(), "list".to_string()], + ] + } + ); + } + } } /// This module contains tests where size of the list have an unspecified size @@ -3032,6 +3395,115 @@ mod seq { ) .unwrap_err(); } + + /// Checks that sequences represented by elements can contain sequences, + /// represented by `xs:list`s + mod xs_list { + use super::*; + use pretty_assertions::assert_eq; + + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements, inner -- to `xs:list`. + /// + /// `#[serde(default)]` is required to correctly deserialize + /// empty sequence, because without elements the field + /// also is missing and derived `Deserialize` implementation + /// would complain about that unless field is marked as + /// `default`. 
+ #[serde(default)] + #[serde(rename = "$value")] + element: Vec>, + } + + /// Special case: zero elements + #[test] + fn zero() { + let data: List = from_str( + r#" + + + "#, + ) + .unwrap(); + + assert_eq!(data, List { element: vec![] }); + } + + /// Special case: one element + #[test] + fn one() { + let data: List = from_str( + r#" + + first list + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + element: vec![vec!["first".to_string(), "list".to_string()]] + } + ); + } + + /// Special case: outer list is always mapped to an elements sequence, + /// not to an `xs:list` + #[test] + fn element() { + #[derive(Debug, Deserialize, PartialEq)] + struct List { + /// Outer list mapped to elements. + #[serde(rename = "$value")] + element: Vec, + } + + let data: List = from_str( + r#" + + first item + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + element: vec!["first item".to_string()] + } + ); + } + + /// This tests demonstrates, that for `$value` field (`list`) actual + /// name of XML element (`item`) does not matter. That allows list + /// item to be an enum, where tag name determines enum variant + #[test] + fn many() { + let data: List = from_str( + r#" + + first list + second list + + "#, + ) + .unwrap(); + + assert_eq!( + data, + List { + element: vec![ + vec!["first".to_string(), "list".to_string()], + vec!["second".to_string(), "list".to_string()], + ] + } + ); + } + } } } } @@ -4127,3 +4599,169 @@ mod enum_ { } } } + +/// https://www.w3schools.com/xml/el_list.asp +mod xml_schema_lists { + use super::*; + + macro_rules! list { + ($name:ident: $type:ty = $xml:literal => $result:expr) => { + #[test] + fn $name() { + let data: List<$type> = from_str($xml).unwrap(); + + assert_eq!(data, List { list: $result }); + } + }; + } + + macro_rules! 
err { + ($name:ident: $type:ty = $xml:literal => $kind:ident($err:literal)) => { + #[test] + fn $name() { + let err = from_str::>($xml).unwrap_err(); + + match err { + DeError::$kind(e) => assert_eq!(e, $err), + _ => panic!( + "Expected `{}({})`, found `{:?}`", + stringify!($kind), + $err, + err + ), + } + } + }; + } + + /// Checks that sequences can be deserialized from an XML attribute content + /// according to the `xs:list` XML Schema type + mod attribute { + use super::*; + use pretty_assertions::assert_eq; + + #[derive(Debug, Deserialize, PartialEq)] + struct List { + list: Vec, + } + + list!(i8_: i8 = r#""# => vec![1, -2, 3]); + list!(i16_: i16 = r#""# => vec![1, -2, 3]); + list!(i32_: i32 = r#""# => vec![1, -2, 3]); + list!(i64_: i64 = r#""# => vec![1, -2, 3]); + + list!(u8_: u8 = r#""# => vec![1, 2, 3]); + list!(u16_: u16 = r#""# => vec![1, 2, 3]); + list!(u32_: u32 = r#""# => vec![1, 2, 3]); + list!(u64_: u64 = r#""# => vec![1, 2, 3]); + + serde_if_integer128! { + list!(i128_: i128 = r#""# => vec![1, -2, 3]); + list!(u128_: u128 = r#""# => vec![1, 2, 3]); + } + + list!(f32_: f32 = r#""# => vec![1.23, -4.56, 7.89]); + list!(f64_: f64 = r#""# => vec![1.23, -4.56, 7.89]); + + list!(bool_: bool = r#""# => vec![true, false, true]); + list!(char_: char = r#""# => vec!['4', '2', 'j']); + + list!(string: String = r#""# => vec![ + "first".to_string(), + "second".to_string(), + "third 3".to_string(), + ]); + err!(byte_buf: ByteBuf = r#""# + => Unsupported("byte arrays are not supported as `xs:list` items")); + + list!(unit: () = r#""# => vec![(), (), ()]); + } + + /// Checks that sequences can be deserialized from an XML text content + /// according to the `xs:list` XML Schema type + mod element { + use super::*; + + #[derive(Debug, Deserialize, PartialEq)] + struct List { + // Give it a special name that means text content of the XML node + #[serde(rename = "$value")] + list: Vec, + } + + mod text { + use super::*; + use pretty_assertions::assert_eq; + + 
list!(i8_: i8 = "1 -2 3" => vec![1, -2, 3]); + list!(i16_: i16 = "1 -2 3" => vec![1, -2, 3]); + list!(i32_: i32 = "1 -2 3" => vec![1, -2, 3]); + list!(i64_: i64 = "1 -2 3" => vec![1, -2, 3]); + + list!(u8_: u8 = "1 2 3" => vec![1, 2, 3]); + list!(u16_: u16 = "1 2 3" => vec![1, 2, 3]); + list!(u32_: u32 = "1 2 3" => vec![1, 2, 3]); + list!(u64_: u64 = "1 2 3" => vec![1, 2, 3]); + + serde_if_integer128! { + list!(i128_: i128 = "1 -2 3" => vec![1, -2, 3]); + list!(u128_: u128 = "1 2 3" => vec![1, 2, 3]); + } + + list!(f32_: f32 = "1.23 -4.56 7.89" => vec![1.23, -4.56, 7.89]); + list!(f64_: f64 = "1.23 -4.56 7.89" => vec![1.23, -4.56, 7.89]); + + list!(bool_: bool = "true false true" => vec![true, false, true]); + list!(char_: char = "4 2 j" => vec!['4', '2', 'j']); + + list!(string: String = "first second third 3" => vec![ + "first".to_string(), + "second".to_string(), + "third 3".to_string(), + ]); + err!(byte_buf: ByteBuf = "first second third 3" + => Unsupported("byte arrays are not supported as `xs:list` items")); + + list!(unit: () = "1 second false" => vec![(), (), ()]); + } + + mod cdata { + use super::*; + use pretty_assertions::assert_eq; + + list!(i8_: i8 = "" => vec![1, -2, 3]); + list!(i16_: i16 = "" => vec![1, -2, 3]); + list!(i32_: i32 = "" => vec![1, -2, 3]); + list!(i64_: i64 = "" => vec![1, -2, 3]); + + list!(u8_: u8 = "" => vec![1, 2, 3]); + list!(u16_: u16 = "" => vec![1, 2, 3]); + list!(u32_: u32 = "" => vec![1, 2, 3]); + list!(u64_: u64 = "" => vec![1, 2, 3]); + + serde_if_integer128! 
{ + list!(i128_: i128 = "" => vec![1, -2, 3]); + list!(u128_: u128 = "" => vec![1, 2, 3]); + } + + list!(f32_: f32 = "" => vec![1.23, -4.56, 7.89]); + list!(f64_: f64 = "" => vec![1.23, -4.56, 7.89]); + + list!(bool_: bool = "" => vec![true, false, true]); + list!(char_: char = "" => vec!['4', '2', 'j']); + + // Cannot get whitespace in the value in any way if CDATA used: + // - literal spaces means list item delimiters + // - escaped sequences are not decoded in CDATA + list!(string: String = "" => vec![ + "first".to_string(), + "second".to_string(), + "third 3".to_string(), + ]); + err!(byte_buf: ByteBuf = "first second third 3" + => Unsupported("byte arrays are not supported as `xs:list` items")); + + list!(unit: () = "1 second false" => vec![(), (), ()]); + } + } +}