Skip to content

Commit

Permalink
xs:list: Implement deserialization of xs:lists
Browse files Browse the repository at this point in the history
  • Loading branch information
Mingun committed Jun 20, 2022
1 parent df65de4 commit 3155bd6
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 20 deletions.
2 changes: 2 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
way to access decoding functionality is via this struct
- [#191]: New event variant `StartText` emitted for bytes before the XML declaration
or a start comment or a tag. For streams with BOM this event will contain a BOM
- [#395]: Add support for XML Schema `xs:list`

### Bug Fixes

Expand Down Expand Up @@ -104,6 +105,7 @@
[#387]: https://github.com/tafia/quick-xml/pull/387
[#391]: https://github.com/tafia/quick-xml/pull/391
[#393]: https://github.com/tafia/quick-xml/pull/393
[#395]: https://github.com/tafia/quick-xml/pull/395

## 0.23.0 -- 2022-05-08

Expand Down
47 changes: 29 additions & 18 deletions src/de/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
use crate::{
de::escape::EscapedDeserializer,
de::seq::{not_in, TagFilter},
de::simple_type::SimpleTypeDeserializer,
de::{deserialize_bool, DeEvent, Deserializer, XmlRead, INNER_VALUE, UNFLATTEN_PREFIX},
errors::serialize::DeError,
events::attributes::IterState,
Expand Down Expand Up @@ -35,7 +36,10 @@ enum ValueSource {
/// represented or by an ordinary text node, or by a CDATA node:
///
/// ```xml
/// <...>text content for field value<...>
/// <any-tag>
/// <key>text content</key>
/// <!-- ^^^^^^^^^^^^ - this will be used to deserialize map value -->
/// </any-tag>
/// ```
/// ```xml
/// <any-tag>
Expand Down Expand Up @@ -200,8 +204,8 @@ where
) -> Result<Self, DeError> {
Ok(MapAccess {
de,
iter: IterState::new(start.name().as_ref().len(), false),
start,
iter: IterState::new(0, false),
source: ValueSource::Unknown,
fields,
has_value_field: fields.contains(&INNER_VALUE),
Expand All @@ -226,8 +230,8 @@ where
) -> Result<Option<K::Value>, Self::Error> {
debug_assert_eq!(self.source, ValueSource::Unknown);

// FIXME: There error positions counted from end of tag name - need global position
let slice = self.start.attributes_raw();
// FIXME: The error positions here are counted from the start of the tag name - need global position
let slice = &self.start.buf;
let decoder = self.de.reader.decoder();

if let Some(a) = self.iter.next(slice).transpose()? {
Expand Down Expand Up @@ -305,16 +309,12 @@ where
seed: K,
) -> Result<K::Value, Self::Error> {
match std::mem::replace(&mut self.source, ValueSource::Unknown) {
ValueSource::Attribute(value) => {
let slice = self.start.attributes_raw();
let decoder = self.de.reader.decoder();

seed.deserialize(EscapedDeserializer::new(
Cow::Borrowed(&slice[value]),
decoder,
true,
))
}
ValueSource::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part(
&self.start.buf,
value,
true,
self.de.reader.decoder(),
)),
// This arm processes the following XML shape:
// <any-tag>
// text value
Expand All @@ -323,10 +323,21 @@ where
// is implicit and equal to the `INNER_VALUE` constant, and the value
// is a `Text` or a `CData` event (the value deserializer will see one
// of those events)
ValueSource::Text => seed.deserialize(MapValueDeserializer {
map: self,
allow_start: false,
}),
// This case is checked by the "xml_schema_lists::element" tests in tests/serde-de.rs
ValueSource::Text => match self.de.next()? {
DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
e.into_inner(),
true,
self.de.reader.decoder(),
)),
DeEvent::CData(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
e.into_inner(),
false,
self.de.reader.decoder(),
)),
// SAFETY: We set `Text` only when we have seen `Text` or `CData`
_ => unreachable!(),
},
// This arm processes the following XML shape:
// <any-tag>
// <any>...</any>
Expand Down
4 changes: 2 additions & 2 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ impl<'a> From<BytesText<'a>> for BytesStartText<'a> {
#[derive(Clone, Eq, PartialEq)]
pub struct BytesStart<'a> {
/// content of the element, before any utf8 conversion
buf: Cow<'a, [u8]>,
pub(crate) buf: Cow<'a, [u8]>,
/// end of the element name, the name starts at the start of `buf`
name_len: usize,
pub(crate) name_len: usize,
}

impl<'a> BytesStart<'a> {
Expand Down
166 changes: 166 additions & 0 deletions tests/serde-de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4127,3 +4127,169 @@ mod enum_ {
}
}
}

/// Tests for deserialization of sequences from values of the XML Schema
/// `xs:list` type (a list of items delimited by spaces).
///
/// See <https://www.w3schools.com/xml/el_list.asp>
mod xml_schema_lists {
    use super::*;

    /// Generates a test named `$name` that deserializes `$xml` into a
    /// `List<$type>` and checks that its `list` field is equal to `$result`.
    ///
    /// `List` is not defined here: every module that invokes this macro
    /// provides its own `List` struct, which determines where in the XML
    /// the list is read from.
    macro_rules! list {
        ($name:ident: $type:ty = $xml:literal => $result:expr) => {
            #[test]
            fn $name() {
                let data: List<$type> = from_str($xml).unwrap();

                assert_eq!(data, List { list: $result });
            }
        };
    }

    /// Generates a test named `$name` that expects deserialization of `$xml`
    /// into a `List<$type>` to fail with the `DeError::$kind` variant whose
    /// payload is equal to `$err`. Any other error variant fails the test.
    macro_rules! err {
        ($name:ident: $type:ty = $xml:literal => $kind:ident($err:literal)) => {
            #[test]
            fn $name() {
                let err = from_str::<List<$type>>($xml).unwrap_err();

                match err {
                    DeError::$kind(e) => assert_eq!(e, $err),
                    _ => panic!(
                        "Expected `{}({})`, found `{:?}`",
                        stringify!($kind),
                        $err,
                        err
                    ),
                }
            }
        };
    }

    /// Checks that sequences can be deserialized from an XML attribute content
    /// according to the `xs:list` XML Schema type
    mod attribute {
        use super::*;
        use pretty_assertions::assert_eq;

        /// Target type of the tests: the `list` field is filled from the
        /// `list` attribute of the root element.
        #[derive(Debug, Deserialize, PartialEq)]
        struct List<T> {
            list: Vec<T>,
        }

        list!(i8_: i8 = r#"<root list="1 -2 3"/>"# => vec![1, -2, 3]);
        list!(i16_: i16 = r#"<root list="1 -2 3"/>"# => vec![1, -2, 3]);
        list!(i32_: i32 = r#"<root list="1 -2 3"/>"# => vec![1, -2, 3]);
        list!(i64_: i64 = r#"<root list="1 -2 3"/>"# => vec![1, -2, 3]);

        list!(u8_: u8 = r#"<root list="1 2 3"/>"# => vec![1, 2, 3]);
        list!(u16_: u16 = r#"<root list="1 2 3"/>"# => vec![1, 2, 3]);
        list!(u32_: u32 = r#"<root list="1 2 3"/>"# => vec![1, 2, 3]);
        list!(u64_: u64 = r#"<root list="1 2 3"/>"# => vec![1, 2, 3]);

        serde_if_integer128! {
            list!(i128_: i128 = r#"<root list="1 -2 3"/>"# => vec![1, -2, 3]);
            list!(u128_: u128 = r#"<root list="1 2 3"/>"# => vec![1, 2, 3]);
        }

        list!(f32_: f32 = r#"<root list="1.23 -4.56 7.89"/>"# => vec![1.23, -4.56, 7.89]);
        list!(f64_: f64 = r#"<root list="1.23 -4.56 7.89"/>"# => vec![1.23, -4.56, 7.89]);

        list!(bool_: bool = r#"<root list="true false true"/>"# => vec![true, false, true]);
        list!(char_: char = r#"<root list="4 2 j"/>"# => vec!['4', '2', 'j']);

        // A literal space delimits list items, so a space inside of an item
        // is written as the `&#x20;` character reference
        list!(string: String = r#"<root list="first second third&#x20;3"/>"# => vec![
            "first".to_string(),
            "second".to_string(),
            "third 3".to_string(),
        ]);
        err!(byte_buf: ByteBuf = r#"<root list="first second third&#x20;3"/>"#
            => Unsupported("byte arrays are not supported as `xs:list` items"));

        list!(unit: () = r#"<root list="1 second false"/>"# => vec![(), (), ()]);
    }

    /// Checks that sequences can be deserialized from an XML text content
    /// according to the `xs:list` XML Schema type
    mod element {
        use super::*;

        /// Target type of the tests: the `list` field is filled from the
        /// content of the root element.
        #[derive(Debug, Deserialize, PartialEq)]
        struct List<T> {
            // Give it a special name that means text content of the XML node
            #[serde(rename = "$value")]
            list: Vec<T>,
        }

        /// The list is written as an ordinary text node
        mod text {
            use super::*;
            use pretty_assertions::assert_eq;

            list!(i8_: i8 = "<root>1 -2 3</root>" => vec![1, -2, 3]);
            list!(i16_: i16 = "<root>1 -2 3</root>" => vec![1, -2, 3]);
            list!(i32_: i32 = "<root>1 -2 3</root>" => vec![1, -2, 3]);
            list!(i64_: i64 = "<root>1 -2 3</root>" => vec![1, -2, 3]);

            list!(u8_: u8 = "<root>1 2 3</root>" => vec![1, 2, 3]);
            list!(u16_: u16 = "<root>1 2 3</root>" => vec![1, 2, 3]);
            list!(u32_: u32 = "<root>1 2 3</root>" => vec![1, 2, 3]);
            list!(u64_: u64 = "<root>1 2 3</root>" => vec![1, 2, 3]);

            serde_if_integer128! {
                list!(i128_: i128 = "<root>1 -2 3</root>" => vec![1, -2, 3]);
                list!(u128_: u128 = "<root>1 2 3</root>" => vec![1, 2, 3]);
            }

            list!(f32_: f32 = "<root>1.23 -4.56 7.89</root>" => vec![1.23, -4.56, 7.89]);
            list!(f64_: f64 = "<root>1.23 -4.56 7.89</root>" => vec![1.23, -4.56, 7.89]);

            list!(bool_: bool = "<root>true false true</root>" => vec![true, false, true]);
            list!(char_: char = "<root>4 2 j</root>" => vec!['4', '2', 'j']);

            list!(string: String = "<root>first second third&#x20;3</root>" => vec![
                "first".to_string(),
                "second".to_string(),
                "third 3".to_string(),
            ]);
            err!(byte_buf: ByteBuf = "<root>first second third&#x20;3</root>"
                => Unsupported("byte arrays are not supported as `xs:list` items"));

            list!(unit: () = "<root>1 second false</root>" => vec![(), (), ()]);
        }

        /// The list is written as a CDATA section
        mod cdata {
            use super::*;
            use pretty_assertions::assert_eq;

            list!(i8_: i8 = "<root><![CDATA[1 -2 3]]></root>" => vec![1, -2, 3]);
            list!(i16_: i16 = "<root><![CDATA[1 -2 3]]></root>" => vec![1, -2, 3]);
            list!(i32_: i32 = "<root><![CDATA[1 -2 3]]></root>" => vec![1, -2, 3]);
            list!(i64_: i64 = "<root><![CDATA[1 -2 3]]></root>" => vec![1, -2, 3]);

            list!(u8_: u8 = "<root><![CDATA[1 2 3]]></root>" => vec![1, 2, 3]);
            list!(u16_: u16 = "<root><![CDATA[1 2 3]]></root>" => vec![1, 2, 3]);
            list!(u32_: u32 = "<root><![CDATA[1 2 3]]></root>" => vec![1, 2, 3]);
            list!(u64_: u64 = "<root><![CDATA[1 2 3]]></root>" => vec![1, 2, 3]);

            serde_if_integer128! {
                list!(i128_: i128 = "<root><![CDATA[1 -2 3]]></root>" => vec![1, -2, 3]);
                list!(u128_: u128 = "<root><![CDATA[1 2 3]]></root>" => vec![1, 2, 3]);
            }

            list!(f32_: f32 = "<root><![CDATA[1.23 -4.56 7.89]]></root>" => vec![1.23, -4.56, 7.89]);
            list!(f64_: f64 = "<root><![CDATA[1.23 -4.56 7.89]]></root>" => vec![1.23, -4.56, 7.89]);

            list!(bool_: bool = "<root><![CDATA[true false true]]></root>" => vec![true, false, true]);
            list!(char_: char = "<root><![CDATA[4 2 j]]></root>" => vec!['4', '2', 'j']);

            // Cannot get whitespace in the value in any way if CDATA is used:
            // - literal spaces mean list item delimiters
            // - escape sequences are not decoded in CDATA
            list!(string: String = "<root><![CDATA[first second third&#x20;3]]></root>" => vec![
                "first".to_string(),
                "second".to_string(),
                "third&#x20;3".to_string(),
            ]);
            // The inputs of the two tests below must be CDATA sections as well,
            // otherwise they only repeat the checks from the `text` module
            err!(byte_buf: ByteBuf = "<root><![CDATA[first second third&#x20;3]]></root>"
                => Unsupported("byte arrays are not supported as `xs:list` items"));

            list!(unit: () = "<root><![CDATA[1 second false]]></root>" => vec![(), (), ()]);
        }
    }
}

0 comments on commit 3155bd6

Please sign in to comment.