Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for XML Schema xs:list type #395

Merged
merged 6 commits into from Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions Changelog.md
Expand Up @@ -22,6 +22,7 @@
way to access decoding functionality is via this struct
- [#191]: New event variant `StartText` emitted for bytes before the XML declaration
or a start comment or a tag. For streams with BOM this event will contain a BOM
- [#395]: Add support for XML Schema `xs:list`

### Bug Fixes

Expand Down Expand Up @@ -104,6 +105,7 @@
[#387]: https://github.com/tafia/quick-xml/pull/387
[#391]: https://github.com/tafia/quick-xml/pull/391
[#393]: https://github.com/tafia/quick-xml/pull/393
[#395]: https://github.com/tafia/quick-xml/pull/395

## 0.23.0 -- 2022-05-08

Expand Down
181 changes: 162 additions & 19 deletions src/de/map.rs
Expand Up @@ -3,6 +3,7 @@
use crate::{
de::escape::EscapedDeserializer,
de::seq::{not_in, TagFilter},
de::simple_type::SimpleTypeDeserializer,
de::{deserialize_bool, DeEvent, Deserializer, XmlRead, INNER_VALUE, UNFLATTEN_PREFIX},
errors::serialize::DeError,
events::attributes::IterState,
Expand Down Expand Up @@ -35,7 +36,10 @@ enum ValueSource {
/// represented or by an ordinary text node, or by a CDATA node:
///
/// ```xml
/// <...>text content for field value<...>
/// <any-tag>
/// <key>text content</key>
/// <!-- ^^^^^^^^^^^^ - this will be used to deserialize map value -->
/// </any-tag>
/// ```
/// ```xml
/// <any-tag>
Expand Down Expand Up @@ -200,8 +204,8 @@ where
) -> Result<Self, DeError> {
Ok(MapAccess {
de,
iter: IterState::new(start.name().as_ref().len(), false),
start,
iter: IterState::new(0, false),
source: ValueSource::Unknown,
fields,
has_value_field: fields.contains(&INNER_VALUE),
Expand All @@ -226,8 +230,8 @@ where
) -> Result<Option<K::Value>, Self::Error> {
debug_assert_eq!(self.source, ValueSource::Unknown);

// FIXME: There error positions counted from end of tag name - need global position
let slice = self.start.attributes_raw();
// FIXME: The error positions here are counted from the start of the tag name - need global position
let slice = &self.start.buf;
let decoder = self.de.reader.decoder();

if let Some(a) = self.iter.next(slice).transpose()? {
Expand Down Expand Up @@ -305,16 +309,12 @@ where
seed: K,
) -> Result<K::Value, Self::Error> {
match std::mem::replace(&mut self.source, ValueSource::Unknown) {
ValueSource::Attribute(value) => {
let slice = self.start.attributes_raw();
let decoder = self.de.reader.decoder();

seed.deserialize(EscapedDeserializer::new(
Cow::Borrowed(&slice[value]),
decoder,
true,
))
}
ValueSource::Attribute(value) => seed.deserialize(SimpleTypeDeserializer::from_part(
&self.start.buf,
value,
true,
self.de.reader.decoder(),
)),
// This arm processes the following XML shape:
// <any-tag>
// text value
Expand All @@ -323,10 +323,21 @@ where
// is implicit and equals to the `INNER_VALUE` constant, and the value
// is a `Text` or a `CData` event (the value deserializer will see one
// of that events)
ValueSource::Text => seed.deserialize(MapValueDeserializer {
map: self,
allow_start: false,
}),
// This case is checked by the "xml_schema_lists::element" tests in tests/serde-de.rs
ValueSource::Text => match self.de.next()? {
DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
e.into_inner(),
true,
self.de.reader.decoder(),
)),
DeEvent::CData(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
e.into_inner(),
false,
self.de.reader.decoder(),
)),
// SAFETY: We set `Text` only when we have seen `Text` or `CData`
_ => unreachable!(),
},
// This arm processes the following XML shape:
// <any-tag>
// <any>...</any>
Expand Down Expand Up @@ -612,8 +623,140 @@ where
DeEvent::Eof => Err(DeError::UnexpectedEof),

// Start(tag), Text, CData
_ => seed.deserialize(&mut *self.map.de).map(Some),
_ => seed
.deserialize(SeqValueDeserializer { map: self.map })
.map(Some),
};
}
}
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A deserializer for a value of sequence.
///
/// One instance is created for a sequence element and borrows the
/// [`MapAccess`] that owns the underlying XML deserializer. Most of the
/// `deserialize_*` methods are forwarded via the `forward!` macro; the notable
/// exception is `deserialize_seq`, which hands the textual content of the
/// element (a `Text` or `CData` event, optionally wrapped in one start/end tag
/// pair) to [`SimpleTypeDeserializer`], so that the content is processed by the
/// `xs:simpleType` implementation rather than as another flattened sequence.
struct SeqValueDeserializer<'de, 'a, 'm, R>
where
R: XmlRead<'de>,
{
/// Access to the map that created this deserializer. Gives access to the
/// context, such as list of fields, that current map known about.
///
/// All events are read through `map.de`, and the decoder is obtained from
/// `map.de.reader`.
map: &'m mut MapAccess<'de, 'a, R>,
}

// Inherent helpers that the `deserialize_primitives!()` macro expects to find
// on the type it is expanded in. Both simply delegate to the deserializer
// owned by the parent map.
impl<'de, 'a, 'm, R> SeqValueDeserializer<'de, 'a, 'm, R>
where
R: XmlRead<'de>,
{
/// Returns a text event, used inside [`deserialize_primitives!()`]
///
/// `unescape` is passed through unchanged to `next_text_impl` of the
/// underlying deserializer; any error it reports is returned as is.
// NOTE(review): the second argument is hard-coded to `true`; its meaning is
// defined by `next_text_impl`, which is not visible here (presumably it
// allows the text to be wrapped in a start tag) — confirm.
#[inline]
fn next_text(&mut self, unescape: bool) -> Result<BytesCData<'de>, DeError> {
self.map.de.next_text_impl(unescape, true)
}

/// Returns a decoder, used inside [`deserialize_primitives!()`]
///
/// The decoder is taken from the reader of the underlying deserializer.
#[inline]
fn decoder(&self) -> Decoder {
self.map.de.reader.decoder()
}
}

impl<'de, 'a, 'm, R> de::Deserializer<'de> for SeqValueDeserializer<'de, 'a, 'm, R>
where
    R: XmlRead<'de>,
{
    type Error = DeError;

    // Primitive types are generated by the macro; it relies on the inherent
    // `next_text` and `decoder` helpers of this type.
    deserialize_primitives!(mut);

    forward!(deserialize_option);
    forward!(deserialize_unit);
    forward!(deserialize_unit_struct(name: &'static str));
    forward!(deserialize_newtype_struct(name: &'static str));

    forward!(deserialize_map);
    forward!(deserialize_struct(
        name: &'static str,
        fields: &'static [&'static str]
    ));

    forward!(deserialize_enum(
        name: &'static str,
        variants: &'static [&'static str]
    ));

    forward!(deserialize_any);
    forward!(deserialize_ignored_any);

    /// Tuples are handled exactly like [sequences](#method.deserialize_seq);
    /// the requested length is not used.
    fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value, DeError>
    where
        V: Visitor<'de>,
    {
        self.deserialize_seq(visitor)
    }

    /// Named tuples are handled like [unnamed tuples](#method.deserialize_tuple),
    /// which in turn are handled like [sequences](#method.deserialize_seq).
    /// Neither the name nor the length is used.
    fn deserialize_tuple_struct<V>(
        self,
        _name: &'static str,
        _len: usize,
        visitor: V,
    ) -> Result<V::Value, DeError>
    where
        V: Visitor<'de>,
    {
        self.deserialize_seq(visitor)
    }

    /// Deserializes the textual content of the current sequence element as a
    /// sequence by delegating to [`SimpleTypeDeserializer`].
    ///
    /// The content may come directly as a `Text` or `CData` event, or be
    /// wrapped in a single element (`Start`, content, matching end tag).
    /// `Text` content is passed with the flag `true` and `CData` content with
    /// the flag `false`.
    ///
    /// # Errors
    ///
    /// Returns `DeError::Custom` if the event following a `Start` is neither
    /// `Text` nor `CData`, and propagates any error from reading events or
    /// from skipping to the end tag.
    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        match self.map.de.next()? {
            DeEvent::Text(text) => {
                let content = text.into_inner();
                let decoder = self.map.de.reader.decoder();
                SimpleTypeDeserializer::from_cow(content, true, decoder).deserialize_seq(visitor)
            }
            DeEvent::CData(cdata) => {
                let content = cdata.into_inner();
                let decoder = self.map.de.reader.decoder();
                SimpleTypeDeserializer::from_cow(content, false, decoder).deserialize_seq(visitor)
            }
            // This is a sequence element. If the type requires
            // `deserialize_seq`, we cannot treat the element as another
            // flattened sequence, so its content is forwarded to the
            // `xs:simpleType` implementation instead.
            DeEvent::Start(start) => {
                let result = match self.map.de.next()? {
                    DeEvent::Text(text) => {
                        let content = text.into_inner();
                        let decoder = self.map.de.reader.decoder();
                        SimpleTypeDeserializer::from_cow(content, true, decoder)
                            .deserialize_seq(visitor)
                    }
                    DeEvent::CData(cdata) => {
                        let content = cdata.into_inner();
                        let decoder = self.map.de.reader.decoder();
                        SimpleTypeDeserializer::from_cow(content, false, decoder)
                            .deserialize_seq(visitor)
                    }
                    other => Err(DeError::Custom(format!("Unsupported event {:?}", other))),
                };
                // TODO: Maybe assert that only the matching closing tag is expected here?
                // The rest of the element is skipped before the result (or
                // its error) is handed back to the caller.
                self.map.de.read_to_end(start.name())?;
                result
            }
            // SAFETY: this deserializer is used only after a `Start`, `Text`
            // or `CData` event has already been peeked
            _ => unreachable!(),
        }
    }

    /// Reports the same human-readability as the underlying deserializer.
    #[inline]
    fn is_human_readable(&self) -> bool {
        self.map.de.is_human_readable()
    }
}
26 changes: 17 additions & 9 deletions src/de/mod.rs
Expand Up @@ -215,6 +215,7 @@ macro_rules! deserialize_primitives {
mod escape;
mod map;
mod seq;
mod simple_type;
mod var;

pub use crate::errors::serialize::DeError;
Expand Down Expand Up @@ -331,6 +332,21 @@ where

// TODO: According to the https://www.w3.org/TR/xmlschema-2/#boolean,
// valid boolean representations are only "true", "false", "1", and "0"
/// Interprets `value` as a boolean literal and feeds the result to `visitor`.
///
/// Accepted spellings for `true`: `true`, `1`, `True`, `TRUE`, `t`, `Yes`,
/// `YES`, `yes`, `y`. Accepted spellings for `false`: `false`, `0`, `False`,
/// `FALSE`, `f`, `No`, `NO`, `no`, `n`. The comparison is exact: no trimming
/// and no other case variants are recognized.
///
/// # Errors
///
/// Returns `DeError::InvalidBoolean` carrying the original text when `value`
/// is not one of the accepted spellings.
fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
where
    V: de::Visitor<'de>,
{
    // First classify the literal, then dispatch to the visitor once.
    let parsed = match value {
        "true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => Some(true),
        "false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => Some(false),
        _ => None,
    };

    match parsed {
        Some(flag) => visitor.visit_bool(flag),
        None => Err(DeError::InvalidBoolean(value.into())),
    }
}

fn deserialize_bool<'de, V>(value: &[u8], decoder: Decoder, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
Expand All @@ -339,15 +355,7 @@ where
{
let value = decoder.decode(value)?;
// No need to unescape because valid boolean representations cannot be escaped
match value.as_ref() {
"true" | "1" | "True" | "TRUE" | "t" | "Yes" | "YES" | "yes" | "y" => {
visitor.visit_bool(true)
}
"false" | "0" | "False" | "FALSE" | "f" | "No" | "NO" | "no" | "n" => {
visitor.visit_bool(false)
}
_ => Err(DeError::InvalidBoolean(value.into())),
}
str2bool(value.as_ref(), visitor)
}

#[cfg(not(feature = "encoding"))]
Expand Down