Skip to content

Commit

Permalink
Merge pull request #523 from Mingun/fix-bugs
Browse files Browse the repository at this point in the history
Fix couple of bugs in serde deserializer
  • Loading branch information
Mingun committed Dec 12, 2022
2 parents 6347f3b + 8283121 commit e877f4f
Show file tree
Hide file tree
Showing 5 changed files with 194 additions and 74 deletions.
8 changes: 8 additions & 0 deletions Changelog.md
Expand Up @@ -33,6 +33,13 @@
- [#514]: Fix wrong reporting `Error::EndEventMismatch` after disabling and enabling
`.check_end_names`
- [#517]: Fix swapped codes for `\r` and `\n` characters when escaping them
- [#523]: Fix incorrect skipping text and CDATA content before any map-like structures
in serde deserializer, like
```xml
unwanted text<struct>...</struct>
```
- [#523]: Fix incorrect handling of `xs:list`s with encoded spaces: they still
act as delimiters, which is confirmed also by mature XmlBeans Java library

### Misc Changes

Expand Down Expand Up @@ -64,6 +71,7 @@
[#514]: https://github.com/tafia/quick-xml/issues/514
[#517]: https://github.com/tafia/quick-xml/issues/517
[#521]: https://github.com/tafia/quick-xml/pull/521
[#523]: https://github.com/tafia/quick-xml/pull/523
[XML name]: https://www.w3.org/TR/xml11/#NT-Name
[documentation]: https://docs.rs/quick-xml/0.27.0/quick_xml/de/index.html#difference-between-text-and-value-special-names

Expand Down
117 changes: 79 additions & 38 deletions src/de/map.rs
Expand Up @@ -311,15 +311,13 @@ where
// of that events)
// This case are checked by "xml_schema_lists::element" tests in tests/serde-de.rs
ValueSource::Text => match self.de.next()? {
DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
e.into_inner(),
true,
self.de.reader.decoder(),
DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(
// Comment to prevent auto-formatting
e.decode(true)?,
)),
DeEvent::CData(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
e.into_inner(),
false,
self.de.reader.decoder(),
DeEvent::CData(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(
// Comment to prevent auto-formatting
e.decode()?,
)),
// SAFETY: We set `Text` only when we seen `Text` or `CData`
_ => unreachable!(),
Expand Down Expand Up @@ -381,7 +379,7 @@ where
/// Access to the map that created this deserializer. Gives access to the
/// context, such as list of fields, that current map known about.
map: &'m mut MapAccess<'de, 'a, R>,
/// Determines, should [`Deserializer::next_text_impl()`] expand the second
/// Determines, should [`Deserializer::read_string_impl()`] expand the second
/// level of tags or not.
///
/// If this field is `true`, we process the following XML shape:
Expand Down Expand Up @@ -463,10 +461,14 @@ impl<'de, 'a, 'm, R> MapValueDeserializer<'de, 'a, 'm, R>
where
R: XmlRead<'de>,
{
/// Returns a text event, used inside [`deserialize_primitives!()`]
/// Returns a next string as concatenated content of consequent [`Text`] and
/// [`CData`] events, used inside [`deserialize_primitives!()`].
///
/// [`Text`]: DeEvent::Text
/// [`CData`]: DeEvent::CData
#[inline]
fn next_text(&mut self, unescape: bool) -> Result<Cow<'de, str>, DeError> {
self.map.de.next_text_impl(unescape, self.allow_start)
fn read_string(&mut self, unescape: bool) -> Result<Cow<'de, str>, DeError> {
self.map.de.read_string_impl(unescape, self.allow_start)
}
}

Expand Down Expand Up @@ -518,6 +520,16 @@ where
self.deserialize_tuple(len, visitor)
}

/// Deserializes each `<tag>` in
/// ```xml
/// <any-tag>
/// <tag>...</tag>
/// <tag>...</tag>
/// <tag>...</tag>
/// </any-tag>
/// ```
/// as a sequence item, where `<any-tag>` represents a Map in a [`Self::map`],
/// and a `<tag>` is a sequential field of that map.
fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
Expand Down Expand Up @@ -619,7 +631,25 @@ impl<'de> TagFilter<'de> {

/// An accessor to sequence elements forming a value for struct field.
/// Technically, this sequence is flattened out into structure and sequence
/// elements are overlapped with other fields of a structure
/// elements are overlapped with other fields of a structure. Each call to
/// [`Self::next_element_seed`] consumes a next sub-tree or consequent list
/// of [`Text`] and [`CData`] events.
///
/// ```xml
/// <>
/// ...
/// <item>The is the one item</item>
/// This is <![CDATA[one another]]> item<!-- even when--> it splitted by comments
/// <tag>...and that is the third!</tag>
/// ...
/// </>
/// ```
///
/// Depending on [`Self::filter`], only some of that possible constructs would be
/// an element.
///
/// [`Text`]: DeEvent::Text
/// [`CData`]: DeEvent::CData
struct MapValueSeqAccess<'de, 'a, 'm, R>
where
R: XmlRead<'de>,
Expand Down Expand Up @@ -686,7 +716,7 @@ where

// Start(tag), Text, CData
_ => seed
.deserialize(SeqValueDeserializer { map: self.map })
.deserialize(SeqItemDeserializer { map: self.map })
.map(Some),
};
}
Expand All @@ -695,8 +725,8 @@ where

////////////////////////////////////////////////////////////////////////////////////////////////////

/// A deserializer for a value of sequence.
struct SeqValueDeserializer<'de, 'a, 'm, R>
/// A deserializer for a single item of a sequence.
struct SeqItemDeserializer<'de, 'a, 'm, R>
where
R: XmlRead<'de>,
{
Expand All @@ -705,18 +735,22 @@ where
map: &'m mut MapAccess<'de, 'a, R>,
}

impl<'de, 'a, 'm, R> SeqValueDeserializer<'de, 'a, 'm, R>
impl<'de, 'a, 'm, R> SeqItemDeserializer<'de, 'a, 'm, R>
where
R: XmlRead<'de>,
{
/// Returns a text event, used inside [`deserialize_primitives!()`]
/// Returns a next string as concatenated content of consequent [`Text`] and
/// [`CData`] events, used inside [`deserialize_primitives!()`].
///
/// [`Text`]: DeEvent::Text
/// [`CData`]: DeEvent::CData
#[inline]
fn next_text(&mut self, unescape: bool) -> Result<Cow<'de, str>, DeError> {
self.map.de.next_text_impl(unescape, true)
fn read_string(&mut self, unescape: bool) -> Result<Cow<'de, str>, DeError> {
self.map.de.read_string_impl(unescape, true)
}
}

impl<'de, 'a, 'm, R> de::Deserializer<'de> for SeqValueDeserializer<'de, 'a, 'm, R>
impl<'de, 'a, 'm, R> de::Deserializer<'de> for SeqItemDeserializer<'de, 'a, 'm, R>
where
R: XmlRead<'de>,
{
Expand Down Expand Up @@ -764,39 +798,46 @@ where
self.deserialize_tuple(len, visitor)
}

/// This method deserializes a sequence inside of element that itself is a
/// sequence element:
///
/// ```xml
/// <>
/// ...
/// <self>inner sequence</self>
/// <self>inner sequence</self>
/// <self>inner sequence</self>
/// ...
/// </>
/// ```
fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
where
V: Visitor<'de>,
{
match self.map.de.next()? {
DeEvent::Text(e) => SimpleTypeDeserializer::from_cow(
// Comment to prevent auto-formatting and keep Text and Cdata similar
e.into_inner(),
true,
self.map.de.reader.decoder(),
DeEvent::Text(e) => SimpleTypeDeserializer::from_text_content(
// Comment to prevent auto-formatting
e.decode(true)?,
)
.deserialize_seq(visitor),
DeEvent::CData(e) => SimpleTypeDeserializer::from_cow(
e.into_inner(),
false,
self.map.de.reader.decoder(),
DeEvent::CData(e) => SimpleTypeDeserializer::from_text_content(
// Comment to prevent auto-formatting
e.decode()?,
)
.deserialize_seq(visitor),
// This is a sequence element. We cannot treat it as another flatten
// sequence if type will require `deserialize_seq` We instead forward
// it to `xs:simpleType` implementation
DeEvent::Start(e) => {
let value = match self.map.de.next()? {
DeEvent::Text(e) => SimpleTypeDeserializer::from_cow(
e.into_inner(),
true,
self.map.de.reader.decoder(),
DeEvent::Text(e) => SimpleTypeDeserializer::from_text_content(
// Comment to prevent auto-formatting
e.decode(true)?,
)
.deserialize_seq(visitor),
DeEvent::CData(e) => SimpleTypeDeserializer::from_cow(
e.into_inner(),
false,
self.map.de.reader.decoder(),
DeEvent::CData(e) => SimpleTypeDeserializer::from_text_content(
// Comment to prevent auto-formatting
e.decode()?,
)
.deserialize_seq(visitor),
e => Err(DeError::Unsupported(
Expand Down
50 changes: 19 additions & 31 deletions src/de/mod.rs
Expand Up @@ -239,7 +239,7 @@ macro_rules! deserialize_type {
V: Visitor<'de>,
{
// No need to unescape because valid integer representations cannot be escaped
let text = self.next_text(false)?;
let text = self.read_string(false)?;
visitor.$visit(text.parse()?)
}
};
Expand Down Expand Up @@ -272,7 +272,7 @@ macro_rules! deserialize_primitives {
V: Visitor<'de>,
{
// No need to unescape because valid boolean representations cannot be escaped
let text = self.next_text(false)?;
let text = self.read_string(false)?;

str2bool(&text, visitor)
}
Expand All @@ -297,7 +297,7 @@ macro_rules! deserialize_primitives {
where
V: Visitor<'de>,
{
let text = self.next_text(true)?;
let text = self.read_string(true)?;
match text {
Cow::Borrowed(string) => visitor.visit_borrowed_str(string),
Cow::Owned(string) => visitor.visit_string(string),
Expand Down Expand Up @@ -684,23 +684,9 @@ where
}
}

fn next_start(&mut self) -> Result<Option<BytesStart<'de>>, DeError> {
loop {
let e = self.next()?;
match e {
DeEvent::Start(e) => return Ok(Some(e)),
DeEvent::End(e) => {
return Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned()))
}
DeEvent::Eof => return Ok(None),
_ => (), // ignore texts
}
}
}

#[inline]
fn next_text(&mut self, unescape: bool) -> Result<Cow<'de, str>, DeError> {
self.next_text_impl(unescape, true)
fn read_string(&mut self, unescape: bool) -> Result<Cow<'de, str>, DeError> {
self.read_string_impl(unescape, true)
}

/// Consumes a one XML element or an XML tree, returns associated text or
Expand Down Expand Up @@ -735,7 +721,7 @@ where
/// |[`DeEvent::Text`] |`text content` |Unescapes `text content` and returns it, consumes events up to `</tag>`
/// |[`DeEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged, consumes events up to `</tag>`
/// |[`DeEvent::Eof`] | |Emits [`UnexpectedEof`](DeError::UnexpectedEof)
fn next_text_impl(
fn read_string_impl(
&mut self,
unescape: bool,
allow_start: bool,
Expand Down Expand Up @@ -860,15 +846,17 @@ where
where
V: Visitor<'de>,
{
// Try to go to the next `<tag ...>...</tag>` or `<tag .../>`
if let Some(e) = self.next_start()? {
let name = e.name().as_ref().to_vec();
let map = map::MapAccess::new(self, e, fields)?;
let value = visitor.visit_map(map)?;
self.read_to_end(QName(&name))?;
Ok(value)
} else {
Err(DeError::ExpectedStart)
match self.next()? {
DeEvent::Start(e) => {
let name = e.name().as_ref().to_vec();
let map = map::MapAccess::new(self, e, fields)?;
let value = visitor.visit_map(map)?;
self.read_to_end(QName(&name))?;
Ok(value)
}
DeEvent::End(e) => Err(DeError::UnexpectedEnd(e.name().as_ref().to_owned())),
DeEvent::Text(_) | DeEvent::CData(_) => Err(DeError::ExpectedStart),
DeEvent::Eof => Err(DeError::UnexpectedEof),
}
}

Expand Down Expand Up @@ -1744,10 +1732,10 @@ mod tests {
assert_eq!(reader.next().unwrap(), DeEvent::Eof);
}

/// Ensures, that [`Deserializer::next_text()`] never can get an `End` event,
/// Ensures, that [`Deserializer::read_string()`] never can get an `End` event,
/// because parser reports error early
#[test]
fn next_text() {
fn read_string() {
match from_str::<String>(r#"</root>"#) {
Err(DeError::InvalidXml(Error::EndEventMismatch { expected, found })) => {
assert_eq!(expected, "");
Expand Down
8 changes: 4 additions & 4 deletions src/de/simple_type.rs
Expand Up @@ -522,12 +522,12 @@ pub struct SimpleTypeDeserializer<'de, 'a> {

impl<'de, 'a> SimpleTypeDeserializer<'de, 'a> {
/// Creates a deserializer from a value, that possible borrowed from input
pub fn from_cow(value: Cow<'de, [u8]>, escaped: bool, decoder: Decoder) -> Self {
pub fn from_text_content(value: Cow<'de, str>) -> Self {
let content = match value {
Cow::Borrowed(slice) => CowRef::Input(slice),
Cow::Owned(content) => CowRef::Owned(content),
Cow::Borrowed(slice) => CowRef::Input(slice.as_bytes()),
Cow::Owned(content) => CowRef::Owned(content.into_bytes()),
};
Self::new(content, escaped, decoder)
Self::new(content, false, Decoder::utf8())
}

/// Creates a deserializer from a part of value at specified range
Expand Down

0 comments on commit e877f4f

Please sign in to comment.