Skip to content

Commit

Permalink
Fix incorrect handling of xs:lists with encoded spaces: they still …
Browse files Browse the repository at this point in the history
…act as delimiters

Co-authored-by: Daniel Alley <dalley@redhat.com>
  • Loading branch information
Mingun and dralley committed Dec 12, 2022
1 parent 1373eb1 commit daa6526
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 30 deletions.
2 changes: 2 additions & 0 deletions Changelog.md
Expand Up @@ -38,6 +38,8 @@
```xml
unwanted text<struct>...</struct>
```
- [#523]: Fix incorrect handling of `xs:list`s with encoded spaces: they still
act as delimiters, which is also confirmed by the mature XmlBeans Java library

### Misc Changes

Expand Down
43 changes: 18 additions & 25 deletions src/de/map.rs
Expand Up @@ -311,15 +311,13 @@ where
// of that events)
// This case is checked by "xml_schema_lists::element" tests in tests/serde-de.rs
ValueSource::Text => match self.de.next()? {
DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
e.into_inner(),
true,
self.de.reader.decoder(),
DeEvent::Text(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(
// Comment to prevent auto-formatting
e.decode(true)?,
)),
DeEvent::CData(e) => seed.deserialize(SimpleTypeDeserializer::from_cow(
e.into_inner(),
false,
self.de.reader.decoder(),
DeEvent::CData(e) => seed.deserialize(SimpleTypeDeserializer::from_text_content(
// Comment to prevent auto-formatting
e.decode()?,
)),
// SAFETY: We set `Text` only when we have seen `Text` or `CData`
_ => unreachable!(),
Expand Down Expand Up @@ -777,34 +775,29 @@ where
V: Visitor<'de>,
{
match self.map.de.next()? {
DeEvent::Text(e) => SimpleTypeDeserializer::from_cow(
// Comment to prevent auto-formatting and keep Text and Cdata similar
e.into_inner(),
true,
self.map.de.reader.decoder(),
DeEvent::Text(e) => SimpleTypeDeserializer::from_text_content(
// Comment to prevent auto-formatting
e.decode(true)?,
)
.deserialize_seq(visitor),
DeEvent::CData(e) => SimpleTypeDeserializer::from_cow(
e.into_inner(),
false,
self.map.de.reader.decoder(),
DeEvent::CData(e) => SimpleTypeDeserializer::from_text_content(
// Comment to prevent auto-formatting
e.decode()?,
)
.deserialize_seq(visitor),
// This is a sequence element. We cannot treat it as another flatten
// sequence if the type requires `deserialize_seq`. We instead forward
// it to the `xs:simpleType` implementation
DeEvent::Start(e) => {
let value = match self.map.de.next()? {
DeEvent::Text(e) => SimpleTypeDeserializer::from_cow(
e.into_inner(),
true,
self.map.de.reader.decoder(),
DeEvent::Text(e) => SimpleTypeDeserializer::from_text_content(
// Comment to prevent auto-formatting
e.decode(true)?,
)
.deserialize_seq(visitor),
DeEvent::CData(e) => SimpleTypeDeserializer::from_cow(
e.into_inner(),
false,
self.map.de.reader.decoder(),
DeEvent::CData(e) => SimpleTypeDeserializer::from_text_content(
// Comment to prevent auto-formatting
e.decode()?,
)
.deserialize_seq(visitor),
e => Err(DeError::Unsupported(
Expand Down
8 changes: 4 additions & 4 deletions src/de/simple_type.rs
Expand Up @@ -522,12 +522,12 @@ pub struct SimpleTypeDeserializer<'de, 'a> {

impl<'de, 'a> SimpleTypeDeserializer<'de, 'a> {
/// Creates a deserializer from a value that is possibly borrowed from the input
pub fn from_cow(value: Cow<'de, [u8]>, escaped: bool, decoder: Decoder) -> Self {
pub fn from_text_content(value: Cow<'de, str>) -> Self {
let content = match value {
Cow::Borrowed(slice) => CowRef::Input(slice),
Cow::Owned(content) => CowRef::Owned(content),
Cow::Borrowed(slice) => CowRef::Input(slice.as_bytes()),
Cow::Owned(content) => CowRef::Owned(content.into_bytes()),
};
Self::new(content, escaped, decoder)
Self::new(content, false, Decoder::utf8())
}

/// Creates a deserializer from a part of value at specified range
Expand Down
5 changes: 4 additions & 1 deletion tests/serde-de.rs
Expand Up @@ -6060,10 +6060,13 @@ mod xml_schema_lists {
list!(bool_: bool = "<root>true false true</root>" => vec![true, false, true]);
list!(char_: char = "<root>4 2 j</root>" => vec!['4', '2', 'j']);

// Expansion of entity references happens before list parsing
// This is confirmed by XmlBeans (mature Java library) as well
list!(string: String = "<root>first second third&#x20;3</root>" => vec![
"first".to_string(),
"second".to_string(),
"third 3".to_string(),
"third".to_string(),
"3".to_string(),
]);
err!(byte_buf: ByteBuf = "<root>first second third&#x20;3</root>"
=> Unsupported("byte arrays are not supported as `xs:list` items"));
Expand Down

0 comments on commit daa6526

Please sign in to comment.