Skip to content

Commit

Permalink
Merge pull request #306 from tafia/issue-299
Browse files Browse the repository at this point in the history
fix deserializing multiple unflatten fields
  • Loading branch information
tafia committed Aug 21, 2021
2 parents ad3d9c2 + 33c4a2b commit fc00cb0
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 48 deletions.
2 changes: 2 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

## Unreleased

- fix: use element name (with namespace) when unflattening (serialize feature)

## 0.23.0-alpha2

- fix: failing tests with features
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# quick-xml

[![Build Status](https://travis-ci.org/tafia/quick-xml.svg?branch=master)](https://travis-ci.org/tafia/quick-xml)
![status](https://github.com/tafia/quick-xml/actions/workflows/rust.yml/badge.svg)
[![Crate](http://meritbadge.herokuapp.com/quick-xml)](https://crates.io/crates/quick-xml)

High performance xml pull reader/writer.
Expand Down Expand Up @@ -38,6 +38,9 @@ let mut buf = Vec::new();

// The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
loop {
// NOTE: this is the generic case when we don't know about the input BufRead.
// when the input is a &str or a &[u8], we don't actually need to use another
// buffer, we could directly call `reader.read_event_unbuffered()`
match reader.read_event(&mut buf) {
Ok(Event::Start(ref e)) => {
match e.name() {
Expand Down
72 changes: 52 additions & 20 deletions src/de/map.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
//! Serde `Deserializer` module

use crate::{
de::{
escape::EscapedDeserializer, BorrowingReader, Deserializer, INNER_VALUE, UNFLATTEN_PREFIX,
},
de::escape::EscapedDeserializer,
de::{BorrowingReader, Deserializer, INNER_VALUE, UNFLATTEN_PREFIX},
errors::serialize::DeError,
events::{BytesStart, Event},
};
Expand Down Expand Up @@ -36,17 +35,35 @@ pub(crate) struct MapAccess<'de, 'a, R: BorrowingReader<'de> + 'a> {
/// to restore last position before advance.
position: usize,
value: MapValue,
/// number of fields yet to parse
size_hint: Option<usize>,
/// list of fields yet to unflatten (defined as starting with $unflatten=)
unflatten_fields: Vec<&'static [u8]>,
}

impl<'de, 'a, R: BorrowingReader<'de>> MapAccess<'de, 'a, R> {
/// Create a new MapAccess
pub fn new(de: &'a mut Deserializer<'de, R>, start: BytesStart<'de>) -> Result<Self, DeError> {
pub fn new(
de: &'a mut Deserializer<'de, R>,
start: BytesStart<'de>,
fields: &[&'static str],
) -> Result<Self, DeError> {
let position = start.attributes().position;
Ok(MapAccess {
de,
start,
position,
value: MapValue::Empty,
size_hint: if fields.is_empty() {
None
} else {
Some(fields.len())
},
unflatten_fields: fields
.iter()
.filter(|f| f.starts_with(UNFLATTEN_PREFIX))
.map(|f| f.as_bytes())
.collect(),
})
}

Expand All @@ -62,16 +79,20 @@ impl<'de, 'a, R: BorrowingReader<'de>> MapAccess<'de, 'a, R> {
impl<'de, 'a, R: BorrowingReader<'de> + 'a> de::MapAccess<'de> for MapAccess<'de, 'a, R> {
type Error = DeError;

fn size_hint(&self) -> Option<usize> {
self.size_hint.clone()
}

fn next_key_seed<K: DeserializeSeed<'de>>(
&mut self,
seed: K,
) -> Result<Option<K::Value>, Self::Error> {
let decoder = self.de.reader.decoder();
let has_value_field = self.de.has_value_field;
let has_unflatten_field = self.de.has_unflatten_field;
if let Some((key, value)) = self.next_attr()? {
// try getting map from attributes (key= "value")
self.value = MapValue::Attribute { value };
self.size_hint.as_mut().map(|l| *l = l.wrapping_sub(1));
seed.deserialize(EscapedDeserializer::new(key, decoder, false))
.map(Some)
} else {
Expand Down Expand Up @@ -102,24 +123,35 @@ impl<'de, 'a, R: BorrowingReader<'de> + 'a> de::MapAccess<'de> for MapAccess<'de
// See https://github.com/serde-rs/serde/issues/1905
Some(Event::Start(_)) if has_value_field => {
self.value = MapValue::InnerValue;
self.size_hint.as_mut().map(|l| *l = l.wrapping_sub(1));
seed.deserialize(INNER_VALUE.into_deserializer()).map(Some)
}
Some(Event::Start(e)) if has_unflatten_field => {
self.value = MapValue::InnerValue;
let key = format!(
"{}{}",
UNFLATTEN_PREFIX,
String::from_utf8(e.local_name().to_vec())
.expect("$unflatten= did not contain valid Rust identifier")
);
seed.deserialize(key.into_deserializer()).map(Some)
}
Some(Event::Start(e)) => {
let name = e.local_name().to_owned();

self.value = MapValue::Nested;
seed.deserialize(EscapedDeserializer::new(name, decoder, false))
.map(Some)
self.size_hint.as_mut().map(|l| *l = l.wrapping_sub(1));
let key = if let Some(p) = self
.unflatten_fields
.iter()
.position(|f| e.name() == &f[UNFLATTEN_PREFIX.len()..])
{
// Used to deserialize elements, like:
// <root>
// <xxx>test</xxx>
// </root>
//
// into
//
// struct Root {
// #[serde(rename = "$unflatten=xxx")]
// xxx: String,
// }
self.value = MapValue::InnerValue;
seed.deserialize(self.unflatten_fields.remove(p).into_deserializer())
} else {
let name = e.local_name().to_owned();
self.value = MapValue::Nested;
seed.deserialize(EscapedDeserializer::new(name, decoder, false))
};
key.map(Some)
}
_ => Ok(None),
}
Expand Down
38 changes: 17 additions & 21 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@ pub struct Deserializer<'de, R: BorrowingReader<'de>> {
/// <tag>value for INNER_VALUE field<tag>
/// ```
has_value_field: bool,
has_unflatten_field: bool,
}

/// Deserialize an instance of type T from a string of XML text.
Expand Down Expand Up @@ -183,7 +182,6 @@ impl<'de, R: BorrowingReader<'de>> Deserializer<'de, R> {
reader,
peek: None,
has_value_field: false,
has_unflatten_field: false,
}
}

Expand Down Expand Up @@ -293,34 +291,16 @@ impl<'de, 'a, R: BorrowingReader<'de>> de::Deserializer<'de> for &'a mut Deseria
if let Some(e) = self.next_start()? {
let name = e.name().to_vec();
self.has_value_field = fields.contains(&INNER_VALUE);
self.has_unflatten_field = fields.iter().any(|elem| elem.starts_with(UNFLATTEN_PREFIX));
let map = map::MapAccess::new(self, e)?;
let map = map::MapAccess::new(self, e, fields)?;
let value = visitor.visit_map(map)?;
self.has_value_field = false;
self.has_unflatten_field = false;
self.read_to_end(&name)?;
Ok(value)
} else {
Err(DeError::Start)
}
}

deserialize_type!(deserialize_i8 => visit_i8);
deserialize_type!(deserialize_i16 => visit_i16);
deserialize_type!(deserialize_i32 => visit_i32);
deserialize_type!(deserialize_i64 => visit_i64);
deserialize_type!(deserialize_u8 => visit_u8);
deserialize_type!(deserialize_u16 => visit_u16);
deserialize_type!(deserialize_u32 => visit_u32);
deserialize_type!(deserialize_u64 => visit_u64);
deserialize_type!(deserialize_f32 => visit_f32);
deserialize_type!(deserialize_f64 => visit_f64);

serde_if_integer128! {
deserialize_type!(deserialize_i128 => visit_i128);
deserialize_type!(deserialize_u128 => visit_u128);
}

fn deserialize_bool<V: de::Visitor<'de>>(self, visitor: V) -> Result<V::Value, DeError> {
let txt = self.next_text()?;

Expand Down Expand Up @@ -475,6 +455,22 @@ impl<'de, 'a, R: BorrowingReader<'de>> de::Deserializer<'de> for &'a mut Deseria
_ => self.deserialize_string(visitor),
}
}

deserialize_type!(deserialize_i8 => visit_i8);
deserialize_type!(deserialize_i16 => visit_i16);
deserialize_type!(deserialize_i32 => visit_i32);
deserialize_type!(deserialize_i64 => visit_i64);
deserialize_type!(deserialize_u8 => visit_u8);
deserialize_type!(deserialize_u16 => visit_u16);
deserialize_type!(deserialize_u32 => visit_u32);
deserialize_type!(deserialize_u64 => visit_u64);
deserialize_type!(deserialize_f32 => visit_f32);
deserialize_type!(deserialize_f64 => visit_f64);

serde_if_integer128! {
deserialize_type!(deserialize_i128 => visit_i128);
deserialize_type!(deserialize_u128 => visit_u128);
}
}

/// A trait that borrows an XML reader that borrows from the input. For a &[u8]
Expand Down
2 changes: 1 addition & 1 deletion src/se/var.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ where
// TODO: Inherit indentation state from self.parent.writer
let writer = Writer::new(&mut self.buffer);
if key.starts_with(UNFLATTEN_PREFIX) {
let key = key.split_at(UNFLATTEN_PREFIX.len()).1;
let key = &key[UNFLATTEN_PREFIX.len()..];
let mut serializer = Serializer::with_root(writer, Some(key));
serializer.serialize_newtype_struct(key, value)?;
self.children.append(&mut self.buffer);
Expand Down
93 changes: 88 additions & 5 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,8 @@ extern crate quick_xml;
#[cfg(feature = "serialize")]
extern crate serde;

use quick_xml::events::attributes::Attribute;
use quick_xml::events::Event::*;
use quick_xml::Reader;
use std::borrow::Cow;
use std::io::Cursor;
use quick_xml::{events::attributes::Attribute, events::Event::*, Error, Reader};
use std::{borrow::Cow, io::Cursor};

#[cfg(feature = "serialize")]
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -1190,3 +1187,89 @@ fn players() {

assert_eq!(res, expected);
}

#[test]
fn test_issue299() -> Result<(), Error> {
let xml = r#"
<?xml version="1.0" encoding="utf8"?>
<MICEX_DOC xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<SECURITY SecurityId="PLZL" ISIN="RU000A0JNAA8" SecShortName="Short Name" PriceType="CASH">
<RECORDS RecNo="1" TradeNo="1111" TradeDate="2021-07-08" TradeTime="15:00:00" BuySell="S" SettleCode="Y1Dt" Decimals="3" Price="13057.034" Quantity="766" Value="10001688.29" AccInt="0" Amount="10001688.29" Balance="766" TrdAccId="X0011" ClientDetails="2222" CPFirmId="3333" CPFirmShortName="Firm Short Name" Price2="13057.034" RepoPart="2" ReportTime="16:53:27" SettleTime="17:47:06" ClientCode="4444" DueDate="2021-07-09" EarlySettleStatus="N" RepoRate="5.45" RateType="FIX"/>
</SECURITY>
</MICEX_DOC>
"#;
let mut reader = Reader::from_str(xml);
loop {
match reader.read_event_unbuffered()? {
Start(e) | Empty(e) => {
let attr_count = match e.name() {
b"MICEX_DOC" => 1,
b"SECURITY" => 4,
b"RECORDS" => 26,
_ => unreachable!(),
};
assert_eq!(
attr_count,
e.attributes().filter(Result::is_ok).count(),
"mismatch att count on '{:?}'",
reader.decoder().decode(e.name())
);
}
Eof => break,
_ => (),
}
}
Ok(())
}

#[cfg(feature = "serialize")]
#[test]
fn test_issue305_unflatten_namespace() -> Result<(), quick_xml::DeError> {
use quick_xml::de::from_str;

#[derive(Deserialize, Debug)]
struct NamespaceBug {
#[serde(rename = "$unflatten=d:test2")]
test2: String,
}

let _namespace_bug: NamespaceBug = from_str(
r#"
<?xml version="1.0" encoding="UTF-8"?>
<d:test xmlns:d="works">
<d:test2>doesntwork</d:test2>
</d:test>"#,
)?;

Ok(())
}

#[cfg(feature = "serialize")]
#[test]
fn test_issue305_unflatten_nesting() -> Result<(), quick_xml::DeError> {
use quick_xml::de::from_str;

#[derive(Deserialize, Debug)]
struct InnerNestingBug {}

#[derive(Deserialize, Debug)]
struct NestingBug {
// comment out one of these fields and it works
#[serde(rename = "$unflatten=outer1")]
outer1: InnerNestingBug,

#[serde(rename = "$unflatten=outer2")]
outer2: String,
}

let _nesting_bug: NestingBug = from_str::<NestingBug>(
r#"
<?xml version="1.0" encoding="UTF-8"?>
<root>
<outer1></outer1>
<outer2></outer2>
</root>"#,
)?;

Ok(())
}

0 comments on commit fc00cb0

Please sign in to comment.