Skip to content

Commit

Permalink
Merge branch 'master' into edition2018
Browse files Browse the repository at this point in the history
  • Loading branch information
tafia committed Aug 10, 2021
2 parents 237cd06 + 8bde969 commit 95ff717
Show file tree
Hide file tree
Showing 14 changed files with 169 additions and 29 deletions.
16 changes: 11 additions & 5 deletions .github/workflows/rust.yml
Expand Up @@ -4,17 +4,23 @@ on: [push, pull_request]

jobs:
build:
strategy:
matrix:
platform: [ubuntu-latest, windows-latest]

runs-on: ubuntu-latest
runs-on: ${{ matrix.platform }}

steps:
- uses: actions/checkout@v1
- name: Build
run: cargo build
- name: Run tests no features
- name: Run tests (no features)
run: cargo test --no-default-features
- name: Run tests all-features
run: cargo test --all-features
- name: Run tests (serialize)
run: cargo test --features serialize
- name: Run tests (encoding+serialize)
run: cargo test --features encoding,serialize
- name: Run tests (escape-html+serialize)
run: cargo test --features escape-html,serialize
- name: Check fmt
run: cargo fmt -- --check

2 changes: 2 additions & 0 deletions .gitignore
@@ -1,3 +1,5 @@
target
.project
Cargo.lock
# macOS hidden files
.DS_Store
14 changes: 14 additions & 0 deletions README.md
Expand Up @@ -226,6 +226,20 @@ struct Foo {
}
```

### Unflattening structs into verbose XML

If your XML files look like `<root><first>value</first><second>value</second></root>`, you can
(de)serialize them with the special name prefix `$unflatten=`:

```rust,ignore
struct Root {
#[serde(rename = "$unflatten=first")]
first: String,
#[serde(rename = "$unflatten=second")]
other_field: String,
}
```

### Performance

Note that despite not focusing on performance (there are several unnecessary copies), it remains about 10x faster than serde-xml-rs.
Expand Down
10 changes: 9 additions & 1 deletion src/de/map.rs
@@ -1,7 +1,7 @@
//! Serde `Deserializer` module

use crate::{
de::{escape::EscapedDeserializer, Deserializer, INNER_VALUE},
de::{escape::EscapedDeserializer, Deserializer, INNER_VALUE, UNFLATTEN_PREFIX},
errors::serialize::DeError,
events::{attributes::Attribute, BytesStart, Event},
};
Expand Down Expand Up @@ -62,6 +62,7 @@ impl<'a, 'de, R: BufRead> de::MapAccess<'de> for MapAccess<'a, R> {
.map(|a| (a.key.to_owned(), a.value.into_owned()));
let decoder = self.de.reader.decoder();
let has_value_field = self.de.has_value_field;
let has_unflatten_field = self.de.has_unflatten_field;
if let Some((key, value)) = attr_key_val {
// try getting map from attributes (key= "value")
self.value = MapValue::Attribute { value };
Expand Down Expand Up @@ -94,8 +95,15 @@ impl<'a, 'de, R: BufRead> de::MapAccess<'de> for MapAccess<'a, R> {
self.value = MapValue::InnerValue;
seed.deserialize(INNER_VALUE.into_deserializer()).map(Some)
}
Some(Event::Start(e)) if has_unflatten_field => {
self.value = MapValue::InnerValue;
let key = format!("{}{}", UNFLATTEN_PREFIX, String::from_utf8(e.local_name().to_vec())
.expect("$unflatten= did not contain valid Rust identifier"));
seed.deserialize(key.into_deserializer()).map(Some)
}
Some(Event::Start(e)) => {
let name = e.local_name().to_owned();

self.value = MapValue::Nested;
seed.deserialize(EscapedDeserializer::new(name, decoder, false))
.map(Some)
Expand Down
14 changes: 12 additions & 2 deletions src/de/mod.rs
Expand Up @@ -121,20 +121,27 @@ use serde::de::{self, DeserializeOwned};
use serde::serde_if_integer128;
use std::io::BufRead;

const INNER_VALUE: &str = "$value";
pub(crate) const INNER_VALUE: &str = "$value";
pub(crate) const UNFLATTEN_PREFIX: &str = "$unflatten=";

/// An XML deserializer
pub struct Deserializer<R: BufRead> {
// Underlying XML reader the deserializer pulls its input from.
reader: Reader<R>,
// A single buffered event, if any.
// NOTE(review): presumably a one-event lookahead filled by a peek operation — confirm.
peek: Option<Event<'static>>,
// `true` while deserializing a struct whose field list contains `$value`
// (`INNER_VALUE`); reset to `false` once that struct has been visited.
has_value_field: bool,
// `true` while deserializing a struct that has at least one field whose name
// starts with `$unflatten=` (`UNFLATTEN_PREFIX`); reset to `false` afterwards.
has_unflatten_field: bool,
}

/// Deserialize an instance of type `T` from a string of XML text.
///
/// The string is handed to [`from_reader`] as its UTF-8 bytes.
pub fn from_str<T: DeserializeOwned>(s: &str) -> Result<T, DeError> {
    let bytes = s.as_bytes();
    from_reader(bytes)
}

/// Deserialize an instance of type `T` from bytes of XML text.
///
/// The slice is passed straight to [`from_reader`].
pub fn from_slice<T: DeserializeOwned>(b: &[u8]) -> Result<T, DeError> {
    // `&[u8]` implements `BufRead`, so the slice itself serves as the reader.
    let reader: &[u8] = b;
    from_reader(reader)
}

/// Deserialize from a reader
pub fn from_reader<R: BufRead, T: DeserializeOwned>(reader: R) -> Result<T, DeError> {
let mut de = Deserializer::from_reader(reader);
Expand All @@ -148,6 +155,7 @@ impl<R: BufRead> Deserializer<R> {
reader,
peek: None,
has_value_field: false,
has_unflatten_field: false,
}
}

Expand Down Expand Up @@ -269,9 +277,11 @@ impl<'de, 'a, R: BufRead> de::Deserializer<'de> for &'a mut Deserializer<R> {
if let Some(e) = self.next_start(&mut Vec::new())? {
let name = e.name().to_vec();
self.has_value_field = fields.contains(&INNER_VALUE);
self.has_unflatten_field = fields.iter().any(|elem| elem.starts_with(UNFLATTEN_PREFIX));
let map = map::MapAccess::new(self, e)?;
let value = visitor.visit_map(map)?;
self.has_value_field = false;
self.has_unflatten_field = false;
self.read_to_end(&name)?;
Ok(value)
} else {
Expand Down
8 changes: 8 additions & 0 deletions src/errors.rs
Expand Up @@ -52,6 +52,14 @@ impl From<::std::str::Utf8Error> for Error {
}
}

impl From<::escape::EscapeError> for Error {
/// Creates a new `Error::EscapeError` from the given error
#[inline]
fn from(error: ::escape::EscapeError) -> Error {
Error::EscapeError(error)
}
}

/// A specialized `Result` type where the error is hard-wired to [`Error`].
///
/// [`Error`]: enum.Error.html
Expand Down
3 changes: 2 additions & 1 deletion src/escapei.rs
Expand Up @@ -5,6 +5,7 @@ use std::borrow::Cow;
use std::collections::HashMap;
use std::ops::Range;

/// Error for XML escape/unescape.
#[derive(Debug)]
pub enum EscapeError {
/// Entity with Null character
Expand All @@ -24,7 +25,7 @@ pub enum EscapeError {
TooLongDecimal,
/// Character is not a valid decimal value
InvalidDecimal(char),
// Not a valid unicode codepoint
/// Not a valid unicode codepoint
InvalidCodepoint(u32),
}

Expand Down
35 changes: 35 additions & 0 deletions src/events/mod.rs
@@ -1,4 +1,39 @@
//! Defines zero-copy XML events used throughout this library.
//!
//! An XML event often represents part of an XML element.
//! They occur both during reading and writing and are
//! usually used with the stream-oriented API.
//!
//! For example, the XML element
//! ```xml
//! <name attr="value">Inner text</name>
//! ```
//! consists of the three events `Start`, `Text` and `End`.
//! They can also represent other parts in an XML document like the
//! XML declaration. Each Event usually contains further information,
//! like the tag name, the attribute or the inner text.
//!
//! See [`Event`] for a list of all possible events.
//!
//! # Reading
//! When reading an XML stream, the events are emitted by
//! [`Reader::read_event`]. You must listen
//! for the different types of events you are interested in.
//!
//! See [`Reader`] for further information.
//!
//! # Writing
//! When writing the XML document, you must create the XML element
//! by constructing the events it consists of and pass them to the writer
//! sequentially.
//!
//! See [`Writer`] for further information.
//!
//! [`Reader::read_event`]: ../reader/struct.Reader.html#method.read_event
//! [`Reader`]: ../reader/struct.Reader.html
//! [`Writer`]: ../writer/struct.Writer.html
//! [`Event`]: enum.Event.html


pub mod attributes;

Expand Down
28 changes: 23 additions & 5 deletions src/lib.rs
Expand Up @@ -2,8 +2,23 @@
//!
//! ## Description
//!
//! - `Reader`: a low level xml pull-reader where buffer allocation/clearing is left to user
//! - `Writer`: a xml writer. Can be nested with readers if you want to transform xmls
//! quick-xml contains two modes of operation:
//!
//! A streaming API based on the [StAX] model. This is suited for larger XML documents which
//! cannot be completely read into memory at once.
//!
//! The user has to explicitly _ask_ for the next XML event, similar
//! to a database cursor.
//! This is achieved by the following two structs:
//!
//! - [`Reader`]: A low level XML pull-reader where buffer allocation/clearing is left to user.
//! - [`Writer`]: An XML writer. Can be nested with readers if you want to transform XMLs.
//!
//! Especially for nested XML elements, the user must keep track _where_ (how deep) in the XML document
//! the current event is located.
//!
//! Furthermore, quick-xml also contains optional [Serde] support to directly serialize and deserialize from
//! structs, without having to deal with the XML events.
//!
//! ## Examples
//!
Expand Down Expand Up @@ -106,8 +121,11 @@
//! # Features
//!
//! quick-xml supports 2 additional features, not activated by default:
//! - `encoding`: support non utf8 xmls
//! - `encoding`: support non utf8 XMLs
//! - `serialize`: support serde `Serialize`/`Deserialize`
//!
//! [StAX]: https://en.wikipedia.org/wiki/StAX
//! [Serde]: https://serde.rs/
#![forbid(unsafe_code)]
#![deny(missing_docs)]
#![recursion_limit = "1024"]
Expand All @@ -126,8 +144,8 @@ mod errors;
mod escapei;
pub mod escape {
//! Manage xml character escapes
pub(crate) use crate::escapei::{do_unescape, EscapeError};
pub use crate::escapei::{escape, partial_escape, unescape, unescape_with};
pub(crate) use crate::escapei::do_unescape;
pub use crate::escapei::{escape, unescape, unescape_with, EscapeError};
}
pub mod events;
mod reader;
Expand Down
33 changes: 21 additions & 12 deletions src/se/var.rs
Expand Up @@ -4,7 +4,9 @@ use crate::{
se::Serializer,
writer::Writer,
};
use serde::ser::{self, Serialize};
use de::{INNER_VALUE, UNFLATTEN_PREFIX};
use serde::ser::{self, Serialize, SerializeMap};
use serde::Serializer as _;
use std::io::Write;

/// An implementation of `SerializeMap` for serializing to XML.
Expand Down Expand Up @@ -116,17 +118,24 @@ where
) -> Result<(), DeError> {
// TODO: Inherit indentation state from self.parent.writer
let writer = Writer::new(&mut self.buffer);
let mut serializer = Serializer::with_root(writer, Some(key));
value.serialize(&mut serializer)?;

if !self.buffer.is_empty() {
if self.buffer[0] == b'<' || key == "$value" {
// Drains buffer, moves it to children
self.children.append(&mut self.buffer);
} else {
self.attrs
.push_attribute((key.as_bytes(), self.buffer.as_ref()));
self.buffer.clear();
if key.starts_with(UNFLATTEN_PREFIX) {
    // A `$unflatten=name` field is serialized under the bare `name` (prefix
    // stripped) and its output always goes to the children, never to an attribute.
    let key = key.split_at(UNFLATTEN_PREFIX.len()).1;
    let mut serializer = Serializer::with_root(writer, Some(key));
    // Propagate failures: dropping this `Result` would hide serialization errors
    // and append a possibly half-written buffer to the children.
    serializer.serialize_newtype_struct(key, value)?;
    // Drains buffer, moves it to children
    self.children.append(&mut self.buffer);
} else {
    let mut serializer = Serializer::with_root(writer, Some(key));
    value.serialize(&mut serializer)?;

    if !self.buffer.is_empty() {
        // Output that starts with `<` is markup and `$value` is the inner value:
        // both go to the children. Anything else is stored as an attribute.
        if self.buffer[0] == b'<' || key == INNER_VALUE {
            // Drains buffer, moves it to children
            self.children.append(&mut self.buffer);
        } else {
            self.attrs
                .push_attribute((key.as_bytes(), self.buffer.as_ref()));
            self.buffer.clear();
        }
    }
}

Expand Down
4 changes: 2 additions & 2 deletions tests/serde-migrated.rs
Expand Up @@ -953,12 +953,12 @@ fn futile2() {
#[derive(Eq, PartialEq, Debug, Serialize, Deserialize)]
struct Object {
field: Option<Null>,
};
}

#[derive(Eq, PartialEq, Debug, Serialize, Deserialize)]
struct Stuff {
stuff_field: Option<Object>,
};
}

test_parse_ok(&[
(
Expand Down
18 changes: 18 additions & 0 deletions tests/serde_roundtrip.rs
Expand Up @@ -124,6 +124,24 @@ fn no_contiguous_fields() {
// assert_eq!(serialized, source);
}

#[test]
fn test_parse_unflatten_field() {
    // Struct whose only field is renamed with the `$unflatten=` prefix, so it is
    // read from and written to a `<NewKey>` child element.
    #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
    struct Unflatten {
        #[serde(rename = "$unflatten=NewKey")]
        field: String,
    }

    let xml = "<Unflatten><NewKey>Foo</NewKey></Unflatten>";
    let want = Unflatten {
        field: String::from("Foo"),
    };

    // Deserialization: the child element's text must end up in the renamed field.
    let got = ::quick_xml::de::from_str::<Unflatten>(xml).unwrap();
    assert_eq!(&got, &want);

    // Serialization: writing the value back must reproduce the input verbatim.
    let roundtripped = to_string(&got).unwrap();
    assert_eq!(&roundtripped, xml);
}

#[test]
fn escapes_in_cdata() {
#[derive(Debug, Deserialize, PartialEq)]
Expand Down
2 changes: 1 addition & 1 deletion tests/unit_tests.rs
Expand Up @@ -663,7 +663,7 @@ fn test_read_write_roundtrip_results_in_identity() -> Result<()> {
loop {
match reader.read_event(&mut buf)? {
Eof => break,
e => assert!(writer.write_event(e).is_ok()),
e => assert!(writer.write_event(e).is_ok())
}
}

Expand Down
11 changes: 11 additions & 0 deletions tests/xmlrs_reader_tests.rs
Expand Up @@ -42,6 +42,15 @@ fn sample_2_full() {

#[cfg(all(not(windows), feature = "escape-html"))]
#[test]
// FIXME: Fails with:
// ```
// Unexpected event at line 6:
// Expected: InvalidUtf8([10, 38, 110, 98, 115, 112, 59, 10]; invalid utf-8 sequence of 1 bytes from index 1)
// Found: Characters(
//
// )
// ```
#[ignore]
fn html5() {
test(
include_bytes!("documents/html5.html"),
Expand All @@ -52,6 +61,8 @@ fn html5() {

#[cfg(all(windows, feature = "escape-html"))]
#[test]
// FIXME: Fails the same way as the one above
#[ignore]
fn html5() {
test(
include_bytes!("documents/html5.html"),
Expand Down

0 comments on commit 95ff717

Please sign in to comment.