Skip to content

Commit

Permalink
Add functions for attribute value normalization
Browse files Browse the repository at this point in the history
closes #371
  • Loading branch information
dralley committed Jun 19, 2023
1 parent 5a536d0 commit f317d76
Show file tree
Hide file tree
Showing 6 changed files with 535 additions and 12 deletions.
13 changes: 13 additions & 0 deletions Changelog.md
Expand Up @@ -14,8 +14,17 @@

### Bug Fixes

- [#379]: Improved compliance with the XML attribute value normalization process by
adding `Attribute::normalized_value()` and `Attribute::normalized_value_with()`,
which ought to be used in place of `Attribute::unescape_value()` and
`Attribute::unescape_value_with()`
### Bug Fixes

### Misc Changes

- [#379]: Added tests for attribute value normalization

[#379]: https://github.com/tafia/quick-xml/pull/379

## 0.29.0 -- 2023-06-13

Expand Down Expand Up @@ -111,6 +120,10 @@
- [#565]: Fix compilation error when build with serde <1.0.139


### New Tests

- [#379]: Added tests for attribute value normalization

[externally tagged]: https://serde.rs/enum-representations.html#externally-tagged
[#490]: https://github.com/tafia/quick-xml/pull/490
[#510]: https://github.com/tafia/quick-xml/issues/510
Expand Down
12 changes: 4 additions & 8 deletions benches/macrobenches.rs
Expand Up @@ -43,14 +43,13 @@ static INPUTS: &[(&str, &str)] = &[
("players.xml", PLAYERS),
];

// TODO: use fully normalized attribute values
fn parse_document_from_str(doc: &str) -> XmlResult<()> {
let mut r = Reader::from_str(doc);
loop {
match criterion::black_box(r.read_event()?) {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
}
}
Event::Text(e) => {
Expand All @@ -67,15 +66,14 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
Ok(())
}

// TODO: use fully normalized attribute values
fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
let mut r = Reader::from_reader(doc);
let mut buf = Vec::new();
loop {
match criterion::black_box(r.read_event_into(&mut buf)?) {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
}
}
Event::Text(e) => {
Expand All @@ -93,15 +91,14 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
Ok(())
}

// TODO: use fully normalized attribute values
fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
let mut r = NsReader::from_str(doc);
loop {
match criterion::black_box(r.read_resolved_event()?) {
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
criterion::black_box(resolved_ns);
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
}
}
(resolved_ns, Event::Text(e)) => {
Expand All @@ -120,7 +117,6 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
Ok(())
}

// TODO: use fully normalized attribute values
fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
let mut r = NsReader::from_reader(doc);
let mut buf = Vec::new();
Expand All @@ -129,7 +125,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
criterion::black_box(resolved_ns);
for attr in e.attributes() {
criterion::black_box(attr?.decode_and_unescape_value(&r)?);
criterion::black_box(attr?.decode_and_normalize_value(&r)?);
}
}
(resolved_ns, Event::Text(e)) => {
Expand Down
72 changes: 72 additions & 0 deletions benches/microbenches.rs
@@ -1,6 +1,9 @@
use std::borrow::Cow;

use criterion::{self, criterion_group, criterion_main, Criterion};
use pretty_assertions::assert_eq;
use quick_xml::escape::{escape, unescape};
use quick_xml::events::attributes::Attribute;
use quick_xml::events::Event;
use quick_xml::name::QName;
use quick_xml::reader::{NsReader, Reader};
Expand Down Expand Up @@ -242,6 +245,74 @@ fn attributes(c: &mut Criterion) {
assert_eq!(count, 150);
})
});

group.finish();
}

/// Benchmarks normalizing attribute values
fn attribute_value_normalization(c: &mut Criterion) {
let mut group = c.benchmark_group("attribute_value_normalization");

group.bench_function("noop_short", |b| {
let attr = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"foobar"),
};
b.iter(|| {
criterion::black_box(attr.normalized_value()).unwrap();
})
});

group.bench_function("noop_long", |b| {
let attr = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(LOREM_IPSUM_TEXT.as_bytes()),
};
b.iter(|| {
criterion::black_box(attr.normalized_value()).unwrap();
})
});

group.bench_function("replacement_chars", |b| {
let attr = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"just a bit\n of text without\tany entities"),
};
b.iter(|| {
criterion::black_box(attr.normalized_value()).unwrap();
})
});

group.bench_function("char_reference", |b| {
let attr1 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"prefix &#34;some stuff&#34;,&#x22;more stuff&#x22;"),
};
let attr2 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"&#38;&#60;"),
};
b.iter(|| {
criterion::black_box(attr1.normalized_value()).unwrap();
criterion::black_box(attr2.normalized_value()).unwrap();
})
});

group.bench_function("entity_reference", |b| {
let attr1 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"age &gt; 72 &amp;&amp; age &lt; 21"),
};
let attr2 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"&quot;what&apos;s that?&quot;"),
};
b.iter(|| {
criterion::black_box(attr1.normalized_value()).unwrap();
criterion::black_box(attr2.normalized_value()).unwrap();
})
});

group.finish();
}

Expand Down Expand Up @@ -354,6 +425,7 @@ criterion_group!(
read_resolved_event_into,
one_event,
attributes,
attribute_value_normalization,
escaping,
unescaping,
);
Expand Down
1 change: 1 addition & 0 deletions src/errors.rs
Expand Up @@ -82,6 +82,7 @@ impl From<EscapeError> for Error {
}

impl From<AttrError> for Error {
/// Creates a new `Error::InvalidAttr` from the given error
#[inline]
fn from(error: AttrError) -> Self {
Error::InvalidAttr(error)
Expand Down

0 comments on commit f317d76

Please sign in to comment.