Skip to content

Commit

Permalink
Split reader into IoReader and SliceReader
Browse files Browse the repository at this point in the history
  • Loading branch information
999eagle committed Jul 13, 2022
1 parent a6588c2 commit f9cdf27
Show file tree
Hide file tree
Showing 14 changed files with 1,494 additions and 1,272 deletions.
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@ let xml = r#"<tag1 att1 = "test">
</tag2>
</tag1>"#;

let mut reader = Reader::from_str(xml);
let mut reader = Reader::from_reader(xml.as_bytes());
// If you want to read from a string or byte slice without buffering, use:
// let mut reader = Reader::from_str(xml);
// In that case, no `Vec` is needed for buffering below and you can use `read_event` instead of
// `read_event_into`.
reader.trim_text(true);

let mut count = 0;
Expand Down Expand Up @@ -75,9 +79,8 @@ let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);
let mut writer = Writer::new(Cursor::new(Vec::new()));
let mut buf = Vec::new();
loop {
match reader.read_event_into(&mut buf) {
match reader.read_event() {
Ok(Event::Start(ref e)) if e.name() == b"this_tag" => {

// creates a new element ... alternatively we could reuse `e` by calling
Expand All @@ -101,7 +104,6 @@ loop {
Ok(e) => assert!(writer.write_event(&e).is_ok()),
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
}
buf.clear();
}

let result = writer.into_inner().into_inner();
Expand Down
2 changes: 1 addition & 1 deletion benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
// TODO: read the namespaces too
// TODO: use fully normalized attribute values
fn parse_document(doc: &[u8]) -> XmlResult<()> {
let mut r = Reader::from_reader(doc);
let mut r = Reader::from_bytes(doc);
loop {
match r.read_event()? {
Event::Start(e) | Event::Empty(e) => {
Expand Down
82 changes: 26 additions & 56 deletions benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,15 @@ fn read_event(c: &mut Criterion) {
let mut group = c.benchmark_group("read_event");
group.bench_function("trim_text = false", |b| {
b.iter(|| {
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand All @@ -50,19 +48,17 @@ fn read_event(c: &mut Criterion) {

group.bench_function("trim_text = true", |b| {
b.iter(|| {
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand All @@ -79,18 +75,16 @@ fn read_namespaced_event(c: &mut Criterion) {
let mut group = c.benchmark_group("read_namespaced_event");
group.bench_function("trim_text = false", |b| {
b.iter(|| {
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
let mut ns_buf = Vec::new();
loop {
match r.read_namespaced_event(&mut buf, &mut ns_buf) {
match r.read_namespaced_event(&mut ns_buf) {
Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1,
Ok((_, Event::Eof)) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand All @@ -101,20 +95,18 @@ fn read_namespaced_event(c: &mut Criterion) {

group.bench_function("trim_text = true", |b| {
b.iter(|| {
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
let mut ns_buf = Vec::new();
loop {
match r.read_namespaced_event(&mut buf, &mut ns_buf) {
match r.read_namespaced_event(&mut ns_buf) {
Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1,
Ok((_, Event::Eof)) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand All @@ -131,19 +123,17 @@ fn bytes_text_unescaped(c: &mut Criterion) {
let mut group = c.benchmark_group("BytesText::unescaped");
group.bench_function("trim_text = false", |b| {
b.iter(|| {
let mut buf = Vec::new();
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut nbtxt = criterion::black_box(0);
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(),
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand All @@ -167,21 +157,19 @@ fn bytes_text_unescaped(c: &mut Criterion) {

group.bench_function("trim_text = true", |b| {
b.iter(|| {
let mut buf = Vec::new();
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
let mut count = criterion::black_box(0);
let mut nbtxt = criterion::black_box(0);
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Text(ref e)) => nbtxt += e.unescaped().unwrap().len(),
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand Down Expand Up @@ -210,78 +198,66 @@ fn one_event(c: &mut Criterion) {
let mut group = c.benchmark_group("One event");
group.bench_function("StartText", |b| {
let src = "Hello world!".repeat(512 / 12).into_bytes();
let mut buf = Vec::with_capacity(1024);
b.iter(|| {
let mut r = Reader::from_reader(src.as_ref());
let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false).check_comments(false);
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::StartText(e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};

buf.clear();

assert_eq!(nbtxt, 504);
})
});

group.bench_function("Start", |b| {
let src = format!(r#"<hello target="{}">"#, "world".repeat(512 / 5)).into_bytes();
let mut buf = Vec::with_capacity(1024);
b.iter(|| {
let mut r = Reader::from_reader(src.as_ref());
let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Start(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};

buf.clear();

assert_eq!(nbtxt, 525);
})
});

group.bench_function("Comment", |b| {
let src = format!(r#"<!-- hello "{}" -->"#, "world".repeat(512 / 5)).into_bytes();
let mut buf = Vec::with_capacity(1024);
b.iter(|| {
let mut r = Reader::from_reader(src.as_ref());
let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Comment(ref e)) => nbtxt += e.unescaped().unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
};

buf.clear();

assert_eq!(nbtxt, 520);
})
});

group.bench_function("CData", |b| {
let src = format!(r#"<![CDATA[hello "{}"]]>"#, "world".repeat(512 / 5)).into_bytes();
let mut buf = Vec::with_capacity(1024);
b.iter(|| {
let mut r = Reader::from_reader(src.as_ref());
let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::CData(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};

buf.clear();

assert_eq!(nbtxt, 518);
})
});
Expand All @@ -293,12 +269,11 @@ fn attributes(c: &mut Criterion) {
let mut group = c.benchmark_group("attributes");
group.bench_function("with_checks = true", |b| {
b.iter(|| {
let mut r = Reader::from_reader(PLAYERS);
let mut r = Reader::from_bytes(PLAYERS);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Empty(e)) => {
for attr in e.attributes() {
let _attr = attr.unwrap();
Expand All @@ -308,20 +283,18 @@ fn attributes(c: &mut Criterion) {
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(count, 1041);
})
});

group.bench_function("with_checks = false", |b| {
b.iter(|| {
let mut r = Reader::from_reader(PLAYERS);
let mut r = Reader::from_bytes(PLAYERS);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Empty(e)) => {
for attr in e.attributes().with_checks(false) {
let _attr = attr.unwrap();
Expand All @@ -331,20 +304,18 @@ fn attributes(c: &mut Criterion) {
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(count, 1041);
})
});

group.bench_function("try_get_attribute", |b| {
b.iter(|| {
let mut r = Reader::from_reader(PLAYERS);
let mut r = Reader::from_bytes(PLAYERS);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Empty(e)) if e.name() == QName(b"player") => {
for name in ["num", "status", "avg"] {
if let Some(_attr) = e.try_get_attribute(name).unwrap() {
Expand All @@ -359,7 +330,6 @@ fn attributes(c: &mut Criterion) {
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(count, 150);
})
Expand Down
3 changes: 1 addition & 2 deletions examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut reader = Reader::from_str(DATA);
reader.trim_text(true);

let mut buf = Vec::new();
let mut custom_entities = HashMap::new();
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;

loop {
match reader.read_event_into(&mut buf) {
match reader.read_event() {
Ok(Event::DocType(ref e)) => {
for cap in entity_re.captures_iter(&e) {
custom_entities.insert(cap[1].to_vec(), cap[2].to_vec());
Expand Down
6 changes: 2 additions & 4 deletions examples/read_texts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@ fn main() {
reader.trim_text(true);

let mut txt = Vec::new();
let mut buf = Vec::new();

loop {
match reader.read_event_into(&mut buf) {
match reader.read_event() {
Ok(Event::Start(ref e)) if e.name().as_ref() == b"tag2" => {
txt.push(
reader
.read_text_into(QName(b"tag2"), &mut Vec::new())
.read_text(QName(b"tag2"))
.expect("Cannot decode text value"),
);
println!("{:?}", txt);
Expand All @@ -26,6 +25,5 @@ fn main() {
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
_ => (), // There are several other `Event`s we do not consider here
}
buf.clear();
}
}
6 changes: 3 additions & 3 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,7 @@ impl<'de> Deserializer<'de, SliceReader<'de>> {

/// Create new deserializer that will borrow data from the specified borrowing reader
#[inline]
fn from_borrowing_reader(mut reader: Reader<&'de [u8]>) -> Self {
fn from_borrowing_reader(mut reader: Reader<crate::SliceReader<'de>>) -> Self {
reader
.expand_empty_elements(true)
.check_end_names(true)
Expand Down Expand Up @@ -942,7 +942,7 @@ pub trait XmlRead<'i> {
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_reader`]
pub struct IoReader<R: BufRead> {
reader: Reader<R>,
reader: Reader<crate::IoReader<R>>,
buf: Vec<u8>,
}

Expand Down Expand Up @@ -987,7 +987,7 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
/// You cannot create it, it is created automatically when you call
/// [`Deserializer::from_str`] or [`Deserializer::from_slice`]
pub struct SliceReader<'de> {
reader: Reader<&'de [u8]>,
reader: Reader<crate::SliceReader<'de>>,
}

impl<'de> XmlRead<'de> for SliceReader<'de> {
Expand Down

0 comments on commit f9cdf27

Please sign in to comment.