Skip to content

Commit

Permalink
Merge pull request #426 from Mingun/read-event
Browse files Browse the repository at this point in the history
Use borrowing `read_event` instead of buffering `read_event_into` where possible
  • Loading branch information
dralley committed Jul 18, 2022
2 parents 068b36e + c49c349 commit b456b5a
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 227 deletions.
47 changes: 22 additions & 25 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,8 @@ use quick_xml::events::Event;

let xml = r#"<tag1 att1 = "test">
<tag2><!--Test comment-->Test</tag2>
<tag2>
Test 2
</tag2>
</tag1>"#;

<tag2>Test 2</tag2>
</tag1>"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);

Expand All @@ -43,20 +40,24 @@ loop {
// when the input is a &str or a &[u8], we don't actually need to use another
// buffer, we could directly call `reader.read_event()`
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) => {
match e.name() {
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
// exits the loop when reaching end of file
Ok(Event::Eof) => break,

Ok(Event::Start(e)) => {
match e.name().as_ref() {
b"tag1" => println!("attributes values: {:?}",
e.attributes().map(|a| a.unwrap().value).collect::<Vec<_>>()),
e.attributes().map(|a| a.unwrap().value)
.collect::<Vec<_>>()),
b"tag2" => count += 1,
_ => (),
}
},
Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).unwrap().into_owned()),
Ok(Event::Eof) => break, // exits the loop when reaching end of file
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
_ => (), // There are several other `Event`s we do not consider here
}
}
Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),

// There are several other `Event`s we do not consider here
_ => (),
}
// if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
buf.clear();
}
Expand All @@ -65,24 +66,21 @@ loop {
### Writer

```rust
use quick_xml::Writer;
use quick_xml::Reader;
use quick_xml::events::{Event, BytesEnd, BytesStart};
use quick_xml::{Reader, Writer};
use std::io::Cursor;
use std::iter;

let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);
let mut writer = Writer::new(Cursor::new(Vec::new()));
let mut buf = Vec::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(ref e)) if e.name() == b"this_tag" => {
match reader.read_event() {
Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => {

// creates a new element ... alternatively we could reuse `e` by calling
// `e.into_owned()`
let mut elem = BytesStart::owned(b"my_elem".to_vec(), "my_elem".len());
let mut elem = BytesStart::owned_name(b"my_elem".to_vec());

// collect existing attributes
elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
Expand All @@ -93,15 +91,14 @@ loop {
// writes the event to the writer
assert!(writer.write_event(Event::Start(elem)).is_ok());
},
Ok(Event::End(ref e)) if e.name() == b"this_tag" => {
Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok());
},
Ok(Event::Eof) => break,
// you can use either `e` or `&e` if you don't want to move the event
Ok(e) => assert!(writer.write_event(&e).is_ok()),
// we can either move or borrow the event to write, depending on your use-case
Ok(e) => assert!(writer.write_event(e).is_ok()),
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
}
buf.clear();
}

let result = writer.into_inner().into_inner();
Expand Down
3 changes: 1 addition & 2 deletions examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut reader = Reader::from_str(DATA);
reader.trim_text(true);

let mut buf = Vec::new();
let mut custom_entities: HashMap<String, String> = HashMap::new();
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;

loop {
match reader.read_event_into(&mut buf) {
match reader.read_event() {
Ok(Event::DocType(ref e)) => {
for cap in entity_re.captures_iter(&e) {
custom_entities.insert(
Expand Down
104 changes: 5 additions & 99 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
//! High performance XML reader/writer.
//!
//! ## Description
//! # Description
//!
//! quick-xml contains two modes of operation:
//!
//! A streaming API based on the [StAX] model. This is suited for larger XML documents which
//! cannot be completely read into memory at once.
//!
//! The user has to expicitely _ask_ for the next XML event, similar
//! The user has to explicitly _ask_ for the next XML event, similar
//! to a database cursor.
//! This is achieved by the following two structs:
//!
Expand All @@ -20,104 +20,10 @@
//! Furthermore, quick-xml also contains optional [Serde] support to directly serialize and deserialize from
//! structs, without having to deal with the XML events.
//!
//! ## Examples
//! # Examples
//!
//! ### Reader
//!
//! ```rust
//! use quick_xml::Reader;
//! use quick_xml::events::Event;
//!
//! let xml = r#"<tag1 att1 = "test">
//! <tag2><!--Test comment-->Test</tag2>
//! <tag2>
//! Test 2
//! </tag2>
//! </tag1>"#;
//!
//! let mut reader = Reader::from_str(xml);
//! reader.trim_text(true);
//!
//! let mut count = 0;
//! let mut txt = Vec::new();
//! let mut buf = Vec::new();
//!
//! // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
//! loop {
//! match reader.read_event_into(&mut buf) {
//! // for triggering namespaced events, use this instead:
//! // match reader.read_namespaced_event(&mut buf) {
//! Ok(Event::Start(ref e)) => {
//! // for namespaced:
//! // Ok((ref namespace_value, Event::Start(ref e)))
//! match e.name().as_ref() {
//! b"tag1" => println!("attributes values: {:?}",
//! e.attributes().map(|a| a.unwrap().value)
//! .collect::<Vec<_>>()),
//! b"tag2" => count += 1,
//! _ => (),
//! }
//! },
//! // unescape and decode the text event using the reader encoding
//! Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
//! Ok(Event::Eof) => break, // exits the loop when reaching end of file
//! Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
//! _ => (), // There are several other `Event`s we do not consider here
//! }
//!
//! // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
//! buf.clear();
//! }
//! ```
//!
//! ### Writer
//!
//! ```rust
//! # use pretty_assertions::assert_eq;
//! use quick_xml::Writer;
//! use quick_xml::events::{Event, BytesEnd, BytesStart};
//! use quick_xml::Reader;
//! use std::io::Cursor;
//! use std::iter;
//!
//! let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
//! let mut reader = Reader::from_str(xml);
//! reader.trim_text(true);
//! let mut writer = Writer::new(Cursor::new(Vec::new()));
//! let mut buf = Vec::new();
//! loop {
//! match reader.read_event_into(&mut buf) {
//! Ok(Event::Start(ref e)) if e.name().as_ref() == b"this_tag" => {
//!
//! // crates a new element ... alternatively we could reuse `e` by calling
//! // `e.into_owned()`
//! let mut elem = BytesStart::owned(b"my_elem".to_vec(), "my_elem".len());
//!
//! // collect existing attributes
//! elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
//!
//! // copy existing attributes, adds a new my-key="some value" attribute
//! elem.push_attribute(("my-key", "some value"));
//!
//! // writes the event to the writer
//! assert!(writer.write_event(Event::Start(elem)).is_ok());
//! },
//! Ok(Event::End(ref e)) if e.name().as_ref() == b"this_tag" => {
//! assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok());
//! },
//! Ok(Event::Eof) => break,
//! Ok(e) => assert!(writer.write_event(e).is_ok()),
//! // or using the buffer
//! // Ok(e) => assert!(writer.write(&buf).is_ok()),
//! Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
//! }
//! buf.clear();
//! }
//!
//! let result = writer.into_inner().into_inner();
//! let expected = r#"<my_elem k1="v1" k2="v2" my-key="some value"><child>text</child></my_elem>"#;
//! assert_eq!(result, expected.as_bytes());
//! ```
//! - For a reading example see [`Reader`]
//! - For a writing example see [`Writer`]
//!
//! # Features
//!
Expand Down
21 changes: 16 additions & 5 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,28 +118,39 @@ impl EncodingRef {
/// let xml = r#"<tag1 att1 = "test">
/// <tag2><!--Test comment-->Test</tag2>
/// <tag2>Test 2</tag2>
/// </tag1>"#;
/// </tag1>"#;
/// let mut reader = Reader::from_str(xml);
/// reader.trim_text(true);
///
/// let mut count = 0;
/// let mut txt = Vec::new();
/// let mut buf = Vec::new();
///
/// // The `Reader` does not implement `Iterator` because it outputs borrowed data (`Cow`s)
/// loop {
/// // NOTE: this is the generic case when we don't know about the input BufRead.
/// // when the input is a &str or a &[u8], we don't actually need to use another
/// // buffer, we could directly call `reader.read_event()`
/// match reader.read_event_into(&mut buf) {
/// Ok(Event::Start(ref e)) => {
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
/// // exits the loop when reaching end of file
/// Ok(Event::Eof) => break,
///
/// Ok(Event::Start(e)) => {
/// match e.name().as_ref() {
/// b"tag1" => println!("attributes values: {:?}",
/// e.attributes().map(|a| a.unwrap().value)
/// .collect::<Vec<_>>()),
/// b"tag2" => count += 1,
/// _ => (),
/// }
/// },
/// }
/// Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()),
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
/// Ok(Event::Eof) => break,
///
/// // There are several other `Event`s we do not consider here
/// _ => (),
/// }
/// // if we don't keep a borrow elsewhere, we can clear the buffer to keep memory usage low
/// buf.clear();
/// }
/// ```
Expand Down
18 changes: 8 additions & 10 deletions src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,23 @@ use std::io::Write;
///
/// # Examples
///
/// ```rust
/// ```
/// # use pretty_assertions::assert_eq;
/// use quick_xml::{Reader, Writer};
/// use quick_xml::events::{Event, BytesEnd, BytesStart};
/// use quick_xml::{Reader, Writer};
/// use std::io::Cursor;
///
/// let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
/// let mut reader = Reader::from_str(xml);
/// reader.trim_text(true);
/// let mut writer = Writer::new(Cursor::new(Vec::new()));
/// let mut buf = Vec::new();
/// loop {
/// match reader.read_event_into(&mut buf) {
/// Ok(Event::Start(ref e)) if e.name().as_ref() == b"this_tag" => {
/// match reader.read_event() {
/// Ok(Event::Start(e)) if e.name().as_ref() == b"this_tag" => {
///
/// // creates a new element ... alternatively we could reuse `e` by calling
/// // `e.into_owned()`
/// let mut elem = BytesStart::owned(b"my_elem".to_vec(), "my_elem".len());
/// let mut elem = BytesStart::owned_name(b"my_elem".to_vec());
///
/// // collect existing attributes
/// elem.extend_attributes(e.attributes().map(|attr| attr.unwrap()));
Expand All @@ -38,15 +37,14 @@ use std::io::Write;
/// // writes the event to the writer
/// assert!(writer.write_event(Event::Start(elem)).is_ok());
/// },
/// Ok(Event::End(ref e)) if e.name().as_ref() == b"this_tag" => {
/// Ok(Event::End(e)) if e.name().as_ref() == b"this_tag" => {
/// assert!(writer.write_event(Event::End(BytesEnd::borrowed(b"my_elem"))).is_ok());
/// },
/// Ok(Event::Eof) => break,
/// // we can either move or borrow the event to write, depending on your use-case
/// Ok(e) => assert!(writer.write_event(&e).is_ok()),
/// Err(e) => panic!("{}", e),
/// Ok(e) => assert!(writer.write_event(e).is_ok()),
/// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
/// }
/// buf.clear();
/// }
///
/// let result = writer.into_inner().into_inner();
Expand Down

0 comments on commit b456b5a

Please sign in to comment.