diff --git a/Changelog.md b/Changelog.md index c3053257..393b3bfb 100644 --- a/Changelog.md +++ b/Changelog.md @@ -27,6 +27,17 @@ the XML declared encoding and always use UTF-8 - [#416]: Add `borrow()` methods in all event structs which allows to get a borrowed version of any event +- [#437]: Split out namespace reading functionality to a dedicated `NsReader`, namely: + |Old function in `Reader`|New function in `NsReader` + |------------------------|-------------------------- + | |`read_event` -- borrow from input + | |`read_resolved_event` -- borrow from input + | |`read_event_into` + |`read_namespaced_event` |`read_resolved_event_into` + | |`resolve` + |`event_namespace` |`resolve_element` + |`attribute_namespace` |`resolve_attribute` + ### Bug Fixes @@ -167,6 +178,7 @@ [#418]: https://github.com/tafia/quick-xml/pull/418 [#421]: https://github.com/tafia/quick-xml/pull/421 [#423]: https://github.com/tafia/quick-xml/pull/423 +[#437]: https://github.com/tafia/quick-xml/pull/437 ## 0.23.0 -- 2022-05-08 diff --git a/benches/microbenches.rs b/benches/microbenches.rs index 8bbe1a67..1e7cc232 100644 --- a/benches/microbenches.rs +++ b/benches/microbenches.rs @@ -3,7 +3,7 @@ use pretty_assertions::assert_eq; use quick_xml::escape::{escape, unescape}; use quick_xml::events::Event; use quick_xml::name::QName; -use quick_xml::Reader; +use quick_xml::{NsReader, Reader}; static SAMPLE: &[u8] = include_bytes!("../tests/documents/sample_rss.xml"); static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml"); @@ -73,19 +73,18 @@ fn read_event(c: &mut Criterion) { group.finish(); } -/// Benchmarks the `Reader::read_namespaced_event` function with all XML well-formless +/// Benchmarks the `NsReader::read_resolved_event_into` function with all XML well-formedness /// checks disabled (with and without trimming content of #text nodes) -fn read_namespaced_event(c: &mut Criterion) { - let mut group = c.benchmark_group("read_namespaced_event"); +fn read_resolved_event_into(c: 
&mut Criterion) { + let mut group = c.benchmark_group("NsReader::read_resolved_event_into"); group.bench_function("trim_text = false", |b| { b.iter(|| { - let mut r = Reader::from_reader(SAMPLE); + let mut r = NsReader::from_bytes(SAMPLE); r.check_end_names(false).check_comments(false); let mut count = criterion::black_box(0); let mut buf = Vec::new(); - let mut ns_buf = Vec::new(); loop { - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event_into(&mut buf) { Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1, Ok((_, Event::Eof)) => break, _ => (), @@ -101,15 +100,14 @@ fn read_namespaced_event(c: &mut Criterion) { group.bench_function("trim_text = true", |b| { b.iter(|| { - let mut r = Reader::from_reader(SAMPLE); + let mut r = NsReader::from_bytes(SAMPLE); r.check_end_names(false) .check_comments(false) .trim_text(true); let mut count = criterion::black_box(0); let mut buf = Vec::new(); - let mut ns_buf = Vec::new(); loop { - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event_into(&mut buf) { Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1, Ok((_, Event::Eof)) => break, _ => (), @@ -393,7 +391,7 @@ purus. 
Consequat id porta nibh venenatis cras sed felis."; criterion_group!( benches, read_event, - read_namespaced_event, + read_resolved_event_into, one_event, attributes, escaping, diff --git a/src/lib.rs b/src/lib.rs index f42ae359..a210ed18 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -65,5 +65,5 @@ mod writer; #[cfg(feature = "serialize")] pub use crate::errors::serialize::DeError; pub use crate::errors::{Error, Result}; -pub use crate::reader::{Decoder, Reader}; +pub use crate::reader::{Decoder, NsReader, Reader}; pub use crate::writer::{ElementWriter, Writer}; diff --git a/src/name.rs b/src/name.rs index b3edf3a2..64891d70 100644 --- a/src/name.rs +++ b/src/name.rs @@ -274,13 +274,15 @@ impl<'a> AsRef<[u8]> for Namespace<'a> { //////////////////////////////////////////////////////////////////////////////////////////////////// -/// Result of [prefix] resolution which creates by [`Reader::attribute_namespace`], -/// [`Reader::event_namespace`] and [`Reader::read_namespaced_event`] methods. +/// Result of [prefix] resolution which is created by [`NsReader::resolve_attribute`], +/// [`NsReader::resolve_element`], [`NsReader::read_resolved_event`] and +/// [`NsReader::read_resolved_event_into`] methods. 
/// /// [prefix]: Prefix -/// [`Reader::attribute_namespace`]: crate::reader::Reader::attribute_namespace -/// [`Reader::event_namespace`]: crate::reader::Reader::event_namespace -/// [`Reader::read_namespaced_event`]: crate::reader::Reader::read_namespaced_event +/// [`NsReader::resolve_attribute`]: crate::reader::NsReader::resolve_attribute +/// [`NsReader::resolve_element`]: crate::reader::NsReader::resolve_element +/// [`NsReader::read_resolved_event`]: crate::reader::NsReader::read_resolved_event +/// [`NsReader::read_resolved_event_into`]: crate::reader::NsReader::read_resolved_event_into #[derive(Clone, PartialEq, Eq, Hash)] pub enum ResolveResult<'ns> { /// Qualified name does not contain prefix, and resolver does not define diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs new file mode 100644 index 00000000..2b22c6a9 --- /dev/null +++ b/src/reader/buffered_reader.rs @@ -0,0 +1,422 @@ +//! This is an implementation of [`Reader`] for reading from a [`BufRead`] as +//! underlying byte stream. + +use std::fs::File; +use std::io::{self, BufRead, BufReader}; +use std::path::Path; + +use crate::errors::{Error, Result}; +use crate::events::Event; +use crate::name::QName; +use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, XmlSource}; + +use memchr; + +/// This is an implementation of [`Reader`] for reading from a [`BufRead`] as +/// underlying byte stream. +impl Reader { + /// Reads the next `Event`. + /// + /// This is the main entry point for reading XML `Event`s. + /// + /// `Event`s borrow `buf` and can be converted to own their data if needed (uses `Cow` + /// internally). + /// + /// Having the possibility to control the internal buffers gives you some additional benefits + /// such as: + /// + /// - Reduce the number of allocations by reusing the same buffer. For constrained systems, + /// you can call `buf.clear()` once you are done with processing the event (typically at the + /// end of your loop). 
+ /// - Reserve the buffer length if you know the file size (using `Vec::with_capacity`). + /// + /// # Examples + /// + /// ``` + /// use quick_xml::Reader; + /// use quick_xml::events::Event; + /// + /// let xml = r#"<tag1 att1 = "test"> + /// <tag2><!--Test comment-->Test</tag2> + /// <tag2>Test 2</tag2> + /// </tag1>"#; + /// let mut reader = Reader::from_str(xml); + /// reader.trim_text(true); + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_event_into(&mut buf) { + /// Ok(Event::Start(ref e)) => count += 1, + /// Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()), + /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), + /// Ok(Event::Eof) => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// println!("Found {} start events", count); + /// println!("Text events: {:?}", txt); + /// ``` + #[inline] + pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec) -> Result> { + self.read_event_impl(buf) + } + + /// Reads until end element is found using provided buffer as intermediate + /// storage for events content. This function is supposed to be called after + /// you already read a [`Start`] event. + /// + /// Manages nested cases where parent and child elements have the same name. + /// + /// If the corresponding [`End`] event is not found, the [`Error::UnexpectedEof`] + /// will be returned. In particular, that error will be returned if you call + /// this method without consuming the corresponding [`Start`] event first. + /// + /// If your reader is created from a string slice or byte array slice, it is + /// better to use [`read_to_end()`] method, because it will not copy bytes + /// into intermediate buffer. + /// + /// The provided `buf` buffer will be filled only by one event's content at a time. + /// Before reading of each event the buffer will be cleared. If you know an + /// appropriate size of each event, you can preallocate the buffer to reduce + /// number of reallocations. 
+ /// + /// The `end` parameter should contain name of the end element _in the reader + /// encoding_. It is good practice to always get that parameter using + /// [`BytesStart::to_end()`] method. + /// + /// The correctness of the skipped events is not checked if you disabled + /// the [`check_end_names`] option. + /// + /// # Namespaces + /// + /// While the [`Reader`] does not support namespace resolution, namespaces + /// do not change the algorithm for comparing names. Although the names + /// `a:name` and `b:name` where both prefixes `a` and `b` resolve to the + /// same namespace, are semantically equivalent, `</b:name>` cannot close + /// `<a:name>`, because according to [the specification] + /// + /// > The end of every element that begins with a **start-tag** MUST be marked + /// > by an **end-tag** containing a name that echoes the element's type as + /// > given in the **start-tag** + /// + /// # Examples + /// + /// This example shows how you can skip XML content after you read the + /// start event. + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::{BytesStart, Event}; + /// use quick_xml::Reader; + /// + /// let mut reader = Reader::from_str(r#" + /// <outer> + /// <inner> + /// <inner></inner> + /// <inner/> + /// <outer></outer> + /// <outer/> + /// </inner> + /// </outer> + /// "#); + /// reader.trim_text(true); + /// let mut buf = Vec::new(); + /// + /// let start = BytesStart::borrowed_name(b"outer"); + /// let end = start.to_end().into_owned(); + /// + /// // First, we read a start event... + /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start)); + /// + /// //...then, we could skip all events to the corresponding end event. + /// // This call will correctly handle nested elements. + /// // Note, however, that this method does not handle namespaces. 
+ /// reader.read_to_end_into(end.name(), &mut buf).unwrap(); + /// + /// // At the end we should get an Eof event, because we ate the whole XML + /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); + /// ``` + /// + /// [`Start`]: Event::Start + /// [`End`]: Event::End + /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end + /// [`read_to_end()`]: Self::read_to_end + /// [`check_end_names`]: Self::check_end_names + /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag + pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result<()> { + let mut depth = 0; + loop { + buf.clear(); + match self.read_event_into(buf) { + Err(e) => return Err(e), + + Ok(Event::Start(e)) if e.name() == end => depth += 1, + Ok(Event::End(e)) if e.name() == end => { + if depth == 0 { + return Ok(()); + } + depth -= 1; + } + Ok(Event::Eof) => { + let name = self.decoder().decode(end.as_ref()); + return Err(Error::UnexpectedEof(format!("", name))); + } + _ => (), + } + } + } + + /// Reads optional text between start and end tags. + /// + /// If the next event is a [`Text`] event, returns the decoded and unescaped content as a + /// `String`. If the next event is an [`End`] event, returns the empty string. In all other + /// cases, returns an error. + /// + /// Any text will be decoded using the XML encoding specified in the XML declaration (or UTF-8 + /// if none is specified). 
+ /// + /// # Examples + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::Reader; + /// use quick_xml::events::Event; + /// + /// let mut xml = Reader::from_reader(b" + /// <b> + /// + /// " as &[u8]); + /// xml.trim_text(true); + /// + /// let expected = ["", ""]; + /// for &content in expected.iter() { + /// match xml.read_event_into(&mut Vec::new()) { + /// Ok(Event::Start(ref e)) => { + /// assert_eq!(&xml.read_text_into(e.name(), &mut Vec::new()).unwrap(), content); + /// }, + /// e => panic!("Expecting Start event, found {:?}", e), + /// } + /// } + /// ``` + /// + /// [`Text`]: Event::Text + /// [`End`]: Event::End + pub fn read_text_into(&mut self, end: QName, buf: &mut Vec) -> Result { + let s = match self.read_event_into(buf) { + Err(e) => return Err(e), + + Ok(Event::Text(e)) => e.decode_and_unescape(self)?.into_owned(), + Ok(Event::End(e)) if e.name() == end => return Ok("".to_string()), + Ok(Event::Eof) => return Err(Error::UnexpectedEof("Text".to_string())), + _ => return Err(Error::TextNotFound), + }; + self.read_to_end_into(end, buf)?; + Ok(s) + } +} + +impl Reader> { + /// Creates an XML reader from a file path. + pub fn from_file>(path: P) -> Result { + let file = File::open(path).map_err(Error::Io)?; + let reader = BufReader::new(file); + Ok(Self::from_reader(reader)) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Implementation of `XmlSource` for any `BufRead` reader using a user-given +/// `Vec` as buffer that will be borrowed by events. 
+impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec> for R { + #[inline] + fn read_bytes_until( + &mut self, + byte: u8, + buf: &'b mut Vec, + position: &mut usize, + ) -> Result> { + let mut read = 0; + let mut done = false; + let start = buf.len(); + while !done { + let used = { + let available = match self.fill_buf() { + Ok(n) if n.is_empty() => break, + Ok(n) => n, + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + }; + + match memchr::memchr(byte, available) { + Some(i) => { + buf.extend_from_slice(&available[..i]); + done = true; + i + 1 + } + None => { + buf.extend_from_slice(available); + available.len() + } + } + }; + self.consume(used); + read += used; + } + *position += read; + + if read == 0 { + Ok(None) + } else { + Ok(Some(&buf[start..])) + } + } + + fn read_bang_element( + &mut self, + buf: &'b mut Vec, + position: &mut usize, + ) -> Result> { + // Peeked one bang ('!') before being called, so it's guaranteed to + // start with it. 
+ let start = buf.len(); + let mut read = 1; + buf.push(b'!'); + self.consume(1); + + let bang_type = BangType::new(self.peek_one()?)?; + + loop { + match self.fill_buf() { + // Note: Do not update position, so the error points to + // somewhere sane rather than at the EOF + Ok(n) if n.is_empty() => return Err(bang_type.to_err()), + Ok(available) => { + if let Some((consumed, used)) = bang_type.parse(available, read) { + buf.extend_from_slice(consumed); + + self.consume(used); + read += used; + + *position += read; + break; + } else { + buf.extend_from_slice(available); + + let used = available.len(); + self.consume(used); + read += used; + } + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + } + } + + if read == 0 { + Ok(None) + } else { + Ok(Some((bang_type, &buf[start..]))) + } + } + + #[inline] + fn read_element( + &mut self, + buf: &'b mut Vec, + position: &mut usize, + ) -> Result> { + let mut state = ReadElementState::Elem; + let mut read = 0; + + let start = buf.len(); + loop { + match self.fill_buf() { + Ok(n) if n.is_empty() => break, + Ok(available) => { + if let Some((consumed, used)) = state.change(available) { + buf.extend_from_slice(consumed); + + self.consume(used); + read += used; + + *position += read; + break; + } else { + buf.extend_from_slice(available); + + let used = available.len(); + self.consume(used); + read += used; + } + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + }; + } + + if read == 0 { + Ok(None) + } else { + Ok(Some(&buf[start..])) + } + } + + /// Consume and discard all the whitespace until the next non-whitespace + /// character or EOF. 
+ fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { + loop { + break match self.fill_buf() { + Ok(n) => { + let count = n.iter().position(|b| !is_whitespace(*b)).unwrap_or(n.len()); + if count > 0 { + self.consume(count); + *position += count; + continue; + } else { + Ok(()) + } + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => Err(Error::Io(e)), + }; + } + } + + /// Consume and discard one character if it matches the given byte. Return + /// true if it matched. + fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result { + match self.peek_one()? { + Some(b) if b == byte => { + *position += 1; + self.consume(1); + Ok(true) + } + _ => Ok(false), + } + } + + /// Return one character without consuming it, so that future `read_*` calls + /// will still include it. On EOF, return None. + fn peek_one(&mut self) -> Result> { + loop { + break match self.fill_buf() { + Ok(n) if n.is_empty() => Ok(None), + Ok(n) => Ok(Some(n[0])), + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => Err(Error::Io(e)), + }; + } + } +} diff --git a/src/reader.rs b/src/reader/mod.rs similarity index 70% rename from src/reader.rs rename to src/reader/mod.rs index f3a868a7..0ad1d90a 100644 --- a/src/reader.rs +++ b/src/reader/mod.rs @@ -1,18 +1,139 @@ //! A module to handle `Reader` use std::borrow::Cow; -use std::io::{self, BufRead, BufReader}; -use std::{fs::File, path::Path, str::from_utf8}; +use std::str::from_utf8; #[cfg(feature = "encoding")] use encoding_rs::{Encoding, UTF_16BE, UTF_16LE, UTF_8}; use crate::errors::{Error, Result}; use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; -use crate::name::{LocalName, NamespaceResolver, QName, ResolveResult}; use memchr; +macro_rules! configure_methods { + ($($holder:ident)?) => { + /// Changes whether empty elements should be split into an `Open` and a `Close` event. 
+ /// + /// When set to `true`, all [`Empty`] events produced by a self-closing tag like `<tag/>` are + /// expanded into a [`Start`] event followed by an [`End`] event. When set to `false` (the + /// default), those tags are represented by an [`Empty`] event instead. + /// + /// Note, that setting this to `true` will lead to additional allocations that + /// are needed to store tag name for an [`End`] event. There is no additional + /// allocation, however, if [`Self::check_end_names()`] is also set. + /// + /// (`false` by default) + /// + /// [`Empty`]: Event::Empty + /// [`Start`]: Event::Start + /// [`End`]: Event::End + pub fn expand_empty_elements(&mut self, val: bool) -> &mut Self { + self $(.$holder)? .expand_empty_elements = val; + self + } + + /// Changes whether whitespace before and after character data should be removed. + /// + /// When set to `true`, all [`Text`] events are trimmed. If they are empty, no event will be + /// pushed. + /// + /// (`false` by default) + /// + /// [`Text`]: Event::Text + pub fn trim_text(&mut self, val: bool) -> &mut Self { + self $(.$holder)? .trim_text_start = val; + self $(.$holder)? .trim_text_end = val; + self + } + + /// Changes whether whitespace after character data should be removed. + /// + /// When set to `true`, trailing whitespace is trimmed in [`Text`] events. + /// + /// (`false` by default) + /// + /// [`Text`]: Event::Text + pub fn trim_text_end(&mut self, val: bool) -> &mut Self { + self $(.$holder)? .trim_text_end = val; + self + } + + /// Changes whether trailing whitespaces after the markup name are trimmed in closing tags + /// `</a >`. + /// + /// If true the emitted [`End`] event is stripped of trailing whitespace after the markup name. + /// + /// Note that if set to `false` and `check_end_names` is true the comparison of markup names is + /// going to fail erroneously if a closing tag contains trailing whitespaces. 
+ /// + /// (`true` by default) + /// + /// [`End`]: Event::End + pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Self { + self $(.$holder)? .trim_markup_names_in_closing_tags = val; + self + } + + /// Changes whether mismatched closing tag names should be detected. + /// + /// Note, that start and end tags [should match literally][spec], they cannot + /// have different prefixes even if both prefixes resolve to the same namespace. + /// The XML + /// + /// ```xml + /// <outer xmlns="namespace" xmlns:p="namespace"> + /// </p:outer> + /// ``` + /// + /// is not valid, even though semantically the start tag is the same as the + /// end tag. The reason is that namespaces are an extension of the original + /// XML specification (without namespaces) and it should be backward-compatible. + /// + /// When set to `false`, it won't check if a closing tag matches the corresponding opening tag. + /// For example, `<a></b>` will be permitted. + /// + /// If the XML is known to be sane (already processed, etc.) this saves extra time. + /// + /// Note that the emitted [`End`] event will not be modified if this is disabled, i.e. it will + /// contain the data of the mismatched end tag. + /// + /// Note, that setting this to `true` will lead to additional allocations that + /// are needed to store tag name for an [`End`] event. There is no additional + /// allocation, however, if [`Self::expand_empty_elements()`] is also set. + /// + /// (`true` by default) + /// + /// [spec]: https://www.w3.org/TR/xml11/#dt-etag + /// [`End`]: Event::End + pub fn check_end_names(&mut self, val: bool) -> &mut Self { + self $(.$holder)? .check_end_names = val; + self + } + + /// Changes whether comments should be validated. + /// + /// When set to `true`, every [`Comment`] event will be checked for not containing `--`, which + /// is not allowed in XML comments. Most of the time we don't want comments at all so we don't + /// really care about comment correctness, thus the default value is `false` to improve + /// performance. 
+ /// + /// (`false` by default) + /// + /// [`Comment`]: Event::Comment + pub fn check_comments(&mut self, val: bool) -> &mut Self { + self $(.$holder)? .check_comments = val; + self + } + }; +} + +mod buffered_reader; +mod ns_reader; +mod slice_reader; + +pub use ns_reader::NsReader; + /// Possible reader states. The state transition diagram (`true` and `false` shows /// value of [`Reader::expand_empty_elements()`] option): /// @@ -109,6 +230,9 @@ impl EncodingRef { /// /// Consumes bytes and streams XML [`Event`]s. /// +/// This reader does not manage namespace declarations and not able to resolve +/// prefixes. If you want these features, use the [`NsReader`]. +/// /// # Examples /// /// ``` @@ -154,6 +278,8 @@ impl EncodingRef { /// buf.clear(); /// } /// ``` +/// +/// [`NsReader`]: crate::reader::NsReader #[derive(Clone)] pub struct Reader { /// reader @@ -196,13 +322,6 @@ pub struct Reader { /// for that field for details opened_starts: Vec, - /// A buffer to manage namespaces - ns_resolver: NamespaceResolver, - /// For `Empty` events keep the 'scope' of the namespace on the stack artificially. That way, the - /// consumer has a chance to use `resolve` in the context of the empty element. We perform the - /// pop as the first operation in the next `next()` call. - pending_pop: bool, - #[cfg(feature = "encoding")] /// Reference to the encoding used to read an XML encoding: EncodingRef, @@ -225,112 +344,12 @@ impl Reader { buf_position: 0, check_comments: false, - ns_resolver: NamespaceResolver::default(), - pending_pop: false, - #[cfg(feature = "encoding")] encoding: EncodingRef::Implicit(UTF_8), } } - /// Changes whether empty elements should be split into an `Open` and a `Close` event. - /// - /// When set to `true`, all [`Empty`] events produced by a self-closing tag like `` are - /// expanded into a [`Start`] event followed by an [`End`] event. When set to `false` (the - /// default), those tags are represented by an [`Empty`] event instead. 
- /// - /// Note, that setting this to `true` will lead to additional allocates that - /// needed to store tag name for an [`End`] event. There is no additional - /// allocation, however, if [`Self::check_end_names()`] is also set. - /// - /// (`false` by default) - /// - /// [`Empty`]: Event::Empty - /// [`Start`]: Event::Start - /// [`End`]: Event::End - pub fn expand_empty_elements(&mut self, val: bool) -> &mut Self { - self.expand_empty_elements = val; - self - } - - /// Changes whether whitespace before and after character data should be removed. - /// - /// When set to `true`, all [`Text`] events are trimmed. If they are empty, no event will be - /// pushed. - /// - /// (`false` by default) - /// - /// [`Text`]: Event::Text - pub fn trim_text(&mut self, val: bool) -> &mut Self { - self.trim_text_start = val; - self.trim_text_end = val; - self - } - - /// Changes whether whitespace after character data should be removed. - /// - /// When set to `true`, trailing whitespace is trimmed in [`Text`] events. - /// - /// (`false` by default) - /// - /// [`Text`]: Event::Text - pub fn trim_text_end(&mut self, val: bool) -> &mut Self { - self.trim_text_end = val; - self - } - - /// Changes whether trailing whitespaces after the markup name are trimmed in closing tags - /// ``. - /// - /// If true the emitted [`End`] event is stripped of trailing whitespace after the markup name. - /// - /// Note that if set to `false` and `check_end_names` is true the comparison of markup names is - /// going to fail erronously if a closing tag contains trailing whitespaces. - /// - /// (`true` by default) - /// - /// [`End`]: Event::End - pub fn trim_markup_names_in_closing_tags(&mut self, val: bool) -> &mut Self { - self.trim_markup_names_in_closing_tags = val; - self - } - - /// Changes whether mismatched closing tag names should be detected. - /// - /// When set to `false`, it won't check if a closing tag matches the corresponding opening tag. 
- /// For example, `` will be permitted. - /// - /// If the XML is known to be sane (already processed, etc.) this saves extra time. - /// - /// Note that the emitted [`End`] event will not be modified if this is disabled, ie. it will - /// contain the data of the mismatched end tag. - /// - /// Note, that setting this to `true` will lead to additional allocates that - /// needed to store tag name for an [`End`] event. There is no additional - /// allocation, however, if [`Self::expand_empty_elements()`] is also set. - /// - /// (`true` by default) - /// - /// [`End`]: Event::End - pub fn check_end_names(&mut self, val: bool) -> &mut Self { - self.check_end_names = val; - self - } - - /// Changes whether comments should be validated. - /// - /// When set to `true`, every [`Comment`] event will be checked for not containing `--`, which - /// is not allowed in XML comments. Most of the time we don't want comments at all so we don't - /// really care about comment correctness, thus the default value is `false` to improve - /// performance. - /// - /// (`false` by default) - /// - /// [`Comment`]: Event::Comment - pub fn check_comments(&mut self, val: bool) -> &mut Self { - self.check_comments = val; - self - } + configure_methods!(); } /// Getters @@ -415,48 +434,6 @@ impl Reader { } } - /// Resolves a potentially qualified **event name** into (namespace name, local name). - /// - /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined - /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix - /// can be defined on the same element as the attribute in question. - /// - /// *Unqualified* event inherits the current *default namespace*. 
- /// - /// # Lifetimes - /// - /// - `'n`: lifetime of an element name - /// - `'ns`: lifetime of a namespaces buffer, where all found namespaces are stored - #[inline] - pub fn event_namespace<'n, 'ns>( - &self, - name: QName<'n>, - namespace_buffer: &'ns [u8], - ) -> (ResolveResult<'ns>, LocalName<'n>) { - self.ns_resolver.resolve(name, namespace_buffer, true) - } - - /// Resolves a potentially qualified **attribute name** into (namespace name, local name). - /// - /// *Qualified* attribute names have the form `prefix:local-name` where the`prefix` is defined - /// on any containing XML element via `xmlns:prefix="the:namespace:uri"`. The namespace prefix - /// can be defined on the same element as the attribute in question. - /// - /// *Unqualified* attribute names do *not* inherit the current *default namespace*. - /// - /// # Lifetimes - /// - /// - `'n`: lifetime of an attribute - /// - `'ns`: lifetime of a namespaces buffer, where all found namespaces are stored - #[inline] - pub fn attribute_namespace<'n, 'ns>( - &self, - name: QName<'n>, - namespace_buffer: &'ns [u8], - ) -> (ResolveResult<'ns>, LocalName<'n>) { - self.ns_resolver.resolve(name, namespace_buffer, false) - } - /// Get the decoder, used to decode bytes, read by this reader, to the strings. /// /// If `encoding` feature is enabled, the used encoding may change after @@ -472,305 +449,6 @@ impl Reader { } } -/// Read methods -impl Reader { - /// Reads the next `Event`. - /// - /// This is the main entry point for reading XML `Event`s. - /// - /// `Event`s borrow `buf` and can be converted to own their data if needed (uses `Cow` - /// internally). - /// - /// Having the possibility to control the internal buffers gives you some additional benefits - /// such as: - /// - /// - Reduce the number of allocations by reusing the same buffer. For constrained systems, - /// you can call `buf.clear()` once you are done with processing the event (typically at the - /// end of your loop). 
- /// - Reserve the buffer length if you know the file size (using `Vec::with_capacity`). - /// - /// # Examples - /// - /// ``` - /// use quick_xml::Reader; - /// use quick_xml::events::Event; - /// - /// let xml = r#" - /// Test - /// Test 2 - /// "#; - /// let mut reader = Reader::from_str(xml); - /// reader.trim_text(true); - /// let mut count = 0; - /// let mut buf = Vec::new(); - /// let mut txt = Vec::new(); - /// loop { - /// match reader.read_event_into(&mut buf) { - /// Ok(Event::Start(ref e)) => count += 1, - /// Ok(Event::Text(e)) => txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()), - /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), - /// Ok(Event::Eof) => break, - /// _ => (), - /// } - /// buf.clear(); - /// } - /// println!("Found {} start events", count); - /// println!("Text events: {:?}", txt); - /// ``` - #[inline] - pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec) -> Result> { - self.read_event_impl(buf) - } - - /// Reads the next event and resolves its namespace (if applicable). 
- /// - /// # Examples - /// - /// ``` - /// use std::str::from_utf8; - /// use quick_xml::Reader; - /// use quick_xml::events::Event; - /// use quick_xml::name::ResolveResult::*; - /// - /// let xml = r#" - /// Test - /// Test 2 - /// "#; - /// let mut reader = Reader::from_str(xml); - /// reader.trim_text(true); - /// let mut count = 0; - /// let mut buf = Vec::new(); - /// let mut ns_buf = Vec::new(); - /// let mut txt = Vec::new(); - /// loop { - /// match reader.read_namespaced_event(&mut buf, &mut ns_buf) { - /// Ok((Bound(ns), Event::Start(e))) => { - /// count += 1; - /// match (ns.as_ref(), e.local_name().as_ref()) { - /// (b"www.xxxx", b"tag1") => (), - /// (b"www.yyyy", b"tag2") => (), - /// (ns, n) => panic!("Namespace and local name mismatch"), - /// } - /// println!("Resolved namespace: {:?}", ns); - /// } - /// Ok((Unbound, Event::Start(_))) => { - /// panic!("Element not in any namespace") - /// }, - /// Ok((Unknown(p), Event::Start(_))) => { - /// panic!("Undeclared namespace prefix {:?}", String::from_utf8(p)) - /// } - /// Ok((_, Event::Text(e))) => { - /// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()) - /// }, - /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), - /// Ok((_, Event::Eof)) => break, - /// _ => (), - /// } - /// buf.clear(); - /// } - /// println!("Found {} start events", count); - /// println!("Text events: {:?}", txt); - /// ``` - pub fn read_namespaced_event<'b, 'ns>( - &mut self, - buf: &'b mut Vec, - namespace_buffer: &'ns mut Vec, - ) -> Result<(ResolveResult<'ns>, Event<'b>)> { - if self.pending_pop { - self.ns_resolver.pop(namespace_buffer); - } - self.pending_pop = false; - match self.read_event_into(buf) { - Ok(Event::Eof) => Ok((ResolveResult::Unbound, Event::Eof)), - Ok(Event::Start(e)) => { - self.ns_resolver.push(&e, namespace_buffer); - Ok(( - self.ns_resolver.find(e.name(), namespace_buffer), - Event::Start(e), - )) - } - Ok(Event::Empty(e)) => { - // For empty elements 
we need to 'artificially' keep the namespace scope on the - // stack until the next `next()` call occurs. - // Otherwise the caller has no chance to use `resolve` in the context of the - // namespace declarations that are 'in scope' for the empty element alone. - // Ex: - self.ns_resolver.push(&e, namespace_buffer); - // notify next `read_namespaced_event()` invocation that it needs to pop this - // namespace scope - self.pending_pop = true; - Ok(( - self.ns_resolver.find(e.name(), namespace_buffer), - Event::Empty(e), - )) - } - Ok(Event::End(e)) => { - // notify next `read_namespaced_event()` invocation that it needs to pop this - // namespace scope - self.pending_pop = true; - Ok(( - self.ns_resolver.find(e.name(), namespace_buffer), - Event::End(e), - )) - } - Ok(e) => Ok((ResolveResult::Unbound, e)), - Err(e) => Err(e), - } - } - - /// Reads until end element is found using provided buffer as intermediate - /// storage for events content. This function is supposed to be called after - /// you already read a [`Start`] event. - /// - /// Manages nested cases where parent and child elements have the same name. - /// - /// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`] - /// will be returned. In particularly, that error will be returned if you call - /// this method without consuming the corresponding [`Start`] event first. - /// - /// If your reader created from a string slice or byte array slice, it is - /// better to use [`read_to_end()`] method, because it will not copy bytes - /// into intermediate buffer. - /// - /// The provided `buf` buffer will be filled only by one event content at time. - /// Before reading of each event the buffer will be cleared. If you know an - /// appropriate size of each event, you can preallocate the buffer to reduce - /// number of reallocations. - /// - /// The `end` parameter should contain name of the end element _in the reader - /// encoding_. 
It is good practice to always get that parameter using - /// [`BytesStart::to_end()`] method. - /// - /// The correctness of the skipped events does not checked, if you disabled - /// the [`check_end_names`] option. - /// - /// # Namespaces - /// - /// While the [`Reader`] does not support namespace resolution, namespaces - /// does not change the algorithm for comparing names. Although the names - /// `a:name` and `b:name` where both prefixes `a` and `b` resolves to the - /// same namespace, are semantically equivalent, `` cannot close - /// ``, because according to [the specification] - /// - /// > The end of every element that begins with a **start-tag** MUST be marked - /// > by an **end-tag** containing a name that echoes the element's type as - /// > given in the **start-tag** - /// - /// # Examples - /// - /// This example shows, how you can skip XML content after you read the - /// start event. - /// - /// ``` - /// # use pretty_assertions::assert_eq; - /// use quick_xml::events::{BytesStart, Event}; - /// use quick_xml::Reader; - /// - /// let mut reader = Reader::from_str(r#" - /// - /// - /// - /// - /// - /// - /// - /// - /// "#); - /// reader.trim_text(true); - /// let mut buf = Vec::new(); - /// - /// let start = BytesStart::borrowed_name(b"outer"); - /// let end = start.to_end().into_owned(); - /// - /// // First, we read a start event... - /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start)); - /// - /// //...then, we could skip all events to the corresponding end event. - /// // This call will correctly handle nested elements. - /// // Note, however, that this method does not handle namespaces. 
- /// reader.read_to_end_into(end.name(), &mut buf).unwrap(); - /// - /// // At the end we should get an Eof event, because we ate the whole XML - /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); - /// ``` - /// - /// [`Start`]: Event::Start - /// [`End`]: Event::End - /// [`read_to_end()`]: Self::read_to_end - /// [`check_end_names`]: Self::check_end_names - /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag - pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result<()> { - let mut depth = 0; - loop { - buf.clear(); - match self.read_event_into(buf) { - Err(e) => return Err(e), - - Ok(Event::Start(e)) if e.name() == end => depth += 1, - Ok(Event::End(e)) if e.name() == end => { - if depth == 0 { - return Ok(()); - } - depth -= 1; - } - Ok(Event::Eof) => { - let name = self.decoder().decode(end.as_ref()); - return Err(Error::UnexpectedEof(format!("", name))); - } - _ => (), - } - } - } - - /// Reads optional text between start and end tags. - /// - /// If the next event is a [`Text`] event, returns the decoded and unescaped content as a - /// `String`. If the next event is an [`End`] event, returns the empty string. In all other - /// cases, returns an error. - /// - /// Any text will be decoded using the XML encoding specified in the XML declaration (or UTF-8 - /// if none is specified). 
- /// - /// # Examples - /// - /// ``` - /// # use pretty_assertions::assert_eq; - /// use quick_xml::Reader; - /// use quick_xml::events::Event; - /// - /// let mut xml = Reader::from_reader(b" - /// <b> - /// - /// " as &[u8]); - /// xml.trim_text(true); - /// - /// let expected = ["", ""]; - /// for &content in expected.iter() { - /// match xml.read_event_into(&mut Vec::new()) { - /// Ok(Event::Start(ref e)) => { - /// assert_eq!(&xml.read_text_into(e.name(), &mut Vec::new()).unwrap(), content); - /// }, - /// e => panic!("Expecting Start event, found {:?}", e), - /// } - /// } - /// ``` - /// - /// [`Text`]: Event::Text - /// [`End`]: Event::End - pub fn read_text_into(&mut self, end: QName, buf: &mut Vec) -> Result { - let s = match self.read_event_into(buf) { - Err(e) => return Err(e), - - Ok(Event::Text(e)) => e.decode_and_unescape(self)?.into_owned(), - Ok(Event::End(e)) if e.name() == end => return Ok("".to_string()), - Ok(Event::Eof) => return Err(Error::UnexpectedEof("Text".to_string())), - _ => return Err(Error::TextNotFound), - }; - self.read_to_end_into(end, buf)?; - Ok(s) - } -} - /// Private methods impl Reader { /// Read text into the given buffer, and return an event that borrows from @@ -1028,132 +706,7 @@ impl Reader { } } -impl Reader> { - /// Creates an XML reader from a file path. - pub fn from_file>(path: P) -> Result { - let file = File::open(path).map_err(Error::Io)?; - let reader = BufReader::new(file); - Ok(Self::from_reader(reader)) - } -} - -impl<'a> Reader<&'a [u8]> { - /// Creates an XML reader from a string slice. - pub fn from_str(s: &'a str) -> Self { - // Rust strings are guaranteed to be UTF-8, so lock the encoding - #[cfg(feature = "encoding")] - { - let mut reader = Self::from_reader(s.as_bytes()); - reader.encoding = EncodingRef::Explicit(UTF_8); - reader - } - - #[cfg(not(feature = "encoding"))] - Self::from_reader(s.as_bytes()) - } - - /// Creates an XML reader from a slice of bytes. 
- pub fn from_bytes(s: &'a [u8]) -> Self { - Self::from_reader(s) - } - - /// Read an event that borrows from the input rather than a buffer. - #[inline] - pub fn read_event(&mut self) -> Result> { - self.read_event_impl(()) - } - - /// Reads until end element is found. This function is supposed to be called - /// after you already read a [`Start`] event. - /// - /// Manages nested cases where parent and child elements have the same name. - /// - /// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`] - /// will be returned. In particularly, that error will be returned if you call - /// this method without consuming the corresponding [`Start`] event first. - /// - /// The `end` parameter should contain name of the end element _in the reader - /// encoding_. It is good practice to always get that parameter using - /// [`BytesStart::to_end()`] method. - /// - /// The correctness of the skipped events does not checked, if you disabled - /// the [`check_end_names`] option. - /// - /// # Namespaces - /// - /// While the [`Reader`] does not support namespace resolution, namespaces - /// does not change the algorithm for comparing names. Although the names - /// `a:name` and `b:name` where both prefixes `a` and `b` resolves to the - /// same namespace, are semantically equivalent, `` cannot close - /// ``, because according to [the specification] - /// - /// > The end of every element that begins with a **start-tag** MUST be marked - /// > by an **end-tag** containing a name that echoes the element's type as - /// > given in the **start-tag** - /// - /// # Examples - /// - /// This example shows, how you can skip XML content after you read the - /// start event. 
- /// - /// ``` - /// # use pretty_assertions::assert_eq; - /// use quick_xml::events::{BytesStart, Event}; - /// use quick_xml::Reader; - /// - /// let mut reader = Reader::from_str(r#" - /// - /// - /// - /// - /// - /// - /// - /// - /// "#); - /// reader.trim_text(true); - /// - /// let start = BytesStart::borrowed_name(b"outer"); - /// let end = start.to_end().into_owned(); - /// - /// // First, we read a start event... - /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); - /// - /// //...then, we could skip all events to the corresponding end event. - /// // This call will correctly handle nested elements. - /// // Note, however, that this method does not handle namespaces. - /// reader.read_to_end(end.name()).unwrap(); - /// - /// // At the end we should get an Eof event, because we ate the whole XML - /// assert_eq!(reader.read_event().unwrap(), Event::Eof); - /// ``` - /// - /// [`Start`]: Event::Start - /// [`End`]: Event::End - /// [`check_end_names`]: Self::check_end_names - /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag - pub fn read_to_end(&mut self, end: QName) -> Result<()> { - let mut depth = 0; - loop { - match self.read_event() { - Err(e) => return Err(e), - - Ok(Event::Start(e)) if e.name() == end => depth += 1, - Ok(Event::End(e)) if e.name() == end => { - if depth == 0 { - return Ok(()); - } - depth -= 1; - } - Ok(Event::Eof) => { - let name = self.decoder().decode(end.as_ref()); - return Err(Error::UnexpectedEof(format!("", name))); - } - _ => (), - } - } - } -} +//////////////////////////////////////////////////////////////////////////////////////////////////// /// Represents an input for a reader that can return borrowed data. /// @@ -1255,292 +808,6 @@ trait XmlSource<'r, B> { fn peek_one(&mut self) -> Result>; } -/// Implementation of `XmlSource` for any `BufRead` reader using a user-given -/// `Vec` as buffer that will be borrowed by events. 
-impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec> for R { - #[inline] - fn read_bytes_until( - &mut self, - byte: u8, - buf: &'b mut Vec, - position: &mut usize, - ) -> Result> { - let mut read = 0; - let mut done = false; - let start = buf.len(); - while !done { - let used = { - let available = match self.fill_buf() { - Ok(n) if n.is_empty() => break, - Ok(n) => n, - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => { - *position += read; - return Err(Error::Io(e)); - } - }; - - match memchr::memchr(byte, available) { - Some(i) => { - buf.extend_from_slice(&available[..i]); - done = true; - i + 1 - } - None => { - buf.extend_from_slice(available); - available.len() - } - } - }; - self.consume(used); - read += used; - } - *position += read; - - if read == 0 { - Ok(None) - } else { - Ok(Some(&buf[start..])) - } - } - - fn read_bang_element( - &mut self, - buf: &'b mut Vec, - position: &mut usize, - ) -> Result> { - // Peeked one bang ('!') before being called, so it's guaranteed to - // start with it. 
- let start = buf.len(); - let mut read = 1; - buf.push(b'!'); - self.consume(1); - - let bang_type = BangType::new(self.peek_one()?)?; - - loop { - match self.fill_buf() { - // Note: Do not update position, so the error points to - // somewhere sane rather than at the EOF - Ok(n) if n.is_empty() => return Err(bang_type.to_err()), - Ok(available) => { - if let Some((consumed, used)) = bang_type.parse(available, read) { - buf.extend_from_slice(consumed); - - self.consume(used); - read += used; - - *position += read; - break; - } else { - buf.extend_from_slice(available); - - let used = available.len(); - self.consume(used); - read += used; - } - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => { - *position += read; - return Err(Error::Io(e)); - } - } - } - - if read == 0 { - Ok(None) - } else { - Ok(Some((bang_type, &buf[start..]))) - } - } - - #[inline] - fn read_element( - &mut self, - buf: &'b mut Vec, - position: &mut usize, - ) -> Result> { - let mut state = ReadElementState::Elem; - let mut read = 0; - - let start = buf.len(); - loop { - match self.fill_buf() { - Ok(n) if n.is_empty() => break, - Ok(available) => { - if let Some((consumed, used)) = state.change(available) { - buf.extend_from_slice(consumed); - - self.consume(used); - read += used; - - *position += read; - break; - } else { - buf.extend_from_slice(available); - - let used = available.len(); - self.consume(used); - read += used; - } - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => { - *position += read; - return Err(Error::Io(e)); - } - }; - } - - if read == 0 { - Ok(None) - } else { - Ok(Some(&buf[start..])) - } - } - - /// Consume and discard all the whitespace until the next non-whitespace - /// character or EOF. 
- fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { - loop { - break match self.fill_buf() { - Ok(n) => { - let count = n.iter().position(|b| !is_whitespace(*b)).unwrap_or(n.len()); - if count > 0 { - self.consume(count); - *position += count; - continue; - } else { - Ok(()) - } - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => Err(Error::Io(e)), - }; - } - } - - /// Consume and discard one character if it matches the given byte. Return - /// true if it matched. - fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result { - match self.peek_one()? { - Some(b) if b == byte => { - *position += 1; - self.consume(1); - Ok(true) - } - _ => Ok(false), - } - } - - /// Return one character without consuming it, so that future `read_*` calls - /// will still include it. On EOF, return None. - fn peek_one(&mut self) -> Result> { - loop { - break match self.fill_buf() { - Ok(n) if n.is_empty() => Ok(None), - Ok(n) => Ok(Some(n[0])), - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => Err(Error::Io(e)), - }; - } - } -} - -/// Implementation of `XmlSource` for `&[u8]` reader using a `Self` as buffer -/// that will be borrowed by events. This implementation provides a zero-copy deserialization -impl<'a> XmlSource<'a, ()> for &'a [u8] { - fn read_bytes_until( - &mut self, - byte: u8, - _buf: (), - position: &mut usize, - ) -> Result> { - if self.is_empty() { - return Ok(None); - } - - Ok(Some(if let Some(i) = memchr::memchr(byte, self) { - *position += i + 1; - let bytes = &self[..i]; - *self = &self[i + 1..]; - bytes - } else { - *position += self.len(); - let bytes = &self[..]; - *self = &[]; - bytes - })) - } - - fn read_bang_element( - &mut self, - _buf: (), - position: &mut usize, - ) -> Result> { - // Peeked one bang ('!') before being called, so it's guaranteed to - // start with it. 
- debug_assert_eq!(self[0], b'!'); - - let bang_type = BangType::new(self[1..].first().copied())?; - - if let Some((bytes, i)) = bang_type.parse(self, 0) { - *position += i; - *self = &self[i..]; - return Ok(Some((bang_type, bytes))); - } - - // Note: Do not update position, so the error points to - // somewhere sane rather than at the EOF - Err(bang_type.to_err()) - } - - fn read_element(&mut self, _buf: (), position: &mut usize) -> Result> { - if self.is_empty() { - return Ok(None); - } - - let mut state = ReadElementState::Elem; - - if let Some((bytes, i)) = state.change(self) { - *position += i; - *self = &self[i..]; - return Ok(Some(bytes)); - } - - // Note: Do not update position, so the error points to a sane place - // rather than at the EOF. - Err(Error::UnexpectedEof("Element".to_string())) - - // FIXME: Figure out why the other one works without UnexpectedEof - } - - fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { - let whitespaces = self - .iter() - .position(|b| !is_whitespace(*b)) - .unwrap_or(self.len()); - *position += whitespaces; - *self = &self[whitespaces..]; - Ok(()) - } - - fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result { - if self.first() == Some(&byte) { - *self = &self[1..]; - *position += 1; - Ok(true) - } else { - Ok(false) - } - } - - fn peek_one(&mut self) -> Result> { - Ok(self.first().copied()) - } -} - /// Possible elements started with ` { + /// An XML reader + reader: Reader, + /// Buffer that contains names of namespace prefixes (the part between `xmlns:` + /// and an `=`) and namespace values. + buffer: Vec, + /// A buffer to manage namespaces + ns_resolver: NamespaceResolver, + /// We cannot pop data from the namespace stack until returned `Empty` or `End` + /// event will be processed by the user, so we only mark that we should that + /// in the next [`Self::read_event_impl()`] call. 
+ pending_pop: bool, +} + +/// Builder methods +impl NsReader { + /// Creates a `NsReader` that reads from a reader. + #[inline] + pub fn from_reader(reader: R) -> Self { + Self::new(Reader::from_reader(reader)) + } + + configure_methods!(reader); +} + +/// Private methods +impl NsReader { + #[inline] + fn new(reader: Reader) -> Self { + Self { + reader, + buffer: Vec::new(), + ns_resolver: NamespaceResolver::default(), + pending_pop: false, + } + } + + fn read_event_impl<'i, B>(&mut self, buf: B) -> Result> + where + R: XmlSource<'i, B>, + { + if self.pending_pop { + self.ns_resolver.pop(&mut self.buffer); + self.pending_pop = false; + } + match self.reader.read_event_impl(buf) { + Ok(Event::Start(e)) => { + self.ns_resolver.push(&e, &mut self.buffer); + Ok(Event::Start(e)) + } + Ok(Event::Empty(e)) => { + self.ns_resolver.push(&e, &mut self.buffer); + // notify next `read_event_impl()` invocation that it needs to pop this + // namespace scope + self.pending_pop = true; + Ok(Event::Empty(e)) + } + Ok(Event::End(e)) => { + // notify next `read_event_impl()` invocation that it needs to pop this + // namespace scope + self.pending_pop = true; + Ok(Event::End(e)) + } + e => e, + } + } + + fn read_resolved_event_impl<'i, B>(&mut self, buf: B) -> Result<(ResolveResult, Event<'i>)> + where + R: XmlSource<'i, B>, + { + match self.read_event_impl(buf) { + Ok(Event::Start(e)) => Ok(( + self.ns_resolver.find(e.name(), &mut self.buffer), + Event::Start(e), + )), + Ok(Event::Empty(e)) => Ok(( + self.ns_resolver.find(e.name(), &mut self.buffer), + Event::Empty(e), + )), + Ok(Event::End(e)) => Ok(( + self.ns_resolver.find(e.name(), &mut self.buffer), + Event::End(e), + )), + Ok(e) => Ok((ResolveResult::Unbound, e)), + Err(e) => Err(e), + } + } +} + +/// Getters +impl NsReader { + /// Consumes `NsReader` returning the underlying reader + /// + /// See the [`Reader::into_inner`] for examples + #[inline] + pub fn into_inner(self) -> R { + self.reader.into_inner() + } + + /// Gets a 
mutable reference to the underlying reader. + pub fn get_mut(&mut self) -> &mut R { + self.reader.get_mut() + } + + /// Resolves a potentially qualified **element name** or **attribute name** + /// into (namespace name, local name). + /// + /// *Qualified* names have the form `prefix:local-name` where the `prefix` + /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`. + /// The namespace prefix can be defined on the same element as the name in question. + /// + /// The method returns following results depending on the `name` shape, + /// `attribute` flag and the presence of the default namespace: + /// + /// |attribute|`xmlns="..."`|QName |ResolveResult |LocalName + /// |---------|-------------|-------------------|-----------------------|------------ + /// |`true` |Not defined |`local-name` |[`Unbound`] |`local-name` + /// |`true` |Defined |`local-name` |[`Unbound`] |`local-name` + /// |`true` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name` + /// |`false` |Not defined |`local-name` |[`Unbound`] |`local-name` + /// |`false` |Defined |`local-name` |[`Bound`] (default) |`local-name` + /// |`false` |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name` + /// + /// If you want to clearly indicate that name that you resolve is an element + /// or an attribute name, you could use [`resolve_attribute()`] or [`resolve_element()`] + /// methods. + /// + /// # Lifetimes + /// + /// - `'n`: lifetime of a name. Returned local name will be bound to the same + /// lifetime as the name in question. 
+ /// - returned namespace name will be bound to the reader itself + /// + /// [`Bound`]: ResolveResult::Bound + /// [`Unbound`]: ResolveResult::Unbound + /// [`Unknown`]: ResolveResult::Unknown + /// [`resolve_attribute()`]: Self::resolve_attribute() + /// [`resolve_element()`]: Self::resolve_element() + #[inline] + pub fn resolve<'n>(&self, name: QName<'n>, attribute: bool) -> (ResolveResult, LocalName<'n>) { + self.ns_resolver.resolve(name, &self.buffer, !attribute) + } + + /// Resolves a potentially qualified **element name** into (namespace name, local name). + /// + /// *Qualified* element names have the form `prefix:local-name` where the + /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`. + /// The namespace prefix can be defined on the same element as the element + /// in question. + /// + /// *Unqualified* elements inherits the current *default namespace*. + /// + /// The method returns following results depending on the `name` shape and + /// the presence of the default namespace: + /// + /// |`xmlns="..."`|QName |ResolveResult |LocalName + /// |-------------|-------------------|-----------------------|------------ + /// |Not defined |`local-name` |[`Unbound`] |`local-name` + /// |Defined |`local-name` |[`Bound`] (default) |`local-name` + /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name` + /// + /// # Lifetimes + /// + /// - `'n`: lifetime of an element name. Returned local name will be bound + /// to the same lifetime as the name in question. + /// - returned namespace name will be bound to the reader itself + /// + /// # Examples + /// + /// This example shows how you can resolve qualified name into a namespace. + /// Note, that in the code like this you do not need to do that manually, + /// because the namespace resolution result returned by the [`read_event()`]. 
+ /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::Event; + /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; + /// use quick_xml::NsReader; + /// + /// let mut reader = NsReader::from_str(""); + /// + /// match reader.read_event().unwrap() { + /// Event::Empty(e) => assert_eq!( + /// reader.resolve_element(e.name()), + /// (Bound(Namespace(b"root namespace")), QName(b"tag").into()) + /// ), + /// _ => unreachable!(), + /// } + /// ``` + /// + /// [`Bound`]: ResolveResult::Bound + /// [`Unbound`]: ResolveResult::Unbound + /// [`Unknown`]: ResolveResult::Unknown + /// [`read_event()`]: Self::read_event + #[inline] + pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) { + self.ns_resolver.resolve(name, &self.buffer, true) + } + + /// Resolves a potentially qualified **attribute name** into (namespace name, local name). + /// + /// *Qualified* attribute names have the form `prefix:local-name` where the + /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`. + /// The namespace prefix can be defined on the same element as the attribute + /// in question. + /// + /// *Unqualified* attribute names do *not* inherit the current *default namespace*. + /// + /// The method returns following results depending on the `name` shape and + /// the presence of the default namespace: + /// + /// |`xmlns="..."`|QName |ResolveResult |LocalName + /// |-------------|-------------------|-----------------------|------------ + /// |Not defined |`local-name` |[`Unbound`] |`local-name` + /// |Defined |`local-name` |[`Unbound`] |`local-name` + /// |_any_ |`prefix:local-name`|[`Bound`] / [`Unknown`]|`local-name` + /// + /// # Lifetimes + /// + /// - `'n`: lifetime of an attribute name. Returned local name will be bound + /// to the same lifetime as the name in question. 
+ /// - returned namespace name will be bound to the reader itself + /// + /// # Examples + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::Event; + /// use quick_xml::events::attributes::Attribute; + /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; + /// use quick_xml::NsReader; + /// + /// let mut reader = NsReader::from_str(" + /// + /// "); + /// reader.trim_text(true); + /// + /// match reader.read_event().unwrap() { + /// Event::Empty(e) => { + /// let mut iter = e.attributes(); + /// + /// // Unlike elements, attributes without explicit namespace + /// // not bound to any namespace + /// let one = iter.next().unwrap().unwrap(); + /// assert_eq!( + /// reader.resolve_attribute(one.key), + /// (Unbound, QName(b"one").into()) + /// ); + /// + /// let two = iter.next().unwrap().unwrap(); + /// assert_eq!( + /// reader.resolve_attribute(two.key), + /// (Bound(Namespace(b"other namespace")), QName(b"two").into()) + /// ); + /// } + /// _ => unreachable!(), + /// } + /// ``` + /// + /// [`Bound`]: ResolveResult::Bound + /// [`Unbound`]: ResolveResult::Unbound + /// [`Unknown`]: ResolveResult::Unknown + #[inline] + pub fn resolve_attribute<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) { + self.ns_resolver.resolve(name, &self.buffer, false) + } +} + +impl NsReader { + /// Reads the next event into given buffer. + /// + /// This method manages namespaces but doesn't resolve them automatically. + /// You should call [`resolve_element()`] if you want to get a namespace. + /// + /// You also can use [`read_resolved_event_into()`] instead if you want to resolve + /// namespace as soon as you get an event. 
+ /// + /// # Examples + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::NsReader; + /// use quick_xml::events::Event; + /// use quick_xml::name::{Namespace, ResolveResult::*}; + /// + /// let mut reader = NsReader::from_str(r#" + /// + /// Test + /// Test 2 + /// + /// "#); + /// reader.trim_text(true); + /// + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_event_into(&mut buf).unwrap() { + /// Event::Start(e) => { + /// count += 1; + /// let (ns, local) = reader.resolve_element(e.name()); + /// match local.as_ref() { + /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))), + /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))), + /// _ => unreachable!(), + /// } + /// } + /// Event::Text(e) => { + /// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()) + /// } + /// Event::Eof => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// assert_eq!(count, 3); + /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); + /// ``` + /// + /// [`resolve_element()`]: Self::resolve_element + /// [`read_resolved_event_into()`]: Self::read_resolved_event_into + #[inline] + pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec) -> Result> { + self.read_event_impl(buf) + } + + /// Reads the next event into given buffer and resolves its namespace (if applicable). + /// + /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events. + /// For all other events the concept of namespace is not defined, so + /// a [`ResolveResult::Unbound`] is returned. + /// + /// If you are not interested in namespaces, you can use [`read_event_into()`] + /// which will not automatically resolve namespaces for you. 
+ /// + /// # Examples + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::NsReader; + /// use quick_xml::events::Event; + /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; + /// + /// let mut reader = NsReader::from_str(r#" + /// + /// Test + /// Test 2 + /// + /// "#); + /// reader.trim_text(true); + /// + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_resolved_event_into(&mut buf).unwrap() { + /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => { + /// count += 1; + /// assert_eq!(e.local_name(), QName(b"tag1").into()); + /// } + /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => { + /// count += 1; + /// assert_eq!(e.local_name(), QName(b"tag2").into()); + /// } + /// (_, Event::Start(_)) => unreachable!(), + /// + /// (_, Event::Text(e)) => { + /// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()) + /// } + /// (_, Event::Eof) => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// assert_eq!(count, 3); + /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); + /// ``` + /// + /// [`Start`]: Event::Start + /// [`Empty`]: Event::Empty + /// [`End`]: Event::End + /// [`read_event_into()`]: Self::read_event_into + #[inline] + pub fn read_resolved_event_into<'b>( + &mut self, + buf: &'b mut Vec, + ) -> Result<(ResolveResult, Event<'b>)> { + self.read_resolved_event_impl(buf) + } + + /// Reads until end element is found using provided buffer as intermediate + /// storage for events content. This function is supposed to be called after + /// you already read a [`Start`] event. + /// + /// Manages nested cases where parent and child elements have the same name + /// ("the same" means that their local names are the same and their prefixes + /// resolves to the same namespace). + /// + /// If corresponding [`End`] event will not be found, the [`UnexpectedEof`] + /// will be returned. 
In particular, that error will be returned if you call + /// this method without consuming the corresponding [`Start`] event first. + /// + /// If your reader was created from a string slice or byte array slice, it is + /// better to use the [`read_to_end()`] method, because it will not copy bytes + /// into an intermediate buffer. + /// + /// The provided `buf` buffer will hold the content of only one event at a time. + /// Before each event is read the buffer will be cleared. If you know an + /// appropriate size of each event, you can preallocate the buffer to reduce + /// the number of reallocations. + /// + /// The `end` parameter should contain the name of the end element + /// _in the reader encoding_. It is good practice to always get + /// that parameter using the [`BytesStart::to_end()`] method. + /// + /// # Namespaces + /// + /// Like [`Reader::read_to_end_into()`], to which this method delegates, names + /// are compared literally and namespace prefixes are not resolved, so + /// `a:name` and `b:name` are treated as different names even if both prefixes + /// resolve to the same namespace. + /// + /// # Examples + /// + /// This example shows how you can skip XML content after you read the + /// start event. + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::{BytesStart, Event}; + /// use quick_xml::name::{Namespace, ResolveResult}; + /// use quick_xml::NsReader; + /// + /// let mut reader = NsReader::from_str(r#" + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// "#); + /// reader.trim_text(true); + /// let mut buf = Vec::new(); + /// + /// let ns = Namespace(b"namespace 1"); + /// let start = BytesStart::borrowed(br#"outer xmlns="namespace 1""#, 5); + /// let end = start.to_end().into_owned(); + /// + /// // First, we read a start event... 
+ /// assert_eq!( + /// reader.read_resolved_event_into(&mut buf).unwrap(), + /// (ResolveResult::Bound(ns), Event::Start(start)) + /// ); + /// + /// //...then, we could skip all events to the corresponding end event. + /// // This call will correctly handle nested elements. + /// // Note, however, that this method does not handle namespaces. + /// reader.read_to_end_into(end.name(), &mut buf).unwrap(); + /// + /// // At the end we should get an Eof event, because we ate the whole XML + /// assert_eq!( + /// reader.read_resolved_event_into(&mut buf).unwrap(), + /// (ResolveResult::Unbound, Event::Eof) + /// ); + /// ``` + /// + /// [`Start`]: Event::Start + /// [`End`]: Event::End + /// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof + /// [`read_to_end()`]: Self::read_to_end + /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end + #[inline] + pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result<()> { + // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should + // match literally the start name. See `Self::check_end_names` documentation + self.reader.read_to_end_into(end, buf) + } +} + +impl NsReader> { + /// Creates an XML reader from a file path. + pub fn from_file>(path: P) -> Result { + let file = File::open(path)?; + let reader = BufReader::new(file); + Ok(Self::from_reader(reader)) + } +} + +impl<'i> NsReader<&'i [u8]> { + /// Creates an XML reader from a string slice. + #[inline] + pub fn from_str(s: &'i str) -> Self { + Self::new(Reader::from_str(s)) + } + + /// Creates an XML reader from a slice of bytes. + #[inline] + pub fn from_bytes(bytes: &'i [u8]) -> Self { + Self::new(Reader::from_bytes(bytes)) + } + + /// Reads the next event, borrow its content from the input buffer. + /// + /// This method manages namespaces but doesn't resolve them automatically. + /// You should call [`resolve_element()`] if you want to get a namespace. 
+ /// + /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace + /// as soon as you get an event. + /// + /// # Examples + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::NsReader; + /// use quick_xml::events::Event; + /// use quick_xml::name::{Namespace, ResolveResult::*}; + /// + /// let mut reader = NsReader::from_str(r#" + /// + /// Test + /// Test 2 + /// + /// "#); + /// reader.trim_text(true); + /// + /// let mut count = 0; + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_event().unwrap() { + /// Event::Start(e) => { + /// count += 1; + /// let (ns, local) = reader.resolve_element(e.name()); + /// match local.as_ref() { + /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))), + /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))), + /// _ => unreachable!(), + /// } + /// } + /// Event::Text(e) => { + /// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()) + /// } + /// Event::Eof => break, + /// _ => (), + /// } + /// } + /// assert_eq!(count, 3); + /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); + /// ``` + /// + /// [`resolve_element()`]: Self::resolve_element + /// [`read_resolved_event()`]: Self::read_resolved_event + #[inline] + pub fn read_event(&mut self) -> Result> { + self.read_event_impl(()) + } + + /// Reads the next event, borrow its content from the input buffer, and resolves + /// its namespace (if applicable). + /// + /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events. + /// For all other events the concept of namespace is not defined, so + /// a [`ResolveResult::Unbound`] is returned. + /// + /// If you are not interested in namespaces, you can use [`read_event()`] + /// which will not automatically resolve namespaces for you. 
+ /// + /// # Examples + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::NsReader; + /// use quick_xml::events::Event; + /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; + /// + /// let mut reader = NsReader::from_str(r#" + /// + /// Test + /// Test 2 + /// + /// "#); + /// reader.trim_text(true); + /// + /// let mut count = 0; + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_resolved_event().unwrap() { + /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => { + /// count += 1; + /// assert_eq!(e.local_name(), QName(b"tag1").into()); + /// } + /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => { + /// count += 1; + /// assert_eq!(e.local_name(), QName(b"tag2").into()); + /// } + /// (_, Event::Start(_)) => unreachable!(), + /// + /// (_, Event::Text(e)) => { + /// txt.push(e.decode_and_unescape(&reader).unwrap().into_owned()) + /// } + /// (_, Event::Eof) => break, + /// _ => (), + /// } + /// } + /// assert_eq!(count, 3); + /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); + /// ``` + /// + /// [`Start`]: Event::Start + /// [`Empty`]: Event::Empty + /// [`End`]: Event::End + /// [`read_event()`]: Self::read_event + #[inline] + pub fn read_resolved_event(&mut self) -> Result<(ResolveResult, Event<'i>)> { + self.read_resolved_event_impl(()) + } + + /// Reads until end element is found. This function is supposed to be called + /// after you already read a [`Start`] event. + /// + /// Manages nested cases where parent and child elements have the same name + /// ("the same" means that their local names are the same and their prefixes + /// resolves to the same namespace). + /// + /// If corresponding [`End`] event will not be found, the [`UnexpectedEof`] + /// will be returned. In particularly, that error will be returned if you call + /// this method without consuming the corresponding [`Start`] event first. 
+ /// + /// The `end` parameter should contain the name of the end element _in the reader + /// encoding_. It is good practice to always get that parameter using the + /// [`BytesStart::to_end()`] method. + /// + /// # Namespaces + /// + /// This method delegates to [`Reader::read_to_end()`] and therefore does not + /// resolve namespace prefixes: the names are compared literally, so names that + /// differ only in prefix (for example, `a:name` and `b:name`) are not considered + /// equal even if both prefixes resolve to the same namespace. + /// + /// # Examples + /// + /// This example shows how you can skip XML content after you read the + /// start event. + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::{BytesStart, Event}; + /// use quick_xml::name::{Namespace, ResolveResult}; + /// use quick_xml::NsReader; + /// + /// let mut reader = NsReader::from_str(r#" + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// "#); + /// reader.trim_text(true); + /// + /// let ns = Namespace(b"namespace 1"); + /// let start = BytesStart::borrowed(br#"outer xmlns="namespace 1""#, 5); + /// let end = start.to_end().into_owned(); + /// + /// // First, we read a start event... + /// assert_eq!( + /// reader.read_resolved_event().unwrap(), + /// (ResolveResult::Bound(ns), Event::Start(start)) + /// ); + /// + /// //...then, we could skip all events to the corresponding end event. + /// // This call will correctly handle nested elements. + /// // Note, however, that this method does not handle namespaces. 
+ /// reader.read_to_end(end.name()).unwrap(); + /// + /// // At the end we should get an Eof event, because we ate the whole XML + /// assert_eq!( + /// reader.read_resolved_event().unwrap(), + /// (ResolveResult::Unbound, Event::Eof) + /// ); + /// ``` + /// + /// [`Start`]: Event::Start + /// [`End`]: Event::End + /// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof + /// [`read_to_end()`]: Self::read_to_end + /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end + #[inline] + pub fn read_to_end(&mut self, end: QName) -> Result<()> { + // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should + // match literally the start name. See `Self::check_end_names` documentation + self.reader.read_to_end(end) + } +} + +impl Deref for NsReader { + type Target = Reader; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.reader + } +} diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs new file mode 100644 index 00000000..abbb04f6 --- /dev/null +++ b/src/reader/slice_reader.rs @@ -0,0 +1,232 @@ +//! This is an implementation of [`Reader`] for reading from a `&[u8]` as +//! underlying byte stream. This implementation supports not using an +//! intermediate buffer as the byte slice itself can be used to borrow from. + +#[cfg(feature = "encoding")] +use crate::reader::EncodingRef; +#[cfg(feature = "encoding")] +use encoding_rs::UTF_8; + +use crate::errors::{Error, Result}; +use crate::events::Event; +use crate::name::QName; +use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, XmlSource}; + +use memchr; + +/// This is an implementation of [`Reader`] for reading from a `&[u8]` as +/// underlying byte stream. This implementation supports not using an +/// intermediate buffer as the byte slice itself can be used to borrow from. +impl<'a> Reader<&'a [u8]> { + /// Creates an XML reader from a string slice. 
+ pub fn from_str(s: &'a str) -> Self { + // Rust strings are guaranteed to be UTF-8, so lock the encoding + #[cfg(feature = "encoding")] + { + let mut reader = Self::from_reader(s.as_bytes()); + reader.encoding = EncodingRef::Explicit(UTF_8); + reader + } + + #[cfg(not(feature = "encoding"))] + Self::from_reader(s.as_bytes()) + } + + /// Creates an XML reader from a slice of bytes. + pub fn from_bytes(s: &'a [u8]) -> Self { + Self::from_reader(s) + } + + /// Read an event that borrows from the input rather than a buffer. + #[inline] + pub fn read_event(&mut self) -> Result> { + self.read_event_impl(()) + } + + /// Reads until end element is found. This function is supposed to be called + /// after you already read a [`Start`] event. + /// + /// Manages nested cases where parent and child elements have the same name. + /// + /// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`] + /// will be returned. In particularly, that error will be returned if you call + /// this method without consuming the corresponding [`Start`] event first. + /// + /// The `end` parameter should contain name of the end element _in the reader + /// encoding_. It is good practice to always get that parameter using + /// [`BytesStart::to_end()`] method. + /// + /// The correctness of the skipped events does not checked, if you disabled + /// the [`check_end_names`] option. + /// + /// # Namespaces + /// + /// While the [`Reader`] does not support namespace resolution, namespaces + /// does not change the algorithm for comparing names. 
Although the names + /// `a:name` and `b:name` where both prefixes `a` and `b` resolve to the + /// same namespace, are semantically equivalent, `</b:name>` cannot close + /// `<a:name>`, because according to [the specification] + /// + /// > The end of every element that begins with a **start-tag** MUST be marked + /// > by an **end-tag** containing a name that echoes the element's type as + /// > given in the **start-tag** + /// + /// # Examples + /// + /// This example shows how you can skip XML content after you read the + /// start event. + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::{BytesStart, Event}; + /// use quick_xml::Reader; + /// + /// let mut reader = Reader::from_str(r#" + /// + /// + /// + /// + /// + /// + /// + /// + /// "#); + /// reader.trim_text(true); + /// + /// let start = BytesStart::borrowed_name(b"outer"); + /// let end = start.to_end().into_owned(); + /// + /// // First, we read a start event... + /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); + /// + /// //...then, we could skip all events to the corresponding end event. + /// // This call will correctly handle nested elements. + /// // Note, however, that this method does not handle namespaces. 
+ /// reader.read_to_end(end.name()).unwrap(); + /// + /// // At the end we should get an Eof event, because we ate the whole XML + /// assert_eq!(reader.read_event().unwrap(), Event::Eof); + /// ``` + /// + /// [`Start`]: Event::Start + /// [`End`]: Event::End + /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end + /// [`check_end_names`]: Self::check_end_names + /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag + pub fn read_to_end(&mut self, end: QName) -> Result<()> { + let mut depth = 0; + loop { + match self.read_event() { + Err(e) => return Err(e), + + Ok(Event::Start(e)) if e.name() == end => depth += 1, + Ok(Event::End(e)) if e.name() == end => { + if depth == 0 { + return Ok(()); + } + depth -= 1; + } + Ok(Event::Eof) => { + let name = self.decoder().decode(end.as_ref()); + return Err(Error::UnexpectedEof(format!("", name))); + } + _ => (), + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/// Implementation of `XmlSource` for `&[u8]` reader using a `Self` as buffer +/// that will be borrowed by events. This implementation provides a zero-copy deserialization +impl<'a> XmlSource<'a, ()> for &'a [u8] { + fn read_bytes_until( + &mut self, + byte: u8, + _buf: (), + position: &mut usize, + ) -> Result> { + if self.is_empty() { + return Ok(None); + } + + Ok(Some(if let Some(i) = memchr::memchr(byte, self) { + *position += i + 1; + let bytes = &self[..i]; + *self = &self[i + 1..]; + bytes + } else { + *position += self.len(); + let bytes = &self[..]; + *self = &[]; + bytes + })) + } + + fn read_bang_element( + &mut self, + _buf: (), + position: &mut usize, + ) -> Result> { + // Peeked one bang ('!') before being called, so it's guaranteed to + // start with it. 
+ debug_assert_eq!(self[0], b'!'); + + let bang_type = BangType::new(self[1..].first().copied())?; + + if let Some((bytes, i)) = bang_type.parse(self, 0) { + *position += i; + *self = &self[i..]; + return Ok(Some((bang_type, bytes))); + } + + // Note: Do not update position, so the error points to + // somewhere sane rather than at the EOF + Err(bang_type.to_err()) + } + + fn read_element(&mut self, _buf: (), position: &mut usize) -> Result> { + if self.is_empty() { + return Ok(None); + } + + let mut state = ReadElementState::Elem; + + if let Some((bytes, i)) = state.change(self) { + *position += i; + *self = &self[i..]; + return Ok(Some(bytes)); + } + + // Note: Do not update position, so the error points to a sane place + // rather than at the EOF. + Err(Error::UnexpectedEof("Element".to_string())) + + // FIXME: Figure out why the other one works without UnexpectedEof + } + + fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { + let whitespaces = self + .iter() + .position(|b| !is_whitespace(*b)) + .unwrap_or(self.len()); + *position += whitespaces; + *self = &self[whitespaces..]; + Ok(()) + } + + fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result { + if self.first() == Some(&byte) { + *self = &self[1..]; + *position += 1; + Ok(true) + } else { + Ok(false) + } + } + + fn peek_one(&mut self) -> Result> { + Ok(self.first().copied()) + } +} diff --git a/tests/namespaces.rs b/tests/namespaces.rs index 4729f2c7..41f594fd 100644 --- a/tests/namespaces.rs +++ b/tests/namespaces.rs @@ -3,19 +3,16 @@ use quick_xml::events::attributes::Attribute; use quick_xml::events::Event::*; use quick_xml::name::ResolveResult::*; use quick_xml::name::{Namespace, QName}; -use quick_xml::Reader; +use quick_xml::NsReader; use std::borrow::Cow; #[test] fn namespace() { - let mut r = Reader::from_str("in namespace!"); + let mut r = NsReader::from_str("in namespace!"); r.trim_text(true); - let mut buf = Vec::new(); - let mut ns_buf = Vec::new(); - // - match 
r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, Start(_))) => assert_eq!(ns, Unbound), e => panic!( "expecting outer start element with no namespace, got {:?}", @@ -24,7 +21,7 @@ fn namespace() { } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, Start(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), e => panic!( "expecting inner start element with to resolve to 'www1', got {:?}", @@ -32,13 +29,12 @@ fn namespace() { ), } // "in namespace!" - match r.read_namespaced_event(&mut buf, &mut ns_buf) { - //TODO: Check in specification, it is true that namespace should be empty? + match r.read_resolved_event() { Ok((ns, Text(_))) => assert_eq!(ns, Unbound), e => panic!("expecting text content with no namespace, got {:?}", e), } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, End(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), e => panic!( "expecting inner end element with to resolve to 'www1', got {:?}", @@ -47,7 +43,7 @@ fn namespace() { } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, End(_))) => assert_eq!(ns, Unbound), e => panic!("expecting outer end element with no namespace, got {:?}", e), } @@ -55,14 +51,11 @@ fn namespace() { #[test] fn default_namespace() { - let mut r = Reader::from_str(r#""#); + let mut r = NsReader::from_str(r#""#); r.trim_text(true); - let mut buf = Vec::new(); - let mut ns_buf = Vec::new(); - // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, Start(_))) => assert_eq!(ns, Unbound), e => panic!( "expecting outer start element with no namespace, got {:?}", @@ -71,7 +64,7 @@ fn default_namespace() { } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, Start(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), e => panic!( "expecting inner 
start element with to resolve to 'www1', got {:?}", @@ -79,7 +72,7 @@ fn default_namespace() { ), } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, End(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), e => panic!( "expecting inner end element with to resolve to 'www1', got {:?}", @@ -89,7 +82,7 @@ fn default_namespace() { // very important: a should not be in any namespace. The default namespace only applies to // the sub-document it is defined on. - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, End(_))) => assert_eq!(ns, Unbound), e => panic!("expecting outer end element with no namespace, got {:?}", e), } @@ -97,14 +90,11 @@ fn default_namespace() { #[test] fn default_namespace_reset() { - let mut r = Reader::from_str(r#""#); + let mut r = NsReader::from_str(r#""#); r.trim_text(true); - let mut buf = Vec::new(); - let mut ns_buf = Vec::new(); - // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, Start(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), e => panic!( "expecting outer start element with to resolve to 'www1', got {:?}", @@ -113,7 +103,7 @@ fn default_namespace_reset() { } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, Start(_))) => assert_eq!(ns, Unbound), e => panic!( "expecting inner start element with no namespace, got {:?}", @@ -121,13 +111,13 @@ fn default_namespace_reset() { ), } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, End(_))) => assert_eq!(ns, Unbound), e => panic!("expecting inner end element with no namespace, got {:?}", e), } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, End(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), e => panic!( "expecting outer end element with to resolve to 'www1', got {:?}", @@ -141,14 
+131,12 @@ fn default_namespace_reset() { /// The code path for namespace handling is slightly different for `Empty` vs. `Start+End`. #[test] fn attributes_empty_ns() { - let src = b""; + let src = ""; - let mut r = Reader::from_reader(src as &[u8]); + let mut r = NsReader::from_str(src); r.trim_text(true).expand_empty_elements(false); - let mut buf = Vec::new(); - let mut ns_buf = Vec::new(); - let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + let e = match r.read_resolved_event() { Ok((Unbound, Empty(e))) => e, e => panic!("Expecting Empty event, got {:?}", e), }; @@ -159,7 +147,7 @@ fn attributes_empty_ns() { // we don't care about xmlns attributes for this test .filter(|kv| kv.key.as_namespace_binding().is_none()) .map(|Attribute { key: name, value }| { - let (opt_ns, local_name) = r.attribute_namespace(name, &ns_buf); + let (opt_ns, local_name) = r.resolve_attribute(name); (opt_ns, local_name.into_inner(), value) }); assert_eq!( @@ -182,14 +170,12 @@ fn attributes_empty_ns() { /// The code path for namespace handling is slightly different for `Empty` vs. `Start+End`. 
#[test] fn attributes_empty_ns_expanded() { - let src = b""; + let src = ""; - let mut r = Reader::from_reader(src as &[u8]); + let mut r = NsReader::from_str(src); r.trim_text(true).expand_empty_elements(true); - let mut buf = Vec::new(); - let mut ns_buf = Vec::new(); { - let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + let e = match r.read_resolved_event() { Ok((Unbound, Start(e))) => e, e => panic!("Expecting Empty event, got {:?}", e), }; @@ -200,7 +186,7 @@ fn attributes_empty_ns_expanded() { // we don't care about xmlns attributes for this test .filter(|kv| kv.key.as_namespace_binding().is_none()) .map(|Attribute { key: name, value }| { - let (opt_ns, local_name) = r.attribute_namespace(name, &ns_buf); + let (opt_ns, local_name) = r.resolve_attribute(name); (opt_ns, local_name.into_inner(), value) }); assert_eq!( @@ -218,7 +204,7 @@ fn attributes_empty_ns_expanded() { assert_eq!(attrs.next(), None); } - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((Unbound, End(e))) => assert_eq!(e.name(), QName(b"a")), e => panic!("Expecting End event, got {:?}", e), } @@ -226,16 +212,14 @@ fn attributes_empty_ns_expanded() { #[test] fn default_ns_shadowing_empty() { - let src = b""; + let src = ""; - let mut r = Reader::from_reader(src as &[u8]); + let mut r = NsReader::from_str(src); r.trim_text(true).expand_empty_elements(false); - let mut buf = Vec::new(); - let mut ns_buf = Vec::new(); // { - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, Start(e))) => { assert_eq!(ns, Bound(Namespace(b"urn:example:o"))); assert_eq!(e.name(), QName(b"e")); @@ -246,7 +230,7 @@ fn default_ns_shadowing_empty() { // { - let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + let e = match r.read_resolved_event() { Ok((ns, Empty(e))) => { assert_eq!(ns, Bound(Namespace(b"urn:example:i"))); assert_eq!(e.name(), QName(b"e")); @@ -261,7 +245,7 @@ fn 
default_ns_shadowing_empty() { // we don't care about xmlns attributes for this test .filter(|kv| kv.key.as_namespace_binding().is_none()) .map(|Attribute { key: name, value }| { - let (opt_ns, local_name) = r.attribute_namespace(name, &ns_buf); + let (opt_ns, local_name) = r.resolve_attribute(name); (opt_ns, local_name.into_inner(), value) }); // the attribute should _not_ have a namespace name. The default namespace does not @@ -274,7 +258,7 @@ fn default_ns_shadowing_empty() { } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, End(e))) => { assert_eq!(ns, Bound(Namespace(b"urn:example:o"))); assert_eq!(e.name(), QName(b"e")); @@ -285,16 +269,14 @@ fn default_ns_shadowing_empty() { #[test] fn default_ns_shadowing_expanded() { - let src = b""; + let src = ""; - let mut r = Reader::from_reader(src as &[u8]); + let mut r = NsReader::from_str(src); r.trim_text(true).expand_empty_elements(true); - let mut buf = Vec::new(); - let mut ns_buf = Vec::new(); // { - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, Start(e))) => { assert_eq!(ns, Bound(Namespace(b"urn:example:o"))); assert_eq!(e.name(), QName(b"e")); @@ -302,11 +284,10 @@ fn default_ns_shadowing_expanded() { e => panic!("Expected Start event (), got {:?}", e), } } - buf.clear(); // { - let e = match r.read_namespaced_event(&mut buf, &mut ns_buf) { + let e = match r.read_resolved_event() { Ok((ns, Start(e))) => { assert_eq!(ns, Bound(Namespace(b"urn:example:i"))); assert_eq!(e.name(), QName(b"e")); @@ -320,7 +301,7 @@ fn default_ns_shadowing_expanded() { // we don't care about xmlns attributes for this test .filter(|kv| kv.key.as_namespace_binding().is_none()) .map(|Attribute { key: name, value }| { - let (opt_ns, local_name) = r.attribute_namespace(name, &ns_buf); + let (opt_ns, local_name) = r.resolve_attribute(name); (opt_ns, local_name.into_inner(), value) }); // the attribute should _not_ have a namespace 
name. The default namespace does not @@ -333,7 +314,7 @@ fn default_ns_shadowing_expanded() { } // virtual - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, End(e))) => { assert_eq!(ns, Bound(Namespace(b"urn:example:i"))); assert_eq!(e.name(), QName(b"e")); @@ -341,7 +322,7 @@ fn default_ns_shadowing_expanded() { e => panic!("Expected End event (), got {:?}", e), } // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, End(e))) => { assert_eq!(ns, Bound(Namespace(b"urn:example:o"))); assert_eq!(e.name(), QName(b"e")); @@ -360,14 +341,12 @@ fn default_ns_shadowing_expanded() { #[test] fn reserved_name() { // Name "xmlns-something" is reserved according to spec, because started with "xml" - let mut r = Reader::from_str(r#""#); + let mut r = + NsReader::from_str(r#""#); r.trim_text(true); - let mut buf = Vec::new(); - let mut ns_buf = Vec::new(); - // - match r.read_namespaced_event(&mut buf, &mut ns_buf) { + match r.read_resolved_event() { Ok((ns, Empty(_))) => assert_eq!(ns, Bound(Namespace(b"www1"))), e => panic!( "Expected empty element bound to namespace 'www1', got {:?}", diff --git a/tests/xmlrs_reader_tests.rs b/tests/xmlrs_reader_tests.rs index 28401b77..c6d4c18f 100644 --- a/tests/xmlrs_reader_tests.rs +++ b/tests/xmlrs_reader_tests.rs @@ -1,7 +1,7 @@ use quick_xml::escape::unescape; use quick_xml::events::{BytesStart, Event}; use quick_xml::name::{QName, ResolveResult}; -use quick_xml::{Decoder, Reader, Result}; +use quick_xml::{Decoder, NsReader}; use std::str::from_utf8; #[test] @@ -362,20 +362,65 @@ fn test(input: &str, output: &str, trim: bool) { #[track_caller] fn test_bytes(input: &[u8], output: &[u8], trim: bool) { - let mut reader = Reader::from_reader(input); + let mut reader = NsReader::from_bytes(input); reader .trim_text(trim) .check_comments(true) .expand_empty_elements(false); let mut spec_lines = SpecIter(output).enumerate(); - let mut buf = 
Vec::new(); - let mut ns_buffer = Vec::new(); + let mut decoder = reader.decoder(); loop { - buf.clear(); - let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer); - let line = xmlrs_display(event, reader.decoder()); + let line = match reader.read_resolved_event() { + Ok((_, Event::StartText(_))) => { + // BOM could change decoder + decoder = reader.decoder(); + "StartText".to_string() + } + Ok((_, Event::Decl(e))) => { + // Declaration could change decoder + decoder = reader.decoder(); + + let version_cow = e.version().unwrap(); + let version = decoder.decode(version_cow.as_ref()).unwrap(); + let encoding_cow = e.encoding().unwrap().unwrap(); + let encoding = decoder.decode(encoding_cow.as_ref()).unwrap(); + format!("StartDocument({}, {})", version, encoding) + } + Ok((_, Event::PI(e))) => { + format!("ProcessingInstruction(PI={})", decoder.decode(&e).unwrap()) + } + Ok((_, Event::DocType(e))) => format!("DocType({})", decoder.decode(&e).unwrap()), + Ok((n, Event::Start(e))) => { + let name = namespace_name(n, e.name(), decoder); + match make_attrs(&e, decoder) { + Ok(attrs) if attrs.is_empty() => format!("StartElement({})", &name), + Ok(attrs) => format!("StartElement({} [{}])", &name, &attrs), + Err(e) => format!("StartElement({}, attr-error: {})", &name, &e), + } + } + Ok((n, Event::Empty(e))) => { + let name = namespace_name(n, e.name(), decoder); + match make_attrs(&e, decoder) { + Ok(attrs) if attrs.is_empty() => format!("EmptyElement({})", &name), + Ok(attrs) => format!("EmptyElement({} [{}])", &name, &attrs), + Err(e) => format!("EmptyElement({}, attr-error: {})", &name, &e), + } + } + Ok((n, Event::End(e))) => { + let name = namespace_name(n, e.name(), decoder); + format!("EndElement({})", name) + } + Ok((_, Event::Comment(e))) => format!("Comment({})", decoder.decode(&e).unwrap()), + Ok((_, Event::CData(e))) => format!("CData({})", decoder.decode(&e).unwrap()), + Ok((_, Event::Text(e))) => match unescape(&decoder.decode(&e).unwrap()) { + 
Ok(c) => format!("Characters({})", &c), + Err(err) => format!("FailedUnescape({:?}; {})", e.escape(), err), + }, + Ok((_, Event::Eof)) => format!("EndDocument"), + Err(e) => format!("Error: {}", e), + }; if let Some((n, spec)) = spec_lines.next() { if spec.trim() == "EndDocument" { break; @@ -432,51 +477,6 @@ fn make_attrs(e: &BytesStart, decoder: Decoder) -> ::std::result::Result, decoder: Decoder) -> String { - match opt_event { - Ok((_, Event::StartText(_))) => "StartText".to_string(), - Ok((n, Event::Start(e))) => { - let name = namespace_name(n, e.name(), decoder); - match make_attrs(&e, decoder) { - Ok(attrs) if attrs.is_empty() => format!("StartElement({})", &name), - Ok(attrs) => format!("StartElement({} [{}])", &name, &attrs), - Err(e) => format!("StartElement({}, attr-error: {})", &name, &e), - } - } - Ok((n, Event::Empty(e))) => { - let name = namespace_name(n, e.name(), decoder); - match make_attrs(&e, decoder) { - Ok(attrs) if attrs.is_empty() => format!("EmptyElement({})", &name), - Ok(attrs) => format!("EmptyElement({} [{}])", &name, &attrs), - Err(e) => format!("EmptyElement({}, attr-error: {})", &name, &e), - } - } - Ok((n, Event::End(e))) => { - let name = namespace_name(n, e.name(), decoder); - format!("EndElement({})", name) - } - Ok((_, Event::Comment(e))) => format!("Comment({})", decoder.decode(&e).unwrap()), - Ok((_, Event::CData(e))) => format!("CData({})", decoder.decode(&e).unwrap()), - Ok((_, Event::Text(e))) => match unescape(&decoder.decode(&e).unwrap()) { - Ok(c) => format!("Characters({})", &c), - Err(err) => format!("FailedUnescape({:?}; {})", e.escape(), err), - }, - Ok((_, Event::Decl(e))) => { - let version_cow = e.version().unwrap(); - let version = decoder.decode(version_cow.as_ref()).unwrap(); - let encoding_cow = e.encoding().unwrap().unwrap(); - let encoding = decoder.decode(encoding_cow.as_ref()).unwrap(); - format!("StartDocument({}, {})", version, encoding) - } - Ok((_, Event::Eof)) => format!("EndDocument"), - Ok((_, 
Event::PI(e))) => { - format!("ProcessingInstruction(PI={})", decoder.decode(&e).unwrap()) - } - Ok((_, Event::DocType(e))) => format!("DocType({})", decoder.decode(&e).unwrap()), - Err(e) => format!("Error: {}", e), - } -} - struct SpecIter<'a>(&'a [u8]); impl<'a> Iterator for SpecIter<'a> {