diff --git a/Cargo.toml b/Cargo.toml index 8e2eecc8..4ec84a3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,11 +59,17 @@ async-tokio = ["tokio"] ## [standard compliant]: https://www.w3.org/TR/xml11/#charencoding encoding = ["encoding_rs"] -## Enables support for recognizing all [HTML 5 entities](https://dev.w3.org/html5/html-author/charref) +## Enables support for recognizing all [HTML 5 entities] in [`unescape`] and +## [`unescape_with`] functions. The full list of entities can also be found in +## <https://html.spec.whatwg.org/entities.json>. +## +## [HTML 5 entities]: https://dev.w3.org/html5/html-author/charref +## [`unescape`]: crate::escape::unescape +## [`unescape_with`]: crate::escape::unescape_with escape-html = [] -## This feature enables support for deserializing lists where tags are overlapped -## with tags that do not correspond to the list. +## This feature is for the serde deserializer; it enables support for deserializing +## lists where tags are overlapped with tags that do not correspond to the list. ## ## When this feature is enabled, the XML: ## ```xml @@ -75,7 +81,8 @@ escape-html = [] ## ## ``` ## could be deserialized to a struct: -## ```ignore +## ```no_run +## # use serde::Deserialize; ## #[derive(Deserialize)] ## #[serde(rename_all = "kebab-case")] ## struct AnyName { diff --git a/src/errors.rs b/src/errors.rs index e981de96..0f23590f 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -168,7 +168,14 @@ pub mod serialize { /// Please open an issue at , provide /// your Rust code and XML input. UnexpectedEnd(Vec), - /// Unexpected end of file + /// The [`Reader`] produced [`Event::Eof`] when it was not expected, + /// for example, after producing [`Event::Start`] but before the corresponding + /// [`Event::End`]. 
+ /// + /// [`Reader`]: crate::reader::Reader + /// [`Event::Eof`]: crate::events::Event::Eof + /// [`Event::Start`]: crate::events::Event::Start + /// [`Event::End`]: crate::events::Event::End UnexpectedEof, /// This error indicates that [`deserialize_struct`] was called, but there /// is no any XML element in the input. That means that you try to deserialize @@ -176,7 +183,9 @@ pub mod serialize { /// /// [`deserialize_struct`]: serde::de::Deserializer::deserialize_struct ExpectedStart, - /// Unsupported operation + /// An attempt to deserialize to a type, that is not supported by the XML + /// store at current position, for example, attempt to deserialize `struct` + /// from attribute or attempt to deserialize binary data. Unsupported(&'static str), /// Too many events were skipped while deserializing a sequence, event limit /// exceeded. The limit was provided as an argument diff --git a/src/escapei.rs b/src/escapei.rs index 13141526..f93fbbf9 100644 --- a/src/escapei.rs +++ b/src/escapei.rs @@ -132,13 +132,21 @@ fn _escape bool>(raw: &str, escape_chars: F) -> Cow { } /// Unescape an `&str` and replaces all xml escaped characters (`&...;`) into -/// their corresponding value +/// their corresponding value. +/// +/// If feature `escape-html` is enabled, then recognizes all [HTML5 escapes]. +/// +/// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref pub fn unescape(raw: &str) -> Result, EscapeError> { unescape_with(raw, |_| None) } /// Unescape an `&str` and replaces all xml escaped characters (`&...;`) into /// their corresponding value, using a resolver function for custom entities. +/// +/// If feature `escape-html` is enabled, then recognizes all [HTML5 escapes]. 
+/// +/// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref pub fn unescape_with<'input, 'entity, F>( raw: &'input str, resolve_entity: F, @@ -211,6 +219,7 @@ const fn named_entity(name: &str) -> Option<&str> { const fn named_entity(name: &str) -> Option<&str> { // imported from https://dev.w3.org/html5/html-author/charref // match over strings are not allowed in const functions + //TODO: automate keeping this list up to date using https://html.spec.whatwg.org/entities.json let s = match name.as_bytes() { b"Tab" => "\u{09}", b"NewLine" => "\u{0A}", diff --git a/src/events/mod.rs b/src/events/mod.rs index 016b88c3..9bf37729 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -16,8 +16,8 @@ //! See [`Event`] for a list of all possible events. //! //! # Reading -//! When reading a XML stream, the events are emitted by -//! [`Reader::read_event_into`]. You must listen +//! When reading a XML stream, the events are emitted by [`Reader::read_event`] +//! and [`Reader::read_event_into`]. You must listen //! for the different types of events you are interested in. //! //! See [`Reader`] for further information. @@ -29,6 +29,7 @@ //! //! See [`Writer`] for further information. //! +//! [`Reader::read_event`]: crate::reader::Reader::read_event //! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into //! [`Reader`]: crate::reader::Reader //! [`Writer`]: crate::writer::Writer @@ -500,7 +501,12 @@ impl<'a> BytesDecl<'a> { .transpose() } - /// Gets the decoder struct + /// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get) + /// algorithm. + /// + /// If the encoding is not known, or the `encoding` key was not found, returns `None`. + /// In case of a duplicated `encoding` key, the encoding corresponding to the first + /// one is returned. 
#[cfg(feature = "encoding")] pub fn encoder(&self) -> Option<&'static Encoding> { self.encoding() diff --git a/src/lib.rs b/src/lib.rs index fde3f293..5e8a20c2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,18 +7,19 @@ //! A streaming API based on the [StAX] model. This is suited for larger XML documents which //! cannot completely read into memory at once. //! -//! The user has to explicitly _ask_ for the next XML event, similar -//! to a database cursor. +//! The user has to explicitly _ask_ for the next XML event, similar to a database cursor. //! This is achieved by the following two structs: //! //! - [`Reader`]: A low level XML pull-reader where buffer allocation/clearing is left to user. //! - [`Writer`]: A XML writer. Can be nested with readers if you want to transform XMLs. //! -//! Especially for nested XML elements, the user must keep track _where_ (how deep) in the XML document -//! the current event is located. This is needed as the +//! Especially for nested XML elements, the user must keep track _where_ (how deep) +//! in the XML document the current event is located. //! -//! Furthermore, quick-xml also contains optional [Serde] support to directly serialize and deserialize from -//! structs, without having to deal with the XML events. +//! quick-xml contains optional support of asynchronous reading using [tokio]. +//! +//! Furthermore, quick-xml also contains optional [Serde] support to directly +//! serialize and deserialize from structs, without having to deal with the XML events. //! //! # Examples //! @@ -30,6 +31,7 @@ //! `quick-xml` supports the following features: //! //! [StAX]: https://en.wikipedia.org/wiki/StAX +//! [tokio]: https://tokio.rs/ //! 
[Serde]: https://serde.rs/ #![cfg_attr( feature = "document-features", diff --git a/src/name.rs b/src/name.rs index 30548da9..3bdc7220 100644 --- a/src/name.rs +++ b/src/name.rs @@ -212,7 +212,7 @@ impl<'a> AsRef<[u8]> for Prefix<'a> { /// [XML Schema specification](https://www.w3.org/TR/xml-names/#ns-decl) #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum PrefixDeclaration<'a> { - /// XML attribute binds a default namespace. Corresponds to `xmlns` in in `xmlns="..."` + /// XML attribute binds a default namespace. Corresponds to `xmlns` in `xmlns="..."` Default, /// XML attribute binds a specified prefix to a namespace. Corresponds to a /// `prefix` in `xmlns:prefix="..."`, which is stored as payload of this variant. diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index 54da39b0..dea9f638 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -216,8 +216,7 @@ impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec> for R { //////////////////////////////////////////////////////////////////////////////////////////////////// -/// This is an implementation of [`Reader`] for reading from a [`BufRead`] as -/// underlying byte stream. +/// This is an implementation for reading from a [`BufRead`] as underlying byte stream. impl Reader { /// Reads the next `Event`. 
/// @@ -243,7 +242,7 @@ impl Reader { /// let xml = r#" /// Test /// Test 2 - /// "#; + /// "#; /// let mut reader = Reader::from_str(xml); /// reader.trim_text(true); /// let mut count = 0; @@ -251,7 +250,7 @@ impl Reader { /// let mut txt = Vec::new(); /// loop { /// match reader.read_event_into(&mut buf) { - /// Ok(Event::Start(ref e)) => count += 1, + /// Ok(Event::Start(_)) => count += 1, /// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), /// Ok(Event::Eof) => break, @@ -259,8 +258,8 @@ impl Reader { /// } /// buf.clear(); /// } - /// println!("Found {} start events", count); - /// println!("Text events: {:?}", txt); + /// assert_eq!(count, 3); + /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// ``` #[inline] pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec) -> Result> { @@ -275,7 +274,8 @@ impl Reader { /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading expanded [`Start`] event. /// - /// Manages nested cases where parent and child elements have the same name. + /// Manages nested cases where parent and child elements have the _literally_ + /// same name. /// /// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`] /// will be returned. In particularly, that error will be returned if you call @@ -299,7 +299,7 @@ impl Reader { /// /// # Namespaces /// - /// While the [`Reader`] does not support namespace resolution, namespaces + /// While the `Reader` does not support namespace resolution, namespaces /// does not change the algorithm for comparing names. 
Although the names /// `a:name` and `b:name` where both prefixes `a` and `b` resolves to the /// same namespace, are semantically equivalent, `</b:name>` cannot close diff --git a/src/reader/mod.rs b/src/reader/mod.rs index b34fa0f6..9d345203 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -20,14 +20,16 @@ macro_rules! configure_methods { /// default), those tags are represented by an [`Empty`] event instead. /// /// Note, that setting this to `true` will lead to additional allocates that - /// needed to store tag name for an [`End`] event. There is no additional - /// allocation, however, if [`Self::check_end_names()`] is also set. + /// needed to store tag name for an [`End`] event. However, if [`check_end_names`] + /// is also set, only one additional allocation will be performed that supports + /// both of these options. /// /// (`false` by default) /// /// [`Empty`]: Event::Empty /// [`Start`]: Event::Start /// [`End`]: Event::End + /// [`check_end_names`]: Self::check_end_names pub fn expand_empty_elements(&mut self, val: bool) -> &mut Self { self $(.$holder)? .parser.expand_empty_elements = val; self @@ -35,12 +37,15 @@ macro_rules! configure_methods { /// Changes whether whitespace before and after character data should be removed. /// - /// When set to `true`, all [`Text`] events are trimmed. If they are empty, no event will be - /// pushed. + /// When set to `true`, all [`Text`] events are trimmed. + /// If the event is empty after that, it will not be pushed. + /// + /// Changing this option automatically changes the [`trim_text_end`] option. /// /// (`false` by default) /// /// [`Text`]: Event::Text + /// [`trim_text_end`]: Self::trim_text_end pub fn trim_text(&mut self, val: bool) -> &mut Self { self $(.$holder)? .parser.trim_text_start = val; self $(.$holder)? .parser.trim_text_end = val; @@ -50,6 +55,7 @@ macro_rules! configure_methods { /// Changes whether whitespace after character data should be removed. 
/// /// When set to `true`, trailing whitespace is trimmed in [`Text`] events. + /// If the event is empty after that, it will not be pushed. /// /// (`false` by default) /// @@ -99,13 +105,15 @@ macro_rules! configure_methods { /// contain the data of the mismatched end tag. /// /// Note, that setting this to `true` will lead to additional allocates that - /// needed to store tag name for an [`End`] event. There is no additional - /// allocation, however, if [`Self::expand_empty_elements()`] is also set. + /// needed to store tag name for an [`End`] event. However, if [`expand_empty_elements`] + /// is also set, only one additional allocation will be performed that supports + /// both of these options. /// /// (`true` by default) /// /// [spec]: https://www.w3.org/TR/xml11/#dt-etag /// [`End`]: Event::End + /// [`expand_empty_elements`]: Self::expand_empty_elements pub fn check_end_names(&mut self, val: bool) -> &mut Self { self $(.$holder)? .parser.check_end_names = val; self diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 51082dfe..4ce816fa 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -134,9 +134,9 @@ impl NsReader { } /// Resolves a potentially qualified **element name** or **attribute name** - /// into (namespace name, local name). + /// into _(namespace name, local name)_. /// - /// *Qualified* names have the form `prefix:local-name` where the `prefix` + /// _Qualified_ names have the form `prefix:local-name` where the `prefix` /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`. /// The namespace prefix can be defined on the same element as the name in question. /// @@ -172,14 +172,14 @@ impl NsReader { self.ns_resolver.resolve(name, &self.buffer, !attribute) } - /// Resolves a potentially qualified **element name** into (namespace name, local name). + /// Resolves a potentially qualified **element name** into _(namespace name, local name)_. 
/// - /// *Qualified* element names have the form `prefix:local-name` where the + /// _Qualified_ element names have the form `prefix:local-name` where the /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`. /// The namespace prefix can be defined on the same element as the element /// in question. /// - /// *Unqualified* elements inherits the current *default namespace*. + /// _Unqualified_ elements inherit the current _default namespace_. /// /// The method returns following results depending on the `name` shape and /// the presence of the default namespace: @@ -200,7 +200,7 @@ impl NsReader { /// /// This example shows how you can resolve qualified name into a namespace. /// Note, that in the code like this you do not need to do that manually, - /// because the namespace resolution result returned by the [`read_event()`]. + /// because the namespace resolution result is returned by the [`read_resolved_event()`]. /// /// ``` /// # use pretty_assertions::assert_eq; @@ -222,20 +222,20 @@ impl NsReader { /// [`Bound`]: ResolveResult::Bound /// [`Unbound`]: ResolveResult::Unbound /// [`Unknown`]: ResolveResult::Unknown - /// [`read_event()`]: Self::read_event + /// [`read_resolved_event()`]: Self::read_resolved_event #[inline] pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) { self.ns_resolver.resolve(name, &self.buffer, true) } - /// Resolves a potentially qualified **attribute name** into (namespace name, local name). + /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_. /// - /// *Qualified* attribute names have the form `prefix:local-name` where the + /// _Qualified_ attribute names have the form `prefix:local-name` where the /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`. /// The namespace prefix can be defined on the same element as the attribute /// in question. 
/// - /// *Unqualified* attribute names do *not* inherit the current *default namespace*. + /// _Unqualified_ attribute names do *not* inherit the current _default namespace_. /// /// The method returns following results depending on the `name` shape and /// the presence of the default namespace: @@ -431,9 +431,8 @@ impl NsReader { /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading expanded [`Start`] event. /// - /// Manages nested cases where parent and child elements have the same name - /// ("the same" means that their local names are the same and their prefixes - /// resolves to the same namespace). + /// Manages nested cases where parent and child elements have the _literally_ + /// same name. /// /// If corresponding [`End`] event will not be found, the [`UnexpectedEof`] /// will be returned. In particularly, that error will be returned if you call @@ -448,16 +447,21 @@ impl NsReader { /// appropriate size of each event, you can preallocate the buffer to reduce /// number of reallocations. /// - /// The `ns` and `end` parameters should contain namespace and name of the - /// end element _in the reader encoding_. It is good practice to always get - /// that parameters using [`BytesStart::to_end()`] method. + /// The `end` parameter should contain name of the end element _in the reader + /// encoding_. It is good practice to always get that parameter using + /// [`BytesStart::to_end()`] method. /// /// # Namespaces /// - /// Unlike [`Reader::read_to_end_into()`], this method resolves namespace - /// prefixes, so the names that are not equals literally (for example, - /// `a:name` and `b:name`) could be considered equal if prefixes resolved to - /// the same namespace. + /// While the `NsReader` does namespace resolution, namespaces does not + /// change the algorithm for comparing names. 
Although the names `a:name` + /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace, + /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because + /// according to [the specification] + /// + /// > The end of every element that begins with a **start-tag** MUST be marked + /// > by an **end-tag** containing a name that echoes the element's type as + /// > given in the **start-tag** /// /// # Examples /// @@ -515,6 +519,7 @@ impl NsReader { /// [`read_to_end()`]: Self::read_to_end /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`expand_empty_elements`]: Self::expand_empty_elements + /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag #[inline] pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should @@ -668,9 +673,8 @@ impl<'i> NsReader<&'i [u8]> { /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading expanded [`Start`] event. /// - /// Manages nested cases where parent and child elements have the same name - /// ("the same" means that their local names are the same and their prefixes - /// resolves to the same namespace). + /// Manages nested cases where parent and child elements have the _literally_ + /// same name. /// /// If corresponding [`End`] event will not be found, the [`UnexpectedEof`] /// will be returned. In particularly, that error will be returned if you call @@ -686,10 +690,15 @@ impl<'i> NsReader<&'i [u8]> { /// /// # Namespaces /// - /// Unlike [`Reader::read_to_end()`], this method resolves namespace - /// prefixes, so the names that are not equals literally (for example, - /// `a:name` and `b:name`) could be considered equal if prefixes resolved to - /// the same namespace. + /// While the `NsReader` does namespace resolution, namespaces do not + /// change the algorithm for comparing names. 
Although the names `a:name` + /// and `b:name`, where both prefixes `a` and `b` resolve to the same namespace, + /// are semantically equivalent, `</b:name>` cannot close `<a:name>`, because + /// according to [the specification] + /// + /// > The end of every element that begins with a **start-tag** MUST be marked + /// > by an **end-tag** containing a name that echoes the element's type as + /// > given in the **start-tag** /// /// # Examples /// @@ -743,9 +752,9 @@ impl<'i> NsReader<&'i [u8]> { /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof - /// [`read_to_end()`]: Self::read_to_end /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`expand_empty_elements`]: Self::expand_empty_elements + /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag #[inline] pub fn read_to_end(&mut self, end: QName) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should @@ -756,7 +765,8 @@ impl<'i> NsReader<&'i [u8]> { /// Reads content between start and end tags, including any markup. This /// function is supposed to be called after you already read a [`Start`] event. /// - /// Manages nested cases where parent and child elements have the same name. + /// Manages nested cases where parent and child elements have the _literally_ + /// same name. /// /// This method does not unescape read data, instead it returns content /// "as is" of the XML document. This is because it has no idea what text diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index 0fd6ccfb..6416021d 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -16,9 +16,9 @@ use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, Span, Xml use memchr; -/// This is an implementation of [`Reader`] for reading from a `&[u8]` as -/// underlying byte stream. This implementation supports not using an -/// intermediate buffer as the byte slice itself can be used to borrow from. 
+/// This is an implementation for reading from a `&[u8]` as underlying byte stream. +/// This implementation supports not using an intermediate buffer as the byte slice +/// itself can be used to borrow from. impl<'a> Reader<&'a [u8]> { /// Creates an XML reader from a string slice. pub fn from_str(s: &'a str) -> Self { @@ -80,7 +80,8 @@ impl<'a> Reader<&'a [u8]> { /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading expanded [`Start`] event. /// - /// Manages nested cases where parent and child elements have the same name. + /// Manages nested cases where parent and child elements have the _literally_ + /// same name. /// /// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`] /// will be returned. In particularly, that error will be returned if you call @@ -99,7 +100,7 @@ impl<'a> Reader<&'a [u8]> { /// /// # Namespaces /// - /// While the [`Reader`] does not support namespace resolution, namespaces + /// While the `Reader` does not support namespace resolution, namespaces /// does not change the algorithm for comparing names. Although the names /// `a:name` and `b:name` where both prefixes `a` and `b` resolves to the /// same namespace, are semantically equivalent, `` cannot close @@ -159,7 +160,8 @@ impl<'a> Reader<&'a [u8]> { /// Reads content between start and end tags, including any markup. This /// function is supposed to be called after you already read a [`Start`] event. /// - /// Manages nested cases where parent and child elements have the same name. + /// Manages nested cases where parent and child elements have the _literally_ + /// same name. /// /// This method does not unescape read data, instead it returns content /// "as is" of the XML document. 
This is because it has no idea what text diff --git a/src/writer.rs b/src/writer.rs index 7af1511c..26095eed 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -6,9 +6,7 @@ use crate::encoding::UTF8_BOM; use crate::errors::{Error, Result}; use crate::events::{attributes::Attribute, BytesCData, BytesStart, BytesText, Event}; -/// XML writer. -/// -/// Writes XML `Event`s to a `Write` implementor. +/// XML writer. Writes XML [`Event`]s to a [`std::io::Write`] implementor. /// /// # Examples /// @@ -62,7 +60,7 @@ pub struct Writer { } impl Writer { - /// Creates a Writer from a generic Write + /// Creates a `Writer` from a generic writer. pub const fn new(inner: W) -> Writer { Writer { writer: inner, @@ -70,7 +68,7 @@ impl Writer { } } - /// Creates a Writer with configured whitespace indents from a generic Write + /// Creates a `Writer` with configured whitespace indents from a generic writer. pub fn new_with_indent(inner: W, indent_char: u8, indent_size: usize) -> Writer { Writer { writer: inner,