From 6fe40c8d858dd9889e0f34f78c85f2a097aac5b1 Mon Sep 17 00:00:00 2001 From: Mingun Date: Fri, 26 Aug 2022 23:13:54 +0500 Subject: [PATCH 1/6] Remove incorrect serde example. Closes #211 New examples will be added in #369 --- README.md | 96 --------------------------------------------- src/de/mod.rs | 105 -------------------------------------------------- 2 files changed, 201 deletions(-) diff --git a/README.md b/README.md index 9ad6428e..fe622465 100644 --- a/README.md +++ b/README.md @@ -110,102 +110,6 @@ assert_eq!(result, expected.as_bytes()); When using the `serialize` feature, quick-xml can be used with serde's `Serialize`/`Deserialize` traits. -Here is an example deserializing crates.io source: - -```rust -// Cargo.toml -// [dependencies] -// serde = { version = "1.0", features = [ "derive" ] } -// quick-xml = { version = "0.22", features = [ "serialize" ] } -use serde::Deserialize; -use quick_xml::de::{from_str, DeError}; - -#[derive(Debug, Deserialize, PartialEq)] -struct Link { - rel: String, - href: String, - sizes: Option, -} - -#[derive(Debug, Deserialize, PartialEq)] -#[serde(rename_all = "lowercase")] -enum Lang { - En, - Fr, - De, -} - -#[derive(Debug, Deserialize, PartialEq)] -struct Head { - title: String, - #[serde(rename = "link", default)] - links: Vec, -} - -#[derive(Debug, Deserialize, PartialEq)] -struct Script { - src: String, - integrity: String, -} - -#[derive(Debug, Deserialize, PartialEq)] -struct Body { - #[serde(rename = "script", default)] - scripts: Vec - - - - -}"; - let html: Html = from_str(xml)?; - assert_eq!(&html.head.title, "crates.io: Rust Package Registry"); - Ok(html) -} -``` - ### Credits This has largely been inspired by [serde-xml-rs](https://github.com/RReverser/serde-xml-rs). diff --git a/src/de/mod.rs b/src/de/mod.rs index 8927a4b3..8ed23ac5 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -1,109 +1,4 @@ //! Serde `Deserializer` module -//! -//! # Examples -//! -//! 
Here is a simple example parsing [crates.io](https://crates.io/) source code. -//! -//! ``` -//! // Cargo.toml -//! // [dependencies] -//! // serde = { version = "1.0", features = [ "derive" ] } -//! // quick-xml = { version = "0.22", features = [ "serialize" ] } -//! # use pretty_assertions::assert_eq; -//! use serde::Deserialize; -//! use quick_xml::de::{from_str, DeError}; -//! -//! #[derive(Debug, Deserialize, PartialEq)] -//! struct Link { -//! rel: String, -//! href: String, -//! sizes: Option, -//! } -//! -//! #[derive(Debug, Deserialize, PartialEq)] -//! #[serde(rename_all = "lowercase")] -//! enum Lang { -//! En, -//! Fr, -//! De, -//! } -//! -//! #[derive(Debug, Deserialize, PartialEq)] -//! struct Head { -//! title: String, -//! #[serde(rename = "link", default)] -//! links: Vec, -//! } -//! -//! #[derive(Debug, Deserialize, PartialEq)] -//! struct Script { -//! src: String, -//! integrity: String, -//! } -//! -//! #[derive(Debug, Deserialize, PartialEq)] -//! struct Body { -//! #[serde(rename = "script", default)] -//! scripts: Vec -//! -//! -//! -//! -//! -//! }"; -//! let html: Html = from_str(xml)?; -//! assert_eq!(&html.head.title, "crates.io: Rust Package Registr"); -//! Ok(html) -//! } -//! 
``` // Macros should be defined before the modules that using them // Also, macros should be imported before using them From ef3506ba9f751f421a40b6e59d6765a68084d362 Mon Sep 17 00:00:00 2001 From: Mingun Date: Fri, 26 Aug 2022 23:44:39 +0500 Subject: [PATCH 2/6] Use optimized method to extend vector --- src/de/simple_type.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/de/simple_type.rs b/src/de/simple_type.rs index 580c6312..aa78c402 100644 --- a/src/de/simple_type.rs +++ b/src/de/simple_type.rs @@ -1218,7 +1218,7 @@ mod tests { fn to_utf16(string: &str) -> Vec { let mut bytes = Vec::new(); for ch in string.encode_utf16() { - bytes.extend(&ch.to_le_bytes()); + bytes.extend_from_slice(&ch.to_le_bytes()); } bytes } From 50242ba3b10e3d886f29abf4eefbafa49530b904 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sat, 27 Aug 2022 00:18:54 +0500 Subject: [PATCH 3/6] Address some clippy warnings --- examples/custom_entities.rs | 2 +- src/de/map.rs | 4 ++-- src/de/mod.rs | 4 ++-- src/errors.rs | 6 +++--- src/escapei.rs | 10 +++++----- src/events/attributes.rs | 2 +- src/events/mod.rs | 30 +++++++++++++++--------------- src/name.rs | 6 +++--- src/reader/ns_reader.rs | 7 ++++--- src/reader/parser.rs | 2 +- src/utils.rs | 12 ++++++------ src/writer.rs | 11 ++++------- tests/unit_tests.rs | 14 +++++++------- 13 files changed, 54 insertions(+), 56 deletions(-) diff --git a/examples/custom_entities.rs b/examples/custom_entities.rs index a31e3f4a..1405f3d7 100644 --- a/examples/custom_entities.rs +++ b/examples/custom_entities.rs @@ -33,7 +33,7 @@ fn main() -> Result<(), Box> { loop { match reader.read_event() { Ok(Event::DocType(ref e)) => { - for cap in entity_re.captures_iter(&e) { + for cap in entity_re.captures_iter(e) { custom_entities.insert( reader.decoder().decode(&cap[1])?.into_owned(), reader.decoder().decode(&cap[2])?.into_owned(), diff --git a/src/de/map.rs b/src/de/map.rs index 26d5532b..08538ec9 100644 --- a/src/de/map.rs +++ b/src/de/map.rs @@ 
-621,13 +621,13 @@ where break match self.map.de.peek()? { // If we see a tag that we not interested, skip it #[cfg(feature = "overlapped-lists")] - DeEvent::Start(e) if !self.filter.is_suitable(&e, decoder)? => { + DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => { self.map.de.skip()?; continue; } // Stop iteration when list elements ends #[cfg(not(feature = "overlapped-lists"))] - DeEvent::Start(e) if !self.filter.is_suitable(&e, decoder)? => Ok(None), + DeEvent::Start(e) if !self.filter.is_suitable(e, decoder)? => Ok(None), // Stop iteration after reaching a closing tag DeEvent::End(e) if e.name() == self.map.start.name() => Ok(None), diff --git a/src/de/mod.rs b/src/de/mod.rs index 8ed23ac5..adcc62ea 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -129,7 +129,7 @@ pub(crate) const UNFLATTEN_PREFIX: &str = "$unflatten="; pub(crate) const PRIMITIVE_PREFIX: &str = "$primitive="; /// Simplified event which contains only these variants that used by deserializer -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Eq)] pub enum DeEvent<'a> { /// Start tag (with attributes) ``. Start(BytesStart<'a>), @@ -343,7 +343,7 @@ where self.read.push_front(self.reader.next()?); } if let Some(event) = self.read.front() { - return Ok(&event); + return Ok(event); } // SAFETY: `self.read` was filled in the code above. 
// NOTE: Can be replaced with `unsafe { std::hint::unreachable_unchecked() }` diff --git a/src/errors.rs b/src/errors.rs index 8d00763f..e981de96 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -107,7 +107,7 @@ impl std::fmt::Display for Error { Error::EscapeError(e) => write!(f, "{}", e), Error::UnknownPrefix(prefix) => { f.write_str("Unknown namespace prefix '")?; - write_byte_string(f, &prefix)?; + write_byte_string(f, prefix)?; f.write_str("'") } } @@ -195,12 +195,12 @@ pub mod serialize { DeError::KeyNotRead => write!(f, "Invalid `Deserialize` implementation: `MapAccess::next_value[_seed]` was called before `MapAccess::next_key[_seed]`"), DeError::UnexpectedStart(e) => { f.write_str("Unexpected `Event::Start(")?; - write_byte_string(f, &e)?; + write_byte_string(f, e)?; f.write_str(")`") } DeError::UnexpectedEnd(e) => { f.write_str("Unexpected `Event::End(")?; - write_byte_string(f, &e)?; + write_byte_string(f, e)?; f.write_str(")`") } DeError::UnexpectedEof => write!(f, "Unexpected `Event::Eof`"), diff --git a/src/escapei.rs b/src/escapei.rs index 21608b87..13141526 100644 --- a/src/escapei.rs +++ b/src/escapei.rs @@ -1,6 +1,6 @@ //! 
Manage xml character escapes -use memchr; +use memchr::memchr2_iter; use std::borrow::Cow; use std::ops::Range; @@ -150,7 +150,7 @@ where let bytes = raw.as_bytes(); let mut unescaped = None; let mut last_end = 0; - let mut iter = memchr::memchr2_iter(b'&', b';', bytes); + let mut iter = memchr2_iter(b'&', b';', bytes); while let Some(start) = iter.by_ref().find(|p| bytes[*p] == b'&') { match iter.next() { Some(end) if bytes[end] == b';' => { @@ -163,7 +163,7 @@ where // search for character correctness let pat = &raw[start + 1..end]; - if pat.starts_with("#") { + if pat.starts_with('#') { let entity = &pat[1..]; // starts after the # let codepoint = parse_number(entity, start..end)?; unescaped.push_str(codepoint.encode_utf8(&mut [0u8; 4])); @@ -1672,10 +1672,10 @@ const fn named_entity(name: &str) -> Option<&str> { } fn parse_number(bytes: &str, range: Range) -> Result { - let code = if bytes.starts_with("x") { + let code = if bytes.starts_with('x') { parse_hexadecimal(&bytes[1..]) } else { - parse_decimal(&bytes) + parse_decimal(bytes) }?; if code == 0 { return Err(EscapeError::EntityWithNull(range)); diff --git a/src/events/attributes.rs b/src/events/attributes.rs index 0025003a..7eb2b27b 100644 --- a/src/events/attributes.rs +++ b/src/events/attributes.rs @@ -241,7 +241,7 @@ impl<'a> FusedIterator for Attributes<'a> {} /// /// Recovery position in examples shows the position from which parsing of the /// next attribute will be attempted. -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Eq)] pub enum AttrError { /// Attribute key was not followed by `=`, position relative to the start of /// the owning tag is provided. 
diff --git a/src/events/mod.rs b/src/events/mod.rs index 655f2087..347695c8 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -280,7 +280,7 @@ impl<'a> Deref for BytesStart<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { - &*self.buf + &self.buf } } @@ -505,7 +505,7 @@ impl<'a> BytesDecl<'a> { pub fn encoder(&self) -> Option<&'static Encoding> { self.encoding() .and_then(|e| e.ok()) - .and_then(|e| Encoding::for_label(&*e)) + .and_then(|e| Encoding::for_label(&e)) } /// Converts the event into an owned event. @@ -528,7 +528,7 @@ impl<'a> Deref for BytesDecl<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { - &*self.content + &self.content } } @@ -575,7 +575,7 @@ impl<'a> BytesEnd<'a> { /// Gets the undecoded raw tag name, as present in the input stream. #[inline] pub fn name(&self) -> QName { - QName(&*self.name) + QName(&self.name) } /// Gets the undecoded raw local tag name (excluding namespace) as present @@ -600,7 +600,7 @@ impl<'a> Deref for BytesEnd<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { - &*self.name + &self.name } } @@ -730,7 +730,7 @@ impl<'a> Deref for BytesText<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { - &*self.content + &self.content } } @@ -861,7 +861,7 @@ impl<'a> Deref for BytesCData<'a> { type Target = [u8]; fn deref(&self) -> &[u8] { - &*self.content + &self.content } } @@ -935,14 +935,14 @@ impl<'a> Deref for Event<'a> { fn deref(&self) -> &[u8] { match *self { - Event::Start(ref e) | Event::Empty(ref e) => &*e, - Event::End(ref e) => &*e, - Event::Text(ref e) => &*e, - Event::Decl(ref e) => &*e, - Event::PI(ref e) => &*e, - Event::CData(ref e) => &*e, - Event::Comment(ref e) => &*e, - Event::DocType(ref e) => &*e, + Event::Start(ref e) | Event::Empty(ref e) => e, + Event::End(ref e) => e, + Event::Text(ref e) => e, + Event::Decl(ref e) => e, + Event::PI(ref e) => e, + Event::CData(ref e) => e, + Event::Comment(ref e) => e, + Event::DocType(ref e) => e, Event::Eof => &[], } } diff --git a/src/name.rs 
b/src/name.rs index ea304e02..c91c1911 100644 --- a/src/name.rs +++ b/src/name.rs @@ -171,7 +171,7 @@ impl<'a> From> for LocalName<'a> { /// ``` #[inline] fn from(name: QName<'a>) -> Self { - Self(name.index().map_or(&name.0, |i| &name.0[i + 1..])) + Self(name.index().map_or(name.0, |i| &name.0[i + 1..])) } } @@ -414,7 +414,7 @@ impl NamespaceResolver { match k.as_namespace_binding() { Some(PrefixDeclaration::Default) => { let start = buffer.len(); - buffer.extend_from_slice(&*v); + buffer.extend_from_slice(&v); self.bindings.push(NamespaceEntry { start, prefix_len: 0, @@ -425,7 +425,7 @@ impl NamespaceResolver { Some(PrefixDeclaration::Named(prefix)) => { let start = buffer.len(); buffer.extend_from_slice(prefix); - buffer.extend_from_slice(&*v); + buffer.extend_from_slice(&v); self.bindings.push(NamespaceEntry { start, prefix_len: prefix.len(), diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 8eba75a1..51082dfe 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -100,15 +100,16 @@ impl NsReader { ) -> Result<(ResolveResult, Event<'i>)> { match event { Ok(Event::Start(e)) => Ok(( - self.ns_resolver.find(e.name(), &mut self.buffer), + self.ns_resolver.find(e.name(), &self.buffer), Event::Start(e), )), Ok(Event::Empty(e)) => Ok(( - self.ns_resolver.find(e.name(), &mut self.buffer), + self.ns_resolver.find(e.name(), &self.buffer), Event::Empty(e), )), Ok(Event::End(e)) => Ok(( - self.ns_resolver.find(e.name(), &mut self.buffer), + // Comment that prevent cargo rmt + self.ns_resolver.find(e.name(), &self.buffer), Event::End(e), )), Ok(e) => Ok((ResolveResult::Unbound, e)), diff --git a/src/reader/parser.rs b/src/reader/parser.rs index 158272df..7ee04858 100644 --- a/src/reader/parser.rs +++ b/src/reader/parser.rs @@ -127,7 +127,7 @@ impl Parser { let start = buf[8..] 
.iter() .position(|b| !is_whitespace(*b)) - .unwrap_or_else(|| len - 8); + .unwrap_or(len - 8); debug_assert!(start < len - 8, "DocType must have a name"); Ok(Event::DocType(BytesText::wrap( &buf[8 + start..], diff --git a/src/utils.rs b/src/utils.rs index a2e9a79a..130532a3 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -39,10 +39,10 @@ pub fn write_byte_string(f: &mut Formatter, byte_string: &[u8]) -> fmt::Result { /// Also, when `serialize` feature is on, this type deserialized using /// [`deserialize_byte_buf`](serde::Deserializer::deserialize_byte_buf) instead /// of vector's generic [`deserialize_seq`](serde::Deserializer::deserialize_seq) -#[derive(PartialEq)] +#[derive(PartialEq, Eq)] pub struct ByteBuf(pub Vec); -impl<'de> Debug for ByteBuf { +impl Debug for ByteBuf { fn fmt(&self, f: &mut Formatter) -> fmt::Result { write_byte_string(f, &self.0) } @@ -72,7 +72,7 @@ impl<'de> Deserialize<'de> for ByteBuf { } } - Ok(d.deserialize_byte_buf(ValueVisitor)?) + d.deserialize_byte_buf(ValueVisitor) } } @@ -84,12 +84,12 @@ impl<'de> Deserialize<'de> for ByteBuf { /// Also, when `serialize` feature is on, this type deserialized using /// [`deserialize_bytes`](serde::Deserializer::deserialize_bytes) instead /// of vector's generic [`deserialize_seq`](serde::Deserializer::deserialize_seq) -#[derive(PartialEq)] +#[derive(PartialEq, Eq)] pub struct Bytes<'de>(pub &'de [u8]); impl<'de> Debug for Bytes<'de> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write_byte_string(f, &self.0) + write_byte_string(f, self.0) } } @@ -113,7 +113,7 @@ impl<'de> Deserialize<'de> for Bytes<'de> { } } - Ok(d.deserialize_bytes(ValueVisitor)?) 
+ d.deserialize_bytes(ValueVisitor) } } diff --git a/src/writer.rs b/src/writer.rs index 641d632b..0f70a28f 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -142,7 +142,7 @@ impl Writer { Event::Empty(ref e) => self.write_wrapped(b"<", e, b"/>"), Event::Text(ref e) => { next_should_line_break = false; - self.write(&e) + self.write(e) } Event::Comment(ref e) => self.write_wrapped(b""), Event::CData(ref e) => { @@ -325,13 +325,13 @@ impl<'a, W: Write> ElementWriter<'a, W> { } /// Create a new scope for writing XML inside the current element. - pub fn write_inner_content(mut self, closure: F) -> Result<&'a mut Writer> + pub fn write_inner_content(self, closure: F) -> Result<&'a mut Writer> where F: Fn(&mut Writer) -> Result<()>, { self.writer .write_event(Event::Start(self.start_tag.borrow()))?; - closure(&mut self.writer)?; + closure(self.writer)?; self.writer .write_event(Event::End(self.start_tag.to_end()))?; Ok(self.writer) @@ -366,10 +366,7 @@ impl Indentation { } fn shrink(&mut self) { - self.indents_len = match self.indents_len.checked_sub(self.indent_size) { - Some(result) => result, - None => 0, - }; + self.indents_len = self.indents_len.saturating_sub(self.indent_size); } } diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index c269886a..4881ba05 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -134,16 +134,16 @@ fn test_xml_decl() { &*v, b"1.0", "expecting version '1.0', got '{:?}", - from_utf8(&*v) + from_utf8(&v) ), - Err(e) => assert!(false, "{:?}", e), + Err(e) => panic!("{:?}", e), } match e.encoding() { Some(Ok(v)) => assert_eq!( &*v, b"utf-8", "expecting encoding 'utf-8', got '{:?}", - from_utf8(&*v) + from_utf8(&v) ), Some(Err(e)) => panic!("{:?}", e), None => panic!("cannot find encoding"), @@ -433,7 +433,7 @@ fn test_offset_err_comment() { r.buffer_position(), e ), - e => assert!(false, "expecting error, found {:?}", e), + e => panic!("expecting error, found {:?}", e), } } @@ -454,7 +454,7 @@ fn test_offset_err_comment_2_buf() 
{ r.buffer_position(), e ), - e => assert!(false, "expecting error, found {:?}", e), + e => panic!("expecting error, found {:?}", e), } } @@ -475,7 +475,7 @@ fn test_offset_err_comment_trim_text() { r.buffer_position(), e ), - e => assert!(false, "expecting error, found {:?}", e), + e => panic!("expecting error, found {:?}", e), } } @@ -490,7 +490,7 @@ fn test_escaped_content() { &*e, b"<test>", "content unexpected: expecting '<test>', got '{:?}'", - from_utf8(&*e) + from_utf8(&e) ); match e.unescape() { Ok(c) => assert_eq!(c, ""), From 63db2cf4ec2c1cba4633485e1e183a548a6beb83 Mon Sep 17 00:00:00 2001 From: Mingun Date: Thu, 25 Aug 2022 20:34:48 +0500 Subject: [PATCH 4/6] Remove `Deserializer::new` because it cannot be used outside of the quick-xml crate The readers that implement the necessary traits cannot be created outside of the quick-xml crate. Also, it is too dangerous to provide such a method directly because the deserializer internally relies on some options that are set for a reader --- Changelog.md | 3 +++ src/de/mod.rs | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Changelog.md b/Changelog.md index 39c84a52..47d7aef1 100644 --- a/Changelog.md +++ b/Changelog.md @@ -183,6 +183,8 @@ - [#459]: Made the `Writer::write()` method non-public as writing random bytes to a document is not generally useful or desirable. - [#459]: BOM bytes are no longer emitted as `Event::Text`. To write a BOM, use `Writer::write_bom()`. 
+- [#467]: Removed `Deserializer::new` because it cannot be used outside of the quick-xml crate + ### New Tests - [#9]: Added tests for incorrect nested tags in input @@ -227,6 +229,7 @@ [#455]: https://github.com/tafia/quick-xml/pull/455 [#456]: https://github.com/tafia/quick-xml/pull/456 [#459]: https://github.com/tafia/quick-xml/pull/459 +[#467]: https://github.com/tafia/quick-xml/pull/467 ## 0.23.0 -- 2022-05-08 diff --git a/src/de/mod.rs b/src/de/mod.rs index adcc62ea..a0235526 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -256,7 +256,7 @@ where /// /// - [`Deserializer::from_str`] /// - [`Deserializer::from_reader`] - pub fn new(reader: R) -> Self { + fn new(reader: R) -> Self { Deserializer { reader, From 9f4ac28a337b5a5200aada8072c51db962ac84ee Mon Sep 17 00:00:00 2001 From: Mingun Date: Sat, 27 Aug 2022 00:53:39 +0500 Subject: [PATCH 5/6] Turn whatever possible to a `const fn` --- Changelog.md | 17 +++++++++++++++++ src/de/escape.rs | 2 +- src/de/simple_type.rs | 2 +- src/encoding.rs | 2 +- src/events/attributes.rs | 12 ++++++------ src/events/mod.rs | 6 +++--- src/name.rs | 8 ++++---- src/reader/mod.rs | 12 ++++++------ src/reader/parser.rs | 2 +- src/reader/slice_reader.rs | 2 +- src/se/mod.rs | 4 ++-- src/writer.rs | 2 +- 12 files changed, 44 insertions(+), 27 deletions(-) diff --git a/Changelog.md b/Changelog.md index 47d7aef1..0ae1f588 100644 --- a/Changelog.md +++ b/Changelog.md @@ -41,6 +41,23 @@ - [#455]: Change return type of all `read_to_end*` methods to return a span between tags - [#455]: Added `Reader::read_text` method to return a raw content (including markup) between tags - [#459]: Added a `Writer::write_bom()` method for inserting a Byte-Order-Mark into the document. 
+- [#467]: The following functions made `const`: + - `Attr::key` + - `Attr::value` + - `Attributes::html` + - `Attributes::new` + - `BytesDecl::from_start` + - `Decoder::encoding` + - `LocalName::into_inner` + - `Namespace::into_inner` + - `Prefix::into_inner` + - `QName::into_inner` + - `Reader::buffer_position` + - `Reader::decoder` + - `Reader::get_ref` + - `Serializer::new` + - `Serializer::with_root` + - `Writer::new` ### Bug Fixes diff --git a/src/de/escape.rs b/src/de/escape.rs index 191df90b..976ab109 100644 --- a/src/de/escape.rs +++ b/src/de/escape.rs @@ -25,7 +25,7 @@ pub struct EscapedDeserializer<'a> { } impl<'a> EscapedDeserializer<'a> { - pub fn new(escaped_value: Cow<'a, [u8]>, decoder: Decoder, escaped: bool) -> Self { + pub const fn new(escaped_value: Cow<'a, [u8]>, decoder: Decoder, escaped: bool) -> Self { EscapedDeserializer { decoder, escaped_value, diff --git a/src/de/simple_type.rs b/src/de/simple_type.rs index aa78c402..3c2b2ac3 100644 --- a/src/de/simple_type.rs +++ b/src/de/simple_type.rs @@ -546,7 +546,7 @@ impl<'de, 'a> SimpleTypeDeserializer<'de, 'a> { /// Constructor for tests #[inline] - fn new(content: CowRef<'de, 'a>, escaped: bool, decoder: Decoder) -> Self { + const fn new(content: CowRef<'de, 'a>, escaped: bool, decoder: Decoder) -> Self { Self { content, escaped, diff --git a/src/encoding.rs b/src/encoding.rs index 673e4012..4894b488 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -90,7 +90,7 @@ impl Decoder { /// This encoding will be used by [`decode`]. /// /// [`decode`]: Self::decode - pub fn encoding(&self) -> &'static Encoding { + pub const fn encoding(&self) -> &'static Encoding { self.encoding } diff --git a/src/events/attributes.rs b/src/events/attributes.rs index 7eb2b27b..3895d71e 100644 --- a/src/events/attributes.rs +++ b/src/events/attributes.rs @@ -191,7 +191,7 @@ pub struct Attributes<'a> { impl<'a> Attributes<'a> { /// Internal constructor, used by `BytesStart`. 
Supplies data in reader's encoding #[inline] - pub(crate) fn wrap(buf: &'a [u8], pos: usize, html: bool) -> Self { + pub(crate) const fn wrap(buf: &'a [u8], pos: usize, html: bool) -> Self { Self { bytes: buf, state: IterState::new(pos, html), @@ -199,12 +199,12 @@ impl<'a> Attributes<'a> { } /// Creates a new attribute iterator from a buffer. - pub fn new(buf: &'a str, pos: usize) -> Self { + pub const fn new(buf: &'a str, pos: usize) -> Self { Self::wrap(buf.as_bytes(), pos, false) } /// Creates a new attribute iterator from a buffer, allowing HTML attribute syntax. - pub fn html(buf: &'a str, pos: usize) -> Self { + pub const fn html(buf: &'a str, pos: usize) -> Self { Self::wrap(buf.as_bytes(), pos, true) } @@ -412,7 +412,7 @@ impl Attr { impl<'a> Attr<&'a [u8]> { /// Returns the key value #[inline] - pub fn key(&self) -> QName<'a> { + pub const fn key(&self) -> QName<'a> { QName(match self { Attr::DoubleQ(key, _) => key, Attr::SingleQ(key, _) => key, @@ -425,7 +425,7 @@ impl<'a> Attr<&'a [u8]> { /// /// [HTML specification]: https://www.w3.org/TR/2012/WD-html-markup-20120329/syntax.html#syntax-attr-empty #[inline] - pub fn value(&self) -> &'a [u8] { + pub const fn value(&self) -> &'a [u8] { match self { Attr::DoubleQ(_, value) => value, Attr::SingleQ(_, value) => value, @@ -514,7 +514,7 @@ pub(crate) struct IterState { } impl IterState { - pub fn new(offset: usize, html: bool) -> Self { + pub const fn new(offset: usize, html: bool) -> Self { Self { state: State::Next(offset), html, diff --git a/src/events/mod.rs b/src/events/mod.rs index 347695c8..016b88c3 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -71,7 +71,7 @@ pub struct BytesStart<'a> { impl<'a> BytesStart<'a> { /// Internal constructor, used by `Reader`. 
Supplies data in reader's encoding #[inline] - pub(crate) fn wrap(content: &'a [u8], name_len: usize) -> Self { + pub(crate) const fn wrap(content: &'a [u8], name_len: usize) -> Self { BytesStart { buf: Cow::Borrowed(content), name_len, @@ -343,7 +343,7 @@ impl<'a> BytesDecl<'a> { } /// Creates a `BytesDecl` from a `BytesStart` - pub fn from_start(start: BytesStart<'a>) -> Self { + pub const fn from_start(start: BytesStart<'a>) -> Self { Self { content: start } } @@ -543,7 +543,7 @@ pub struct BytesEnd<'a> { impl<'a> BytesEnd<'a> { /// Internal constructor, used by `Reader`. Supplies data in reader's encoding #[inline] - pub(crate) fn wrap(name: Cow<'a, [u8]>) -> Self { + pub(crate) const fn wrap(name: Cow<'a, [u8]>) -> Self { BytesEnd { name } } diff --git a/src/name.rs b/src/name.rs index c91c1911..30548da9 100644 --- a/src/name.rs +++ b/src/name.rs @@ -21,7 +21,7 @@ pub struct QName<'a>(pub &'a [u8]); impl<'a> QName<'a> { /// Converts this name to an internal slice representation. #[inline(always)] - pub fn into_inner(self) -> &'a [u8] { + pub const fn into_inner(self) -> &'a [u8] { self.0 } @@ -138,7 +138,7 @@ pub struct LocalName<'a>(&'a [u8]); impl<'a> LocalName<'a> { /// Converts this name to an internal slice representation. 
#[inline(always)] - pub fn into_inner(self) -> &'a [u8] { + pub const fn into_inner(self) -> &'a [u8] { self.0 } } @@ -188,7 +188,7 @@ pub struct Prefix<'a>(&'a [u8]); impl<'a> Prefix<'a> { /// Extracts internal slice #[inline(always)] - pub fn into_inner(self) -> &'a [u8] { + pub const fn into_inner(self) -> &'a [u8] { self.0 } } @@ -253,7 +253,7 @@ impl<'a> Namespace<'a> { /// [non-normalized]: https://www.w3.org/TR/REC-xml/#AVNormalize /// [IRI reference]: https://datatracker.ietf.org/doc/html/rfc3987 #[inline(always)] - pub fn into_inner(self) -> &'a [u8] { + pub const fn into_inner(self) -> &'a [u8] { self.0 } //TODO: implement value normalization and use it when comparing namespaces diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 77fa8fd9..b34fa0f6 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -351,7 +351,7 @@ enum EncodingRef { #[cfg(feature = "encoding")] impl EncodingRef { #[inline] - fn encoding(&self) -> &'static Encoding { + const fn encoding(&self) -> &'static Encoding { match self { Self::Implicit(e) => e, Self::Explicit(e) => e, @@ -360,7 +360,7 @@ impl EncodingRef { } } #[inline] - fn can_be_refined(&self) -> bool { + const fn can_be_refined(&self) -> bool { match self { Self::Implicit(_) | Self::BomDetected(_) => true, Self::Explicit(_) | Self::XmlDetected(_) => false, @@ -505,7 +505,7 @@ impl Reader { } /// Gets a reference to the underlying reader. - pub fn get_ref(&self) -> &R { + pub const fn get_ref(&self) -> &R { &self.reader } @@ -517,7 +517,7 @@ impl Reader { /// Gets the current byte position in the input data. /// /// Useful when debugging errors. 
- pub fn buffer_position(&self) -> usize { + pub const fn buffer_position(&self) -> usize { // when internal state is OpenedTag, we have actually read until '<', // which we don't want to show if let ParseState::OpenedTag = self.parser.state { @@ -535,7 +535,7 @@ impl Reader { /// If `encoding` feature is enabled and no encoding is specified in declaration, /// defaults to UTF-8. #[inline] - pub fn decoder(&self) -> Decoder { + pub const fn decoder(&self) -> Decoder { self.parser.decoder() } } @@ -789,7 +789,7 @@ impl ReadElementState { /// A function to check whether the byte is a whitespace (blank, new line, carriage return or tab) #[inline] -pub(crate) fn is_whitespace(b: u8) -> bool { +pub(crate) const fn is_whitespace(b: u8) -> bool { match b { b' ' | b'\r' | b'\n' | b'\t' => true, _ => false, diff --git a/src/reader/parser.rs b/src/reader/parser.rs index 7ee04858..9c146f9c 100644 --- a/src/reader/parser.rs +++ b/src/reader/parser.rs @@ -247,7 +247,7 @@ impl Parser { /// /// If `encoding` feature is enabled and no encoding is specified in declaration, /// defaults to UTF-8. - pub fn decoder(&self) -> Decoder { + pub const fn decoder(&self) -> Decoder { Decoder { #[cfg(feature = "encoding")] encoding: self.encoding.encoding(), diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index fbe3e318..0fd6ccfb 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -334,7 +334,7 @@ mod test { use crate::reader::XmlSource; /// Default buffer constructor just pass the byte array from the test - fn identity(input: T) -> T { + const fn identity(input: T) -> T { input } diff --git a/src/se/mod.rs b/src/se/mod.rs index 2c9cd8ed..eab194ba 100644 --- a/src/se/mod.rs +++ b/src/se/mod.rs @@ -39,7 +39,7 @@ impl<'r, W: Write> Serializer<'r, W> { /// Note, that attempt to serialize a non-struct (including unit structs /// and newtype structs) will end up to an error. 
Use `with_root` to create /// serializer with explicitly defined root element name - pub fn new(writer: W) -> Self { + pub const fn new(writer: W) -> Self { Self::with_root(Writer::new(writer), None) } @@ -91,7 +91,7 @@ impl<'r, W: Write> Serializer<'r, W> { /// r#""# /// ); /// ``` - pub fn with_root(writer: Writer, root_tag: Option<&'r str>) -> Self { + pub const fn with_root(writer: Writer, root_tag: Option<&'r str>) -> Self { Self { writer, root_tag } } diff --git a/src/writer.rs b/src/writer.rs index 0f70a28f..7af1511c 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -63,7 +63,7 @@ pub struct Writer { impl Writer { /// Creates a Writer from a generic Write - pub fn new(inner: W) -> Writer { + pub const fn new(inner: W) -> Writer { Writer { writer: inner, indent: None, From f17e0e58c43ec4fdf2b8b1786b5348a62367bb14 Mon Sep 17 00:00:00 2001 From: Mingun Date: Thu, 25 Aug 2022 00:53:37 +0500 Subject: [PATCH 6/6] Fix the errors, clarify some things in the documentation --- Cargo.toml | 15 +++++--- src/errors.rs | 13 +++++-- src/escapei.rs | 11 +++++- src/events/mod.rs | 12 +++++-- src/lib.rs | 14 ++++---- src/name.rs | 2 +- src/reader/buffered_reader.rs | 16 ++++----- src/reader/mod.rs | 20 +++++++---- src/reader/ns_reader.rs | 68 ++++++++++++++++++++--------------- src/reader/slice_reader.rs | 14 ++++---- src/writer.rs | 8 ++--- 11 files changed, 122 insertions(+), 71 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8e2eecc8..4ec84a3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -59,11 +59,17 @@ async-tokio = ["tokio"] ## [standard compliant]: https://www.w3.org/TR/xml11/#charencoding encoding = ["encoding_rs"] -## Enables support for recognizing all [HTML 5 entities](https://dev.w3.org/html5/html-author/charref) +## Enables support for recognizing all [HTML 5 entities] in [`unescape`] and +## [`unescape_with`] functions. The full list of entities also can be found in +## . 
+## +## [HTML 5 entities]: https://dev.w3.org/html5/html-author/charref +## [`unescape`]: crate::escape::unescape +## [`unescape_with`]: crate::escape::unescape_with escape-html = [] -## This feature enables support for deserializing lists where tags are overlapped -## with tags that do not correspond to the list. +## This feature is for the serde deserializer; it enables support for deserializing +## lists where tags are overlapped with tags that do not correspond to the list. ## ## When this feature is enabled, the XML: ## ```xml @@ -75,7 +81,8 @@ escape-html = [] ## ## ``` ## could be deserialized to a struct: -## ```ignore +## ```no_run +## # use serde::Deserialize; ## #[derive(Deserialize)] ## #[serde(rename_all = "kebab-case")] ## struct AnyName { diff --git a/src/errors.rs b/src/errors.rs index e981de96..0f23590f 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -168,7 +168,14 @@ pub mod serialize { /// Please open an issue at , provide /// your Rust code and XML input. UnexpectedEnd(Vec), - /// Unexpected end of file + /// The [`Reader`] produced [`Event::Eof`] when it was not expected, + /// for example, after producing [`Event::Start`] but before the corresponding + /// [`Event::End`]. + /// + /// [`Reader`]: crate::reader::Reader + /// [`Event::Eof`]: crate::events::Event::Eof + /// [`Event::Start`]: crate::events::Event::Start + /// [`Event::End`]: crate::events::Event::End UnexpectedEof, /// This error indicates that [`deserialize_struct`] was called, but there /// is no any XML element in the input. That means that you try to deserialize @@ -176,7 +183,9 @@ pub mod serialize { /// /// [`deserialize_struct`]: serde::de::Deserializer::deserialize_struct ExpectedStart, - /// Unsupported operation + /// An attempt to deserialize to a type that is not supported by the XML + /// store at the current position, for example, an attempt to deserialize a `struct` + /// from an attribute or an attempt to deserialize binary data. 
Unsupported(&'static str), /// Too many events were skipped while deserializing a sequence, event limit /// exceeded. The limit was provided as an argument diff --git a/src/escapei.rs b/src/escapei.rs index 13141526..f93fbbf9 100644 --- a/src/escapei.rs +++ b/src/escapei.rs @@ -132,13 +132,21 @@ fn _escape bool>(raw: &str, escape_chars: F) -> Cow { } /// Unescape an `&str` and replaces all xml escaped characters (`&...;`) into -/// their corresponding value +/// their corresponding value. +/// +/// If feature `escape-html` is enabled, then recognizes all [HTML5 escapes]. +/// +/// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref pub fn unescape(raw: &str) -> Result, EscapeError> { unescape_with(raw, |_| None) } /// Unescape an `&str` and replaces all xml escaped characters (`&...;`) into /// their corresponding value, using a resolver function for custom entities. +/// +/// If feature `escape-html` is enabled, then recognizes all [HTML5 escapes]. +/// +/// [HTML5 escapes]: https://dev.w3.org/html5/html-author/charref pub fn unescape_with<'input, 'entity, F>( raw: &'input str, resolve_entity: F, @@ -211,6 +219,7 @@ const fn named_entity(name: &str) -> Option<&str> { const fn named_entity(name: &str) -> Option<&str> { // imported from https://dev.w3.org/html5/html-author/charref // match over strings are not allowed in const functions + //TODO: automate up-to-dating using https://html.spec.whatwg.org/entities.json let s = match name.as_bytes() { b"Tab" => "\u{09}", b"NewLine" => "\u{0A}", diff --git a/src/events/mod.rs b/src/events/mod.rs index 016b88c3..9bf37729 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -16,8 +16,8 @@ //! See [`Event`] for a list of all possible events. //! //! # Reading -//! When reading a XML stream, the events are emitted by -//! [`Reader::read_event_into`]. You must listen +//! When reading a XML stream, the events are emitted by [`Reader::read_event`] +//! and [`Reader::read_event_into`]. You must listen //! 
for the different types of events you are interested in. //! //! See [`Reader`] for further information. @@ -29,6 +29,7 @@ //! //! See [`Writer`] for further information. //! +//! [`Reader::read_event`]: crate::reader::Reader::read_event //! [`Reader::read_event_into`]: crate::reader::Reader::read_event_into //! [`Reader`]: crate::reader::Reader //! [`Writer`]: crate::writer::Writer @@ -500,7 +501,12 @@ impl<'a> BytesDecl<'a> { .transpose() } - /// Gets the decoder struct + /// Gets the actual encoding using the [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get) + /// algorithm. + /// + /// If the encoding is not known, or the `encoding` key was not found, returns `None`. + /// In case of a duplicated `encoding` key, the encoding corresponding to the first + /// one is returned. #[cfg(feature = "encoding")] pub fn encoder(&self) -> Option<&'static Encoding> { self.encoding() diff --git a/src/lib.rs b/src/lib.rs index fde3f293..5e8a20c2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,18 +7,19 @@ //! A streaming API based on the [StAX] model. This is suited for larger XML documents which //! cannot completely read into memory at once. //! -//! The user has to explicitly _ask_ for the next XML event, similar -//! to a database cursor. +//! The user has to explicitly _ask_ for the next XML event, similar to a database cursor. //! This is achieved by the following two structs: //! //! - [`Reader`]: A low level XML pull-reader where buffer allocation/clearing is left to user. //! - [`Writer`]: A XML writer. Can be nested with readers if you want to transform XMLs. //! -//! Especially for nested XML elements, the user must keep track _where_ (how deep) in the XML document -//! the current event is located. This is needed as the +//! Especially for nested XML elements, the user must keep track _where_ (how deep) +//! in the XML document the current event is located. //! -//! 
Furthermore, quick-xml also contains optional [Serde] support to directly serialize and deserialize from -//! structs, without having to deal with the XML events. +//! quick-xml contains optional support of asynchronous reading using [tokio]. +//! +//! Furthermore, quick-xml also contains optional [Serde] support to directly +//! serialize and deserialize from structs, without having to deal with the XML events. //! //! # Examples //! @@ -30,6 +31,7 @@ //! `quick-xml` supports the following features: //! //! [StAX]: https://en.wikipedia.org/wiki/StAX +//! [tokio]: https://tokio.rs/ //! [Serde]: https://serde.rs/ #![cfg_attr( feature = "document-features", diff --git a/src/name.rs b/src/name.rs index 30548da9..3bdc7220 100644 --- a/src/name.rs +++ b/src/name.rs @@ -212,7 +212,7 @@ impl<'a> AsRef<[u8]> for Prefix<'a> { /// [XML Schema specification](https://www.w3.org/TR/xml-names/#ns-decl) #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum PrefixDeclaration<'a> { - /// XML attribute binds a default namespace. Corresponds to `xmlns` in in `xmlns="..."` + /// XML attribute binds a default namespace. Corresponds to `xmlns` in `xmlns="..."` Default, /// XML attribute binds a specified prefix to a namespace. Corresponds to a /// `prefix` in `xmlns:prefix="..."`, which is stored as payload of this variant. diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index 54da39b0..dea9f638 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -216,8 +216,7 @@ impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec> for R { //////////////////////////////////////////////////////////////////////////////////////////////////// -/// This is an implementation of [`Reader`] for reading from a [`BufRead`] as -/// underlying byte stream. +/// This is an implementation for reading from a [`BufRead`] as underlying byte stream. impl Reader { /// Reads the next `Event`. 
/// @@ -243,7 +242,7 @@ impl Reader { /// let xml = r#" /// Test /// Test 2 - /// "#; + /// "#; /// let mut reader = Reader::from_str(xml); /// reader.trim_text(true); /// let mut count = 0; @@ -251,7 +250,7 @@ impl Reader { /// let mut txt = Vec::new(); /// loop { /// match reader.read_event_into(&mut buf) { - /// Ok(Event::Start(ref e)) => count += 1, + /// Ok(Event::Start(_)) => count += 1, /// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), /// Ok(Event::Eof) => break, @@ -259,8 +258,8 @@ impl Reader { /// } /// buf.clear(); /// } - /// println!("Found {} start events", count); - /// println!("Text events: {:?}", txt); + /// assert_eq!(count, 3); + /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); /// ``` #[inline] pub fn read_event_into<'b>(&mut self, buf: &'b mut Vec) -> Result> { @@ -275,7 +274,8 @@ impl Reader { /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading expanded [`Start`] event. /// - /// Manages nested cases where parent and child elements have the same name. + /// Manages nested cases where parent and child elements have the _literally_ + /// same name. /// /// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`] /// will be returned. In particularly, that error will be returned if you call @@ -299,7 +299,7 @@ impl Reader { /// /// # Namespaces /// - /// While the [`Reader`] does not support namespace resolution, namespaces + /// While the `Reader` does not support namespace resolution, namespaces /// does not change the algorithm for comparing names. 
Although the names /// `a:name` and `b:name` where both prefixes `a` and `b` resolves to the /// same namespace, are semantically equivalent, `` cannot close diff --git a/src/reader/mod.rs b/src/reader/mod.rs index b34fa0f6..9d345203 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -20,14 +20,16 @@ macro_rules! configure_methods { /// default), those tags are represented by an [`Empty`] event instead. /// /// Note, that setting this to `true` will lead to additional allocates that - /// needed to store tag name for an [`End`] event. There is no additional - /// allocation, however, if [`Self::check_end_names()`] is also set. + /// needed to store tag name for an [`End`] event. However if [`check_end_names`] + /// is also set, only one additional allocation will be performed that support + /// both these options. /// /// (`false` by default) /// /// [`Empty`]: Event::Empty /// [`Start`]: Event::Start /// [`End`]: Event::End + /// [`check_end_names`]: Self::check_end_names pub fn expand_empty_elements(&mut self, val: bool) -> &mut Self { self $(.$holder)? .parser.expand_empty_elements = val; self @@ -35,12 +37,15 @@ macro_rules! configure_methods { /// Changes whether whitespace before and after character data should be removed. /// - /// When set to `true`, all [`Text`] events are trimmed. If they are empty, no event will be - /// pushed. + /// When set to `true`, all [`Text`] events are trimmed. + /// If after that the event is empty it will not be pushed. + /// + /// Changing this option automatically changes the [`trim_text_end`] option. /// /// (`false` by default) /// /// [`Text`]: Event::Text + /// [`trim_text_end`]: Self::trim_text_end pub fn trim_text(&mut self, val: bool) -> &mut Self { self $(.$holder)? .parser.trim_text_start = val; self $(.$holder)? .parser.trim_text_end = val; @@ -50,6 +55,7 @@ macro_rules! configure_methods { /// Changes whether whitespace after character data should be removed. 
/// /// When set to `true`, trailing whitespace is trimmed in [`Text`] events. + /// If the event is empty after that, it will not be pushed. /// /// (`false` by default) /// @@ -99,13 +105,15 @@ macro_rules! configure_methods { /// contain the data of the mismatched end tag. /// /// Note, that setting this to `true` will lead to additional allocates that - /// needed to store tag name for an [`End`] event. There is no additional - /// allocation, however, if [`Self::expand_empty_elements()`] is also set. + /// needed to store tag name for an [`End`] event. However, if [`expand_empty_elements`] + /// is also set, only one additional allocation will be performed that supports + /// both these options. /// /// (`true` by default) /// /// [spec]: https://www.w3.org/TR/xml11/#dt-etag /// [`End`]: Event::End + /// [`expand_empty_elements`]: Self::expand_empty_elements pub fn check_end_names(&mut self, val: bool) -> &mut Self { self $(.$holder)? .parser.check_end_names = val; self diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 51082dfe..4ce816fa 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -134,9 +134,9 @@ impl NsReader { } /// Resolves a potentially qualified **element name** or **attribute name** - /// into (namespace name, local name). + /// into _(namespace name, local name)_. /// - /// *Qualified* names have the form `prefix:local-name` where the `prefix` + /// _Qualified_ names have the form `prefix:local-name` where the `prefix` /// is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`. /// The namespace prefix can be defined on the same element as the name in question. /// @@ -172,14 +172,14 @@ impl NsReader { self.ns_resolver.resolve(name, &self.buffer, !attribute) } - /// Resolves a potentially qualified **element name** into (namespace name, local name). + /// Resolves a potentially qualified **element name** into _(namespace name, local name)_.
/// - /// *Qualified* element names have the form `prefix:local-name` where the + /// _Qualified_ element names have the form `prefix:local-name` where the /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`. /// The namespace prefix can be defined on the same element as the element /// in question. /// - /// *Unqualified* elements inherits the current *default namespace*. + /// _Unqualified_ elements inherits the current _default namespace_. /// /// The method returns following results depending on the `name` shape and /// the presence of the default namespace: @@ -200,7 +200,7 @@ impl NsReader { /// /// This example shows how you can resolve qualified name into a namespace. /// Note, that in the code like this you do not need to do that manually, - /// because the namespace resolution result returned by the [`read_event()`]. + /// because the namespace resolution result returned by the [`read_resolved_event()`]. /// /// ``` /// # use pretty_assertions::assert_eq; @@ -222,20 +222,20 @@ impl NsReader { /// [`Bound`]: ResolveResult::Bound /// [`Unbound`]: ResolveResult::Unbound /// [`Unknown`]: ResolveResult::Unknown - /// [`read_event()`]: Self::read_event + /// [`read_resolved_event()`]: Self::read_resolved_event #[inline] pub fn resolve_element<'n>(&self, name: QName<'n>) -> (ResolveResult, LocalName<'n>) { self.ns_resolver.resolve(name, &self.buffer, true) } - /// Resolves a potentially qualified **attribute name** into (namespace name, local name). + /// Resolves a potentially qualified **attribute name** into _(namespace name, local name)_. /// - /// *Qualified* attribute names have the form `prefix:local-name` where the + /// _Qualified_ attribute names have the form `prefix:local-name` where the /// `prefix` is defined on any containing XML element via `xmlns:prefix="the:namespace:uri"`. /// The namespace prefix can be defined on the same element as the attribute /// in question. 
/// - /// *Unqualified* attribute names do *not* inherit the current *default namespace*. + /// _Unqualified_ attribute names do *not* inherit the current _default namespace_. /// /// The method returns following results depending on the `name` shape and /// the presence of the default namespace: @@ -431,9 +431,8 @@ impl NsReader { /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading expanded [`Start`] event. /// - /// Manages nested cases where parent and child elements have the same name - /// ("the same" means that their local names are the same and their prefixes - /// resolves to the same namespace). + /// Manages nested cases where parent and child elements have the _literally_ + /// same name. /// /// If corresponding [`End`] event will not be found, the [`UnexpectedEof`] /// will be returned. In particularly, that error will be returned if you call @@ -448,16 +447,21 @@ impl NsReader { /// appropriate size of each event, you can preallocate the buffer to reduce /// number of reallocations. /// - /// The `ns` and `end` parameters should contain namespace and name of the - /// end element _in the reader encoding_. It is good practice to always get - /// that parameters using [`BytesStart::to_end()`] method. + /// The `end` parameter should contain name of the end element _in the reader + /// encoding_. It is good practice to always get that parameter using + /// [`BytesStart::to_end()`] method. /// /// # Namespaces /// - /// Unlike [`Reader::read_to_end_into()`], this method resolves namespace - /// prefixes, so the names that are not equals literally (for example, - /// `a:name` and `b:name`) could be considered equal if prefixes resolved to - /// the same namespace. + /// While the `NsReader` does namespace resolution, namespaces does not + /// change the algorithm for comparing names. 
Although the names `a:name` + /// and `b:name` where both prefixes `a` and `b` resolves to the same namespace, + /// are semantically equivalent, `` cannot close ``, because + /// according to [the specification] + /// + /// > The end of every element that begins with a **start-tag** MUST be marked + /// > by an **end-tag** containing a name that echoes the element's type as + /// > given in the **start-tag** /// /// # Examples /// @@ -515,6 +519,7 @@ impl NsReader { /// [`read_to_end()`]: Self::read_to_end /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`expand_empty_elements`]: Self::expand_empty_elements + /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag #[inline] pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should @@ -668,9 +673,8 @@ impl<'i> NsReader<&'i [u8]> { /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading expanded [`Start`] event. /// - /// Manages nested cases where parent and child elements have the same name - /// ("the same" means that their local names are the same and their prefixes - /// resolves to the same namespace). + /// Manages nested cases where parent and child elements have the _literally_ + /// same name. /// /// If corresponding [`End`] event will not be found, the [`UnexpectedEof`] /// will be returned. In particularly, that error will be returned if you call @@ -686,10 +690,15 @@ impl<'i> NsReader<&'i [u8]> { /// /// # Namespaces /// - /// Unlike [`Reader::read_to_end()`], this method resolves namespace - /// prefixes, so the names that are not equals literally (for example, - /// `a:name` and `b:name`) could be considered equal if prefixes resolved to - /// the same namespace. + /// While the `NsReader` does namespace resolution, namespaces does not + /// change the algorithm for comparing names. 
Although the names `a:name` + /// and `b:name` where both prefixes `a` and `b` resolves to the same namespace, + /// are semantically equivalent, `` cannot close ``, because + /// according to [the specification] + /// + /// > The end of every element that begins with a **start-tag** MUST be marked + /// > by an **end-tag** containing a name that echoes the element's type as + /// > given in the **start-tag** /// /// # Examples /// @@ -743,9 +752,9 @@ impl<'i> NsReader<&'i [u8]> { /// [`Start`]: Event::Start /// [`End`]: Event::End /// [`UnexpectedEof`]: crate::errors::Error::UnexpectedEof - /// [`read_to_end()`]: Self::read_to_end /// [`BytesStart::to_end()`]: crate::events::BytesStart::to_end /// [`expand_empty_elements`]: Self::expand_empty_elements + /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag #[inline] pub fn read_to_end(&mut self, end: QName) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should @@ -756,7 +765,8 @@ impl<'i> NsReader<&'i [u8]> { /// Reads content between start and end tags, including any markup. This /// function is supposed to be called after you already read a [`Start`] event. /// - /// Manages nested cases where parent and child elements have the same name. + /// Manages nested cases where parent and child elements have the _literally_ + /// same name. /// /// This method does not unescape read data, instead it returns content /// "as is" of the XML document. This is because it has no idea what text diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index 0fd6ccfb..6416021d 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -16,9 +16,9 @@ use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, Span, Xml use memchr; -/// This is an implementation of [`Reader`] for reading from a `&[u8]` as -/// underlying byte stream. This implementation supports not using an -/// intermediate buffer as the byte slice itself can be used to borrow from. 
+/// This is an implementation for reading from a `&[u8]` as underlying byte stream. +/// This implementation supports not using an intermediate buffer as the byte slice +/// itself can be used to borrow from. impl<'a> Reader<&'a [u8]> { /// Creates an XML reader from a string slice. pub fn from_str(s: &'a str) -> Self { @@ -80,7 +80,8 @@ impl<'a> Reader<&'a [u8]> { /// a closing tag or an empty slice, if [`expand_empty_elements`] is set and /// this method was called after reading expanded [`Start`] event. /// - /// Manages nested cases where parent and child elements have the same name. + /// Manages nested cases where parent and child elements have the _literally_ + /// same name. /// /// If corresponding [`End`] event will not be found, the [`Error::UnexpectedEof`] /// will be returned. In particularly, that error will be returned if you call @@ -99,7 +100,7 @@ impl<'a> Reader<&'a [u8]> { /// /// # Namespaces /// - /// While the [`Reader`] does not support namespace resolution, namespaces + /// While the `Reader` does not support namespace resolution, namespaces /// does not change the algorithm for comparing names. Although the names /// `a:name` and `b:name` where both prefixes `a` and `b` resolves to the /// same namespace, are semantically equivalent, `` cannot close @@ -159,7 +160,8 @@ impl<'a> Reader<&'a [u8]> { /// Reads content between start and end tags, including any markup. This /// function is supposed to be called after you already read a [`Start`] event. /// - /// Manages nested cases where parent and child elements have the same name. + /// Manages nested cases where parent and child elements have the _literally_ + /// same name. /// /// This method does not unescape read data, instead it returns content /// "as is" of the XML document. 
This is because it has no idea what text diff --git a/src/writer.rs b/src/writer.rs index 7af1511c..26095eed 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -6,9 +6,7 @@ use crate::encoding::UTF8_BOM; use crate::errors::{Error, Result}; use crate::events::{attributes::Attribute, BytesCData, BytesStart, BytesText, Event}; -/// XML writer. -/// -/// Writes XML `Event`s to a `Write` implementor. +/// XML writer. Writes XML [`Event`]s to a [`std::io::Write`] implementor. /// /// # Examples /// @@ -62,7 +60,7 @@ pub struct Writer { } impl Writer { - /// Creates a Writer from a generic Write + /// Creates a `Writer` from a generic writer. pub const fn new(inner: W) -> Writer { Writer { writer: inner, @@ -70,7 +68,7 @@ impl Writer { } } - /// Creates a Writer with configured whitespace indents from a generic Write + /// Creates a `Writer` with configured whitespace indents from a generic writer. pub fn new_with_indent(inner: W, indent_char: u8, indent_size: usize) -> Writer { Writer { writer: inner,