From 1161d7408aa172b7855c3a55091070b72ad00869 Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 14 Jun 2022 21:30:48 +0500 Subject: [PATCH] #180: Make `Decoder` struct public The method `Reader::decoder()` is public anyway, but its result type is not, which means that it cannot be used as method argument, and that is not good --- Changelog.md | 4 ++++ src/lib.rs | 2 +- src/reader.rs | 55 ++++++++++++++++++++++++++++----------------------- 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/Changelog.md b/Changelog.md index e61aace9..62b483a2 100644 --- a/Changelog.md +++ b/Changelog.md @@ -17,6 +17,9 @@ - [#393]: New module `name` with `QName`, `LocalName`, `Namespace`, `Prefix` and `PrefixDeclaration` wrappers around byte arrays and `ResolveResult` with the result of namespace resolution +- [#180]: Make `Decoder` struct public. You already had access to it via the + `Reader::decoder()` method, but could not name it in the code. Now the preferred + way to access decoding functionality is via this struct ### Bug Fixes @@ -67,6 +70,7 @@ [#8]: https://github.com/Mingun/fast-xml/pull/8 [#9]: https://github.com/Mingun/fast-xml/pull/9 +[#180]: https://github.com/tafia/quick-xml/issues/180 [#191]: https://github.com/tafia/quick-xml/issues/191 [#387]: https://github.com/tafia/quick-xml/pull/387 [#391]: https://github.com/tafia/quick-xml/pull/391 diff --git a/src/lib.rs b/src/lib.rs index 122520f0..378488bb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -156,5 +156,5 @@ mod writer; #[cfg(feature = "serialize")] pub use crate::errors::serialize::DeError; pub use crate::errors::{Error, Result}; -pub use crate::reader::Reader; +pub use crate::reader::{Decoder, Reader}; pub use crate::writer::{ElementWriter, Writer}; diff --git a/src/reader.rs b/src/reader.rs index 3cadbd9a..7f0624e4 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -381,20 +381,14 @@ impl Reader { self.encoding } - /// Get utf8 decoder - #[cfg(feature = "encoding")] + /// Get the decoder, used to 
decode bytes read by this reader into strings. pub fn decoder(&self) -> Decoder { Decoder { + #[cfg(feature = "encoding")] encoding: self.encoding, } } - /// Get utf8 decoder - #[cfg(not(feature = "encoding"))] - pub fn decoder(&self) -> Decoder { - Decoder - } - /// Decodes a slice using the encoding specified in the XML declaration. /// /// Decode `bytes` with BOM sniffing and with malformed sequences replaced with the @@ -1471,47 +1465,58 @@ pub(crate) fn is_whitespace(b: u8) -> bool { } } -/// Utf8 Decoder -#[cfg(not(feature = "encoding"))] -#[derive(Clone, Copy, Debug)] -pub struct Decoder; +//////////////////////////////////////////////////////////////////////////////////////////////////// -/// Utf8 Decoder -#[cfg(feature = "encoding")] +/// Decoder of byte slices into strings. This is a lightweight object that can be copied. +/// +/// If feature `encoding` is enabled, the encoding is taken from the `"encoding"` +/// key of the XML declaration; UTF-8 is assumed if the XML has no declaration, +/// the encoding key is not defined, or it names an unknown encoding. +/// +/// The library supports any UTF-8 compatible encoding that the `encoding_rs` crate +/// supports. [*UTF-16 is not supported at present*][utf16]. +/// +/// If feature `encoding` is disabled, the decoder is always a UTF-8 decoder: +/// any XML declarations are ignored. 
+/// +/// [utf16]: https://github.com/tafia/quick-xml/issues/158 #[derive(Clone, Copy, Debug)] pub struct Decoder { + #[cfg(feature = "encoding")] encoding: &'static Encoding, } +#[cfg(not(feature = "encoding"))] impl Decoder { - #[cfg(not(feature = "encoding"))] + /// Decodes specified bytes using UTF-8 encoding pub fn decode<'c>(&self, bytes: &'c [u8]) -> Result<&'c str> { from_utf8(bytes).map_err(Error::Utf8) } +} - #[cfg(feature = "encoding")] +#[cfg(feature = "encoding")] +impl Decoder { + /// Decodes specified bytes using encoding, declared in the XML, if it was + /// declared there, or UTF-8 otherwise pub fn decode<'c>(&self, bytes: &'c [u8]) -> Cow<'c, str> { self.encoding.decode(bytes).0 } } -/// This implementation is required for tests of other parts of the library -#[cfg(test)] -#[cfg(feature = "serialize")] impl Decoder { - #[cfg(not(feature = "encoding"))] - pub(crate) fn utf8() -> Self { - Decoder - } - - #[cfg(feature = "encoding")] + /// This implementation is required for tests of other parts of the library + #[cfg(test)] + #[cfg(feature = "serialize")] pub(crate) fn utf8() -> Self { Decoder { + #[cfg(feature = "encoding")] encoding: encoding_rs::UTF_8, } } } +//////////////////////////////////////////////////////////////////////////////////////////////////// + #[cfg(test)] mod test { macro_rules! check {