diff --git a/Changelog.md b/Changelog.md
index 92b51511..520ffb2f 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -19,6 +19,15 @@
from the attribute and element names and attribute values
- fix: allow to deserialize `unit`s from text and CDATA content.
`DeError::InvalidUnit` variant is removed, because after fix it is no longer used
+- test: add tests for trivial documents (empty / only comment / `<root>...</root>` -- one tag with content)
+- fix: CDATA was not handled in many cases where it should
+- fix: do not unescape CDATA content because it is never escaped by design
+ ([#311](https://github.com/tafia/quick-xml/issues/311)).
+
+ NOTE: text content is now also unescaped when it is deserialized into bytes (`Vec<u8>` / `&[u8]`).
+ It is impossible to get raw XML data in a bytes buffer. Actually, deserializing of bytes
+ should be prohibited, because XML cannot store raw byte data. You should store binary
+ data in a string that is hex-, base64- or any-other-schema-encoded.
## 0.23.0-alpha3
diff --git a/benches/bench.rs b/benches/bench.rs
index adea6840..42409aac 100644
--- a/benches/bench.rs
+++ b/benches/bench.rs
@@ -225,7 +225,7 @@ fn bench_quick_xml_one_cdata_event_trimmed(b: &mut Bencher) {
.check_comments(false)
.trim_text(true);
match r.read_event(&mut buf) {
- Ok(Event::CData(ref e)) => nbtxt += e.unescaped().unwrap().len(),
+ Ok(Event::CData(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};
diff --git a/src/de/byte_buf.rs b/src/de/byte_buf.rs
index 67e07008..e5c3dda1 100644
--- a/src/de/byte_buf.rs
+++ b/src/de/byte_buf.rs
@@ -1,11 +1,12 @@
//! Helper types for tests
+use crate::utils::write_byte_string;
use serde::de::{self, Deserialize, Deserializer, Error};
use std::fmt;
/// Wrapper around `Vec` that deserialized using `deserialize_byte_buf`
/// instead of vector's generic `deserialize_seq`
-#[derive(Debug, PartialEq)]
+#[derive(PartialEq)]
pub struct ByteBuf(pub Vec);
impl<'de> Deserialize<'de> for ByteBuf {
@@ -35,9 +36,15 @@ impl<'de> Deserialize<'de> for ByteBuf {
}
}
+impl fmt::Debug for ByteBuf {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write_byte_string(f, &self.0)
+ }
+}
+
/// Wrapper around `&[u8]` that deserialized using `deserialize_bytes`
/// instead of vector's generic `deserialize_seq`
-#[derive(Debug, PartialEq)]
+#[derive(PartialEq)]
pub struct Bytes<'de>(pub &'de [u8]);
impl<'de> Deserialize<'de> for Bytes<'de> {
@@ -62,3 +69,9 @@ impl<'de> Deserialize<'de> for Bytes<'de> {
Ok(d.deserialize_bytes(Visitor)?)
}
}
+
+impl<'de> fmt::Debug for Bytes<'de> {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write_byte_string(f, self.0)
+ }
+}
diff --git a/src/de/map.rs b/src/de/map.rs
index 466f04c3..aa311bb8 100644
--- a/src/de/map.rs
+++ b/src/de/map.rs
@@ -100,7 +100,7 @@ impl<'de, 'a, R: BorrowingReader<'de>> de::MapAccess<'de> for MapAccess<'de, 'a,
} else {
// try getting from events (value)
match self.de.peek()? {
- DeEvent::Text(_) => {
+ DeEvent::Text(_) | DeEvent::CData(_) => {
self.state = State::InnerValue;
// Deserialize `key` from special attribute name which means
// that value should be taken from the text content of the
diff --git a/src/de/mod.rs b/src/de/mod.rs
index a6b7adef..e51d0a1a 100644
--- a/src/de/mod.rs
+++ b/src/de/mod.rs
@@ -117,7 +117,7 @@ mod var;
pub use crate::errors::serialize::DeError;
use crate::{
errors::Error,
- events::{BytesEnd, BytesStart, BytesText, Event},
+ events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
reader::Decoder,
Reader,
};
@@ -141,7 +141,7 @@ pub enum DeEvent<'a> {
Text(BytesText<'a>),
/// Unescaped character data between `Start` and `End` element,
/// stored in ``.
- CData(BytesText<'a>),
+ CData(BytesCData<'a>),
/// End of XML document.
Eof,
}
@@ -300,18 +300,20 @@ where
/// |`text`|`text` |Complete tag consumed |
/// |`` |empty slice|Virtual end tag not consumed|
/// |`` |empty slice|Not consumed |
- fn next_text(&mut self) -> Result, DeError> {
+ fn next_text(&mut self) -> Result, DeError> {
match self.next()? {
- DeEvent::Text(e) | DeEvent::CData(e) => Ok(e),
+ DeEvent::Text(e) => e.unescape().map_err(|e| DeError::Xml(e.into())),
+ DeEvent::CData(e) => Ok(e),
DeEvent::Eof => Err(DeError::Eof),
DeEvent::Start(e) => {
// allow one nested level
let inner = self.next()?;
let t = match inner {
- DeEvent::Text(t) | DeEvent::CData(t) => t,
+ DeEvent::Text(t) => t.unescape().map_err(|e| DeError::Xml(e.into()))?,
+ DeEvent::CData(t) => t,
DeEvent::Start(_) => return Err(DeError::Start),
DeEvent::End(end) if end.name() == e.name() => {
- return Ok(BytesText::from_escaped(&[] as &[u8]));
+ return Ok(BytesCData::new(&[] as &[u8]));
}
DeEvent::End(_) => return Err(DeError::End),
DeEvent::Eof => return Err(DeError::Eof),
@@ -321,7 +323,7 @@ where
}
DeEvent::End(e) => {
self.peek = Some(DeEvent::End(e));
- Ok(BytesText::from_escaped(&[] as &[u8]))
+ Ok(BytesCData::new(&[] as &[u8]))
}
}
}
@@ -406,13 +408,12 @@ where
deserialize_bool(txt.as_ref(), self.reader.decoder(), visitor)
}
+ /// Representation of owned strings is the same as [non-owned](#method.deserialize_str).
fn deserialize_string(self, visitor: V) -> Result
where
V: Visitor<'de>,
{
- let text = self.next_text()?;
- let string = text.decode_and_escape(self.reader.decoder())?;
- visitor.visit_string(string.into_owned())
+ self.deserialize_str(visitor)
}
fn deserialize_char(self, visitor: V) -> Result
@@ -427,7 +428,7 @@ where
V: Visitor<'de>,
{
let text = self.next_text()?;
- let string = text.decode_and_escape(self.reader.decoder())?;
+ let string = text.decode(self.reader.decoder())?;
match string {
Cow::Borrowed(string) => visitor.visit_borrowed_str(string),
Cow::Owned(string) => visitor.visit_string(string),
@@ -439,8 +440,7 @@ where
V: Visitor<'de>,
{
let text = self.next_text()?;
- let value = text.escaped();
- visitor.visit_bytes(value)
+ visitor.visit_bytes(&text)
}
fn deserialize_byte_buf(self, visitor: V) -> Result
@@ -562,6 +562,7 @@ where
{
match self.peek()? {
DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
+ DeEvent::CData(t) if t.is_empty() => visitor.visit_none(),
DeEvent::Eof => visitor.visit_none(),
_ => visitor.visit_some(self),
}
@@ -723,6 +724,8 @@ mod tests {
where
T: Deserialize<'de>,
{
+ // Log the XML that we try to deserialize so it is visible in the output of failed tests
+ dbg!(s);
let mut de = Deserializer::from_str(s);
let result = T::deserialize(&mut de);
@@ -771,7 +774,7 @@ mod tests {
);
assert_eq!(
de.next().unwrap(),
- CData(BytesText::from_plain_str("cdata content"))
+ CData(BytesCData::from_str("cdata content"))
);
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"tag")));
@@ -907,6 +910,173 @@ mod tests {
source: String,
}
+ /// Tests for trivial XML documents: empty or containing only a primitive type
+ /// at the top level; all of them should be considered invalid
+ mod trivial {
+ use super::*;
+
+ #[rustfmt::skip] // excess spaces used for readability
+ macro_rules! eof {
+ ($name:ident: $type:ty = $value:expr) => {
+ #[test]
+ fn $name() {
+ let item = from_str::<$type>($value).unwrap_err();
+
+ match item {
+ DeError::Eof => (),
+ _ => panic!("Expected `Eof`, found {:?}", item),
+ }
+ }
+ };
+ ($value:expr) => {
+ eof!(i8_: i8 = $value);
+ eof!(i16_: i16 = $value);
+ eof!(i32_: i32 = $value);
+ eof!(i64_: i64 = $value);
+ eof!(isize_: isize = $value);
+
+ eof!(u8_: u8 = $value);
+ eof!(u16_: u16 = $value);
+ eof!(u32_: u32 = $value);
+ eof!(u64_: u64 = $value);
+ eof!(usize_: usize = $value);
+
+ serde_if_integer128! {
+ eof!(u128_: u128 = $value);
+ eof!(i128_: i128 = $value);
+ }
+
+ eof!(f32_: f32 = $value);
+ eof!(f64_: f64 = $value);
+
+ eof!(false_: bool = $value);
+ eof!(true_: bool = $value);
+ eof!(char_: char = $value);
+
+ eof!(string: String = $value);
+ eof!(byte_buf: ByteBuf = $value);
+
+ #[test]
+ fn unit() {
+ let item = from_str::<()>($value).unwrap_err();
+
+ match item {
+ DeError::Eof => (),
+ _ => panic!("Expected `Eof`, found {:?}", item),
+ }
+ }
+ };
+ }
+
+ /// An empty document should be considered invalid no matter which type we try to deserialize
+ mod empty_doc {
+ use super::*;
+ eof!("");
+ }
+
+ /// A document that contains only a comment should be handled as if it is empty
+ mod only_comment {
+ use super::*;
+ eof!("");
+ }
+
+ /// Tests deserialization from top-level tag content: `<root>...content...</root>`
+ mod struct_ {
+ use super::*;
+
+ /// Well-formed XML must have a single tag at the root level.
+ /// Any XML tag can be modeled as a struct, and the content of this tag is modeled as
+ /// fields of this struct.
+ ///
+ /// Because we want to get access to the unnamed content of the tag (usually, this internal
+ /// XML node is called `#text`), we use a rename to the special name `$value`
+ #[derive(Debug, Deserialize, PartialEq)]
+ struct Trivial {
+ #[serde(rename = "$value")]
+ value: T,
+ }
+
+ macro_rules! in_struct {
+ ($name:ident: $type:ty = $value:expr, $expected:expr) => {
+ #[test]
+ fn $name() {
+ let item: Trivial<$type> = from_str($value).unwrap();
+
+ assert_eq!(item, Trivial { value: $expected });
+ }
+ };
+ }
+
+ /// Tests deserialization from text content in a tag
+ #[rustfmt::skip] // tests formatted in a table
+ mod text {
+ use super::*;
+
+ in_struct!(i8_: i8 = "-42", -42i8);
+ in_struct!(i16_: i16 = "-4200", -4200i16);
+ in_struct!(i32_: i32 = "-42000000", -42000000i32);
+ in_struct!(i64_: i64 = "-42000000000000", -42000000000000i64);
+ in_struct!(isize_: isize = "-42000000000000", -42000000000000isize);
+
+ in_struct!(u8_: u8 = "42", 42u8);
+ in_struct!(u16_: u16 = "4200", 4200u16);
+ in_struct!(u32_: u32 = "42000000", 42000000u32);
+ in_struct!(u64_: u64 = "42000000000000", 42000000000000u64);
+ in_struct!(usize_: usize = "42000000000000", 42000000000000usize);
+
+ serde_if_integer128! {
+ in_struct!(u128_: u128 = "420000000000000000000000000000", 420000000000000000000000000000u128);
+ in_struct!(i128_: i128 = "-420000000000000000000000000000", -420000000000000000000000000000i128);
+ }
+
+ in_struct!(f32_: f32 = "4.2", 4.2f32);
+ in_struct!(f64_: f64 = "4.2", 4.2f64);
+
+ in_struct!(false_: bool = "false", false);
+ in_struct!(true_: bool = "true", true);
+ in_struct!(char_: char = "r", 'r');
+
+ in_struct!(string: String = "escaped string", "escaped string".into());
+ in_struct!(byte_buf: ByteBuf = "escaped byte_buf", ByteBuf(r"escaped byte_buf".into()));
+ }
+
+ /// Tests deserialization from CDATA content in a tag.
+ /// CDATA handling is similar to text handling, except that strings are not unescaped
+ #[rustfmt::skip] // tests formatted in a table
+ mod cdata {
+ use super::*;
+
+ in_struct!(i8_: i8 = "", -42i8);
+ in_struct!(i16_: i16 = "", -4200i16);
+ in_struct!(i32_: i32 = "", -42000000i32);
+ in_struct!(i64_: i64 = "", -42000000000000i64);
+ in_struct!(isize_: isize = "", -42000000000000isize);
+
+ in_struct!(u8_: u8 = "", 42u8);
+ in_struct!(u16_: u16 = "", 4200u16);
+ in_struct!(u32_: u32 = "", 42000000u32);
+ in_struct!(u64_: u64 = "", 42000000000000u64);
+ in_struct!(usize_: usize = "", 42000000000000usize);
+
+ serde_if_integer128! {
+ in_struct!(u128_: u128 = "", 420000000000000000000000000000u128);
+ in_struct!(i128_: i128 = "", -420000000000000000000000000000i128);
+ }
+
+ in_struct!(f32_: f32 = "", 4.2f32);
+ in_struct!(f64_: f64 = "", 4.2f64);
+
+ in_struct!(false_: bool = "", false);
+ in_struct!(true_: bool = "", true);
+ in_struct!(char_: char = "", 'r');
+
+ // Escape sequences are not processed inside a CDATA section
+ in_struct!(string: String = "", "escaped string".into());
+ in_struct!(byte_buf: ByteBuf = "", ByteBuf(r"escaped byte_buf".into()));
+ }
+ }
+ }
+
#[test]
fn multiple_roots_attributes() {
let s = r##"
diff --git a/src/de/var.rs b/src/de/var.rs
index 63d5a0e2..53638687 100644
--- a/src/de/var.rs
+++ b/src/de/var.rs
@@ -36,6 +36,8 @@ where
let decoder = self.de.reader.decoder();
let de = match self.de.peek()? {
DeEvent::Text(t) => EscapedDeserializer::new(Cow::Borrowed(t), decoder, true),
+ // Escape sequences are not processed inside a CDATA section
+ DeEvent::CData(t) => EscapedDeserializer::new(Cow::Borrowed(t), decoder, false),
DeEvent::Start(e) => EscapedDeserializer::new(Cow::Borrowed(e.name()), decoder, false),
_ => {
return Err(DeError::Unsupported(
@@ -64,7 +66,7 @@ where
fn unit_variant(self) -> Result<(), DeError> {
match self.de.next()? {
DeEvent::Start(e) => self.de.read_to_end(e.name()),
- DeEvent::Text(_) => Ok(()),
+ DeEvent::Text(_) | DeEvent::CData(_) => Ok(()),
_ => unreachable!(),
}
}
diff --git a/src/events/mod.rs b/src/events/mod.rs
index c0b9a924..b6bb2c50 100644
--- a/src/events/mod.rs
+++ b/src/events/mod.rs
@@ -40,10 +40,14 @@ pub mod attributes;
use encoding_rs::Encoding;
use std::{borrow::Cow, collections::HashMap, io::BufRead, ops::Deref, str::from_utf8};
-use crate::escape::{do_unescape, escape};
+use crate::escape::{do_unescape, escape, partial_escape};
+use crate::utils::write_cow_string;
use crate::{errors::Error, errors::Result, reader::Reader};
use attributes::{Attribute, Attributes};
+#[cfg(feature = "serialize")]
+use crate::escape::EscapeError;
+
use memchr;
/// Opening tag data (`Event::Start`), with optional attributes.
@@ -365,8 +369,6 @@ impl<'a> BytesStart<'a> {
impl<'a> std::fmt::Debug for BytesStart<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
- use crate::utils::write_cow_string;
-
write!(f, "BytesStart {{ buf: ")?;
write_cow_string(f, &self.buf)?;
write!(f, ", name_len: {} }}", self.name_len)
@@ -548,15 +550,14 @@ impl<'a> BytesEnd<'a> {
impl<'a> std::fmt::Debug for BytesEnd<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
- use crate::utils::write_cow_string;
-
write!(f, "BytesEnd {{ name: ")?;
write_cow_string(f, &self.name)?;
write!(f, " }}")
}
}
-/// Data from various events (most notably, `Event::Text`).
+/// Data from various events (most notably, `Event::Text`) that is stored in XML
+/// in escaped form. Internally, the data is also kept in escaped form
#[derive(Clone, Eq, PartialEq)]
pub struct BytesText<'a> {
// Invariant: The content is always escaped.
@@ -566,8 +567,8 @@ pub struct BytesText<'a> {
impl<'a> BytesText<'a> {
/// Creates a new `BytesText` from an escaped byte sequence.
#[inline]
- pub fn from_escaped>>(content: C) -> BytesText<'a> {
- BytesText {
+ pub fn from_escaped>>(content: C) -> Self {
+ Self {
content: content.into(),
}
}
@@ -575,15 +576,15 @@ impl<'a> BytesText<'a> {
/// Creates a new `BytesText` from a byte sequence. The byte sequence is
/// expected not to be escaped.
#[inline]
- pub fn from_plain(content: &'a [u8]) -> BytesText<'a> {
- BytesText {
+ pub fn from_plain(content: &'a [u8]) -> Self {
+ Self {
content: escape(content),
}
}
/// Creates a new `BytesText` from an escaped string.
#[inline]
- pub fn from_escaped_str>>(content: C) -> BytesText<'a> {
+ pub fn from_escaped_str>>(content: C) -> Self {
Self::from_escaped(match content.into() {
Cow::Owned(o) => Cow::Owned(o.into_bytes()),
Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()),
@@ -593,7 +594,7 @@ impl<'a> BytesText<'a> {
/// Creates a new `BytesText` from a string. The string is expected not to
/// be escaped.
#[inline]
- pub fn from_plain_str(content: &'a str) -> BytesText<'a> {
+ pub fn from_plain_str(content: &'a str) -> Self {
Self::from_plain(content.as_bytes())
}
@@ -606,11 +607,17 @@ impl<'a> BytesText<'a> {
}
}
- /// Extracts the inner `Cow` from the `BytesText` event container.
+ /// Returns an unescaped version of the text content that can be written
+ /// as CDATA in XML
#[cfg(feature = "serialize")]
- #[inline]
- pub(crate) fn into_inner(self) -> Cow<'a, [u8]> {
- self.content
+ pub(crate) fn unescape(self) -> std::result::Result, EscapeError> {
+ //TODO: need to think about a better API instead of dozens of similar functions.
+ // Maybe use the builder pattern. After that, expose this function as public API
+ //FIXME: need to take into account entities defined in the document
+ Ok(BytesCData::new(match do_unescape(&self.content, None)? {
+ Cow::Borrowed(_) => self.content,
+ Cow::Owned(unescaped) => Cow::Owned(unescaped),
+ }))
}
/// gets escaped content
@@ -648,60 +655,6 @@ impl<'a> BytesText<'a> {
do_unescape(self, custom_entities).map_err(Error::EscapeError)
}
- /// Gets content of this text buffer in the specified encoding
- #[cfg(feature = "serialize")]
- pub(crate) fn decode(&self, decoder: crate::reader::Decoder) -> Result> {
- Ok(match &self.content {
- Cow::Borrowed(bytes) => {
- #[cfg(feature = "encoding")]
- {
- decoder.decode(bytes)
- }
- #[cfg(not(feature = "encoding"))]
- {
- decoder.decode(bytes)?.into()
- }
- }
- Cow::Owned(bytes) => {
- #[cfg(feature = "encoding")]
- let decoded = decoder.decode(bytes).into_owned();
-
- #[cfg(not(feature = "encoding"))]
- let decoded = decoder.decode(bytes)?.to_string();
-
- decoded.into()
- }
- })
- }
-
- #[cfg(feature = "serialize")]
- pub(crate) fn decode_and_escape(
- &self,
- decoder: crate::reader::Decoder,
- ) -> Result> {
- match self.decode(decoder)? {
- Cow::Borrowed(decoded) => {
- let unescaped =
- do_unescape(decoded.as_bytes(), None).map_err(Error::EscapeError)?;
- match unescaped {
- Cow::Borrowed(unescaped) => {
- from_utf8(unescaped).map(|s| s.into()).map_err(Error::Utf8)
- }
- Cow::Owned(unescaped) => String::from_utf8(unescaped)
- .map(|s| s.into())
- .map_err(|e| Error::Utf8(e.utf8_error())),
- }
- }
- Cow::Owned(decoded) => {
- let unescaped =
- do_unescape(decoded.as_bytes(), None).map_err(Error::EscapeError)?;
- String::from_utf8(unescaped.into_owned())
- .map(|s| s.into())
- .map_err(|e| Error::Utf8(e.utf8_error()))
- }
- }
- }
-
/// helper method to unescape then decode self using the reader encoding
/// but without BOM (Byte order mark)
///
@@ -856,14 +809,123 @@ impl<'a> BytesText<'a> {
impl<'a> std::fmt::Debug for BytesText<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
- use crate::utils::write_cow_string;
-
write!(f, "BytesText {{ content: ")?;
write_cow_string(f, &self.content)?;
write!(f, " }}")
}
}
+/// CDATA content contains unescaped data from the reader. If you want to write it as text,
+/// [convert](Self::escape) it to [`BytesText`]
+#[derive(Clone, Eq, PartialEq)]
+pub struct BytesCData<'a> {
+ content: Cow<'a, [u8]>,
+}
+
+impl<'a> BytesCData<'a> {
+ /// Creates a new `BytesCData` from a byte sequence.
+ #[inline]
+ pub fn new>>(content: C) -> Self {
+ Self {
+ content: content.into(),
+ }
+ }
+
+ /// Creates a new `BytesCData` from a string
+ #[inline]
+ pub fn from_str(content: &'a str) -> Self {
+ Self::new(content.as_bytes())
+ }
+
+ /// Extracts the inner `Cow` from the `BytesCData` event container.
+ #[inline]
+ pub fn into_inner(self) -> Cow<'a, [u8]> {
+ self.content
+ }
+
+ /// Ensures that all data is owned to extend the object's lifetime if
+ /// necessary.
+ #[inline]
+ pub fn into_owned(self) -> BytesCData<'static> {
+ BytesCData {
+ content: self.content.into_owned().into(),
+ }
+ }
+
+ /// Converts this CDATA content to an escaped version that can be written
+ /// as usual text in XML.
+ ///
+ /// This function performs the following replacements:
+ ///
+ /// | Character | Replacement
+ /// |-----------|------------
+ /// | `<` | `&lt;`
+ /// | `>` | `&gt;`
+ /// | `&` | `&amp;`
+ /// | `'` | `&apos;`
+ /// | `"` | `&quot;`
+ pub fn escape(self) -> BytesText<'a> {
+ BytesText::from_escaped(match escape(&self.content) {
+ Cow::Borrowed(_) => self.content,
+ Cow::Owned(escaped) => Cow::Owned(escaped),
+ })
+ }
+
+ /// Converts this CDATA content to an escaped version that can be written
+ /// as usual text in XML.
+ ///
+ /// In XML text content, it is allowed (though not recommended) to leave
+ /// the quote special characters `"` and `'` unescaped.
+ ///
+ /// This function performs the following replacements:
+ ///
+ /// | Character | Replacement
+ /// |-----------|------------
+ /// | `<` | `&lt;`
+ /// | `>` | `&gt;`
+ /// | `&` | `&amp;`
+ pub fn partial_escape(self) -> BytesText<'a> {
+ BytesText::from_escaped(match partial_escape(&self.content) {
+ Cow::Borrowed(_) => self.content,
+ Cow::Owned(escaped) => Cow::Owned(escaped),
+ })
+ }
+
+ /// Gets content of this text buffer in the specified encoding
+ #[cfg(feature = "serialize")]
+ pub(crate) fn decode(&self, decoder: crate::reader::Decoder) -> Result> {
+ Ok(match &self.content {
+ Cow::Borrowed(bytes) => {
+ #[cfg(feature = "encoding")]
+ {
+ decoder.decode(bytes)
+ }
+ #[cfg(not(feature = "encoding"))]
+ {
+ decoder.decode(bytes)?.into()
+ }
+ }
+ Cow::Owned(bytes) => {
+ #[cfg(feature = "encoding")]
+ let decoded = decoder.decode(bytes).into_owned();
+
+ #[cfg(not(feature = "encoding"))]
+ let decoded = decoder.decode(bytes)?.to_string();
+
+ decoded.into()
+ }
+ })
+ }
+}
+
+impl<'a> std::fmt::Debug for BytesCData<'a> {
+ fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+ write!(f, "BytesCData {{ content: ")?;
+ write_cow_string(f, &self.content)?;
+ write!(f, " }}")
+ }
+}
+
/// Event emitted by [`Reader::read_event`].
///
/// [`Reader::read_event`]: ../reader/struct.Reader.html#method.read_event
@@ -880,7 +942,7 @@ pub enum Event<'a> {
/// Comment ``.
Comment(BytesText<'a>),
/// CData ``.
- CData(BytesText<'a>),
+ CData(BytesCData<'a>),
/// XML declaration ``.
Decl(BytesDecl<'a>),
/// Processing instruction `...?>`.
@@ -938,6 +1000,14 @@ impl<'a> Deref for BytesText<'a> {
}
}
+impl<'a> Deref for BytesCData<'a> {
+ type Target = [u8];
+
+ fn deref(&self) -> &[u8] {
+ &*self.content
+ }
+}
+
impl<'a> Deref for Event<'a> {
type Target = [u8];
fn deref(&self) -> &[u8] {
diff --git a/src/reader.rs b/src/reader.rs
index cf050133..3da17af9 100644
--- a/src/reader.rs
+++ b/src/reader.rs
@@ -9,7 +9,8 @@ use std::{fs::File, path::Path, str::from_utf8};
use encoding_rs::{Encoding, UTF_16BE, UTF_16LE};
use crate::errors::{Error, Result};
-use crate::events::{attributes::Attribute, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
+use crate::events::attributes::Attribute;
+use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use memchr;
@@ -368,7 +369,7 @@ impl Reader {
Ok(Event::Comment(BytesText::from_escaped(&buf[3..len - 2])))
} else if uncased_starts_with(buf, b"![CDATA[") {
debug_assert!(len >= 10, "Minimum length guaranteed by read_bang_elem");
- Ok(Event::CData(BytesText::from_plain(&buf[8..buf.len() - 2])))
+ Ok(Event::CData(BytesCData::new(&buf[8..buf.len() - 2])))
} else if uncased_starts_with(buf, b"!DOCTYPE") {
debug_assert!(len >= 8, "Minimum length guaranteed by read_bang_elem");
let start = buf[8..]
diff --git a/src/writer.rs b/src/writer.rs
index 73a7dc2e..3870fd7e 100644
--- a/src/writer.rs
+++ b/src/writer.rs
@@ -1,7 +1,7 @@
//! A module to handle `Writer`
use crate::errors::{Error, Result};
-use crate::events::{attributes::Attribute, BytesStart, BytesText, Event};
+use crate::events::{attributes::Attribute, BytesCData, BytesStart, BytesText, Event};
use std::io::Write;
/// XML writer.
@@ -261,7 +261,7 @@ impl<'a, W: Write> ElementWriter<'a, W> {
}
/// Write a CData event `` inside the current element.
- pub fn write_cdata_content(self, text: BytesText) -> Result<&'a mut Writer> {
+ pub fn write_cdata_content(self, text: BytesCData) -> Result<&'a mut Writer> {
self.writer
.write_event(Event::Start(self.start_tag.to_borrowed()))?;
self.writer.write_event(Event::CData(text))?;
diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs
index 3515666a..1d9f74e4 100644
--- a/tests/unit_tests.rs
+++ b/tests/unit_tests.rs
@@ -180,7 +180,7 @@ fn test_cdata() {
fn test_cdata_open_close() {
let mut r = Reader::from_str(" test]]>");
r.trim_text(true);
- next_eq!(r, CData, b"test <> test");
+ next_eq!(r, CData, b"test <> test");
}
#[test]