Skip to content

Commit

Permalink
Introduce a BytesCData type for CData events and use it instead of …
Browse files Browse the repository at this point in the history
…`BytesText`

This fixes #311.

This commit revert changes from 85f9f68
  • Loading branch information
Mingun committed May 3, 2022
1 parent 52b18f0 commit 738ddd7
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 84 deletions.
3 changes: 3 additions & 0 deletions Changelog.md
Expand Up @@ -33,6 +33,9 @@
([#344](https://github.com/tafia/quick-xml/issues/344))
- test: add tests for trivial documents (empty / only comment / `<root>...</root>` -- one tag with content)
- fix: CDATA was not handled in many cases where it should
- fix: do not unescape CDATA content because it never escaped by design.
CDATA event data now represented by its own `BytesCData` type
([quick-xml#311](https://github.com/tafia/quick-xml/issues/311))

## 0.23.0-alpha3

Expand Down
2 changes: 1 addition & 1 deletion benches/bench.rs
Expand Up @@ -223,7 +223,7 @@ fn bench_fast_xml_one_cdata_event_trimmed(b: &mut Bencher) {
.check_comments(false)
.trim_text(true);
match r.read_event(&mut buf) {
Ok(Event::CData(ref e)) => nbtxt += e.unescaped().unwrap().len(),
Ok(Event::CData(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};

Expand Down
43 changes: 25 additions & 18 deletions src/de/mod.rs
Expand Up @@ -113,7 +113,7 @@ mod var;
pub use crate::errors::serialize::DeError;
use crate::{
errors::Error,
events::{BytesEnd, BytesStart, BytesText, Event},
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
reader::Decoder,
Reader,
};
Expand All @@ -137,7 +137,7 @@ pub enum DeEvent<'a> {
Text(BytesText<'a>),
/// Unescaped character data between `Start` and `End` element,
/// stored in `<![CDATA[...]]>`.
CData(BytesText<'a>),
CData(BytesCData<'a>),
/// End of XML document.
Eof,
}
Expand Down Expand Up @@ -316,19 +316,23 @@ where
/// |[`DeEvent::Text`] |`text content` |Unescapes `text content` and returns it, consumes events up to `</tag>`
/// |[`DeEvent::CData`]|`<![CDATA[cdata content]]>`|Returns `cdata content` unchanged, consumes events up to `</tag>`
/// |[`DeEvent::Eof`] | |Emits [`DeError::Eof`]
fn next_text(&mut self) -> Result<BytesText<'de>, DeError> {
fn next_text(&mut self, unescape: bool) -> Result<BytesCData<'de>, DeError> {
match self.next()? {
DeEvent::Text(e) | DeEvent::CData(e) => Ok(e),
DeEvent::Text(e) if unescape => e.unescape().map_err(|e| DeError::Xml(e.into())),
DeEvent::Text(e) => Ok(BytesCData::new(e.into_inner())),
DeEvent::CData(e) => Ok(e),
DeEvent::Start(e) => {
// allow one nested level
let inner = self.next()?;
let t = match inner {
DeEvent::Text(t) | DeEvent::CData(t) => t,
DeEvent::Text(t) if unescape => t.unescape()?,
DeEvent::Text(t) => BytesCData::new(t.into_inner()),
DeEvent::CData(t) => t,
DeEvent::Start(_) => return Err(DeError::Start),
// We can get End event in case of `<tag></tag>` or `<tag/>` input
// Return empty text in that case
DeEvent::End(end) if end.name() == e.name() => {
return Ok(BytesText::from_escaped(&[] as &[u8]));
return Ok(BytesCData::new(&[] as &[u8]));
}
DeEvent::End(_) => return Err(DeError::End),
DeEvent::Eof => return Err(DeError::Eof),
Expand Down Expand Up @@ -397,9 +401,9 @@ macro_rules! deserialize_type {
where
V: Visitor<'de>,
{
let txt = self.next_text()?;
// No need to unescape because valid integer representations cannot be escaped
let string = txt.decode(self.reader.decoder())?;
let text = self.next_text(false)?;
let string = text.decode(self.reader.decoder())?;
visitor.$visit(string.parse()?)
}
};
Expand Down Expand Up @@ -438,9 +442,10 @@ where
where
V: Visitor<'de>,
{
let txt = self.next_text()?;
// No need to unescape because valid integer representations cannot be escaped
let text = self.next_text(false)?;

deserialize_bool(txt.as_ref(), self.reader.decoder(), visitor)
deserialize_bool(text.as_ref(), self.reader.decoder(), visitor)
}

/// Representation of owned strings the same as [non-owned](#method.deserialize_str).
Expand All @@ -462,8 +467,8 @@ where
where
V: Visitor<'de>,
{
let text = self.next_text()?;
let string = text.decode_and_escape(self.reader.decoder())?;
let text = self.next_text(true)?;
let string = text.decode(self.reader.decoder())?;
match string {
Cow::Borrowed(string) => visitor.visit_borrowed_str(string),
Cow::Owned(string) => visitor.visit_string(string),
Expand All @@ -474,16 +479,17 @@ where
where
V: Visitor<'de>,
{
let text = self.next_text()?;
let value = text.escaped();
visitor.visit_bytes(value)
// No need to unescape because bytes gives access to the raw XML input
let text = self.next_text(false)?;
visitor.visit_bytes(&text)
}

fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, DeError>
where
V: Visitor<'de>,
{
let text = self.next_text()?;
// No need to unescape because bytes gives access to the raw XML input
let text = self.next_text(false)?;
let value = text.into_inner().into_owned();
visitor.visit_byte_buf(value)
}
Expand Down Expand Up @@ -597,7 +603,8 @@ where
V: Visitor<'de>,
{
match self.peek()? {
DeEvent::Text(t) | DeEvent::CData(t) if t.is_empty() => visitor.visit_none(),
DeEvent::Text(t) if t.is_empty() => visitor.visit_none(),
DeEvent::CData(t) if t.is_empty() => visitor.visit_none(),
DeEvent::Eof => visitor.visit_none(),
_ => visitor.visit_some(self),
}
Expand Down Expand Up @@ -790,7 +797,7 @@ mod tests {
);
assert_eq!(
de.next().unwrap(),
CData(BytesText::from_plain_str("cdata content"))
CData(BytesCData::from_str("cdata content"))
);
assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"tag")));

Expand Down
11 changes: 9 additions & 2 deletions src/errors.rs
Expand Up @@ -133,7 +133,7 @@ impl std::error::Error for Error {
pub mod serialize {
//! A module to handle serde (de)serialization errors

use super::Error;
use super::*;
use std::fmt;
use std::num::{ParseFloatError, ParseIntError};

Expand Down Expand Up @@ -170,7 +170,7 @@ pub mod serialize {
}

impl fmt::Display for DeError {
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DeError::Custom(s) => write!(f, "{}", s),
DeError::Xml(e) => write!(f, "{}", e),
Expand Down Expand Up @@ -221,6 +221,13 @@ pub mod serialize {
}
}

impl From<EscapeError> for DeError {
#[inline]
fn from(e: EscapeError) -> Self {
Self::Xml(e.into())
}
}

impl From<ParseIntError> for DeError {
fn from(e: ParseIntError) -> Self {
DeError::Int(e)
Expand Down

0 comments on commit 738ddd7

Please sign in to comment.