diff --git a/Changelog.md b/Changelog.md index a079fd8b..8af95792 100644 --- a/Changelog.md +++ b/Changelog.md @@ -10,6 +10,12 @@ ## Unreleased +- test: add tests for malformed inputs for serde deserializer +- fix: allow to deserialize `unit`s from any data in attribute values and text nodes +- refactor: unify errors when EOF encountered during serde deserialization +- test: ensure that after deserializing all XML was consumed +- feat: add `Deserializer::from_str` and `Deserializer::from_bytes` + ## 0.23.0-alpha3 - fix: use element name (with namespace) when unflattening (serialize feature) diff --git a/src/de/escape.rs b/src/de/escape.rs index 4738fbd0..b487e5c4 100644 --- a/src/de/escape.rs +++ b/src/de/escape.rs @@ -12,7 +12,7 @@ use std::borrow::Cow; /// Escaping the value is actually not always necessary, for instance /// when converting to float, we don't expect any escapable character /// anyway -#[derive(Clone)] +#[derive(Clone, Debug)] pub(crate) struct EscapedDeserializer { decoder: Decoder, /// Possible escaped value of text/CDATA or attribute value @@ -145,13 +145,7 @@ impl<'de> serde::Deserializer<'de> for EscapedDeserializer { where V: Visitor<'de>, { - if self.escaped_value.is_empty() { - visitor.visit_unit() - } else { - Err(DeError::InvalidUnit( - "Expecting unit, got non empty attribute".into(), - )) - } + visitor.visit_unit() } fn deserialize_option(self, visitor: V) -> Result diff --git a/src/de/mod.rs b/src/de/mod.rs index 66d681ff..f689463b 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -114,6 +114,7 @@ mod var; pub use crate::errors::serialize::DeError; use crate::{ + errors::Error, events::{BytesStart, BytesText, Event}, reader::Decoder, Reader, @@ -147,12 +148,7 @@ pub fn from_str<'de, T: Deserialize<'de>>(s: &'de str) -> Result { /// Deserialize a xml slice of bytes pub fn from_bytes<'de, T: Deserialize<'de>>(s: &'de [u8]) -> Result { - let mut reader = Reader::from_bytes(s); - reader - .expand_empty_elements(true) - .check_end_names(true) - .trim_text(true); - let mut de = Deserializer::from_borrowing_reader(SliceReader { reader }); + let mut de = Deserializer::from_bytes(s); T::deserialize(&mut de) } @@ -262,6 +258,23 @@ impl<'de, R: BorrowingReader<'de>> Deserializer<'de, R> { } } +impl<'de> Deserializer<'de, SliceReader<'de>> { + /// Create new deserializer that will borrow data from the specified string + pub fn from_str(s: &'de str) -> Self { + Self::from_bytes(s.as_bytes()) + } + + /// Create new deserializer that will borrow data from the specified byte array + pub fn from_bytes(bytes: &'de [u8]) -> Self { + let mut reader = Reader::from_bytes(bytes); + reader + .expand_empty_elements(true) + .check_end_names(true) + .trim_text(true); + Self::from_borrowing_reader(SliceReader { reader }) + } +} + macro_rules! deserialize_type { ($deserialize:ident => $visit:ident) => { fn $deserialize>(self, visitor: V) -> Result { @@ -439,10 +452,18 @@ impl<'de, 'a, R: BorrowingReader<'de>> de::Deserializer<'de> for &'a mut Deseria self.deserialize_string(visitor) } + /// Always call `visitor.visit_unit()` because returned value ignored in any case. + /// + /// This method consumes any single [event][Event] except the [`Start`][Event::Start] + /// event. in which case all events up to corresponding [`End`][Event::End] event will + /// be consumed. + /// + /// This method returns error if current event is [`End`][Event::End] or [`Eof`][Event::Eof] fn deserialize_ignored_any>(self, visitor: V) -> Result { match self.next()? { Event::Start(e) => self.read_to_end(e.name())?, Event::End(_) => return Err(DeError::End), + Event::Eof => return Err(DeError::Eof), _ => (), } visitor.visit_unit() @@ -514,7 +535,10 @@ impl<'i, R: BufRead + 'i> BorrowingReader<'i> for IoReader { } fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> { - Ok(self.reader.read_to_end(name, &mut self.buf)?) + match self.reader.read_to_end(name, &mut self.buf) { + Err(Error::UnexpectedEof(_)) => Err(DeError::Eof), + other => Ok(other?), + } } fn decoder(&self) -> Decoder { @@ -540,7 +564,10 @@ impl<'de> BorrowingReader<'de> for SliceReader<'de> { } fn read_to_end(&mut self, name: &[u8]) -> Result<(), DeError> { - Ok(self.reader.read_to_end_unbuffered(name)?) + match self.reader.read_to_end_unbuffered(name) { + Err(Error::UnexpectedEof(_)) => Err(DeError::Eof), + other => Ok(other?), + } } fn decoder(&self) -> Decoder { @@ -551,8 +578,75 @@ impl<'de> BorrowingReader<'de> for SliceReader<'de> { #[cfg(test)] mod tests { use super::*; + use serde::de::IgnoredAny; use serde::Deserialize; + /// Deserialize an instance of type T from a string of XML text. + /// If deserialization was succeeded checks that all XML events was consumed + fn from_str<'de, T: Deserialize<'de>>(s: &'de str) -> Result { + let mut de = Deserializer::from_str(s); + let result = T::deserialize(&mut de); + + // If type was deserialized, the whole XML document should be consumed + if let Ok(_) = result { + assert_eq!(de.next().unwrap(), Event::Eof); + } + + result + } + + #[test] + fn read_to_end() { + use crate::events::BytesEnd; + use crate::events::Event::*; + + let mut reader = Reader::from_bytes( + r#" + + textcontent + + + + "# + .as_bytes(), + ); + reader + .expand_empty_elements(true) + .check_end_names(true) + .trim_text(true); + let mut de = Deserializer::from_borrowing_reader(SliceReader { reader }); + + assert_eq!( + de.next().unwrap(), + Start(BytesStart::borrowed_name(b"root")) + ); + + assert_eq!( + de.next().unwrap(), + Start(BytesStart::borrowed(br#"tag a="1""#, 3)) + ); + assert_eq!(de.read_to_end(b"tag").unwrap(), ()); + + assert_eq!( + de.next().unwrap(), + Start(BytesStart::borrowed(br#"tag a="2""#, 3)) + ); + assert_eq!( + de.next().unwrap(), + CData(BytesText::from_plain_str("cdata content")) + ); + assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"tag"))); + + assert_eq!( + de.next().unwrap(), + Start(BytesStart::borrowed(b"self-closed", 11)) + ); + assert_eq!(de.read_to_end(b"self-closed").unwrap(), ()); + + assert_eq!(de.next().unwrap(), End(BytesEnd::borrowed(b"root"))); + assert_eq!(de.next().unwrap(), Eof); + } + #[test] fn borrowing_reader_parity() { let s = r##" @@ -578,7 +672,7 @@ mod tests { break; } - assert_eq!(format!("{:?}", event1), format!("{:?}", event2)); + assert_eq!(event1, event2); } } @@ -675,23 +769,6 @@ mod tests { source: String, } - #[test] - fn simple_struct_from_attributes() { - let s = r##" - - "##; - - let item: Item = from_reader(s.as_bytes()).unwrap(); - - assert_eq!( - item, - Item { - name: "hello".to_string(), - source: "world.rs".to_string(), - } - ); - } - #[test] fn multiple_roots_attributes() { let s = r##" @@ -716,25 +793,6 @@ mod tests { ); } - #[test] - fn simple_struct_from_attribute_and_child() { - let s = r##" - - world.rs - - "##; - - let item: Item = from_str(s).unwrap(); - - assert_eq!( - item, - Item { - name: "hello".to_string(), - source: "world.rs".to_string(), - } - ); - } - #[derive(Debug, Deserialize, PartialEq)] struct Project { name: String, @@ -906,37 +964,234 @@ mod tests { assert_eq!(item, Item); } + /// Tests calling `deserialize_ignored_any` #[test] - fn unit() { + fn ignored_any() { + let err = from_str::(""); + match err { + Err(DeError::Eof) => {} + other => panic!("Expected `Eof`, found {:?}", other), + } + + from_str::(r#""#).unwrap(); + from_str::(r#""#).unwrap(); + from_str::(r#"text"#).unwrap(); + from_str::(r#""#).unwrap(); + from_str::(r#""#).unwrap(); + } + + mod unit { + use super::*; + #[derive(Debug, Deserialize, PartialEq)] struct Unit; - let data: Unit = from_str("").unwrap(); - assert_eq!(data, Unit); + #[test] + fn simple() { + let data: Unit = from_str("").unwrap(); + assert_eq!(data, Unit); + } + + #[test] + fn excess_attribute() { + let data: Unit = from_str(r#""#).unwrap(); + assert_eq!(data, Unit); + } + + #[test] + fn excess_element() { + let data: Unit = from_str(r#"element"#).unwrap(); + assert_eq!(data, Unit); + } + + #[test] + fn excess_text() { + let data: Unit = from_str(r#"excess text"#).unwrap(); + assert_eq!(data, Unit); + } + + #[test] + fn excess_cdata() { + let data: Unit = from_str(r#""#).unwrap(); + assert_eq!(data, Unit); + } } - #[test] - fn newtype() { + mod newtype { + use super::*; + #[derive(Debug, Deserialize, PartialEq)] struct Newtype(bool); - let data: Newtype = from_str("true").unwrap(); - assert_eq!(data, Newtype(true)); + #[test] + fn simple() { + let data: Newtype = from_str("true").unwrap(); + assert_eq!(data, Newtype(true)); + } + + #[test] + fn excess_attribute() { + let data: Newtype = from_str(r#"true"#).unwrap(); + assert_eq!(data, Newtype(true)); + } } - #[test] - fn tuple() { - let data: (f32, String) = from_str("42answer").unwrap(); - assert_eq!(data, (42.0, "answer".into())); + mod tuple { + use super::*; + + #[test] + fn simple() { + let data: (f32, String) = from_str("42answer").unwrap(); + assert_eq!(data, (42.0, "answer".into())); + } + + #[test] + fn excess_attribute() { + let data: (f32, String) = + from_str(r#"42answer"#).unwrap(); + assert_eq!(data, (42.0, "answer".into())); + } } - #[test] - fn tuple_struct() { + mod tuple_struct { + use super::*; + #[derive(Debug, Deserialize, PartialEq)] struct Tuple(f32, String); - let data: Tuple = from_str("42answer").unwrap(); - assert_eq!(data, Tuple(42.0, "answer".into())); + #[test] + fn simple() { + let data: Tuple = from_str("42answer").unwrap(); + assert_eq!(data, Tuple(42.0, "answer".into())); + } + + #[test] + fn excess_attribute() { + let data: Tuple = + from_str(r#"42answer"#).unwrap(); + assert_eq!(data, Tuple(42.0, "answer".into())); + } + } + + macro_rules! maplike_errors { + ($type:ty) => { + mod non_closed { + use super::*; + + #[test] + fn attributes() { + let data = from_str::<$type>(r#""#); + + match data { + Err(DeError::Eof) => (), + _ => panic!("Expected `Eof`, found {:?}", data), + } + } + + #[test] + fn elements_root() { + let data = from_str::<$type>(r#"answer"#); + + match data { + Err(DeError::Eof) => (), + _ => panic!("Expected `Eof`, found {:?}", data), + } + } + + #[test] + fn elements_child() { + let data = from_str::<$type>(r#"answer"#); + + match data { + Err(DeError::Eof) => (), + _ => panic!("Expected `Eof`, found {:?}", data), + } + } + } + + mod mismatched_end { + use super::*; + use crate::errors::Error::EndEventMismatch; + + #[test] + fn attributes() { + let data = + from_str::<$type>(r#""#); + + match data { + Err(DeError::Xml(EndEventMismatch { .. })) => (), + _ => panic!("Expected `Xml(EndEventMismatch)`, found {:?}", data), + } + } + + #[test] + fn elements_root() { + let data = from_str::<$type>( + r#"answer"#, + ); + + match data { + Err(DeError::Xml(EndEventMismatch { .. })) => (), + _ => panic!("Expected `Xml(EndEventMismatch)`, found {:?}", data), + } + } + + #[test] + fn elements_child() { + let data = + from_str::<$type>(r#"answer"#); + + match data { + Err(DeError::Xml(EndEventMismatch { .. })) => (), + _ => panic!("Expected `Xml(EndEventMismatch)`, found {:?}", data), + } + } + } + }; + } + + mod map { + use super::*; + use std::collections::HashMap; + use std::iter::FromIterator; + + #[test] + fn elements() { + let data: HashMap<(), ()> = + from_str(r#"42answer"#).unwrap(); + assert_eq!( + data, + HashMap::from_iter([((), ()), ((), ()),].iter().cloned()) + ); + } + + #[test] + fn attributes() { + let data: HashMap<(), ()> = from_str(r#""#).unwrap(); + assert_eq!( + data, + HashMap::from_iter([((), ()), ((), ()),].iter().cloned()) + ); + } + + #[test] + fn attribute_and_element() { + let data: HashMap<(), ()> = from_str( + r#" + + answer + + "#, + ) + .unwrap(); + + assert_eq!( + data, + HashMap::from_iter([((), ()), ((), ()),].iter().cloned()) + ); + } + + maplike_errors!(HashMap<(), ()>); } mod struct_ { @@ -961,6 +1216,28 @@ mod tests { ); } + #[test] + fn excess_elements() { + let data: Struct = from_str( + r#" + + + 42 + + answer + + "#, + ) + .unwrap(); + assert_eq!( + data, + Struct { + float: 42.0, + string: "answer".into() + } + ); + } + #[test] fn attributes() { let data: Struct = from_str(r#""#).unwrap(); @@ -972,6 +1249,43 @@ mod tests { } ); } + + #[test] + fn excess_attributes() { + let data: Struct = from_str( + r#""#, + ) + .unwrap(); + assert_eq!( + data, + Struct { + float: 42.0, + string: "answer".into() + } + ); + } + + #[test] + fn attribute_and_element() { + let data: Struct = from_str( + r#" + + answer + + "#, + ) + .unwrap(); + + assert_eq!( + data, + Struct { + float: 42.0, + string: "answer".into() + } + ); + } + + maplike_errors!(Struct); } mod nested_struct { diff --git a/src/events/attributes.rs b/src/events/attributes.rs index cc4b1f4c..83ebd489 100644 --- a/src/events/attributes.rs +++ b/src/events/attributes.rs @@ -13,7 +13,7 @@ use std::{borrow::Cow, collections::HashMap, io::BufRead, ops::Range}; /// The duplicate check can be turned off by calling [`with_checks(false)`]. /// /// [`with_checks(false)`]: #method.with_checks -#[derive(Clone)] +#[derive(Clone, Debug)] pub struct Attributes<'a> { /// slice of `Element` corresponding to attributes bytes: &'a [u8], @@ -281,12 +281,12 @@ impl<'a> Attribute<'a> { impl<'a> std::fmt::Debug for Attribute<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use crate::utils::write_byte_string; + use crate::utils::{write_byte_string, write_cow_string}; write!(f, "Attribute {{ key: ")?; write_byte_string(f, self.key)?; write!(f, ", value: ")?; - write_byte_string(f, &self.value)?; + write_cow_string(f, &self.value)?; write!(f, " }}") } } diff --git a/src/events/mod.rs b/src/events/mod.rs index d366f122..97f94a96 100644 --- a/src/events/mod.rs +++ b/src/events/mod.rs @@ -365,10 +365,10 @@ impl<'a> BytesStart<'a> { impl<'a> std::fmt::Debug for BytesStart<'a> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - use crate::utils::write_byte_string; + use crate::utils::write_cow_string; write!(f, "BytesStart {{ buf: ")?; - write_byte_string(f, &self.buf)?; + write_cow_string(f, &self.buf)?; write!(f, ", name_len: {} }}", self.name_len) } } @@ -548,10 +548,10 @@ impl<'a> BytesEnd<'a> { impl<'a> std::fmt::Debug for BytesEnd<'a> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - use crate::utils::write_byte_string; + use crate::utils::write_cow_string; write!(f, "BytesEnd {{ name: ")?; - write_byte_string(f, &self.name)?; + write_cow_string(f, &self.name)?; write!(f, " }}") } } @@ -852,10 +852,10 @@ impl<'a> BytesText<'a> { impl<'a> std::fmt::Debug for BytesText<'a> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - use crate::utils::write_byte_string; + use crate::utils::write_cow_string; write!(f, "BytesText {{ content: ")?; - write_byte_string(f, &self.content)?; + write_cow_string(f, &self.content)?; write!(f, " }}") } } diff --git a/src/reader.rs b/src/reader.rs index 22d1c173..961c9be2 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -1568,12 +1568,12 @@ impl NamespaceBufferIndex { /// Utf8 Decoder #[cfg(not(feature = "encoding"))] -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub struct Decoder; /// Utf8 Decoder #[cfg(feature = "encoding")] -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub struct Decoder { encoding: &'static Encoding, } diff --git a/src/utils.rs b/src/utils.rs index adef9160..a3fd7c83 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,4 +1,21 @@ -pub fn write_byte_string(f: &mut std::fmt::Formatter<'_>, byte_string: &[u8]) -> std::fmt::Result { +use std::borrow::Cow; +use std::fmt::{Formatter, Result}; + +pub fn write_cow_string(f: &mut Formatter<'_>, cow_string: &Cow<[u8]>) -> Result { + match cow_string { + Cow::Owned(s) => { + write!(f, "Owned(")?; + write_byte_string(f, &s)?; + } + Cow::Borrowed(s) => { + write!(f, "Borrowed(")?; + write_byte_string(f, s)?; + } + } + write!(f, ")") +} + +pub fn write_byte_string(f: &mut Formatter<'_>, byte_string: &[u8]) -> Result { write!(f, "\"")?; for b in byte_string { match *b {