diff --git a/Changelog.md b/Changelog.md
index 3fc54c67..a906c05a 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -14,8 +14,15 @@
### Bug Fixes
+- [#530]: Fix an infinite loop leading to unbounded memory consumption that occurs when
+ skipping events on malformed XML with the `overlapped-lists` feature active.
+- [#530]: Fix an error in the `Deserializer::read_to_end` when `overlapped-lists`
+ feature is active and malformed XML is parsed
+
### Misc Changes
+[#530]: https://github.com/tafia/quick-xml/pull/530
+
## 0.27.0 -- 2022-12-25
### New Features
diff --git a/src/de/mod.rs b/src/de/mod.rs
index e57b3261..abd39257 100644
--- a/src/de/mod.rs
+++ b/src/de/mod.rs
@@ -2073,6 +2073,7 @@ where
/// should be replayed after calling [`Self::start_replay()`].
#[cfg(feature = "overlapped-lists")]
#[inline]
+ #[must_use = "returned checkpoint should be used in `start_replay`"]
fn skip_checkpoint(&self) -> usize {
self.write.len()
}
@@ -2100,16 +2101,21 @@ where
DeEvent::End(ref e) if e.name().as_ref() == end => {
self.skip_event(event)?;
if depth == 0 {
- return Ok(());
+ break;
}
depth -= 1;
}
+ DeEvent::Eof => {
+ self.skip_event(event)?;
+ break;
+ }
_ => self.skip_event(event)?,
}
}
}
- _ => Ok(()),
+ _ => (),
}
+ Ok(())
}
#[cfg(feature = "overlapped-lists")]
@@ -2231,7 +2237,7 @@ where
}
Some(DeEvent::End(e)) if e.name() == name => {
if depth == 0 {
- return Ok(());
+ break;
}
depth -= 1;
}
@@ -2241,9 +2247,29 @@ where
// If we do not have skipped events, use effective reading that will
// not allocate memory for events
- None => return self.reader.read_to_end(name),
+ None => {
+ // We should close all opened tags, because we could buffer
+ // Start events, but not the corresponding End events. So we
+ // keep reading events until we exit all nested tags.
+ // `read_to_end()` will return an error if an Eof was encountered
+ // preliminary (in case of malformed XML).
+ //
+ //
+ // ^^^^^^^^^^ - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2
+ // ^^^^^^ - read by the first call of `self.reader.read_to_end()`
+ // ^^^^^^ - read by the second call of `self.reader.read_to_end()`
+ loop {
+ self.reader.read_to_end(name)?;
+ if depth == 0 {
+ break;
+ }
+ depth -= 1;
+ }
+ break;
+ }
}
}
+ Ok(())
}
#[cfg(not(feature = "overlapped-lists"))]
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
@@ -3057,51 +3083,83 @@ mod tests {
e => panic!("Expected `Err(TooManyEvents(3))`, but found {:?}", e),
}
}
+
+ /// Without handling Eof in `skip` this test failed with memory allocation
+ #[test]
+ fn invalid_xml() {
+ use crate::de::DeEvent::*;
+
+ let mut de = Deserializer::from_str("");
+
+ // Cache all events
+ let checkpoint = de.skip_checkpoint();
+ de.skip().unwrap();
+ de.start_replay(checkpoint);
+ assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]);
+ }
}
- #[test]
- fn read_to_end() {
+ mod read_to_end {
+ use super::*;
use crate::de::DeEvent::*;
+ use pretty_assertions::assert_eq;
- let mut de = Deserializer::from_str(
- r#"
-
- textcontent
-
-
-
- "#,
- );
+ #[test]
+ fn complex() {
+ let mut de = Deserializer::from_str(
+ r#"
+
+ textcontent
+
+
+
+ "#,
+ );
- assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
+ assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
- assert_eq!(
- de.next().unwrap(),
- Start(BytesStart::from_content(r#"tag a="1""#, 3))
- );
- assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());
+ assert_eq!(
+ de.next().unwrap(),
+ Start(BytesStart::from_content(r#"tag a="1""#, 3))
+ );
+ assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());
- assert_eq!(
- de.next().unwrap(),
- Start(BytesStart::from_content(r#"tag a="2""#, 3))
- );
- assert_eq!(de.next().unwrap(), CData(BytesCData::new("cdata content")));
- assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));
+ assert_eq!(
+ de.next().unwrap(),
+ Start(BytesStart::from_content(r#"tag a="2""#, 3))
+ );
+ assert_eq!(de.next().unwrap(), CData(BytesCData::new("cdata content")));
+ assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));
+
+ assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
+ assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());
+
+ assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
+ assert_eq!(de.next().unwrap(), Eof);
+ }
+
+ #[test]
+ fn invalid_xml() {
+ let mut de = Deserializer::from_str("");
- assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
- assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());
+ assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
+ assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag")));
- assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
- assert_eq!(de.next().unwrap(), Eof);
+ match de.read_to_end(QName(b"tag")) {
+ Err(DeError::UnexpectedEof) => (),
+ x => panic!("Expected `Err(UnexpectedEof)`, but found {:?}", x),
+ }
+ assert_eq!(de.next().unwrap(), Eof);
+ }
}
#[test]
fn borrowing_reader_parity() {
- let s = r##"
+ let s = r#"
- Some text
- "##;
+ "#;
let mut reader1 = IoReader {
reader: Reader::from_reader(s.as_bytes()),
@@ -3125,12 +3183,12 @@ mod tests {
#[test]
fn borrowing_reader_events() {
- let s = r##"
+ let s = r#"
- Some text
- "##;
+ "#;
let mut reader = SliceReader {
reader: Reader::from_str(s),
@@ -3173,27 +3231,6 @@ mod tests {
)
}
- #[test]
- fn borrowing_read_to_end() {
- let s = " ";
- let mut reader = SliceReader {
- reader: Reader::from_str(s),
- };
-
- reader
- .reader
- .trim_text(true)
- .expand_empty_elements(true)
- .check_end_names(true);
-
- assert_eq!(
- reader.next().unwrap(),
- DeEvent::Start(BytesStart::from_content("item ", 4))
- );
- reader.read_to_end(QName(b"item")).unwrap();
- assert_eq!(reader.next().unwrap(), DeEvent::Eof);
- }
-
/// Ensures, that [`Deserializer::read_string()`] never can get an `End` event,
/// because parser reports error early
#[test]