diff --git a/Changelog.md b/Changelog.md index 3fc54c67..a906c05a 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,8 +14,15 @@ ### Bug Fixes +- [#530]: Fix an infinite loop leading to unbounded memory consumption that occurs when + skipping events on malformed XML with the `overlapped-lists` feature active. +- [#530]: Fix an error in the `Deserializer::read_to_end` when `overlapped-lists` + feature is active and malformed XML is parsed + ### Misc Changes +[#530]: https://github.com/tafia/quick-xml/pull/530 + ## 0.27.0 -- 2022-12-25 ### New Features diff --git a/src/de/mod.rs b/src/de/mod.rs index e57b3261..abd39257 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -2073,6 +2073,7 @@ where /// should be replayed after calling [`Self::start_replay()`]. #[cfg(feature = "overlapped-lists")] #[inline] + #[must_use = "returned checkpoint should be used in `start_replay`"] fn skip_checkpoint(&self) -> usize { self.write.len() } @@ -2100,16 +2101,21 @@ where DeEvent::End(ref e) if e.name().as_ref() == end => { self.skip_event(event)?; if depth == 0 { - return Ok(()); + break; } depth -= 1; } + DeEvent::Eof => { + self.skip_event(event)?; + break; + } _ => self.skip_event(event)?, } } } - _ => Ok(()), + _ => (), } + Ok(()) } #[cfg(feature = "overlapped-lists")] @@ -2231,7 +2237,7 @@ where } Some(DeEvent::End(e)) if e.name() == name => { if depth == 0 { - return Ok(()); + break; } depth -= 1; } @@ -2241,9 +2247,29 @@ where // If we do not have skipped events, use effective reading that will // not allocate memory for events - None => return self.reader.read_to_end(name), + None => { + // We should close all opened tags, because we could buffer + // Start events, but not the corresponding End events. So we + // keep reading events until we exit all nested tags. + // `read_to_end()` will return an error if an Eof was encountered + // preliminary (in case of malformed XML). + // + // + // ^^^^^^^^^^ - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2 + // ^^^^^^ - read by the first call of `self.reader.read_to_end()` + // ^^^^^^ - read by the second call of `self.reader.read_to_end()` + loop { + self.reader.read_to_end(name)?; + if depth == 0 { + break; + } + depth -= 1; + } + break; + } } } + Ok(()) } #[cfg(not(feature = "overlapped-lists"))] fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { @@ -3057,51 +3083,83 @@ mod tests { e => panic!("Expected `Err(TooManyEvents(3))`, but found {:?}", e), } } + + /// Without handling Eof in `skip` this test failed with memory allocation + #[test] + fn invalid_xml() { + use crate::de::DeEvent::*; + + let mut de = Deserializer::from_str(""); + + // Cache all events + let checkpoint = de.skip_checkpoint(); + de.skip().unwrap(); + de.start_replay(checkpoint); + assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]); + } } - #[test] - fn read_to_end() { + mod read_to_end { + use super::*; use crate::de::DeEvent::*; + use pretty_assertions::assert_eq; - let mut de = Deserializer::from_str( - r#" - - textcontent - - - - "#, - ); + #[test] + fn complex() { + let mut de = Deserializer::from_str( + r#" + + textcontent + + + + "#, + ); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); + assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); - assert_eq!( - de.next().unwrap(), - Start(BytesStart::from_content(r#"tag a="1""#, 3)) - ); - assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ()); + assert_eq!( + de.next().unwrap(), + Start(BytesStart::from_content(r#"tag a="1""#, 3)) + ); + assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ()); - assert_eq!( - de.next().unwrap(), - Start(BytesStart::from_content(r#"tag a="2""#, 3)) - ); - assert_eq!(de.next().unwrap(), CData(BytesCData::new("cdata content"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag"))); + assert_eq!( + de.next().unwrap(), + Start(BytesStart::from_content(r#"tag a="2""#, 3)) + ); + assert_eq!(de.next().unwrap(), CData(BytesCData::new("cdata content"))); + assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag"))); + + assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed"))); + assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ()); + + assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); + assert_eq!(de.next().unwrap(), Eof); + } + + #[test] + fn invalid_xml() { + let mut de = Deserializer::from_str(""); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed"))); - assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ()); + assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag"))); + assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); - assert_eq!(de.next().unwrap(), Eof); + match de.read_to_end(QName(b"tag")) { + Err(DeError::UnexpectedEof) => (), + x => panic!("Expected `Err(UnexpectedEof)`, but found {:?}", x), + } + assert_eq!(de.next().unwrap(), Eof); + } } #[test] fn borrowing_reader_parity() { - let s = r##" + let s = r#" Some text - "##; + "#; let mut reader1 = IoReader { reader: Reader::from_reader(s.as_bytes()), @@ -3125,12 +3183,12 @@ mod tests { #[test] fn borrowing_reader_events() { - let s = r##" + let s = r#" Some text - "##; + "#; let mut reader = SliceReader { reader: Reader::from_str(s), @@ -3173,27 +3231,6 @@ mod tests { ) } - #[test] - fn borrowing_read_to_end() { - let s = " "; - let mut reader = SliceReader { - reader: Reader::from_str(s), - }; - - reader - .reader - .trim_text(true) - .expand_empty_elements(true) - .check_end_names(true); - - assert_eq!( - reader.next().unwrap(), - DeEvent::Start(BytesStart::from_content("item ", 4)) - ); - reader.read_to_end(QName(b"item")).unwrap(); - assert_eq!(reader.next().unwrap(), DeEvent::Eof); - } - /// Ensures, that [`Deserializer::read_string()`] never can get an `End` event, /// because parser reports error early #[test]