From e75a54bbf9e30db976ade8db99d0e1b726ad1531 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 25 Dec 2022 20:20:53 +0500 Subject: [PATCH 1/4] Fix infinity loop in skip when parsing malformed XML Co-authored-by: Daniel Alley --- Changelog.md | 5 +++++ src/de/mod.rs | 24 ++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/Changelog.md b/Changelog.md index 3fc54c67..f9b5ce2d 100644 --- a/Changelog.md +++ b/Changelog.md @@ -14,8 +14,13 @@ ### Bug Fixes +- [#530]: Fix an infinite loop leading to unbounded memory consumption that occurs when + skipping events on malformed XML with the `overlapped-lists` feature active. + ### Misc Changes +[#530]: https://github.com/tafia/quick-xml/pull/530 + ## 0.27.0 -- 2022-12-25 ### New Features diff --git a/src/de/mod.rs b/src/de/mod.rs index e57b3261..a2d3bbe9 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -2073,6 +2073,7 @@ where /// should be replayed after calling [`Self::start_replay()`]. #[cfg(feature = "overlapped-lists")] #[inline] + #[must_use = "returned checkpoint should be used in `start_replay`"] fn skip_checkpoint(&self) -> usize { self.write.len() } @@ -2100,16 +2101,21 @@ where DeEvent::End(ref e) if e.name().as_ref() == end => { self.skip_event(event)?; if depth == 0 { - return Ok(()); + break; } depth -= 1; } + DeEvent::Eof => { + self.skip_event(event)?; + break; + } _ => self.skip_event(event)?, } } } - _ => Ok(()), + _ => (), } + Ok(()) } #[cfg(feature = "overlapped-lists")] @@ -3057,6 +3063,20 @@ mod tests { e => panic!("Expected `Err(TooManyEvents(3))`, but found {:?}", e), } } + + /// Without handling Eof in `skip` this test failed with memory allocation + #[test] + fn invalid_xml() { + use crate::de::DeEvent::*; + + let mut de = Deserializer::from_str(""); + + // Cache all events + let checkpoint = de.skip_checkpoint(); + de.skip().unwrap(); + de.start_replay(checkpoint); + assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]); + } } #[test] From bfafbbccdd9fc3d9ed6d17ff0104902827b7e6d5 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 25 Dec 2022 21:57:50 +0500 Subject: [PATCH 2/4] Add test for reading invalid XML to the end (Review this commit with "ignore whitespace changes" option) failures (1): de::tests::read_to_end::invalid_xml --- src/de/mod.rs | 72 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 27 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index a2d3bbe9..26bbab16 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -3079,40 +3079,58 @@ mod tests { } } - #[test] - fn read_to_end() { + mod read_to_end { + use super::*; use crate::de::DeEvent::*; + use pretty_assertions::assert_eq; - let mut de = Deserializer::from_str( - r#" - - textcontent - - - - "#, - ); + #[test] + fn complex() { + let mut de = Deserializer::from_str( + r#" + + textcontent + + + + "#, + ); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); + assert_eq!(de.next().unwrap(), Start(BytesStart::new("root"))); - assert_eq!( - de.next().unwrap(), - Start(BytesStart::from_content(r#"tag a="1""#, 3)) - ); - assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ()); + assert_eq!( + de.next().unwrap(), + Start(BytesStart::from_content(r#"tag a="1""#, 3)) + ); + assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ()); - assert_eq!( - de.next().unwrap(), - Start(BytesStart::from_content(r#"tag a="2""#, 3)) - ); - assert_eq!(de.next().unwrap(), CData(BytesCData::new("cdata content"))); - assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag"))); + assert_eq!( + de.next().unwrap(), + Start(BytesStart::from_content(r#"tag a="2""#, 3)) + ); + assert_eq!(de.next().unwrap(), CData(BytesCData::new("cdata content"))); + assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag"))); - assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed"))); - assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ()); + assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed"))); + assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ()); + + assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); + assert_eq!(de.next().unwrap(), Eof); + } - assert_eq!(de.next().unwrap(), End(BytesEnd::new("root"))); - assert_eq!(de.next().unwrap(), Eof); + #[test] + fn invalid_xml() { + let mut de = Deserializer::from_str(""); + + assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag"))); + assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag"))); + + match de.read_to_end(QName(b"tag")) { + Err(DeError::UnexpectedEof) => (), + x => panic!("Expected `Err(UnexpectedEof)`, but found {:?}", x), + } + assert_eq!(de.next().unwrap(), Eof); + } } #[test] From 178e154d242e47549162bebd50f5bba12807245c Mon Sep 17 00:00:00 2001 From: Mingun Date: Fri, 16 Dec 2022 01:31:58 +0500 Subject: [PATCH 3/4] Fix an error in the `Deserializer::read_to_end` when feature "overlapped-lists" is enabled --- Changelog.md | 2 ++ src/de/mod.rs | 24 ++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/Changelog.md b/Changelog.md index f9b5ce2d..a906c05a 100644 --- a/Changelog.md +++ b/Changelog.md @@ -16,6 +16,8 @@ - [#530]: Fix an infinite loop leading to unbounded memory consumption that occurs when skipping events on malformed XML with the `overlapped-lists` feature active. +- [#530]: Fix an error in the `Deserializer::read_to_end` when `overlapped-lists` + feature is active and malformed XML is parsed ### Misc Changes diff --git a/src/de/mod.rs b/src/de/mod.rs index 26bbab16..32f438d3 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -2237,7 +2237,7 @@ where } Some(DeEvent::End(e)) if e.name() == name => { if depth == 0 { - return Ok(()); + break; } depth -= 1; } @@ -2247,9 +2247,29 @@ where // If we do not have skipped events, use effective reading that will // not allocate memory for events - None => return self.reader.read_to_end(name), + None => { + // We should close all opened tags, because we could buffer + // Start events, but not the corresponding End events. So we + // keep reading events until we exit all nested tags. + // `read_to_end()` will return an error if an Eof was encountered + // preliminary (in case of malformed XML). + // + // + // ^^^^^^^^^^ - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2 + // ^^^^^^ - read by the first call of `self.reader.read_to_end()` + // ^^^^^^ - read by the second call of `self.reader.read_to_end()` + loop { + self.reader.read_to_end(name)?; + if depth == 0 { + break; + } + depth -= 1; + } + break; + } } } + Ok(()) } #[cfg(not(feature = "overlapped-lists"))] fn read_to_end(&mut self, name: QName) -> Result<(), DeError> { From ca7aa21ba111b28ca54ddc161c8302bba035904f Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 25 Dec 2022 22:22:42 +0500 Subject: [PATCH 4/4] Remove excess test. That test is duplicated by read_to_end::complex --- src/de/mod.rs | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/src/de/mod.rs b/src/de/mod.rs index 32f438d3..abd39257 100644 --- a/src/de/mod.rs +++ b/src/de/mod.rs @@ -3155,11 +3155,11 @@ mod tests { #[test] fn borrowing_reader_parity() { - let s = r##" + let s = r#" Some text - "##; + "#; let mut reader1 = IoReader { reader: Reader::from_reader(s.as_bytes()), @@ -3183,12 +3183,12 @@ mod tests { #[test] fn borrowing_reader_events() { - let s = r##" + let s = r#" Some text - "##; + "#; let mut reader = SliceReader { reader: Reader::from_str(s), @@ -3231,27 +3231,6 @@ mod tests { ) } - #[test] - fn borrowing_read_to_end() { - let s = " "; - let mut reader = SliceReader { - reader: Reader::from_str(s), - }; - - reader - .reader - .trim_text(true) - .expand_empty_elements(true) - .check_end_names(true); - - assert_eq!( - reader.next().unwrap(), - DeEvent::Start(BytesStart::from_content("item ", 4)) - ); - reader.read_to_end(QName(b"item")).unwrap(); - assert_eq!(reader.next().unwrap(), DeEvent::Eof); - } - /// Ensures, that [`Deserializer::read_string()`] never can get an `End` event, /// because parser reports error early #[test]