Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bugs when parsing sequences in overlapped-lists mode #530

Merged
merged 4 commits into from Dec 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions Changelog.md
Expand Up @@ -14,8 +14,15 @@

### Bug Fixes

- [#530]: Fix an infinite loop leading to unbounded memory consumption that occurs when
skipping events on malformed XML with the `overlapped-lists` feature active.
- [#530]: Fix an error in the `Deserializer::read_to_end` when `overlapped-lists`
feature is active and malformed XML is parsed

### Misc Changes

[#530]: https://github.com/tafia/quick-xml/pull/530

## 0.27.0 -- 2022-12-25

### New Features
Expand Down
149 changes: 93 additions & 56 deletions src/de/mod.rs
Expand Up @@ -2073,6 +2073,7 @@ where
/// should be replayed after calling [`Self::start_replay()`].
#[cfg(feature = "overlapped-lists")]
#[inline]
#[must_use = "returned checkpoint should be used in `start_replay`"]
fn skip_checkpoint(&self) -> usize {
self.write.len()
}
Expand Down Expand Up @@ -2100,16 +2101,21 @@ where
DeEvent::End(ref e) if e.name().as_ref() == end => {
self.skip_event(event)?;
if depth == 0 {
return Ok(());
break;
}
depth -= 1;
}
DeEvent::Eof => {
self.skip_event(event)?;
break;
}
_ => self.skip_event(event)?,
}
}
}
_ => Ok(()),
_ => (),
}
Ok(())
}

#[cfg(feature = "overlapped-lists")]
Expand Down Expand Up @@ -2231,7 +2237,7 @@ where
}
Some(DeEvent::End(e)) if e.name() == name => {
if depth == 0 {
return Ok(());
break;
}
depth -= 1;
}
Expand All @@ -2241,9 +2247,29 @@ where

// If we do not have skipped events, use effective reading that will
// not allocate memory for events
None => return self.reader.read_to_end(name),
None => {
// We should close all opened tags, because we could buffer
// Start events, but not the corresponding End events. So we
// keep reading events until we exit all nested tags.
// `read_to_end()` will return an error if an Eof was encountered
// preliminary (in case of malformed XML).
//
// <tag><tag></tag></tag>
// ^^^^^^^^^^ - buffered in `self.read`, when `self.read_to_end()` is called, depth = 2
// ^^^^^^ - read by the first call of `self.reader.read_to_end()`
// ^^^^^^ - read by the second call of `self.reader.read_to_end()`
loop {
self.reader.read_to_end(name)?;
dralley marked this conversation as resolved.
Show resolved Hide resolved
if depth == 0 {
break;
}
depth -= 1;
}
break;
}
}
}
Ok(())
}
#[cfg(not(feature = "overlapped-lists"))]
fn read_to_end(&mut self, name: QName) -> Result<(), DeError> {
Expand Down Expand Up @@ -3057,51 +3083,83 @@ mod tests {
e => panic!("Expected `Err(TooManyEvents(3))`, but found {:?}", e),
}
}

/// Without handling Eof in `skip` this test failed with memory allocation
#[test]
fn invalid_xml() {
use crate::de::DeEvent::*;

let mut de = Deserializer::from_str("<root>");

// Cache all events
let checkpoint = de.skip_checkpoint();
de.skip().unwrap();
de.start_replay(checkpoint);
assert_eq!(de.read, vec![Start(BytesStart::new("root")), Eof]);
}
}

#[test]
fn read_to_end() {
mod read_to_end {
use super::*;
use crate::de::DeEvent::*;
use pretty_assertions::assert_eq;

let mut de = Deserializer::from_str(
r#"
<root>
<tag a="1"><tag>text</tag>content</tag>
<tag a="2"><![CDATA[cdata content]]></tag>
<self-closed/>
</root>
"#,
);
#[test]
fn complex() {
let mut de = Deserializer::from_str(
r#"
<root>
<tag a="1"><tag>text</tag>content</tag>
<tag a="2"><![CDATA[cdata content]]></tag>
<self-closed/>
</root>
"#,
);

assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));
assert_eq!(de.next().unwrap(), Start(BytesStart::new("root")));

assert_eq!(
de.next().unwrap(),
Start(BytesStart::from_content(r#"tag a="1""#, 3))
);
assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());
assert_eq!(
de.next().unwrap(),
Start(BytesStart::from_content(r#"tag a="1""#, 3))
);
assert_eq!(de.read_to_end(QName(b"tag")).unwrap(), ());

assert_eq!(
de.next().unwrap(),
Start(BytesStart::from_content(r#"tag a="2""#, 3))
);
assert_eq!(de.next().unwrap(), CData(BytesCData::new("cdata content")));
assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));
assert_eq!(
de.next().unwrap(),
Start(BytesStart::from_content(r#"tag a="2""#, 3))
);
assert_eq!(de.next().unwrap(), CData(BytesCData::new("cdata content")));
assert_eq!(de.next().unwrap(), End(BytesEnd::new("tag")));

assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());

assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
assert_eq!(de.next().unwrap(), Eof);
}

#[test]
fn invalid_xml() {
let mut de = Deserializer::from_str("<tag><tag></tag>");

assert_eq!(de.next().unwrap(), Start(BytesStart::new("self-closed")));
assert_eq!(de.read_to_end(QName(b"self-closed")).unwrap(), ());
assert_eq!(de.next().unwrap(), Start(BytesStart::new("tag")));
assert_eq!(de.peek().unwrap(), &Start(BytesStart::new("tag")));

assert_eq!(de.next().unwrap(), End(BytesEnd::new("root")));
assert_eq!(de.next().unwrap(), Eof);
match de.read_to_end(QName(b"tag")) {
Err(DeError::UnexpectedEof) => (),
x => panic!("Expected `Err(UnexpectedEof)`, but found {:?}", x),
}
assert_eq!(de.next().unwrap(), Eof);
}
}

#[test]
fn borrowing_reader_parity() {
let s = r##"
let s = r#"
<item name="hello" source="world.rs">Some text</item>
<item2/>
<item3 value="world" />
"##;
"#;

let mut reader1 = IoReader {
reader: Reader::from_reader(s.as_bytes()),
Expand All @@ -3125,12 +3183,12 @@ mod tests {

#[test]
fn borrowing_reader_events() {
let s = r##"
let s = r#"
<item name="hello" source="world.rs">Some text</item>
<item2></item2>
<item3/>
<item4 value="world" />
"##;
"#;

let mut reader = SliceReader {
reader: Reader::from_str(s),
Expand Down Expand Up @@ -3173,27 +3231,6 @@ mod tests {
)
}

#[test]
fn borrowing_read_to_end() {
let s = " <item /> ";
let mut reader = SliceReader {
reader: Reader::from_str(s),
};

reader
.reader
.trim_text(true)
.expand_empty_elements(true)
.check_end_names(true);

assert_eq!(
reader.next().unwrap(),
DeEvent::Start(BytesStart::from_content("item ", 4))
);
reader.read_to_end(QName(b"item")).unwrap();
assert_eq!(reader.next().unwrap(), DeEvent::Eof);
}

/// Ensures, that [`Deserializer::read_string()`] never can get an `End` event,
/// because parser reports error early
#[test]
Expand Down