Skip to content
This repository has been archived by the owner on May 30, 2022. It is now read-only.

Commit

Permalink
Fix internal panic when parsing not fully forming comment, CDATA and …
Browse files Browse the repository at this point in the history
…DOCTYPE. Fix #344
  • Loading branch information
Mingun committed Mar 8, 2022
1 parent 9582fe5 commit 05814ea
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 39 deletions.
2 changes: 2 additions & 0 deletions Changelog.md
Expand Up @@ -27,6 +27,8 @@
([#367](https://github.com/tafia/quick-xml/pull/367))
- refactor: unify code for buffered and borrowed readers
([#367](https://github.com/tafia/quick-xml/pull/367))
- fix: fix internal panic message when parse malformed XML
([#344](https://github.com/tafia/quick-xml/issues/344), [#367](https://github.com/tafia/quick-xml/pull/367))

## 0.23.0-alpha3

Expand Down
88 changes: 49 additions & 39 deletions src/reader.rs
Expand Up @@ -274,7 +274,7 @@ impl<R: BufRead> Reader<R> {
// `<!` - comment, CDATA or DOCTYPE declaration
b'!' => match self.reader.read_bang_element(buf, &mut self.buf_position) {
Ok(None) => Ok(Event::Eof),
Ok(Some(bytes)) => self.read_bang(bytes),
Ok(Some((bang_type, bytes))) => self.read_bang(bang_type, bytes),
Err(e) => Err(e),
},
// `</` - closing tag
Expand Down Expand Up @@ -347,36 +347,37 @@ impl<R: BufRead> Reader<R> {

/// reads `BytesElement` starting with a `!`,
/// return `Comment`, `CData` or `DocType` event
fn read_bang<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result<Event<'b>> {
fn read_bang<'a, 'b>(&'a mut self, bang_type: BangType, buf: &'b [u8]) -> Result<Event<'b>> {
let uncased_starts_with = |string: &[u8], prefix: &[u8]| {
string.len() >= prefix.len() && string[..prefix.len()].eq_ignore_ascii_case(prefix)
};

let len = buf.len();
if buf.starts_with(b"!--") {
// FIXME: actually, isn't, it misses <!-->
debug_assert!(len >= 5, "Minimum length guaranteed by read_bang_element");
if self.check_comments {
// search if '--' not in comments
if let Some(p) =
memchr::memchr_iter(b'-', &buf[3..len - 2]).position(|p| buf[3 + p + 1] == b'-')
{
self.buf_position += len - p;
return Err(Error::UnexpectedToken("--".to_string()));
match bang_type {
BangType::Comment if buf.starts_with(b"!--") => {
if self.check_comments {
// search if '--' not in comments
if let Some(p) = memchr::memchr_iter(b'-', &buf[3..len - 2])
.position(|p| buf[3 + p + 1] == b'-')
{
self.buf_position += len - p;
return Err(Error::UnexpectedToken("--".to_string()));
}
}
Ok(Event::Comment(BytesText::from_escaped(&buf[3..len - 2])))
}
Ok(Event::Comment(BytesText::from_escaped(&buf[3..len - 2])))
} else if uncased_starts_with(buf, b"![CDATA[") {
Ok(Event::CData(BytesText::from_plain(&buf[8..])))
} else if uncased_starts_with(buf, b"!DOCTYPE") {
let start = buf[8..]
.iter()
.position(|b| !is_whitespace(*b))
.unwrap_or_else(|| len - 8);
debug_assert!(start < len - 8, "DocType must have a name");
Ok(Event::DocType(BytesText::from_escaped(&buf[8 + start..])))
} else {
unreachable!("Proper bang start guaranteed by read_bang_element");
BangType::CData if uncased_starts_with(buf, b"![CDATA[") => {
Ok(Event::CData(BytesText::from_plain(&buf[8..])))
}
BangType::DocType if uncased_starts_with(buf, b"!DOCTYPE") => {
let start = buf[8..]
.iter()
.position(|b| !is_whitespace(*b))
.unwrap_or_else(|| len - 8);
debug_assert!(start < len - 8, "DocType must have a name");
Ok(Event::DocType(BytesText::from_escaped(&buf[8 + start..])))
}
_ => Err(bang_type.to_err()),
}
}

Expand Down Expand Up @@ -974,7 +975,11 @@ where
/// - `position`: Will be increased by amount of bytes consumed
///
/// [events]: crate::events::Event
fn read_bang_element(&mut self, buf: B, position: &mut usize) -> Result<Option<&'r [u8]>>;
fn read_bang_element(
&mut self,
buf: B,
position: &mut usize,
) -> Result<Option<(BangType, &'r [u8])>>;

fn read_element(&mut self, buf: B, position: &mut usize) -> Result<Option<&'r [u8]>>;

Expand Down Expand Up @@ -1040,7 +1045,7 @@ impl<'b, 'i, R: BufRead + 'i> BufferedInput<'b, 'i, &'b mut Vec<u8>> for R {
&mut self,
buf: &'b mut Vec<u8>,
position: &mut usize,
) -> Result<Option<&'b [u8]>> {
) -> Result<Option<(BangType, &'b [u8])>> {
// Peeked one bang ('!') before being called, so it's guaranteed to
// start with it.
let start = buf.len();
Expand Down Expand Up @@ -1083,7 +1088,7 @@ impl<'b, 'i, R: BufRead + 'i> BufferedInput<'b, 'i, &'b mut Vec<u8>> for R {
if read == 0 {
Ok(None)
} else {
Ok(Some(&buf[start..]))
Ok(Some((bang_type, &buf[start..])))
}
}

Expand Down Expand Up @@ -1257,7 +1262,11 @@ impl<'a> BufferedInput<'a, 'a, ()> for &'a [u8] {
}))
}

fn read_bang_element(&mut self, _buf: (), position: &mut usize) -> Result<Option<&'a [u8]>> {
fn read_bang_element(
&mut self,
_buf: (),
position: &mut usize,
) -> Result<Option<(BangType, &'a [u8])>> {
// Peeked one bang ('!') before being called, so it's guaranteed to
// start with it.
debug_assert_eq!(self[0], b'!');
Expand All @@ -1267,7 +1276,7 @@ impl<'a> BufferedInput<'a, 'a, ()> for &'a [u8] {
if let Some((bytes, i)) = bang_type.parse(self, 0) {
*position += i;
*self = &self[i..];
return Ok(Some(bytes));
return Ok(Some((bang_type, bytes)));
}

// Note: Do not update position, so the error points to
Expand Down Expand Up @@ -1351,6 +1360,7 @@ impl<'a> BufferedInput<'a, 'a, ()> for &'a [u8] {
}

/// Possible elements started with `<!`
#[derive(Debug, PartialEq)]
enum BangType {
/// <![CDATA[...]]>
CData,
Expand Down Expand Up @@ -1721,7 +1731,7 @@ mod test {
/// Checks that reading CDATA content works correctly
mod cdata {
use crate::errors::Error;
use crate::reader::BufferedInput;
use crate::reader::{BangType, BufferedInput};

/// Checks that if input begins like CDATA element, but CDATA start sequence
/// is not finished, parsing ends with an error
Expand Down Expand Up @@ -1774,7 +1784,7 @@ mod test {

assert_eq!(
input.read_bang_element(buf, &mut position).unwrap(),
Some(b"![CDATA[".as_ref())
Some((BangType::CData, b"![CDATA[".as_ref()))
);
assert_eq!(position, 11);
}
Expand All @@ -1791,7 +1801,7 @@ mod test {

assert_eq!(
input.read_bang_element(buf, &mut position).unwrap(),
Some(b"![CDATA[cdata]] ]>content".as_ref())
Some((BangType::CData, b"![CDATA[cdata]] ]>content".as_ref()))
);
assert_eq!(position, 28);
}
Expand All @@ -1815,7 +1825,7 @@ mod test {
/// [specification]: https://www.w3.org/TR/xml11/#dt-comment
mod comment {
use crate::errors::Error;
use crate::reader::BufferedInput;
use crate::reader::{BangType, BufferedInput};

#[test]
#[ignore = "start comment sequence fully checked outside of `read_bang_element`"]
Expand Down Expand Up @@ -1917,7 +1927,7 @@ mod test {

assert_eq!(
input.read_bang_element(buf, &mut position).unwrap(),
Some(b"!----".as_ref())
Some((BangType::Comment, b"!----".as_ref()))
);
assert_eq!(position, 6);
}
Expand All @@ -1931,7 +1941,7 @@ mod test {

assert_eq!(
input.read_bang_element(buf, &mut position).unwrap(),
Some(b"!--->comment<---".as_ref())
Some((BangType::Comment, b"!--->comment<---".as_ref()))
);
assert_eq!(position, 17);
}
Expand All @@ -1941,7 +1951,7 @@ mod test {
mod doctype {
mod uppercase {
use crate::errors::Error;
use crate::reader::BufferedInput;
use crate::reader::{BangType, BufferedInput};

#[test]
fn not_properly_start() {
Expand Down Expand Up @@ -1988,7 +1998,7 @@ mod test {

assert_eq!(
input.read_bang_element(buf, &mut position).unwrap(),
Some(b"!DOCTYPE".as_ref())
Some((BangType::DocType, b"!DOCTYPE".as_ref()))
);
assert_eq!(position, 9);
}
Expand All @@ -2014,7 +2024,7 @@ mod test {

mod lowercase {
use crate::errors::Error;
use crate::reader::BufferedInput;
use crate::reader::{BangType, BufferedInput};

#[test]
fn not_properly_start() {
Expand Down Expand Up @@ -2061,7 +2071,7 @@ mod test {

assert_eq!(
input.read_bang_element(buf, &mut position).unwrap(),
Some(b"!doctype".as_ref())
Some((BangType::DocType, b"!doctype".as_ref()))
);
assert_eq!(position, 9);
}
Expand Down

0 comments on commit 05814ea

Please sign in to comment.