diff --git a/Changelog.md b/Changelog.md index 98d1c5c..43ff49a 100644 --- a/Changelog.md +++ b/Changelog.md @@ -27,6 +27,8 @@ ([#367](https://github.com/tafia/quick-xml/pull/367)) - refactor: unify code for buffered and borrowed readers ([#367](https://github.com/tafia/quick-xml/pull/367)) +- fix: fix internal panic message when parse malformed XML + ([#344](https://github.com/tafia/quick-xml/issues/344), [#367](https://github.com/tafia/quick-xml/pull/367)) ## 0.23.0-alpha3 diff --git a/src/reader.rs b/src/reader.rs index e5b26c0..ba595cd 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -274,7 +274,7 @@ impl Reader { // ` match self.reader.read_bang_element(buf, &mut self.buf_position) { Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => self.read_bang(bytes), + Ok(Some((bang_type, bytes))) => self.read_bang(bang_type, bytes), Err(e) => Err(e), }, // ` Reader { /// reads `BytesElement` starting with a `!`, /// return `Comment`, `CData` or `DocType` event - fn read_bang<'a, 'b>(&'a mut self, buf: &'b [u8]) -> Result> { + fn read_bang<'a, 'b>(&'a mut self, bang_type: BangType, buf: &'b [u8]) -> Result> { let uncased_starts_with = |string: &[u8], prefix: &[u8]| { string.len() >= prefix.len() && string[..prefix.len()].eq_ignore_ascii_case(prefix) }; let len = buf.len(); - if buf.starts_with(b"!--") { - // FIXME: actually, isn't, it misses - debug_assert!(len >= 5, "Minimum length guaranteed by read_bang_element"); - if self.check_comments { - // search if '--' not in comments - if let Some(p) = - memchr::memchr_iter(b'-', &buf[3..len - 2]).position(|p| buf[3 + p + 1] == b'-') - { - self.buf_position += len - p; - return Err(Error::UnexpectedToken("--".to_string())); + match bang_type { + BangType::Comment if buf.starts_with(b"!--") => { + if self.check_comments { + // search if '--' not in comments + if let Some(p) = memchr::memchr_iter(b'-', &buf[3..len - 2]) + .position(|p| buf[3 + p + 1] == b'-') + { + self.buf_position += len - p; + return Err(Error::UnexpectedToken("--".to_string())); + } } + Ok(Event::Comment(BytesText::from_escaped(&buf[3..len - 2]))) } - Ok(Event::Comment(BytesText::from_escaped(&buf[3..len - 2]))) - } else if uncased_starts_with(buf, b"![CDATA[") { - Ok(Event::CData(BytesText::from_plain(&buf[8..]))) - } else if uncased_starts_with(buf, b"!DOCTYPE") { - let start = buf[8..] - .iter() - .position(|b| !is_whitespace(*b)) - .unwrap_or_else(|| len - 8); - debug_assert!(start < len - 8, "DocType must have a name"); - Ok(Event::DocType(BytesText::from_escaped(&buf[8 + start..]))) - } else { - unreachable!("Proper bang start guaranteed by read_bang_element"); + BangType::CData if uncased_starts_with(buf, b"![CDATA[") => { + Ok(Event::CData(BytesText::from_plain(&buf[8..]))) + } + BangType::DocType if uncased_starts_with(buf, b"!DOCTYPE") => { + let start = buf[8..] + .iter() + .position(|b| !is_whitespace(*b)) + .unwrap_or_else(|| len - 8); + debug_assert!(start < len - 8, "DocType must have a name"); + Ok(Event::DocType(BytesText::from_escaped(&buf[8 + start..]))) + } + _ => Err(bang_type.to_err()), } } @@ -974,7 +975,11 @@ where /// - `position`: Will be increased by amount of bytes consumed /// /// [events]: crate::events::Event - fn read_bang_element(&mut self, buf: B, position: &mut usize) -> Result>; + fn read_bang_element( + &mut self, + buf: B, + position: &mut usize, + ) -> Result>; fn read_element(&mut self, buf: B, position: &mut usize) -> Result>; @@ -1040,7 +1045,7 @@ impl<'b, 'i, R: BufRead + 'i> BufferedInput<'b, 'i, &'b mut Vec> for R { &mut self, buf: &'b mut Vec, position: &mut usize, - ) -> Result> { + ) -> Result> { // Peeked one bang ('!') before being called, so it's guaranteed to // start with it. let start = buf.len(); @@ -1083,7 +1088,7 @@ impl<'b, 'i, R: BufRead + 'i> BufferedInput<'b, 'i, &'b mut Vec> for R { if read == 0 { Ok(None) } else { - Ok(Some(&buf[start..])) + Ok(Some((bang_type, &buf[start..]))) } } @@ -1257,7 +1262,11 @@ impl<'a> BufferedInput<'a, 'a, ()> for &'a [u8] { })) } - fn read_bang_element(&mut self, _buf: (), position: &mut usize) -> Result> { + fn read_bang_element( + &mut self, + _buf: (), + position: &mut usize, + ) -> Result> { // Peeked one bang ('!') before being called, so it's guaranteed to // start with it. debug_assert_eq!(self[0], b'!'); @@ -1267,7 +1276,7 @@ impl<'a> BufferedInput<'a, 'a, ()> for &'a [u8] { if let Some((bytes, i)) = bang_type.parse(self, 0) { *position += i; *self = &self[i..]; - return Ok(Some(bytes)); + return Ok(Some((bang_type, bytes))); } // Note: Do not update position, so the error points to @@ -1351,6 +1360,7 @@ impl<'a> BufferedInput<'a, 'a, ()> for &'a [u8] { } /// Possible elements started with ` CData, @@ -1721,7 +1731,7 @@ mod test { /// Checks that reading CDATA content works correctly mod cdata { use crate::errors::Error; - use crate::reader::BufferedInput; + use crate::reader::{BangType, BufferedInput}; /// Checks that if input begins like CDATA element, but CDATA start sequence /// is not finished, parsing ends with an error @@ -1774,7 +1784,7 @@ mod test { assert_eq!( input.read_bang_element(buf, &mut position).unwrap(), - Some(b"![CDATA[".as_ref()) + Some((BangType::CData, b"![CDATA[".as_ref())) ); assert_eq!(position, 11); } @@ -1791,7 +1801,7 @@ mod test { assert_eq!( input.read_bang_element(buf, &mut position).unwrap(), - Some(b"![CDATA[cdata]] ]>content".as_ref()) + Some((BangType::CData, b"![CDATA[cdata]] ]>content".as_ref())) ); assert_eq!(position, 28); } @@ -1815,7 +1825,7 @@ mod test { /// [specification]: https://www.w3.org/TR/xml11/#dt-comment mod comment { use crate::errors::Error; - use crate::reader::BufferedInput; + use crate::reader::{BangType, BufferedInput}; #[test] #[ignore = "start comment sequence fully checked outside of `read_bang_element`"] @@ -1917,7 +1927,7 @@ mod test { assert_eq!( input.read_bang_element(buf, &mut position).unwrap(), - Some(b"!----".as_ref()) + Some((BangType::Comment, b"!----".as_ref())) ); assert_eq!(position, 6); } @@ -1931,7 +1941,7 @@ mod test { assert_eq!( input.read_bang_element(buf, &mut position).unwrap(), - Some(b"!--->comment<---".as_ref()) + Some((BangType::Comment, b"!--->comment<---".as_ref())) ); assert_eq!(position, 17); } @@ -1941,7 +1951,7 @@ mod test { mod doctype { mod uppercase { use crate::errors::Error; - use crate::reader::BufferedInput; + use crate::reader::{BangType, BufferedInput}; #[test] fn not_properly_start() { @@ -1988,7 +1998,7 @@ mod test { assert_eq!( input.read_bang_element(buf, &mut position).unwrap(), - Some(b"!DOCTYPE".as_ref()) + Some((BangType::DocType, b"!DOCTYPE".as_ref())) ); assert_eq!(position, 9); } @@ -2014,7 +2024,7 @@ mod test { mod lowercase { use crate::errors::Error; - use crate::reader::BufferedInput; + use crate::reader::{BangType, BufferedInput}; #[test] fn not_properly_start() { @@ -2061,7 +2071,7 @@ mod test { assert_eq!( input.read_bang_element(buf, &mut position).unwrap(), - Some(b"!doctype".as_ref()) + Some((BangType::DocType, b"!doctype".as_ref())) ); assert_eq!(position, 9); }