Skip to content

Commit

Permalink
Merge pull request tafia#684 from Mingun/errors
Browse files Browse the repository at this point in the history
Add new tests for syntax and ill-formed parser errors and fix... emm... errors
  • Loading branch information
Mingun committed Nov 22, 2023
2 parents 64c4249 + 2c55638 commit 9fb181a
Show file tree
Hide file tree
Showing 15 changed files with 928 additions and 254 deletions.
16 changes: 14 additions & 2 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,39 @@ configuration is serializable.
- [#677]: Added methods `config()` and `config_mut()` to inspect and change the parser
configuration. Previous builder methods on `Reader` / `NsReader` was replaced by
direct access to fields of config using `reader.config_mut().<...>`.
- #[#684]: Added a method `Config::enable_all_checks` to turn on or off all
well-formedness checks.

### Bug Fixes

- [#622]: Fix wrong disregarding of not closed markup, such as lone `<`.
- [#684]: Fix incorrect position reported for `Error::IllFormed(DoubleHyphenInComment)`.
- [#684]: Fix incorrect position reported for `Error::IllFormed(MissingDoctypeName)`.

### Misc Changes

- [#675]: Minimum supported version of serde raised to 1.0.139
- [#675]: Rework the `quick_xml::Error` type to provide more accurate information:
- `Error::EndEventMismatch` replaced by `IllFormedError::MismatchedEnd` in some cases
- `Error::EndEventMismatch` replaced by `IllFormedError::UnmatchedEnd` in some cases
- `Error::EndEventMismatch` replaced by `IllFormedError::MismatchedEndTag` in some cases
- `Error::EndEventMismatch` replaced by `IllFormedError::UnmatchedEndTag` in some cases
- `Error::TextNotFound` was removed because not used
- `Error::UnexpectedBang` replaced by `SyntaxError`
- `Error::UnexpectedEof` replaced by `SyntaxError` in some cases
- `Error::UnexpectedEof` replaced by `IllFormedError` in some cases
- `Error::UnexpectedToken` replaced by `IllFormedError::DoubleHyphenInComment`
- `Error::XmlDeclWithoutVersion` replaced by `IllFormedError::MissingDeclVersion` (in [#684])
- `Error::EmptyDocType` replaced by `IllFormedError::MissingDoctypeName` (in [#684])
- [#684]: Changed positions reported for `SyntaxError`s: now they are always points
to the start of markup (i. e. to the `<` character) with error.
- [#684]: Now `<??>` parsed as `Event::PI` with empty content instead of raising
syntax error.
- [#684]: Now `<?xml?>` parsed as `Event::Decl` instead of `Event::PI`.

[#513]: https://github.com/tafia/quick-xml/issues/513
[#622]: https://github.com/tafia/quick-xml/issues/622
[#675]: https://github.com/tafia/quick-xml/pull/675
[#677]: https://github.com/tafia/quick-xml/pull/677
[#684]: https://github.com/tafia/quick-xml/pull/684


## 0.31.0 -- 2023-10-22
Expand Down
24 changes: 12 additions & 12 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2649,7 +2649,7 @@ where
/// |[`DeEvent::Start`]|`<any-tag>...</any-tag>` |Emits [`UnexpectedStart("any-tag")`](DeError::UnexpectedStart)
/// |[`DeEvent::End`] |`</tag>` |Returns an empty slice. The reader guarantee that tag will match the open one
/// |[`DeEvent::Text`] |`text content` or `<![CDATA[cdata content]]>` (probably mixed)|Returns event content unchanged, expects the `</tag>` after that
/// |[`DeEvent::Eof`] | |Emits [`InvalidXml(IllFormed(MissedEnd))`](DeError::InvalidXml)
/// |[`DeEvent::Eof`] | |Emits [`InvalidXml(IllFormed(MissingEndTag))`](DeError::InvalidXml)
///
/// [`Text`]: Event::Text
/// [`CData`]: Event::CData
Expand Down Expand Up @@ -3642,7 +3642,7 @@ mod tests {

match de.read_to_end(QName(b"tag")) {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::MissedEnd("tag".into()))
assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand All @@ -3661,7 +3661,7 @@ mod tests {

match de.read_to_end(QName(b"tag")) {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::MissedEnd("tag".into()))
assert_eq!(cause, IllFormedError::MissingEndTag("tag".into()))
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -3756,7 +3756,7 @@ mod tests {
fn read_string() {
match from_str::<String>(r#"</root>"#) {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("root".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("root".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand All @@ -3770,7 +3770,7 @@ mod tests {
match from_str::<String>(r#"<root></other>"#) {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => assert_eq!(
cause,
IllFormedError::MismatchedEnd {
IllFormedError::MismatchedEndTag {
expected: "root".into(),
found: "other".into(),
}
Expand Down Expand Up @@ -4098,7 +4098,7 @@ mod tests {
assert_eq!(de.next().unwrap(), DeEvent::End(BytesEnd::new("tag")));
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag2".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag2".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -4241,7 +4241,7 @@ mod tests {
let mut de = make_de("</tag>");
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -4320,7 +4320,7 @@ mod tests {
assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -4352,7 +4352,7 @@ mod tests {
assert_eq!(de.next().unwrap(), DeEvent::Text("text cdata ".into()));
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -4458,7 +4458,7 @@ mod tests {
assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata ".into()));
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -4488,7 +4488,7 @@ mod tests {
assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata text".into()));
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down Expand Up @@ -4538,7 +4538,7 @@ mod tests {
assert_eq!(de.next().unwrap(), DeEvent::Text(" cdata cdata2 ".into()));
match de.next() {
Err(DeError::InvalidXml(Error::IllFormed(cause))) => {
assert_eq!(cause, IllFormedError::UnmatchedEnd("tag".into()));
assert_eq!(cause, IllFormedError::UnmatchedEndTag("tag".into()));
}
x => panic!(
"Expected `Err(InvalidXml(IllFormed(_)))`, but got `{:?}`",
Expand Down
72 changes: 36 additions & 36 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,36 @@ impl std::error::Error for SyntaxError {}
/// [well-formed]: https://www.w3.org/TR/xml11/#dt-wellformed
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum IllFormedError {
/// A `version` attribute was not found in an XML declaration or is not the
/// first attribute.
///
/// According to the [specification], the XML declaration (`<?xml ?>`) MUST contain
/// a `version` attribute and it MUST be the first attribute. This error indicates,
/// that the declaration does not contain attributes at all (if contains `None`)
/// or either `version` attribute is not present or not the first attribute in
/// the declaration. In the last case it contains the name of the found attribute.
///
/// [specification]: https://www.w3.org/TR/xml11/#sec-prolog-dtd
MissingDeclVersion(Option<String>),
/// A document type definition (DTD) does not contain a name of a root element.
///
/// According to the [specification], document type definition (`<!DOCTYPE foo>`)
/// MUST contain a name which defines a document type (`foo`). If that name
/// is missed, this error is returned.
///
/// [specification]: https://www.w3.org/TR/xml11/#NT-doctypedecl
MissingDoctypeName,
/// The end tag was not found during reading of a sub-tree of elements due to
/// encountering an EOF from the underlying reader. This error is returned from
/// [`Reader::read_to_end`].
///
/// [`Reader::read_to_end`]: crate::reader::Reader::read_to_end
MissedEnd(String),
MissingEndTag(String),
/// The specified end tag was encountered without corresponding open tag at the
/// same level of hierarchy
UnmatchedEnd(String),
UnmatchedEndTag(String),
/// The specified end tag does not match the start tag at that nesting level.
MismatchedEnd {
MismatchedEndTag {
/// Name of open tag, that is expected to be closed
expected: String,
/// Name of actually closed tag
Expand All @@ -103,15 +122,25 @@ pub enum IllFormedError {
impl fmt::Display for IllFormedError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::MissedEnd(tag) => write!(
Self::MissingDeclVersion(None) => {
write!(f, "an XML declaration does not contain `version` attribute")
}
Self::MissingDeclVersion(Some(attr)) => {
write!(f, "an XML declaration must start with `version` attribute, but in starts with `{}`", attr)
}
Self::MissingDoctypeName => write!(
f,
"`<!DOCTYPE>` declaration does not contain a name of a document type"
),
Self::MissingEndTag(tag) => write!(
f,
"start tag not closed: `</{}>` not found before end of input",
tag,
),
Self::UnmatchedEnd(tag) => {
Self::UnmatchedEndTag(tag) => {
write!(f, "close tag `</{}>` does not match any open tag", tag)
}
Self::MismatchedEnd { expected, found } => write!(
Self::MismatchedEndTag { expected, found } => write!(
f,
"expected `</{}>`, but `</{}>` was found",
expected, found,
Expand Down Expand Up @@ -143,25 +172,6 @@ pub enum Error {
///
/// [`encoding`]: index.html#encoding
NonDecodable(Option<Utf8Error>),
/// A `version` attribute was not found in an XML declaration or is not the
/// first attribute.
///
/// According to the [specification], the XML declaration (`<?xml ?>`) MUST contain
/// a `version` attribute and it MUST be the first attribute. This error indicates,
/// that the declaration does not contain attributes at all (if contains `None`)
/// or either `version` attribute is not present or not the first attribute in
/// the declaration. In the last case it contains the name of the found attribute.
///
/// [specification]: https://www.w3.org/TR/xml11/#sec-prolog-dtd
XmlDeclWithoutVersion(Option<String>),
/// A document type definition (DTD) does not contain a name of a root element.
///
/// According to the [specification], document type definition (`<!doctype foo>`)
/// MUST contain a name which defines a document type. If that name is missed,
/// this error is returned.
///
/// [specification]: https://www.w3.org/TR/xml11/#NT-doctypedecl
EmptyDocType,
/// Attribute parsing error
InvalidAttr(AttrError),
/// Escape error
Expand Down Expand Up @@ -189,7 +199,7 @@ pub enum Error {
impl Error {
pub(crate) fn missed_end(name: QName, decoder: Decoder) -> Self {
match decoder.decode(name.as_ref()) {
Ok(name) => IllFormedError::MissedEnd(name.into()).into(),
Ok(name) => IllFormedError::MissingEndTag(name.into()).into(),
Err(err) => err.into(),
}
}
Expand Down Expand Up @@ -261,16 +271,6 @@ impl fmt::Display for Error {
Error::IllFormed(e) => write!(f, "ill-formed document: {}", e),
Error::NonDecodable(None) => write!(f, "Malformed input, decoding impossible"),
Error::NonDecodable(Some(e)) => write!(f, "Malformed UTF-8 input: {}", e),
Error::XmlDeclWithoutVersion(None) => {
write!(f, "an XML declaration does not contain `version` attribute")
}
Error::XmlDeclWithoutVersion(Some(attr)) => {
write!(f, "an XML declaration must start with `version` attribute, but in starts with `{}`", attr)
}
Error::EmptyDocType => write!(
f,
"`<!DOCTYPE>` declaration does not contain a name of a document type"
),
Error::InvalidAttr(e) => write!(f, "error while parsing attribute: {}", e),
Error::EscapeError(e) => write!(f, "{}", e),
Error::UnknownPrefix(prefix) => {
Expand Down
18 changes: 10 additions & 8 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use std::ops::Deref;
use std::str::from_utf8;

use crate::encoding::Decoder;
use crate::errors::{Error, Result};
use crate::errors::{Error, IllFormedError, Result};
use crate::escape::{escape, partial_escape, unescape_with};
use crate::name::{LocalName, QName};
use crate::reader::is_whitespace;
Expand Down Expand Up @@ -391,12 +391,12 @@ impl<'a> BytesDecl<'a> {
/// In case of multiple attributes value of the first one is returned.
///
/// If version is missed in the declaration, or the first thing is not a version,
/// [`Error::XmlDeclWithoutVersion`] will be returned.
/// [`IllFormedError::MissingDeclVersion`] will be returned.
///
/// # Examples
///
/// ```
/// use quick_xml::Error;
/// use quick_xml::errors::{Error, IllFormedError};
/// use quick_xml::events::{BytesDecl, BytesStart};
///
/// // <?xml version='1.1'?>
Expand All @@ -410,21 +410,21 @@ impl<'a> BytesDecl<'a> {
/// // <?xml encoding='utf-8'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
/// match decl.version() {
/// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding"),
/// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
/// _ => assert!(false),
/// }
///
/// // <?xml encoding='utf-8' version='1.1'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
/// match decl.version() {
/// Err(Error::XmlDeclWithoutVersion(Some(key))) => assert_eq!(key, "encoding"),
/// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
/// _ => assert!(false),
/// }
///
/// // <?xml?>
/// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
/// match decl.version() {
/// Err(Error::XmlDeclWithoutVersion(None)) => {},
/// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
/// _ => assert!(false),
/// }
/// ```
Expand All @@ -437,12 +437,14 @@ impl<'a> BytesDecl<'a> {
// first attribute was not "version"
Some(Ok(a)) => {
let found = from_utf8(a.key.as_ref())?.to_string();
Err(Error::XmlDeclWithoutVersion(Some(found)))
Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(
found,
))))
}
// error parsing attributes
Some(Err(e)) => Err(e.into()),
// no attributes
None => Err(Error::XmlDeclWithoutVersion(None)),
None => Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))),
}
}

Expand Down
10 changes: 9 additions & 1 deletion src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ macro_rules! impl_buffered_source {
buf.push(b'!');
self $(.$reader)? .consume(1);

let bang_type = BangType::new(self.peek_one() $(.$await)? ?)?;
let bang_type = BangType::new(self.peek_one() $(.$await)? ?, position)?;

loop {
match self $(.$reader)? .fill_buf() $(.$await)? {
Expand Down Expand Up @@ -139,6 +139,10 @@ macro_rules! impl_buffered_source {
}
}

// <!....EOF
// ^^^^^ - `buf` does not contains `<`, but we want to report error at `<`,
// so we move offset to it (+1 for `<`)
*position -= 1;
Err(bang_type.to_err())
}

Expand Down Expand Up @@ -182,6 +186,10 @@ macro_rules! impl_buffered_source {
};
}

// <.....EOF
// ^^^^^ - `buf` does not contains `<`, but we want to report error at `<`,
// so we move offset to it (+1 for `<`)
*position -= 1;
Err(Error::Syntax(SyntaxError::UnclosedTag))
}

Expand Down

0 comments on commit 9fb181a

Please sign in to comment.