Skip to content
This repository has been archived by the owner on May 30, 2022. It is now read-only.

Fix attribute parsing #4

Merged
merged 8 commits into from May 8, 2022
6 changes: 6 additions & 0 deletions Changelog.md
Expand Up @@ -38,6 +38,12 @@
([quick-xml#311](https://github.com/tafia/quick-xml/issues/311))
- feat: add `Reader::get_ref()` and `Reader::get_mut()`, rename
`Reader::into_underlying_reader()` to `Reader::into_inner()`
- refactor: now `Attributes::next()` returns a new type `AttrError` when attribute parsing failed
([#4](https://github.com/Mingun/fast-xml/pull/4))
- test: properly test all paths of attributes parsing ([#4](https://github.com/Mingun/fast-xml/pull/4))
- feat: attribute iterator now implements `FusedIterator` ([#4](https://github.com/Mingun/fast-xml/pull/4))
- fix: fixed many errors in attribute parsing using iterator, returned from `attributes()`
or `html_attributes()` ([#4](https://github.com/Mingun/fast-xml/pull/4))

## 0.23.0-alpha3

Expand Down
43 changes: 19 additions & 24 deletions src/de/map.rs
Expand Up @@ -4,11 +4,12 @@ use crate::{
de::escape::EscapedDeserializer,
de::{DeEvent, Deserializer, XmlRead, INNER_VALUE, UNFLATTEN_PREFIX},
errors::serialize::DeError,
events::attributes::Attribute,
events::attributes::IterState,
events::BytesStart,
};
use serde::de::{self, DeserializeSeed, IntoDeserializer};
use std::borrow::Cow;
use std::ops::Range;

/// Representing state of the `MapAccess` accessor.
enum State {
Expand All @@ -17,7 +18,7 @@ enum State {
Empty,
/// `next_key_seed` checked the attributes list and find it is not exhausted yet.
/// Next call to the `next_value_seed` will deserialize type from the attribute value
Attribute,
Attribute(Range<usize>),
/// The same as `InnerValue`
Nested,
/// Value should be deserialized from the text content of the XML node:
Expand All @@ -41,7 +42,7 @@ where
/// do not store reference to `Attributes` itself but instead create
/// a new object on each advance of `Attributes` iterator, so we need
/// to restore last position before advance.
position: usize,
iter: IterState,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have some other questions about this (below) but the docstring needs to be updated.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Somehow forgot to save changes in the editor, so comment is changed outside of this PR in f9453b9

/// Current state of the accessor that determines what next call to API
/// methods should return.
state: State,
Expand All @@ -59,11 +60,10 @@ where
start: BytesStart<'de>,
fields: &[&'static str],
) -> Result<Self, DeError> {
let position = start.attributes().position;
Ok(MapAccess {
de,
start,
position,
iter: IterState::new(0, false),
state: State::Empty,
unflatten_fields: fields
.iter()
Expand All @@ -72,14 +72,6 @@ where
.collect(),
})
}

fn next_attr(&mut self) -> Result<Option<Attribute>, DeError> {
let mut attributes = self.start.attributes();
attributes.position = self.position;
let next_att = attributes.next().transpose()?;
self.position = attributes.position;
Ok(next_att)
}
}

impl<'de, 'a, R> de::MapAccess<'de> for MapAccess<'de, 'a, R>
Expand All @@ -92,16 +84,17 @@ where
&mut self,
seed: K,
) -> Result<Option<K::Value>, Self::Error> {
// FIXME: There error positions counted from end of tag name - need global position
let slice = self.start.attributes_raw();
let decoder = self.de.reader.decoder();
let has_value_field = self.de.has_value_field;

let mut attributes = self.start.attributes();
attributes.position = self.position;
if let Some(a) = attributes.next().transpose()? {
if let Some(a) = self.iter.next(slice).transpose()? {
// try getting map from attributes (key= "value")
self.state = State::Attribute;
let (key, value) = a.into();
self.state = State::Attribute(value.unwrap_or_default());
seed.deserialize(EscapedDeserializer::new(
Cow::Borrowed(a.key),
Cow::Borrowed(&slice[key]),
decoder,
false,
))
Expand Down Expand Up @@ -172,13 +165,15 @@ where
seed: K,
) -> Result<K::Value, Self::Error> {
match std::mem::replace(&mut self.state, State::Empty) {
State::Attribute => {
State::Attribute(value) => {
let slice = self.start.attributes_raw();
let decoder = self.de.reader.decoder();
match self.next_attr()? {
Some(a) => seed.deserialize(EscapedDeserializer::new(a.value, decoder, true)),
// We set `Attribute` state only when we are sure that `next_attr()` returns a value
None => unreachable!(),
}

seed.deserialize(EscapedDeserializer::new(
Cow::Borrowed(&slice[value]),
decoder,
true,
))
}
State::Nested | State::InnerValue => seed.deserialize(&mut *self.de),
State::Empty => Err(DeError::EndOfAttributes),
Expand Down
53 changes: 20 additions & 33 deletions src/errors.rs
@@ -1,6 +1,7 @@
//! Error management module

use crate::escape::EscapeError;
use crate::events::attributes::AttrError;
use std::str::Utf8Error;

/// The error type used by this crate.
Expand All @@ -27,14 +28,8 @@ pub enum Error {
TextNotFound,
/// `Event::XmlDecl` must start with *version* attribute
XmlDeclWithoutVersion(Option<String>),
/// Attribute Name contains quote
NameWithQuote(usize),
/// Attribute key not followed by with `=`
NoEqAfterName(usize),
/// Attribute value not quoted
UnquotedValue(usize),
/// Duplicate attribute
DuplicatedAttribute(usize, usize),
/// Attribute parsing error
InvalidAttr(AttrError),
/// Escape error
EscapeError(EscapeError),
}
Expand Down Expand Up @@ -63,6 +58,13 @@ impl From<EscapeError> for Error {
}
}

impl From<AttrError> for Error {
#[inline]
fn from(error: AttrError) -> Self {
Error::InvalidAttr(error)
}
}

/// A specialized `Result` type where the error is hard-wired to [`Error`].
///
/// [`Error`]: enum.Error.html
Expand All @@ -73,7 +75,7 @@ impl std::fmt::Display for Error {
match self {
Error::Io(e) => write!(f, "I/O error: {}", e),
Error::Utf8(e) => write!(f, "UTF8 error: {}", e),
Error::UnexpectedEof(e) => write!(f, "Unexpected EOF during reading {}.", e),
Error::UnexpectedEof(e) => write!(f, "Unexpected EOF during reading {}", e),
Error::EndEventMismatch { expected, found } => {
write!(f, "Expecting </{}> found </{}>", expected, found)
}
Expand All @@ -89,30 +91,7 @@ impl std::fmt::Display for Error {
"XmlDecl must start with 'version' attribute, found {:?}",
e
),
Error::NameWithQuote(e) => write!(
f,
"error while parsing attribute at position {}: \
Attribute key cannot contain quote.",
e
),
Error::NoEqAfterName(e) => write!(
f,
"error while parsing attribute at position {}: \
Attribute key must be directly followed by = or space",
e
),
Error::UnquotedValue(e) => write!(
f,
"error while parsing attribute at position {}: \
Attribute value must start with a quote.",
e
),
Error::DuplicatedAttribute(pos1, pos2) => write!(
f,
"error while parsing attribute at position {0}: \
Duplicate attribute at position {1} and {0}",
pos1, pos2
),
Error::InvalidAttr(e) => write!(f, "error while parsing attribute: {}", e),
Error::EscapeError(e) => write!(f, "{}", e),
}
}
Expand All @@ -123,6 +102,7 @@ impl std::error::Error for Error {
match self {
Error::Io(e) => Some(e),
Error::Utf8(e) => Some(e),
Error::InvalidAttr(e) => Some(e),
Error::EscapeError(e) => Some(e),
_ => None,
}
Expand Down Expand Up @@ -239,4 +219,11 @@ pub mod serialize {
DeError::Float(e)
}
}

impl From<AttrError> for DeError {
#[inline]
fn from(e: AttrError) -> Self {
DeError::Xml(e.into())
}
}
}