Skip to content

Commit

Permalink
Merge pull request tafia#4 from Mingun/fix-attributes
Browse files Browse the repository at this point in the history
Fix attribute parsing and provide information about attribute shapes
  • Loading branch information
Mingun committed May 8, 2022
2 parents 1d9af37 + c81d25c commit 30e0c23
Show file tree
Hide file tree
Showing 11 changed files with 2,295 additions and 463 deletions.
6 changes: 6 additions & 0 deletions Changelog.md
Expand Up @@ -38,6 +38,12 @@
([quick-xml#311](https://github.com/tafia/quick-xml/issues/311))
- feat: add `Reader::get_ref()` and `Reader::get_mut()`, rename
`Reader::into_underlying_reader()` to `Reader::into_inner()`
- refactor: now `Attributes::next()` returns a new type `AttrError` when attribute parsing failed
([#4](https://github.com/Mingun/fast-xml/pull/4))
- test: properly test all paths of attributes parsing ([#4](https://github.com/Mingun/fast-xml/pull/4))
- feat: attribute iterator now implements `FusedIterator` ([#4](https://github.com/Mingun/fast-xml/pull/4))
- fix: fixed many errors in attribute parsing using iterator, returned from `attributes()`
or `html_attributes()` ([#4](https://github.com/Mingun/fast-xml/pull/4))

## 0.23.0-alpha3

Expand Down
43 changes: 19 additions & 24 deletions src/de/map.rs
Expand Up @@ -4,11 +4,12 @@ use crate::{
de::escape::EscapedDeserializer,
de::{DeEvent, Deserializer, XmlRead, INNER_VALUE, UNFLATTEN_PREFIX},
errors::serialize::DeError,
events::attributes::Attribute,
events::attributes::IterState,
events::BytesStart,
};
use serde::de::{self, DeserializeSeed, IntoDeserializer};
use std::borrow::Cow;
use std::ops::Range;

/// Representing state of the `MapAccess` accessor.
enum State {
Expand All @@ -17,7 +18,7 @@ enum State {
Empty,
/// `next_key_seed` checked the attributes list and find it is not exhausted yet.
/// Next call to the `next_value_seed` will deserialize type from the attribute value
Attribute,
Attribute(Range<usize>),
/// The same as `InnerValue`
Nested,
/// Value should be deserialized from the text content of the XML node:
Expand All @@ -41,7 +42,7 @@ where
/// do not store reference to `Attributes` itself but instead create
/// a new object on each advance of `Attributes` iterator, so we need
/// to restore last position before advance.
position: usize,
iter: IterState,
/// Current state of the accessor that determines what next call to API
/// methods should return.
state: State,
Expand All @@ -59,11 +60,10 @@ where
start: BytesStart<'de>,
fields: &[&'static str],
) -> Result<Self, DeError> {
let position = start.attributes().position;
Ok(MapAccess {
de,
start,
position,
iter: IterState::new(0, false),
state: State::Empty,
unflatten_fields: fields
.iter()
Expand All @@ -72,14 +72,6 @@ where
.collect(),
})
}

fn next_attr(&mut self) -> Result<Option<Attribute>, DeError> {
let mut attributes = self.start.attributes();
attributes.position = self.position;
let next_att = attributes.next().transpose()?;
self.position = attributes.position;
Ok(next_att)
}
}

impl<'de, 'a, R> de::MapAccess<'de> for MapAccess<'de, 'a, R>
Expand All @@ -92,16 +84,17 @@ where
&mut self,
seed: K,
) -> Result<Option<K::Value>, Self::Error> {
// FIXME: There error positions counted from end of tag name - need global position
let slice = self.start.attributes_raw();
let decoder = self.de.reader.decoder();
let has_value_field = self.de.has_value_field;

let mut attributes = self.start.attributes();
attributes.position = self.position;
if let Some(a) = attributes.next().transpose()? {
if let Some(a) = self.iter.next(slice).transpose()? {
// try getting map from attributes (key= "value")
self.state = State::Attribute;
let (key, value) = a.into();
self.state = State::Attribute(value.unwrap_or_default());
seed.deserialize(EscapedDeserializer::new(
Cow::Borrowed(a.key),
Cow::Borrowed(&slice[key]),
decoder,
false,
))
Expand Down Expand Up @@ -172,13 +165,15 @@ where
seed: K,
) -> Result<K::Value, Self::Error> {
match std::mem::replace(&mut self.state, State::Empty) {
State::Attribute => {
State::Attribute(value) => {
let slice = self.start.attributes_raw();
let decoder = self.de.reader.decoder();
match self.next_attr()? {
Some(a) => seed.deserialize(EscapedDeserializer::new(a.value, decoder, true)),
// We set `Attribute` state only when we are sure that `next_attr()` returns a value
None => unreachable!(),
}

seed.deserialize(EscapedDeserializer::new(
Cow::Borrowed(&slice[value]),
decoder,
true,
))
}
State::Nested | State::InnerValue => seed.deserialize(&mut *self.de),
State::Empty => Err(DeError::EndOfAttributes),
Expand Down
53 changes: 20 additions & 33 deletions src/errors.rs
@@ -1,6 +1,7 @@
//! Error management module

use crate::escape::EscapeError;
use crate::events::attributes::AttrError;
use std::str::Utf8Error;

/// The error type used by this crate.
Expand All @@ -27,14 +28,8 @@ pub enum Error {
TextNotFound,
/// `Event::XmlDecl` must start with *version* attribute
XmlDeclWithoutVersion(Option<String>),
/// Attribute Name contains quote
NameWithQuote(usize),
/// Attribute key not followed by with `=`
NoEqAfterName(usize),
/// Attribute value not quoted
UnquotedValue(usize),
/// Duplicate attribute
DuplicatedAttribute(usize, usize),
/// Attribute parsing error
InvalidAttr(AttrError),
/// Escape error
EscapeError(EscapeError),
}
Expand Down Expand Up @@ -63,6 +58,13 @@ impl From<EscapeError> for Error {
}
}

impl From<AttrError> for Error {
#[inline]
fn from(error: AttrError) -> Self {
Error::InvalidAttr(error)
}
}

/// A specialized `Result` type where the error is hard-wired to [`Error`].
///
/// [`Error`]: enum.Error.html
Expand All @@ -73,7 +75,7 @@ impl std::fmt::Display for Error {
match self {
Error::Io(e) => write!(f, "I/O error: {}", e),
Error::Utf8(e) => write!(f, "UTF8 error: {}", e),
Error::UnexpectedEof(e) => write!(f, "Unexpected EOF during reading {}.", e),
Error::UnexpectedEof(e) => write!(f, "Unexpected EOF during reading {}", e),
Error::EndEventMismatch { expected, found } => {
write!(f, "Expecting </{}> found </{}>", expected, found)
}
Expand All @@ -89,30 +91,7 @@ impl std::fmt::Display for Error {
"XmlDecl must start with 'version' attribute, found {:?}",
e
),
Error::NameWithQuote(e) => write!(
f,
"error while parsing attribute at position {}: \
Attribute key cannot contain quote.",
e
),
Error::NoEqAfterName(e) => write!(
f,
"error while parsing attribute at position {}: \
Attribute key must be directly followed by = or space",
e
),
Error::UnquotedValue(e) => write!(
f,
"error while parsing attribute at position {}: \
Attribute value must start with a quote.",
e
),
Error::DuplicatedAttribute(pos1, pos2) => write!(
f,
"error while parsing attribute at position {0}: \
Duplicate attribute at position {1} and {0}",
pos1, pos2
),
Error::InvalidAttr(e) => write!(f, "error while parsing attribute: {}", e),
Error::EscapeError(e) => write!(f, "{}", e),
}
}
Expand All @@ -123,6 +102,7 @@ impl std::error::Error for Error {
match self {
Error::Io(e) => Some(e),
Error::Utf8(e) => Some(e),
Error::InvalidAttr(e) => Some(e),
Error::EscapeError(e) => Some(e),
_ => None,
}
Expand Down Expand Up @@ -239,4 +219,11 @@ pub mod serialize {
DeError::Float(e)
}
}

impl From<AttrError> for DeError {
#[inline]
fn from(e: AttrError) -> Self {
DeError::Xml(e.into())
}
}
}

0 comments on commit 30e0c23

Please sign in to comment.