diff --git a/feed-rs/Cargo.toml b/feed-rs/Cargo.toml index 8e24c0b..c7ef726 100644 --- a/feed-rs/Cargo.toml +++ b/feed-rs/Cargo.toml @@ -25,7 +25,7 @@ travis-ci = { repository = "feed-rs/feed-rs", branch = "master" } chrono = { version = "0.4" } lazy_static = "1.4" mime = "0.3" -quick-xml = { version = "0.23", features = ["encoding"] } +quick-xml = { version = "0.25", features = ["encoding"] } regex = "1.5" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" diff --git a/feed-rs/src/xml/mod.rs b/feed-rs/src/xml/mod.rs index 5c97db2..26f7e90 100644 --- a/feed-rs/src/xml/mod.rs +++ b/feed-rs/src/xml/mod.rs @@ -6,7 +6,8 @@ use std::io::BufRead; use std::mem; use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event}; -use quick_xml::{escape, Reader}; +use quick_xml::name::ResolveResult; +use quick_xml::{escape, NsReader, Reader}; use url::Url; #[cfg(test)] @@ -30,7 +31,7 @@ impl ElementSource { /// * `xml_base_uri` - the base URI if known (e.g. Content-Location, feed URI etc) pub(crate) fn new(xml_data: R, xml_base_uri: Option<&str>) -> XmlResult> { // Create the XML parser - let mut reader = Reader::from_reader(xml_data); + let mut reader = NsReader::from_reader(xml_data); reader.expand_empty_elements(true).trim_markup_names_in_closing_tags(true).trim_text(false); let state = RefCell::new(SourceState::new(reader, xml_base_uri)?); @@ -223,9 +224,8 @@ impl ElementSource { // Wraps the XML source and current depth of iteration struct SourceState { - reader: Reader, + reader: NsReader, buf_event: Vec, - buf_ns: Vec, next: XmlResult>, current_depth: u32, base_uris: Vec<(u32, Url)>, @@ -234,7 +234,7 @@ struct SourceState { impl SourceState { // Wrap the reader in additional state (buffers, tree depth etc) - fn new(reader: Reader, xml_base_uri: Option<&str>) -> XmlResult> { + fn new(reader: NsReader, xml_base_uri: Option<&str>) -> XmlResult> { // If we have a base URI, parse it and init at the root let mut base_uris = Vec::new(); if let Some(xml_base_uri) = xml_base_uri { @@ -243,11 +243,9 @@ impl SourceState { } let buf_event = Vec::with_capacity(512); - let buf_ns = Vec::with_capacity(128); let mut state = SourceState { reader, buf_event, - buf_ns, next: Ok(None), current_depth: 0, base_uris, @@ -259,18 +257,27 @@ impl SourceState { // Returns the next event fn fetch_next(&mut self) -> XmlResult> { + let decoder = self.reader.decoder(); let reader = &mut self.reader; + loop { - let (ns, event) = reader.read_namespaced_event(&mut self.buf_event, &mut self.buf_ns)?; + let (ns_resolution, event) = reader.read_resolved_event_into(&mut self.buf_event)?; match event { // Start of an element Event::Start(ref e) => { // Parse the namespace - let namespace = ns - .map(|bytes| reader.decode(bytes)) - .map(|s| NS::parse(s.as_ref())) - .unwrap_or(self.default_namespace); + let namespace = match ns_resolution { + ResolveResult::Bound(ns) => decoder + .decode(ns.as_ref()) + .map(|decoded| NS::parse(decoded.as_ref())) + .unwrap_or(self.default_namespace), + ResolveResult::Unknown(bytes) => decoder + .decode(&bytes) + .map(|decoded| NS::parse(decoded.as_ref())) + .unwrap_or(self.default_namespace), + ResolveResult::Unbound => self.default_namespace, + }; return Ok(Some(XmlEvent::start(namespace, e, reader))); } @@ -290,10 +297,11 @@ impl SourceState { // CData is converted to text Event::CData(t) => { - let escaped = t.escape(); - let event = XmlEvent::text(&escaped, reader); - if let Ok(Some(ref _t)) = event { - return event; + if let Ok(escaped) = t.escape() { + let event = XmlEvent::text(&escaped, reader); + if let Ok(Some(ref _t)) = event { + return event; + } } } @@ -483,32 +491,42 @@ impl XmlEvent { // Creates a new event corresponding to an XML end-tag fn end(event: &BytesEnd, reader: &Reader) -> XmlEvent { // Parse the name - let name = XmlEvent::parse_name(event.name(), reader); + let name = XmlEvent::parse_name(event.name().as_ref(), reader); XmlEvent::End { name } } // Extracts the element name, dropping the namespace prefix if present fn parse_name(bytes: &[u8], reader: &Reader) -> String { - reader.decode(bytes).split(':').rev().next().unwrap_or("").into() + reader + .decoder() + .decode(bytes) + .ok() + .and_then(|name| name.split(':').rev().next().map(str::to_string)) + .unwrap_or_default() } // Creates a new event corresponding to an XML start-tag fn start(namespace: NS, event: &BytesStart, reader: &Reader) -> XmlEvent { // Parse the name - let name = XmlEvent::parse_name(event.name(), reader); + let name = XmlEvent::parse_name(event.name().as_ref(), reader); // Parse the attributes let attributes = event .attributes() .filter_map(|a| { if let Ok(a) = a { - let name = reader.decode(a.key); + let name = match reader.decoder().decode(a.key.as_ref()) { + Ok(decoded) => decoded, + Err(_) => return None, + }; // Unescape the XML attribute, or use the original value if this fails (broken escape sequence etc) - let value = escape::unescape(a.value.as_ref()) - .map(|v| String::from_utf8_lossy(v.as_ref()).to_string()) - .unwrap_or_else(|_| reader.decode(a.value.as_ref()).to_string()); + let decoded_value = match reader.decoder().decode(&a.value) { + Ok(decoded) => decoded, + Err(_) => return None, + }; + let value = escape::unescape(&decoded_value).unwrap_or_else(|_| decoded_value.to_owned()).to_string(); Some(NameValue { name: name.into(), value }) } else { @@ -522,11 +540,13 @@ impl XmlEvent { // Creates a new event corresponding to an XML text node fn text(text: &BytesText, reader: &Reader) -> XmlResult> { - let text = text.unescape_and_decode(reader)?; + let escaped_text = reader.decoder().decode(text)?; + let unescaped_text = escape::unescape(&escaped_text).map_err(quick_xml::Error::EscapeError)?; + if text.is_empty() { Ok(None) } else { - Ok(Some(XmlEvent::Text(text))) + Ok(Some(XmlEvent::Text(unescaped_text.to_string()))) } } }