From 3e119873651bbc7ef315112fd6bfd94470a1dd0c Mon Sep 17 00:00:00 2001 From: Mingun Date: Sat, 30 Jul 2022 23:47:57 +0500 Subject: [PATCH 1/6] Implement `read_to_end` using a macro => deduplicate code in readers --- src/reader/buffered_reader.rs | 21 ++------------------- src/reader/mod.rs | 30 ++++++++++++++++++++++++++++++ src/reader/slice_reader.rs | 20 +------------------- 3 files changed, 33 insertions(+), 38 deletions(-) diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index fcc9ec38..ba21716b 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -146,26 +146,9 @@ impl Reader { /// [`check_end_names`]: Self::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result<()> { - let mut depth = 0; - loop { + read_to_end!(self, end, buf, { buf.clear(); - match self.read_event_into(buf) { - Err(e) => return Err(e), - - Ok(Event::Start(e)) if e.name() == end => depth += 1, - Ok(Event::End(e)) if e.name() == end => { - if depth == 0 { - return Ok(()); - } - depth -= 1; - } - Ok(Event::Eof) => { - let name = self.decoder().decode(end.as_ref()); - return Err(Error::UnexpectedEof(format!("", name))); - } - _ => (), - } - } + }) } /// Reads optional text between start and end tags. diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 3320acc4..82b4606c 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -127,6 +127,36 @@ macro_rules! configure_methods { }; } +/// Generalization of `read_to_end` method for buffered and borrowed readers +macro_rules! read_to_end { + ( + $self:expr, $end:expr, $buf:expr, + // Code block that performs clearing of internal buffer after read of each event + $clear:block + ) => {{ + let mut depth = 0; + loop { + $clear + match $self.read_event_impl($buf) { + Err(e) => return Err(e), + + Ok(Event::Start(e)) if e.name() == $end => depth += 1, + Ok(Event::End(e)) if e.name() == $end => { + if depth == 0 { + return Ok(()); + } + depth -= 1; + } + Ok(Event::Eof) => { + let name = $self.decoder().decode($end.as_ref()); + return Err(Error::UnexpectedEof(format!("", name))); + } + _ => (), + } + } + }}; +} + mod buffered_reader; mod ns_reader; mod parser; diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index f4bb8706..ddf2618e 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -138,25 +138,7 @@ impl<'a> Reader<&'a [u8]> { /// [`check_end_names`]: Self::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end(&mut self, end: QName) -> Result<()> { - let mut depth = 0; - loop { - match self.read_event() { - Err(e) => return Err(e), - - Ok(Event::Start(e)) if e.name() == end => depth += 1, - Ok(Event::End(e)) if e.name() == end => { - if depth == 0 { - return Ok(()); - } - depth -= 1; - } - Ok(Event::Eof) => { - let name = self.decoder().decode(end.as_ref()); - return Err(Error::UnexpectedEof(format!("", name))); - } - _ => (), - } - } + read_to_end!(self, end, (), {}) } } From 5791023b174e3b9bb9c467faa1cb58a5c385c543 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 24 Jul 2022 02:17:51 +0500 Subject: [PATCH 2/6] Move reusable parts of a reader to macros This commit only moves code and will allow to share the code with async implementations It is better to review it in the TortoiseGitMerge with whitespace changes ignored --- src/reader/buffered_reader.rs | 408 +++++++++++++++++----------------- src/reader/mod.rs | 163 ++++++++------ 2 files changed, 303 insertions(+), 268 deletions(-) diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index ba21716b..6af7b995 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -12,6 +12,213 @@ use crate::events::Event; use crate::name::QName; use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, XmlSource}; +macro_rules! impl_buffered_source { + () => { + #[inline] + fn read_bytes_until( + &mut self, + byte: u8, + buf: &'b mut Vec, + position: &mut usize, + ) -> Result> { + // search byte must be within the ascii range + debug_assert!(byte.is_ascii()); + + let mut read = 0; + let mut done = false; + let start = buf.len(); + while !done { + let used = { + let available = match self.fill_buf() { + Ok(n) if n.is_empty() => break, + Ok(n) => n, + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + }; + + match memchr::memchr(byte, available) { + Some(i) => { + buf.extend_from_slice(&available[..i]); + done = true; + i + 1 + } + None => { + buf.extend_from_slice(available); + available.len() + } + } + }; + self.consume(used); + read += used; + } + *position += read; + + if read == 0 { + Ok(None) + } else { + Ok(Some(&buf[start..])) + } + } + + fn read_bang_element( + &mut self, + buf: &'b mut Vec, + position: &mut usize, + ) -> Result> { + // Peeked one bang ('!') before being called, so it's guaranteed to + // start with it. + let start = buf.len(); + let mut read = 1; + buf.push(b'!'); + self.consume(1); + + let bang_type = BangType::new(self.peek_one()?)?; + + loop { + match self.fill_buf() { + // Note: Do not update position, so the error points to + // somewhere sane rather than at the EOF + Ok(n) if n.is_empty() => return Err(bang_type.to_err()), + Ok(available) => { + if let Some((consumed, used)) = bang_type.parse(available, read) { + buf.extend_from_slice(consumed); + + self.consume(used); + read += used; + + *position += read; + break; + } else { + buf.extend_from_slice(available); + + let used = available.len(); + self.consume(used); + read += used; + } + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + } + } + + if read == 0 { + Ok(None) + } else { + Ok(Some((bang_type, &buf[start..]))) + } + } + + #[inline] + fn read_element( + &mut self, + buf: &'b mut Vec, + position: &mut usize, + ) -> Result> { + let mut state = ReadElementState::Elem; + let mut read = 0; + + let start = buf.len(); + loop { + match self.fill_buf() { + Ok(n) if n.is_empty() => break, + Ok(available) => { + if let Some((consumed, used)) = state.change(available) { + buf.extend_from_slice(consumed); + + self.consume(used); + read += used; + + *position += read; + break; + } else { + buf.extend_from_slice(available); + + let used = available.len(); + self.consume(used); + read += used; + } + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + }; + } + + if read == 0 { + Ok(None) + } else { + Ok(Some(&buf[start..])) + } + } + + /// Consume and discard all the whitespace until the next non-whitespace + /// character or EOF. + fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { + loop { + break match self.fill_buf() { + Ok(n) => { + let count = n.iter().position(|b| !is_whitespace(*b)).unwrap_or(n.len()); + if count > 0 { + self.consume(count); + *position += count; + continue; + } else { + Ok(()) + } + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => Err(Error::Io(e)), + }; + } + } + + /// Consume and discard one character if it matches the given byte. Return + /// true if it matched. + fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result { + // search byte must be within the ascii range + debug_assert!(byte.is_ascii()); + + match self.peek_one()? { + Some(b) if b == byte => { + *position += 1; + self.consume(1); + Ok(true) + } + _ => Ok(false), + } + } + + /// Return one character without consuming it, so that future `read_*` calls + /// will still include it. On EOF, return None. + fn peek_one(&mut self) -> Result> { + loop { + break match self.fill_buf() { + Ok(n) if n.is_empty() => Ok(None), + Ok(n) => Ok(Some(n[0])), + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => Err(Error::Io(e)), + }; + } + } + }; +} + +/// Implementation of `XmlSource` for any `BufRead` reader using a user-given +/// `Vec` as buffer that will be borrowed by events. +impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec> for R { + impl_buffered_source!(); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + /// This is an implementation of [`Reader`] for reading from a [`BufRead`] as /// underlying byte stream. impl Reader { @@ -209,207 +416,6 @@ impl Reader> { } } -//////////////////////////////////////////////////////////////////////////////////////////////////// - -/// Implementation of `XmlSource` for any `BufRead` reader using a user-given -/// `Vec` as buffer that will be borrowed by events. -impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec> for R { - #[inline] - fn read_bytes_until( - &mut self, - byte: u8, - buf: &'b mut Vec, - position: &mut usize, - ) -> Result> { - // search byte must be within the ascii range - debug_assert!(byte.is_ascii()); - - let mut read = 0; - let mut done = false; - let start = buf.len(); - while !done { - let used = { - let available = match self.fill_buf() { - Ok(n) if n.is_empty() => break, - Ok(n) => n, - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => { - *position += read; - return Err(Error::Io(e)); - } - }; - - match memchr::memchr(byte, available) { - Some(i) => { - buf.extend_from_slice(&available[..i]); - done = true; - i + 1 - } - None => { - buf.extend_from_slice(available); - available.len() - } - } - }; - self.consume(used); - read += used; - } - *position += read; - - if read == 0 { - Ok(None) - } else { - Ok(Some(&buf[start..])) - } - } - - fn read_bang_element( - &mut self, - buf: &'b mut Vec, - position: &mut usize, - ) -> Result> { - // Peeked one bang ('!') before being called, so it's guaranteed to - // start with it. - let start = buf.len(); - let mut read = 1; - buf.push(b'!'); - self.consume(1); - - let bang_type = BangType::new(self.peek_one()?)?; - - loop { - match self.fill_buf() { - // Note: Do not update position, so the error points to - // somewhere sane rather than at the EOF - Ok(n) if n.is_empty() => return Err(bang_type.to_err()), - Ok(available) => { - if let Some((consumed, used)) = bang_type.parse(available, read) { - buf.extend_from_slice(consumed); - - self.consume(used); - read += used; - - *position += read; - break; - } else { - buf.extend_from_slice(available); - - let used = available.len(); - self.consume(used); - read += used; - } - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => { - *position += read; - return Err(Error::Io(e)); - } - } - } - - if read == 0 { - Ok(None) - } else { - Ok(Some((bang_type, &buf[start..]))) - } - } - - #[inline] - fn read_element( - &mut self, - buf: &'b mut Vec, - position: &mut usize, - ) -> Result> { - let mut state = ReadElementState::Elem; - let mut read = 0; - - let start = buf.len(); - loop { - match self.fill_buf() { - Ok(n) if n.is_empty() => break, - Ok(available) => { - if let Some((consumed, used)) = state.change(available) { - buf.extend_from_slice(consumed); - - self.consume(used); - read += used; - - *position += read; - break; - } else { - buf.extend_from_slice(available); - - let used = available.len(); - self.consume(used); - read += used; - } - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => { - *position += read; - return Err(Error::Io(e)); - } - }; - } - - if read == 0 { - Ok(None) - } else { - Ok(Some(&buf[start..])) - } - } - - /// Consume and discard all the whitespace until the next non-whitespace - /// character or EOF. - fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { - loop { - break match self.fill_buf() { - Ok(n) => { - let count = n.iter().position(|b| !is_whitespace(*b)).unwrap_or(n.len()); - if count > 0 { - self.consume(count); - *position += count; - continue; - } else { - Ok(()) - } - } - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => Err(Error::Io(e)), - }; - } - } - - /// Consume and discard one character if it matches the given byte. Return - /// true if it matched. - fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result { - // search byte must be within the ascii range - debug_assert!(byte.is_ascii()); - - match self.peek_one()? { - Some(b) if b == byte => { - *position += 1; - self.consume(1); - Ok(true) - } - _ => Ok(false), - } - } - - /// Return one character without consuming it, so that future `read_*` calls - /// will still include it. On EOF, return None. - fn peek_one(&mut self) -> Result> { - loop { - break match self.fill_buf() { - Ok(n) if n.is_empty() => Ok(None), - Ok(n) => Ok(Some(n[0])), - Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, - Err(e) => Err(Error::Io(e)), - }; - } - } -} - #[cfg(test)] mod test { use crate::reader::test::check; diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 82b4606c..9025dad5 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -127,6 +127,99 @@ macro_rules! configure_methods { }; } +macro_rules! read_event_impl { + ( + $self:ident, $buf:ident + ) => {{ + let event = match $self.parser.state { + ParseState::Init => $self.read_until_open($buf, true), + ParseState::ClosedTag => $self.read_until_open($buf, false), + ParseState::OpenedTag => $self.read_until_close($buf), + ParseState::Empty => $self.parser.close_expanded_empty(), + ParseState::Exit => return Ok(Event::Eof), + }; + match event { + Err(_) | Ok(Event::Eof) => $self.parser.state = ParseState::Exit, + _ => {} + } + event + }}; +} + +macro_rules! read_until_open { + ( + $self:ident, $buf:ident, $first:ident + ) => {{ + $self.parser.state = ParseState::OpenedTag; + + if $self.parser.trim_text_start { + $self.reader.skip_whitespace(&mut $self.parser.offset)?; + } + + // If we already at the `<` symbol, do not try to return an empty Text event + if $self.reader.skip_one(b'<', &mut $self.parser.offset)? { + return $self.read_event_impl($buf); + } + + match $self + .reader + .read_bytes_until(b'<', $buf, &mut $self.parser.offset) + { + Ok(Some(bytes)) => $self.parser.read_text(bytes, $first), + Ok(None) => Ok(Event::Eof), + Err(e) => Err(e), + } + }}; +} + +macro_rules! read_until_close { + ( + $self:ident, $buf:ident + ) => {{ + $self.parser.state = ParseState::ClosedTag; + + match $self.reader.peek_one() { + // ` match $self + .reader + .read_bang_element($buf, &mut $self.parser.offset) + { + Ok(None) => Ok(Event::Eof), + Ok(Some((bang_type, bytes))) => $self.parser.read_bang(bang_type, bytes), + Err(e) => Err(e), + }, + // ` match $self + // Comment for prevent formatting + .reader + .read_bytes_until(b'>', $buf, &mut $self.parser.offset) + { + Ok(None) => Ok(Event::Eof), + Ok(Some(bytes)) => $self.parser.read_end(bytes), + Err(e) => Err(e), + }, + // ` match $self + // Comment for prevent formatting + .reader + .read_bytes_until(b'>', $buf, &mut $self.parser.offset) + { + Ok(None) => Ok(Event::Eof), + Ok(Some(bytes)) => $self.parser.read_question_mark(bytes), + Err(e) => Err(e), + }, + // `<...` - opening or self-closed tag + Ok(Some(_)) => match $self.reader.read_element($buf, &mut $self.parser.offset) { + Ok(None) => Ok(Event::Eof), + Ok(Some(bytes)) => $self.parser.read_start(bytes), + Err(e) => Err(e), + }, + Ok(None) => Ok(Event::Eof), + Err(e) => Err(e), + } + }}; +} + /// Generalization of `read_to_end` method for buffered and borrowed readers macro_rules! read_to_end { ( @@ -435,18 +528,7 @@ impl Reader { where R: XmlSource<'i, B>, { - let event = match self.parser.state { - ParseState::Init => self.read_until_open(buf, true), - ParseState::ClosedTag => self.read_until_open(buf, false), - ParseState::OpenedTag => self.read_until_close(buf), - ParseState::Empty => self.parser.close_expanded_empty(), - ParseState::Exit => return Ok(Event::Eof), - }; - match event { - Err(_) | Ok(Event::Eof) => self.parser.state = ParseState::Exit, - _ => {} - } - event + read_event_impl!(self, buf) } /// Read until '<' is found and moves reader to an `OpenedTag` state. @@ -456,25 +538,7 @@ impl Reader { where R: XmlSource<'i, B>, { - self.parser.state = ParseState::OpenedTag; - - if self.parser.trim_text_start { - self.reader.skip_whitespace(&mut self.parser.offset)?; - } - - // If we already at the `<` symbol, do not try to return an empty Text event - if self.reader.skip_one(b'<', &mut self.parser.offset)? { - return self.read_event_impl(buf); - } - - match self - .reader - .read_bytes_until(b'<', buf, &mut self.parser.offset) - { - Ok(Some(bytes)) => self.parser.read_text(bytes, first), - Ok(None) => Ok(Event::Eof), - Err(e) => Err(e), - } + read_until_open!(self, buf, first) } /// Private function to read until `>` is found. This function expects that @@ -483,42 +547,7 @@ impl Reader { where R: XmlSource<'i, B>, { - self.parser.state = ParseState::ClosedTag; - - match self.reader.peek_one() { - // ` match self.reader.read_bang_element(buf, &mut self.parser.offset) { - Ok(None) => Ok(Event::Eof), - Ok(Some((bang_type, bytes))) => self.parser.read_bang(bang_type, bytes), - Err(e) => Err(e), - }, - // ` match self - .reader - .read_bytes_until(b'>', buf, &mut self.parser.offset) - { - Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => self.parser.read_end(bytes), - Err(e) => Err(e), - }, - // ` match self - .reader - .read_bytes_until(b'>', buf, &mut self.parser.offset) - { - Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => self.parser.read_question_mark(bytes), - Err(e) => Err(e), - }, - // `<...` - opening or self-closed tag - Ok(Some(_)) => match self.reader.read_element(buf, &mut self.parser.offset) { - Ok(None) => Ok(Event::Eof), - Ok(Some(bytes)) => self.parser.read_start(bytes), - Err(e) => Err(e), - }, - Ok(None) => Ok(Event::Eof), - Err(e) => Err(e), - } + read_until_close!(self, buf) } } From f062bbe1567c30c0b0868b1072c420638001a092 Mon Sep 17 00:00:00 2001 From: Mingun Date: Tue, 26 Jul 2022 21:27:21 +0500 Subject: [PATCH 3/6] Allow to wrap input and reader in tests for more flexibility --- src/reader/buffered_reader.rs | 7 +++- src/reader/mod.rs | 77 +++++++++++++++++++---------------- src/reader/slice_reader.rs | 7 +++- 3 files changed, 53 insertions(+), 38 deletions(-) diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index 6af7b995..a4c552e6 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -421,7 +421,12 @@ mod test { use crate::reader::test::check; use crate::reader::XmlSource; - check!(&mut Vec::new()); + /// Default buffer constructor just pass the byte array from the test + fn identity(input: T) -> T { + input + } + + check!(identity, &mut Vec::new()); #[cfg(feature = "encoding")] mod encoding { diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 9025dad5..fd2d8cc3 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -771,7 +771,12 @@ pub(crate) fn is_whitespace(b: u8) -> bool { mod test { /// Checks the internal implementation of the various reader methods macro_rules! check { - ($buf:expr) => { + ( + // constructor of the XML source on which internal functions will be called + $source:path, + // constructor of the buffer to which read data will stored + $buf:expr + ) => { mod read_bytes_until { use super::*; // Use Bytes for printing bytes as strings for ASCII range @@ -787,7 +792,7 @@ mod test { // ^= 0 assert_eq!( - input + $source(&mut input) .read_bytes_until(b'*', buf, &mut position) .unwrap() .map(Bytes), @@ -806,7 +811,7 @@ mod test { // ^= 6 assert_eq!( - input + $source(&mut input) .read_bytes_until(b'*', buf, &mut position) .unwrap() .map(Bytes), @@ -826,7 +831,7 @@ mod test { // ^= 1 assert_eq!( - input + $source(&mut input) .read_bytes_until(b'*', buf, &mut position) .unwrap() .map(Bytes), @@ -846,7 +851,7 @@ mod test { // ^= 4 assert_eq!( - input + $source(&mut input) .read_bytes_until(b'*', buf, &mut position) .unwrap() .map(Bytes), @@ -866,7 +871,7 @@ mod test { // ^= 7 assert_eq!( - input + $source(&mut input) .read_bytes_until(b'*', buf, &mut position) .unwrap() .map(Bytes), @@ -897,7 +902,7 @@ mod test { let mut input = b"![]]>other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) { Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, @@ -917,7 +922,7 @@ mod test { let mut input = b"![CDATA[other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) { Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, @@ -937,7 +942,7 @@ mod test { // ^= 11 assert_eq!( - input + $source(&mut input) .read_bang_element(buf, &mut position) .unwrap() .map(|(ty, data)| (ty, Bytes(data))), @@ -957,7 +962,7 @@ mod test { // ^= 28 assert_eq!( - input + $source(&mut input) .read_bang_element(buf, &mut position) .unwrap() .map(|(ty, data)| (ty, Bytes(data))), @@ -998,7 +1003,7 @@ mod test { let mut input = b"!- -->other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1016,7 +1021,7 @@ mod test { let mut input = b"!->other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1034,7 +1039,7 @@ mod test { let mut input = b"!--other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1052,7 +1057,7 @@ mod test { let mut input = b"!-->other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1070,7 +1075,7 @@ mod test { let mut input = b"!--->other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1089,7 +1094,7 @@ mod test { // ^= 6 assert_eq!( - input + $source(&mut input) .read_bang_element(buf, &mut position) .unwrap() .map(|(ty, data)| (ty, Bytes(data))), @@ -1106,7 +1111,7 @@ mod test { // ^= 17 assert_eq!( - input + $source(&mut input) .read_bang_element(buf, &mut position) .unwrap() .map(|(ty, data)| (ty, Bytes(data))), @@ -1134,7 +1139,7 @@ mod test { let mut input = b"!D other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1152,7 +1157,7 @@ mod test { let mut input = b"!DOCTYPEother content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1171,7 +1176,7 @@ mod test { // ^= 9 assert_eq!( - input + $source(&mut input) .read_bang_element(buf, &mut position) .unwrap() .map(|(ty, data)| (ty, Bytes(data))), @@ -1187,7 +1192,7 @@ mod test { let mut input = b"!DOCTYPE other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1213,7 +1218,7 @@ mod test { let mut input = b"!d other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1231,7 +1236,7 @@ mod test { let mut input = b"!doctypeother content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1250,7 +1255,7 @@ mod test { // ^= 9 assert_eq!( - input + $source(&mut input) .read_bang_element(buf, &mut position) .unwrap() .map(|(ty, data)| (ty, Bytes(data))), @@ -1266,7 +1271,7 @@ mod test { let mut input = b"!doctype other content".as_ref(); // ^= 0 - match input.read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1293,7 +1298,7 @@ mod test { let mut input = b"".as_ref(); // ^= 0 - assert_eq!(input.read_element(buf, &mut position).unwrap().map(Bytes), None); + assert_eq!($source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), None); assert_eq!(position, 0); } @@ -1310,7 +1315,7 @@ mod test { // ^= 1 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), Some(Bytes(b"")) ); assert_eq!(position, 1); @@ -1324,7 +1329,7 @@ mod test { // ^= 4 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), Some(Bytes(b"tag")) ); assert_eq!(position, 4); @@ -1338,7 +1343,7 @@ mod test { // ^= 2 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), Some(Bytes(b":")) ); assert_eq!(position, 2); @@ -1352,7 +1357,7 @@ mod test { // ^= 5 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), Some(Bytes(b":tag")) ); assert_eq!(position, 5); @@ -1366,7 +1371,7 @@ mod test { // ^= 38 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), Some(Bytes(br#"tag attr-1=">" attr2 = '>' 3attr"#)) ); assert_eq!(position, 38); @@ -1386,7 +1391,7 @@ mod test { // ^= 2 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), Some(Bytes(b"/")) ); assert_eq!(position, 2); @@ -1400,7 +1405,7 @@ mod test { // ^= 5 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), Some(Bytes(b"tag/")) ); assert_eq!(position, 5); @@ -1414,7 +1419,7 @@ mod test { // ^= 3 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), Some(Bytes(b":/")) ); assert_eq!(position, 3); @@ -1428,7 +1433,7 @@ mod test { // ^= 6 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), Some(Bytes(b":tag/")) ); assert_eq!(position, 6); @@ -1442,7 +1447,7 @@ mod test { // ^= 41 assert_eq!( - input.read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), Some(Bytes(br#"tag attr-1="/>" attr2 = '/>' 3attr/"#)) ); assert_eq!(position, 41); diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index ddf2618e..e8ceec3d 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -246,7 +246,12 @@ mod test { use crate::reader::test::check; use crate::reader::XmlSource; - check!(()); + /// Default buffer constructor just pass the byte array from the test + fn identity(input: T) -> T { + input + } + + check!(identity, ()); #[cfg(feature = "encoding")] mod encoding { From b980725818a1158c6db9cae5061919fe0c82471a Mon Sep 17 00:00:00 2001 From: Sophie Tauchert Date: Thu, 14 Jul 2022 15:48:26 +0500 Subject: [PATCH 4/6] Update implementation macros to support async code --- src/reader/buffered_reader.rs | 46 +++++++++++++++++------------------ src/reader/mod.rs | 44 +++++++++++++++++++++------------ src/reader/slice_reader.rs | 2 +- 3 files changed, 53 insertions(+), 39 deletions(-) diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index a4c552e6..6b8dfc97 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -13,9 +13,9 @@ use crate::name::QName; use crate::reader::{is_whitespace, BangType, ReadElementState, Reader, XmlSource}; macro_rules! impl_buffered_source { - () => { + ($($lf:lifetime, $reader:tt, $async:ident, $await:ident)?) => { #[inline] - fn read_bytes_until( + $($async)? fn read_bytes_until $(<$lf>)? ( &mut self, byte: u8, buf: &'b mut Vec, @@ -29,7 +29,7 @@ macro_rules! impl_buffered_source { let start = buf.len(); while !done { let used = { - let available = match self.fill_buf() { + let available = match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) if n.is_empty() => break, Ok(n) => n, Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, @@ -51,7 +51,7 @@ macro_rules! impl_buffered_source { } } }; - self.consume(used); + self $(.$reader)? .consume(used); read += used; } *position += read; @@ -63,7 +63,7 @@ macro_rules! impl_buffered_source { } } - fn read_bang_element( + $($async)? fn read_bang_element $(<$lf>)? ( &mut self, buf: &'b mut Vec, position: &mut usize, @@ -73,12 +73,12 @@ macro_rules! impl_buffered_source { let start = buf.len(); let mut read = 1; buf.push(b'!'); - self.consume(1); + self $(.$reader)? .consume(1); - let bang_type = BangType::new(self.peek_one()?)?; + let bang_type = BangType::new(self.peek_one() $(.$await)? ?)?; loop { - match self.fill_buf() { + match self $(.$reader)? .fill_buf() $(.$await)? { // Note: Do not update position, so the error points to // somewhere sane rather than at the EOF Ok(n) if n.is_empty() => return Err(bang_type.to_err()), @@ -86,7 +86,7 @@ macro_rules! impl_buffered_source { if let Some((consumed, used)) = bang_type.parse(available, read) { buf.extend_from_slice(consumed); - self.consume(used); + self $(.$reader)? .consume(used); read += used; *position += read; @@ -95,7 +95,7 @@ macro_rules! impl_buffered_source { buf.extend_from_slice(available); let used = available.len(); - self.consume(used); + self $(.$reader)? .consume(used); read += used; } } @@ -115,7 +115,7 @@ macro_rules! impl_buffered_source { } #[inline] - fn read_element( + $($async)? fn read_element $(<$lf>)? ( &mut self, buf: &'b mut Vec, position: &mut usize, @@ -125,13 +125,13 @@ macro_rules! impl_buffered_source { let start = buf.len(); loop { - match self.fill_buf() { + match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) if n.is_empty() => break, Ok(available) => { if let Some((consumed, used)) = state.change(available) { buf.extend_from_slice(consumed); - self.consume(used); + self $(.$reader)? .consume(used); read += used; *position += read; @@ -140,7 +140,7 @@ macro_rules! impl_buffered_source { buf.extend_from_slice(available); let used = available.len(); - self.consume(used); + self $(.$reader)? .consume(used); read += used; } } @@ -161,13 +161,13 @@ macro_rules! impl_buffered_source { /// Consume and discard all the whitespace until the next non-whitespace /// character or EOF. - fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { + $($async)? fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { loop { - break match self.fill_buf() { + break match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) => { let count = n.iter().position(|b| !is_whitespace(*b)).unwrap_or(n.len()); if count > 0 { - self.consume(count); + self $(.$reader)? .consume(count); *position += count; continue; } else { @@ -182,14 +182,14 @@ macro_rules! impl_buffered_source { /// Consume and discard one character if it matches the given byte. Return /// true if it matched. - fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result { + $($async)? fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result { // search byte must be within the ascii range debug_assert!(byte.is_ascii()); - match self.peek_one()? { + match self.peek_one() $(.$await)? ? { Some(b) if b == byte => { *position += 1; - self.consume(1); + self $(.$reader)? .consume(1); Ok(true) } _ => Ok(false), @@ -198,9 +198,9 @@ macro_rules! impl_buffered_source { /// Return one character without consuming it, so that future `read_*` calls /// will still include it. On EOF, return None. - fn peek_one(&mut self) -> Result> { + $($async)? fn peek_one(&mut self) -> Result> { loop { - break match self.fill_buf() { + break match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) if n.is_empty() => Ok(None), Ok(n) => Ok(Some(n[0])), Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, @@ -353,7 +353,7 @@ impl Reader { /// [`check_end_names`]: Self::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result<()> { - read_to_end!(self, end, buf, { + read_to_end!(self, end, buf, read_event_impl, { buf.clear(); }) } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index fd2d8cc3..bcaefe6e 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -129,12 +129,15 @@ macro_rules! configure_methods { macro_rules! read_event_impl { ( - $self:ident, $buf:ident + $self:ident, $buf:ident, + $read_until_open:ident, + $read_until_close:ident + $(, $await:ident)? ) => {{ let event = match $self.parser.state { - ParseState::Init => $self.read_until_open($buf, true), - ParseState::ClosedTag => $self.read_until_open($buf, false), - ParseState::OpenedTag => $self.read_until_close($buf), + ParseState::Init => $self.$read_until_open($buf, true) $(.$await)?, + ParseState::ClosedTag => $self.$read_until_open($buf, false) $(.$await)?, + ParseState::OpenedTag => $self.$read_until_close($buf) $(.$await)?, ParseState::Empty => $self.parser.close_expanded_empty(), ParseState::Exit => return Ok(Event::Eof), }; @@ -148,22 +151,25 @@ macro_rules! read_event_impl { macro_rules! read_until_open { ( - $self:ident, $buf:ident, $first:ident + $self:ident, $buf:ident, $first:ident, + $read_event:ident + $(, $await:ident)? ) => {{ $self.parser.state = ParseState::OpenedTag; if $self.parser.trim_text_start { - $self.reader.skip_whitespace(&mut $self.parser.offset)?; + $self.reader.skip_whitespace(&mut $self.parser.offset) $(.$await)? ?; } // If we already at the `<` symbol, do not try to return an empty Text event - if $self.reader.skip_one(b'<', &mut $self.parser.offset)? { - return $self.read_event_impl($buf); + if $self.reader.skip_one(b'<', &mut $self.parser.offset) $(.$await)? ? { + return $self.$read_event($buf) $(.$await)?; } match $self .reader .read_bytes_until(b'<', $buf, &mut $self.parser.offset) + $(.$await)? { Ok(Some(bytes)) => $self.parser.read_text(bytes, $first), Ok(None) => Ok(Event::Eof), @@ -175,14 +181,16 @@ macro_rules! read_until_open { macro_rules! read_until_close { ( $self:ident, $buf:ident + $(, $await:ident)? ) => {{ $self.parser.state = ParseState::ClosedTag; - match $self.reader.peek_one() { + match $self.reader.peek_one() $(.$await)? { // ` match $self .reader .read_bang_element($buf, &mut $self.parser.offset) + $(.$await)? { Ok(None) => Ok(Event::Eof), Ok(Some((bang_type, bytes))) => $self.parser.read_bang(bang_type, bytes), @@ -190,9 +198,9 @@ macro_rules! read_until_close { }, // ` match $self - // Comment for prevent formatting .reader .read_bytes_until(b'>', $buf, &mut $self.parser.offset) + $(.$await)? { Ok(None) => Ok(Event::Eof), Ok(Some(bytes)) => $self.parser.read_end(bytes), @@ -200,16 +208,20 @@ macro_rules! read_until_close { }, // ` match $self - // Comment for prevent formatting .reader .read_bytes_until(b'>', $buf, &mut $self.parser.offset) + $(.$await)? { Ok(None) => Ok(Event::Eof), Ok(Some(bytes)) => $self.parser.read_question_mark(bytes), Err(e) => Err(e), }, // `<...` - opening or self-closed tag - Ok(Some(_)) => match $self.reader.read_element($buf, &mut $self.parser.offset) { + Ok(Some(_)) => match $self + .reader + .read_element($buf, &mut $self.parser.offset) + $(.$await)? + { Ok(None) => Ok(Event::Eof), Ok(Some(bytes)) => $self.parser.read_start(bytes), Err(e) => Err(e), @@ -224,13 +236,15 @@ macro_rules! read_until_close { macro_rules! read_to_end { ( $self:expr, $end:expr, $buf:expr, + $read_event:ident, // Code block that performs clearing of internal buffer after read of each event $clear:block + $(, $await:ident)? ) => {{ let mut depth = 0; loop { $clear - match $self.read_event_impl($buf) { + match $self.$read_event($buf) $(.$await)? { Err(e) => return Err(e), Ok(Event::Start(e)) if e.name() == $end => depth += 1, @@ -528,7 +542,7 @@ impl Reader { where R: XmlSource<'i, B>, { - read_event_impl!(self, buf) + read_event_impl!(self, buf, read_until_open, read_until_close) } /// Read until '<' is found and moves reader to an `OpenedTag` state. @@ -538,7 +552,7 @@ impl Reader { where R: XmlSource<'i, B>, { - read_until_open!(self, buf, first) + read_until_open!(self, buf, first, read_event_impl) } /// Private function to read until `>` is found. This function expects that diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index e8ceec3d..ba87aac0 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -138,7 +138,7 @@ impl<'a> Reader<&'a [u8]> { /// [`check_end_names`]: Self::check_end_names /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag pub fn read_to_end(&mut self, end: QName) -> Result<()> { - read_to_end!(self, end, (), {}) + read_to_end!(self, end, (), read_event_impl, {}) } } From 0ab5f4a84c995bd788abbf80716c34ca5f3690c2 Mon Sep 17 00:00:00 2001 From: Mingun Date: Mon, 1 Aug 2022 23:49:41 +0500 Subject: [PATCH 5/6] Update test macro to support async code --- src/reader/buffered_reader.rs | 8 +- src/reader/mod.rs | 300 ++++++++++++++++++---------------- src/reader/slice_reader.rs | 8 +- 3 files changed, 173 insertions(+), 143 deletions(-) diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index 6b8dfc97..ab33020b 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -426,7 +426,13 @@ mod test { input } - check!(identity, &mut Vec::new()); + check!( + #[test] + read_event_impl, + read_until_close, + identity, + &mut Vec::new() + ); #[cfg(feature = "encoding")] mod encoding { diff --git a/src/reader/mod.rs b/src/reader/mod.rs index bcaefe6e..14ccc5ab 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -786,10 +786,14 @@ mod test { /// Checks the internal implementation of the various reader methods macro_rules! check { ( + #[$test:meta] + $read_event:ident, + $read_until_close:ident, // constructor of the XML source on which internal functions will be called $source:path, // constructor of the buffer to which read data will stored $buf:expr + $(, $async:ident, $await:ident)? ) => { mod read_bytes_until { use super::*; @@ -798,8 +802,8 @@ mod test { use pretty_assertions::assert_eq; /// Checks that search in the empty buffer returns `None` - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"".as_ref(); @@ -808,6 +812,7 @@ mod test { assert_eq!( $source(&mut input) .read_bytes_until(b'*', buf, &mut position) + $(.$await)? .unwrap() .map(Bytes), None @@ -817,8 +822,8 @@ mod test { /// Checks that search in the buffer non-existent value returns entire buffer /// as a result and set `position` to `len()` - #[test] - fn non_existent() { + #[$test] + $($async)? fn non_existent() { let buf = $buf; let mut position = 0; let mut input = b"abcdef".as_ref(); @@ -827,6 +832,7 @@ mod test { assert_eq!( $source(&mut input) .read_bytes_until(b'*', buf, &mut position) + $(.$await)? .unwrap() .map(Bytes), Some(Bytes(b"abcdef")) @@ -837,8 +843,8 @@ mod test { /// Checks that search in the buffer an element that is located in the front of /// buffer returns empty slice as a result and set `position` to one symbol /// after match (`1`) - #[test] - fn at_the_start() { + #[$test] + $($async)? fn at_the_start() { let buf = $buf; let mut position = 0; let mut input = b"*abcdef".as_ref(); @@ -847,6 +853,7 @@ mod test { assert_eq!( $source(&mut input) .read_bytes_until(b'*', buf, &mut position) + $(.$await)? .unwrap() .map(Bytes), Some(Bytes(b"")) @@ -857,8 +864,8 @@ mod test { /// Checks that search in the buffer an element that is located in the middle of /// buffer returns slice before that symbol as a result and set `position` to one /// symbol after match - #[test] - fn inside() { + #[$test] + $($async)? fn inside() { let buf = $buf; let mut position = 0; let mut input = b"abc*def".as_ref(); @@ -867,6 +874,7 @@ mod test { assert_eq!( $source(&mut input) .read_bytes_until(b'*', buf, &mut position) + $(.$await)? .unwrap() .map(Bytes), Some(Bytes(b"abc")) @@ -877,8 +885,8 @@ mod test { /// Checks that search in the buffer an element that is located in the end of /// buffer returns slice before that symbol as a result and set `position` to one /// symbol after match (`len()`) - #[test] - fn in_the_end() { + #[$test] + $($async)? fn in_the_end() { let buf = $buf; let mut position = 0; let mut input = b"abcdef*".as_ref(); @@ -887,6 +895,7 @@ mod test { assert_eq!( $source(&mut input) .read_bytes_until(b'*', buf, &mut position) + $(.$await)? .unwrap() .map(Bytes), Some(Bytes(b"abcdef")) @@ -908,15 +917,15 @@ mod test { /// Checks that if input begins like CDATA element, but CDATA start sequence /// is not finished, parsing ends with an error - #[test] + #[$test] #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"] - fn not_properly_start() { + $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; let mut input = b"![]]>other content".as_ref(); // ^= 0 - match $source(&mut input).read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, @@ -929,14 +938,14 @@ mod test { /// Checks that if CDATA startup sequence was matched, but an end sequence /// is not found, parsing ends with an error - #[test] - fn not_closed() { + #[$test] + $($async)? fn not_closed() { let buf = $buf; let mut position = 0; let mut input = b"![CDATA[other content".as_ref(); // ^= 0 - match $source(&mut input).read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, @@ -948,8 +957,8 @@ mod test { } /// Checks that CDATA element without content inside parsed successfully - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"![CDATA[]]>other content".as_ref(); @@ -958,6 +967,7 @@ mod test { assert_eq!( $source(&mut input) .read_bang_element(buf, &mut position) + $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::CData, Bytes(b"![CDATA["))) @@ -968,8 +978,8 @@ mod test { /// Checks that CDATA element with content parsed successfully. /// Additionally checks that sequences inside CDATA that may look like /// a CDATA end sequence do not interrupt CDATA parsing - #[test] - fn with_content() { + #[$test] + $($async)? fn with_content() { let buf = $buf; let mut position = 0; let mut input = b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref(); @@ -978,6 +988,7 @@ mod test { assert_eq!( $source(&mut input) .read_bang_element(buf, &mut position) + $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::CData, Bytes(b"![CDATA[cdata]] ]>content"))) @@ -1009,15 +1020,15 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] + #[$test] #[ignore = "start comment sequence fully checked outside of `read_bang_element`"] - fn not_properly_start() { + $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; let mut input = b"!- -->other content".as_ref(); // ^= 0 - match $source(&mut input).read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1028,14 +1039,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn not_properly_end() { + #[$test] + $($async)? fn not_properly_end() { let buf = $buf; let mut position = 0; let mut input = b"!->other content".as_ref(); // ^= 0 - match $source(&mut input).read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1046,14 +1057,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn not_closed1() { + #[$test] + $($async)? fn not_closed1() { let buf = $buf; let mut position = 0; let mut input = b"!--other content".as_ref(); // ^= 0 - match $source(&mut input).read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1064,14 +1075,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn not_closed2() { + #[$test] + $($async)? fn not_closed2() { let buf = $buf; let mut position = 0; let mut input = b"!-->other content".as_ref(); // ^= 0 - match $source(&mut input).read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1082,14 +1093,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn not_closed3() { + #[$test] + $($async)? fn not_closed3() { let buf = $buf; let mut position = 0; let mut input = b"!--->other content".as_ref(); // ^= 0 - match $source(&mut input).read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1100,8 +1111,8 @@ mod test { assert_eq!(position, 0); } - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"!---->other content".as_ref(); @@ -1110,6 +1121,7 @@ mod test { assert_eq!( $source(&mut input) .read_bang_element(buf, &mut position) + $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::Comment, Bytes(b"!----"))) @@ -1117,8 +1129,8 @@ mod test { assert_eq!(position, 6); } - #[test] - fn with_content() { + #[$test] + $($async)? fn with_content() { let buf = $buf; let mut position = 0; let mut input = b"!--->comment<--->other content".as_ref(); @@ -1127,6 +1139,7 @@ mod test { assert_eq!( $source(&mut input) .read_bang_element(buf, &mut position) + $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::Comment, Bytes(b"!--->comment<---"))) @@ -1146,14 +1159,14 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] - fn not_properly_start() { + #[$test] + $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; let mut input = b"!D other content".as_ref(); // ^= 0 - match $source(&mut input).read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1164,14 +1177,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn without_space() { + #[$test] + $($async)? fn without_space() { let buf = $buf; let mut position = 0; let mut input = b"!DOCTYPEother content".as_ref(); // ^= 0 - match $source(&mut input).read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1182,8 +1195,8 @@ mod test { assert_eq!(position, 0); } - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"!DOCTYPE>other content".as_ref(); @@ -1192,6 +1205,7 @@ mod test { assert_eq!( $source(&mut input) .read_bang_element(buf, &mut position) + $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::DocType, Bytes(b"!DOCTYPE"))) @@ -1199,14 +1213,14 @@ mod test { assert_eq!(position, 9); } - #[test] - fn not_closed() { + #[$test] + $($async)? fn not_closed() { let buf = $buf; let mut position = 0; let mut input = b"!DOCTYPE other content".as_ref(); // ^= 0 - match $source(&mut input).read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1225,14 +1239,14 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] - fn not_properly_start() { + #[$test] + $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; let mut input = b"!d other content".as_ref(); // ^= 0 - match $source(&mut input).read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1243,14 +1257,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn without_space() { + #[$test] + $($async)? fn without_space() { let buf = $buf; let mut position = 0; let mut input = b"!doctypeother content".as_ref(); // ^= 0 - match $source(&mut input).read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1261,8 +1275,8 @@ mod test { assert_eq!(position, 0); } - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"!doctype>other content".as_ref(); @@ -1271,6 +1285,7 @@ mod test { assert_eq!( $source(&mut input) .read_bang_element(buf, &mut position) + $(.$await)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::DocType, Bytes(b"!doctype"))) @@ -1278,14 +1293,14 @@ mod test { assert_eq!(position, 9); } - #[test] - fn not_closed() { + #[$test] + $($async)? fn not_closed() { let buf = $buf; let mut position = 0; let mut input = b"!doctype other content".as_ref(); // ^= 0 - match $source(&mut input).read_bang_element(buf, &mut position) { + match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1305,14 +1320,17 @@ mod test { use pretty_assertions::assert_eq; /// Checks that nothing was read from empty buffer - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let buf = $buf; let mut position = 0; let mut input = b"".as_ref(); // ^= 0 - assert_eq!($source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), None); + assert_eq!( + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), + None + ); assert_eq!(position, 0); } @@ -1321,71 +1339,71 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] - fn empty_tag() { + #[$test] + $($async)? fn empty_tag() { let buf = $buf; let mut position = 0; let mut input = b">".as_ref(); // ^= 1 assert_eq!( - $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b"")) ); assert_eq!(position, 1); } - #[test] - fn normal() { + #[$test] + $($async)? fn normal() { let buf = $buf; let mut position = 0; let mut input = b"tag>".as_ref(); // ^= 4 assert_eq!( - $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b"tag")) ); assert_eq!(position, 4); } - #[test] - fn empty_ns_empty_tag() { + #[$test] + $($async)? fn empty_ns_empty_tag() { let buf = $buf; let mut position = 0; let mut input = b":>".as_ref(); // ^= 2 assert_eq!( - $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b":")) ); assert_eq!(position, 2); } - #[test] - fn empty_ns() { + #[$test] + $($async)? fn empty_ns() { let buf = $buf; let mut position = 0; let mut input = b":tag>".as_ref(); // ^= 5 assert_eq!( - $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b":tag")) ); assert_eq!(position, 5); } - #[test] - fn with_attributes() { + #[$test] + $($async)? fn with_attributes() { let buf = $buf; let mut position = 0; let mut input = br#"tag attr-1=">" attr2 = '>' 3attr>"#.as_ref(); // ^= 38 assert_eq!( - $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(br#"tag attr-1=">" attr2 = '>' 3attr"#)) ); assert_eq!(position, 38); @@ -1397,71 +1415,71 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] - fn empty_tag() { + #[$test] + $($async)? fn empty_tag() { let buf = $buf; let mut position = 0; let mut input = b"/>".as_ref(); // ^= 2 assert_eq!( - $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b"/")) ); assert_eq!(position, 2); } - #[test] - fn normal() { + #[$test] + $($async)? fn normal() { let buf = $buf; let mut position = 0; let mut input = b"tag/>".as_ref(); // ^= 5 assert_eq!( - $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b"tag/")) ); assert_eq!(position, 5); } - #[test] - fn empty_ns_empty_tag() { + #[$test] + $($async)? fn empty_ns_empty_tag() { let buf = $buf; let mut position = 0; let mut input = b":/>".as_ref(); // ^= 3 assert_eq!( - $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b":/")) ); assert_eq!(position, 3); } - #[test] - fn empty_ns() { + #[$test] + $($async)? fn empty_ns() { let buf = $buf; let mut position = 0; let mut input = b":tag/>".as_ref(); // ^= 6 assert_eq!( - $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(b":tag/")) ); assert_eq!(position, 6); } - #[test] - fn with_attributes() { + #[$test] + $($async)? fn with_attributes() { let buf = $buf; let mut position = 0; let mut input = br#"tag attr-1="/>" attr2 = '/>' 3attr/>"#.as_ref(); // ^= 41 assert_eq!( - $source(&mut input).read_element(buf, &mut position).unwrap().map(Bytes), + $source(&mut input).read_element(buf, &mut position) $(.$await)? .unwrap().map(Bytes), Some(Bytes(br#"tag attr-1="/>" attr2 = '/>' 3attr/"#)) ); assert_eq!(position, 41); @@ -1472,12 +1490,12 @@ mod test { mod issue_344 { use crate::errors::Error; - #[test] - fn cdata() { + #[$test] + $($async)? fn cdata() { let doc = "![]]>"; let mut reader = crate::Reader::from_str(doc); - match reader.read_until_close($buf) { + match reader.$read_until_close($buf) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, @@ -1487,12 +1505,12 @@ mod test { } } - #[test] - fn comment() { + #[$test] + $($async)? fn comment() { let doc = "!- -->"; let mut reader = crate::Reader::from_str(doc); - match reader.read_until_close($buf) { + match reader.$read_until_close($buf) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1502,12 +1520,12 @@ mod test { } } - #[test] - fn doctype_uppercase() { + #[$test] + $($async)? fn doctype_uppercase() { let doc = "!D>"; let mut reader = crate::Reader::from_str(doc); - match reader.read_until_close($buf) { + match reader.$read_until_close($buf) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1517,12 +1535,12 @@ mod test { } } - #[test] - fn doctype_lowercase() { + #[$test] + $($async)? fn doctype_lowercase() { let doc = "!d>"; let mut reader = crate::Reader::from_str(doc); - match reader.read_until_close($buf) { + match reader.$read_until_close($buf) $(.$await)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1534,126 +1552,126 @@ mod test { } /// Ensures, that no empty `Text` events are generated - mod read_event_impl { + mod $read_event { use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use crate::reader::Reader; use pretty_assertions::assert_eq; - #[test] - fn start_text() { + #[$test] + $($async)? fn start_text() { let mut reader = Reader::from_str("bom"); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::StartText(BytesText::from_escaped("bom").into()) ); } - #[test] - fn declaration() { + #[$test] + $($async)? fn declaration() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Decl(BytesDecl::from_start(BytesStart::from_content("xml ", 3))) ); } - #[test] - fn doctype() { + #[$test] + $($async)? fn doctype() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::DocType(BytesText::from_escaped("x")) ); } - #[test] - fn processing_instruction() { + #[$test] + $($async)? fn processing_instruction() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::PI(BytesText::from_escaped("xml-stylesheet")) ); } - #[test] - fn start() { + #[$test] + $($async)? fn start() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Start(BytesStart::new("tag")) ); } - #[test] - fn end() { + #[$test] + $($async)? fn end() { let mut reader = Reader::from_str(""); // Because we expect invalid XML, do not check that // the end name paired with the start name reader.check_end_names(false); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::End(BytesEnd::new("tag")) ); } - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Empty(BytesStart::new("tag")) ); } /// Text event cannot be generated without preceding event of another type - #[test] - fn text() { + #[$test] + $($async)? fn text() { let mut reader = Reader::from_str("text"); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Empty(BytesStart::new("tag")) ); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Text(BytesText::from_escaped("text")) ); } - #[test] - fn cdata() { + #[$test] + $($async)? fn cdata() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::CData(BytesCData::new("")) ); } - #[test] - fn comment() { + #[$test] + $($async)? fn comment() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Comment(BytesText::from_escaped("")) ); } - #[test] - fn eof() { + #[$test] + $($async)? fn eof() { let mut reader = Reader::from_str(""); assert_eq!( - reader.read_event_impl($buf).unwrap(), + reader.$read_event($buf) $(.$await)? .unwrap(), Event::Eof ); } diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index ba87aac0..9852dcb8 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -251,7 +251,13 @@ mod test { input } - check!(identity, ()); + check!( + #[test] + read_event_impl, + read_until_close, + identity, + () + ); #[cfg(feature = "encoding")] mod encoding { From 53a7ad5c64fa28fb95e613748633202a49d2df17 Mon Sep 17 00:00:00 2001 From: Sophie Tauchert Date: Sun, 31 Jul 2022 21:36:18 +0500 Subject: [PATCH 6/6] Implement tokio-based asynchronous reader --- Cargo.toml | 16 +- Changelog.md | 2 + src/reader/async_tokio.rs | 388 ++++++++++++++++++++++++++++++++++ src/reader/buffered_reader.rs | 3 + src/reader/mod.rs | 33 ++- src/reader/ns_reader.rs | 20 +- src/reader/slice_reader.rs | 8 + tests/async-tokio.rs | 20 ++ 8 files changed, 468 insertions(+), 22 deletions(-) create mode 100644 src/reader/async_tokio.rs create mode 100644 tests/async-tokio.rs diff --git a/Cargo.toml b/Cargo.toml index f41bb994..af05e661 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,13 +8,14 @@ documentation = "https://docs.rs/quick-xml" repository = "https://github.com/tafia/quick-xml" keywords = ["xml", "serde", "parser", "writer", "html"] -categories = ["encoding", "parsing", "parser-implementations"] +categories = ["asynchronous", "encoding", "parsing", "parser-implementations"] license = "MIT" [dependencies] document-features = { version = "0.2", optional = true } encoding_rs = { version = "0.8", optional = true } serde = { version = "1.0", optional = true } +tokio = { version = "1.20", optional = true, default-features = false, features = ["io-util"] } memchr = "2.5" [dev-dependencies] @@ -23,6 +24,8 @@ pretty_assertions = "1.2" regex = "1" serde = { version = "1.0", features = ["derive"] } serde-value = "0.7" +tokio = { version = "1.20", default-features = false, features = ["macros", "rt"] } +tokio-test = "0.4" [lib] bench = false @@ -37,6 +40,13 @@ harness = false [features] default = [] + +## Enables support for asynchronous reading from `tokio`'s IO-Traits by enabling +## [reading events] from types implementing [`tokio::io::AsyncBufRead`]. +## +## [reading events]: crate::reader::Reader::read_event_into_async +async-tokio = ["tokio"] + ## Enables support of non-UTF-8 encoded documents. Encoding will be inferred from ## the XML declaration if it will be found, otherwise UTF-8 is assumed. ## @@ -123,3 +133,7 @@ required-features = ["serialize"] [[test]] name = "serde-migrated" required-features = ["serialize"] + +[[test]] +name = "async-tokio" +required-features = ["async-tokio"] diff --git a/Changelog.md b/Changelog.md index e81cab0d..23f5e72b 100644 --- a/Changelog.md +++ b/Changelog.md @@ -39,6 +39,7 @@ |`attribute_namespace` |`resolve_attribute` - [#439]: Added utilities `detect_encoding()`, `decode()`, and `decode_with_bom_removal()` under the `quick-xml::encoding` namespace. +- [#450]: Added support of asynchronous [tokio](https://tokio.rs/) readers ### Bug Fixes @@ -218,6 +219,7 @@ [#439]: https://github.com/tafia/quick-xml/pull/439 [#440]: https://github.com/tafia/quick-xml/pull/440 [#443]: https://github.com/tafia/quick-xml/pull/443 +[#450]: https://github.com/tafia/quick-xml/pull/450 ## 0.23.0 -- 2022-05-08 diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs new file mode 100644 index 00000000..1e06b0b8 --- /dev/null +++ b/src/reader/async_tokio.rs @@ -0,0 +1,388 @@ +//! This is an implementation of [`Reader`] for reading from a [`AsyncBufRead`] +//! as underlying byte stream. This reader fully implements async/await so reading +//! can use non-blocking I/O. + +use std::future::Future; +use std::pin::Pin; +use tokio::io::{self, AsyncBufRead, AsyncBufReadExt}; + +use crate::events::Event; +use crate::name::{QName, ResolveResult}; +use crate::reader::buffered_reader::impl_buffered_source; +use crate::reader::{is_whitespace, BangType, NsReader, ParseState, ReadElementState, Reader}; +use crate::{Error, Result}; + +/// A struct for read XML asynchronously from an [`AsyncBufRead`]. +/// +/// Having own struct allows us to implement anything without risk of name conflicts +/// and does not suffer from the impossibility of having `async` in traits. +struct TokioAdapter<'a, R>(&'a mut R); + +impl<'a, R: AsyncBufRead + Unpin> TokioAdapter<'a, R> { + impl_buffered_source!('b, 0, async, await); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +impl Reader { + /// An asynchronous version of [`read_event_into()`]. Reads the next event into + /// given buffer. + /// + /// > This function should be defined as + /// > ```ignore + /// > pub async fn read_event_into_async<'b>( + /// > &mut self, + /// > buf: &'b mut Vec + /// > ) -> Result>; + /// > ``` + /// > but Rust does not allow to write that for recursive asynchronous function + /// + /// This is the main entry point for reading XML `Event`s when using an async reader. + /// + /// See the documentation of [`read_event_into()`] for more information. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::Event; + /// use quick_xml::Reader; + /// + /// // This explicitly uses `from_reader("...".as_bytes())` to use a buffered + /// // reader instead of relying on the zero-copy optimizations for reading + /// // from byte slices, which is provides the sync interface anyway. + /// let mut reader = Reader::from_reader(r#" + /// + /// Test + /// Test 2 + /// + /// "#.as_bytes()); + /// reader.trim_text(true); + /// + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_event_into_async(&mut buf).await { + /// Ok(Event::Start(_)) => count += 1, + /// Ok(Event::Text(e)) => txt.push(e.unescape().unwrap().into_owned()), + /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), + /// Ok(Event::Eof) => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// assert_eq!(count, 3); + /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); + /// # }) // tokio_test::block_on + /// ``` + /// + /// [`read_event_into()`]: Reader::read_event_into + pub fn read_event_into_async<'reader, 'b: 'reader>( + &'reader mut self, + buf: &'b mut Vec, + ) -> Pin>> + 'reader>> { + Box::pin(async move { + read_event_impl!(self, buf, read_until_open_async, read_until_close_async, await) + }) + } + + /// An asynchronous version of [`read_to_end_into()`]. + /// Reads asynchronously until end element is found using provided buffer as + /// intermediate storage for events content. This function is supposed to be + /// called after you already read a [`Start`] event. + /// + /// See the documentation of [`read_to_end_into()`] for more information. + /// + /// # Examples + /// + /// This example shows, how you can skip XML content after you read the + /// start event. + /// + /// ``` + /// # tokio_test::block_on(async { + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::{BytesStart, Event}; + /// use quick_xml::Reader; + /// + /// let mut reader = Reader::from_reader(r#" + /// + /// + /// + /// + /// + /// + /// + /// + /// "#.as_bytes()); + /// reader.trim_text(true); + /// let mut buf = Vec::new(); + /// + /// let start = BytesStart::new("outer"); + /// let end = start.to_end().into_owned(); + /// + /// // First, we read a start event... + /// assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Event::Start(start)); + /// + /// //...then, we could skip all events to the corresponding end event. + /// // This call will correctly handle nested elements. + /// // Note, however, that this method does not handle namespaces. + /// reader.read_to_end_into_async(end.name(), &mut buf).await.unwrap(); + /// + /// // At the end we should get an Eof event, because we ate the whole XML + /// assert_eq!(reader.read_event_into_async(&mut buf).await.unwrap(), Event::Eof); + /// # }) // tokio_test::block_on + /// ``` + /// + /// [`read_to_end_into()`]: Self::read_to_end_into + /// [`Start`]: Event::Start + pub async fn read_to_end_into_async<'n>( + &mut self, + // We should name that lifetime due to https://github.com/rust-lang/rust/issues/63033` + end: QName<'n>, + buf: &mut Vec, + ) -> Result<()> { + read_to_end!(self, end, buf, read_event_into_async, { buf.clear(); }, await) + } + + /// Read until '<' is found and moves reader to an `Opened` state. + /// + /// Return a `StartText` event if `first` is `true` and a `Text` event otherwise + async fn read_until_open_async<'b>( + &mut self, + buf: &'b mut Vec, + first: bool, + ) -> Result> { + read_until_open!(self, buf, first, TokioAdapter(&mut self.reader), read_event_into_async, await) + } + + /// Private function to read until `>` is found. This function expects that + /// it was called just after encounter a `<` symbol. + async fn read_until_close_async<'b>(&mut self, buf: &'b mut Vec) -> Result> { + read_until_close!(self, buf, TokioAdapter(&mut self.reader), await) + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +impl NsReader { + /// An asynchronous version of [`read_event_into()`]. Reads the next event into + /// given buffer. + /// + /// This method manages namespaces but doesn't resolve them automatically. + /// You should call [`resolve_element()`] if you want to get a namespace. + /// + /// You also can use [`read_resolved_event_into_async()`] instead if you want + /// to resolve namespace as soon as you get an event. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::Event; + /// use quick_xml::name::{Namespace, ResolveResult::*}; + /// use quick_xml::NsReader; + /// + /// let mut reader = NsReader::from_reader(r#" + /// + /// Test + /// Test 2 + /// + /// "#.as_bytes()); + /// reader.trim_text(true); + /// + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_event_into_async(&mut buf).await.unwrap() { + /// Event::Start(e) => { + /// count += 1; + /// let (ns, local) = reader.resolve_element(e.name()); + /// match local.as_ref() { + /// b"tag1" => assert_eq!(ns, Bound(Namespace(b"www.xxxx"))), + /// b"tag2" => assert_eq!(ns, Bound(Namespace(b"www.yyyy"))), + /// _ => unreachable!(), + /// } + /// } + /// Event::Text(e) => { + /// txt.push(e.unescape().unwrap().into_owned()) + /// } + /// Event::Eof => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// assert_eq!(count, 3); + /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); + /// # }) // tokio_test::block_on + /// ``` + /// + /// [`read_event_into()`]: NsReader::read_event_into + /// [`resolve_element()`]: Self::resolve_element + /// [`read_resolved_event_into_async()`]: Self::read_resolved_event_into_async + pub async fn read_event_into_async<'b>(&mut self, buf: &'b mut Vec) -> Result> { + self.pop(); + let event = self.reader.read_event_into_async(buf).await; + self.process_event(event) + } + + /// An asynchronous version of [`read_to_end_into()`]. + /// Reads asynchronously until end element is found using provided buffer as + /// intermediate storage for events content. This function is supposed to be + /// called after you already read a [`Start`] event. + /// + /// See the documentation of [`read_to_end_into()`] for more information. + /// + /// # Examples + /// + /// This example shows, how you can skip XML content after you read the + /// start event. + /// + /// ``` + /// # tokio_test::block_on(async { + /// # use pretty_assertions::assert_eq; + /// use quick_xml::name::{Namespace, ResolveResult}; + /// use quick_xml::events::{BytesStart, Event}; + /// use quick_xml::NsReader; + /// + /// let mut reader = NsReader::from_reader(r#" + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// "#.as_bytes()); + /// reader.trim_text(true); + /// let mut buf = Vec::new(); + /// + /// let ns = Namespace(b"namespace 1"); + /// let start = BytesStart::from_content(r#"outer xmlns="namespace 1""#, 5); + /// let end = start.to_end().into_owned(); + /// + /// // First, we read a start event... + /// assert_eq!( + /// reader.read_resolved_event_into_async(&mut buf).await.unwrap(), + /// (ResolveResult::Bound(ns), Event::Start(start)) + /// ); + /// + /// //...then, we could skip all events to the corresponding end event. + /// // This call will correctly handle nested elements. + /// // Note, however, that this method does not handle namespaces. + /// reader.read_to_end_into_async(end.name(), &mut buf).await.unwrap(); + /// + /// // At the end we should get an Eof event, because we ate the whole XML + /// assert_eq!( + /// reader.read_resolved_event_into_async(&mut buf).await.unwrap(), + /// (ResolveResult::Unbound, Event::Eof) + /// ); + /// # }) // tokio_test::block_on + /// ``` + /// + /// [`read_to_end_into()`]: Self::read_to_end_into + /// [`Start`]: Event::Start + pub async fn read_to_end_into_async<'n>( + &mut self, + // We should name that lifetime due to https://github.com/rust-lang/rust/issues/63033` + end: QName<'n>, + buf: &mut Vec, + ) -> Result<()> { + // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should + // match literally the start name. See `Reader::check_end_names` documentation + self.reader.read_to_end_into_async(end, buf).await + } + + /// An asynchronous version of [`read_resolved_event_into()`]. Reads the next + /// event into given buffer asynchronously and resolves its namespace (if applicable). + /// + /// Namespace is resolved only for [`Start`], [`Empty`] and [`End`] events. + /// For all other events the concept of namespace is not defined, so + /// a [`ResolveResult::Unbound`] is returned. + /// + /// If you are not interested in namespaces, you can use [`read_event_into_async()`] + /// which will not automatically resolve namespaces for you. + /// + /// # Examples + /// + /// ``` + /// # tokio_test::block_on(async { + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::Event; + /// use quick_xml::name::{Namespace, QName, ResolveResult::*}; + /// use quick_xml::NsReader; + /// + /// let mut reader = NsReader::from_reader(r#" + /// + /// Test + /// Test 2 + /// + /// "#.as_bytes()); + /// reader.trim_text(true); + /// + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_resolved_event_into_async(&mut buf).await.unwrap() { + /// (Bound(Namespace(b"www.xxxx")), Event::Start(e)) => { + /// count += 1; + /// assert_eq!(e.local_name(), QName(b"tag1").into()); + /// } + /// (Bound(Namespace(b"www.yyyy")), Event::Start(e)) => { + /// count += 1; + /// assert_eq!(e.local_name(), QName(b"tag2").into()); + /// } + /// (_, Event::Start(_)) => unreachable!(), + /// + /// (_, Event::Text(e)) => { + /// txt.push(e.unescape().unwrap().into_owned()) + /// } + /// (_, Event::Eof) => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// assert_eq!(count, 3); + /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); + /// # }) // tokio_test::block_on + /// ``` + /// + /// [`read_resolved_event_into()`]: NsReader::read_resolved_event_into + /// [`Start`]: Event::Start + /// [`Empty`]: Event::Empty + /// [`End`]: Event::End + /// [`read_event_into_async()`]: Self::read_event_into_async + pub async fn read_resolved_event_into_async<'ns, 'b>( + // Name 'ns lifetime, because otherwise we get an error + // "implicit elided lifetime not allowed here" on ResolveResult + &'ns mut self, + buf: &'b mut Vec, + ) -> Result<(ResolveResult<'ns>, Event<'b>)> { + let event = self.read_event_into_async(buf).await; + self.resolve_event(event) + } +} + +#[cfg(test)] +mod test { + use super::TokioAdapter; + use crate::reader::test::check; + + check!( + #[tokio::test] + read_event_into_async, + read_until_close_async, + TokioAdapter, + &mut Vec::new(), + async, await + ); +} diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index ab33020b..f09ba706 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -211,6 +211,9 @@ macro_rules! impl_buffered_source { }; } +// Make it public for use in async implementations +pub(super) use impl_buffered_source; + /// Implementation of `XmlSource` for any `BufRead` reader using a user-given /// `Vec` as buffer that will be borrowed by events. impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec> for R { diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 14ccc5ab..73ff5061 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -152,22 +152,22 @@ macro_rules! read_event_impl { macro_rules! read_until_open { ( $self:ident, $buf:ident, $first:ident, + $reader:expr, $read_event:ident $(, $await:ident)? ) => {{ $self.parser.state = ParseState::OpenedTag; if $self.parser.trim_text_start { - $self.reader.skip_whitespace(&mut $self.parser.offset) $(.$await)? ?; + $reader.skip_whitespace(&mut $self.parser.offset) $(.$await)? ?; } // If we already at the `<` symbol, do not try to return an empty Text event - if $self.reader.skip_one(b'<', &mut $self.parser.offset) $(.$await)? ? { + if $reader.skip_one(b'<', &mut $self.parser.offset) $(.$await)? ? { return $self.$read_event($buf) $(.$await)?; } - match $self - .reader + match $reader .read_bytes_until(b'<', $buf, &mut $self.parser.offset) $(.$await)? { @@ -180,15 +180,15 @@ macro_rules! read_until_open { macro_rules! read_until_close { ( - $self:ident, $buf:ident + $self:ident, $buf:ident, + $reader:expr $(, $await:ident)? ) => {{ $self.parser.state = ParseState::ClosedTag; - match $self.reader.peek_one() $(.$await)? { + match $reader.peek_one() $(.$await)? { // ` match $self - .reader + Ok(Some(b'!')) => match $reader .read_bang_element($buf, &mut $self.parser.offset) $(.$await)? { @@ -197,8 +197,7 @@ macro_rules! read_until_close { Err(e) => Err(e), }, // ` match $self - .reader + Ok(Some(b'/')) => match $reader .read_bytes_until(b'>', $buf, &mut $self.parser.offset) $(.$await)? { @@ -207,8 +206,7 @@ macro_rules! read_until_close { Err(e) => Err(e), }, // ` match $self - .reader + Ok(Some(b'?')) => match $reader .read_bytes_until(b'>', $buf, &mut $self.parser.offset) $(.$await)? { @@ -217,8 +215,7 @@ macro_rules! read_until_close { Err(e) => Err(e), }, // `<...` - opening or self-closed tag - Ok(Some(_)) => match $self - .reader + Ok(Some(_)) => match $reader .read_element($buf, &mut $self.parser.offset) $(.$await)? { @@ -264,6 +261,8 @@ macro_rules! read_to_end { }}; } +#[cfg(feature = "async-tokio")] +mod async_tokio; mod buffered_reader; mod ns_reader; mod parser; @@ -455,7 +454,7 @@ impl Reader { /// let xml = r#" /// Test /// Test 2 - /// "#; + /// "#; /// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes())); /// let mut buf = Vec::new(); /// @@ -552,7 +551,7 @@ impl Reader { where R: XmlSource<'i, B>, { - read_until_open!(self, buf, first, read_event_impl) + read_until_open!(self, buf, first, self.reader, read_event_impl) } /// Private function to read until `>` is found. This function expects that @@ -561,7 +560,7 @@ impl Reader { where R: XmlSource<'i, B>, { - read_until_close!(self, buf) + read_until_close!(self, buf, self.reader) } } diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index a7ecc0a6..3f56d248 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -19,7 +19,7 @@ use crate::reader::{Reader, XmlSource}; /// Consumes a [`BufRead`] and streams XML `Event`s. pub struct NsReader { /// An XML reader - reader: Reader, + pub(super) reader: Reader, /// Buffer that contains names of namespace prefixes (the part between `xmlns:` /// and an `=`) and namespace values. buffer: Vec, @@ -63,14 +63,14 @@ impl NsReader { self.process_event(event) } - fn pop(&mut self) { + pub(super) fn pop(&mut self) { if self.pending_pop { self.ns_resolver.pop(&mut self.buffer); self.pending_pop = false; } } - fn process_event<'i>(&mut self, event: Result>) -> Result> { + pub(super) fn process_event<'i>(&mut self, event: Result>) -> Result> { match event { Ok(Event::Start(e)) => { self.ns_resolver.push(&e, &mut self.buffer); @@ -93,7 +93,7 @@ impl NsReader { } } - fn resolve_event<'i>( + pub(super) fn resolve_event<'i>( &mut self, event: Result>, ) -> Result<(ResolveResult, Event<'i>)> { @@ -538,6 +538,10 @@ impl<'i> NsReader<&'i [u8]> { /// You also can use [`read_resolved_event()`] instead if you want to resolve namespace /// as soon as you get an event. /// + /// There is no asynchronous `read_event_async()` version of this function, + /// because it is not necessary -- the contents are already in memory and no IO + /// is needed, therefore there is no potential for blocking. + /// /// # Examples /// /// ``` @@ -595,6 +599,10 @@ impl<'i> NsReader<&'i [u8]> { /// If you are not interested in namespaces, you can use [`read_event()`] /// which will not automatically resolve namespaces for you. /// + /// There is no asynchronous `read_resolved_event_async()` version of this function, + /// because it is not necessary -- the contents are already in memory and no IO + /// is needed, therefore there is no potential for blocking. + /// /// # Examples /// /// ``` @@ -661,6 +669,10 @@ impl<'i> NsReader<&'i [u8]> { /// encoding_. It is good practice to always get that parameter using /// [`BytesStart::to_end()`] method. /// + /// There is no asynchronous `read_to_end_async()` version of this function, + /// because it is not necessary -- the contents are already in memory and no IO + /// is needed, therefore there is no potential for blocking. + /// /// # Namespaces /// /// Unlike [`Reader::read_to_end()`], this method resolves namespace diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index 9852dcb8..d4bf0d7e 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -34,6 +34,10 @@ impl<'a> Reader<&'a [u8]> { /// Read an event that borrows from the input rather than a buffer. /// + /// There is no asynchronous `read_event_async()` version of this function, + /// because it is not necessary -- the contents are already in memory and no IO + /// is needed, therefore there is no potential for blocking. + /// /// # Examples /// /// ``` @@ -83,6 +87,10 @@ impl<'a> Reader<&'a [u8]> { /// The correctness of the skipped events does not checked, if you disabled /// the [`check_end_names`] option. /// + /// There is no asynchronous `read_to_end_async()` version of this function, + /// because it is not necessary -- the contents are already in memory and no IO + /// is needed, therefore there is no potential for blocking. + /// /// # Namespaces /// /// While the [`Reader`] does not support namespace resolution, namespaces diff --git a/tests/async-tokio.rs b/tests/async-tokio.rs new file mode 100644 index 00000000..4d811580 --- /dev/null +++ b/tests/async-tokio.rs @@ -0,0 +1,20 @@ +use quick_xml::events::Event::*; +use quick_xml::Reader; + +#[tokio::test] +async fn test_sample() { + let src = include_str!("documents/sample_rss.xml"); + let mut reader = Reader::from_reader(src.as_bytes()); + let mut buf = Vec::new(); + let mut count = 0; + loop { + match reader.read_event_into_async(&mut buf).await.unwrap() { + Start(_) => count += 1, + Decl(e) => println!("{:?}", e.version()), + Eof => break, + _ => (), + } + buf.clear(); + } + println!("{}", count); +}