diff --git a/Cargo.toml b/Cargo.toml index 0da6e32e..2de2133d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,8 @@ document-features = { version = "0.2", optional = true } encoding_rs = { version = "0.8", optional = true } serde = { version = "1.0", optional = true } memchr = "2.5" +tokio = { version = "1.19", optional = true, default-features = false, features = ["io-util", "fs"] } +async-recursion = { version = "1.0", optional = true } [dev-dependencies] criterion = "0.3" @@ -23,6 +25,7 @@ pretty_assertions = "1.2" regex = "1" serde = { version = "1.0", features = ["derive"] } serde-value = "0.7" +tokio = { version = "1.19", default-features = false, features = ["macros", "rt-multi-thread"] } [[bench]] name = "bench" @@ -94,6 +97,9 @@ serialize = ["serde"] ## Enables support for recognizing all [HTML 5 entities](https://dev.w3.org/html5/html-author/charref) escape-html = [] +## Enable support for asynchronous reading +async = ["tokio", "async-recursion"] + [package.metadata.docs.rs] all-features = true diff --git a/src/reader.rs b/src/reader.rs index abc1b591..a3fd51cf 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -13,9 +13,13 @@ use crate::name::{LocalName, NamespaceResolver, QName, ResolveResult}; use memchr; +#[cfg(feature = "async")] +mod async_reader; mod io_reader; mod slice_reader; +#[cfg(feature = "async")] +pub use self::async_reader::AsyncReader; pub use self::{io_reader::IoReader, slice_reader::SliceReader}; /// Possible reader states. The state transition diagram (`true` and `false` shows @@ -945,7 +949,12 @@ fn detect_encoding(bytes: &[u8]) -> Option<&'static Encoding> { #[cfg(test)] mod test { macro_rules! check { - ($(let mut $buf:ident = $init:expr;)?) => { + ($(let mut $buf:ident = $init:expr; $($await:tt)?)?) => { + check!(#[test] { + $(let mut $buf = $init; $($await)?)? + }); + }; + (#[$test:meta] $($async:ident)? { $(let mut $buf:ident = $init:expr; $($await:tt)?)? 
}) => { mod read_bytes_until { use super::input_from_bytes; // Use Bytes for printing bytes as strings for ASCII range @@ -953,8 +962,8 @@ mod test { use pretty_assertions::assert_eq; /// Checks that search in the empty buffer returns `None` - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"".as_ref()); @@ -963,6 +972,7 @@ mod test { assert_eq!( input .read_bytes_until(b'*', $(&mut $buf, )? &mut position) + $($(.$await)?)? .unwrap() .map(Bytes), None @@ -972,8 +982,8 @@ mod test { /// Checks that search in the buffer non-existent value returns entire buffer /// as a result and set `position` to `len()` - #[test] - fn non_existent() { + #[$test] + $($async)? fn non_existent() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"abcdef".as_ref()); @@ -982,6 +992,7 @@ mod test { assert_eq!( input .read_bytes_until(b'*', $(&mut $buf, )? &mut position) + $($(.$await)?)? .unwrap() .map(Bytes), Some(Bytes(b"abcdef")) @@ -992,8 +1003,8 @@ mod test { /// Checks that search in the buffer an element that is located in the front of /// buffer returns empty slice as a result and set `position` to one symbol /// after match (`1`) - #[test] - fn at_the_start() { + #[$test] + $($async)? fn at_the_start() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"*abcdef".as_ref()); @@ -1002,6 +1013,7 @@ mod test { assert_eq!( input .read_bytes_until(b'*', $(&mut $buf, )? &mut position) + $($(.$await)?)? .unwrap() .map(Bytes), Some(Bytes(b"")) @@ -1012,8 +1024,8 @@ mod test { /// Checks that search in the buffer an element that is located in the middle of /// buffer returns slice before that symbol as a result and set `position` to one /// symbol after match - #[test] - fn inside() { + #[$test] + $($async)? fn inside() { $(let mut $buf = $init;)? 
let mut position = 0; let mut input = input_from_bytes(b"abc*def".as_ref()); @@ -1022,6 +1034,7 @@ mod test { assert_eq!( input .read_bytes_until(b'*', $(&mut $buf, )? &mut position) + $($(.$await)?)? .unwrap() .map(Bytes), Some(Bytes(b"abc")) @@ -1032,8 +1045,8 @@ mod test { /// Checks that search in the buffer an element that is located in the end of /// buffer returns slice before that symbol as a result and set `position` to one /// symbol after match (`len()`) - #[test] - fn in_the_end() { + #[$test] + $($async)? fn in_the_end() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"abcdef*".as_ref()); @@ -1042,6 +1055,7 @@ mod test { assert_eq!( input .read_bytes_until(b'*', $(&mut $buf, )? &mut position) + $($(.$await)?)? .unwrap() .map(Bytes), Some(Bytes(b"abcdef")) @@ -1062,15 +1076,15 @@ mod test { /// Checks that if input begins like CDATA element, but CDATA start sequence /// is not finished, parsing ends with an error - #[test] + #[$test] #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"] - fn not_properly_start() { + $($async)? fn not_properly_start() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"![]]>other content".as_ref()); // ^= 0 - match input.read_bang_element($(&mut $buf, )? &mut position) { + match input.read_bang_element($(&mut $buf, )? &mut position)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, @@ -1083,14 +1097,14 @@ mod test { /// Checks that if CDATA startup sequence was matched, but an end sequence /// is not found, parsing ends with an error - #[test] - fn not_closed() { + #[$test] + $($async)? fn not_closed() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"![CDATA[other content".as_ref()); // ^= 0 - match input.read_bang_element($(&mut $buf, )? &mut position) { + match input.read_bang_element($(&mut $buf, )? &mut position)$($(.$await)?)? 
{ Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, @@ -1102,8 +1116,8 @@ mod test { } /// Checks that CDATA element without content inside parsed successfully - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"![CDATA[]]>other content".as_ref()); @@ -1112,6 +1126,7 @@ mod test { assert_eq!( input .read_bang_element($(&mut $buf, )? &mut position) + $($(.$await)?)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::CData, Bytes(b"![CDATA["))) @@ -1122,8 +1137,8 @@ mod test { /// Checks that CDATA element with content parsed successfully. /// Additionally checks that sequences inside CDATA that may look like /// a CDATA end sequence do not interrupt CDATA parsing - #[test] - fn with_content() { + #[$test] + $($async)? fn with_content() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"![CDATA[cdata]] ]>content]]>other content]]>".as_ref()); @@ -1132,6 +1147,7 @@ mod test { assert_eq!( input .read_bang_element($(&mut $buf, )? &mut position) + $($(.$await)?)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::CData, Bytes(b"![CDATA[cdata]] ]>content"))) @@ -1163,15 +1179,15 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] + #[$test] #[ignore = "start comment sequence fully checked outside of `read_bang_element`"] - fn not_properly_start() { + $($async)? fn not_properly_start() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"!- -->other content".as_ref()); // ^= 0 - match input.read_bang_element($(&mut $buf, )? &mut position) { + match input.read_bang_element($(&mut $buf, )? &mut position)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1182,14 +1198,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn not_properly_end() { + #[$test] + $($async)? 
fn not_properly_end() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"!->other content".as_ref()); // ^= 0 - match input.read_bang_element($(&mut $buf, )? &mut position) { + match input.read_bang_element($(&mut $buf, )? &mut position)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1200,14 +1216,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn not_closed1() { + #[$test] + $($async)? fn not_closed1() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"!--other content".as_ref()); // ^= 0 - match input.read_bang_element($(&mut $buf, )? &mut position) { + match input.read_bang_element($(&mut $buf, )? &mut position)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1218,14 +1234,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn not_closed2() { + #[$test] + $($async)? fn not_closed2() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"!-->other content".as_ref()); // ^= 0 - match input.read_bang_element($(&mut $buf, )? &mut position) { + match input.read_bang_element($(&mut $buf, )? &mut position)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1236,14 +1252,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn not_closed3() { + #[$test] + $($async)? fn not_closed3() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"!--->other content".as_ref()); // ^= 0 - match input.read_bang_element($(&mut $buf, )? &mut position) { + match input.read_bang_element($(&mut $buf, )? &mut position)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1254,8 +1270,8 @@ mod test { assert_eq!(position, 0); } - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { $(let mut $buf = $init;)? 
let mut position = 0; let mut input = input_from_bytes(b"!---->other content".as_ref()); @@ -1264,6 +1280,7 @@ mod test { assert_eq!( input .read_bang_element($(&mut $buf, )? &mut position) + $($(.$await)?)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::Comment, Bytes(b"!----"))) @@ -1271,8 +1288,8 @@ mod test { assert_eq!(position, 6); } - #[test] - fn with_content() { + #[$test] + $($async)? fn with_content() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"!--->comment<--->other content".as_ref()); @@ -1281,6 +1298,7 @@ mod test { assert_eq!( input .read_bang_element($(&mut $buf, )? &mut position) + $($(.$await)?)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::Comment, Bytes(b"!--->comment<---"))) @@ -1299,14 +1317,14 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] - fn not_properly_start() { + #[$test] + $($async)? fn not_properly_start() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"!D other content".as_ref()); // ^= 0 - match input.read_bang_element($(&mut $buf, )? &mut position) { + match input.read_bang_element($(&mut $buf, )? &mut position)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1317,14 +1335,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn without_space() { + #[$test] + $($async)? fn without_space() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"!DOCTYPEother content".as_ref()); // ^= 0 - match input.read_bang_element($(&mut $buf, )? &mut position) { + match input.read_bang_element($(&mut $buf, )? &mut position)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1335,8 +1353,8 @@ mod test { assert_eq!(position, 0); } - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { $(let mut $buf = $init;)? 
let mut position = 0; let mut input = input_from_bytes(b"!DOCTYPE>other content".as_ref()); @@ -1345,6 +1363,7 @@ mod test { assert_eq!( input .read_bang_element($(&mut $buf, )? &mut position) + $($(.$await)?)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::DocType, Bytes(b"!DOCTYPE"))) @@ -1352,14 +1371,14 @@ mod test { assert_eq!(position, 9); } - #[test] - fn not_closed() { + #[$test] + $($async)? fn not_closed() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"!DOCTYPE other content".as_ref()); // ^= 0 - match input.read_bang_element($(&mut $buf, )? &mut position) { + match input.read_bang_element($(&mut $buf, )? &mut position)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1378,14 +1397,14 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] - fn not_properly_start() { + #[$test] + $($async)? fn not_properly_start() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"!d other content".as_ref()); // ^= 0 - match input.read_bang_element($(&mut $buf, )? &mut position) { + match input.read_bang_element($(&mut $buf, )? &mut position)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1396,14 +1415,14 @@ mod test { assert_eq!(position, 0); } - #[test] - fn without_space() { + #[$test] + $($async)? fn without_space() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"!doctypeother content".as_ref()); // ^= 0 - match input.read_bang_element($(&mut $buf, )? &mut position) { + match input.read_bang_element($(&mut $buf, )? &mut position)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1414,8 +1433,8 @@ mod test { assert_eq!(position, 0); } - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { $(let mut $buf = $init;)? 
let mut position = 0; let mut input = input_from_bytes(b"!doctype>other content".as_ref()); @@ -1424,6 +1443,7 @@ mod test { assert_eq!( input .read_bang_element($(&mut $buf, )? &mut position) + $($(.$await)?)? .unwrap() .map(|(ty, data)| (ty, Bytes(data))), Some((BangType::DocType, Bytes(b"!doctype"))) @@ -1431,14 +1451,14 @@ mod test { assert_eq!(position, 9); } - #[test] - fn not_closed() { + #[$test] + $($async)? fn not_closed() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"!doctype other content".as_ref()); // ^= 0 - match input.read_bang_element($(&mut $buf, )? &mut position) { + match input.read_bang_element($(&mut $buf, )? &mut position)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1458,14 +1478,14 @@ mod test { use pretty_assertions::assert_eq; /// Checks that nothing was read from empty buffer - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"".as_ref()); // ^= 0 - assert_eq!(input.read_element($(&mut $buf, )? &mut position).unwrap().map(Bytes), None); + assert_eq!(input.read_element($(&mut $buf, )? &mut position)$($(.$await)?)?.unwrap().map(Bytes), None); assert_eq!(position, 0); } @@ -1474,71 +1494,71 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] - fn empty_tag() { + #[$test] + $($async)? fn empty_tag() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b">".as_ref()); // ^= 1 assert_eq!( - input.read_element($(&mut $buf, )? &mut position).unwrap().map(Bytes), + input.read_element($(&mut $buf, )? &mut position)$($(.$await)?)?.unwrap().map(Bytes), Some(Bytes(b"")) ); assert_eq!(position, 1); } - #[test] - fn normal() { + #[$test] + $($async)? fn normal() { $(let mut $buf = $init;)? 
let mut position = 0; let mut input = input_from_bytes(b"tag>".as_ref()); // ^= 4 assert_eq!( - input.read_element($(&mut $buf, )? &mut position).unwrap().map(Bytes), + input.read_element($(&mut $buf, )? &mut position)$($(.$await)?)?.unwrap().map(Bytes), Some(Bytes(b"tag")) ); assert_eq!(position, 4); } - #[test] - fn empty_ns_empty_tag() { + #[$test] + $($async)? fn empty_ns_empty_tag() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b":>".as_ref()); // ^= 2 assert_eq!( - input.read_element($(&mut $buf, )? &mut position).unwrap().map(Bytes), + input.read_element($(&mut $buf, )? &mut position)$($(.$await)?)?.unwrap().map(Bytes), Some(Bytes(b":")) ); assert_eq!(position, 2); } - #[test] - fn empty_ns() { + #[$test] + $($async)? fn empty_ns() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b":tag>".as_ref()); // ^= 5 assert_eq!( - input.read_element($(&mut $buf, )? &mut position).unwrap().map(Bytes), + input.read_element($(&mut $buf, )? &mut position)$($(.$await)?)?.unwrap().map(Bytes), Some(Bytes(b":tag")) ); assert_eq!(position, 5); } - #[test] - fn with_attributes() { + #[$test] + $($async)? fn with_attributes() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(br#"tag attr-1=">" attr2 = '>' 3attr>"#.as_ref()); // ^= 38 assert_eq!( - input.read_element($(&mut $buf, )? &mut position).unwrap().map(Bytes), + input.read_element($(&mut $buf, )? &mut position)$($(.$await)?)?.unwrap().map(Bytes), Some(Bytes(br#"tag attr-1=">" attr2 = '>' 3attr"#)) ); assert_eq!(position, 38); @@ -1550,71 +1570,71 @@ mod test { use crate::utils::Bytes; use pretty_assertions::assert_eq; - #[test] - fn empty_tag() { + #[$test] + $($async)? fn empty_tag() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"/>".as_ref()); // ^= 2 assert_eq!( - input.read_element($(&mut $buf, )? &mut position).unwrap().map(Bytes), + input.read_element($(&mut $buf, )? 
&mut position)$($(.$await)?)?.unwrap().map(Bytes), Some(Bytes(b"/")) ); assert_eq!(position, 2); } - #[test] - fn normal() { + #[$test] + $($async)? fn normal() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b"tag/>".as_ref()); // ^= 5 assert_eq!( - input.read_element($(&mut $buf, )? &mut position).unwrap().map(Bytes), + input.read_element($(&mut $buf, )? &mut position)$($(.$await)?)?.unwrap().map(Bytes), Some(Bytes(b"tag/")) ); assert_eq!(position, 5); } - #[test] - fn empty_ns_empty_tag() { + #[$test] + $($async)? fn empty_ns_empty_tag() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b":/>".as_ref()); // ^= 3 assert_eq!( - input.read_element($(&mut $buf, )? &mut position).unwrap().map(Bytes), + input.read_element($(&mut $buf, )? &mut position)$($(.$await)?)?.unwrap().map(Bytes), Some(Bytes(b":/")) ); assert_eq!(position, 3); } - #[test] - fn empty_ns() { + #[$test] + $($async)? fn empty_ns() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(b":tag/>".as_ref()); // ^= 6 assert_eq!( - input.read_element($(&mut $buf, )? &mut position).unwrap().map(Bytes), + input.read_element($(&mut $buf, )? &mut position)$($(.$await)?)?.unwrap().map(Bytes), Some(Bytes(b":tag/")) ); assert_eq!(position, 6); } - #[test] - fn with_attributes() { + #[$test] + $($async)? fn with_attributes() { $(let mut $buf = $init;)? let mut position = 0; let mut input = input_from_bytes(br#"tag attr-1="/>" attr2 = '/>' 3attr/>"#.as_ref()); // ^= 41 assert_eq!( - input.read_element($(&mut $buf, )? &mut position).unwrap().map(Bytes), + input.read_element($(&mut $buf, )? &mut position)$($(.$await)?)?.unwrap().map(Bytes), Some(Bytes(br#"tag attr-1="/>" attr2 = '/>' 3attr/"#)) ); assert_eq!(position, 41); @@ -1626,13 +1646,13 @@ mod test { use super::reader_from_str; use crate::errors::Error; - #[test] - fn cdata() { + #[$test] + $($async)? 
fn cdata() { let doc = "![]]>"; let mut reader = reader_from_str(doc); $(let mut $buf = $init;)? - match reader.read_until_close($(&mut $buf)?) { + match reader.read_until_close($(&mut $buf)?)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "CData" => {} x => assert!( false, @@ -1642,13 +1662,13 @@ mod test { } } - #[test] - fn comment() { + #[$test] + $($async)? fn comment() { let doc = "!- -->"; let mut reader = reader_from_str(doc); $(let mut $buf = $init;)? - match reader.read_until_close($(&mut $buf)?) { + match reader.read_until_close($(&mut $buf)?)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "Comment" => {} x => assert!( false, @@ -1658,13 +1678,13 @@ mod test { } } - #[test] - fn doctype_uppercase() { + #[$test] + $($async)? fn doctype_uppercase() { let doc = "!D>"; let mut reader = reader_from_str(doc); $(let mut $buf = $init;)? - match reader.read_until_close($(&mut $buf)?) { + match reader.read_until_close($(&mut $buf)?)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1674,13 +1694,13 @@ mod test { } } - #[test] - fn doctype_lowercase() { + #[$test] + $($async)? fn doctype_lowercase() { let doc = "!d>"; let mut reader = reader_from_str(doc); $(let mut $buf = $init;)? - match reader.read_until_close($(&mut $buf)?) { + match reader.read_until_close($(&mut $buf)?)$($(.$await)?)? { Err(Error::UnexpectedEof(s)) if s == "DOCTYPE" => {} x => assert!( false, @@ -1697,63 +1717,63 @@ mod test { use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event}; use pretty_assertions::assert_eq; - #[test] - fn start_text() { + #[$test] + $($async)? fn start_text() { let mut reader = reader_from_str("bom"); $(let mut $buf = $init;)? assert_eq!( - reader.read_event_impl($(&mut $buf)?).unwrap(), + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::StartText(BytesText::from_escaped(b"bom".as_ref()).into()) ); } - #[test] - fn declaration() { + #[$test] + $($async)? 
fn declaration() { let mut reader = reader_from_str(""); $(let mut $buf = $init;)? assert_eq!( - reader.read_event_impl($(&mut $buf)?).unwrap(), + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::Decl(BytesDecl::from_start(BytesStart::borrowed(b"xml ", 3))) ); } - #[test] - fn doctype() { + #[$test] + $($async)? fn doctype() { let mut reader = reader_from_str(""); $(let mut $buf = $init;)? assert_eq!( - reader.read_event_impl($(&mut $buf)?).unwrap(), + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::DocType(BytesText::from_escaped(b"x".as_ref())) ); } - #[test] - fn processing_instruction() { + #[$test] + $($async)? fn processing_instruction() { let mut reader = reader_from_str(""); $(let mut $buf = $init;)? assert_eq!( - reader.read_event_impl($(&mut $buf)?).unwrap(), + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::PI(BytesText::from_escaped(b"xml-stylesheet".as_ref())) ); } - #[test] - fn start() { + #[$test] + $($async)? fn start() { let mut reader = reader_from_str(""); $(let mut $buf = $init;)? assert_eq!( - reader.read_event_impl($(&mut $buf)?).unwrap(), + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::Start(BytesStart::borrowed_name(b"tag")) ); } - #[test] - fn end() { + #[$test] + $($async)? fn end() { let mut reader = reader_from_str(""); // Because we expect invalid XML, do not check that // the end name paired with the start name @@ -1761,68 +1781,68 @@ mod test { $(let mut $buf = $init;)? assert_eq!( - reader.read_event_impl($(&mut $buf)?).unwrap(), + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::End(BytesEnd::borrowed(b"tag")) ); } - #[test] - fn empty() { + #[$test] + $($async)? fn empty() { let mut reader = reader_from_str(""); $(let mut $buf = $init;)? 
assert_eq!( - reader.read_event_impl($(&mut $buf)?).unwrap(), + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::Empty(BytesStart::borrowed_name(b"tag")) ); } /// Text event cannot be generated without preceding event of another type - #[test] - fn text() { + #[$test] + $($async)? fn text() { let mut reader = reader_from_str("text"); $(let mut $buf = $init;)? assert_eq!( - reader.read_event_impl($(&mut $buf)?).unwrap(), + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::Empty(BytesStart::borrowed_name(b"tag")) ); assert_eq!( - reader.read_event_impl($(&mut $buf)?).unwrap(), + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::Text(BytesText::from_escaped(b"text".as_ref())) ); } - #[test] - fn cdata() { + #[$test] + $($async)? fn cdata() { let mut reader = reader_from_str(""); $(let mut $buf = $init;)? assert_eq!( - reader.read_event_impl($(&mut $buf)?).unwrap(), + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::CData(BytesCData::from_str("")) ); } - #[test] - fn comment() { + #[$test] + $($async)? fn comment() { let mut reader = reader_from_str(""); $(let mut $buf = $init;)? assert_eq!( - reader.read_event_impl($(&mut $buf)?).unwrap(), + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::Comment(BytesText::from_escaped(b"".as_ref())) ); } - #[test] - fn eof() { + #[$test] + $($async)? fn eof() { let mut reader = reader_from_str(""); $(let mut $buf = $init;)? assert_eq!( - reader.read_event_impl($(&mut $buf)?).unwrap(), + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::Eof ); } @@ -1841,35 +1861,35 @@ mod test { use pretty_assertions::assert_eq; /// Checks that encoding is detected by BOM and changed after XML declaration - #[test] - fn bom_detected() { + #[$test] + $($async)? fn bom_detected() { let mut reader = reader_from_bytes(b"\xFF\xFE"); $(let mut $buf = $init;)? 
assert_eq!(reader.decoder().encoding(), UTF_8); - reader.read_event_impl($(&mut $buf)?).unwrap(); + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(); assert_eq!(reader.decoder().encoding(), UTF_16LE); - reader.read_event_impl($(&mut $buf)?).unwrap(); + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(); assert_eq!(reader.decoder().encoding(), WINDOWS_1251); - assert_eq!(reader.read_event_impl($(&mut $buf)?).unwrap(), Event::Eof); + assert_eq!(reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::Eof); } /// Checks that encoding is changed by XML declaration, but only once - #[test] - fn xml_declaration() { + #[$test] + $($async)? fn xml_declaration() { let mut reader = reader_from_bytes(b""); $(let mut $buf = $init;)? assert_eq!(reader.decoder().encoding(), UTF_8); - reader.read_event_impl($(&mut $buf)?).unwrap(); + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(); assert_eq!(reader.decoder().encoding(), UTF_16LE); - reader.read_event_impl($(&mut $buf)?).unwrap(); + reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(); assert_eq!(reader.decoder().encoding(), UTF_16LE); - assert_eq!(reader.read_event_impl($(&mut $buf)?).unwrap(), Event::Eof); + assert_eq!(reader.read_event_impl($(&mut $buf)?)$($(.$await)?)?.unwrap(), Event::Eof); } } } diff --git a/src/reader/async_reader.rs b/src/reader/async_reader.rs new file mode 100644 index 00000000..ccdef053 --- /dev/null +++ b/src/reader/async_reader.rs @@ -0,0 +1,691 @@ +use std::{ + future::Future, + ops::{Deref, DerefMut}, + path::Path, +}; + +use async_recursion::async_recursion; +use tokio::{ + fs::File, + io::{self, AsyncBufRead, AsyncBufReadExt, AsyncRead, BufReader}, +}; + +use crate::{ + events::{BytesText, Event}, + name::{QName, ResolveResult}, + Error, Result, +}; + +#[cfg(feature = "encoding")] +use super::{detect_encoding, EncodingRef}; +use super::{is_whitespace, BangType, InnerReader, ReadElementState, Reader, TagState}; + +/// A struct for 
handling reading functions based on reading from a [`BufRead`]. +#[derive(Debug, Clone)] +pub struct AsyncReader(R); + +impl Deref for AsyncReader { + type Target = R; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for AsyncReader { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl InnerReader for AsyncReader { + type Reader = R; + + fn into_inner(self) -> Self::Reader { + self.0 + } +} + +/// Private reading functions. +impl AsyncReader { + #[inline] + async fn read_bytes_until<'buf>( + &mut self, + byte: u8, + buf: &'buf mut Vec, + position: &mut usize, + ) -> Result> { + let mut read = 0; + let mut done = false; + let start = buf.len(); + while !done { + let used = { + let available = match self.fill_buf().await { + Ok(n) if n.is_empty() => break, + Ok(n) => n, + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + }; + + match memchr::memchr(byte, available) { + Some(i) => { + buf.extend_from_slice(&available[..i]); + done = true; + i + 1 + } + None => { + buf.extend_from_slice(available); + available.len() + } + } + }; + self.consume(used); + read += used; + } + *position += read; + + if read == 0 { + Ok(None) + } else { + Ok(Some(&buf[start..])) + } + } + + async fn read_bang_element<'buf>( + &mut self, + buf: &'buf mut Vec, + position: &mut usize, + ) -> Result> { + // Peeked one bang ('!') before being called, so it's guaranteed to + // start with it. 
+ let start = buf.len(); + let mut read = 1; + buf.push(b'!'); + self.consume(1); + + let bang_type = BangType::new(self.peek_one().await?)?; + + loop { + match self.fill_buf().await { + // Note: Do not update position, so the error points to + // somewhere sane rather than at the EOF + Ok(n) if n.is_empty() => return Err(bang_type.to_err()), + Ok(available) => { + if let Some((consumed, used)) = bang_type.parse(available, read) { + buf.extend_from_slice(consumed); + + self.consume(used); + read += used; + + *position += read; + break; + } else { + buf.extend_from_slice(available); + + let used = available.len(); + self.consume(used); + read += used; + } + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + } + } + + if read == 0 { + Ok(None) + } else { + Ok(Some((bang_type, &buf[start..]))) + } + } + + #[inline] + async fn read_element<'buf>( + &mut self, + buf: &'buf mut Vec, + position: &mut usize, + ) -> Result> { + let mut state = ReadElementState::Elem; + let mut read = 0; + + let start = buf.len(); + loop { + match self.fill_buf().await { + Ok(n) if n.is_empty() => break, + Ok(available) => { + if let Some((consumed, used)) = state.change(available) { + buf.extend_from_slice(consumed); + + self.consume(used); + read += used; + + *position += read; + break; + } else { + buf.extend_from_slice(available); + + let used = available.len(); + self.consume(used); + read += used; + } + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => { + *position += read; + return Err(Error::Io(e)); + } + }; + } + + if read == 0 { + Ok(None) + } else { + Ok(Some(&buf[start..])) + } + } + + /// Consume and discard all the whitespace until the next non-whitespace + /// character or EOF. 
+ async fn skip_whitespace(&mut self, position: &mut usize) -> Result<()> { + loop { + break match self.fill_buf().await { + Ok(n) => { + let count = n.iter().position(|b| !is_whitespace(*b)).unwrap_or(n.len()); + if count > 0 { + self.consume(count); + *position += count; + continue; + } else { + Ok(()) + } + } + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => Err(Error::Io(e)), + }; + } + } + + /// Consume and discard one character if it matches the given byte. Return + /// true if it matched. + async fn skip_one(&mut self, byte: u8, position: &mut usize) -> Result { + match self.peek_one().await? { + Some(b) if b == byte => { + *position += 1; + self.consume(1); + Ok(true) + } + _ => Ok(false), + } + } + + /// Return one character without consuming it, so that future `read_*` calls + /// will still include it. On EOF, return None. + async fn peek_one(&mut self) -> Result> { + loop { + break match self.fill_buf().await { + Ok(n) if n.is_empty() => Ok(None), + Ok(n) => Ok(Some(n[0])), + Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, + Err(e) => Err(Error::Io(e)), + }; + } + } +} + +/// Private functions for a [`Reader`] based on an [`AsyncReader`]. +impl Reader> { + /// Read text into the given buffer, and return an event that borrows from + /// either that buffer or from the input itself, based on the type of the + /// reader. + #[async_recursion] + async fn read_event_impl<'buf>(&mut self, buf: &'buf mut Vec) -> Result> { + let event = match self.tag_state { + TagState::Init => self.read_until_open(buf, true).await, + TagState::Closed => self.read_until_open(buf, false).await, + TagState::Opened => self.read_until_close(buf).await, + TagState::Empty => self.close_expanded_empty(), + TagState::Exit => return Ok(Event::Eof), + }; + match event { + Err(_) | Ok(Event::Eof) => self.tag_state = TagState::Exit, + _ => {} + } + event + } + + /// Read until '<' is found and moves reader to an `Opened` state. 
+ /// + /// Return a `StartText` event if `first` is `true` and a `Text` event otherwise + async fn read_until_open<'buf>( + &mut self, + buf: &'buf mut Vec, + first: bool, + ) -> Result> { + self.tag_state = TagState::Opened; + + if self.trim_text_start { + self.reader.skip_whitespace(&mut self.buf_position).await?; + } + + // If we already at the `<` symbol, do not try to return an empty Text event + if self.reader.skip_one(b'<', &mut self.buf_position).await? { + return self.read_event_impl(buf).await; + } + + match self + .reader + .read_bytes_until(b'<', buf, &mut self.buf_position) + .await + { + Ok(Some(bytes)) => { + #[cfg(feature = "encoding")] + if first && self.encoding.can_be_refined() { + if let Some(encoding) = detect_encoding(bytes) { + self.encoding = EncodingRef::BomDetected(encoding); + } + } + + let content = if self.trim_text_end { + // Skip the ending '< + let len = bytes + .iter() + .rposition(|&b| !is_whitespace(b)) + .map_or_else(|| bytes.len(), |p| p + 1); + &bytes[..len] + } else { + bytes + }; + + Ok(if first { + Event::StartText(BytesText::from_escaped(content).into()) + } else { + Event::Text(BytesText::from_escaped(content)) + }) + } + Ok(None) => Ok(Event::Eof), + Err(e) => Err(e), + } + } + + /// Private function to read until `>` is found. This function expects that + /// it was called just after encounter a `<` symbol. 
+ async fn read_until_close<'buf>(&mut self, buf: &'buf mut Vec) -> Result> { + self.tag_state = TagState::Closed; + + match self.reader.peek_one().await { + // ` match self + .reader + .read_bang_element(buf, &mut self.buf_position) + .await + { + Ok(None) => Ok(Event::Eof), + Ok(Some((bang_type, bytes))) => self.read_bang(bang_type, bytes), + Err(e) => Err(e), + }, + // ` match self + .reader + .read_bytes_until(b'>', buf, &mut self.buf_position) + .await + { + Ok(None) => Ok(Event::Eof), + Ok(Some(bytes)) => self.read_end(bytes), + Err(e) => Err(e), + }, + // ` match self + .reader + .read_bytes_until(b'>', buf, &mut self.buf_position) + .await + { + Ok(None) => Ok(Event::Eof), + Ok(Some(bytes)) => self.read_question_mark(bytes), + Err(e) => Err(e), + }, + // `<...` - opening or self-closed tag + Ok(Some(_)) => match self.reader.read_element(buf, &mut self.buf_position).await { + Ok(None) => Ok(Event::Eof), + Ok(Some(bytes)) => self.read_start(bytes), + Err(e) => Err(e), + }, + Ok(None) => Ok(Event::Eof), + Err(e) => Err(e), + } + } +} + +/// Builder for reading from a file. +impl Reader>> { + /// Creates an XML reader from a file path. + pub async fn from_file_async>(path: P) -> Result { + let file = File::open(path).await.map_err(Error::Io)?; + let reader = BufReader::new(file); + Ok(Self::from_reader_internal(AsyncReader(reader))) + } +} + +/// Builder for reading from any [`BufRead`]. +impl Reader> { + /// Creates an XML reader from any type implementing [`Read`]. + pub fn from_async_reader(reader: R) -> Self { + Self::from_reader_internal(AsyncReader(reader)) + } +} + +/// Builder for reading from any [`Read`]. +impl Reader>> { + /// Creates an XML reader from any type implementing [`Read`]. + pub fn from_async_unbuffered_reader(reader: R) -> Self { + Self::from_reader_internal(AsyncReader(BufReader::new(reader))) + } +} + +/// Public reading methods for a [`Reader`] based on an [`AsyncReader`]. +impl Reader> { + /// Reads the next `Event`. 
+ /// + /// This is the main entry point for reading XML `Event`s. + /// + /// `Event`s borrow `buf` and can be converted to own their data if needed (uses `Cow` + /// internally). + /// + /// Having the possibility to control the internal buffers gives you some additional benefits + /// such as: + /// + /// - Reduce the number of allocations by reusing the same buffer. For constrained systems, + /// you can call `buf.clear()` once you are done with processing the event (typically at the + /// end of your loop). + /// - Reserve the buffer length if you know the file size (using `Vec::with_capacity`). + /// + /// # Examples + /// + /// ``` + /// use quick_xml::Reader; + /// use quick_xml::events::Event; + /// + /// let xml = r#" + /// Test + /// Test 2 + /// "#; + /// // This explicitly uses `from_reader(xml.as_bytes())` to use a buffered reader instead of + /// // relying on the zero-copy optimizations for reading from byte slices. + /// let mut reader = Reader::from_reader(xml.as_bytes()); + /// reader.trim_text(true); + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_event_into(&mut buf) { + /// Ok(Event::Start(ref e)) => count += 1, + /// Ok(Event::Text(e)) => txt.push(e.unescape_and_decode(&reader).expect("Error!")), + /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), + /// Ok(Event::Eof) => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// println!("Found {} start events", count); + /// println!("Text events: {:?}", txt); + /// ``` + #[inline] + pub async fn read_event_into<'buf>(&mut self, buf: &'buf mut Vec) -> Result> { + self.read_event_impl(buf).await + } + + /// Reads until end element is found using provided buffer as intermediate + /// storage for events content. This function is supposed to be called after + /// you already read a [`Start`] event. + /// + /// Manages nested cases where parent and child elements have the same name. 
    ///
    /// If the corresponding [`End`] event is not found, [`Error::UnexpectedEof`]
    /// is returned. In particular, that error is returned if you call
    /// this method without consuming the corresponding [`Start`] event first.
    ///
    /// If your reader was created from a string slice or byte array slice, it is
    /// better to use the [`read_to_end()`] method, because it will not copy bytes
    /// into an intermediate buffer.
    ///
    /// The provided `buf` buffer will hold the content of only one event at a time.
    /// Before each event is read the buffer is cleared. If you know an
    /// appropriate size of each event, you can preallocate the buffer to reduce
    /// the number of reallocations.
    ///
    /// The `end` parameter should contain the name of the end element _in the reader
    /// encoding_. It is good practice to always get that parameter using the
    /// [`BytesStart::to_end()`] method.
    ///
    /// The correctness of the skipped events is not checked if you disabled
    /// the [`check_end_names`] option.
    ///
    /// # Namespaces
    ///
    /// While the [`Reader`] does not support namespace resolution, namespaces
    /// do not change the algorithm for comparing names. Although the names
    /// `a:name` and `b:name`, where both prefixes `a` and `b` resolve to the
    /// same namespace, are semantically equivalent, `</b:name>` cannot close
    /// `<a:name>`, because according to [the specification]
    ///
    /// > The end of every element that begins with a **start-tag** MUST be marked
    /// > by an **end-tag** containing a name that echoes the element's type as
    /// > given in the **start-tag**
    ///
    /// # Examples
    ///
    /// This example shows how you can skip XML content after you read the
    /// start event.
+ /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::{BytesStart, Event}; + /// use quick_xml::Reader; + /// + /// let mut reader = Reader::from_reader(r#" + /// + /// + /// + /// + /// + /// + /// + /// + /// "#.as_bytes()); + /// reader.trim_text(true); + /// let mut buf = Vec::new(); + /// + /// let start = BytesStart::borrowed_name(b"outer"); + /// let end = start.to_end().into_owned(); + /// + /// // First, we read a start event... + /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Start(start)); + /// + /// //...then, we could skip all events to the corresponding end event. + /// // This call will correctly handle nested elements. + /// // Note, however, that this method does not handle namespaces. + /// reader.read_to_end_into(end.name(), &mut buf).unwrap(); + /// + /// // At the end we should get an Eof event, because we ate the whole XML + /// assert_eq!(reader.read_event_into(&mut buf).unwrap(), Event::Eof); + /// ``` + /// + /// [`Start`]: Event::Start + /// [`End`]: Event::End + /// [`read_to_end()`]: Self::read_to_end + /// [`check_end_names`]: Self::check_end_names + /// [the specification]: https://www.w3.org/TR/xml11/#dt-etag + pub fn read_to_end_into<'_self, 'buf>( + &'_self mut self, + end: QName<'static>, + buf: &'buf mut Vec, + ) -> impl Future> + 'buf + where + '_self: 'buf, + { + async move { + let mut depth = 0; + loop { + buf.clear(); + match self.read_event_into(buf).await { + Err(e) => return Err(e), + + Ok(Event::Start(e)) if e.name() == end => depth += 1, + Ok(Event::End(e)) if e.name() == end => { + if depth == 0 { + return Ok(()); + } + depth -= 1; + } + Ok(Event::Eof) => { + let name = self.decoder().decode(end.as_ref()); + return Err(Error::UnexpectedEof(format!("", name))); + } + _ => (), + } + } + } + } + + /// Reads optional text between start and end tags. + /// + /// If the next event is a [`Text`] event, returns the decoded and unescaped content as a + /// `String`. 
If the next event is an [`End`] event, returns the empty string. In all other + /// cases, returns an error. + /// + /// Any text will be decoded using the XML encoding specified in the XML declaration (or UTF-8 + /// if none is specified). + /// + /// # Examples + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::Reader; + /// use quick_xml::events::Event; + /// + /// let mut xml = Reader::from_reader(b" + /// <b> + /// + /// " as &[u8]); + /// xml.trim_text(true); + /// + /// let expected = ["", ""]; + /// for &content in expected.iter() { + /// match xml.read_event_into(&mut Vec::new()) { + /// Ok(Event::Start(ref e)) => { + /// assert_eq!(&xml.read_text_into(e.name(), &mut Vec::new()).unwrap(), content); + /// }, + /// e => panic!("Expecting Start event, found {:?}", e), + /// } + /// } + /// ``` + /// + /// [`Text`]: Event::Text + /// [`End`]: Event::End + pub async fn read_text_into( + &mut self, + end: QName<'static>, + buf: &mut Vec, + ) -> Result { + let s = match self.read_event_into(buf).await { + Err(e) => return Err(e), + + Ok(Event::Text(e)) => e.unescape_and_decode(self), + Ok(Event::End(e)) if e.name() == end => return Ok("".to_string()), + Ok(Event::Eof) => return Err(Error::UnexpectedEof("Text".to_string())), + _ => return Err(Error::TextNotFound), + }; + self.read_to_end_into(end, buf).await?; + s + } + + /// Reads the next event and resolves its namespace (if applicable). 
+ /// + /// # Examples + /// + /// ``` + /// use std::str::from_utf8; + /// use quick_xml::Reader; + /// use quick_xml::events::Event; + /// use quick_xml::name::ResolveResult::*; + /// + /// let xml = r#" + /// Test + /// Test 2 + /// "#; + /// let mut reader = Reader::from_reader(xml.as_bytes()); + /// reader.trim_text(true); + /// let mut count = 0; + /// let mut buf = Vec::new(); + /// let mut ns_buf = Vec::new(); + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_namespaced_event(&mut buf, &mut ns_buf) { + /// Ok((Bound(ns), Event::Start(e))) => { + /// count += 1; + /// match (ns.as_ref(), e.local_name().as_ref()) { + /// (b"www.xxxx", b"tag1") => (), + /// (b"www.yyyy", b"tag2") => (), + /// (ns, n) => panic!("Namespace and local name mismatch"), + /// } + /// println!("Resolved namespace: {:?}", ns); + /// } + /// Ok((Unbound, Event::Start(_))) => { + /// panic!("Element not in any namespace") + /// }, + /// Ok((Unknown(p), Event::Start(_))) => { + /// panic!("Undeclared namespace prefix {:?}", String::from_utf8(p)) + /// } + /// Ok((_, Event::Text(e))) => { + /// txt.push(e.unescape_and_decode(&reader).expect("Error!")) + /// }, + /// Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e), + /// Ok((_, Event::Eof)) => break, + /// _ => (), + /// } + /// buf.clear(); + /// } + /// println!("Found {} start events", count); + /// println!("Text events: {:?}", txt); + /// ``` + pub async fn read_namespaced_event<'b, 'ns>( + &mut self, + buf: &'b mut Vec, + namespace_buffer: &'ns mut Vec, + ) -> Result<(ResolveResult<'ns>, Event<'b>)> { + if self.pending_pop { + self.ns_resolver.pop(namespace_buffer); + } + self.pending_pop = false; + let event = self.read_event_into(buf).await; + self.resolve_namespaced_event_inner(event, namespace_buffer) + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::reader::test::check; + + fn input_from_bytes(bytes: &[u8]) -> AsyncReader<&[u8]> { + AsyncReader(bytes) + } + + fn 
reader_from_str(s: &str) -> Reader> { + Reader::from_async_reader(s.as_bytes()) + } + + #[allow(dead_code)] + fn reader_from_bytes(s: &[u8]) -> Reader> { + Reader::from_async_reader(s) + } + + check!(#[tokio::test] async { + let mut buf = Vec::new(); await + }); +}