From 42802fc51bfe48940e615f8c4b59e82051e5a69b Mon Sep 17 00:00:00 2001 From: Mingun Date: Sat, 30 Jul 2022 16:52:28 +0500 Subject: [PATCH 1/8] Remove `pub(crate)` as it is not required anymore It can confuse due to impl Deref for NsReader if NsReader's own `reader` would be inaccessible --- src/reader/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/reader/mod.rs b/src/reader/mod.rs index ac3c5737..553b9b6e 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -285,7 +285,7 @@ impl EncodingRef { #[derive(Clone)] pub struct Reader { /// reader - pub(crate) reader: R, + reader: R, /// current buffer position, useful for debugging errors buf_position: usize, /// current state Open/Close From c67f03704f53a1209a5f02862da6162a8c158a82 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sat, 30 Jul 2022 20:50:47 +0500 Subject: [PATCH 2/8] Add example for `Reader::read_event` --- src/reader/slice_reader.rs | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index ee79fc4d..0d263b12 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -33,6 +33,35 @@ impl<'a> Reader<&'a [u8]> { } /// Read an event that borrows from the input rather than a buffer. 
+ /// + /// # Examples + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// use quick_xml::events::Event; + /// use quick_xml::Reader; + /// + /// let mut reader = Reader::from_str(r#" + /// + /// Test + /// Test 2 + /// + /// "#); + /// reader.trim_text(true); + /// + /// let mut count = 0; + /// let mut txt = Vec::new(); + /// loop { + /// match reader.read_event().unwrap() { + /// Event::Start(e) => count += 1, + /// Event::Text(e) => txt.push(e.unescape().unwrap().into_owned()), + /// Event::Eof => break, + /// _ => (), + /// } + /// } + /// assert_eq!(count, 3); + /// assert_eq!(txt, vec!["Test".to_string(), "Test 2".to_string()]); + /// ``` #[inline] pub fn read_event(&mut self) -> Result> { self.read_event_impl(()) From 4ac24b1b634f42879f8456164a3d40ab18985846 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sat, 30 Jul 2022 02:28:07 +0500 Subject: [PATCH 3/8] Extract processing of namespaces into separate functions These functions will be reused by async reading methods Co-authored-by: Sophie Tauchert --- src/reader/ns_reader.rs | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index deeb7e74..a7ecc0a6 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -58,11 +58,20 @@ impl NsReader { where R: XmlSource<'i, B>, { + self.pop(); + let event = self.reader.read_event_impl(buf); + self.process_event(event) + } + + fn pop(&mut self) { if self.pending_pop { self.ns_resolver.pop(&mut self.buffer); self.pending_pop = false; } - match self.reader.read_event_impl(buf) { + } + + fn process_event<'i>(&mut self, event: Result>) -> Result> { + match event { Ok(Event::Start(e)) => { self.ns_resolver.push(&e, &mut self.buffer); Ok(Event::Start(e)) @@ -84,11 +93,11 @@ impl NsReader { } } - fn read_resolved_event_impl<'i, B>(&mut self, buf: B) -> Result<(ResolveResult, Event<'i>)> - where - R: XmlSource<'i, B>, - { - match self.read_event_impl(buf) { + fn 
resolve_event<'i>( + &mut self, + event: Result>, + ) -> Result<(ResolveResult, Event<'i>)> { + match event { Ok(Event::Start(e)) => Ok(( self.ns_resolver.find(e.name(), &mut self.buffer), Event::Start(e), @@ -408,7 +417,8 @@ impl NsReader { &mut self, buf: &'b mut Vec, ) -> Result<(ResolveResult, Event<'b>)> { - self.read_resolved_event_impl(buf) + let event = self.read_event_impl(buf); + self.resolve_event(event) } /// Reads until end element is found using provided buffer as intermediate @@ -632,7 +642,8 @@ impl<'i> NsReader<&'i [u8]> { /// [`read_event()`]: Self::read_event #[inline] pub fn read_resolved_event(&mut self) -> Result<(ResolveResult, Event<'i>)> { - self.read_resolved_event_impl(()) + let event = self.read_event_impl(()); + self.resolve_event(event) } /// Reads until end element is found. This function is supposed to be called From 5148d37a8a1504acc0c51a8dc755a28f33e5d520 Mon Sep 17 00:00:00 2001 From: Mingun Date: Sun, 31 Jul 2022 15:56:36 +0500 Subject: [PATCH 4/8] Replace name `buf_position` with more traditional `offset` --- src/reader/mod.rs | 34 +++++++++++++++++----------------- tests/unit_tests.rs | 8 ++++---- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 553b9b6e..2befd658 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -287,7 +287,7 @@ pub struct Reader { /// reader reader: R, /// current buffer position, useful for debugging errors - buf_position: usize, + offset: usize, /// current state Open/Close tag_state: TagState, /// expand empty element into an opening and closing element @@ -343,7 +343,7 @@ impl Reader { trim_text_end: false, trim_markup_names_in_closing_tags: true, check_end_names: true, - buf_position: 0, + offset: 0, check_comments: false, #[cfg(feature = "encoding")] @@ -430,9 +430,9 @@ impl Reader { // when internal state is Opened, we have actually read until '<', // which we don't want to show if let TagState::Opened = self.tag_state { - 
self.buf_position - 1 + self.offset - 1 } else { - self.buf_position + self.offset } } @@ -506,7 +506,7 @@ impl Reader { if let Some(p) = memchr::memchr_iter(b'-', &buf[3..len - 2]) .position(|p| buf[3 + p + 1] == b'-') { - self.buf_position += len - p; + self.offset += len - p; return Err(Error::UnexpectedToken("--".to_string())); } } @@ -550,8 +550,8 @@ impl Reader { &buf[1..] }; if self.check_end_names { - let mismatch_err = |expected: &[u8], found: &[u8], buf_position: &mut usize| { - *buf_position -= buf.len(); + let mismatch_err = |expected: &[u8], found: &[u8], offset: &mut usize| { + *offset -= buf.len(); Err(Error::EndEventMismatch { expected: from_utf8(expected).unwrap_or("").to_owned(), found: from_utf8(found).unwrap_or("").to_owned(), @@ -561,13 +561,13 @@ impl Reader { Some(start) => { let expected = &self.opened_buffer[start..]; if name != expected { - mismatch_err(expected, name, &mut self.buf_position) + mismatch_err(expected, name, &mut self.offset) } else { self.opened_buffer.truncate(start); Ok(Event::End(BytesEnd::wrap(name.into()))) } } - None => mismatch_err(b"", &buf[1..], &mut self.buf_position), + None => mismatch_err(b"", &buf[1..], &mut self.offset), } } else { Ok(Event::End(BytesEnd::wrap(name.into()))) @@ -595,7 +595,7 @@ impl Reader { Ok(Event::PI(BytesText::wrap(&buf[1..len - 1], self.decoder()))) } } else { - self.buf_position -= len; + self.offset -= len; Err(Error::UnexpectedEof("XmlDecl".to_string())) } } @@ -668,17 +668,17 @@ impl Reader { self.tag_state = TagState::Opened; if self.trim_text_start { - self.reader.skip_whitespace(&mut self.buf_position)?; + self.reader.skip_whitespace(&mut self.offset)?; } // If we already at the `<` symbol, do not try to return an empty Text event - if self.reader.skip_one(b'<', &mut self.buf_position)? { + if self.reader.skip_one(b'<', &mut self.offset)? 
{ return self.read_event_impl(buf); } match self .reader - .read_bytes_until(b'<', buf, &mut self.buf_position) + .read_bytes_until(b'<', buf, &mut self.offset) { Ok(Some(bytes)) => self.read_text(bytes, first), Ok(None) => Ok(Event::Eof), @@ -696,7 +696,7 @@ impl Reader { match self.reader.peek_one() { // ` match self.reader.read_bang_element(buf, &mut self.buf_position) { + Ok(Some(b'!')) => match self.reader.read_bang_element(buf, &mut self.offset) { Ok(None) => Ok(Event::Eof), Ok(Some((bang_type, bytes))) => self.read_bang(bang_type, bytes), Err(e) => Err(e), @@ -704,7 +704,7 @@ impl Reader { // ` match self .reader - .read_bytes_until(b'>', buf, &mut self.buf_position) + .read_bytes_until(b'>', buf, &mut self.offset) { Ok(None) => Ok(Event::Eof), Ok(Some(bytes)) => self.read_end(bytes), @@ -713,14 +713,14 @@ impl Reader { // ` match self .reader - .read_bytes_until(b'>', buf, &mut self.buf_position) + .read_bytes_until(b'>', buf, &mut self.offset) { Ok(None) => Ok(Event::Eof), Ok(Some(bytes)) => self.read_question_mark(bytes), Err(e) => Err(e), }, // `<...` - opening or self-closed tag - Ok(Some(_)) => match self.reader.read_element(buf, &mut self.buf_position) { + Ok(Some(_)) => match self.reader.read_element(buf, &mut self.offset) { Ok(None) => Ok(Event::Eof), Ok(Some(bytes)) => self.read_start(bytes), Err(e) => Err(e), diff --git a/tests/unit_tests.rs b/tests/unit_tests.rs index b9d62054..70d53236 100644 --- a/tests/unit_tests.rs +++ b/tests/unit_tests.rs @@ -415,7 +415,7 @@ fn test_new_xml_decl_empty() { } #[test] -fn test_buf_position_err_end_element() { +fn test_offset_err_end_element() { let mut r = Reader::from_str(""); r.trim_text(true).check_end_names(true); @@ -431,7 +431,7 @@ fn test_buf_position_err_end_element() { } #[test] -fn test_buf_position_err_comment() { +fn test_offset_err_comment() { let mut r = Reader::from_str(" Opened -/// Opened -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> Closed -/// Closed -- "#lt;false#gt;\n(no 
event)"\nText --> Opened +/// Init -- "(no event)"\nStartText --> OpenedTag +/// OpenedTag -- Decl, DocType, PI\nComment, CData\nStart, Empty, End --> ClosedTag +/// ClosedTag -- "#lt;false#gt;\n(no event)"\nText --> OpenedTag /// end -/// Closed -- "#lt;true#gt;"\nStart --> Empty -/// Empty -- End --> Closed +/// ClosedTag -- "#lt;true#gt;"\nStart --> Empty +/// Empty -- End --> ClosedTag /// _ -. Eof .-> Exit /// ``` #[derive(Clone)] -enum TagState { +enum ParseState { /// Initial state in which reader stay after creation. Transition from that /// state could produce a `StartText`, `Decl`, `Comment` or `Start` event. - /// The next state is always `Opened`. The reader will never return to this - /// state. The event emitted during transition to `Opened` is a `StartEvent` + /// The next state is always `OpenedTag`. The reader will never return to this + /// state. The event emitted during transition to `OpenedTag` is a `StartEvent` /// if the first symbol not `<`, otherwise no event are emitted. Init, /// State after seeing the `<` symbol. Depending on the next symbol all other /// events (except `StartText`) could be generated. /// - /// After generating ane event the reader moves to the `Closed` state. - Opened, + /// After generating ane event the reader moves to the `ClosedTag` state. + OpenedTag, /// State in which reader searches the `<` symbol of a markup. All bytes before /// that symbol will be returned in the [`Event::Text`] event. After that - /// the reader moves to the `Opened` state. - Closed, + /// the reader moves to the `OpenedTag` state. + ClosedTag, /// This state is used only if option `expand_empty_elements` is set to `true`. - /// Reader enters to this state when it is in a `Closed` state and emits an + /// Reader enters to this state when it is in a `ClosedTag` state and emits an /// [`Event::Start`] event. The next event emitted will be an [`Event::End`], - /// after which reader returned to the `Closed` state. 
+ /// after which reader returned to the `ClosedTag` state. Empty, /// Reader enters this state when `Eof` event generated or an error occurred. /// This is the last state, the reader stay in it forever. @@ -374,9 +374,9 @@ impl Reader { /// /// Useful when debugging errors. pub fn buffer_position(&self) -> usize { - // when internal state is Opened, we have actually read until '<', + // when internal state is OpenedTag, we have actually read until '<', // which we don't want to show - if let TagState::Opened = self.parser.tag_state { + if let ParseState::OpenedTag = self.parser.state { self.parser.offset - 1 } else { self.parser.offset @@ -405,28 +405,28 @@ impl Reader { where R: XmlSource<'i, B>, { - let event = match self.parser.tag_state { - TagState::Init => self.read_until_open(buf, true), - TagState::Closed => self.read_until_open(buf, false), - TagState::Opened => self.read_until_close(buf), - TagState::Empty => self.parser.close_expanded_empty(), - TagState::Exit => return Ok(Event::Eof), + let event = match self.parser.state { + ParseState::Init => self.read_until_open(buf, true), + ParseState::ClosedTag => self.read_until_open(buf, false), + ParseState::OpenedTag => self.read_until_close(buf), + ParseState::Empty => self.parser.close_expanded_empty(), + ParseState::Exit => return Ok(Event::Eof), }; match event { - Err(_) | Ok(Event::Eof) => self.parser.tag_state = TagState::Exit, + Err(_) | Ok(Event::Eof) => self.parser.state = ParseState::Exit, _ => {} } event } - /// Read until '<' is found and moves reader to an `Opened` state. + /// Read until '<' is found and moves reader to an `OpenedTag` state. 
/// /// Return a `StartText` event if `first` is `true` and a `Text` event otherwise fn read_until_open<'i, B>(&mut self, buf: B, first: bool) -> Result> where R: XmlSource<'i, B>, { - self.parser.tag_state = TagState::Opened; + self.parser.state = ParseState::OpenedTag; if self.parser.trim_text_start { self.reader.skip_whitespace(&mut self.parser.offset)?; @@ -453,7 +453,7 @@ impl Reader { where R: XmlSource<'i, B>, { - self.parser.tag_state = TagState::Closed; + self.parser.state = ParseState::ClosedTag; match self.reader.peek_one() { // `` + /// Trims trailing whitespaces from markup names in closing tags `` pub trim_markup_names_in_closing_tags: bool, - /// check if End nodes match last Start node + /// Check if [`Event::End`] nodes match last [`Event::Start`] node pub check_end_names: bool, - /// check if comments contains `--` (false per default) + /// Check if comments contains `--` (false per default) pub check_comments: bool, /// All currently Started elements which didn't have a matching /// End element yet. @@ -219,7 +219,7 @@ impl Parser { if let Some(&b'/') = buf.last() { let end = if name_end < len { name_end } else { len - 1 }; if self.expand_empty_elements { - self.tag_state = TagState::Empty; + self.state = ParseState::Empty; self.opened_starts.push(self.opened_buffer.len()); self.opened_buffer.extend(&buf[..end]); Ok(Event::Start(BytesStart::wrap(&buf[..len - 1], end))) @@ -237,7 +237,7 @@ impl Parser { #[inline] pub fn close_expanded_empty(&mut self) -> Result> { - self.tag_state = TagState::Closed; + self.state = ParseState::ClosedTag; let name = self .opened_buffer .split_off(self.opened_starts.pop().unwrap()); @@ -263,7 +263,7 @@ impl Default for Parser { fn default() -> Self { Self { offset: 0, - tag_state: TagState::Init, + state: ParseState::Init, expand_empty_elements: false, trim_text_start: false, trim_text_end: false,