diff --git a/Changelog.md b/Changelog.md index e75d1407..1bf0fe4f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -41,6 +41,7 @@ under the `quick-xml::encoding` namespace. - [#450]: Added support of asynchronous [tokio](https://tokio.rs/) readers - [#455]: Change return type of all `read_to_end*` methods to return a span between tags +- [#455]: Added `Reader::read_text` method to return a raw content (including markup) between tags ### Bug Fixes diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 379f067c..0c02e5e2 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -4,6 +4,7 @@ //! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname //! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname +use std::borrow::Cow; use std::fs::File; use std::io::{BufRead, BufReader}; use std::ops::Deref; @@ -750,6 +751,75 @@ impl<'i> NsReader<&'i [u8]> { // match literally the start name. See `Self::check_end_names` documentation self.reader.read_to_end(end) } + + /// Reads content between start and end tags, including any markup. This + /// function is supposed to be called after you already read a [`Start`] event. + /// + /// Manages nested cases where parent and child elements have the same name. + /// + /// This method does not unescape read data, instead it returns content + /// "as is" of the XML document. This is because it has no idea what text + /// it reads, and if, for example, it contains CDATA section, attempt to + /// unescape it content will spoil data. + /// + /// Any text will be decoded using the XML current [`decoder()`]. + /// + /// Actually, this method perform the following code: + /// + /// ```ignore + /// let span = reader.read_to_end(end)?; + /// let text = reader.decoder().decode(&reader.inner_slice[span]); + /// ``` + /// + /// # Examples + /// + /// This example shows, how you can read a HTML content from your XML document. 
+ /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// # use std::borrow::Cow; + /// use quick_xml::events::{BytesStart, Event}; + /// use quick_xml::NsReader; + /// + /// let mut reader = NsReader::from_str(r#" + /// + ///
Usual XML rules does not apply inside it + ///
For example, elements not needed to be "closed" + /// + /// "#); + /// reader.trim_text(true); + /// + /// let start = BytesStart::new("html"); + /// let end = start.to_end().into_owned(); + /// + /// // First, we read a start event... + /// assert_eq!(reader.read_event().unwrap(), Event::Start(start)); + /// // ...and disable checking of end names because we expect HTML further... + /// reader.check_end_names(false); + /// + /// // ...then, we could read text content until close tag. + /// // This call will correctly handle nested elements. + /// let text = reader.read_text(end.name()).unwrap(); + /// assert_eq!(text, Cow::Borrowed(r#" + ///
Usual XML rules does not apply inside it + ///
For example, elements not needed to be "closed"
+ /// "#));
+ ///
+ /// // Now we can enable checks again
+ /// reader.check_end_names(true);
+ ///
+ /// // At the end we should get an Eof event, because we have consumed the whole XML document
+ /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
+ /// ```
+ ///
+ /// [`Start`]: Event::Start
+ /// [`decoder()`]: Reader::decoder()
+ #[inline]
+ pub fn read_text(&mut self, end: QName) -> Result Usual XML rules does not apply inside it
+ /// For example, elements not needed to be "closed"
+ ///
+ /// ");
+ /// reader.trim_text(true);
+ ///
+ /// let start = BytesStart::new("html");
+ /// let end = start.to_end().into_owned();
+ ///
+ /// // First, we read a start event...
+ /// assert_eq!(reader.read_event().unwrap(), Event::Start(start));
+ /// // ...and disable checking of end names because we expect HTML further...
+ /// reader.check_end_names(false);
+ ///
+ /// // ...then, we can read the text content up to the closing tag.
+ /// // This call will correctly handle nested elements.
+ /// let text = reader.read_text(end.name()).unwrap();
+ /// assert_eq!(text, Cow::Borrowed(r#"
+ /// Usual XML rules does not apply inside it
+ /// For example, elements not needed to be "closed"
+ /// "#));
+ ///
+ /// // Now we can enable checks again
+ /// reader.check_end_names(true);
+ ///
+ /// // At the end we should get an Eof event, because we have consumed the whole XML document
+ /// assert_eq!(reader.read_event().unwrap(), Event::Eof);
+ /// ```
+ ///
+ /// [`Start`]: Event::Start
+ /// [`decoder()`]: Self::decoder()
+ pub fn read_text(&mut self, end: QName) -> Result