Skip to content

Commit

Permalink
Merge pull request #398 from Mingun/no-empty-texts
Browse files Browse the repository at this point in the history
Do not generate empty `Text` events
  • Loading branch information
Mingun committed Jun 19, 2022
2 parents e701c4d + bdf9f46 commit 3b37c0e
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 16 deletions.
3 changes: 3 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
- [#393]: Now `event_namespace`, `attribute_namespace` and `read_event_namespaced`
return `ResolveResult::Unknown` if the prefix was not registered in the namespace buffer
- [#393]: Fix processing breaking after encountering an attribute with a reserved name (starting with "xmlns")
- [#363]: Do not generate empty `Event::Text` events

### Misc Changes

Expand Down Expand Up @@ -61,9 +62,11 @@
- [#387]: Added a bunch of tests for sequences deserialization
- [#393]: Added more tests for namespace resolver
- [#393]: Added tests for reserved names (starting with "xml"i) -- see <https://www.w3.org/TR/xml-names11/#xmlReserved>
- [#363]: Add tests for `Reader::read_event_buffered` to ensure that the proper events are generated for the corresponding inputs

[#8]: https://github.com/Mingun/fast-xml/pull/8
[#9]: https://github.com/Mingun/fast-xml/pull/9
[#363]: https://github.com/tafia/quick-xml/issues/363
[#387]: https://github.com/tafia/quick-xml/pull/387
[#391]: https://github.com/tafia/quick-xml/pull/391
[#393]: https://github.com/tafia/quick-xml/pull/393
Expand Down
118 changes: 115 additions & 3 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -266,9 +266,11 @@ impl<R: BufRead> Reader<R> {

if self.trim_text_start {
self.reader.skip_whitespace(&mut self.buf_position)?;
if self.reader.skip_one(b'<', &mut self.buf_position)? {
return self.read_event_buffered(buf);
}
}

// If we are already at the `<` symbol, do not try to return an empty Text event
if self.reader.skip_one(b'<', &mut self.buf_position)? {
return self.read_event_buffered(buf);
}

match self
Expand Down Expand Up @@ -2244,6 +2246,116 @@ mod test {
}
}
}

/// Ensures that no empty `Text` events are generated.
///
/// Each test feeds a minimal input to the reader and checks that the *first*
/// event returned by `read_event_buffered` is the expected one, i.e. that it
/// is not preceded by an empty `Event::Text`.
///
/// NOTE(review): `$buf` is a macro metavariable, so this module is expected to
/// live inside a `macro_rules!` body whose definition is outside this view --
/// confirm what `$buf` expands to at the invocation sites.
mod read_event_buffered {
use crate::events::{BytesCData, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use crate::reader::Reader;
use pretty_assertions::assert_eq;

/// The first event read from `<?xml ?>` is `Event::Decl`.
#[test]
fn declaration() {
let mut reader = Reader::from_str("<?xml ?>");

assert_eq!(
reader.read_event_buffered($buf).unwrap(),
// NOTE(review): `3` is presumably the length of the name `xml` within
// the content `xml ` -- confirm against `BytesStart::borrowed`.
Event::Decl(BytesDecl::from_start(BytesStart::borrowed(b"xml ", 3)))
);
}

/// The first event read from `<!DOCTYPE x>` is `Event::DocType` with content `x`.
#[test]
fn doctype() {
let mut reader = Reader::from_str("<!DOCTYPE x>");

assert_eq!(
reader.read_event_buffered($buf).unwrap(),
Event::DocType(BytesText::from_escaped(b"x".as_ref()))
);
}

/// The first event read from `<?xml-stylesheet?>` is `Event::PI`.
#[test]
fn processing_instruction() {
let mut reader = Reader::from_str("<?xml-stylesheet?>");

assert_eq!(
reader.read_event_buffered($buf).unwrap(),
Event::PI(BytesText::from_escaped(b"xml-stylesheet".as_ref()))
);
}

/// The first event read from `<tag>` is `Event::Start` for the name `tag`.
#[test]
fn start() {
let mut reader = Reader::from_str("<tag>");

assert_eq!(
reader.read_event_buffered($buf).unwrap(),
Event::Start(BytesStart::borrowed_name(b"tag"))
);
}

/// The first event read from `</tag>` is `Event::End` for the name `tag`.
#[test]
fn end() {
let mut reader = Reader::from_str("</tag>");
// The input is a lone end tag (not well-formed XML), so do not check
// that the end name is paired with a start name
reader.check_end_names(false);

assert_eq!(
reader.read_event_buffered($buf).unwrap(),
Event::End(BytesEnd::borrowed(b"tag"))
);
}

/// The first event read from `<tag/>` is `Event::Empty` for the name `tag`.
#[test]
fn empty() {
let mut reader = Reader::from_str("<tag/>");

assert_eq!(
reader.read_event_buffered($buf).unwrap(),
Event::Empty(BytesStart::borrowed_name(b"tag"))
);
}

/// Non-empty text is still reported: the first event read from `text` is
/// `Event::Text` with content `text`.
#[test]
fn text() {
let mut reader = Reader::from_str("text");

assert_eq!(
reader.read_event_buffered($buf).unwrap(),
Event::Text(BytesText::from_escaped(b"text".as_ref()))
);
}

/// The first event read from an empty CDATA section is `Event::CData`
/// with empty content.
#[test]
fn cdata() {
let mut reader = Reader::from_str("<![CDATA[]]>");

assert_eq!(
reader.read_event_buffered($buf).unwrap(),
Event::CData(BytesCData::from_str(""))
);
}

/// The first event read from an empty comment is `Event::Comment`
/// with empty content.
#[test]
fn comment() {
let mut reader = Reader::from_str("<!---->");

assert_eq!(
reader.read_event_buffered($buf).unwrap(),
Event::Comment(BytesText::from_escaped(b"".as_ref()))
);
}

/// The first event read from empty input is `Event::Eof`.
#[test]
fn eof() {
let mut reader = Reader::from_str("");

assert_eq!(
reader.read_event_buffered($buf).unwrap(),
Event::Eof
);
}
}
};
}

Expand Down
5 changes: 1 addition & 4 deletions tests/unit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,10 +168,7 @@ fn test_trim_test() {

let mut r = Reader::from_str(txt);
r.trim_text(false);
next_eq!(
r, Text, b"", Start, b"a", Text, b"", Start, b"b", Text, b" ", End, b"b", Text, b"", End,
b"a"
);
next_eq!(r, Start, b"a", Start, b"b", Text, b" ", End, b"b", End, b"a");
}

#[test]
Expand Down
11 changes: 2 additions & 9 deletions tests/xmlrs_reader_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,9 +257,7 @@ fn issue_98_cdata_ending_with_right_bracket() {
r#"<hello><![CDATA[Foo [Bar]]]></hello>"#,
r#"
|StartElement(hello)
|Characters()
|CData(Foo [Bar])
|Characters()
|EndElement(hello)
|EndDocument
"#,
Expand Down Expand Up @@ -306,9 +304,7 @@ fn issue_105_unexpected_double_dash() {
r#"<hello><![CDATA[--]]></hello>"#,
r#"
|StartElement(hello)
|Characters()
|CData(--)
|Characters()
|EndElement(hello)
|EndDocument
"#,
Expand Down Expand Up @@ -359,10 +355,12 @@ fn default_namespace_applies_to_end_elem() {
);
}

/// String-based convenience wrapper around `test_bytes`.
///
/// Forwards the UTF-8 bytes of `input` and `output`, together with
/// `is_short` unchanged, to `test_bytes`.
#[track_caller]
fn test(input: &str, output: &str, is_short: bool) {
    let (raw_input, raw_output) = (input.as_bytes(), output.as_bytes());
    test_bytes(raw_input, raw_output, is_short)
}

#[track_caller]
fn test_bytes(input: &[u8], output: &[u8], is_short: bool) {
// Normalize newlines on Windows to just \n, which is what the reader and
// writer use.
Expand All @@ -380,11 +378,6 @@ fn test_bytes(input: &[u8], output: &[u8], is_short: bool) {
let mut buf = Vec::new();
let mut ns_buffer = Vec::new();

if !is_short {
// discard first whitespace
reader.read_event(&mut buf).unwrap();
}

loop {
buf.clear();
let event = reader.read_namespaced_event(&mut buf, &mut ns_buffer);
Expand Down

0 comments on commit 3b37c0e

Please sign in to comment.