Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Edition2018 #274

Merged
merged 14 commits into from Aug 10, 2021
4 changes: 4 additions & 0 deletions .github/workflows/rust.yml
Expand Up @@ -16,9 +16,13 @@ jobs:
run: cargo build
- name: Run tests (no features)
run: cargo test
run: cargo test --no-default-features
- name: Run tests (serialize)
run: cargo test --features serialize
- name: Run tests (encoding+serialize)
run: cargo test --features encoding,serialize
- name: Run tests (escape-html+serialize)
run: cargo test --features escape-html,serialize
- name: Check fmt
run: cargo fmt -- --check

1 change: 1 addition & 0 deletions Cargo.toml
Expand Up @@ -3,6 +3,7 @@ name = "quick-xml"
version = "0.22.0"
authors = ["Johann Tuffe <tafia973@gmail.com>"]
description = "High performance xml reader and writer"
edition = "2018"

documentation = "https://docs.rs/quick-xml"
repository = "https://github.com/tafia/quick-xml"
Expand Down
6 changes: 6 additions & 0 deletions Changelog.md
Expand Up @@ -10,6 +10,12 @@

## Unreleased

- style: convert to rust edition 2018
- fix: don't encode multi byte escape characters as big endian
- feat: add `Writer::write_nested_event`
- feat: add `BytesStart::try_get_attribute`
- test: add more tests on GitHub Actions

## 0.22.0

- feat (breaking): Move html entity escape behind a `'escape-html'` feature to help with compilation
Expand Down
2 changes: 1 addition & 1 deletion compare/Cargo.toml
Expand Up @@ -9,5 +9,5 @@ edition = "2018"
[dev-dependencies]
quick-xml = { path = "..", features = ["serialize"] }
xml-rs = "0.8.0"
serde-xml-rs = "0.3.1"
serde-xml-rs = "0.4.1"
serde = { version = "1.0.103", features = [ "derive" ] }
8 changes: 4 additions & 4 deletions src/errors.rs
Expand Up @@ -33,7 +33,7 @@ pub enum Error {
/// Duplicate attribute
DuplicatedAttribute(usize, usize),
/// Escape error
EscapeError(::escape::EscapeError),
EscapeError(crate::escape::EscapeError),
}

impl From<::std::io::Error> for Error {
Expand All @@ -52,18 +52,18 @@ impl From<::std::str::Utf8Error> for Error {
}
}

impl From<::escape::EscapeError> for Error {
impl From<crate::escape::EscapeError> for Error {
/// Creates a new `Error::EscapeError` from the given error
#[inline]
fn from(error: ::escape::EscapeError) -> Error {
fn from(error: crate::escape::EscapeError) -> Error {
Error::EscapeError(error)
}
}

/// A specialized `Result` type where the error is hard-wired to [`Error`].
///
/// [`Error`]: enum.Error.html
pub type Result<T> = ::std::result::Result<T, Error>;
pub type Result<T> = std::result::Result<T, Error>;

impl std::fmt::Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
Expand Down
51 changes: 42 additions & 9 deletions src/escapei.rs
Expand Up @@ -63,17 +63,40 @@ impl std::error::Error for EscapeError {}
/// Escapes every xml special character (`<`, `>`, `&`, `'`, `"`) found in `raw`,
/// replacing each one with its corresponding xml escaped value.
///
/// This wrapper only decides which bytes must be escaped; the rewriting itself
/// is delegated to `_escape`.
pub fn escape(raw: &[u8]) -> Cow<[u8]> {
    // Markup delimiters, the ampersand and both quote styles are all escaped.
    _escape(raw, |byte| matches!(byte, b'<' | b'>' | b'\'' | b'&' | b'"'))
}

/// Escapes only the markup-significant xml characters (`<`, `>`, `&`) found in `raw`,
/// replacing each one with its corresponding xml escaped value.
///
/// Quote characters (`"` and `'`) are deliberately left untouched: xml allows them
/// (though it is not recommended) to appear unescaped in text content. For that
/// reason this function should only be used for escaping text content.
pub fn partial_escape(raw: &[u8]) -> Cow<[u8]> {
    // Quotes are intentionally absent from this set.
    _escape(raw, |byte| matches!(byte, b'<' | b'>' | b'&'))
}

/// Escapes a `&[u8]` and replaces a subset of xml special characters (<, >, &, ', ") with their
/// corresponding xml escaped value.
fn _escape<F: Fn(u8) -> bool>(raw: &[u8], escape_chars: F) -> Cow<[u8]> {
let mut escaped = None;
let mut bytes = raw.iter();
let mut pos = 0;
while let Some(i) = bytes.position(|&b| to_escape(b)) {
while let Some(i) = bytes.position(|&b| escape_chars(b)) {
if escaped.is_none() {
escaped = Some(Vec::with_capacity(raw.len()));
}
Expand Down Expand Up @@ -121,7 +144,7 @@ pub fn unescape_with<'a>(
}

/// Unescape a `&[u8]` and replaces all xml escaped characters ('&...;') into their corresponding
/// value, using an optional dictionnary of custom entities.
/// value, using an optional dictionary of custom entities.
///
/// # Pre-condition
///
Expand Down Expand Up @@ -182,7 +205,7 @@ const fn named_entity(name: &[u8]) -> Option<&str> {
b"amp" => "&",
b"apos" => "'",
b"quot" => "\"",
_ => return None
_ => return None,
};
Some(s)
}
Expand Down Expand Up @@ -821,11 +844,9 @@ const fn named_entity(name: &[u8]) -> Option<&str> {
b"mid" | b"VerticalBar" | b"smid" | b"shortmid" => "\u{2223}",
b"nmid" | b"NotVerticalBar" | b"nsmid" | b"nshortmid" => "\u{2224}",
b"par" | b"parallel" | b"DoubleVerticalBar" | b"spar" | b"shortparallel" => "\u{2225}",
b"npar"
| b"nparallel"
| b"NotDoubleVerticalBar"
| b"nspar"
| b"nshortparallel" => "\u{2226}",
b"npar" | b"nparallel" | b"NotDoubleVerticalBar" | b"nspar" | b"nshortparallel" => {
"\u{2226}"
}
b"and" | b"wedge" => "\u{2227}",
b"or" | b"vee" => "\u{2228}",
b"cap" => "\u{2229}",
Expand Down Expand Up @@ -1646,7 +1667,7 @@ const fn named_entity(name: &[u8]) -> Option<&str> {
b"xopf" => "\u{1D56}",
b"yopf" => "\u{1D56}",
b"zopf" => "\u{1D56}",
_ => return None
_ => return None,
};
Some(s)
}
Expand Down Expand Up @@ -1741,3 +1762,15 @@ fn test_escape() {
"prefix_&quot;a&quot;b&amp;&lt;&gt;c".as_bytes()
);
}

#[test]
fn test_partial_escape() {
assert_eq!(&*partial_escape(b"test"), b"test");
assert_eq!(&*partial_escape(b"<test>"), b"&lt;test&gt;");
assert_eq!(&*partial_escape(b"\"a\"bc"), b"\"a\"bc");
assert_eq!(&*partial_escape(b"\"a\"b&c"), b"\"a\"b&amp;c");
assert_eq!(
&*partial_escape(b"prefix_\"a\"b&<>c"),
"prefix_\"a\"b&amp;&lt;&gt;c".as_bytes()
);
}
11 changes: 4 additions & 7 deletions src/events/attributes.rs
Expand Up @@ -2,13 +2,10 @@
//!
//! Provides an iterator over attributes key/value pairs

use errors::{Error, Result};
use escape::{do_unescape, escape};
use reader::{is_whitespace, Reader};
use std::borrow::Cow;
use std::collections::HashMap;
use std::io::BufRead;
use std::ops::Range;
use crate::errors::{Error, Result};
use crate::escape::{do_unescape, escape};
use crate::reader::{is_whitespace, Reader};
use std::{borrow::Cow, collections::HashMap, io::BufRead, ops::Range};

/// Iterator over XML attributes.
///
Expand Down
31 changes: 20 additions & 11 deletions src/events/mod.rs
Expand Up @@ -39,16 +39,11 @@ pub mod attributes;

#[cfg(feature = "encoding_rs")]
use encoding_rs::Encoding;
use std::borrow::Cow;
use std::collections::HashMap;
use std::io::BufRead;
use std::ops::Deref;
use std::str::from_utf8;
use std::{borrow::Cow, collections::HashMap, io::BufRead, ops::Deref, str::from_utf8};

use self::attributes::{Attribute, Attributes};
use errors::{Error, Result};
use escape::{do_unescape, escape};
use reader::Reader;
use crate::escape::{do_unescape, escape};
use crate::{errors::Error, errors::Result, reader::Reader};
use attributes::{Attribute, Attributes};

use memchr;

Expand Down Expand Up @@ -132,7 +127,7 @@ impl<'a> BytesStart<'a> {
///
/// # Example
///
/// ```
/// ```rust
/// # use quick_xml::{Error, Writer};
/// use quick_xml::events::{BytesStart, Event};
///
Expand Down Expand Up @@ -353,6 +348,20 @@ impl<'a> BytesStart<'a> {
self.buf.to_mut().truncate(self.name_len);
self
}

/// Looks up the first attribute whose key is equal to `attr_name`.
///
/// Returns `Ok(Some(attribute))` for the first matching attribute and
/// `Ok(None)` when the attributes are exhausted without a match.
///
/// # Errors
///
/// If iterating the attributes yields an error before a matching attribute
/// is found, that error is returned.
pub fn try_get_attribute<N: AsRef<[u8]> + Sized>(
    &'a self,
    attr_name: N,
) -> Result<Option<Attribute<'a>>> {
    let wanted = attr_name.as_ref();
    self.attributes()
        .find(|candidate| match candidate {
            // Stop on an error as well, so it is surfaced rather than skipped.
            Err(_) => true,
            Ok(attr) => attr.key == wanted,
        })
        .transpose()
}
}

impl<'a> std::fmt::Debug for BytesStart<'a> {
Expand Down Expand Up @@ -831,7 +840,7 @@ pub enum Event<'a> {

impl<'a> Event<'a> {
/// Converts the event to an owned version, untied to the lifetime of
/// buffer used when reading but incurring a new, seperate allocation.
/// buffer used when reading but incurring a new, separate allocation.
pub fn into_owned(self) -> Event<'static> {
match self {
Event::Start(e) => Event::Start(e.into_owned()),
Expand Down
11 changes: 5 additions & 6 deletions src/lib.rs
Expand Up @@ -144,8 +144,8 @@ mod errors;
mod escapei;
pub mod escape {
//! Manage xml character escapes
pub(crate) use escapei::do_unescape;
pub use escapei::{escape, unescape, unescape_with, EscapeError};
pub(crate) use crate::escapei::{do_unescape, EscapeError};
pub use crate::escapei::{escape, partial_escape, unescape, unescape_with};
}
pub mod events;
mod reader;
Expand All @@ -156,7 +156,6 @@ mod writer;

// reexports
#[cfg(feature = "serialize")]
pub use errors::serialize::DeError;
pub use errors::{Error, Result};
pub use reader::Reader;
pub use writer::Writer;
pub use crate::errors::serialize::DeError;
pub use crate::errors::{Error, Result};
pub use crate::{reader::Reader, writer::Writer};
8 changes: 3 additions & 5 deletions src/reader.rs
Expand Up @@ -2,16 +2,14 @@

#[cfg(feature = "encoding")]
use std::borrow::Cow;
use std::fs::File;
use std::io::{self, BufRead, BufReader};
use std::path::Path;
use std::str::from_utf8;
use std::{fs::File, path::Path, str::from_utf8};

#[cfg(feature = "encoding")]
use encoding_rs::{Encoding, UTF_16BE, UTF_16LE};

use errors::{Error, Result};
use events::{attributes::Attribute, BytesDecl, BytesEnd, BytesStart, BytesText, Event};
use crate::errors::{Error, Result};
use crate::events::{attributes::Attribute, BytesDecl, BytesEnd, BytesStart, BytesText, Event};

use memchr;

Expand Down