Skip to content

Commit

Permalink
Add a write_bom() method to the Writer
Browse files Browse the repository at this point in the history
  • Loading branch information
dralley committed Aug 17, 2022
1 parent e27feab commit b302b6f
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 12 deletions.
8 changes: 5 additions & 3 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
- [#450]: Added support of asynchronous [tokio](https://tokio.rs/) readers
- [#455]: Change return type of all `read_to_end*` methods to return a span between tags
- [#455]: Added `Reader::read_text` method to return a raw content (including markup) between tags

- [#459]: Added a `Writer::write_bom()` method for inserting a Byte-Order-Mark into the document.

### Bug Fixes

Expand Down Expand Up @@ -175,11 +175,13 @@
- [#440]: Removed `Deserializer::from_slice` and `quick_xml::de::from_slice` methods because deserializing from a byte
array cannot guarantee borrowing due to possible copying while decoding.

- [#455]: Removed `Reader::read_text_into` which is only not a better wrapper over match on `Event::Text`
- [#455]: Removed `Reader::read_text_into` which is just a thin wrapper over match on `Event::Text`

- [#456]: Reader and writer stuff grouped under `reader` and `writer` modules.
You can still use the re-exported definitions from the crate root

- [#459]: Made the `Writer::write()` method non-public as writing random bytes to a document is not generally useful or desirable.

### New Tests

- [#9]: Added tests for incorrect nested tags in input
Expand Down Expand Up @@ -223,7 +225,7 @@
[#450]: https://github.com/tafia/quick-xml/pull/450
[#455]: https://github.com/tafia/quick-xml/pull/455
[#456]: https://github.com/tafia/quick-xml/pull/456

[#458]: https://github.com/tafia/quick-xml/pull/458
[#459]: https://github.com/tafia/quick-xml/pull/459

## 0.23.0 -- 2022-05-08

Expand Down
23 changes: 16 additions & 7 deletions src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@ use encoding_rs::{Encoding, UTF_16BE, UTF_16LE, UTF_8};
use crate::Error;
use crate::Result;

/// The Unicode byte order mark (U+FEFF) as it appears in a UTF-8 byte stream.
pub(crate) const UTF8_BOM: &[u8] = b"\xEF\xBB\xBF";
/// The Unicode byte order mark (U+FEFF) as it appears in a little-endian
/// UTF-16 byte stream.
// NOTE(review): presumably only referenced when the `encoding` feature is
// enabled, hence the lint suppression -- confirm against the feature gates.
#[allow(dead_code)]
pub(crate) const UTF16_LE_BOM: &[u8] = b"\xFF\xFE";
/// The Unicode byte order mark (U+FEFF) as it appears in a big-endian
/// UTF-16 byte stream.
// NOTE(review): presumably only referenced when the `encoding` feature is
// enabled, hence the lint suppression -- confirm against the feature gates.
#[allow(dead_code)]
pub(crate) const UTF16_BE_BOM: &[u8] = b"\xFE\xFF";

/// Decoder of byte slices into strings.
///
/// If feature `encoding` is enabled, this encoding taken from the `"encoding"`
Expand Down Expand Up @@ -62,7 +71,7 @@ impl Decoder {
///
/// If you instead want to use XML declared encoding, use the `encoding` feature
pub fn decode_with_bom_removal<'b>(&self, bytes: &'b [u8]) -> Result<Cow<'b, str>> {
let bytes = if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
let bytes = if bytes.starts_with(UTF8_BOM) {
&bytes[3..]
} else {
bytes
Expand Down Expand Up @@ -131,11 +140,11 @@ pub fn decode_with_bom_removal<'b>(bytes: &'b [u8]) -> Result<Cow<'b, str>> {

#[cfg(feature = "encoding")]
fn split_at_bom<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> (&'b [u8], &'b [u8]) {
if encoding == UTF_8 && bytes.starts_with(&[0xEF, 0xBB, 0xBF]) {
if encoding == UTF_8 && bytes.starts_with(UTF8_BOM) {
bytes.split_at(3)
} else if encoding == UTF_16LE && bytes.starts_with(&[0xFF, 0xFE]) {
} else if encoding == UTF_16LE && bytes.starts_with(UTF16_LE_BOM) {
bytes.split_at(2)
} else if encoding == UTF_16BE && bytes.starts_with(&[0xFE, 0xFF]) {
} else if encoding == UTF_16BE && bytes.starts_with(UTF16_BE_BOM) {
bytes.split_at(2)
} else {
(&[], bytes)
Expand Down Expand Up @@ -172,9 +181,9 @@ fn remove_bom<'b>(bytes: &'b [u8], encoding: &'static Encoding) -> &'b [u8] {
pub fn detect_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
match bytes {
// with BOM
_ if bytes.starts_with(&[0xFE, 0xFF]) => Some(UTF_16BE),
_ if bytes.starts_with(&[0xFF, 0xFE]) => Some(UTF_16LE),
_ if bytes.starts_with(&[0xEF, 0xBB, 0xBF]) => Some(UTF_8),
_ if bytes.starts_with(UTF16_BE_BOM) => Some(UTF_16BE),
_ if bytes.starts_with(UTF16_LE_BOM) => Some(UTF_16LE),
_ if bytes.starts_with(UTF8_BOM) => Some(UTF_8),

// without BOM
_ if bytes.starts_with(&[0x00, b'<', 0x00, b'?']) => Some(UTF_16BE), // Some BE encoding, for example, UTF-16 or ISO-10646-UCS-2
Expand Down
40 changes: 38 additions & 2 deletions src/writer.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
//! Contains high-level interface for an events-based XML emitter.

use std::io::Write;

use crate::encoding::UTF8_BOM;
use crate::errors::{Error, Result};
use crate::events::{attributes::Attribute, BytesCData, BytesStart, BytesText, Event};
use std::io::Write;

/// XML writer.
///
Expand Down Expand Up @@ -86,6 +88,39 @@ impl<W: Write> Writer<W> {
&mut self.writer
}

/// Write a Byte-Order-Mark character to the document.
///
/// # Example
///
/// ```rust
/// # use quick_xml::Result;
/// # fn main() -> Result<()> {
/// use quick_xml::events::{BytesStart, BytesText, Event};
/// use quick_xml::writer::Writer;
/// use quick_xml::Error;
/// use std::io::Cursor;
///
/// let mut buffer = Vec::new();
/// let mut writer = Writer::new_with_indent(&mut buffer, b' ', 4);
///
/// writer.write_bom()?;
/// writer
/// .create_element("empty")
/// .with_attribute(("attr1", "value1"))
/// .write_empty()
/// .expect("failure");
///
/// assert_eq!(
/// std::str::from_utf8(&buffer).unwrap(),
/// "\u{FEFF}<empty attr1=\"value1\"/>"
/// );
/// # Ok(())
/// # }
/// ```
pub fn write_bom(&mut self) -> Result<()> {
self.write(UTF8_BOM)
}

/// Writes the given event to the underlying writer.
pub fn write_event<'a, E: AsRef<Event<'a>>>(&mut self, event: E) -> Result<()> {
let mut next_should_line_break = true;
Expand Down Expand Up @@ -128,7 +163,7 @@ impl<W: Write> Writer<W> {

/// Writes bytes
#[inline]
pub fn write(&mut self, value: &[u8]) -> Result<()> {
pub(crate) fn write(&mut self, value: &[u8]) -> Result<()> {
self.writer.write_all(value).map_err(Error::Io)
}

Expand Down Expand Up @@ -502,6 +537,7 @@ mod indentation {
</paired>"#
);
}

#[test]
fn element_writer_empty() {
let mut buffer = Vec::new();
Expand Down

0 comments on commit b302b6f

Please sign in to comment.