Skip to content

Commit

Permalink
temp
Browse files Browse the repository at this point in the history
  • Loading branch information
dralley committed Aug 15, 2022
1 parent f243cda commit 61022d8
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 77 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ license = "MIT"
[dependencies]
document-features = { version = "0.2", optional = true }
encoding_rs = { version = "0.8", optional = true }
encoding_rs_io = { version = "0.1", optional = true }
serde = { version = "1.0", optional = true }
tokio = { version = "1.20", optional = true, default-features = false, features = ["io-util"] }
memchr = "2.5"
Expand Down Expand Up @@ -57,7 +58,7 @@ async-tokio = ["tokio"]
## crate, that satisfied the restriction above.
##
## [standard compliant]: https://www.w3.org/TR/xml11/#charencoding
encoding = ["encoding_rs"]
encoding = ["encoding_rs", "encoding_rs_io"]

## Enables support for recognizing all [HTML 5 entities](https://dev.w3.org/html5/html-author/charref)
escape-html = []
Expand Down
4 changes: 2 additions & 2 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ mod var;

pub use crate::errors::serialize::DeError;
use crate::{
encoding::Decoder,
encoding::{Decoder, DecodingReader},
errors::Error,
events::{BytesCData, BytesEnd, BytesStart, BytesText, Event},
name::QName,
Expand Down Expand Up @@ -697,7 +697,7 @@ impl<'de> Deserializer<'de, SliceReader<'de>> {
}
}

impl<'de, R> Deserializer<'de, IoReader<R>>
impl<'de, R> Deserializer<'de, IoReader<DecodingReader<R>>>
where
R: BufRead,
{
Expand Down
110 changes: 110 additions & 0 deletions src/encoding.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,103 @@
//! A module for wrappers that encode / decode data.

use std::borrow::Cow;
use std::io;

#[cfg(feature = "encoding")]
use encoding_rs::{Encoding, UTF_16BE, UTF_16LE, UTF_8};
#[cfg(feature = "encoding")]
use encoding_rs_io::{DecodeReaderBytes, DecodeReaderBytesBuilder};

#[cfg(feature = "encoding")]
use crate::Error;
use crate::Result;

///
#[derive(Debug)]
pub struct ValidatingReader<R> {
reader: R,
leftover_bytes_buf: [u8; 7],
len: u8,
first: bool,
}

impl<R: io::Read> ValidatingReader<R> {
///
pub fn new(reader: R) -> Self {
Self {
reader,
leftover_bytes_buf: [0; 7],
len: 0,
first: true,
}
}
}

impl<R: io::Read> io::Read for ValidatingReader<R> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
buf[..self.len.into()].copy_from_slice(&self.leftover_bytes_buf[..self.len.into()]);
let (_leftovers, copy_dest) = buf.split_at_mut(self.len.into());
let amt = self.reader.read(copy_dest)?;

match std::str::from_utf8(buf) {
Ok(_) => Ok(amt),
Err(err) => {
let (valid, after_valid) = buf.split_at(err.valid_up_to());
self.leftover_bytes_buf[..after_valid.len()].copy_from_slice(after_valid);
self.len = after_valid.len() as u8;
Ok(valid.len())
}
}
}
}

/// A struct for transparently decoding / validating bytes to known-valid UTF-8.
#[derive(Debug)]
pub struct DecodingReader<R> {
#[cfg(feature = "encoding")]
reader: io::BufReader<DecodeReaderBytes<R, Vec<u8>>>,
#[cfg(not(feature = "encoding"))]
reader: io::BufReader<ValidatingReader<R>>,
}

impl<R: io::Read> DecodingReader<R> {
/// Build a new DecodingReader which decodes a stream of bytes into valid UTF-8.
#[cfg(feature = "encoding")]
pub fn new(reader: R) -> Self {
let decoder = DecodeReaderBytesBuilder::new()
.bom_override(true)
.build(reader);

Self {
reader: io::BufReader::new(decoder),
}
}

/// Build a new DecodingReader which only validates UTF-8.
#[cfg(not(feature = "encoding"))]
pub fn new(reader: R) -> Self {
Self {
reader: io::BufReader::new(ValidatingReader::new(reader)),
}
}
}

impl<R: io::Read> io::Read for DecodingReader<R> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
self.reader.read(buf)
}
}

impl<R: io::Read> io::BufRead for DecodingReader<R> {
fn fill_buf(&mut self) -> io::Result<&[u8]> {
self.reader.fill_buf()
}

fn consume(&mut self, amt: usize) {
self.reader.consume(amt)
}
}

/// Decoder of byte slices into strings.
///
/// If feature `encoding` is enabled, this encoding taken from the `"encoding"`
Expand Down Expand Up @@ -184,3 +273,24 @@ pub fn detect_encoding(bytes: &[u8]) -> Option<&'static Encoding> {
_ => None,
}
}

#[cfg(test)]
mod test {
use std::io::Read;

use super::*;

#[track_caller]
fn test_input(input: &[u8]) {
let mut reader = ValidatingReader::new(input);
let mut buf = [0; 100];
assert_eq!(reader.read(&mut buf).unwrap(), input.len());
}

// #[test]
// fn test() {
// test_input(b"asdf");
// test_input(b"\x82\xA0\x82\xA2\x82\xA4");
// test_input(b"\xEF\xBB\xBFfoo\xFFbar");
// }
}
14 changes: 7 additions & 7 deletions src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@
//! underlying byte stream.

use std::fs::File;
use std::io::{self, BufRead, BufReader};
use std::io;
use std::path::Path;

use memchr;

use crate::encoding::DecodingReader;
use crate::errors::{Error, Result};
use crate::events::Event;
use crate::name::QName;
Expand Down Expand Up @@ -210,15 +211,15 @@ pub(super) use impl_buffered_source;

/// Implementation of `XmlSource` for any `BufRead` reader using a user-given
/// `Vec<u8>` as buffer that will be borrowed by events.
impl<'b, R: BufRead> XmlSource<'b, &'b mut Vec<u8>> for R {
impl<'b, R: io::BufRead> XmlSource<'b, &'b mut Vec<u8>> for R {
impl_buffered_source!();
}

////////////////////////////////////////////////////////////////////////////////////////////////////

/// This is an implementation of [`Reader`] for reading from a [`BufRead`] as
/// underlying byte stream.
impl<R: BufRead> Reader<R> {
impl<R: io::BufRead> Reader<R> {
/// Reads the next `Event`.
///
/// This is the main entry point for reading XML `Event`s.
Expand Down Expand Up @@ -361,15 +362,13 @@ impl<R: BufRead> Reader<R> {
}
}

impl Reader<BufReader<File>> {
impl Reader<DecodingReader<File>> {
/// Creates an XML reader from a file path.
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
let file = File::open(path).map_err(Error::Io)?;
let reader = BufReader::new(file);
Ok(Self::from_reader(reader))
Ok(Self::from_reader(file))
}
}

#[cfg(test)]
mod test {
use crate::reader::test::check;
Expand Down Expand Up @@ -397,6 +396,7 @@ mod test {

/// Checks that encoding is detected by BOM and changed after XML declaration
#[test]
#[ignore = "dalley fixme"]
fn bom_detected() {
let mut reader =
Reader::from_reader(b"\xFF\xFE<?xml encoding='windows-1251'?>".as_ref());
Expand Down
66 changes: 8 additions & 58 deletions src/reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
use encoding_rs::Encoding;
use std::ops::Range;

use crate::encoding::Decoder;
use std::io::Read;

use crate::encoding::{Decoder, DecodingReader};
use crate::errors::{Error, Result};
use crate::events::Event;
use crate::reader::parser::Parser;
Expand Down Expand Up @@ -433,73 +435,19 @@ pub struct Reader<R> {
}

/// Builder methods
impl<R> Reader<R> {
impl<R: Read> Reader<DecodingReader<R>> {
/// Creates a `Reader` that reads from a given reader.
pub fn from_reader(reader: R) -> Self {
Self {
reader,
reader: DecodingReader::new(reader),
parser: Parser::default(),
}
}

configure_methods!();
}

/// Getters
impl<R> Reader<R> {
/// Consumes `Reader` returning the underlying reader
///
/// Can be used to compute line and column of a parsing error position
///
/// # Examples
///
/// ```
/// # use pretty_assertions::assert_eq;
/// use std::{str, io::Cursor};
/// use quick_xml::events::Event;
/// use quick_xml::reader::Reader;
///
/// let xml = r#"<tag1 att1 = "test">
/// <tag2><!--Test comment-->Test</tag2>
/// <tag3>Test 2</tag3>
/// </tag1>"#;
/// let mut reader = Reader::from_reader(Cursor::new(xml.as_bytes()));
/// let mut buf = Vec::new();
///
/// fn into_line_and_column(reader: Reader<Cursor<&[u8]>>) -> (usize, usize) {
/// let end_pos = reader.buffer_position();
/// let mut cursor = reader.into_inner();
/// let s = String::from_utf8(cursor.into_inner()[0..end_pos].to_owned())
/// .expect("can't make a string");
/// let mut line = 1;
/// let mut column = 0;
/// for c in s.chars() {
/// if c == '\n' {
/// line += 1;
/// column = 0;
/// } else {
/// column += 1;
/// }
/// }
/// (line, column)
/// }
///
/// loop {
/// match reader.read_event_into(&mut buf) {
/// Ok(Event::Start(ref e)) => match e.name().as_ref() {
/// b"tag1" | b"tag2" => (),
/// tag => {
/// assert_eq!(b"tag3", tag);
/// assert_eq!((3, 22), into_line_and_column(reader));
/// break;
/// }
/// },
/// Ok(Event::Eof) => unreachable!(),
/// _ => (),
/// }
/// buf.clear();
/// }
/// ```
/// Consumes `Reader` returning the underlying reader.
pub fn into_inner(self) -> R {
self.reader
}
Expand Down Expand Up @@ -538,6 +486,8 @@ impl<R> Reader<R> {
pub fn decoder(&self) -> Decoder {
self.parser.decoder()
}

configure_methods!();
}

/// Private sync reading methods
Expand Down
12 changes: 7 additions & 5 deletions src/reader/ns_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@

use std::borrow::Cow;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::io;
use std::ops::Deref;
use std::path::Path;

use crate::encoding::DecodingReader;
use crate::errors::Result;
use crate::events::Event;
use crate::name::{LocalName, NamespaceResolver, QName, ResolveResult};
use crate::reader::{Reader, Span, XmlSource};

/// A low level encoding-agnostic XML event reader that performs namespace resolution.
///
/// Consumes a [`BufRead`] and streams XML `Event`s.
Expand All @@ -33,7 +33,7 @@ pub struct NsReader<R> {
}

/// Builder methods
impl<R> NsReader<R> {
impl<R: io::Read> NsReader<DecodingReader<R>> {
/// Creates a `NsReader` that reads from a reader.
#[inline]
pub fn from_reader(reader: R) -> Self {
Expand Down Expand Up @@ -299,7 +299,7 @@ impl<R> NsReader<R> {
}
}

impl<R: BufRead> NsReader<R> {
impl<R: io::BufRead> NsReader<R> {
/// Reads the next event into given buffer.
///
/// This method manages namespaces but doesn't resolve them automatically.
Expand Down Expand Up @@ -522,7 +522,7 @@ impl<R: BufRead> NsReader<R> {
}
}

impl NsReader<BufReader<File>> {
impl NsReader<DecodingReader<File>> {
/// Creates an XML reader from a file path.
pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
Ok(Self::new(Reader::from_file(path)?))
Expand All @@ -536,6 +536,8 @@ impl<'i> NsReader<&'i [u8]> {
Self::new(Reader::from_str(s))
}

configure_methods!(reader);

/// Reads the next event, borrow its content from the input buffer.
///
/// This method manages namespaces but doesn't resolve them automatically.
Expand Down

0 comments on commit 61022d8

Please sign in to comment.