Skip to content

Commit

Permalink
Add support for readers that implement Seek (#218)
Browse files Browse the repository at this point in the history
`Archive::new` requires only `Read` for backward-compatibility, while
`Archive::new_from_seek` can be used with readers that also implement
`Seek`, to allow more efficient skipping over file contents.
  • Loading branch information
fermeise committed Oct 4, 2021
1 parent 60c6bd8 commit 2a9c543
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 31 deletions.
48 changes: 27 additions & 21 deletions src/archive.rs
@@ -1,5 +1,4 @@
use std::cell::{Cell, RefCell};
use std::cmp;
use std::fs;
use std::io;
use std::io::prelude::*;
Expand All @@ -10,12 +9,13 @@ use crate::entry::{EntryFields, EntryIo};
use crate::error::TarError;
use crate::other;
use crate::pax::pax_extensions_size;
use crate::skip::{SeekingSkipRead, SkipRead};
use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};

/// A top-level representation of an archive file.
///
/// This archive can have an entry added to it and it can be iterated over.
pub struct Archive<R: ?Sized + Read> {
pub struct Archive<R: ?Sized + SkipRead> {
inner: ArchiveInner<R>,
}

Expand All @@ -30,19 +30,19 @@ pub struct ArchiveInner<R: ?Sized> {
}

/// An iterator over the entries of an archive.
pub struct Entries<'a, R: 'a + Read> {
pub struct Entries<'a, R: 'a + SkipRead> {
fields: EntriesFields<'a>,
_ignored: marker::PhantomData<&'a Archive<R>>,
}

struct EntriesFields<'a> {
archive: &'a Archive<dyn Read + 'a>,
archive: &'a Archive<dyn SkipRead + 'a>,
next: u64,
done: bool,
raw: bool,
}

impl<R: Read> Archive<R> {
impl<R: SkipRead> Archive<R> {
/// Create a new archive with the underlying object as the reader.
pub fn new(obj: R) -> Archive<R> {
Archive {
Expand Down Expand Up @@ -70,7 +70,7 @@ impl<R: Read> Archive<R> {
/// iterator returns), then the contents read for each entry may be
/// corrupted.
pub fn entries(&mut self) -> io::Result<Entries<R>> {
let me: &mut Archive<dyn Read> = self;
let me: &mut Archive<dyn SkipRead> = self;
me._entries().map(|fields| Entries {
fields: fields,
_ignored: marker::PhantomData,
Expand All @@ -97,7 +97,7 @@ impl<R: Read> Archive<R> {
/// ar.unpack("foo").unwrap();
/// ```
pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<()> {
let me: &mut Archive<dyn Read> = self;
let me: &mut Archive<dyn SkipRead> = self;
me._unpack(dst.as_ref())
}

Expand Down Expand Up @@ -143,7 +143,15 @@ impl<R: Read> Archive<R> {
}
}

impl<'a> Archive<dyn Read + 'a> {
impl<R: Read + Seek> Archive<R> {
    /// Build an archive on top of a reader that implements `Seek` in
    /// addition to `Read`.
    ///
    /// The reader is wrapped in a `SeekingSkipRead`, so file contents that
    /// are skipped over are seeked past instead of being read and discarded.
    pub fn new_from_seek(obj: R) -> Archive<SeekingSkipRead<R>> {
        let seeking = SeekingSkipRead::new(obj);
        Archive::new(seeking)
    }
}

impl<'a> Archive<dyn SkipRead + 'a> {
fn _entries(&mut self) -> io::Result<EntriesFields> {
if self.inner.pos.get() != 0 {
return Err(other(
Expand Down Expand Up @@ -191,21 +199,19 @@ impl<'a> Archive<dyn Read + 'a> {
Ok(())
}

fn skip(&self, mut amt: u64) -> io::Result<()> {
let mut buf = [0u8; 4096 * 8];
while amt > 0 {
let n = cmp::min(amt, buf.len() as u64);
let n = (&self.inner).read(&mut buf[..n as usize])?;
if n == 0 {
return Err(other("unexpected EOF during skip"));
}
amt -= n as u64;
fn skip(&self, amt: u64) -> io::Result<()> {
let n = self.inner.obj.borrow_mut().skip(amt).map(|i| {
self.inner.pos.set(self.inner.pos.get() + i);
i
})?;
if n != amt {
return Err(other("unexpected EOF during skip"));
}
Ok(())
}
}

impl<'a, R: Read> Entries<'a, R> {
impl<'a, R: SkipRead> Entries<'a, R> {
/// Indicates whether this iterator will return raw entries or not.
///
/// If the raw list of entries are returned, then no preprocessing happens
Expand All @@ -221,7 +227,7 @@ impl<'a, R: Read> Entries<'a, R> {
}
}
}
impl<'a, R: Read> Iterator for Entries<'a, R> {
impl<'a, R: SkipRead> Iterator for Entries<'a, R> {
type Item = io::Result<Entry<'a, R>>;

fn next(&mut self) -> Option<io::Result<Entry<'a, R>>> {
Expand Down Expand Up @@ -500,7 +506,7 @@ impl<'a> Iterator for EntriesFields<'a> {
}
}

impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
impl<'a, R: ?Sized + SkipRead> Read for &'a ArchiveInner<R> {
fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
self.obj.borrow_mut().read(into).map(|i| {
self.pos.set(self.pos.get() + i as u64);
Expand All @@ -513,7 +519,7 @@ impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
///
/// If the reader reaches its end before filling the buffer at all, returns `false`.
/// Otherwise returns `true`.
fn try_read_all<R: Read>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
fn try_read_all<R: SkipRead>(r: &mut R, buf: &mut [u8]) -> io::Result<bool> {
let mut read = 0;
while read < buf.len() {
match r.read(&mut buf[read..])? {
Expand Down
21 changes: 11 additions & 10 deletions src/entry.rs
Expand Up @@ -14,14 +14,15 @@ use crate::error::TarError;
use crate::header::bytes2path;
use crate::other;
use crate::pax::pax_extensions;
use crate::skip::SkipRead;
use crate::{Archive, Header, PaxExtensions};

/// A read-only view into an entry of an archive.
///
/// This structure is a window into a portion of a borrowed archive which can
/// be inspected. It acts as a file handle by implementing the Reader trait. An
/// entry cannot be rewritten once inserted into an archive.
pub struct Entry<'a, R: 'a + Read> {
pub struct Entry<'a, R: 'a + SkipRead> {
fields: EntryFields<'a>,
_ignored: marker::PhantomData<&'a Archive<R>>,
}
Expand All @@ -45,7 +46,7 @@ pub struct EntryFields<'a> {

pub enum EntryIo<'a> {
Pad(io::Take<io::Repeat>),
Data(io::Take<&'a ArchiveInner<dyn Read + 'a>>),
Data(io::Take<&'a ArchiveInner<dyn SkipRead + 'a>>),
}

/// When unpacking items the unpacked thing is returned to allow custom
Expand All @@ -60,7 +61,7 @@ pub enum Unpacked {
__Nonexhaustive,
}

impl<'a, R: Read> Entry<'a, R> {
impl<'a, R: SkipRead> Entry<'a, R> {
/// Returns the path name for this entry.
///
/// This method may fail if the pathname is not valid Unicode and this is
Expand Down Expand Up @@ -260,18 +261,18 @@ impl<'a, R: Read> Entry<'a, R> {
}
}

impl<'a, R: Read> Read for Entry<'a, R> {
impl<'a, R: SkipRead> Read for Entry<'a, R> {
fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
self.fields.read(into)
Read::read(&mut self.fields, into)
}
}

impl<'a> EntryFields<'a> {
pub fn from<R: Read>(entry: Entry<R>) -> EntryFields {
pub fn from<R: SkipRead>(entry: Entry<R>) -> EntryFields {
entry.fields
}

pub fn into_entry<R: Read>(self) -> Entry<'a, R> {
pub fn into_entry<R: SkipRead>(self) -> Entry<'a, R> {
Entry {
fields: self,
_ignored: marker::PhantomData,
Expand Down Expand Up @@ -810,7 +811,7 @@ impl<'a> EntryFields<'a> {
impl<'a> Read for EntryFields<'a> {
fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
loop {
match self.data.get_mut(0).map(|io| io.read(into)) {
match self.data.get_mut(0).map(|io| Read::read(io, into)) {
Some(Ok(0)) => {
self.data.remove(0);
}
Expand All @@ -824,8 +825,8 @@ impl<'a> Read for EntryFields<'a> {
impl<'a> Read for EntryIo<'a> {
fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
match *self {
EntryIo::Pad(ref mut io) => io.read(into),
EntryIo::Data(ref mut io) => io.read(into),
EntryIo::Pad(ref mut io) => Read::read(io, into),
EntryIo::Data(ref mut io) => Read::read(io, into),
}
}
}
1 change: 1 addition & 0 deletions src/lib.rs
Expand Up @@ -38,6 +38,7 @@ mod entry_type;
mod error;
mod header;
mod pax;
mod skip;

fn other(msg: &str) -> Error {
Error::new(ErrorKind::Other, msg)
Expand Down
72 changes: 72 additions & 0 deletions src/skip.rs
@@ -0,0 +1,72 @@
use std::cmp;
use std::io;
use std::io::prelude::*;

use crate::other;

/// The `SkipRead` trait allows for reading bytes from a source, as well as
/// skipping over a number of bytes.
///
/// This is used for backward compatibility with [`std::io::Read`]. Each reader
/// automatically implements `SkipRead` through a blanket implementation.
/// Readers that additionally implement [`std::io::Seek`] can use
/// `SeekingSkipRead` to implement efficient skipping based on
/// [`std::io::Seek::seek`].
pub trait SkipRead {
/// Pull some bytes from this source into the specified buffer, returning
/// how many bytes were read.
///
/// Identical to [`std::io::Read::read`].
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize>;

/// Skip a specified number of bytes from the source, returning how many
/// bytes were skipped.
///
/// Callers compare the returned count against `amt`; a smaller value means
/// the source was not advanced by the full requested amount.
fn skip(&mut self, amt: u64) -> io::Result<u64>;
}

/// `SkipRead` is implemented for all readers by using [`std::io::Read::read`]
/// for both reading and skipping.
impl<R: Read> SkipRead for R {
    /// Forwards directly to [`std::io::Read::read`].
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // Name the trait explicitly: both `Read` and `SkipRead` provide a
        // method called `read`.
        <R as Read>::read(self, buf)
    }

    /// Skips `amt` bytes by reading them into a scratch buffer and discarding
    /// them.
    ///
    /// Returns `Ok(amt)` on success.
    ///
    /// # Errors
    ///
    /// Fails with "unexpected EOF during skip" if the reader runs out of data
    /// before `amt` bytes were consumed, and propagates any other read error.
    /// Reads that fail with `ErrorKind::Interrupted` are retried.
    fn skip(&mut self, amt: u64) -> io::Result<u64> {
        let mut buf = [0u8; 4096 * 8];
        let mut skipped = 0u64;
        while skipped < amt {
            // Never read past the requested amount, so the reader ends up
            // positioned exactly after the skipped region.
            let want = cmp::min(amt - skipped, buf.len() as u64) as usize;
            match <R as Read>::read(self, &mut buf[..want]) {
                Ok(0) => return Err(other("unexpected EOF during skip")),
                Ok(n) => skipped += n as u64,
                // An interrupted read is transient; try again.
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
                Err(e) => return Err(e),
            }
        }
        Ok(skipped)
    }
}

/// Adapter that gives readers which also implement [`std::io::Seek`] a more
/// efficient `SkipRead` implementation: skipping is done with
/// [`std::io::Seek::seek`] instead of reading and discarding bytes.
pub struct SeekingSkipRead<R: Read + Seek> {
    /// The wrapped reader that all reads and seeks are forwarded to.
    inner: R,
}

impl<R: Read + Seek> SeekingSkipRead<R> {
    /// Wraps `obj` without reading from it or changing its position.
    pub fn new(obj: R) -> SeekingSkipRead<R> {
        Self { inner: obj }
    }
}

impl<R: Read + Seek> SkipRead for SeekingSkipRead<R> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
self.inner.read(buf)
}

fn skip(&mut self, amount: u64) -> io::Result<u64> {
let old_pos = self.inner.stream_position()?;
self.inner
.seek(io::SeekFrom::Current(amount as i64))
.map(|pos| pos - old_pos)
}
}
42 changes: 42 additions & 0 deletions tests/all.rs
Expand Up @@ -153,6 +153,48 @@ fn writing_files() {
assert!(entries.next().is_none());
}

/// Test helper that wraps a reader and counts how many bytes were obtained
/// from it through `Read::read`. Seeks are forwarded and not counted.
struct LoggingReader<R> {
    inner: R,
    read_bytes: u64,
}

impl<R> LoggingReader<R> {
    /// Wraps `reader` with the byte counter starting at zero.
    fn new(reader: R) -> LoggingReader<R> {
        LoggingReader {
            read_bytes: 0,
            inner: reader,
        }
    }
}

impl<T: Read> Read for LoggingReader<T> {
    /// Reads from the wrapped reader and adds the number of bytes returned
    /// to `read_bytes`; failed reads leave the counter unchanged.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let count = self.inner.read(buf)?;
        self.read_bytes += count as u64;
        Ok(count)
    }
}

impl<T: Seek> Seek for LoggingReader<T> {
    /// Passes the seek straight through to the wrapped reader.
    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
        Seek::seek(&mut self.inner, pos)
    }
}

/// Iterating the same archive through `Archive::new_from_seek` must pull
/// fewer bytes through `Read::read` than iterating it through `Archive::new`.
#[test]
fn new_from_seek() {
    // Plain reader: skipped data is consumed through `read`.
    let mut reader = LoggingReader::new(Cursor::new(tar!("reading_files.tar")));
    let mut ar_reader = Archive::new(&mut reader);
    for _ in t!(ar_reader.entries()) {}
    // NOTE(review): 2560 is presumably the full size of the fixture archive
    // - confirm against `reading_files.tar`.
    assert_eq!(reader.read_bytes, 2560);

    // Seekable reader: skipped data is seeked over and not counted.
    let mut seekable_reader = LoggingReader::new(Cursor::new(tar!("reading_files.tar")));
    let mut ar_seekable_reader = Archive::new_from_seek(&mut seekable_reader);
    for _ in t!(ar_seekable_reader.entries()) {}
    // NOTE(review): the 1024-byte difference presumably corresponds to the
    // entry data blocks that were skipped - confirm against the fixture.
    assert_eq!(seekable_reader.read_bytes, 1536);
}

#[test]
fn large_filename() {
let mut ar = Builder::new(Vec::new());
Expand Down

0 comments on commit 2a9c543

Please sign in to comment.