From 396c55d73811df1843ca84a1779d0046fda4d225 Mon Sep 17 00:00:00 2001
From: Christian Ocker
Date: Tue, 5 Oct 2021 12:59:39 +0200
Subject: [PATCH] Add support for readers that implement Seek (#218)

`Archive::entries_with_seek` can be used to get an iterator over entries
for a reader that implements `Seek`.
---
 src/archive.rs | 50 +++++++++++++++++++++++++++++++----
 tests/all.rs   | 71 +++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 109 insertions(+), 12 deletions(-)

diff --git a/src/archive.rs b/src/archive.rs
index 8c333322..983bc0e6 100644
--- a/src/archive.rs
+++ b/src/archive.rs
@@ -1,5 +1,6 @@
 use std::cell::{Cell, RefCell};
 use std::cmp;
+use std::convert::TryFrom;
 use std::fs;
 use std::io;
 use std::io::prelude::*;
@@ -35,8 +36,12 @@ pub struct Entries<'a, R: 'a + Read> {
     _ignored: marker::PhantomData<&'a Archive<R>>,
 }
 
+trait SeekRead: Read + Seek {}
+impl<R: Read + Seek> SeekRead for R {}
+
 struct EntriesFields<'a> {
     archive: &'a Archive<dyn Read + 'a>,
+    seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
     next: u64,
     done: bool,
     raw: bool,
@@ -71,7 +76,7 @@ impl<R: Read> Archive<R> {
     /// corrupted.
     pub fn entries(&mut self) -> io::Result<Entries<R>> {
         let me: &mut Archive<dyn Read> = self;
-        me._entries().map(|fields| Entries {
+        me._entries(None).map(|fields| Entries {
             fields: fields,
             _ignored: marker::PhantomData,
         })
@@ -143,8 +148,29 @@ impl<R: Read> Archive<R> {
     }
 }
 
+impl<R: Seek + Read> Archive<R> {
+    /// Construct an iterator over the entries in this archive for a seekable
+    /// reader. Seek will be used to efficiently skip over file contents.
+    ///
+    /// Note that care must be taken to consider each entry within an archive in
+    /// sequence. If entries are processed out of sequence (from what the
+    /// iterator returns), then the contents read for each entry may be
+    /// corrupted.
+    pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> {
+        let me: &Archive<dyn Read> = self;
+        let me_seekable: &Archive<dyn SeekRead> = self;
+        me._entries(Some(me_seekable)).map(|fields| Entries {
+            fields: fields,
+            _ignored: marker::PhantomData,
+        })
+    }
+}
+
 impl<'a> Archive<dyn Read + 'a> {
-    fn _entries(&mut self) -> io::Result<EntriesFields> {
+    fn _entries(
+        &'a self,
+        seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
+    ) -> io::Result<EntriesFields> {
         if self.inner.pos.get() != 0 {
             return Err(other(
                 "cannot call entries unless archive is at \
@@ -153,13 +179,14 @@ impl<'a> Archive<dyn Read + 'a> {
         }
         Ok(EntriesFields {
             archive: self,
+            seekable_archive,
             done: false,
             next: 0,
             raw: false,
         })
     }
 
-    fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
+    fn _unpack(&'a mut self, dst: &Path) -> io::Result<()> {
         if dst.symlink_metadata().is_err() {
             fs::create_dir_all(&dst)
                 .map_err(|e| TarError::new(&format!("failed to create `{}`", dst.display()), e))?;
@@ -176,7 +203,7 @@ impl<'a> Archive<dyn Read + 'a> {
         // descendants), to ensure that directory permissions do not interfer with descendant
         // extraction.
         let mut directories = Vec::new();
-        for entry in self._entries()? {
+        for entry in self._entries(None)? {
             let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
             if file.header().entry_type() == crate::EntryType::Directory {
                 directories.push(file);
@@ -241,7 +268,7 @@ impl<'a> EntriesFields<'a> {
         loop {
             // Seek to the start of the next header in the archive
             let delta = self.next - self.archive.inner.pos.get();
-            self.archive.skip(delta)?;
+            self.skip(delta)?;
 
             // EOF is an indicator that we are at the end of the archive.
             if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
@@ -476,6 +503,19 @@ impl<'a> EntriesFields<'a> {
         }
         Ok(())
     }
+
+    fn skip(&mut self, amt: u64) -> io::Result<()> {
+        if let Some(seekable_archive) = self.seekable_archive {
+            let pos = io::SeekFrom::Current(
+                i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?,
+            );
+            let i = seekable_archive.inner.obj.borrow_mut().seek(pos)?;
+            seekable_archive.inner.pos.set(i);
+            Ok(())
+        } else {
+            self.archive.skip(amt)
+        }
+    }
 }
 
 impl<'a> Iterator for EntriesFields<'a> {
diff --git a/tests/all.rs b/tests/all.rs
index d29a5190..914d3b6e 100644
--- a/tests/all.rs
+++ b/tests/all.rs
@@ -11,7 +11,7 @@ use std::iter::repeat;
 use std::path::{Path, PathBuf};
 
 use filetime::FileTime;
-use tar::{Archive, Builder, EntryType, Header, HeaderMode};
+use tar::{Archive, Builder, Entries, EntryType, Header, HeaderMode};
 use tempfile::{Builder as TempBuilder, TempDir};
 
 macro_rules! t {
@@ -203,11 +203,7 @@ fn large_filename() {
     assert!(entries.next().is_none());
 }
 
-#[test]
-fn reading_entries() {
-    let rdr = Cursor::new(tar!("reading_files.tar"));
-    let mut ar = Archive::new(rdr);
-    let mut entries = t!(ar.entries());
+fn reading_entries_common<R: Read>(mut entries: Entries<R>) {
     let mut a = t!(entries.next().unwrap());
     assert_eq!(&*a.header().path_bytes(), b"a");
     let mut s = String::new();
@@ -216,8 +212,8 @@ fn reading_entries() {
     s.truncate(0);
     t!(a.read_to_string(&mut s));
     assert_eq!(s, "");
-    let mut b = t!(entries.next().unwrap());
 
+    let mut b = t!(entries.next().unwrap());
     assert_eq!(&*b.header().path_bytes(), b"b");
     s.truncate(0);
     t!(b.read_to_string(&mut s));
@@ -225,6 +221,67 @@ fn reading_entries() {
     assert!(entries.next().is_none());
 }
 
+#[test]
+fn reading_entries() {
+    let rdr = Cursor::new(tar!("reading_files.tar"));
+    let mut ar = Archive::new(rdr);
+    reading_entries_common(t!(ar.entries()));
+}
+
+#[test]
+fn reading_entries_with_seek() {
+    let rdr = Cursor::new(tar!("reading_files.tar"));
+    let mut ar = Archive::new(rdr);
+    reading_entries_common(t!(ar.entries_with_seek()));
+}
+
+struct LoggingReader<R> {
+    inner: R,
+    read_bytes: u64,
+}
+
+impl<R> LoggingReader<R> {
+    fn new(reader: R) -> LoggingReader<R> {
+        LoggingReader {
+            inner: reader,
+            read_bytes: 0,
+        }
+    }
+}
+
+impl<T: Read> Read for LoggingReader<T> {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        self.inner.read(buf).map(|i| {
+            self.read_bytes += i as u64;
+            i
+        })
+    }
+}
+
+impl<T: Seek> Seek for LoggingReader<T> {
+    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
+        self.inner.seek(pos)
+    }
+}
+
+#[test]
+fn skipping_entries_with_seek() {
+    let mut reader = LoggingReader::new(Cursor::new(tar!("reading_files.tar")));
+    let mut ar_reader = Archive::new(&mut reader);
+    let files: Vec<_> = t!(ar_reader.entries())
+        .map(|entry| entry.unwrap().path().unwrap().to_path_buf())
+        .collect();
+
+    let mut seekable_reader = LoggingReader::new(Cursor::new(tar!("reading_files.tar")));
+    let mut ar_seekable_reader = Archive::new(&mut seekable_reader);
+    let files_seekable: Vec<_> = t!(ar_seekable_reader.entries_with_seek())
+        .map(|entry| entry.unwrap().path().unwrap().to_path_buf())
+        .collect();
+
+    assert!(files == files_seekable);
+    assert!(seekable_reader.read_bytes < reader.read_bytes);
+}
+
 fn check_dirtree(td: &TempDir) {
     let dir_a = td.path().join("a");
     let dir_b = td.path().join("a/b");