From 831b4e6a375f33f68ea93256a1131db5c89ef7cf Mon Sep 17 00:00:00 2001 From: Christian Ocker Date: Tue, 5 Oct 2021 12:59:39 +0200 Subject: [PATCH] Add support for readers that implement Seek (#218) `Archive::entries_with_seek` can be used to get an iterator over entries for a reader that implements `Seek`. --- src/archive.rs | 64 ++++++++++++++++++++++++++++++++++++++++++++++---- tests/all.rs | 42 +++++++++++++++++++++++++++++++++ 2 files changed, 101 insertions(+), 5 deletions(-) diff --git a/src/archive.rs b/src/archive.rs index 8c333322..452a0121 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -1,5 +1,6 @@ use std::cell::{Cell, RefCell}; use std::cmp; +use std::convert::TryFrom; use std::fs; use std::io; use std::io::prelude::*; @@ -35,8 +36,12 @@ pub struct Entries<'a, R: 'a + Read> { _ignored: marker::PhantomData<&'a Archive>, } +trait SeekRead: Read + Seek {} +impl SeekRead for R {} + struct EntriesFields<'a> { archive: &'a Archive, + seekable_archive: Option<&'a Archive>, next: u64, done: bool, raw: bool, @@ -71,7 +76,7 @@ impl Archive { /// corrupted. pub fn entries(&mut self) -> io::Result> { let me: &mut Archive = self; - me._entries().map(|fields| Entries { + me._entries(None).map(|fields| Entries { fields: fields, _ignored: marker::PhantomData, }) @@ -143,8 +148,29 @@ impl Archive { } } +impl Archive { + /// Construct an iterator over the entries in this archive for a seekable + /// reader. Seek will be used to efficiently skip over file contents. + /// + /// Note that care must be taken to consider each entry within an archive in + /// sequence. If entries are processed out of sequence (from what the + /// iterator returns), then the contents read for each entry may be + /// corrupted. + pub fn entries_with_seek(&mut self) -> io::Result> { + let me: &Archive = self; + let me_seekable: &Archive = self; + me._entries(Some(me_seekable)).map(|fields| Entries { + fields: fields, + _ignored: marker::PhantomData, + }) + } +} + impl<'a> Archive { - fn _entries(&mut self) -> io::Result { + fn _entries( + &'a self, + seekable_archive: Option<&'a Archive>, + ) -> io::Result { if self.inner.pos.get() != 0 { return Err(other( "cannot call entries unless archive is at \ @@ -153,13 +179,14 @@ impl<'a> Archive { } Ok(EntriesFields { archive: self, + seekable_archive, done: false, next: 0, raw: false, }) } - fn _unpack(&mut self, dst: &Path) -> io::Result<()> { + fn _unpack(&'a mut self, dst: &Path) -> io::Result<()> { if dst.symlink_metadata().is_err() { fs::create_dir_all(&dst) .map_err(|e| TarError::new(&format!("failed to create `{}`", dst.display()), e))?; @@ -176,7 +203,7 @@ impl<'a> Archive { // descendants), to ensure that directory permissions do not interfer with descendant // extraction. let mut directories = Vec::new(); - for entry in self._entries()? { + for entry in self._entries(None)? { let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?; if file.header().entry_type() == crate::EntryType::Directory { directories.push(file); @@ -205,6 +232,16 @@ impl<'a> Archive { } } +impl<'a> Archive { + fn skip_with_seek(&self, amt: u64) -> io::Result<()> { + (&self.inner) + .seek(io::SeekFrom::Current( + i64::try_from(amt).map_err(|_| other("seek position out of bounds"))?, + )) + .map(|_| ()) + } +} + impl<'a, R: Read> Entries<'a, R> { /// Indicates whether this iterator will return raw entries or not. /// @@ -241,7 +278,7 @@ impl<'a> EntriesFields<'a> { loop { // Seek to the start of the next header in the archive let delta = self.next - self.archive.inner.pos.get(); - self.archive.skip(delta)?; + self.skip(delta)?; // EOF is an indicator that we are at the end of the archive. if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? { @@ -476,6 +513,14 @@ impl<'a> EntriesFields<'a> { } Ok(()) } + + fn skip(&mut self, amt: u64) -> io::Result<()> { + if let Some(seekable_archive) = self.seekable_archive { + seekable_archive.skip_with_seek(amt) + } else { + self.archive.skip(amt) + } + } } impl<'a> Iterator for EntriesFields<'a> { @@ -509,6 +554,15 @@ impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner { } } +impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + self.obj.borrow_mut().seek(pos).map(|i| { + self.pos.set(i); + i + }) + } +} + /// Try to fill the buffer from the reader. /// /// If the reader reaches its end before filling the buffer at all, returns `false`. diff --git a/tests/all.rs b/tests/all.rs index d29a5190..efd5bdb3 100644 --- a/tests/all.rs +++ b/tests/all.rs @@ -153,6 +153,48 @@ fn writing_files() { assert!(entries.next().is_none()); } +struct LoggingReader { + inner: R, + read_bytes: u64, +} + +impl LoggingReader { + fn new(reader: R) -> LoggingReader { + LoggingReader { + inner: reader, + read_bytes: 0, + } + } +} + +impl Read for LoggingReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.inner.read(buf).map(|i| { + self.read_bytes += i as u64; + i + }) + } +} + +impl Seek for LoggingReader { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + self.inner.seek(pos) + } +} + +#[test] +fn new_from_seek() { + let mut reader = LoggingReader::new(Cursor::new(tar!("reading_files.tar"))); + let mut ar_reader = Archive::new(&mut reader); + for _ in t!(ar_reader.entries()) {} + assert!(reader.read_bytes == 2560); + + let mut seekable_reader = LoggingReader::new(Cursor::new(tar!("reading_files.tar"))); + let mut ar_seekable_reader = Archive::new(&mut seekable_reader); + for _ in t!(ar_seekable_reader.entries_with_seek()) {} + assert!(seekable_reader.read_bytes == 1536); +} + #[test] fn large_filename() { let mut ar = Builder::new(Vec::new());