diff --git a/src/archive.rs b/src/archive.rs
index 8c333322..470f9612 100644
--- a/src/archive.rs
+++ b/src/archive.rs
@@ -35,14 +35,18 @@ pub struct Entries<'a, R: 'a + Read> {
     _ignored: marker::PhantomData<&'a Archive<R>>,
 }
 
+trait SeekRead: Read + Seek {}
+impl<R: Read + Seek> SeekRead for R {}
+
 struct EntriesFields<'a> {
     archive: &'a Archive<dyn Read + 'a>,
+    seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
     next: u64,
     done: bool,
     raw: bool,
 }
 
-impl<R: Read> Archive<R> {
+impl<'a, R: Read> Archive<R> {
     /// Create a new archive with the underlying object as the reader.
     pub fn new(obj: R) -> Archive<R> {
         Archive {
@@ -71,7 +75,7 @@ impl<R: Read> Archive<R> {
     /// corrupted.
     pub fn entries(&mut self) -> io::Result<Entries<R>> {
         let me: &mut Archive<dyn Read> = self;
-        me._entries().map(|fields| Entries {
+        me._entries(None).map(|fields| Entries {
             fields: fields,
             _ignored: marker::PhantomData,
         })
@@ -143,8 +147,29 @@ impl<R: Read> Archive<R> {
     }
 }
 
+impl<'a, R: Seek + Read> Archive<R> {
+    /// Construct an iterator over the entries in this archive for a seekable
+    /// reader. Seek will be used to efficiently skip over file contents.
+    ///
+    /// Note that care must be taken to consider each entry within an archive in
+    /// sequence. If entries are processed out of sequence (from what the
+    /// iterator returns), then the contents read for each entry may be
+    /// corrupted.
+    pub fn entries_with_seek(&mut self) -> io::Result<Entries<R>> {
+        let me: &Archive<dyn Read> = self;
+        let me_seekable: &Archive<dyn SeekRead> = self;
+        me._entries(Some(me_seekable)).map(|fields| Entries {
+            fields: fields,
+            _ignored: marker::PhantomData,
+        })
+    }
+}
+
 impl<'a> Archive<dyn Read + 'a> {
-    fn _entries(&mut self) -> io::Result<EntriesFields> {
+    fn _entries(
+        &'a self,
+        seekable_archive: Option<&'a Archive<dyn SeekRead + 'a>>,
+    ) -> io::Result<EntriesFields> {
         if self.inner.pos.get() != 0 {
             return Err(other(
                 "cannot call entries unless archive is at \
@@ -153,13 +178,14 @@ impl<'a> Archive<dyn Read + 'a> {
         }
         Ok(EntriesFields {
             archive: self,
+            seekable_archive,
             done: false,
             next: 0,
             raw: false,
         })
     }
 
-    fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
+    fn _unpack(&'a mut self, dst: &Path) -> io::Result<()> {
         if dst.symlink_metadata().is_err() {
             fs::create_dir_all(&dst)
                 .map_err(|e| TarError::new(&format!("failed to create `{}`", dst.display()), e))?;
@@ -176,7 +202,7 @@ impl<'a> Archive<dyn Read + 'a> {
         // descendants), to ensure that directory permissions do not interfer with descendant
         // extraction.
         let mut directories = Vec::new();
-        for entry in self._entries()? {
+        for entry in self._entries(None)? {
             let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
             if file.header().entry_type() == crate::EntryType::Directory {
                 directories.push(file);
@@ -205,6 +231,14 @@ impl<'a> Archive<dyn Read + 'a> {
     }
 }
 
+impl<'a> Archive<dyn SeekRead + 'a> {
+    fn skip_with_seek(&self, amt: u64) -> io::Result<()> {
+        (&self.inner)
+            .seek(io::SeekFrom::Current(amt as i64))
+            .map(|_| ())
+    }
+}
+
 impl<'a, R: Read> Entries<'a, R> {
     /// Indicates whether this iterator will return raw entries or not.
     ///
@@ -241,7 +275,11 @@ impl<'a> EntriesFields<'a> {
         loop {
             // Seek to the start of the next header in the archive
             let delta = self.next - self.archive.inner.pos.get();
-            self.archive.skip(delta)?;
+            if let Some(seekable_archive) = self.seekable_archive {
+                seekable_archive.skip_with_seek(delta)?;
+            } else {
+                self.archive.skip(delta)?;
+            }
 
             // EOF is an indicator that we are at the end of the archive.
             if !try_read_all(&mut &self.archive.inner, header.as_mut_bytes())? {
@@ -509,6 +547,15 @@ impl<'a, R: ?Sized + Read> Read for &'a ArchiveInner<R> {
     }
 }
 
+impl<'a, R: ?Sized + Seek> Seek for &'a ArchiveInner<R> {
+    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
+        self.obj.borrow_mut().seek(pos).map(|i| {
+            self.pos.set(i);
+            i
+        })
+    }
+}
+
 /// Try to fill the buffer from the reader.
 ///
 /// If the reader reaches its end before filling the buffer at all, returns `false`.
diff --git a/tests/all.rs b/tests/all.rs
index d29a5190..efd5bdb3 100644
--- a/tests/all.rs
+++ b/tests/all.rs
@@ -153,6 +153,48 @@ fn writing_files() {
     assert!(entries.next().is_none());
 }
 
+struct LoggingReader<R> {
+    inner: R,
+    read_bytes: u64,
+}
+
+impl<R> LoggingReader<R> {
+    fn new(reader: R) -> LoggingReader<R> {
+        LoggingReader {
+            inner: reader,
+            read_bytes: 0,
+        }
+    }
+}
+
+impl<T: Read> Read for LoggingReader<T> {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        self.inner.read(buf).map(|i| {
+            self.read_bytes += i as u64;
+            i
+        })
+    }
+}
+
+impl<T: Seek> Seek for LoggingReader<T> {
+    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
+        self.inner.seek(pos)
+    }
+}
+
+#[test]
+fn new_from_seek() {
+    let mut reader = LoggingReader::new(Cursor::new(tar!("reading_files.tar")));
+    let mut ar_reader = Archive::new(&mut reader);
+    for _ in t!(ar_reader.entries()) {}
+    assert!(reader.read_bytes == 2560);
+
+    let mut seekable_reader = LoggingReader::new(Cursor::new(tar!("reading_files.tar")));
+    let mut ar_seekable_reader = Archive::new(&mut seekable_reader);
+    for _ in t!(ar_seekable_reader.entries_with_seek()) {}
+    assert!(seekable_reader.read_bytes == 1536);
+}
+
 #[test]
 fn large_filename() {
     let mut ar = Builder::new(Vec::new());