Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

accurate mtimes for directories, overwrite symlinks, preserve ownership #217

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
37 changes: 36 additions & 1 deletion src/archive.rs
Expand Up @@ -22,6 +22,9 @@ pub struct ArchiveInner<R: ?Sized> {
unpack_xattrs: bool,
preserve_permissions: bool,
preserve_mtime: bool,
#[cfg(unix)]
preserve_ownership: bool,
overwrite: bool,
ignore_zeros: bool,
obj: RefCell<R>,
}
Expand All @@ -47,6 +50,9 @@ impl<R: Read> Archive<R> {
unpack_xattrs: false,
preserve_permissions: false,
preserve_mtime: true,
#[cfg(unix)]
preserve_ownership: false,
overwrite: true,
ignore_zeros: false,
obj: RefCell::new(obj),
pos: Cell::new(0),
Expand Down Expand Up @@ -117,6 +123,21 @@ impl<R: Read> Archive<R> {
self.inner.preserve_permissions = preserve;
}

/// Controls whether the owner and group recorded for an entry are applied
/// to what gets extracted from this archive.
///
/// The flag starts out disabled, and this setter only exists on Unix
/// targets.
#[cfg(unix)]
pub fn set_preserve_ownership(&mut self, preserve: bool) {
    let inner = &mut self.inner;
    inner.preserve_ownership = preserve;
}

/// Sets whether extraction may replace a file or symlink that already
/// exists at the destination path.
///
/// The flag starts out enabled.
pub fn set_overwrite(&mut self, overwrite: bool) {
    let inner = &mut self.inner;
    inner.overwrite = overwrite;
}

/// Indicate whether access time information is preserved when unpacking
/// this entry.
///
Expand Down Expand Up @@ -151,9 +172,20 @@ impl<'a> Archive<dyn Read + 'a> {
}

/// Unpacks every entry of the archive beneath `dst`.
///
/// Directory modification times are not applied while entries are being
/// extracted: they are collected in `deferred_times` and written only after
/// the entry loop has finished. The first failure aborts the whole operation
/// and is returned to the caller.
fn _unpack(&mut self, dst: &Path) -> io::Result<()> {
// (path, mtime) pairs queued for directory entries during extraction.
let mut deferred_times = vec![];

for entry in self._entries()? {
let mut file = entry.map_err(|e| TarError::new("failed to iterate over archive", e))?;
// NOTE(review): this text comes from a diff view without +/- markers; the
// one-argument call below looks like the removed line and the two-argument
// call after it like its replacement — confirm before compiling.
file.unpack_in(dst)?;
file.unpack_in(&mut deferred_times, dst)?;
}
{
// set times on directories
// NOTE(review): presumably deferred so that extracting children into a
// directory afterwards cannot bump its mtime again — confirm.
for (dst, time) in deferred_times {
//eprintln!("actually set mtime {} on dir: {:?}", time, dst);
// The same timestamp is passed for both time arguments.
filetime::set_file_times(&dst, time, time).map_err(|e| {
TarError::new(&format!("failed to set mtime for `{}`", dst.display()), e)
})?;
}
}
Ok(())
}
Expand Down Expand Up @@ -254,7 +286,10 @@ impl<'a> EntriesFields<'a> {
pax_extensions: None,
unpack_xattrs: self.archive.inner.unpack_xattrs,
preserve_permissions: self.archive.inner.preserve_permissions,
#[cfg(unix)]
preserve_ownership: self.archive.inner.preserve_ownership,
preserve_mtime: self.archive.inner.preserve_mtime,
overwrite: self.archive.inner.overwrite,
};

// Store where the next entry is, rounding up by 512 bytes (the size of
Expand Down
182 changes: 161 additions & 21 deletions src/entry.rs
Expand Up @@ -15,6 +15,8 @@ use crate::header::bytes2path;
use crate::other;
use crate::pax::pax_extensions;
use crate::{Archive, Header, PaxExtensions};
#[cfg(unix)]
use std::ffi::CString;

/// A read-only view into an entry of an archive.
///
Expand All @@ -39,7 +41,10 @@ pub struct EntryFields<'a> {
pub data: Vec<EntryIo<'a>>,
pub unpack_xattrs: bool,
pub preserve_permissions: bool,
#[cfg(unix)]
pub preserve_ownership: bool,
pub preserve_mtime: bool,
pub overwrite: bool,
}

pub enum EntryIo<'a> {
Expand Down Expand Up @@ -185,13 +190,19 @@ impl<'a, R: Read> Entry<'a, R> {
///
/// let mut ar = Archive::new(File::open("foo.tar").unwrap());
///
/// let mut deferred_times= vec![];
///
/// for (i, file) in ar.entries().unwrap().enumerate() {
/// let mut file = file.unwrap();
/// file.unpack(format!("file-{}", i)).unwrap();
/// file.unpack(&mut deferred_times, format!("file-{}", i)).unwrap();
/// }
/// ```
pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<Unpacked> {
self.fields.unpack(None, dst.as_ref())
pub fn unpack<P: AsRef<Path>>(
&mut self,
deferred_times: &mut Vec<(PathBuf, FileTime)>,
dst: P,
) -> io::Result<Unpacked> {
self.fields.unpack(deferred_times, None, dst.as_ref())
}

/// Extracts this file under the specified path, avoiding security issues.
Expand All @@ -213,13 +224,19 @@ impl<'a, R: Read> Entry<'a, R> {
///
/// let mut ar = Archive::new(File::open("foo.tar").unwrap());
///
/// let mut deferred_times= vec![];
///
/// for (i, file) in ar.entries().unwrap().enumerate() {
/// let mut file = file.unwrap();
/// file.unpack_in("target").unwrap();
/// file.unpack_in(&mut deferred_times, "target").unwrap();
/// }
/// ```
pub fn unpack_in<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<bool> {
self.fields.unpack_in(dst.as_ref())
pub fn unpack_in<P: AsRef<Path>>(
&mut self,
deferred_times: &mut Vec<(PathBuf, FileTime)>,
dst: P,
) -> io::Result<bool> {
self.fields.unpack_in(deferred_times, dst.as_ref())
}

/// Indicate whether extended file attributes (xattrs on Unix) are preserved
Expand All @@ -242,6 +259,16 @@ impl<'a, R: Read> Entry<'a, R> {
self.fields.preserve_permissions = preserve;
}

/// Controls whether the owner and group recorded in this entry's header
/// are applied when the entry is unpacked.
///
/// The flag starts out disabled, and this setter only exists on Unix
/// targets.
#[cfg(unix)]
pub fn set_preserve_ownership(&mut self, preserve: bool) {
    let fields = &mut self.fields;
    fields.preserve_ownership = preserve;
}

/// Indicate whether access time information is preserved when unpacking
/// this entry.
///
Expand Down Expand Up @@ -341,7 +368,11 @@ impl<'a> EntryFields<'a> {
Ok(Some(pax_extensions(self.pax_extensions.as_ref().unwrap())))
}

fn unpack_in(&mut self, dst: &Path) -> io::Result<bool> {
fn unpack_in(
&mut self,
deferred_times: &mut Vec<(PathBuf, FileTime)>,
dst: &Path,
) -> io::Result<bool> {
// Notes regarding bsdtar 2.8.3 / libarchive 2.8.3:
// * Leading '/'s are trimmed. For example, `///test` is treated as
// `test`.
Expand Down Expand Up @@ -401,7 +432,7 @@ impl<'a> EntryFields<'a> {

let canon_target = self.validate_inside_dst(&dst, parent)?;

self.unpack(Some(&canon_target), &file_dst)
self.unpack(deferred_times, Some(&canon_target), &file_dst)
.map_err(|e| TarError::new(&format!("failed to unpack `{}`", file_dst.display()), e))?;

Ok(true)
Expand All @@ -425,14 +456,33 @@ impl<'a> EntryFields<'a> {
}

/// Returns access to the header of this entry in the archive.
fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result<Unpacked> {
fn unpack(
&mut self,
deferred_times: &mut Vec<(PathBuf, FileTime)>,
target_base: Option<&Path>,
dst: &Path,
) -> io::Result<Unpacked> {
let kind = self.header.entry_type();

if kind.is_dir() {
self.unpack_dir(dst)?;
if let Ok(mode) = self.header.mode() {
set_perms(dst, None, mode, self.preserve_permissions)?;
}
#[cfg(unix)]
{
if self.preserve_ownership {
unsafe {
set_owner(&dst, self.header.uid()?, self.header.gid()?)?;
}
}
}
if self.preserve_mtime {
if let Ok(mtime) = self.header.mtime() {
let mtime_set = FileTime::from_unix_time(mtime as i64, 0);
deferred_times.push((dst.to_owned(), mtime_set));
}
}
return Ok(Unpacked::__Nonexhaustive);
} else if kind.is_hard_link() || kind.is_symlink() {
let src = match self.link_name()? {
Expand Down Expand Up @@ -484,17 +534,41 @@ impl<'a> EntryFields<'a> {
)
})?;
} else {
symlink(&src, dst).map_err(|err| {
Error::new(
err.kind(),
format!(
"{} when symlinking {} to {}",
err,
src.display(),
dst.display()
),
)
})?;
symlink(&src, dst)
.or_else(|err_io| {
if err_io.kind() == io::ErrorKind::AlreadyExists && self.overwrite {
// remove dest and try once more
std::fs::remove_file(dst).and_then(|()| symlink(&src, dst))
} else {
Err(err_io)
}
})
.map_err(|err| {
Error::new(
err.kind(),
format!(
"{} when symlinking {} to {}",
err,
src.display(),
dst.display()
),
)
})
.and_then(|_| {
if self.preserve_mtime {
if let Ok(mtime) = self.header.mtime() {
let mtime_set = FileTime::from_unix_time(mtime as i64, 0);
filetime::set_symlink_file_times(&dst, mtime_set, mtime_set)
.map_err(|e| {
TarError::new(
&format!("failed to set mtime for `{}`", dst.display()),
e,
)
})?;
}
}
Ok(())
})?;
};
return Ok(Unpacked::__Nonexhaustive);

Expand Down Expand Up @@ -529,6 +603,14 @@ impl<'a> EntryFields<'a> {
if let Ok(mode) = self.header.mode() {
set_perms(dst, None, mode, self.preserve_permissions)?;
}
#[cfg(unix)]
{
if self.preserve_ownership {
unsafe {
set_owner(&dst, self.header.uid()?, self.header.gid()?)?;
}
}
}
return Ok(Unpacked::__Nonexhaustive);
}

Expand All @@ -550,12 +632,14 @@ impl<'a> EntryFields<'a> {
let mut f = open(dst).or_else(|err| {
if err.kind() != ErrorKind::AlreadyExists {
Err(err)
} else {
} else if self.overwrite {
match fs::remove_file(dst) {
Ok(()) => open(dst),
Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst),
Err(e) => Err(e),
}
} else {
Err(err)
}
})?;
for io in self.data.drain(..) {
Expand Down Expand Up @@ -596,6 +680,15 @@ impl<'a> EntryFields<'a> {
})?;
}
}
#[cfg(unix)]
{
if self.preserve_ownership {
unsafe {
set_owner(&dst, self.header.uid()?, self.header.gid()?)?;
}
}
}

if let Ok(mode) = self.header.mode() {
set_perms(dst, Some(&mut f), mode, self.preserve_permissions)?;
}
Expand Down Expand Up @@ -779,3 +872,50 @@ impl<'a> Read for EntryIo<'a> {
}
}
}

/// Attempts setting ownership information on provided path.
///
/// # Safety
/// This is hacky and racy since no file descriptors are used. Might need retrofitting the
/// crate to use `nix` low-level API.
#[cfg(unix)]
unsafe fn set_owner<P: AsRef<Path>>(path: P, uid: u64, gid: u64) -> io::Result<()> {
use std::os::unix::prelude::*;

//let fd = f.as_raw_fd();
if uid > libc::uid_t::max_value() as u64 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!("uid ({}) of entry would overflow system `uid_t` type", uid),
));
}
if gid > libc::gid_t::max_value() as u64 {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!("gid ({}) of entry would overflow system `gid_t` type", gid),
));
}
let cstr_path =
CString::new(Vec::from(path.as_ref().as_os_str().as_bytes())).map_err(|err_nul| {
io::Error::new(
io::ErrorKind::InvalidData,
format!("unable to create CString from path: {}", err_nul),
)
})?;
let res_chown =
/*unsafe*/ { libc::chown(cstr_path.as_ptr(), uid as libc::uid_t, gid as libc::gid_t) };
if res_chown != 0 {
return Err(io::Error::from_raw_os_error(libc_errno()));
}
Ok(())
}

/// Returns the calling thread's most recent OS error code (`errno`).
///
/// The value is obtained through the standard library rather than a
/// platform-specific libc symbol, so a single definition serves every Unix
/// target instead of one variant per operating system.
#[cfg(unix)]
fn libc_errno() -> i32 {
    // `last_os_error` is built from the raw OS code, so the fallback of 0 is
    // purely defensive.
    io::Error::last_os_error().raw_os_error().unwrap_or(0)
}