Skip to content

Commit

Permalink
Add support for PAX Format, Version 1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
ncihnegn committed Aug 22, 2022
1 parent f4f439c commit 2f5ea4f
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 25 deletions.
91 changes: 67 additions & 24 deletions src/archive.rs
Expand Up @@ -9,9 +9,10 @@ use std::path::Path;

use crate::entry::{EntryFields, EntryIo};
use crate::error::TarError;
use crate::header::{SparseEntry, BLOCK_SIZE};
use crate::other;
use crate::pax::pax_extensions_size;
use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header};
use crate::{Entry, GnuExtSparseHeader, Header};

/// A top-level representation of an archive file.
///
Expand Down Expand Up @@ -256,6 +257,7 @@ impl<'a, R: Read> Iterator for Entries<'a, R> {
}
}

#[allow(unused_assignments)] // https://github.com/rust-lang/rust/issues/22630
impl<'a> EntriesFields<'a> {
fn next_entry_raw(
&mut self,
Expand All @@ -277,14 +279,14 @@ impl<'a> EntriesFields<'a> {
// Otherwise, check if we are ignoring zeros and continue, or break as if this is the
// end of the archive.
if !header.as_bytes().iter().all(|i| *i == 0) {
self.next += 512;
self.next += BLOCK_SIZE as u64;
break;
}

if !self.archive.inner.ignore_zeros {
return Ok(None);
}
self.next += 512;
self.next += BLOCK_SIZE as u64;
header_pos = self.next;
}

Expand Down Expand Up @@ -325,11 +327,11 @@ impl<'a> EntriesFields<'a> {
// Store where the next entry is, rounding up by 512 bytes (the size of
// a header);
let size = size
.checked_add(511)
.checked_add(BLOCK_SIZE as u64 - 1)
.ok_or_else(|| other("size overflow"))?;
self.next = self
.next
.checked_add(size & !(512 - 1))
.checked_add(size & !(BLOCK_SIZE as u64 - 1))
.ok_or_else(|| other("size overflow"))?;

Ok(Some(ret.into_entry()))
Expand Down Expand Up @@ -394,26 +396,65 @@ impl<'a> EntriesFields<'a> {
if let Some(pax_extensions_ref) = &pax_extensions {
pax_size = pax_extensions_size(pax_extensions_ref);
}
// Not an entry
// Keep pax_extensions for the next ustar header
processed -= 1;
continue;
}

let mut fields = EntryFields::from(entry);
fields.pax_extensions = pax_extensions;
pax_extensions = None; // Reset pax_extensions after use
if is_recognized_header && fields.is_pax_sparse() {
gnu_longname = fields.pax_sparse_name();
}
fields.long_pathname = gnu_longname;
fields.long_linkname = gnu_longlink;
fields.pax_extensions = pax_extensions;
self.parse_sparse_header(&mut fields)?;
return Ok(Some(fields.into_entry()));
}
}

fn parse_sparse_header(&mut self, entry: &mut EntryFields<'a>) -> io::Result<()> {
if !entry.header.entry_type().is_gnu_sparse() {
if !entry.is_pax_sparse() && !entry.header.entry_type().is_gnu_sparse() {
return Ok(());
}
let gnu = match entry.header.as_gnu() {
Some(gnu) => gnu,
None => return Err(other("sparse entry type listed but not GNU header")),
};
let mut sparse_map = Vec::<SparseEntry>::new();
let mut real_size = 0;
if entry.is_pax_sparse() {
real_size = entry.pax_sparse_realsize()?;
let mut num_bytes_read = 0;
let mut reader = io::BufReader::with_capacity(BLOCK_SIZE, &self.archive.inner);
let mut read_decimal_line = || -> io::Result<u64> {
let mut str = String::new();
num_bytes_read += reader.read_line(&mut str)?;
str.strip_suffix("\n")
.and_then(|s| s.parse::<u64>().ok())
.ok_or_else(|| other("failed to read a decimal line"))
};

let num_entries = read_decimal_line()?;
for _ in 0..num_entries {
let offset = read_decimal_line()?;
let size = read_decimal_line()?;
sparse_map.push(SparseEntry { offset, size });
}
let rem = BLOCK_SIZE - (num_bytes_read % BLOCK_SIZE);
entry.size -= (num_bytes_read + rem) as u64;
} else if entry.header.entry_type().is_gnu_sparse() {
let gnu = match entry.header.as_gnu() {
Some(gnu) => gnu,
None => return Err(other("sparse entry type listed but not GNU header")),
};
real_size = gnu.real_size()?;
for block in gnu.sparse.iter() {
if !block.is_empty() {
let offset = block.offset()?;
let size = block.length()?;
sparse_map.push(SparseEntry { offset, size });
}
}
}

// Sparse files are represented internally as a list of blocks that are
// read. Blocks are either a bunch of 0's or they're data from the
Expand Down Expand Up @@ -442,13 +483,10 @@ impl<'a> EntriesFields<'a> {
let data = &mut entry.data;
let reader = &self.archive.inner;
let size = entry.size;
let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> {
if block.is_empty() {
return Ok(());
}
let off = block.offset()?;
let len = block.length()?;
if len != 0 && (size - remaining) % 512 != 0 {
let mut add_block = |block: &SparseEntry| -> io::Result<_> {
let off = block.offset;
let len = block.size;
if len != 0 && (size - remaining) % BLOCK_SIZE as u64 != 0 {
return Err(other(
"previous block in sparse file was not \
aligned to 512-byte boundary",
Expand All @@ -474,25 +512,30 @@ impl<'a> EntriesFields<'a> {
data.push(EntryIo::Data(reader.take(len)));
Ok(())
};
for block in gnu.sparse.iter() {
add_block(block)?
for block in sparse_map {
add_block(&block)?
}
if gnu.is_extended() {
if entry.header.as_gnu().map(|gnu| gnu.is_extended()) == Some(true) {
let mut ext = GnuExtSparseHeader::new();
ext.isextended[0] = 1;
while ext.is_extended() {
if !try_read_all(&mut &self.archive.inner, ext.as_mut_bytes())? {
return Err(other("failed to read extension"));
}

self.next += 512;
self.next += BLOCK_SIZE as u64;
for block in ext.sparse.iter() {
add_block(block)?;
if !block.is_empty() {
add_block(&SparseEntry {
offset: block.offset()?,
size: block.length()?,
})?;
}
}
}
}
}
if cur != gnu.real_size()? {
if cur != real_size {
return Err(other(
"mismatch in sparse file chunks and \
size in header",
Expand Down
40 changes: 40 additions & 0 deletions src/entry.rs
Expand Up @@ -285,6 +285,46 @@ impl<'a> EntryFields<'a> {
self.read_to_end(&mut v).map(|_| v)
}

pub fn is_pax_sparse(&mut self) -> bool {
if let Some(ref pax) = self.pax_extensions {
let mut extensions = PaxExtensions::new(pax).filter_map(|f| f.ok());
return extensions
.find(|f| f.key_bytes() == b"GNU.sparse.major" && f.value_bytes() == b"1")
.is_some()
&& extensions
.find(|f| f.key_bytes() == b"GNU.sparse.minor" && f.value_bytes() == b"0")
.is_some();
}
false
}

pub fn pax_sparse_name(&mut self) -> Option<Vec<u8>> {
if let Some(ref pax) = self.pax_extensions {
return PaxExtensions::new(pax)
.filter_map(|f| f.ok())
.find(|f| f.key_bytes() == b"GNU.sparse.name")
.map(|f| f.value_bytes().to_vec());
}
None
}

pub fn pax_sparse_realsize(&mut self) -> io::Result<u64> {
if let Some(ref pax) = self.pax_extensions {
let pax = PaxExtensions::new(pax)
.filter_map(|f| f.ok())
.find(|f| f.key_bytes() == b"GNU.sparse.realsize")
.map(|f| f.value_bytes());
if let Some(field) = pax {
let str =
std::str::from_utf8(&field).map_err(|_| other("failed to read string"))?;
return str
.parse::<u64>()
.map_err(|_| other("failed to parse the real size"));
}
}
Err(other("PAX extension GNU.sparse.realsize not found"))
}

fn path(&self) -> io::Result<Cow<Path>> {
bytes2path(self.path_bytes())
}
Expand Down
10 changes: 9 additions & 1 deletion src/header.rs
Expand Up @@ -16,11 +16,13 @@ use std::str;
use crate::other;
use crate::EntryType;

pub const BLOCK_SIZE: usize = 512;

/// Representation of the header of an entry in an archive
#[repr(C)]
#[allow(missing_docs)]
pub struct Header {
bytes: [u8; 512],
bytes: [u8; BLOCK_SIZE],
}

/// Declares the information that should be included when filling a Header
Expand Down Expand Up @@ -110,6 +112,12 @@ pub struct GnuHeader {
pub pad: [u8; 17],
}

/// Description of a spare entry.
pub struct SparseEntry {
pub offset: u64,
pub size: u64,
}

/// Description of the header of a spare entry.
///
/// Specifies the offset/number of bytes of a chunk of data in octal.
Expand Down
13 changes: 13 additions & 0 deletions tests/all.rs
Expand Up @@ -1089,6 +1089,19 @@ fn sparse_with_trailing() {
assert_eq!(&s[0x100_000..], "1MB through\n");
}

#[test]
fn pax_sparse() {
let rdr = Cursor::new(tar!("pax_sparse.tar"));
let mut ar = Archive::new(rdr);
let td = t!(TempBuilder::new().prefix("tar-rs").tempdir());
t!(ar.unpack(td.path()));

let mut s = String::new();
t!(t!(File::open(td.path().join("sparse_begin.txt"))).read_to_string(&mut s));
assert_eq!(&s[..5], "test\n");
assert!(s[5..].chars().all(|x| x == '\u{0}'));
}

#[test]
fn path_separators() {
let mut ar = Builder::new(Vec::new());
Expand Down
Binary file added tests/archives/pax_sparse.tar
Binary file not shown.

0 comments on commit 2f5ea4f

Please sign in to comment.