Skip to content

Commit

Permalink
Merge pull request #143 from ANSSI-FR/extract-keep-fd
Browse files Browse the repository at this point in the history
Avoid repetitive open/write/close on mlar extract
  • Loading branch information
commial committed Sep 29, 2023
2 parents 84f3979 + b2b0158 commit e469af9
Show file tree
Hide file tree
Showing 3 changed files with 162 additions and 9 deletions.
1 change: 1 addition & 0 deletions mlar/Cargo.toml
Expand Up @@ -27,6 +27,7 @@ zeroize = { version = "1", default-features = false}
# Could be made optional / feature to enable (for binary size)
tar = "0.4"
rand_chacha = "0.3"
lru = "0"

[dev-dependencies]
assert_cmd = "2.0"
Expand Down
43 changes: 34 additions & 9 deletions mlar/src/main.rs
Expand Up @@ -5,6 +5,7 @@ use curve25519_parser::{
use glob::Pattern;
use hkdf::Hkdf;
use humansize::{FormatSize, DECIMAL};
use lru::LruCache;
use mla::config::{ArchiveReaderConfig, ArchiveWriterConfig};
use mla::errors::{Error, FailSafeReadError};
use mla::helpers::linear_extract;
Expand All @@ -25,7 +26,9 @@ use std::fmt;
use std::fs::{self, read_dir, File};
use std::io::{self, BufRead};
use std::io::{Read, Seek, Write};
use std::num::NonZeroUsize;
use std::path::{Component, Path, PathBuf};
use std::sync::Mutex;
use tar::{Builder, Header};
use zeroize::Zeroize;

Expand Down Expand Up @@ -443,19 +446,32 @@ fn create_file<P1: AsRef<Path>>(
///
/// This wrapper is used to avoid opening all files simultaneously, potentially
/// reaching the filesystem limit, but rather appending to file on-demand
/// This could be enhanced with a limited pool of active files, but this
/// optimisation doesn't seem necessary for now
struct FileWriter {
///
/// A limited pool of active file, in a LRU cache, is used to avoid too many open-close
struct FileWriter<'a> {
/// Target file for data appending
path: PathBuf,
/// Reference on the cache
// A `Mutex` is used instead of a `RefCell` as `FileWriter` can be `Send`
cache: &'a Mutex<LruCache<PathBuf, File>>,
}

impl Write for FileWriter {
/// Max number of fd simultaneously opened
// Upper bound of the LRU cache used by `FileWriter`: past this many distinct
// targets, the least recently used `File` is evicted (and thus closed) and
// later re-opened in append mode on demand
pub const FILE_WRITER_POOL_SIZE: usize = 1000;

impl<'a> Write for FileWriter<'a> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
fs::OpenOptions::new()
.append(true)
.open(&self.path)?
.write(buf)
// Only one thread is using the FileWriter, safe to `.unwrap()`
let mut cache = self.cache.lock().unwrap();
if !cache.contains(&self.path) {
let file = fs::OpenOptions::new().append(true).open(&self.path)?;
cache.put(self.path.clone(), file);
}
// Safe to `unwrap` here cause we ensure the element is in the cache (mono-threaded)
let file = cache.get_mut(&self.path).unwrap();
file.write(buf)

// `file` will be closed on deletion from the cache
}

fn flush(&mut self) -> io::Result<()> {
Expand Down Expand Up @@ -575,11 +591,20 @@ fn extract(matches: &ArgMatches) -> Result<(), MlarError> {
if verbose {
println!("Extracting the whole archive using a linear extraction");
}
let cache = Mutex::new(LruCache::new(
NonZeroUsize::new(FILE_WRITER_POOL_SIZE).unwrap(),
));
let mut export: HashMap<&String, FileWriter> = HashMap::new();
for fname in &iter {
match create_file(&output_dir, fname)? {
Some((_file, path)) => {
export.insert(fname, FileWriter { path });
export.insert(
fname,
FileWriter {
path,
cache: &cache,
},
);
}
None => continue,
}
Expand Down
127 changes: 127 additions & 0 deletions mlar/tests/integration.rs
Expand Up @@ -1248,3 +1248,130 @@ fn test_no_open_on_encrypt() {
let assert = cmd.assert();
assert.failure();
}

// This value should be bigger than FILE_WRITER_POOL_SIZE so that the linear
// extraction below forces the writer's LRU descriptor pool to evict and
// re-open files instead of keeping every target open
const TEST_MANY_FILES_NB: usize = 2000;

// Regression test for the `FileWriter` LRU descriptor pool: archive and
// extract more files than FILE_WRITER_POOL_SIZE, through each extraction mode
#[test]
fn test_extract_lot_files() {
    let mlar_file = NamedTempFile::new("output.mla").unwrap();
    // Fixed seed: the generated file contents are reproducible across runs
    let mut rng: StdRng = SeedableRng::from_seed([0u8; 32]);
    let mut files_archive_order = vec![];
    let mut files = vec![];
    const SIZE_FILE: usize = 10;

    // Create many files, filled with a few alphanumeric characters
    // NOTE(review): `1..TEST_MANY_FILES_NB` is upper-bound exclusive, so 1999
    // files are created, not 2000 — still above FILE_WRITER_POOL_SIZE, so the
    // pool is exercised as intended; confirm whether 2000 was meant
    for i in 1..TEST_MANY_FILES_NB {
        let tmp_file = NamedTempFile::new(format!("file{}.bin", i)).unwrap();
        let data: Vec<u8> = Alphanumeric.sample_iter(&mut rng).take(SIZE_FILE).collect();
        tmp_file.write_binary(data.as_slice()).unwrap();

        files_archive_order.push(tmp_file.path().to_path_buf());
        files.push(tmp_file);
    }

    // Sort by path so `files` matches the order expected by the directory
    // content checks below
    files.sort_by(|i1, i2| Ord::cmp(&i1.path(), &i2.path()));

    // `mut` is needed: the single-file test at the end pops from `files`
    let mut testfs = TestFS {
        files,
        files_archive_order,
    };

    // `mlar create -l -o output.mla -`
    let mut cmd = Command::cargo_bin(UTIL).unwrap();
    cmd.arg("create")
        .arg("-l")
        .arg("-o")
        .arg(mlar_file.path())
        .arg("-");

    // Use "-" to avoid large command line (Windows limitation is about 8191 char)
    let mut file_list = String::new();
    for file in &testfs.files {
        file_list.push_str(format!("{}\n", file.path().to_string_lossy()).as_str());
    }
    cmd.write_stdin(String::from(&file_list));

    println!("{:?}", cmd);
    let assert = cmd.assert();
    assert.success().stderr(String::from(&file_list));

    // Rebuild the expected listing for the extraction checks
    let mut file_list = String::new();
    for file in &testfs.files {
        file_list.push_str(format!("{}\n", file.path().to_string_lossy()).as_str());
    }

    // Test global (with all files)

    // `mlar extract -v -i output.mla -o output_dir -g '*'`
    let output_dir = TempDir::new().unwrap();
    let mut cmd = Command::cargo_bin(UTIL).unwrap();
    cmd.arg("extract")
        .arg("-v")
        .arg("-i")
        .arg(mlar_file.path())
        .arg("-o")
        .arg(output_dir.path())
        .arg("-g")
        .arg("*");

    println!("{:?}", cmd);
    let assert = cmd.assert();
    assert.success().stdout(file_list);

    ensure_directory_content(output_dir.path(), &testfs.files);

    // Test linear extraction of all files

    // `mlar extract -v -i output.mla -o output_dir`
    let output_dir = TempDir::new().unwrap();
    let mut cmd = Command::cargo_bin(UTIL).unwrap();
    cmd.arg("extract")
        .arg("-v")
        .arg("-i")
        .arg(mlar_file.path())
        .arg("-o")
        .arg(output_dir.path());

    println!("{:?}", cmd);
    let assert = cmd.assert();
    assert
        .success()
        .stdout("Extracting the whole archive using a linear extraction\n");

    ensure_directory_content(output_dir.path(), &testfs.files);

    // Test extraction of one file explicitly
    // `mlar extract -v -i output.mla -o output_dir file1`
    // Keep only the NamedTempFile matching the first archived entry; every
    // other popped file is dropped here (its temp file is removed with it)
    let one_filename = &testfs.files_archive_order[0];
    let mut one_file = Vec::new();
    loop {
        match testfs.files.pop() {
            None => {
                break;
            }
            Some(ntf) => {
                if ntf.path() == one_filename {
                    one_file.push(ntf);
                }
            }
        }
    }
    let output_dir = TempDir::new().unwrap();
    let mut cmd = Command::cargo_bin(UTIL).unwrap();
    cmd.arg("extract")
        .arg("-v")
        .arg("-i")
        .arg(mlar_file.path())
        .arg("-o")
        .arg(output_dir.path())
        .arg(one_filename);

    println!("{:?}", cmd);
    let assert = cmd.assert();
    assert
        .success()
        .stdout(format!("{}\n", one_filename.to_string_lossy()));

    ensure_directory_content(output_dir.path(), &one_file);
}

0 comments on commit e469af9

Please sign in to comment.