From fb5105725ff2a6344cdff7eba62bbd8e78576842 Mon Sep 17 00:00:00 2001 From: Marli Frost Date: Sat, 12 Sep 2020 10:32:10 +0100 Subject: [PATCH 1/6] refactor: reintroduce path sanitization strategy I've documented the drawbacks of this strategy to make sure users are aware of the tradeoff being made. --- src/read.rs | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/read.rs b/src/read.rs index d19b353f7..cf16c6265 100644 --- a/src/read.rs +++ b/src/read.rs @@ -9,6 +9,7 @@ use crate::zipcrypto::ZipCryptoReaderValid; use std::borrow::Cow; use std::collections::HashMap; use std::io::{self, prelude::*}; +use std::path::Component; use crate::cp437::FromCp437; use crate::types::{DateTime, System, ZipFileData}; @@ -564,11 +565,21 @@ impl<'a> ZipFile<'a> { } /// Get the name of the file + /// + /// # Warnings + /// + /// It is dangerous to use this name directly when extracting an archive. + /// It may contain an absolute path (`/etc/shadow`), or break out of the + /// current directory (`../runtime`). Carelessly writing to these paths + /// allows an attacker to craft a ZIP archive that will overwrite critical + /// files. pub fn name(&self) -> &str { &self.data.file_name } /// Get the name of the file, in the raw (internal) byte representation. + /// + /// The encoding of this data is currently undefined. pub fn name_raw(&self) -> &[u8] { &self.data.file_name_raw } @@ -578,9 +589,24 @@ impl<'a> ZipFile<'a> { #[deprecated( since = "0.5.7", note = "by stripping `..`s from the path, the meaning of paths can change. - You must use a sanitization strategy that's appropriate for your input" + `mangled_name` can be used if this behaviour is desirable" )] pub fn sanitized_name(&self) -> ::std::path::PathBuf { + self.mangled_name() + } + + /// Rewrite the path, ignoring any path components with special meaning. + /// + /// - Absolute paths are made relative + /// - [`ParentDir`]s are ignored + /// - Truncates the filename at a NULL byte + /// + /// This is appropriate if you need to be able to extract *something* from + /// any archive, but will easily misrepresent trivial paths like + /// `foo/../bar` as `foo/bar` (instead of `bar`). + /// + /// [`ParentDir`]: `Component::ParentDir` + pub fn mangled_name(&self) -> ::std::path::PathBuf { self.data.file_name_sanitized() } From 103003388c1cbbbcc9bca0eb6f5b76b642171fb0 Mon Sep 17 00:00:00 2001 From: Marli Frost Date: Sat, 12 Sep 2020 10:36:41 +0100 Subject: [PATCH 2/6] feat: implement a defensive sanitisation strategy --- src/read.rs | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/read.rs b/src/read.rs index cf16c6265..8d04a7a1a 100644 --- a/src/read.rs +++ b/src/read.rs @@ -9,7 +9,7 @@ use crate::zipcrypto::ZipCryptoReaderValid; use std::borrow::Cow; use std::collections::HashMap; use std::io::{self, prelude::*}; -use std::path::Component; +use std::path::{Component, Path}; use crate::cp437::FromCp437; use crate::types::{DateTime, System, ZipFileData}; @@ -573,6 +573,9 @@ impl<'a> ZipFile<'a> { /// current directory (`../runtime`). Carelessly writing to these paths /// allows an attacker to craft a ZIP archive that will overwrite critical /// files. + /// + /// You can use the [`ZipFile::name_as_child`] method to validate the name + /// as a safe path. pub fn name(&self) -> &str { &self.data.file_name } @@ -603,13 +606,41 @@ impl<'a> ZipFile<'a> { /// /// This is appropriate if you need to be able to extract *something* from /// any archive, but will easily misrepresent trivial paths like - /// `foo/../bar` as `foo/bar` (instead of `bar`). + /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, + /// [`ZipFile::name_as_child`] is the better option in most scenarios. /// /// [`ParentDir`]: `Component::ParentDir` pub fn mangled_name(&self) -> ::std::path::PathBuf { self.data.file_name_sanitized() } + /// Ensure the file path is safe to use as a [`Path`]. + /// + /// - It can't contain NULL bytes + /// - It can't resolve to a path outside the current directory + /// > `foo/../bar` is fine, `foo/../../bar` is not. + /// - It can't be an absolute path + /// + /// This will read well-formed ZIP files correctly, and is resistant + /// to path-based exploits. It is recommended over + /// [`ZipFile::mangled_name`]. + pub fn name_as_child(&self) -> Option<&Path> { + if self.data.file_name.contains('\0') { + return None; + } + let path = Path::new(&self.data.file_name); + let mut depth = 0usize; + for component in path.components() { + match component { + Component::Prefix(_) | Component::RootDir => return None, + Component::ParentDir => depth = depth.checked_sub(1)?, + Component::Normal(_) => depth += 1, + Component::CurDir => (), + } + } + Some(path) + } + /// Get the comment of the file pub fn comment(&self) -> &str { &self.data.file_comment From a35c8ffa91fdd212d7ce1325f504514fb2b039bd Mon Sep 17 00:00:00 2001 From: Marli Frost Date: Sat, 12 Sep 2020 10:37:33 +0100 Subject: [PATCH 3/6] chore: update tests to use preferred method --- examples/extract.rs | 14 ++++++-------- examples/file_info.rs | 13 +++++++++---- tests/zip64_large.rs | 5 ++--- tests/zip_crypto.rs | 3 +-- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/examples/extract.rs b/examples/extract.rs index fcd23e3a7..5aa675708 100644 --- a/examples/extract.rs +++ b/examples/extract.rs @@ -18,8 +18,10 @@ fn real_main() -> i32 { for i in 0..archive.len() { let mut file = archive.by_index(i).unwrap(); - #[allow(deprecated)] - let outpath = file.sanitized_name(); + let outpath = match file.name_as_child() { + Some(path) => path, + None => continue, + }; { let comment = file.comment(); @@ -29,17 +31,13 @@ fn real_main() -> i32 { } if (&*file.name()).ends_with('/') { - println!( - "File {} extracted to \"{}\"", - i, - outpath.as_path().display() - ); + println!("File {} extracted to \"{}\"", i, outpath.display()); fs::create_dir_all(&outpath).unwrap(); } else { println!( "File {} extracted to \"{}\" ({} bytes)", i, - outpath.as_path().display(), + outpath.display(), file.size() ); if let Some(p) = outpath.parent() { diff --git a/examples/file_info.rs b/examples/file_info.rs index bed1b8187..dd72e7767 100644 --- a/examples/file_info.rs +++ b/examples/file_info.rs @@ -19,8 +19,13 @@ fn real_main() -> i32 { for i in 0..archive.len() { let file = archive.by_index(i).unwrap(); - #[allow(deprecated)] - let outpath = file.sanitized_name(); + let outpath = match file.name_as_child() { + Some(path) => path, + None => { + println!("Entry {} has a suspicious path", file.name()); + continue; + } + }; { let comment = file.comment(); @@ -33,13 +38,13 @@ fn real_main() -> i32 { println!( "Entry {} is a directory with name \"{}\"", i, - outpath.as_path().display() + outpath.display() ); } else { println!( "Entry {} is a file with name \"{}\" ({} bytes)", i, - outpath.as_path().display(), + outpath.display(), file.size() ); } diff --git a/tests/zip64_large.rs b/tests/zip64_large.rs index c4b5a6b43..ae790a541 100644 --- a/tests/zip64_large.rs +++ b/tests/zip64_large.rs @@ -195,12 +195,11 @@ fn zip64_large() { for i in 0..archive.len() { let mut file = archive.by_index(i).unwrap(); - #[allow(deprecated)] - let outpath = file.sanitized_name(); + let outpath = file.name_as_child().unwrap(); println!( "Entry {} has name \"{}\" ({} bytes)", i, - outpath.as_path().display(), + outpath.display(), file.size() ); diff --git a/tests/zip_crypto.rs b/tests/zip_crypto.rs index aabe40d61..266264951 100644 --- a/tests/zip_crypto.rs +++ b/tests/zip_crypto.rs @@ -75,8 +75,7 @@ fn encrypted_file() { .by_index_decrypt(0, "test".as_bytes()) .unwrap() .unwrap(); - #[allow(deprecated)] - let file_name = file.sanitized_name(); + let file_name = file.name_as_child().unwrap(); assert_eq!(file_name, std::path::PathBuf::from("test.txt")); let mut data = Vec::new(); From d0e905acc513395ae398654e1ebecc428ede3e76 Mon Sep 17 00:00:00 2001 From: Marli Frost Date: Sat, 12 Sep 2020 10:38:47 +0100 Subject: [PATCH 4/6] feat: provide archive extraction API --- src/read.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/src/read.rs b/src/read.rs index 8d04a7a1a..b25862b55 100644 --- a/src/read.rs +++ b/src/read.rs @@ -311,6 +311,49 @@ impl ZipArchive { comment: footer.zip_file_comment, }) } + /// Extract a Zip archive into a directory. + /// + /// Malformed and malicious paths are rejected so that they cannot escape + /// the given directory. + /// + /// This bails on the first error and does not attempt cleanup. + /// + /// # Platform-specific behaviour + /// + /// On unix systems permissions from the zip file are preserved, if they exist. + pub fn extract>(&mut self, directory: P) -> ZipResult<()> { + use std::fs; + + for i in 0..self.len() { + let mut file = self.by_index(i)?; + let filepath = file + .name_as_child() + .ok_or(ZipError::InvalidArchive("Invalid file path"))?; + + let outpath = directory.as_ref().join(filepath); + + if (file.name()).ends_with('/') { + fs::create_dir_all(&outpath)?; + } else { + if let Some(p) = outpath.parent() { + if !p.exists() { + fs::create_dir_all(&p)?; + } + } + let mut outfile = fs::File::create(&outpath)?; + io::copy(&mut file, &mut outfile)?; + } + // Get and Set permissions + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Some(mode) = file.unix_mode() { + fs::set_permissions(&outpath, fs::Permissions::from_mode(mode))?; + } + } + } + Ok(()) + } /// Number of files contained in this zip. pub fn len(&self) -> usize { @@ -967,8 +1010,7 @@ mod test { for i in 0..zip.len() { let zip_file = zip.by_index(i).unwrap(); - #[allow(deprecated)] - let full_name = zip_file.sanitized_name(); + let full_name = zip_file.name_as_child().unwrap(); let file_name = full_name.file_name().unwrap().to_str().unwrap(); assert!( (file_name.starts_with("dir") && zip_file.is_dir()) From 33a787ec546d30518c6b54c347dee587dbee58b2 Mon Sep 17 00:00:00 2001 From: Marli Frost Date: Sat, 12 Sep 2020 11:10:19 +0100 Subject: [PATCH 5/6] fix: overlapping borrows on unix platforms When cfg(unix), the `outpatj` meeded to last until the `set_permissions` call, but it can't exist during the `io::copy` --- examples/extract.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/extract.rs b/examples/extract.rs index 5aa675708..8eeee9f69 100644 --- a/examples/extract.rs +++ b/examples/extract.rs @@ -19,7 +19,7 @@ fn real_main() -> i32 { for i in 0..archive.len() { let mut file = archive.by_index(i).unwrap(); let outpath = match file.name_as_child() { - Some(path) => path, + Some(path) => path.to_owned(), None => continue, }; From 105368aebfcd5e58d31afc5786a3b4bfeed8ad33 Mon Sep 17 00:00:00 2001 From: Marli Frost Date: Tue, 10 Nov 2020 16:37:14 +0000 Subject: [PATCH 6/6] docs: improve explanation of new APIs --- examples/extract.rs | 2 +- examples/file_info.rs | 2 +- src/read.rs | 25 ++++++++++--------------- tests/zip64_large.rs | 2 +- tests/zip_crypto.rs | 2 +- 5 files changed, 14 insertions(+), 19 deletions(-) diff --git a/examples/extract.rs b/examples/extract.rs index 8eeee9f69..05c5a4aad 100644 --- a/examples/extract.rs +++ b/examples/extract.rs @@ -18,7 +18,7 @@ fn real_main() -> i32 { for i in 0..archive.len() { let mut file = archive.by_index(i).unwrap(); - let outpath = match file.name_as_child() { + let outpath = match file.enclosed_name() { Some(path) => path.to_owned(), None => continue, }; diff --git a/examples/file_info.rs b/examples/file_info.rs index dd72e7767..315b5c386 100644 --- a/examples/file_info.rs +++ b/examples/file_info.rs @@ -19,7 +19,7 @@ fn real_main() -> i32 { for i in 0..archive.len() { let file = archive.by_index(i).unwrap(); - let outpath = match file.name_as_child() { + let outpath = match file.enclosed_name() { Some(path) => path, None => { println!("Entry {} has a suspicious path", file.name()); diff --git a/src/read.rs b/src/read.rs index b25862b55..3d7206a38 100644 --- a/src/read.rs +++ b/src/read.rs @@ -311,28 +311,23 @@ impl ZipArchive { comment: footer.zip_file_comment, }) } - /// Extract a Zip archive into a directory. + /// Extract a Zip archive into a directory, overwriting files if they + /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. /// - /// Malformed and malicious paths are rejected so that they cannot escape - /// the given directory. - /// - /// This bails on the first error and does not attempt cleanup. - /// - /// # Platform-specific behaviour - /// - /// On unix systems permissions from the zip file are preserved, if they exist. + /// Extraction is not atomic; If an error is encountered, some of the files + /// may be left on disk. pub fn extract>(&mut self, directory: P) -> ZipResult<()> { use std::fs; for i in 0..self.len() { let mut file = self.by_index(i)?; let filepath = file - .name_as_child() + .enclosed_name() .ok_or(ZipError::InvalidArchive("Invalid file path"))?; let outpath = directory.as_ref().join(filepath); - if (file.name()).ends_with('/') { + if file.name().ends_with('/') { fs::create_dir_all(&outpath)?; } else { if let Some(p) = outpath.parent() { @@ -617,7 +612,7 @@ impl<'a> ZipFile<'a> { /// allows an attacker to craft a ZIP archive that will overwrite critical /// files. /// - /// You can use the [`ZipFile::name_as_child`] method to validate the name + /// You can use the [`ZipFile::enclosed_name`] method to validate the name /// as a safe path. pub fn name(&self) -> &str { &self.data.file_name @@ -650,7 +645,7 @@ impl<'a> ZipFile<'a> { /// This is appropriate if you need to be able to extract *something* from /// any archive, but will easily misrepresent trivial paths like /// `foo/../bar` as `foo/bar` (instead of `bar`). Because of this, - /// [`ZipFile::name_as_child`] is the better option in most scenarios. + /// [`ZipFile::enclosed_name`] is the better option in most scenarios. /// /// [`ParentDir`]: `Component::ParentDir` pub fn mangled_name(&self) -> ::std::path::PathBuf { @@ -667,7 +662,7 @@ impl<'a> ZipFile<'a> { /// This will read well-formed ZIP files correctly, and is resistant /// to path-based exploits. It is recommended over /// [`ZipFile::mangled_name`]. - pub fn name_as_child(&self) -> Option<&Path> { + pub fn enclosed_name(&self) -> Option<&Path> { if self.data.file_name.contains('\0') { return None; } @@ -1010,7 +1005,7 @@ mod test { for i in 0..zip.len() { let zip_file = zip.by_index(i).unwrap(); - let full_name = zip_file.name_as_child().unwrap(); + let full_name = zip_file.enclosed_name().unwrap(); let file_name = full_name.file_name().unwrap().to_str().unwrap(); assert!( (file_name.starts_with("dir") && zip_file.is_dir()) diff --git a/tests/zip64_large.rs b/tests/zip64_large.rs index ae790a541..3d10a3181 100644 --- a/tests/zip64_large.rs +++ b/tests/zip64_large.rs @@ -195,7 +195,7 @@ fn zip64_large() { for i in 0..archive.len() { let mut file = archive.by_index(i).unwrap(); - let outpath = file.name_as_child().unwrap(); + let outpath = file.enclosed_name().unwrap(); println!( "Entry {} has name \"{}\" ({} bytes)", i, diff --git a/tests/zip_crypto.rs b/tests/zip_crypto.rs index 266264951..cae6b1f32 100644 --- a/tests/zip_crypto.rs +++ b/tests/zip_crypto.rs @@ -75,7 +75,7 @@ fn encrypted_file() { .by_index_decrypt(0, "test".as_bytes()) .unwrap() .unwrap(); - let file_name = file.name_as_child().unwrap(); + let file_name = file.enclosed_name().unwrap(); assert_eq!(file_name, std::path::PathBuf::from("test.txt")); let mut data = Vec::new();