From 12d6fe9a32aab517fcb7a7f3e37c6358b66c809f Mon Sep 17 00:00:00 2001 From: Bojan Date: Mon, 12 Apr 2021 11:08:24 -0300 Subject: [PATCH 1/5] enhance: add support for zstd skippable frames --- src/matchers/archive.rs | 28 +++++++++++++++++++++++++++- testdata/sample.skippable.zst | Bin 0 -> 105 bytes tests/archive.rs | 8 +++++++- 3 files changed, 34 insertions(+), 2 deletions(-) create mode 100644 testdata/sample.skippable.zst diff --git a/src/matchers/archive.rs b/src/matchers/archive.rs index 3e6c53e..a5c6c6f 100644 --- a/src/matchers/archive.rs +++ b/src/matchers/archive.rs @@ -1,3 +1,5 @@ +use core::convert::TryInto; + /// Returns whether a buffer is an ePub. pub fn is_epub(buf: &[u8]) -> bool { crate::book::is_epub(buf) @@ -199,9 +201,33 @@ pub fn is_dcm(buf: &[u8]) -> bool { buf.len() > 131 && buf[128] == 0x44 && buf[129] == 0x49 && buf[130] == 0x43 && buf[131] == 0x4D } +const ZSTD_SKIP_START: usize = 0x184D2A50; +const ZSTD_SKIP_MASK: usize = 0xFFFFFFF0; + /// Returns whether a buffer is a Zstd archive. +// Zstandard compressed data is made of one or more frames. +// There are two frame formats defined by Zstandard: Zstandard frames and Skippable frames. +// See more details from https://tools.ietf.org/id/draft-kucherawy-dispatch-zstd-00.html#rfc.section.2 pub fn is_zst(buf: &[u8]) -> bool { - buf.len() > 3 && buf[0] == 0x28 && buf[1] == 0xB5 && buf[2] == 0x2F && buf[3] == 0xFD + if buf.len() > 3 && buf[0] == 0x28 && buf[1] == 0xB5 && buf[2] == 0x2F && buf[3] == 0xFD { + return true; + } + + if buf.len() < 8 { + return false; + } + + let magic = u32::from_le_bytes(buf[0..4].try_into().unwrap()) as usize; + if magic & ZSTD_SKIP_MASK == ZSTD_SKIP_START { + let data_len = u32::from_le_bytes(buf[4..8].try_into().unwrap()) as usize; + if buf.len() < 8 + data_len { + return false; + } + let next_frame = &buf[8 + data_len..]; + return is_zst(next_frame); + } + + return false; } /// Returns whether a buffer is a MSI Windows Installer archive. 
diff --git a/testdata/sample.skippable.zst b/testdata/sample.skippable.zst new file mode 100644 index 0000000000000000000000000000000000000000..c8f7d95a8461c9074a319454da4a2be509c4c120 GIT binary patch literal 105 zcmWI0@|9p=U|@&^VvVi(e_0}0nHWNtOBf7*z|73V+=#)@#K6eNz{JGV%#gvr5C}{d z6bu+ji%SxV6ciYW6Z5i56EhhMxfnPZezAUVW)|UTWoC$Bn8236Hf!yc-|7vlQo_HB E0gbU4cK`qY literal 0 HcmV?d00001 diff --git a/tests/archive.rs b/tests/archive.rs index 37c8e5a..6c34ee5 100644 --- a/tests/archive.rs +++ b/tests/archive.rs @@ -9,5 +9,11 @@ test_format!( ); test_format!(Archive, "application/zstd", "zst", zst, "sample.tar.zst"); - test_format!(Archive, "application/x-cpio", "cpio", cpio, "sample.cpio"); +test_format!( + Archive, + "application/zstd", + "zst", + zst_skip, + "sample.skippable.zst" +); From effadc497646de2f70584488c464e0da5b7df3e7 Mon Sep 17 00:00:00 2001 From: Bojan Date: Mon, 12 Apr 2021 11:38:56 -0300 Subject: [PATCH 2/5] fix: fix style --- src/matchers/archive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/matchers/archive.rs b/src/matchers/archive.rs index a5c6c6f..bf3e5f8 100644 --- a/src/matchers/archive.rs +++ b/src/matchers/archive.rs @@ -227,7 +227,7 @@ pub fn is_zst(buf: &[u8]) -> bool { return is_zst(next_frame); } - return false; + false } /// Returns whether a buffer is a MSI Windows Installer archive. 
From f24856cfa3803c65b5e1ae4e0b71d228fc7ab55a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20M=2E=20Bezerra?= Date: Sun, 11 Jun 2023 17:37:17 -0300 Subject: [PATCH 3/5] remove nesting --- src/matchers/archive.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/matchers/archive.rs b/src/matchers/archive.rs index bf3e5f8..190922c 100644 --- a/src/matchers/archive.rs +++ b/src/matchers/archive.rs @@ -218,16 +218,17 @@ pub fn is_zst(buf: &[u8]) -> bool { } let magic = u32::from_le_bytes(buf[0..4].try_into().unwrap()) as usize; - if magic & ZSTD_SKIP_MASK == ZSTD_SKIP_START { - let data_len = u32::from_le_bytes(buf[4..8].try_into().unwrap()) as usize; - if buf.len() < 8 + data_len { - return false; - } - let next_frame = &buf[8 + data_len..]; - return is_zst(next_frame); + if magic & ZSTD_SKIP_MASK != ZSTD_SKIP_START { + return false; + } + + let data_len = u32::from_le_bytes(buf[4..8].try_into().unwrap()) as usize; + if buf.len() < 8 + data_len { + return false; } - false + let next_frame = &buf[8 + data_len..]; + is_zst(next_frame) } /// Returns whether a buffer is a MSI Windows Installer archive. From 56bcbeee799a00bddb5b2c1186ee2aa61855fef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20M=2E=20Bezerra?= Date: Sun, 11 Jun 2023 17:37:38 -0300 Subject: [PATCH 4/5] check that u32 -> usize conversions respect usize boundaries, which is necessary for targets with a 16-bit usize --- src/matchers/archive.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/matchers/archive.rs b/src/matchers/archive.rs index 190922c..9155347 100644 --- a/src/matchers/archive.rs +++ b/src/matchers/archive.rs @@ -1,4 +1,4 @@ -use core::convert::TryInto; +use std::convert::{TryFrom, TryInto}; /// Returns whether a buffer is an ePub. 
pub fn is_epub(buf: &[u8]) -> bool { @@ -217,12 +217,20 @@ pub fn is_zst(buf: &[u8]) -> bool { return false; } - let magic = u32::from_le_bytes(buf[0..4].try_into().unwrap()) as usize; + let magic = u32::from_le_bytes(buf[0..4].try_into().unwrap()); + let Ok(magic) = usize::try_from(magic) else { + return false; + }; + if magic & ZSTD_SKIP_MASK != ZSTD_SKIP_START { return false; } - let data_len = u32::from_le_bytes(buf[4..8].try_into().unwrap()) as usize; + let data_len = u32::from_le_bytes(buf[4..8].try_into().unwrap()); + let Ok(data_len) = usize::try_from(data_len) else { + return false; + }; + if buf.len() < 8 + data_len { return false; } From 8c8c835c48579fa2e9f5ce158edb5f3148342c27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Marcos?= Date: Thu, 22 Jun 2023 01:29:59 -0300 Subject: [PATCH 5/5] Replace `std` by `core` Co-authored-by: Bojan --- src/matchers/archive.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/matchers/archive.rs b/src/matchers/archive.rs index 9155347..ff03ca3 100644 --- a/src/matchers/archive.rs +++ b/src/matchers/archive.rs @@ -1,4 +1,4 @@ -use std::convert::{TryFrom, TryInto}; +use core::convert::{TryFrom, TryInto}; /// Returns whether a buffer is an ePub. pub fn is_epub(buf: &[u8]) -> bool {