From b3714de1d613e9a8eb3cf92053c99b60f4566c7b Mon Sep 17 00:00:00 2001
From: Kento Oki
Date: Sat, 13 Apr 2024 09:18:03 +0900
Subject: [PATCH] PE: parse rich header

---
 src/pe/header.rs | 378 ++++++++++++++++++++++++++++++++++++++++++++---
 src/pe/mod.rs    |   4 +-
 2 files changed, 354 insertions(+), 28 deletions(-)

diff --git a/src/pe/header.rs b/src/pe/header.rs
index 62bfe190..38bcf9a5 100644
--- a/src/pe/header.rs
+++ b/src/pe/header.rs
@@ -1,8 +1,7 @@
 use crate::error;
-use crate::pe::{data_directories, optional_header, section_table, symbol};
+use crate::pe::{data_directories, debug, optional_header, section_table, symbol};
 use crate::strtab;
 use alloc::vec::Vec;
-use log::debug;
 use scroll::{ctx, IOread, IOwrite, Pread, Pwrite, SizeWith};
 
 /// In `winnt.h` and `pe.h`, it's `IMAGE_DOS_HEADER`. It's a DOS header present in all PE binaries.
@@ -381,10 +380,37 @@ impl DosHeader {
             pe_pointer,
         })
     }
+
+    /// Parse the DOS stub.
+    ///
+    /// The DOS stub is a small program that prints the message "This program cannot be run in DOS mode" and exits; and
+    /// is not really read for the PECOFF file format. It's a relic from the MS-DOS era.
+    ///
+    /// Returns the bytes between [DOS_STUB_OFFSET] and `pe_pointer`; besides the stub program this range holds
+    /// the raw Rich header when one is present. The slice is empty when `pe_pointer` does not lie behind
+    /// [DOS_STUB_OFFSET], as in files whose PE header overlaps the DOS header.
+    ///
+    /// # Errors
+    ///
+    /// Returns [error::Error::Malformed] when `pe_pointer` lies beyond the end of `bytes`.
+    pub fn parse_dos_stub<'a>(bytes: &'a [u8], pe_pointer: u32) -> error::Result<&'a [u8]> {
+        let start_offset = DOS_STUB_OFFSET as usize;
+        let end_offset = pe_pointer as usize;
+        if bytes.len() < end_offset {
+            return Err(error::Error::Malformed(format!(
+                "PE header pointer ({:#x}) lies beyond the end of the input ({} bytes)",
+                end_offset,
+                bytes.len()
+            )));
+        }
+        // The stub starts behind the DOS header: the header is written separately, so including it here
+        // would emit it twice when the PE header is serialized.
+        Ok(bytes.get(start_offset..end_offset).unwrap_or_default())
+    }
 }
 
 #[repr(C)]
-#[derive(Debug, PartialEq, Copy, Clone, Pread, Pwrite)]
+#[derive(Debug, PartialEq, Copy, Clone)]
 /// The DOS stub program which should be executed in DOS mode. It prints the message "This program cannot be run in DOS mode" and exits.
 ///
 /// ## Position in a modern PE file
@@ -393,17 +419,21 @@ impl DosHeader {
 ///
 /// * De facto, can be followed by a non-standard ["Rich header"](https://0xrick.github.io/win-internals/pe3/#rich-header).
 /// * According to the standard, is followed by the [Header::signature] and then the [CoffHeader].
-pub struct DosStub(pub [u8; 0x40]);
-impl Default for DosStub {
+pub struct DosStub<'a> {
+    pub data: &'a [u8],
+}
+impl<'a> Default for DosStub<'a> {
     fn default() -> Self {
         // "This program cannot be run in DOS mode" error program
-        Self([
-            0x0E, 0x1F, 0xBA, 0x0E, 0x00, 0xB4, 0x09, 0xCD, 0x21, 0xB8, 0x01, 0x4C, 0xCD, 0x21,
-            0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D, 0x20, 0x63,
-            0x61, 0x6E, 0x6E, 0x6F, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6E, 0x20, 0x69,
-            0x6E, 0x20, 0x44, 0x4F, 0x53, 0x20, 0x6D, 0x6F, 0x64, 0x65, 0x2E, 0x0D, 0x0D, 0x0A,
-            0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-        ])
+        Self {
+            data: &[
+                0x0E, 0x1F, 0xBA, 0x0E, 0x00, 0xB4, 0x09, 0xCD, 0x21, 0xB8, 0x01, 0x4C, 0xCD, 0x21,
+                0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D, 0x20, 0x63,
+                0x61, 0x6E, 0x6E, 0x6F, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6E, 0x20, 0x69,
+                0x6E, 0x20, 0x44, 0x4F, 0x53, 0x20, 0x6D, 0x6F, 0x64, 0x65, 0x2E, 0x0D, 0x0D, 0x0A,
+                0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            ],
+        }
     }
 }
 
@@ -775,11 +805,13 @@ impl CoffHeader {
 ///
 /// The PE header is located at the very beginning of the file and
 /// is followed by the section table and sections.
-#[derive(Debug, PartialEq, Copy, Clone, Default)]
-pub struct Header {
+#[derive(Debug, PartialEq, Clone, Default)]
+pub struct Header<'a> {
     pub dos_header: DosHeader,
     /// DOS program for legacy loaders
-    pub dos_stub: DosStub,
+    pub dos_stub: &'a [u8],
+    /// Rich header found between the DOS stub program and the PE signature, if any
+    pub rich_header: Option<RichHeader<'a>>,
 
     // Q (JohnScience): should we care about the "rich header"?
     // https://0xrick.github.io/win-internals/pe3/#rich-header
@@ -792,15 +824,11 @@ pub struct Header {
     pub optional_header: Option<optional_header::OptionalHeader>,
 }
 
-impl Header {
-    pub fn parse(bytes: &[u8]) -> error::Result<Self> {
+impl<'a> Header<'a> {
+    pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
         let dos_header = DosHeader::parse(&bytes)?;
-        let dos_stub = bytes.pread(DOS_STUB_OFFSET as usize).map_err(|_| {
-            error::Error::Malformed(format!(
-                "cannot parse DOS stub (offset {:#x})",
-                DOS_STUB_OFFSET
-            ))
-        })?;
+        let dos_stub: &'a [u8] = DosHeader::parse_dos_stub(bytes, dos_header.pe_pointer)?;
+        let rich_header = RichHeader::parse(bytes)?;
         let mut offset = dos_header.pe_pointer as usize;
         let signature = bytes.gread_with(&mut offset, scroll::LE).map_err(|_| {
             error::Error::Malformed(format!("cannot parse PE signature (offset {:#x})", offset))
@@ -814,6 +842,7 @@ impl Header {
         Ok(Header {
             dos_header,
             dos_stub,
+            rich_header,
             signature,
             coff_header,
             optional_header,
@@ -821,13 +850,13 @@ impl Header {
     }
 }
 
-impl ctx::TryIntoCtx<scroll::Endian> for Header {
+impl<'a> ctx::TryIntoCtx<scroll::Endian> for Header<'a> {
     type Error = error::Error;
 
     fn try_into_ctx(self, bytes: &mut [u8], ctx: scroll::Endian) -> Result<usize, Self::Error> {
         let offset = &mut 0;
         bytes.gwrite_with(self.dos_header, offset, ctx)?;
-        bytes.gwrite_with(self.dos_stub, offset, ctx)?;
+        bytes.gwrite_with(self.dos_stub, offset, ())?;
         bytes.gwrite_with(self.signature, offset, scroll::LE)?;
         bytes.gwrite_with(self.coff_header, offset, ctx)?;
         if let Some(opt_header) = self.optional_header {
@@ -837,6 +866,151 @@ impl ctx::TryIntoCtx<scroll::Endian> for Header {
     }
 }
 
+/// Little-endian `u32` reading of the ASCII string "DanS", which marks the start of the Rich header.
+///
+/// In the file the marker is stored XORed with the Rich header key.
+pub const DANS_MARKER: u32 = 0x536E6144;
+/// Little-endian `u32` reading of the ASCII string "Rich", which marks the end of the Rich header.
+///
+/// In the file the marker is stored unencoded and is followed by the 32-bit XOR key.
+pub const RICH_MARKER: u32 = 0x68636952;
+
+/// The Rich header is an undocumented header that is used to store information about the build environment.
+///
+/// The Rich Header first appeared in Visual Studio 6.0 and contains: a product identifier, build number, and the number of times it was used during the build process.
+#[derive(Debug, PartialEq, Clone, Default)]
+pub struct RichHeader<'a> {
+    /// The still XOR-encoded entry bytes between the padding behind the DanS marker and the Rich marker.
+    pub data: &'a [u8],
+    /// The decoded entries, in file order.
+    pub metadatas: Vec<RichMetadata>,
+}
+
+/// The Rich metadata is a pair of decoded 32-bit values that store the tool identifier and the use count.
+#[derive(Debug, PartialEq, Copy, Clone, Default)]
+pub struct RichMetadata {
+    /// The 32-bit tool identifier: the upper 16 bits describe the tool type, the lower 16 bits its build version.
+    pub tool_version: u32,
+    /// The 32-bit count of how many times the tool was used during the build process.
+    pub use_count: u32,
+}
+
+impl<'a> RichHeader<'a> {
+    /// Parse the rich header from the given bytes.
+    ///
+    /// To decode the Rich header,
+    /// - First locate the Rich marker and the subsequent 32-bit encryption key.
+    /// - Then, work backwards from the Rich marker, XORing the key with the stored 32-bit values until you decode the DanS marker.
+    ///
+    /// Between these markers, you'll find pairs of 32-bit values:
+    ///
+    /// - the first indicates the Microsoft tool used, and
+    /// - the second shows the count of linked object files made with that tool.
+    /// - The upper 16 bits of the tool ID describe the tool type,
+    /// - while the lower 16 bits specify the tool’s build version.
+    ///
+    /// Returns `Ok(None)` when there is no room for a Rich header or no Rich marker is found.
+    ///
+    /// # Errors
+    ///
+    /// Returns [error::Error::Malformed] when a Rich marker is present but the data in front of it is
+    /// inconsistent: the DanS marker is missing, or the entry data is not a whole number of 8-byte entries.
+    /// Errors from [DosHeader::parse] are passed through.
+    pub fn parse(bytes: &'a [u8]) -> error::Result<Option<Self>> {
+        // Number of padding dwords (zero XORed with the key) that follow the DanS marker.
+        const DANS_PADDING_DWORDS: usize = 3;
+
+        // Reads a little-endian `u32`; every caller passes at least four bytes.
+        fn read_u32(raw: &[u8]) -> u32 {
+            u32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]])
+        }
+
+        // Parse the DOS header; some fields are required to locate the Rich header.
+        let dos_header = DosHeader::parse(bytes)?;
+        let dos_header_end_offset = PE_POINTER_OFFSET as usize;
+        let pe_header_start_offset = dos_header.pe_pointer as usize;
+
+        // The Rich header is not present in all PE files. The checked subtraction also covers files whose
+        // PE header overlaps the DOS header, for which a plain subtraction would underflow.
+        match pe_header_start_offset.checked_sub(dos_header_end_offset) {
+            Some(gap) if gap >= 8 => {}
+            _ => return Ok(None),
+        }
+
+        // The Rich header is located between the DOS header and the PE header.
+        let scan_start = dos_header_end_offset + 4;
+        let scan_end = pe_header_start_offset;
+        let scan_stub = bytes.get(scan_start..scan_end).ok_or_else(|| {
+            error::Error::Malformed(format!(
+                "Rich header scan range ({:#x}..{:#x}) exceeds the input ({} bytes)",
+                scan_start,
+                scan_end,
+                bytes.len()
+            ))
+        })?;
+
+        // First locate the Rich marker and the subsequent 32-bit encryption key.
+        let (rich_end_offset, key) = match scan_stub
+            .windows(8)
+            .position(|window| read_u32(window) == RICH_MARKER)
+        {
+            Some(offset) => (offset, read_u32(&scan_stub[offset + 4..])),
+            None => return Ok(None),
+        };
+
+        // Scope the buffer
+        let rich_header = &scan_stub[..rich_end_offset];
+
+        // Look for DanS marker. Only the bytes in front of the Rich marker are searched, so the start of
+        // the header can never lie behind its end.
+        let rich_start_offset = match rich_header
+            .windows(4)
+            .position(|window| read_u32(window) ^ key == DANS_MARKER)
+        {
+            Some(offset) => offset + 4,
+            None => {
+                return Err(error::Error::Malformed(
+                    "Rich header does not contain the DanS marker".to_string(),
+                ));
+            }
+        };
+
+        let rich_header = &rich_header[rich_start_offset..];
+
+        // Skip padding bytes. The count is capped so that a leading entry whose tool identifier is zero
+        // (stored as the bare key) is not mistaken for padding.
+        let padding_count = rich_header
+            .chunks_exact(4)
+            .take(DANS_PADDING_DWORDS)
+            .take_while(|chunk| read_u32(chunk) == key)
+            .count()
+            * 4;
+
+        // Extract the Rich header data without the padding
+        let rich_header = &rich_header[padding_count..];
+
+        // Entries are 8 bytes each; leftover bytes mean the header is damaged.
+        let entries = rich_header.chunks_exact(8);
+        if !entries.remainder().is_empty() {
+            return Err(error::Error::Malformed(format!(
+                "Rich header data ({} bytes) is not a whole number of 8-byte entries",
+                rich_header.len()
+            )));
+        }
+        let metadatas = entries
+            .map(|chunk| RichMetadata {
+                tool_version: read_u32(&chunk[..4]) ^ key,
+                use_count: read_u32(&chunk[4..]) ^ key,
+            })
+            .collect();
+
+        Ok(Some(RichHeader {
+            data: rich_header,
+            metadatas,
+        }))
+    }
+}
+
 /// The TE header is a reduced PE32/PE32+ header containing only fields
 /// required for execution in the Platform Initialization
 /// ([PI](https://uefi.org/specs/PI/1.8/V1_Introduction.html)) architecture.
@@ -1020,7 +1194,10 @@ pub fn machine_to_str(machine: u16) -> &'static str {
 
 #[cfg(test)]
 mod tests {
-    use super::{machine_to_str, Header, COFF_MACHINE_X86, DOS_MAGIC, PE_MAGIC};
+    use super::{
+        machine_to_str, DosHeader, Header, RichHeader, RichMetadata, COFF_MACHINE_X86, DOS_MAGIC,
+        DOS_STUB_OFFSET, PE_MAGIC,
+    };
 
     const CRSS_HEADER: [u8; 688] = [
         0x4d, 0x5a, 0x90, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00,
@@ -1071,6 +1248,69 @@ mod tests {
         0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     ];
 
+    static NO_RICH_HEADER: [u8; 262] = [
+        0x4D, 0x5A, 0x50, 0x00, 0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x0F, 0x00, 0xFF, 0xFF, 0x00,
+        0x00, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x1A, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x01, 0x00, 0x00, 0xBA, 0x10, 0x00, 0x0E, 0x1F, 0xB4, 0x09, 0xCD, 0x21, 0xB8, 0x01,
+        0x4C, 0xCD, 0x21, 0x90, 0x90, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6F, 0x67, 0x72,
+        0x61, 0x6D, 0x20, 0x6D, 0x75, 0x73, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6E, 0x20,
+        0x75, 0x6E, 0x64, 0x65, 0x72, 0x20, 0x57, 0x69, 0x6E, 0x33, 0x32, 0x0D, 0x0A, 0x24, 0x37,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x50, 0x45, 0x00, 0x00, 0x64, 0x86,
+    ];
+
+    static CORRECT_RICH_HEADER: [u8; 256] = [
+        0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00,
+        0x00, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0xF8, 0x00, 0x00, 0x00, 0x0E, 0x1F, 0xBA, 0x0E, 0x00, 0xB4, 0x09, 0xCD, 0x21, 0xB8, 0x01,
+        0x4C, 0xCD, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D,
+        0x20, 0x63, 0x61, 0x6E, 0x6E, 0x6F, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6E, 0x20,
+        0x69, 0x6E, 0x20, 0x44, 0x4F, 0x53, 0x20, 0x6D, 0x6F, 0x64, 0x65, 0x2E, 0x0D, 0x0D, 0x0A,
+        0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x73, 0x4C, 0x5B, 0xB1, 0x37, 0x2D, 0x35,
+        0xE2, 0x37, 0x2D, 0x35, 0xE2, 0x37, 0x2D, 0x35, 0xE2, 0x44, 0x4F, 0x31, 0xE3, 0x3D, 0x2D,
+        0x35, 0xE2, 0x44, 0x4F, 0x36, 0xE3, 0x32, 0x2D, 0x35, 0xE2, 0x44, 0x4F, 0x30, 0xE3, 0x48,
+        0x2D, 0x35, 0xE2, 0xEE, 0x4F, 0x36, 0xE3, 0x3E, 0x2D, 0x35, 0xE2, 0xEE, 0x4F, 0x30, 0xE3,
+        0x14, 0x2D, 0x35, 0xE2, 0xEE, 0x4F, 0x31, 0xE3, 0x25, 0x2D, 0x35, 0xE2, 0x44, 0x4F, 0x34,
+        0xE3, 0x3C, 0x2D, 0x35, 0xE2, 0x37, 0x2D, 0x34, 0xE2, 0xAF, 0x2D, 0x35, 0xE2, 0x37, 0x2D,
+        0x35, 0xE2, 0x23, 0x2D, 0x35, 0xE2, 0xFC, 0x4E, 0x37, 0xE3, 0x36, 0x2D, 0x35, 0xE2, 0x52,
+        0x69, 0x63, 0x68, 0x37, 0x2D, 0x35, 0xE2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x45, 0x00, 0x00, 0x64, 0x86, 0x05,
+        0x00,
+    ];
+
+    static CORRUPTED_RICH_HEADER: [u8; 256] = [
+        0x4D, 0x5A, 0x90, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00,
+        0x00, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0xF8, 0x00, 0x00, 0x00, 0x0E, 0x1F, 0xBA, 0x0E, 0x00, 0xB4, 0x09, 0xCD, 0x21, 0xB8, 0x01,
+        0x4C, 0xCD, 0x21, 0x54, 0x68, 0x69, 0x73, 0x20, 0x70, 0x72, 0x6F, 0x67, 0x72, 0x61, 0x6D,
+        0x20, 0x63, 0x61, 0x6E, 0x6E, 0x6F, 0x74, 0x20, 0x62, 0x65, 0x20, 0x72, 0x75, 0x6E, 0x20,
+        0x69, 0x6E, 0x20, 0x44, 0x4F, 0x53, 0x20, 0x6D, 0x6F, 0x64, 0x65, 0x2E, 0x0D, 0x0D, 0x0A,
+        0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x4C, 0x5B, 0xB1, 0x37, 0x2D, 0x35,
+        0xE2, 0x37, 0x2D, 0x35, 0xE2, 0x37, 0x2D, 0x35, 0xE2, 0x44, 0x4F, 0x31, 0xE3, 0x3D, 0x2D,
+        0x35, 0xE2, 0x44, 0x4F, 0x36, 0xE3, 0x32, 0x2D, 0x35, 0xE2, 0x44, 0x4F, 0x30, 0xE3, 0x48,
+        0x2D, 0x35, 0xE2, 0xEE, 0x4F, 0x36, 0xE3, 0x3E, 0x2D, 0x35, 0xE2, 0xEE, 0x4F, 0x30, 0xE3,
+        0x14, 0x2D, 0x35, 0xE2, 0xEE, 0x4F, 0x31, 0xE3, 0x25, 0x2D, 0x35, 0xE2, 0x44, 0x4F, 0x34,
+        0xE3, 0x3C, 0x2D, 0x35, 0xE2, 0x37, 0x2D, 0x34, 0xE2, 0xAF, 0x2D, 0x35, 0xE2, 0x37, 0x2D,
+        0x35, 0xE2, 0x23, 0x2D, 0x35, 0xE2, 0xFC, 0x4E, 0x37, 0xE3, 0x36, 0x2D, 0x35, 0xE2, 0x52,
+        0x69, 0x63, 0x68, 0x37, 0x2D, 0x35, 0xE2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x45, 0x00, 0x00, 0x64, 0x86, 0x05,
+        0x00,
+    ];
+
     #[test]
     fn crss_header() {
         let header = Header::parse(&&CRSS_HEADER[..]).unwrap();
@@ -1080,4 +1320,90 @@ mod tests {
         assert!(machine_to_str(header.coff_header.machine) == "X86");
         println!("header: {:?}", &header);
     }
+
+    #[test]
+    fn parse_no_rich_header() {
+        let header = RichHeader::parse(&NO_RICH_HEADER).unwrap();
+        assert_eq!(header, None);
+    }
+
+    #[test]
+    fn parse_correct_rich_header() {
+        let header = RichHeader::parse(&CORRECT_RICH_HEADER)
+            .unwrap()
+            .expect("a Rich header should be found");
+        let expected = vec![
+            RichMetadata {
+                tool_version: 0x1046273,
+                use_count: 0xa,
+            },
+            RichMetadata {
+                tool_version: 0x1036273,
+                use_count: 0x5,
+            },
+            RichMetadata {
+                tool_version: 0x1056273,
+                use_count: 0x7f,
+            },
+            RichMetadata {
+                tool_version: 0x10362d9,
+                use_count: 0x9,
+            },
+            RichMetadata {
+                tool_version: 0x10562d9,
+                use_count: 0x23,
+            },
+            RichMetadata {
+                tool_version: 0x10462d9,
+                use_count: 0x12,
+            },
+            RichMetadata {
+                tool_version: 0x1016273,
+                use_count: 0xb,
+            },
+            RichMetadata {
+                tool_version: 0x10000,
+                use_count: 0x98,
+            },
+            RichMetadata {
+                tool_version: 0x0,
+                use_count: 0x14,
+            },
+            RichMetadata {
+                tool_version: 0x10263cb,
+                use_count: 0x1,
+            },
+        ];
+        assert_eq!(header.metadatas, expected);
+    }
+
+    #[test]
+    fn parse_corrupted_rich_header() {
+        let header_result = RichHeader::parse(&CORRUPTED_RICH_HEADER);
+        assert!(header_result.is_err());
+    }
+
+    #[test]
+    fn parse_rich_header_with_damaged_padding() {
+        let mut bytes = CORRECT_RICH_HEADER;
+        // Damaging the first padding dword leaves entry data that is not a multiple of 8 bytes.
+        bytes[0x84] ^= 0xFF;
+        assert!(RichHeader::parse(&bytes).is_err());
+    }
+
+    #[test]
+    fn parse_rich_header_with_dans_marker_behind_rich_marker() {
+        let mut bytes = CORRUPTED_RICH_HEADER;
+        // Plant an encoded DanS marker behind the Rich marker and its key; it must not be accepted.
+        bytes[0xE8..0xEC].copy_from_slice(&[0x73, 0x4C, 0x5B, 0xB1]);
+        assert!(RichHeader::parse(&bytes).is_err());
+    }
+
+    #[test]
+    fn parse_dos_stub_excludes_dos_header() {
+        let stub = DosHeader::parse_dos_stub(&CORRECT_RICH_HEADER, 0xF8).unwrap();
+        assert_eq!(stub, &CORRECT_RICH_HEADER[DOS_STUB_OFFSET as usize..0xF8]);
+        assert!(DosHeader::parse_dos_stub(&CORRECT_RICH_HEADER, 0x04).unwrap().is_empty());
+        assert!(DosHeader::parse_dos_stub(&CORRECT_RICH_HEADER, 0x101).is_err());
+    }
 }
diff --git a/src/pe/mod.rs b/src/pe/mod.rs
index 1f2bac7a..990b7e87 100644
--- a/src/pe/mod.rs
+++ b/src/pe/mod.rs
@@ -44,7 +44,7 @@ pub struct PE<'a> {
     bytes: &'a [u8],
     authenticode_excluded_sections: Option<authenticode::ExcludedSections>,
     /// The PE header
-    pub header: header::Header,
+    pub header: header::Header<'a>,
     /// A list of the sections in this PE binary
     pub sections: Vec<section_table::SectionTable>,
     /// The size of the binary
@@ -410,7 +410,7 @@ impl<'a> ctx::TryIntoCtx<scroll::Endian> for PE<'a> {
             }
             _ => None,
         };
-        bytes.gwrite_with(self.header, &mut offset, ctx)?;
+        bytes.gwrite_with(self.header.clone(), &mut offset, ctx)?;
         max_offset = max(offset, max_offset);
         self.write_sections(bytes, &mut offset, file_alignment, ctx)?;
         // We want the section offset for which we have the highest pointer on disk.