From 37c76862fdc9a97aed7ae0ba940ec5a42c26b9f4 Mon Sep 17 00:00:00 2001 From: "Hui, Chunyang" Date: Thu, 7 Jan 2021 10:35:08 +0000 Subject: [PATCH] elf.parse: added lazy_parse function lazy_parse will only generate a dummy Elf struct with only Header. Users can choose to parse whatever they want and fill the Elf based on their needs. --- src/elf/mod.rs | 94 ++++++++++++++++++++++++------- tests/bins/elf/gnu_hash/README.md | 64 +++++++++++++++++++++ tests/elf.rs | 45 +++++++++++++++ 3 files changed, 184 insertions(+), 19 deletions(-) diff --git a/src/elf/mod.rs b/src/elf/mod.rs index b8775b23..b3973dfc 100644 --- a/src/elf/mod.rs +++ b/src/elf/mod.rs @@ -202,22 +202,46 @@ if_sylvan! { pub fn is_object_file(&self) -> bool { self.header.e_type == header::ET_REL } + + /// Parses the contents to get the Header only. This `bytes` buffer should contain at least the length for parsing Header. + pub fn parse_header(bytes: &'a [u8]) -> error::Result
{ + bytes.pread::
(0) + } + + /// Lazy parse the ELF contents. This function mainly just assembles an Elf struct. Once we have the struct, we can choose to parse whatever we want. + pub fn lazy_parse(header: Header) -> error::Result { + let misc = parse_misc(&header)?; + + Ok(Elf { + header, + program_headers: vec![], + section_headers: Default::default(), + shdr_strtab: Default::default(), + dynamic: None, + dynsyms: Default::default(), + dynstrtab: Strtab::default(), + syms: Default::default(), + strtab: Default::default(), + dynrelas: Default::default(), + dynrels: Default::default(), + pltrelocs: Default::default(), + shdr_relocs: Default::default(), + soname: None, + interpreter: None, + libraries: vec![], + is_64: misc.is_64, + is_lib: misc.is_lib, + entry: misc.entry, + little_endian: misc.little_endian, + ctx: misc.ctx, + }) + } + /// Parses the contents of the byte stream in `bytes`, and maybe returns a unified binary pub fn parse(bytes: &'a [u8]) -> error::Result { - let header = bytes.pread::
(0)?; - let entry = header.e_entry as usize; - let is_lib = header.e_type == header::ET_DYN; - let is_lsb = header.e_ident[header::EI_DATA] == header::ELFDATA2LSB; - let endianness = scroll::Endian::from(is_lsb); - let class = header.e_ident[header::EI_CLASS]; - if class != header::ELFCLASS64 && class != header::ELFCLASS32 { - return Err(error::Error::Malformed(format!("Unknown values in ELF ident header: class: {} endianness: {}", - class, - header.e_ident[header::EI_DATA]))); - } - let is_64 = class == header::ELFCLASS64; - let container = if is_64 { Container::Big } else { Container::Little }; - let ctx = Ctx::new(container, endianness); + let header = Self::parse_header(bytes)?; + let misc = parse_misc(&header)?; + let ctx = misc.ctx; let program_headers = ProgramHeader::parse(bytes, header.e_phoff as usize, header.e_phnum as usize, ctx)?; @@ -329,11 +353,11 @@ if_sylvan! { soname, interpreter, libraries, - is_64, - is_lib, - entry: entry as u64, - little_endian: is_lsb, - ctx, + is_64: misc.is_64, + is_lib: misc.is_lib, + entry: misc.entry, + little_endian: misc.little_endian, + ctx: ctx, }) } } @@ -388,6 +412,38 @@ if_sylvan! { }; Ok(nchain) } + + struct Misc { + is_64: bool, + is_lib: bool, + entry: u64, + little_endian: bool, + ctx: Ctx, + } + + fn parse_misc(header: &Header) -> error::Result { + let entry = header.e_entry as usize; + let is_lib = header.e_type == header::ET_DYN; + let is_lsb = header.e_ident[header::EI_DATA] == header::ELFDATA2LSB; + let endianness = scroll::Endian::from(is_lsb); + let class = header.e_ident[header::EI_CLASS]; + if class != header::ELFCLASS64 && class != header::ELFCLASS32 { + return Err(error::Error::Malformed(format!("Unknown values in ELF ident header: class: {} endianness: {}", + class, + header.e_ident[header::EI_DATA]))); + } + let is_64 = class == header::ELFCLASS64; + let container = if is_64 { Container::Big } else { Container::Little }; + let ctx = Ctx::new(container, endianness); + + Ok(Misc{ + is_64, + is_lib, + entry: entry as u64, + little_endian:is_lsb, + ctx, + }) + } } #[cfg(test)] diff --git a/tests/bins/elf/gnu_hash/README.md b/tests/bins/elf/gnu_hash/README.md index f2ca18d8..479eaa50 100644 --- a/tests/bins/elf/gnu_hash/README.md +++ b/tests/bins/elf/gnu_hash/README.md @@ -21,6 +21,70 @@ Symbol table '.dynsym' contains 13 entries: 10: 000000000000067b 43 FUNC GLOBAL DEFAULT 12 main 11: 0000000000000520 0 FUNC GLOBAL DEFAULT 9 _init 12: 00000000000006a8 0 FUNC GLOBAL DEFAULT 13 _fini + +% readelf --section-headers hello.so +There are 26 section headers, starting at offset 0x1140: + +Section Headers: + [Nr] Name Type Address Offset + Size EntSize Flags Link Info Align + [ 0] NULL 0000000000000000 00000000 + 0000000000000000 0000000000000000 0 0 0 + [ 1] .note.gnu.build-i NOTE 00000000000001c8 000001c8 + 0000000000000024 0000000000000000 A 0 0 4 + [ 2] .gnu.hash GNU_HASH 00000000000001f0 000001f0 + 0000000000000040 0000000000000000 A 3 0 8 + [ 3] .dynsym DYNSYM 0000000000000230 00000230 + 0000000000000138 0000000000000018 A 4 1 8 + [ 4] .dynstr STRTAB 0000000000000368 00000368 + 00000000000000a6 0000000000000000 A 0 0 1 + [ 5] .gnu.version VERSYM 000000000000040e 0000040e + 000000000000001a 0000000000000002 A 3 0 2 + [ 6] .gnu.version_r VERNEED 0000000000000428 00000428 + 0000000000000020 0000000000000000 A 4 1 8 + [ 7] .rela.dyn RELA 0000000000000448 00000448 + 00000000000000a8 0000000000000018 A 3 0 8 + [ 8] .rela.plt RELA 00000000000004f0 000004f0 + 0000000000000030 0000000000000018 AI 3 21 8 + [ 9] .init PROGBITS 0000000000000520 00000520 + 0000000000000017 0000000000000000 AX 0 0 4 + [10] .plt PROGBITS 0000000000000540 00000540 + 0000000000000030 0000000000000010 AX 0 0 16 + [11] .plt.got PROGBITS 0000000000000570 00000570 + 0000000000000008 0000000000000008 AX 0 0 8 + [12] .text PROGBITS 0000000000000580 00000580 + 0000000000000126 0000000000000000 AX 0 0 16 + [13] .fini PROGBITS 00000000000006a8 000006a8 + 0000000000000009 0000000000000000 AX 0 0 4 + [14] .rodata PROGBITS 00000000000006b1 000006b1 + 0000000000000010 0000000000000000 A 0 0 1 + [15] .eh_frame_hdr PROGBITS 00000000000006c4 000006c4 + 000000000000002c 0000000000000000 A 0 0 4 + [16] .eh_frame PROGBITS 00000000000006f0 000006f0 + 000000000000009c 0000000000000000 A 0 0 8 + [17] .init_array INIT_ARRAY 0000000000200e10 00000e10 + 0000000000000008 0000000000000008 WA 0 0 8 + [18] .fini_array FINI_ARRAY 0000000000200e18 00000e18 + 0000000000000008 0000000000000008 WA 0 0 8 + [19] .dynamic DYNAMIC 0000000000200e20 00000e20 + 00000000000001c0 0000000000000010 WA 4 0 8 + [20] .got PROGBITS 0000000000200fe0 00000fe0 + 0000000000000020 0000000000000008 WA 0 0 8 + [21] .got.plt PROGBITS 0000000000201000 00001000 + 0000000000000028 0000000000000008 WA 0 0 8 + [22] .data PROGBITS 0000000000201028 00001028 + 0000000000000008 0000000000000000 WA 0 0 8 + [23] .bss NOBITS 0000000000201030 00001030 + 0000000000000008 0000000000000000 WA 0 0 1 + [24] .comment PROGBITS 0000000000000000 00001030 + 000000000000002a 0000000000000001 MS 0 0 1 + [25] .shstrtab STRTAB 0000000000000000 0000105a + 00000000000000e1 0000000000000000 0 0 1 +Key to Flags: + W (write), A (alloc), X (execute), M (merge), S (strings), I (info), + L (link order), O (extra OS processing required), G (group), T (TLS), + C (compressed), x (unknown), o (OS specific), E (exclude), + l (large), p (processor specific) ``` Or in 32-bit mode (one might need to install `gcc-multilib` on Ubuntu): diff --git a/tests/elf.rs b/tests/elf.rs index 916a0944..cc2aa486 100644 --- a/tests/elf.rs +++ b/tests/elf.rs @@ -33,6 +33,44 @@ fn parse_gnu_hash_section(base: &[u8], symbol_name: &str) -> Result Result { + let header = Elf::parse_header(base).map_err(|_| "parse elf header error")?; + // dummy Elf with only header + let mut obj = Elf::lazy_parse(header).map_err(|_| "cannot parse ELF file")?; + + use goblin::container::{Container, Ctx}; + use goblin::elf::SectionHeader; + use goblin::strtab::Strtab; + + let ctx = Ctx { + le: scroll::Endian::Little, + container: Container::Big, + }; + + obj.section_headers = + SectionHeader::parse(base, header.e_shoff as usize, header.e_shnum as usize, ctx) + .map_err(|_| "parse section headers error")?; + + let strtab_idx = header.e_shstrndx as usize; + let strtab_shdr = &obj.section_headers[strtab_idx]; + let strtab = Strtab::parse( + base, + strtab_shdr.sh_offset as usize, + strtab_shdr.sh_size as usize, + 0x0, + ) + .map_err(|_| "parse string table error")?; + for (_, section) in obj.section_headers.iter().enumerate() { + let section_name = strtab.get(section.sh_name).unwrap().unwrap(); + if section_name == ".text" { + return Ok(section.sh_size); + } + } + + Err("Didn't find text section") +} + #[test] fn test_parse_gnu_hash_section_64bit() { static ALIGNED_DATA: &AlignedData<[u8]> = @@ -99,6 +137,13 @@ fn test_parse_gnu_hash_section_32bit() { ); } +#[test] +fn test_parse_text_section_size_lazy() { + static ALIGNED_DATA: &AlignedData<[u8]> = + &AlignedData(*include_bytes!("bins/elf/gnu_hash/hello.so")); + assert_eq!(parse_text_section_size_lazy(&ALIGNED_DATA.0), Ok(0x126)); +} + #[test] fn test_oom() { use goblin::container::{Container, Ctx};