Skip to content

Commit

Permalink
elf.parse: added lazy_parse function
Browse files Browse the repository at this point in the history
lazy_parse will only generate a dummy Elf struct with only Header.
Users can choose to parse whatever they want and fill the Elf based
on their needs.
  • Loading branch information
jessehui committed Feb 1, 2021
1 parent 8d91dd7 commit 1ae2c00
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 19 deletions.
93 changes: 74 additions & 19 deletions src/elf/mod.rs
Expand Up @@ -202,22 +202,45 @@ if_sylvan! {
/// Returns `true` when the header's `e_type` equals `header::ET_REL`.
pub fn is_object_file(&self) -> bool {
    let file_type = self.header.e_type;
    file_type == header::ET_REL
}

/// Reads the ELF `Header` located at offset 0 of `bytes`.
///
/// # Errors
///
/// Returns the error reported by `pread` when the header cannot be read.
pub fn parse_hdr(bytes: &'a [u8]) -> error::Result<Header> {
    let header = bytes.pread::<Header>(0)?;
    Ok(header)
}

// Lazy parse the ELF contents. This function mainly just assembles an Elf struct. Once we have the struct, we can choose to parse whatever we want.
pub fn lazy_parse(header: Header) -> error::Result<Self> {
let misc = parse_misc(&header)?;

Ok(Elf {
header,
program_headers: vec![],
section_headers: Default::default(),
shdr_strtab: Default::default(),
dynamic: None,
dynsyms: Default::default(),
dynstrtab: Strtab::default(),
syms: Default::default(),
strtab: Default::default(),
dynrelas: Default::default(),
dynrels: Default::default(),
pltrelocs: Default::default(),
shdr_relocs: Default::default(),
soname: None,
interpreter: None,
libraries: vec![],
is_64: misc.is_64,
is_lib: misc.is_lib,
entry: misc.entry,
little_endian: misc.little_endian,
ctx: misc.ctx,
})
}

/// Parses the contents of the byte stream in `bytes`, and maybe returns a unified binary
pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
let header = bytes.pread::<Header>(0)?;
let entry = header.e_entry as usize;
let is_lib = header.e_type == header::ET_DYN;
let is_lsb = header.e_ident[header::EI_DATA] == header::ELFDATA2LSB;
let endianness = scroll::Endian::from(is_lsb);
let class = header.e_ident[header::EI_CLASS];
if class != header::ELFCLASS64 && class != header::ELFCLASS32 {
return Err(error::Error::Malformed(format!("Unknown values in ELF ident header: class: {} endianness: {}",
class,
header.e_ident[header::EI_DATA])));
}
let is_64 = class == header::ELFCLASS64;
let container = if is_64 { Container::Big } else { Container::Little };
let ctx = Ctx::new(container, endianness);
let header = Self::parse_hdr(bytes)?;
let misc = parse_misc(&header)?;
let ctx = misc.ctx;

let program_headers = ProgramHeader::parse(bytes, header.e_phoff as usize, header.e_phnum as usize, ctx)?;

Expand Down Expand Up @@ -329,11 +352,11 @@ if_sylvan! {
soname,
interpreter,
libraries,
is_64,
is_lib,
entry: entry as u64,
little_endian: is_lsb,
ctx,
is_64: misc.is_64,
is_lib: misc.is_lib,
entry: misc.entry,
little_endian: misc.little_endian,
ctx: ctx,
})
}
}
Expand Down Expand Up @@ -388,6 +411,38 @@ if_sylvan! {
};
Ok(nchain)
}

/// Header-derived values computed once by `parse_misc` and copied into `Elf`
/// by both `parse` and `lazy_parse`.
struct Misc {
/// `true` when `e_ident[EI_CLASS]` is `ELFCLASS64`.
is_64: bool,
/// `true` when `e_type` is `ET_DYN`.
is_lib: bool,
/// The header's `e_entry` value.
entry: u64,
/// `true` when `e_ident[EI_DATA]` is `ELFDATA2LSB`.
little_endian: bool,
/// Parsing context built from the container size (`Container::Big` when
/// `is_64`, otherwise `Container::Little`) and the endianness.
ctx: Ctx,
}

/// Derives the class-, endianness- and type-dependent values of an ELF file
/// from its `header`.
///
/// Any `e_ident[EI_DATA]` value other than `ELFDATA2LSB` is treated as
/// big-endian; only the class byte is validated.
///
/// # Errors
///
/// Returns `Error::Malformed` when `e_ident[EI_CLASS]` is neither
/// `ELFCLASS32` nor `ELFCLASS64`.
fn parse_misc(header: &Header) -> error::Result<Misc> {
    let is_lib = header.e_type == header::ET_DYN;
    let is_lsb = header.e_ident[header::EI_DATA] == header::ELFDATA2LSB;
    let endianness = scroll::Endian::from(is_lsb);
    let class = header.e_ident[header::EI_CLASS];
    if class != header::ELFCLASS64 && class != header::ELFCLASS32 {
        return Err(error::Error::Malformed(format!("Unknown values in ELF ident header: class: {} endianness: {}",
                   class,
                   header.e_ident[header::EI_DATA])));
    }
    let is_64 = class == header::ELFCLASS64;
    let container = if is_64 { Container::Big } else { Container::Little };
    let ctx = Ctx::new(container, endianness);

    Ok(Misc {
        is_64,
        is_lib,
        // Copied directly: the previous round trip through `usize` truncated the
        // entry address on hosts where `usize` is narrower than 64 bits.
        // NOTE(review): assumes `Header::e_entry` is already `u64` — confirm.
        entry: header.e_entry,
        little_endian: is_lsb,
        ctx,
    })
}
}

#[cfg(test)]
Expand Down
64 changes: 64 additions & 0 deletions tests/bins/elf/gnu_hash/README.md
Expand Up @@ -21,6 +21,70 @@ Symbol table '.dynsym' contains 13 entries:
10: 000000000000067b 43 FUNC GLOBAL DEFAULT 12 main
11: 0000000000000520 0 FUNC GLOBAL DEFAULT 9 _init
12: 00000000000006a8 0 FUNC GLOBAL DEFAULT 13 _fini

% readelf --section-headers hello.so
There are 26 section headers, starting at offset 0x1140:

Section Headers:
[Nr] Name Type Address Offset
Size EntSize Flags Link Info Align
[ 0] NULL 0000000000000000 00000000
0000000000000000 0000000000000000 0 0 0
[ 1] .note.gnu.build-i NOTE 00000000000001c8 000001c8
0000000000000024 0000000000000000 A 0 0 4
[ 2] .gnu.hash GNU_HASH 00000000000001f0 000001f0
0000000000000040 0000000000000000 A 3 0 8
[ 3] .dynsym DYNSYM 0000000000000230 00000230
0000000000000138 0000000000000018 A 4 1 8
[ 4] .dynstr STRTAB 0000000000000368 00000368
00000000000000a6 0000000000000000 A 0 0 1
[ 5] .gnu.version VERSYM 000000000000040e 0000040e
000000000000001a 0000000000000002 A 3 0 2
[ 6] .gnu.version_r VERNEED 0000000000000428 00000428
0000000000000020 0000000000000000 A 4 1 8
[ 7] .rela.dyn RELA 0000000000000448 00000448
00000000000000a8 0000000000000018 A 3 0 8
[ 8] .rela.plt RELA 00000000000004f0 000004f0
0000000000000030 0000000000000018 AI 3 21 8
[ 9] .init PROGBITS 0000000000000520 00000520
0000000000000017 0000000000000000 AX 0 0 4
[10] .plt PROGBITS 0000000000000540 00000540
0000000000000030 0000000000000010 AX 0 0 16
[11] .plt.got PROGBITS 0000000000000570 00000570
0000000000000008 0000000000000008 AX 0 0 8
[12] .text PROGBITS 0000000000000580 00000580
0000000000000126 0000000000000000 AX 0 0 16
[13] .fini PROGBITS 00000000000006a8 000006a8
0000000000000009 0000000000000000 AX 0 0 4
[14] .rodata PROGBITS 00000000000006b1 000006b1
0000000000000010 0000000000000000 A 0 0 1
[15] .eh_frame_hdr PROGBITS 00000000000006c4 000006c4
000000000000002c 0000000000000000 A 0 0 4
[16] .eh_frame PROGBITS 00000000000006f0 000006f0
000000000000009c 0000000000000000 A 0 0 8
[17] .init_array INIT_ARRAY 0000000000200e10 00000e10
0000000000000008 0000000000000008 WA 0 0 8
[18] .fini_array FINI_ARRAY 0000000000200e18 00000e18
0000000000000008 0000000000000008 WA 0 0 8
[19] .dynamic DYNAMIC 0000000000200e20 00000e20
00000000000001c0 0000000000000010 WA 4 0 8
[20] .got PROGBITS 0000000000200fe0 00000fe0
0000000000000020 0000000000000008 WA 0 0 8
[21] .got.plt PROGBITS 0000000000201000 00001000
0000000000000028 0000000000000008 WA 0 0 8
[22] .data PROGBITS 0000000000201028 00001028
0000000000000008 0000000000000000 WA 0 0 8
[23] .bss NOBITS 0000000000201030 00001030
0000000000000008 0000000000000000 WA 0 0 1
[24] .comment PROGBITS 0000000000000000 00001030
000000000000002a 0000000000000001 MS 0 0 1
[25] .shstrtab STRTAB 0000000000000000 0000105a
00000000000000e1 0000000000000000 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
L (link order), O (extra OS processing required), G (group), T (TLS),
C (compressed), x (unknown), o (OS specific), E (exclude),
l (large), p (processor specific)
```

Or in 32-bit mode (one might need to install `gcc-multilib` on Ubuntu):
Expand Down
47 changes: 47 additions & 0 deletions tests/elf.rs
Expand Up @@ -33,8 +33,48 @@ fn parse_gnu_hash_section(base: &[u8], symbol_name: &str) -> Result<Sym, &'stati
section.copied().ok_or("cannot find symbol")
}

// Uses `lazy_parse` to build a header-only Elf, then parses only the pieces
// needed to look up the size of the `.text` section: the section headers and
// the section-name string table.
//
// Every failure is reported through the returned `Err` string instead of a
// panic: a bad header, unparsable section headers, an out-of-range `e_shstrndx`,
// an unreadable string table, or a section name that cannot be resolved.
fn parse_text_section_size_lazy(base: &[u8]) -> Result<u64, &'static str> {
    use goblin::container::{Container, Ctx};
    use goblin::elf::SectionHeader;
    use goblin::strtab::Strtab;

    let header = Elf::parse_hdr(base).map_err(|_| "parse elf header error")?;
    // dummy Elf with only header
    let mut obj = Elf::lazy_parse(header).map_err(|_| "cannot parse ELF file")?;

    // Derive the parsing context from what `lazy_parse` read out of the header
    // rather than assuming a 64-bit little-endian file.
    let container = if obj.is_64 { Container::Big } else { Container::Little };
    let ctx = Ctx::new(container, scroll::Endian::from(obj.little_endian));

    // get section headers
    let section_headers =
        SectionHeader::parse(base, header.e_shoff as usize, header.e_shnum as usize, ctx)
            .map_err(|_| "parse section headers error")?;
    obj.section_headers = section_headers;

    // `e_shstrndx` comes from the file, so look it up with a bounds check.
    let strtab_idx = header.e_shstrndx as usize;
    let strtab_shdr = obj
        .section_headers
        .get(strtab_idx)
        .ok_or("section name string table index out of range")?;
    let strtab = Strtab::parse(
        base,
        strtab_shdr.sh_offset as usize,
        strtab_shdr.sh_size as usize,
        0x0,
    ).map_err(|_| "parse string table error")?;

    for section in obj.section_headers.iter() {
        let section_name = strtab
            .get(section.sh_name)
            .ok_or("section name offset out of range")?
            .map_err(|_| "invalid section name")?;
        if section_name == ".text" {
            return Ok(section.sh_size);
        }
    }

    Err("Didn't find text section")
}

#[test]
fn test_parse_gnu_hash_section_64bit() {
use goblin::elf::*;
static ALIGNED_DATA: &AlignedData<[u8]> =
&AlignedData(*include_bytes!("bins/elf/gnu_hash/hello.so"));

Expand Down Expand Up @@ -99,6 +139,13 @@ fn test_parse_gnu_hash_section_32bit() {
);
}

// Checks that the lazily assembled Elf reports the `.text` size shown in the
// README's `readelf --section-headers hello.so` listing (0x126 bytes).
#[test]
fn test_parse_text_section_size_lazy() {
    static ALIGNED_DATA: &AlignedData<[u8]> =
        &AlignedData(*include_bytes!("bins/elf/gnu_hash/hello.so"));

    let text_size = parse_text_section_size_lazy(&ALIGNED_DATA.0);
    assert_eq!(text_size, Ok(0x126));
}

#[test]
fn test_oom() {
use goblin::container::{Container, Ctx};
Expand Down

0 comments on commit 1ae2c00

Please sign in to comment.