Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

elf.parse: added lazy_parse function #254

Merged
merged 1 commit into from Feb 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
94 changes: 75 additions & 19 deletions src/elf/mod.rs
Expand Up @@ -202,22 +202,46 @@ if_sylvan! {
pub fn is_object_file(&self) -> bool {
self.header.e_type == header::ET_REL
}

/// Reads just the ELF `Header` from the start of `bytes`.
///
/// `bytes` must be at least long enough to hold a complete header; nothing past the header
/// is touched.
///
/// # Errors
///
/// Returns whatever error the underlying `pread` reports when the header cannot be read.
pub fn parse_header(bytes: &'a [u8]) -> error::Result<Header> {
    let header = bytes.pread::<Header>(0)?;
    Ok(header)
}

/// Lazy parse the ELF contents. This function mainly just assembles an Elf struct. Once we have the struct, we can choose to parse whatever we want.
pub fn lazy_parse(header: Header) -> error::Result<Self> {
let misc = parse_misc(&header)?;

Ok(Elf {
header,
program_headers: vec![],
section_headers: Default::default(),
shdr_strtab: Default::default(),
dynamic: None,
dynsyms: Default::default(),
dynstrtab: Strtab::default(),
syms: Default::default(),
strtab: Default::default(),
dynrelas: Default::default(),
dynrels: Default::default(),
pltrelocs: Default::default(),
shdr_relocs: Default::default(),
soname: None,
interpreter: None,
libraries: vec![],
is_64: misc.is_64,
is_lib: misc.is_lib,
entry: misc.entry,
little_endian: misc.little_endian,
ctx: misc.ctx,
})
}

/// Parses the contents of the byte stream in `bytes` and returns the fully parsed `Elf` on success, or an error if the contents cannot be parsed
pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
let header = bytes.pread::<Header>(0)?;
let entry = header.e_entry as usize;
let is_lib = header.e_type == header::ET_DYN;
let is_lsb = header.e_ident[header::EI_DATA] == header::ELFDATA2LSB;
let endianness = scroll::Endian::from(is_lsb);
let class = header.e_ident[header::EI_CLASS];
if class != header::ELFCLASS64 && class != header::ELFCLASS32 {
return Err(error::Error::Malformed(format!("Unknown values in ELF ident header: class: {} endianness: {}",
class,
header.e_ident[header::EI_DATA])));
}
let is_64 = class == header::ELFCLASS64;
let container = if is_64 { Container::Big } else { Container::Little };
let ctx = Ctx::new(container, endianness);
let header = Self::parse_header(bytes)?;
let misc = parse_misc(&header)?;
let ctx = misc.ctx;

let program_headers = ProgramHeader::parse(bytes, header.e_phoff as usize, header.e_phnum as usize, ctx)?;

Expand Down Expand Up @@ -329,11 +353,11 @@ if_sylvan! {
soname,
interpreter,
libraries,
is_64,
is_lib,
entry: entry as u64,
little_endian: is_lsb,
ctx,
is_64: misc.is_64,
is_lib: misc.is_lib,
entry: misc.entry,
little_endian: misc.little_endian,
ctx: ctx,
})
}
}
Expand Down Expand Up @@ -388,6 +412,38 @@ if_sylvan! {
};
Ok(nchain)
}

struct Misc {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i like this!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your review.

is_64: bool,
is_lib: bool,
entry: u64,
little_endian: bool,
ctx: Ctx,
}

/// Derives the header-dependent settings shared by `parse` and `lazy_parse`.
///
/// Reads the class and data-encoding bytes from `header.e_ident`, together with `e_type`
/// and `e_entry`, and packages the results as a `Misc`.
///
/// Only the class byte is validated. The data-encoding byte is merely compared against
/// `ELFDATA2LSB`; any other value is treated as "not little-endian".
///
/// # Errors
///
/// Returns `Error::Malformed` when the class is neither `ELFCLASS32` nor `ELFCLASS64`.
fn parse_misc(header: &Header) -> error::Result<Misc> {
    let class = header.e_ident[header::EI_CLASS];
    if class != header::ELFCLASS64 && class != header::ELFCLASS32 {
        return Err(error::Error::Malformed(format!("Unknown values in ELF ident header: class: {} endianness: {}",
            class,
            header.e_ident[header::EI_DATA])));
    }

    let is_64 = class == header::ELFCLASS64;
    let is_lib = header.e_type == header::ET_DYN;
    let is_lsb = header.e_ident[header::EI_DATA] == header::ELFDATA2LSB;
    let container = if is_64 { Container::Big } else { Container::Little };
    let ctx = Ctx::new(container, scroll::Endian::from(is_lsb));

    Ok(Misc {
        is_64,
        is_lib,
        // Use the entry address as-is: casting through `usize` would truncate a 64-bit
        // entry point when this code runs on a 32-bit host.
        entry: header.e_entry,
        little_endian: is_lsb,
        ctx,
    })
}
}

#[cfg(test)]
Expand Down
64 changes: 64 additions & 0 deletions tests/bins/elf/gnu_hash/README.md
Expand Up @@ -21,6 +21,70 @@ Symbol table '.dynsym' contains 13 entries:
10: 000000000000067b 43 FUNC GLOBAL DEFAULT 12 main
11: 0000000000000520 0 FUNC GLOBAL DEFAULT 9 _init
12: 00000000000006a8 0 FUNC GLOBAL DEFAULT 13 _fini

% readelf --section-headers hello.so
There are 26 section headers, starting at offset 0x1140:

Section Headers:
[Nr] Name Type Address Offset
Size EntSize Flags Link Info Align
[ 0] NULL 0000000000000000 00000000
0000000000000000 0000000000000000 0 0 0
[ 1] .note.gnu.build-i NOTE 00000000000001c8 000001c8
0000000000000024 0000000000000000 A 0 0 4
[ 2] .gnu.hash GNU_HASH 00000000000001f0 000001f0
0000000000000040 0000000000000000 A 3 0 8
[ 3] .dynsym DYNSYM 0000000000000230 00000230
0000000000000138 0000000000000018 A 4 1 8
[ 4] .dynstr STRTAB 0000000000000368 00000368
00000000000000a6 0000000000000000 A 0 0 1
[ 5] .gnu.version VERSYM 000000000000040e 0000040e
000000000000001a 0000000000000002 A 3 0 2
[ 6] .gnu.version_r VERNEED 0000000000000428 00000428
0000000000000020 0000000000000000 A 4 1 8
[ 7] .rela.dyn RELA 0000000000000448 00000448
00000000000000a8 0000000000000018 A 3 0 8
[ 8] .rela.plt RELA 00000000000004f0 000004f0
0000000000000030 0000000000000018 AI 3 21 8
[ 9] .init PROGBITS 0000000000000520 00000520
0000000000000017 0000000000000000 AX 0 0 4
[10] .plt PROGBITS 0000000000000540 00000540
0000000000000030 0000000000000010 AX 0 0 16
[11] .plt.got PROGBITS 0000000000000570 00000570
0000000000000008 0000000000000008 AX 0 0 8
[12] .text PROGBITS 0000000000000580 00000580
0000000000000126 0000000000000000 AX 0 0 16
[13] .fini PROGBITS 00000000000006a8 000006a8
0000000000000009 0000000000000000 AX 0 0 4
[14] .rodata PROGBITS 00000000000006b1 000006b1
0000000000000010 0000000000000000 A 0 0 1
[15] .eh_frame_hdr PROGBITS 00000000000006c4 000006c4
000000000000002c 0000000000000000 A 0 0 4
[16] .eh_frame PROGBITS 00000000000006f0 000006f0
000000000000009c 0000000000000000 A 0 0 8
[17] .init_array INIT_ARRAY 0000000000200e10 00000e10
0000000000000008 0000000000000008 WA 0 0 8
[18] .fini_array FINI_ARRAY 0000000000200e18 00000e18
0000000000000008 0000000000000008 WA 0 0 8
[19] .dynamic DYNAMIC 0000000000200e20 00000e20
00000000000001c0 0000000000000010 WA 4 0 8
[20] .got PROGBITS 0000000000200fe0 00000fe0
0000000000000020 0000000000000008 WA 0 0 8
[21] .got.plt PROGBITS 0000000000201000 00001000
0000000000000028 0000000000000008 WA 0 0 8
[22] .data PROGBITS 0000000000201028 00001028
0000000000000008 0000000000000000 WA 0 0 8
[23] .bss NOBITS 0000000000201030 00001030
0000000000000008 0000000000000000 WA 0 0 1
[24] .comment PROGBITS 0000000000000000 00001030
000000000000002a 0000000000000001 MS 0 0 1
[25] .shstrtab STRTAB 0000000000000000 0000105a
00000000000000e1 0000000000000000 0 0 1
Key to Flags:
W (write), A (alloc), X (execute), M (merge), S (strings), I (info),
L (link order), O (extra OS processing required), G (group), T (TLS),
C (compressed), x (unknown), o (OS specific), E (exclude),
l (large), p (processor specific)
```

Or in 32-bit mode (one might need to install `gcc-multilib` on Ubuntu):
Expand Down
45 changes: 45 additions & 0 deletions tests/elf.rs
Expand Up @@ -33,6 +33,44 @@ fn parse_gnu_hash_section(base: &[u8], symbol_name: &str) -> Result<Sym, &'stati
section.copied().ok_or("cannot find symbol")
}

// Uses `lazy_parse` to assemble an `Elf` from the header alone, then parses only the parts
// we care about (the section headers and the section-header string table) to look up the
// size of the `.text` section.
//
// Returns the `sh_size` of `.text`, or a static error message describing which step failed.
fn parse_text_section_size_lazy(base: &[u8]) -> Result<u64, &'static str> {
    use goblin::container::{Container, Ctx};
    use goblin::elf::SectionHeader;
    use goblin::strtab::Strtab;

    let header = Elf::parse_header(base).map_err(|_| "parse elf header error")?;
    // dummy Elf with only header
    let mut obj = Elf::lazy_parse(header).map_err(|_| "cannot parse ELF file")?;

    // Build the parsing context from what the header reported rather than assuming a
    // 64-bit little-endian file, so the helper also works for other inputs.
    let ctx = Ctx {
        le: scroll::Endian::from(obj.little_endian),
        container: if obj.is_64 {
            Container::Big
        } else {
            Container::Little
        },
    };

    obj.section_headers =
        SectionHeader::parse(base, header.e_shoff as usize, header.e_shnum as usize, ctx)
            .map_err(|_| "parse section headers error")?;

    // `e_shstrndx` comes straight from the file, so look it up with a checked access
    // instead of indexing (which would panic on a bad value).
    let strtab_shdr = obj
        .section_headers
        .get(header.e_shstrndx as usize)
        .ok_or("section header string table index out of range")?;
    let strtab = Strtab::parse(
        base,
        strtab_shdr.sh_offset as usize,
        strtab_shdr.sh_size as usize,
        0x0,
    )
    .map_err(|_| "parse string table error")?;

    for section in &obj.section_headers {
        // Report unreadable names as errors rather than panicking.
        let section_name = strtab
            .get(section.sh_name)
            .ok_or("missing section name")?
            .map_err(|_| "invalid section name")?;
        if section_name == ".text" {
            return Ok(section.sh_size);
        }
    }

    Err("Didn't find text section")
}

#[test]
fn test_parse_gnu_hash_section_64bit() {
static ALIGNED_DATA: &AlignedData<[u8]> =
Expand Down Expand Up @@ -99,6 +137,13 @@ fn test_parse_gnu_hash_section_32bit() {
);
}

// Checks that the lazily assembled `Elf` yields the `.text` size of `hello.so`
// (0x126, matching the `readelf --section-headers` output in the fixture README).
#[test]
fn test_parse_text_section_size_lazy() {
    static ALIGNED_DATA: &AlignedData<[u8]> =
        &AlignedData(*include_bytes!("bins/elf/gnu_hash/hello.so"));

    let text_size = parse_text_section_size_lazy(&ALIGNED_DATA.0);
    assert_eq!(text_size, Ok(0x126));
}

#[test]
fn test_oom() {
use goblin::container::{Container, Ctx};
Expand Down