Skip to content

Commit

Permalink
elf.parse: added lazy_parse function
Browse files Browse the repository at this point in the history
lazy_parse generates a placeholder Elf struct in which only the header
(and the values derived from it) is populated. Users can then parse
whichever parts they need and fill in the Elf accordingly.
  • Loading branch information
jessehui committed Jan 28, 2021
1 parent 8d91dd7 commit 6f9297d
Show file tree
Hide file tree
Showing 2 changed files with 152 additions and 21 deletions.
97 changes: 76 additions & 21 deletions src/elf/mod.rs
Expand Up @@ -202,22 +202,45 @@ if_sylvan! {
/// Returns `true` when the header's `e_type` equals `header::ET_REL`.
pub fn is_object_file(&self) -> bool {
    let file_type = self.header.e_type;
    file_type == header::ET_REL
}

/// Reads an ELF `Header` from the start (offset 0) of `bytes`.
///
/// # Errors
///
/// Propagates the error produced by `pread` when the header cannot be read.
pub fn parse_elf_hdr(bytes: &'a [u8]) -> error::Result<Header> {
    let header = bytes.pread::<Header>(0)?;
    Ok(header)
}

// Lazy parse the ELF contents. This function mainly just assembles an Elf struct. Once we have the struct, we can choose to parse whatever we want.
pub fn lazy_parse(header: Header) -> error::Result<Self> {
let misc = parse_misc(&header)?;

Ok(Elf {
header,
program_headers: vec![],
section_headers: Default::default(),
shdr_strtab: Default::default(),
dynamic: None,
dynsyms: Default::default(),
dynstrtab: Strtab::default(),
syms: Default::default(),
strtab: Default::default(),
dynrelas: Default::default(),
dynrels: Default::default(),
pltrelocs: Default::default(),
shdr_relocs: Default::default(),
soname: None,
interpreter: None,
libraries: vec![],
is_64: misc.is_64,
is_lib: misc.is_lib,
entry: misc.entry,
little_endian: misc.little_endian,
ctx: misc.ctx,
})
}

/// Parses the contents of the byte stream in `bytes`, and maybe returns a unified binary
pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
let header = bytes.pread::<Header>(0)?;
let entry = header.e_entry as usize;
let is_lib = header.e_type == header::ET_DYN;
let is_lsb = header.e_ident[header::EI_DATA] == header::ELFDATA2LSB;
let endianness = scroll::Endian::from(is_lsb);
let class = header.e_ident[header::EI_CLASS];
if class != header::ELFCLASS64 && class != header::ELFCLASS32 {
return Err(error::Error::Malformed(format!("Unknown values in ELF ident header: class: {} endianness: {}",
class,
header.e_ident[header::EI_DATA])));
}
let is_64 = class == header::ELFCLASS64;
let container = if is_64 { Container::Big } else { Container::Little };
let ctx = Ctx::new(container, endianness);
let header = Self::parse_elf_hdr(bytes)?;
let misc = parse_misc(&header)?;
let ctx = misc.ctx;

let program_headers = ProgramHeader::parse(bytes, header.e_phoff as usize, header.e_phnum as usize, ctx)?;

Expand Down Expand Up @@ -329,11 +352,11 @@ if_sylvan! {
soname,
interpreter,
libraries,
is_64,
is_lib,
entry: entry as u64,
little_endian: is_lsb,
ctx,
is_64: misc.is_64,
is_lib: misc.is_lib,
entry: misc.entry,
little_endian: misc.little_endian,
ctx: misc.ctx,
})
}
}
Expand All @@ -346,7 +369,7 @@ if_sylvan! {
}
}

fn gnu_hash_len(bytes: &[u8], offset: usize, ctx: Ctx) -> error::Result<usize> {
pub fn gnu_hash_len(bytes: &[u8], offset: usize, ctx: Ctx) -> error::Result<usize> {
let buckets_num = bytes.pread_with::<u32>(offset, ctx.le)? as usize;
let min_chain = bytes.pread_with::<u32>(offset + 4, ctx.le)? as usize;
let bloom_size = bytes.pread_with::<u32>(offset + 8, ctx.le)? as usize;
Expand Down Expand Up @@ -379,7 +402,7 @@ if_sylvan! {
}
}

fn hash_len(bytes: &[u8], offset: usize, machine: u16, ctx: Ctx) -> error::Result<usize> {
pub fn hash_len(bytes: &[u8], offset: usize, machine: u16, ctx: Ctx) -> error::Result<usize> {
// Based on readelf code.
let nchain = if (machine == header::EM_FAKE_ALPHA || machine == header::EM_S390) && ctx.container.is_big() {
bytes.pread_with::<u64>(offset + 4, ctx.le)? as usize
Expand All @@ -388,6 +411,38 @@ if_sylvan! {
};
Ok(nchain)
}

/// Values derived from an ELF `Header` by `parse_misc`; copied into the matching
/// `Elf` fields by `parse` and `lazy_parse`.
struct Misc {
// `true` when the ident class byte is `ELFCLASS64`.
is_64: bool,
// `true` when the header's `e_type` is `ET_DYN`.
is_lib: bool,
// The header's `e_entry` value.
entry: u64,
// `true` when the ident data byte is `ELFDATA2LSB`.
little_endian: bool,
// Parsing context built from the container size and endianness above.
ctx: Ctx,
}

/// Derives the header-dependent values shared by `parse` and `lazy_parse`.
///
/// Reads the class and data bytes of `header.e_ident` to decide the container size
/// and endianness, builds the matching `Ctx`, and records the entry point and
/// whether the file type is `ET_DYN`.
///
/// Any data byte other than `ELFDATA2LSB` is treated as "not little-endian"; only
/// the class byte is validated.
///
/// # Errors
///
/// Returns `Error::Malformed` when the class byte is neither `ELFCLASS64` nor
/// `ELFCLASS32`.
fn parse_misc(header: &Header) -> error::Result<Misc> {
    let class = header.e_ident[header::EI_CLASS];
    let data = header.e_ident[header::EI_DATA];

    // Reject unknown classes up front; everything below depends on the width.
    let is_64 = match class {
        header::ELFCLASS64 => true,
        header::ELFCLASS32 => false,
        _ => {
            return Err(error::Error::Malformed(format!("Unknown values in ELF ident header: class: {} endianness: {}",
                class,
                data)));
        }
    };

    let is_lsb = data == header::ELFDATA2LSB;
    let endianness = scroll::Endian::from(is_lsb);
    let container = if is_64 { Container::Big } else { Container::Little };
    let ctx = Ctx::new(container, endianness);

    Ok(Misc {
        is_64,
        is_lib: header.e_type == header::ET_DYN,
        // Pass the entry point through untouched: a round trip through `usize`
        // would truncate 64-bit entry addresses on hosts with a narrower `usize`.
        entry: header.e_entry,
        little_endian: is_lsb,
        ctx,
    })
}
}

#[cfg(test)]
Expand Down
76 changes: 76 additions & 0 deletions tests/elf.rs
Expand Up @@ -33,6 +33,71 @@ fn parse_gnu_hash_section(base: &[u8], symbol_name: &str) -> Result<Sym, &'stati
section.copied().ok_or("cannot find symbol")
}

// Looks up `symbol_name` through the `.gnu.hash` section, using `lazy_parse` and
// filling in only the parts of the `Elf` that the lookup needs (program headers,
// section headers, dynamic info, dynamic symbols and the dynamic string table).
//
// Returns the matching symbol, or a static error string naming the step that failed.
fn parse_gnu_hash_section_lazy(base: &[u8], symbol_name: &str) -> Result<Sym, &'static str> {
    use goblin::container::{Container, Ctx};
    use goblin::elf::{gnu_hash_len, Dynamic, ProgramHeader, SectionHeader, Symtab};
    use goblin::strtab::Strtab;

    let header = Elf::parse_elf_hdr(base).map_err(|_| "parse elf header error")?;
    // Placeholder Elf: only the header-derived fields are populated at this point.
    let mut obj = Elf::lazy_parse(header).map_err(|_| "cannot parse ELF file")?;

    // Build the parsing context from what the header reported instead of assuming
    // a 64-bit little-endian file, so the 32-bit branch below is actually usable.
    let container = if obj.is_64 {
        Container::Big
    } else {
        Container::Little
    };
    let ctx = Ctx::new(container, scroll::Endian::from(obj.little_endian));

    // get program headers
    obj.program_headers = ProgramHeader::parse(
        base,
        obj.header.e_phoff as usize,
        obj.header.e_phnum as usize,
        ctx,
    )
    .map_err(|_| "parse program headers error")?;

    // get section headers
    obj.section_headers = SectionHeader::parse(
        base,
        obj.header.e_shoff as usize,
        obj.header.e_shnum as usize,
        ctx,
    )
    .map_err(|_| "parse section headers error")?;

    // get dynsyms
    let dynamic = Dynamic::parse(base, &obj.program_headers, ctx)
        .map_err(|_| "parse dynamic section error")?
        .ok_or("parsing dynamic linking info error")?;
    let dyn_info = &dynamic.info;
    let gnu_hash = dyn_info.gnu_hash.ok_or("gnu hash is none")?;
    let num_syms =
        gnu_hash_len(base, gnu_hash as usize, ctx).map_err(|_| "get gnu hash len error")?;
    let dynsyms = Symtab::parse(base, dyn_info.symtab, num_syms, ctx)
        .map_err(|_| "parse symbol table error")?;
    let dynstrtab = Strtab::parse(base, dyn_info.strtab, dyn_info.strsz, 0x0)
        .map_err(|_| "parse dynstrtab error")?;
    obj.dynamic = Some(dynamic);
    obj.dynsyms = dynsyms;
    obj.dynstrtab = dynstrtab;
    let dynsyms_vec = obj.dynsyms.to_vec();

    let hash_section = obj
        .section_headers
        .iter()
        .find(|s| s.sh_type == SHT_GNU_HASH)
        .ok_or("object does not contain .gnu.hash section")?;

    // Take the section bytes with a bounds-checked sub-slice: a section header that
    // points outside `base` becomes an error instead of an out-of-bounds read.
    let start = hash_section.sh_offset as usize;
    let end = start
        .checked_add(hash_section.sh_size as usize)
        .ok_or(".gnu.hash section is out of bounds")?;
    let hashtab: &[u8] = base
        .get(start..end)
        .ok_or(".gnu.hash section is out of bounds")?;

    // SAFETY (review): `from_raw_table` is called in an `unsafe` block as in the
    // original; presumably it requires `hashtab` to be a well-formed, suitably
    // aligned GNU hash table matching `dynsyms_vec` — confirm against its docs.
    let section = unsafe {
        if obj.is_64 {
            GnuHash64::from_raw_table(hashtab, &dynsyms_vec)?.find(symbol_name, &obj.dynstrtab)
        } else {
            GnuHash32::from_raw_table(hashtab, &dynsyms_vec)?.find(symbol_name, &obj.dynstrtab)
        }
    };
    section.copied().ok_or("cannot find symbol")
}

#[test]
fn test_parse_gnu_hash_section_64bit() {
static ALIGNED_DATA: &AlignedData<[u8]> =
Expand Down Expand Up @@ -64,6 +129,17 @@ fn test_parse_gnu_hash_section_64bit() {
parse_gnu_hash_section(&ALIGNED_DATA.0, "__gmon_start__"),
Err("cannot find symbol"),
);
assert_eq!(
parse_gnu_hash_section_lazy(&ALIGNED_DATA.0, "helloWorld"),
Ok(Sym {
st_name: 97,
st_info: 0x12,
st_other: 0,
st_shndx: 12,
st_value: 0x65a,
st_size: 33,
})
);
}

#[test]
Expand Down

0 comments on commit 6f9297d

Please sign in to comment.