From 8febb721d5284555d9db5797bd3b07e5c9f7b466 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20Mei=C3=9Fner?= Date: Fri, 2 Jul 2021 05:35:56 +0200 Subject: [PATCH] strtab: Parse Strtab only once on Creation (#275) * Parse Strtab immediately when created, instead of on demand when entries are requested. * Renames from_slice_unsafe => from_slice_unparsed and updates comments. --- src/archive/mod.rs | 8 +-- src/elf/dynamic.rs | 2 +- src/elf/mod.rs | 6 +- src/pe/symbol.rs | 7 +- src/strtab.rs | 176 +++++++++++++++++++++++++++++++-------------- tests/elf.rs | 2 +- 6 files changed, 132 insertions(+), 69 deletions(-) diff --git a/src/archive/mod.rs b/src/archive/mod.rs index 04e8037e..bfe09ccf 100644 --- a/src/archive/mod.rs +++ b/src/archive/mod.rs @@ -282,8 +282,8 @@ impl<'a> Index<'a> { let string_offset: u32 = buffer.pread_with(i * 8 + 4, scroll::LE)?; let archive_member: u32 = buffer.pread_with(i * 8 + 8, scroll::LE)?; - let string = match strtab.get(string_offset as usize) { - Some(result) => result, + let string = match strtab.get_at(string_offset as usize) { + Some(result) => Ok(result), None => Err(Error::Malformed(format!( "{} entry {} has string offset {}, which is out of bounds", BSD_SYMDEF_NAME, i, string_offset @@ -353,8 +353,8 @@ impl<'a> NameIndex<'a> { let idx = name.trim_start_matches('/').trim_end(); match usize::from_str_radix(idx, 10) { Ok(idx) => { - let name = match self.strtab.get(idx + 1) { - Some(result) => result, + let name = match self.strtab.get_at(idx + 1) { + Some(result) => Ok(result), None => Err(Error::Malformed(format!( "Name {} is out of range in archive NameIndex", name diff --git a/src/elf/dynamic.rs b/src/elf/dynamic.rs index 5425fb9d..8de3b77e 100644 --- a/src/elf/dynamic.rs +++ b/src/elf/dynamic.rs @@ -453,7 +453,7 @@ if_alloc! { let mut needed = Vec::with_capacity(count); for dynamic in &self.dyns { if dynamic.d_tag as u64 == DT_NEEDED { - if let Some(Ok(lib)) = strtab.get(dynamic.d_val as usize) { + if let Some(lib) = strtab.get_at(dynamic.d_val as usize) { needed.push(lib) } else { warn!("Invalid DT_NEEDED {}", dynamic.d_val) diff --git a/src/elf/mod.rs b/src/elf/mod.rs index be86cf11..9101d95e 100644 --- a/src/elf/mod.rs +++ b/src/elf/mod.rs @@ -174,9 +174,7 @@ if_sylvan! { continue; } - if section_name.is_some() && !self.shdr_strtab - .get(sect.sh_name) - .map_or(false, |r| r.ok() == section_name) { + if section_name.is_some() && self.shdr_strtab.get_at(sect.sh_name) != section_name { continue; } @@ -298,7 +296,7 @@ if_sylvan! { if dyn_info.soname != 0 { // FIXME: warn! here - soname = match dynstrtab.get(dyn_info.soname) { Some(Ok(soname)) => Some(soname), _ => None }; + soname = dynstrtab.get_at(dyn_info.soname); } if dyn_info.needed_count > 0 { libraries = dynamic.get_libraries(&dynstrtab); diff --git a/src/pe/symbol.rs b/src/pe/symbol.rs index 26fa794a..a4e36514 100644 --- a/src/pe/symbol.rs +++ b/src/pe/symbol.rs @@ -218,11 +218,8 @@ impl Symbol { /// a strtab entry. pub fn name<'a>(&'a self, strtab: &'a strtab::Strtab) -> error::Result<&'a str> { if let Some(offset) = self.name_offset() { - strtab.get(offset as usize).unwrap_or_else(|| { - Err(error::Error::Malformed(format!( - "Invalid Symbol name offset {:#x}", - offset - ))) + strtab.get_at(offset as usize).ok_or_else(|| { + error::Error::Malformed(format!("Invalid Symbol name offset {:#x}", offset)) }) } else { Ok(self.name.pread(0)?) diff --git a/src/strtab.rs b/src/strtab.rs index b2e8c308..7d77e2b0 100644 --- a/src/strtab.rs +++ b/src/strtab.rs @@ -3,7 +3,6 @@ use core::fmt; use core::ops::Index; -use core::slice; use core::str; use scroll::{ctx, Pread}; if_alloc! { @@ -15,8 +14,10 @@ if_alloc! { /// member index). Constructed using [`parse`](#method.parse) /// with your choice of delimiter. Please be careful. pub struct Strtab<'a> { - bytes: &'a [u8], delim: ctx::StrCtx, + bytes: &'a [u8], + #[cfg(feature = "alloc")] + strings: Vec<(usize, &'a str)>, } #[inline(always)] @@ -25,31 +26,34 @@ fn get_str(offset: usize, bytes: &[u8], delim: ctx::StrCtx) -> scroll::Result<&s } impl<'a> Strtab<'a> { - /// Construct a new strtab with `bytes` as the backing string table, using `delim` as the delimiter between entries - pub fn new(bytes: &'a [u8], delim: u8) -> Self { - Strtab { + /// Creates a `Strtab` directly without bounds check and without parsing it. + /// + /// This is potentially unsafe and should only be used if `feature = "alloc"` is disabled. + pub fn from_slice_unparsed(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> Self { + Self { delim: ctx::StrCtx::Delimiter(delim), - bytes, + bytes: &bytes[offset..offset + len], + #[cfg(feature = "alloc")] + strings: Vec::new(), } } - /// Construct a strtab from a `ptr`, and a `size`, using `delim` as the delimiter - /// # Safety + /// Gets a str reference from the backing bytes starting at byte `offset`. /// - /// This function creates a `Strtab` directly from a raw pointer and size - pub unsafe fn from_raw(ptr: *const u8, size: usize, delim: u8) -> Strtab<'a> { - Strtab { - delim: ctx::StrCtx::Delimiter(delim), - bytes: slice::from_raw_parts(ptr, size), + /// If the index is out of bounds, `None` is returned. Panics if bytes are invalid UTF-8. + /// Use this method if the `Strtab` was created using `from_slice_unparsed()`. + pub fn get_unsafe(&self, offset: usize) -> Option<&'a str> { + if offset >= self.bytes.len() { + None + } else { + Some(get_str(offset, self.bytes, self.delim).unwrap()) } } #[cfg(feature = "alloc")] - /// Parses a strtab from `bytes` at `offset` with `len` size as the backing string table, using `delim` as the delimiter - pub fn parse( - bytes: &'a [u8], - offset: usize, - len: usize, - delim: u8, - ) -> error::Result> { + /// Parses a `Strtab` from `bytes` at `offset` with `len` size as the backing string table, using `delim` as the delimiter. + /// + /// Errors if bytes are invalid UTF-8. + /// Requires `feature = "alloc"` + pub fn parse(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> error::Result { let (end, overflow) = offset.overflowing_add(len); if overflow || end > bytes.len() { return Err(error::Error::Malformed(format!( @@ -60,28 +64,74 @@ impl<'a> Strtab<'a> { overflow ))); } - Ok(Strtab { - bytes: &bytes[offset..end], - delim: ctx::StrCtx::Delimiter(delim), - }) + let mut result = Self::from_slice_unparsed(bytes, offset, len, delim); + let mut i = 0; + while i < result.bytes.len() { + let string = get_str(i, result.bytes, result.delim)?; + result.strings.push((i, string)); + i += string.len() + 1; + } + Ok(result) + } + #[cfg(feature = "alloc")] + /// Parses a `Strtab` with `bytes` as the backing string table, using `delim` as the delimiter between entries. + /// + /// Requires `feature = "alloc"` + pub fn new(bytes: &'a [u8], delim: u8) -> error::Result { + Self::parse(bytes, 0, bytes.len(), delim) } #[cfg(feature = "alloc")] - /// Converts the string table to a vector, with the original `delim` used to separate the strings + /// Converts the string table to a vector of parsed strings. + /// + /// Requires `feature = "alloc"` pub fn to_vec(&self) -> error::Result> { - let len = self.bytes.len(); - let mut strings = Vec::with_capacity(len); - let mut i = 0; - while i < len { - let string = self.get(i).unwrap()?; - i = i + string.len() + 1; - strings.push(string); + // Fallback in case `Strtab` was created using `from_slice_unparsed()`. + if self.strings.is_empty() { + let mut result = Vec::new(); + let mut i = 0; + while i < self.bytes.len() { + let string = get_str(i, self.bytes, self.delim)?; + result.push(string); + i += string.len() + 1; + } + return Ok(result); } - Ok(strings) + Ok(self.strings.iter().map(|&(_key, value)| value).collect()) } - /// Safely parses and gets a str reference from the backing bytes starting at byte `offset`. + #[cfg(feature = "alloc")] + /// Safely gets a str reference from the parsed table starting at byte `offset`. + /// /// If the index is out of bounds, `None` is returned. /// Requires `feature = "alloc"` + pub fn get_at(&self, offset: usize) -> Option<&'a str> { + match self + .strings + .binary_search_by_key(&offset, |&(key, _value)| key) + { + Ok(index) => Some(self.strings[index].1), + Err(index) => { + if index == 0 { + return None; + } + let (string_begin_offset, entire_string) = self.strings[index - 1]; + entire_string.get(offset - string_begin_offset..) + } + } + } + #[deprecated(since = "0.4.2", note = "Use from_slice_unparsed() instead")] + /// Construct a strtab from a `ptr`, and a `size`, using `delim` as the delimiter + /// + /// # Safety + /// This function creates a `Strtab` directly from a raw pointer and size + pub unsafe fn from_raw(ptr: *const u8, len: usize, delim: u8) -> Strtab<'a> { + Self::from_slice_unparsed(core::slice::from_raw_parts(ptr, len), 0, len, delim) + } + #[deprecated(since = "0.4.2", note = "Bad performance, use get_at() instead")] #[cfg(feature = "alloc")] + /// Parses a str reference from the parsed table starting at byte `offset`. + /// + /// If the index is out of bounds, `None` is returned. + /// Requires `feature = "alloc"` pub fn get(&self, offset: usize) -> Option> { if offset >= self.bytes.len() { None @@ -89,15 +139,6 @@ impl<'a> Strtab<'a> { Some(get_str(offset, self.bytes, self.delim).map_err(core::convert::Into::into)) } } - /// Gets a str reference from the backing bytes starting at byte `offset`. - /// If the index is out of bounds, `None` is returned. Panics if bytes are invalid UTF-8. - pub fn get_unsafe(&self, offset: usize) -> Option<&'a str> { - if offset >= self.bytes.len() { - None - } else { - Some(get_str(offset, self.bytes, self.delim).unwrap()) - } - } } impl<'a> fmt::Debug for Strtab<'a> { @@ -110,10 +151,12 @@ impl<'a> fmt::Debug for Strtab<'a> { } impl<'a> Default for Strtab<'a> { - fn default() -> Strtab<'a> { - Strtab { - bytes: &[], + fn default() -> Self { + Self { delim: ctx::StrCtx::default(), + bytes: &[], + #[cfg(feature = "alloc")] + strings: Vec::new(), } } } @@ -133,8 +176,7 @@ impl<'a> Index for Strtab<'a> { #[test] fn as_vec_no_final_null() { - let bytes = b"\0printf\0memmove\0busta"; - let strtab = unsafe { Strtab::from_raw(bytes.as_ptr(), bytes.len(), 0x0) }; + let strtab = Strtab::new(b"\0printf\0memmove\0busta", 0x0).unwrap(); let vec = strtab.to_vec().unwrap(); assert_eq!(vec.len(), 4); assert_eq!(vec, vec!["", "printf", "memmove", "busta"]); @@ -142,8 +184,7 @@ fn as_vec_no_final_null() { #[test] fn as_vec_no_first_null_no_final_null() { - let bytes = b"printf\0memmove\0busta"; - let strtab = unsafe { Strtab::from_raw(bytes.as_ptr(), bytes.len(), 0x0) }; + let strtab = Strtab::new(b"printf\0memmove\0busta", 0x0).unwrap(); let vec = strtab.to_vec().unwrap(); assert_eq!(vec.len(), 3); assert_eq!(vec, vec!["printf", "memmove", "busta"]); @@ -151,8 +192,7 @@ fn as_vec_no_first_null_no_final_null() { #[test] fn to_vec_final_null() { - let bytes = b"\0printf\0memmove\0busta\0"; - let strtab = unsafe { Strtab::from_raw(bytes.as_ptr(), bytes.len(), 0x0) }; + let strtab = Strtab::new(b"\0printf\0memmove\0busta\0", 0x0).unwrap(); let vec = strtab.to_vec().unwrap(); assert_eq!(vec.len(), 4); assert_eq!(vec, vec!["", "printf", "memmove", "busta"]); @@ -160,9 +200,37 @@ fn to_vec_final_null() { #[test] fn to_vec_newline_delim() { - let bytes = b"\nprintf\nmemmove\nbusta\n"; - let strtab = unsafe { Strtab::from_raw(bytes.as_ptr(), bytes.len(), b'\n') }; + let strtab = Strtab::new(b"\nprintf\nmemmove\nbusta\n", b'\n').unwrap(); let vec = strtab.to_vec().unwrap(); assert_eq!(vec.len(), 4); assert_eq!(vec, vec!["", "printf", "memmove", "busta"]); } + +#[test] +fn parse_utf8() { + assert!(match Strtab::new(&[0x80, 0x80], b'\n') { + Err(error::Error::Scroll(scroll::Error::BadInput { + size: 2, + msg: "invalid utf8", + })) => true, + _ => false, + }); + assert!(match Strtab::new(&[0xC6, 0x92, 0x6F, 0x6F], b'\n') { + Ok(_) => true, + _ => false, + }); +} + +#[test] +fn get_at_utf8() { + let strtab = Strtab::new("\nƒoo\nmemmove\n🅱️usta\n".as_bytes(), b'\n').unwrap(); + assert_eq!(strtab.get_at(0), Some("")); + assert_eq!(strtab.get_at(5), Some("")); + assert_eq!(strtab.get_at(6), Some("memmove")); + assert_eq!(strtab.get_at(14), Some("\u{1f171}\u{fe0f}usta")); + assert_eq!(strtab.get_at(16), None); + assert_eq!(strtab.get_at(18), Some("\u{fe0f}usta")); + assert_eq!(strtab.get_at(21), Some("usta")); + assert_eq!(strtab.get_at(25), Some("")); + assert_eq!(strtab.get_at(26), None); +} diff --git a/tests/elf.rs b/tests/elf.rs index f943efa7..3bd96f45 100644 --- a/tests/elf.rs +++ b/tests/elf.rs @@ -60,7 +60,7 @@ fn parse_text_section_size_lazy(base: &[u8]) -> Result { ) .map_err(|_| "parse string table error")?; for (_, section) in obj.section_headers.iter().enumerate() { - let section_name = strtab.get(section.sh_name).unwrap().unwrap(); + let section_name = strtab.get_at(section.sh_name).unwrap(); if section_name == ".text" { return Ok(section.sh_size); }