-
Notifications
You must be signed in to change notification settings - Fork 155
/
strtab.rs
236 lines (223 loc) · 8.34 KB
/
strtab.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
//! A byte-offset based string table.
//! Commonly used in ELF binaries, Unix archives, and even PE binaries.
use core::fmt;
use core::ops::Index;
use core::str;
use scroll::{ctx, Pread};
if_alloc! {
use crate::error;
use alloc::vec::Vec;
}
/// A common string table format which is indexed by byte offsets (and not
/// member index). Constructed using [`parse`](#method.parse)
/// with your choice of delimiter. Please be careful.
///
/// The table borrows its backing bytes for `'a`; every string it hands out is a
/// slice of those bytes.
pub struct Strtab<'a> {
/// `scroll` string context passed to every read of the backing bytes.
/// `from_slice_unparsed()` sets it to `StrCtx::Delimiter(delim)`; `Default` uses `StrCtx::default()`.
delim: ctx::StrCtx,
/// The backing bytes of the table (already narrowed to the table's own range).
bytes: &'a [u8],
/// Pre-parsed entries as `(byte offset within bytes, string)` pairs, pushed by `parse()`
/// in increasing offset order so that `get_at()` can binary search them.
/// Left empty by `from_slice_unparsed()` and `Default`.
#[cfg(feature = "alloc")]
strings: Vec<(usize, &'a str)>,
}
/// Reads a single `&str` out of `bytes`, starting at byte `offset`, using `delim` as the
/// `scroll` string context.
///
/// This is a thin wrapper around `pread_with::<&str>`; whatever error `scroll` reports
/// is returned to the caller unchanged.
#[inline(always)]
fn get_str(offset: usize, bytes: &[u8], delim: ctx::StrCtx) -> scroll::Result<&str> {
bytes.pread_with::<&str>(offset, delim)
}
impl<'a> Strtab<'a> {
/// Builds a `Strtab` over `bytes[offset..offset + len]` without scanning it for strings.
///
/// No entry is validated or pre-parsed here, so (with `feature = "alloc"`) the list of
/// pre-parsed strings stays empty and lookups that rely on it, such as `get_at()`, will
/// not find anything. This is potentially unsafe and should only be used if
/// `feature = "alloc"` is disabled.
///
/// # Panics
/// Panics if the range `offset..offset + len` does not lie within `bytes`.
pub fn from_slice_unparsed(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> Self {
    let table_end = offset + len;
    let table_bytes = &bytes[offset..table_end];
    Self {
        delim: ctx::StrCtx::Delimiter(delim),
        bytes: table_bytes,
        #[cfg(feature = "alloc")]
        strings: Vec::new(),
    }
}
/// Reads the string that begins at byte `offset` straight from the backing bytes.
///
/// Returns `None` when `offset` is at or beyond the end of the table.
/// This is the lookup to use when the `Strtab` was created using `from_slice_unparsed()`.
///
/// # Panics
/// Panics if the string cannot be read, e.g. because the bytes are invalid UTF-8.
pub fn get_unsafe(&self, offset: usize) -> Option<&'a str> {
    if offset < self.bytes.len() {
        let parsed = get_str(offset, self.bytes, self.delim);
        Some(parsed.unwrap())
    } else {
        None
    }
}
/// Parses the `len` bytes of `bytes` starting at `offset` as a string table whose entries
/// are separated by `delim`, recording every entry together with its byte offset.
///
/// # Errors
/// Returns `Error::Malformed` if `offset + len` overflows or reaches past the end of
/// `bytes`, and propagates the read error if an entry is invalid UTF-8.
///
/// Requires `feature = "alloc"`
#[cfg(feature = "alloc")]
pub fn parse(bytes: &'a [u8], offset: usize, len: usize, delim: u8) -> error::Result<Self> {
    let (table_end, overflowed) = offset.overflowing_add(len);
    let in_bounds = !overflowed && table_end <= bytes.len();
    if !in_bounds {
        let message = format!(
            "Strtable size ({}) + offset ({}) is out of bounds for {} #bytes. Overflowed: {}",
            len,
            offset,
            bytes.len(),
            overflowed
        );
        return Err(error::Error::Malformed(message));
    }

    let mut table = Self::from_slice_unparsed(bytes, offset, len, delim);
    let backing = table.bytes;
    let separator = table.delim;
    let mut cursor = 0;
    while cursor < backing.len() {
        let entry = get_str(cursor, backing, separator)?;
        table.strings.push((cursor, entry));
        // Step over the entry and the single delimiter byte that follows it.
        cursor += entry.len() + 1;
    }
    Ok(table)
}
/// Parses all of `bytes` as a string table whose entries are separated by `delim`.
///
/// Equivalent to calling `parse()` with offset `0` and the full length of `bytes`,
/// and fails in the same situations.
///
/// Requires `feature = "alloc"`
#[cfg(feature = "alloc")]
pub fn new(bytes: &'a [u8], delim: u8) -> error::Result<Self> {
    let whole_len = bytes.len();
    Self::parse(bytes, 0, whole_len, delim)
}
/// Returns every string of the table, in table order, as a vector.
///
/// When the table holds pre-parsed entries they are returned directly. Otherwise
/// (e.g. the `Strtab` was created using `from_slice_unparsed()`) the backing bytes are
/// scanned on the spot, and an entry that cannot be read is reported as an error.
///
/// Requires `feature = "alloc"`
#[cfg(feature = "alloc")]
pub fn to_vec(&self) -> error::Result<Vec<&'a str>> {
    if !self.strings.is_empty() {
        let parsed = self.strings.iter().map(|entry| entry.1).collect();
        return Ok(parsed);
    }

    // Nothing was pre-parsed: walk the raw bytes one entry at a time.
    let mut collected = Vec::new();
    let mut cursor = 0;
    while cursor < self.bytes.len() {
        let entry = get_str(cursor, self.bytes, self.delim)?;
        // Step over the entry and the single delimiter byte that follows it.
        cursor += entry.len() + 1;
        collected.push(entry);
    }
    Ok(collected)
}
/// Safely looks up the string starting at byte `offset` among the pre-parsed entries.
///
/// An `offset` that is the start of an entry yields that entry; an `offset` inside an
/// entry yields the tail of that entry from `offset` on. `None` is returned if no
/// entry starts at or before `offset`, if `offset` lies past the end of the closest
/// preceding entry, or if it does not fall on a UTF-8 character boundary.
///
/// Requires `feature = "alloc"`
#[cfg(feature = "alloc")]
pub fn get_at(&self, offset: usize) -> Option<&'a str> {
    let lookup = self
        .strings
        .binary_search_by_key(&offset, |entry| entry.0);
    let insertion_point = match lookup {
        // `offset` is exactly the start of an entry.
        Ok(exact) => return Some(self.strings[exact].1),
        Err(point) => point,
    };
    // An insertion point of 0 means no entry begins before `offset`.
    let preceding = insertion_point.checked_sub(1)?;
    let (entry_start, entry) = self.strings[preceding];
    entry.get(offset - entry_start..)
}
/// Builds an unparsed `Strtab` from a raw pointer `ptr` and a byte count `len`, using
/// `delim` as the delimiter.
///
/// # Safety
/// The pointer and length are turned into a slice with `core::slice::from_raw_parts`,
/// so the caller must uphold that function's requirements: `ptr` must be valid for
/// reads of `len` bytes, and that memory must stay valid and unmodified for the whole
/// lifetime `'a` of the returned table.
#[deprecated(since = "0.4.2", note = "Use from_slice_unparsed() instead")]
pub unsafe fn from_raw(ptr: *const u8, len: usize, delim: u8) -> Strtab<'a> {
    let raw_bytes = core::slice::from_raw_parts(ptr, len);
    Self::from_slice_unparsed(raw_bytes, 0, len, delim)
}
/// Reads the string starting at byte `offset` from the backing bytes, parsing it on
/// every call.
///
/// Returns `None` when `offset` is at or beyond the end of the table; otherwise the
/// outcome of the read, with any read error converted into this crate's error type.
///
/// Requires `feature = "alloc"`
#[deprecated(since = "0.4.2", note = "Bad performance, use get_at() instead")]
#[cfg(feature = "alloc")]
pub fn get(&self, offset: usize) -> Option<error::Result<&'a str>> {
    if offset < self.bytes.len() {
        let parsed = get_str(offset, self.bytes, self.delim);
        Some(parsed.map_err(error::Error::from))
    } else {
        None
    }
}
}
impl<'a> fmt::Debug for Strtab<'a> {
    /// Formats the table as a `Strtab` struct showing its delimiter context and the
    /// result of decoding the backing bytes as UTF-8; the pre-parsed entries are not shown.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let bytes_as_text = str::from_utf8(self.bytes);
        f.debug_struct("Strtab")
            .field("delim", &self.delim)
            .field("bytes", &bytes_as_text)
            .finish()
    }
}
impl<'a> Default for Strtab<'a> {
fn default() -> Self {
Self {
delim: ctx::StrCtx::default(),
bytes: &[],
#[cfg(feature = "alloc")]
strings: Vec::new(),
}
}
}
impl<'a> Index<usize> for Strtab<'a> {
    type Output = str;
    /// Gets the str reference starting at byte `offset`.
    /// **NB**: this will panic if the underlying bytes are not valid utf8, or the offset is invalid
    #[inline(always)]
    fn index(&self, offset: usize) -> &Self::Output {
        // Reads the bytes directly rather than going through get(), which only exists
        // with #[cfg(feature = "alloc")]. The Index trait ties the returned reference to
        // &self, so the result is slightly less useful than get()'s, even though the
        // data itself lives for 'a.
        let parsed = get_str(offset, self.bytes, self.delim);
        parsed.unwrap()
    }
}
/// A table with a leading delimiter but no trailing one still yields its last entry.
#[test]
fn as_vec_no_final_null() {
    let raw: &[u8] = b"\0printf\0memmove\0busta";
    let strtab = Strtab::new(raw, 0x0).unwrap();
    let entries = strtab.to_vec().unwrap();
    let expected = vec!["", "printf", "memmove", "busta"];
    assert_eq!(entries.len(), 4);
    assert_eq!(entries, expected);
}
/// A table with neither a leading nor a trailing delimiter yields exactly its entries.
#[test]
fn as_vec_no_first_null_no_final_null() {
    let raw: &[u8] = b"printf\0memmove\0busta";
    let strtab = Strtab::new(raw, 0x0).unwrap();
    let entries = strtab.to_vec().unwrap();
    let expected = vec!["printf", "memmove", "busta"];
    assert_eq!(entries.len(), 3);
    assert_eq!(entries, expected);
}
/// A trailing delimiter terminates the last entry and does not add an extra empty one.
#[test]
fn to_vec_final_null() {
    let raw: &[u8] = b"\0printf\0memmove\0busta\0";
    let strtab = Strtab::new(raw, 0x0).unwrap();
    let entries = strtab.to_vec().unwrap();
    let expected = vec!["", "printf", "memmove", "busta"];
    assert_eq!(entries.len(), 4);
    assert_eq!(entries, expected);
}
/// The delimiter is configurable: a newline-separated table parses like a NUL-separated one.
#[test]
fn to_vec_newline_delim() {
    let raw: &[u8] = b"\nprintf\nmemmove\nbusta\n";
    let strtab = Strtab::new(raw, b'\n').unwrap();
    let entries = strtab.to_vec().unwrap();
    let expected = vec!["", "printf", "memmove", "busta"];
    assert_eq!(entries.len(), 4);
    assert_eq!(entries, expected);
}
/// Parsing rejects invalid UTF-8 with scroll's `BadInput` error and accepts valid
/// multi-byte UTF-8.
#[test]
fn parse_utf8() {
    // Two lone continuation bytes are not valid UTF-8.
    let invalid_bytes = [0x80, 0x80];
    let rejected = match Strtab::new(&invalid_bytes, b'\n') {
        Err(error::Error::Scroll(scroll::Error::BadInput {
            size: 2,
            msg: "invalid utf8",
        })) => true,
        _ => false,
    };
    assert!(rejected);

    // A two-byte code point followed by two ASCII bytes is valid.
    let valid_bytes = [0xC6, 0x92, 0x6F, 0x6F];
    let accepted = Strtab::new(&valid_bytes, b'\n').is_ok();
    assert!(accepted);
}
/// `get_at` works on byte offsets: entry starts, offsets inside an entry, delimiter
/// positions, and offsets that split a multi-byte character or lie past the table.
#[test]
fn get_at_utf8() {
    let source = "\nƒoo\nmemmove\n🅱️usta\n";
    let strtab = Strtab::new(source.as_bytes(), b'\n').unwrap();
    // (byte offset, expected lookup result)
    let cases: [(usize, Option<&str>); 9] = [
        (0, Some("")),
        (5, Some("")),
        (6, Some("memmove")),
        (14, Some("\u{1f171}\u{fe0f}usta")),
        // Falls in the middle of a multi-byte character.
        (16, None),
        (18, Some("\u{fe0f}usta")),
        (21, Some("usta")),
        (25, Some("")),
        // One past the final delimiter.
        (26, None),
    ];
    for &(offset, expected) in cases.iter() {
        assert_eq!(strtab.get_at(offset), expected, "offset {}", offset);
    }
}