-
Notifications
You must be signed in to change notification settings - Fork 22
/
mapping.rs
706 lines (627 loc) · 22.2 KB
/
mapping.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
//! Module for managing guest memory mappings.
use libc::iovec;
use crate::common::PAGE_SIZE;
use crate::util::aspace::ASpace;
use crate::vmm::VmmFile;
use std::fs::File;
use std::io::{Error, ErrorKind, Result};
use std::marker::PhantomData;
use std::mem::ManuallyDrop;
use std::os::unix::io::AsRawFd;
use std::ptr::{copy_nonoverlapping, NonNull};
// Guard regions are 0x20000 bytes = 128 KiB. (The previous comment said
// "2MB", but 2 MiB would be 0x200000.)
/// The size of a guard page.
pub const GUARD_LEN: usize = 0x20000;
/// Alignment requested for the guard region; passed as the address hint so
/// that MAP_ALIGN (illumos) can align the reservation to this boundary.
pub const GUARD_ALIGN: usize = 0x20000;

// mmap(2) flags for the initial PROT_NONE reservation: anonymous, private,
// and without swap reservation. On illumos, MAP_ALIGN additionally treats
// the address argument as an alignment request rather than a placement hint.
#[cfg(target_os = "illumos")]
const FLAGS_MAP_GUARD: i32 =
    libc::MAP_ANON | libc::MAP_PRIVATE | libc::MAP_NORESERVE | libc::MAP_ALIGN;
#[cfg(not(target_os = "illumos"))]
const FLAGS_MAP_GUARD: i32 =
    libc::MAP_ANON | libc::MAP_PRIVATE | libc::MAP_NORESERVE;
bitflags! {
    /// Bitflags representing memory protections.
    ///
    /// Values mirror the `bhyve_api::PROT_*` constants so `bits()` can be
    /// handed directly to mmap-style interfaces.
    pub struct Prot: u8 {
        /// No access permitted.
        const NONE = 0;
        /// Pages may be read.
        const READ = bhyve_api::PROT_READ as u8;
        /// Pages may be written.
        const WRITE = bhyve_api::PROT_WRITE as u8;
        /// Pages may be executed.
        const EXEC = bhyve_api::PROT_EXEC as u8;
        /// Read, write, and execute access combined.
        const ALL = (bhyve_api::PROT_READ
            | bhyve_api::PROT_WRITE
            | bhyve_api::PROT_EXEC) as u8;
    }
}
/// A region of memory, bounded by two guard pages.
pub struct GuardSpace {
    // Original PROT_NONE mapping, which is replaced by other mappings.
    //
    // Portions of map are vended out to callers during the usage of
    // GuardSpace, though the remaining "unused" portions are unmapped
    // manually in GuardSpace's drop implementation.
    //
    // ManuallyDrop prevents Mapping's Drop (a blanket munmap of the whole
    // region) from running; teardown happens piecewise in
    // `Drop for GuardSpace` so vended-out mappings stay valid.
    map: ManuallyDrop<Mapping>,
    // Tracks the allocated mappings within the GuardSpace,
    // relative to the start address of the first guard page.
    // The two guard pages themselves are registered here as well.
    aspace: ASpace<()>,
}
impl GuardSpace {
    /// Creates a new guard region, capable of storing a mapping of the
    /// requested size.
    ///
    /// # Arguments
    /// - `size`: The size of the mapping, not including guard pages.
    /// Implicitly rounded up to the nearest [`GUARD_LEN`].
    ///
    /// # Errors
    /// Returns an error if the padded size overflows `usize`, or if the
    /// underlying `mmap` reservation fails.
    pub fn new(size: usize) -> Result<GuardSpace> {
        let prot = Prot::NONE;

        // Round up size to the nearest GUARD_LEN, guarding against
        // arithmetic overflow for absurdly large requests.
        let padded = size
            .checked_add(GUARD_LEN - 1)
            .ok_or_else(|| {
                Error::new(ErrorKind::InvalidInput, "Size too large")
            })?
            & !(GUARD_LEN - 1);

        // Total size is the user-accessible space, plus pages on either side.
        let overall = padded.checked_add(GUARD_LEN * 2).ok_or_else(|| {
            Error::new(ErrorKind::InvalidInput, "Size too large")
        })?;

        // Safety: This invocation of mmap is only unsafe because of FFI;
        // it isn't requesting a fixed address mapping (the address argument
        // is a hint, or an alignment request under MAP_ALIGN on illumos),
        // and uses anonymous (rather than file-backed) virtual memory.
        let ptr = unsafe {
            libc::mmap(
                GUARD_ALIGN as *mut libc::c_void,
                overall,
                prot.bits().into(),
                FLAGS_MAP_GUARD,
                -1,
                0,
            ) as *mut u8
        };

        // mmap signals failure by returning MAP_FAILED (-1), not null, so a
        // NonNull check alone would silently accept a failed mapping.
        if ptr as *mut libc::c_void == libc::MAP_FAILED {
            return Err(Error::last_os_error());
        }
        let ptr = NonNull::new(ptr).ok_or_else(|| {
            Error::new(ErrorKind::Other, "mmap returned null")
        })?;

        let mut aspace = ASpace::new(0, overall);

        // Register the two guard pages at either end of the region, so the
        // allocator below never vends them out.
        aspace.register(0, GUARD_LEN, ()).unwrap();
        aspace.register(overall - GUARD_LEN, GUARD_LEN, ()).unwrap();

        Ok(GuardSpace {
            map: ManuallyDrop::new(Mapping {
                inner: SubMapping {
                    ptr,
                    len: overall,
                    prot,
                    _phantom: PhantomData,
                },
            }),
            aspace,
        })
    }

    /// Creates a new mapping within the bounds of the guard region, replacing
    /// guard pages with the new mapping.
    ///
    /// `size` must be divisible by [`PAGE_SIZE`].
    ///
    /// The lifetime of the returned mapping can exceed the lifetime of the
    /// GuardSpace - dropping the GuardSpace early merely removes the guard
    /// mappings.
    pub fn mapping(
        &mut self,
        size: usize,
        prot: Prot,
        vmm: &VmmFile,
        devoff: i64,
    ) -> Result<Mapping> {
        if size % PAGE_SIZE != 0 {
            return Err(Error::new(
                ErrorKind::InvalidInput,
                "Size not aligned to page size",
            ));
        }

        // Find free space large enough for this mapping.
        //
        // This acts as a first-fit allocator.
        let free_space = self
            .aspace
            .inverse_iter()
            .find(|&extent| extent.len() >= size)
            .ok_or_else(|| {
                Error::new(ErrorKind::NotFound, "Not enough guard space")
            })?;

        // Access the to-be-mapped subregion.
        let subregion =
            self.map.as_ref().subregion(free_space.start(), size).ok_or_else(
                || Error::new(ErrorKind::NotFound, "Not enough guard space"),
            )?;

        // Safety: The region of memory being replaced by MAP_FIXED has been
        // allocated by the GuardSpace, and becomes inaccessible to other
        // callers after this invocation succeeds.
        let mapping = unsafe {
            Mapping::new_internal(Some(subregion.ptr), size, prot, vmm, devoff)?
        };

        // Record the allocation so the extent is neither vended out again
        // nor unmapped during GuardSpace teardown.
        self.aspace.register(free_space.start(), size, ()).unwrap();
        Ok(mapping)
    }
}
impl Drop for GuardSpace {
    fn drop(&mut self) {
        // Deregister the guard pages, as we would like to unmap them.
        // (They were registered at offsets 0 and len - GUARD_LEN in new(),
        // so after this point inverse_iter() reports them as free.)
        self.aspace.unregister(0).unwrap();
        self.aspace.unregister(self.map.as_ref().len() - GUARD_LEN).unwrap();

        // Unmap all space marked "free" in the original mapping.
        // Other regions are used by Mapping objects, and will be
        // unmapped when those mappings go out of scope.
        //
        // `map` is ManuallyDrop, so Mapping's Drop (which would munmap the
        // entire region, including vended-out mappings) never runs; only
        // the still-unallocated holes are released here.
        for free_space in self.aspace.inverse_iter() {
            // Safety: each free extent lies within the original reservation
            // and was never handed out as a Mapping, so nothing else can be
            // using those pages.
            let r = unsafe {
                let start =
                    self.map.as_ref().ptr.as_ptr().add(free_space.start());
                libc::munmap(start as *mut libc::c_void, free_space.len())
            };
            assert!(r == 0, "Unmap of GuardSpace failed");
        }
    }
}
/// An owned region of mapped guest memory, accessible via [`SubMapping`].
///
/// When dealing with raw pointers, caution must be taken to dereference the
/// pointer safely:
/// - The pointer must not be null
/// - The dereferenced pointer must be within bounds of a valid mapping
///
/// Additionally, aliasing rules apply to references:
/// - References cannot outlive their referents
/// - Mutable references cannot be aliased
///
/// These issues become especially hairy across mappings, where an
/// out-of-process entity (i.e., the guest, hardware, etc) may modify memory.
///
/// This structure provides an interface which upholds the following conditions:
/// - Reads to a memory region are only permitted if the mapping is readable.
/// - Writes to a memory region are only permitted if the mapping is writable.
/// - References to memory are not exposed from the structure.
pub struct Mapping {
    // The full extent of the owned mapping. The 'static lifetime is sound
    // because Mapping itself owns the memory; borrows at shorter lifetimes
    // are handed out through the AsRef impl below.
    inner: SubMapping<'static>,
}
impl Mapping {
/// Creates a new memory mapping from a VmmFile, with the requested
/// permissions.
pub fn new(
size: usize,
prot: Prot,
vmm: &VmmFile,
devoff: i64,
) -> Result<Self> {
// Safety: addr == None, so the invocation may choose its own mapping.
unsafe { Mapping::new_internal(None, size, prot, vmm, devoff) }
}
// Safety:
// - If addr != None, the caller must ensure that the region of memory
// from [addr, addr + size) has previously been mapped with Prot::None.
// Using mmap with MAP_FIXED silently replaces conflicting pages, so
// pointing to an arbitrary address risks colliding with the rest of the
// address space.
// - The creator of the VmmFile is responsible for ensuring it points
// to an object that may not be truncated. If this property is upheld,
// the returned mapping cannot suddenly become invalided.
// - The returned region of memory must not be accessed via reference,
// as it is accessible to the guest, which may arbitrarily read or
// write the region.
unsafe fn new_internal(
addr: Option<NonNull<u8>>,
size: usize,
prot: Prot,
vmm: &VmmFile,
devoff: i64,
) -> Result<Self> {
let flags =
libc::MAP_SHARED | if addr.is_some() { libc::MAP_FIXED } else { 0 };
let addr = addr
.map(|addr| addr.as_ptr() as *mut libc::c_void)
.unwrap_or_else(core::ptr::null_mut);
let ptr =
libc::mmap(addr, size, prot.bits().into(), flags, vmm.fd(), devoff)
as *mut u8;
let ptr = NonNull::new(ptr).ok_or_else(Error::last_os_error)?;
let m = Mapping {
inner: SubMapping { ptr, len: size, prot, _phantom: PhantomData },
};
Ok(m)
}
}
impl Drop for Mapping {
    fn drop(&mut self) {
        // Safety:
        // - No references may exist to the mapping at the time it is
        //   dropped, as no references are ever created by this API.
        // - No child SubMappings of this Mapping can still be alive, since
        //   their lifetimes are bounded by the Mapping itself.
        let region = self.as_ref();
        let base = region.ptr.as_ptr() as *mut libc::c_void;
        let size = region.len;
        unsafe {
            libc::munmap(base, size);
        }
    }
}
/// A borrowed region from a [`Mapping`] object.
///
/// Provides interfaces for acting on memory, but does not own the
/// underlying memory region.
#[derive(Debug)]
pub struct SubMapping<'a> {
    // Base address of the region. Only ever accessed through raw-pointer
    // reads/writes; no references into the region are created.
    ptr: NonNull<u8>,
    // Length of the region in bytes.
    len: usize,
    // Protections of the region, checked before every access.
    prot: Prot,
    // Ties this borrow's lifetime to the parent mapping without storing a
    // reference to it.
    _phantom: PhantomData<&'a ()>,
}
// Safety: SubMapping's API does not provide raw access to the underlying
// pointer, nor any mechanism to create references to the underlying data,
// so sending/sharing it across threads cannot create aliasing references.
unsafe impl<'a> Send for SubMapping<'a> {}
unsafe impl<'a> Sync for SubMapping<'a> {}

impl<'a> AsRef<SubMapping<'a>> for Mapping {
    // Exposes the mapping's full extent as a borrowed SubMapping. The inner
    // submapping is 'static, and SubMapping is covariant in its lifetime
    // (via PhantomData<&'a ()>), so viewing it at any shorter 'a is sound.
    fn as_ref(&self) -> &SubMapping<'a> {
        &self.inner
    }
}
impl<'a> SubMapping<'a> {
    /// Acquire a reference to a region of memory within the
    /// current mapping.
    ///
    /// - `offset` is relative to the current mapping.
    /// - `length` is the length of the new subregion.
    ///
    /// Returns `None` if the requested offset/length extends beyond the end of
    /// the mapping.
    pub fn subregion(
        &self,
        offset: usize,
        length: usize,
    ) -> Option<SubMapping> {
        let end = offset.checked_add(length)?;
        if self.len < end {
            return None;
        }

        // Safety:
        // - Starting and resulting pointer must be within bounds or
        //   one past the end of the same allocated object, which is
        //   guaranteed by the bounds check above.
        // - The computed offset, in bytes, cannot overflow isize.
        // - The offset cannot rely on "wrapping around" the address
        //   space.
        let ptr =
            NonNull::new(unsafe { self.ptr.as_ptr().add(offset) }).unwrap();

        let sub = SubMapping {
            ptr,
            len: length,
            // Subregions inherit the protections of their parent.
            prot: self.prot,
            _phantom: PhantomData,
        };

        Some(sub)
    }

    /// Reads a `T` object from the start of the mapping.
    ///
    /// # Errors
    /// - `PermissionDenied` if the mapping is not readable.
    /// - `InvalidData` if the mapping is smaller than `size_of::<T>()`.
    pub fn read<T: Copy>(&self) -> Result<T> {
        if !self.prot.contains(Prot::READ) {
            return Err(Error::new(
                ErrorKind::PermissionDenied,
                "No read access",
            ));
        }
        if self.len < std::mem::size_of::<T>() {
            return Err(Error::new(ErrorKind::InvalidData, "Buffer too small"));
        }
        let typed = self.ptr.as_ptr() as *const T;
        // Safety:
        // - typed is valid for reads of size_of::<T>() bytes, per the
        //   bounds check above.
        // - read_unaligned imposes no alignment requirement on typed.
        // - typed must point to a properly initialized value of T.
        Ok(unsafe { typed.read_unaligned() })
    }

    /// Reads bytes from the start of the mapping into `buf`. The copy is
    /// truncated to the mapping's length if `buf` is larger.
    ///
    /// Returns the number of bytes copied.
    pub fn read_bytes(&self, buf: &mut [u8]) -> Result<usize> {
        if !self.prot.contains(Prot::READ) {
            return Err(Error::new(
                ErrorKind::PermissionDenied,
                "No read access",
            ));
        }
        let to_copy = usize::min(buf.len(), self.len);
        let src = self.ptr.as_ptr();
        let dst = buf.as_mut_ptr();
        // Safety:
        // - src is valid for reads of to_copy bytes (to_copy <= self.len).
        // - dst is valid for writes of to_copy bytes (to_copy <= buf.len()).
        // - u8 has alignment 1, so both pointers are properly aligned.
        // - src (guest memory) and dst (a caller-owned slice) cannot
        //   overlap.
        unsafe {
            copy_nonoverlapping(src, dst, to_copy);
        }
        Ok(to_copy)
    }

    /// Pread from `file` into the mapping.
    ///
    /// Requires WRITE access, as the file's contents are copied into the
    /// mapping. At most `min(length, len())` bytes are read.
    pub fn pread(
        &self,
        file: &File,
        length: usize,
        offset: i64,
    ) -> Result<usize> {
        if !self.prot.contains(Prot::WRITE) {
            return Err(Error::new(
                ErrorKind::PermissionDenied,
                "No write access",
            ));
        }
        let to_read = usize::min(length, self.len);
        // Safety: the destination range [ptr, ptr + to_read) lies entirely
        // within this writable mapping.
        let read = unsafe {
            libc::pread(
                file.as_raw_fd(),
                self.ptr.as_ptr() as *mut libc::c_void,
                to_read,
                offset,
            )
        };
        if read == -1 {
            return Err(Error::last_os_error());
        }
        Ok(read as usize)
    }

    /// Writes `value` into the start of the mapping.
    ///
    /// # Errors
    /// - `PermissionDenied` if the mapping is not writable.
    /// - `InvalidData` if the mapping is smaller than `size_of::<T>()`.
    pub fn write<T: Copy>(&self, value: &T) -> Result<()> {
        if !self.prot.contains(Prot::WRITE) {
            return Err(Error::new(
                ErrorKind::PermissionDenied,
                "No write access",
            ));
        }
        // Bounds-check the write (mirroring read()); otherwise a value
        // larger than the mapping would be written past its end.
        if self.len < std::mem::size_of::<T>() {
            return Err(Error::new(ErrorKind::InvalidData, "Buffer too small"));
        }
        let typed = self.ptr.as_ptr() as *mut T;
        // Safety: typed is valid for writes of size_of::<T>() bytes, per the
        // bounds check above; write_unaligned imposes no alignment
        // requirement.
        unsafe {
            typed.write_unaligned(*value);
        }
        Ok(())
    }

    /// Writes a buffer of bytes into the start of the mapping. The copy is
    /// truncated to the mapping's length if `buf` is larger.
    ///
    /// Returns the number of bytes copied.
    pub fn write_bytes(&self, buf: &[u8]) -> Result<usize> {
        if !self.prot.contains(Prot::WRITE) {
            return Err(Error::new(
                ErrorKind::PermissionDenied,
                "No write access",
            ));
        }
        let to_copy = usize::min(buf.len(), self.len);
        let src = buf.as_ptr();
        let dst = self.ptr.as_ptr();
        // Safety:
        // - src is valid for reads of to_copy bytes (to_copy <= buf.len()).
        // - dst is valid for writes of to_copy bytes (to_copy <= self.len).
        // - u8 has alignment 1, so both pointers are properly aligned.
        // - src (a caller-owned slice) and dst (guest memory) cannot
        //   overlap.
        unsafe {
            copy_nonoverlapping(src, dst, to_copy);
        }
        Ok(to_copy)
    }

    /// Writes a single byte `val` to the mapping, `count` times, truncated
    /// to the mapping's length.
    ///
    /// Returns the number of bytes written.
    pub fn write_byte(&self, val: u8, count: usize) -> Result<usize> {
        if !self.prot.contains(Prot::WRITE) {
            return Err(Error::new(
                ErrorKind::PermissionDenied,
                "No write access",
            ));
        }
        let to_copy = usize::min(count, self.len);
        // Safety: the destination range lies entirely within this writable
        // mapping.
        unsafe {
            self.ptr.as_ptr().write_bytes(val, to_copy);
        }
        Ok(to_copy)
    }

    /// Pwrite from the mapping to `file`.
    ///
    /// Requires READ access, as the mapping's contents are copied out to
    /// the file. At most `min(length, len())` bytes are written.
    pub fn pwrite(
        &self,
        file: &File,
        length: usize,
        offset: i64,
    ) -> Result<usize> {
        if !self.prot.contains(Prot::READ) {
            // pwrite reads from the mapping, so READ is the required
            // protection here.
            return Err(Error::new(
                ErrorKind::PermissionDenied,
                "No read access",
            ));
        }
        let to_write = usize::min(length, self.len);
        // Safety: the source range [ptr, ptr + to_write) lies entirely
        // within this readable mapping.
        let written = unsafe {
            libc::pwrite(
                file.as_raw_fd(),
                self.ptr.as_ptr() as *const libc::c_void,
                to_write,
                offset,
            )
        };
        if written == -1 {
            return Err(Error::last_os_error());
        }
        Ok(written as usize)
    }

    /// Returns the length of the mapping.
    pub fn len(&self) -> usize {
        self.len
    }

    /// Returns true if the mapping is empty.
    pub fn is_empty(&self) -> bool {
        self.len == 0
    }

    /// Returns a raw readable pointer to the underlying data, or `None` if
    /// the mapping is not readable.
    ///
    /// # Safety
    ///
    /// - The caller must never create a reference to the underlying
    ///   memory region.
    /// - The returned pointer must not outlive the mapping.
    /// - The caller may only read up to `len()` bytes.
    pub unsafe fn raw_readable(&self) -> Option<*const u8> {
        if self.prot.contains(Prot::READ) {
            Some(self.ptr.as_ptr() as *const u8)
        } else {
            None
        }
    }

    /// Returns a raw writable pointer to the underlying data, or `None` if
    /// the mapping is not writable.
    ///
    /// # Safety
    ///
    /// - The caller must never create a reference to the underlying
    ///   memory region.
    /// - The returned pointer must not outlive the mapping.
    /// - The caller may only write up to `len()` bytes.
    pub unsafe fn raw_writable(&self) -> Option<*mut u8> {
        if self.prot.contains(Prot::WRITE) {
            Some(self.ptr.as_ptr() as *mut u8)
        } else {
            None
        }
    }
}
/// Vectored I/O extensions over collections of [`SubMapping`]s.
pub trait MappingExt {
    /// preadv from `file` into multiple mappings
    fn preadv(&self, file: &File, offset: i64) -> Result<usize>;

    /// pwritev from multiple mappings to `file`
    fn pwritev(&self, file: &File, offset: i64) -> Result<usize>;
}
impl<'a, T: AsRef<[SubMapping<'a>]>> MappingExt for T {
    fn preadv(&self, file: &File, offset: i64) -> Result<usize> {
        let mappings = self.as_ref();

        // Reading from the file scatters data INTO the mappings, so every
        // destination region must be writable.
        if mappings.iter().any(|m| !m.prot.contains(Prot::WRITE)) {
            return Err(Error::new(
                ErrorKind::PermissionDenied,
                "No write access",
            ));
        }

        // Describe each mapping as an iovec for the scatter read.
        let mut iov = Vec::with_capacity(mappings.len());
        for m in mappings {
            iov.push(iovec {
                iov_base: m.ptr.as_ptr() as *mut libc::c_void,
                iov_len: m.len,
            });
        }

        // Safety: each iovec refers to a live, writable mapping of the
        // stated length.
        let read = unsafe {
            libc::preadv(
                file.as_raw_fd(),
                iov.as_ptr(),
                iov.len() as libc::c_int,
                offset,
            )
        };
        if read == -1 {
            return Err(Error::last_os_error());
        }
        Ok(read as usize)
    }

    fn pwritev(&self, file: &File, offset: i64) -> Result<usize> {
        let mappings = self.as_ref();

        // Writing to the file gathers data FROM the mappings, so every
        // source region must be readable.
        if mappings.iter().any(|m| !m.prot.contains(Prot::READ)) {
            return Err(Error::new(
                ErrorKind::PermissionDenied,
                "No read access",
            ));
        }

        // Describe each mapping as an iovec for the gather write.
        let mut iov = Vec::with_capacity(mappings.len());
        for m in mappings {
            iov.push(iovec {
                iov_base: m.ptr.as_ptr() as *mut libc::c_void,
                iov_len: m.len,
            });
        }

        // Safety: each iovec refers to a live, readable mapping of the
        // stated length.
        let written = unsafe {
            libc::pwritev(
                file.as_raw_fd(),
                iov.as_ptr(),
                iov.len() as libc::c_int,
                offset,
            )
        };
        if written == -1 {
            return Err(Error::last_os_error());
        }
        Ok(written as usize)
    }
}
#[cfg(test)]
pub mod tests {
    use super::*;
    use tempfile::tempfile;

    /// Builds a VmmFile backed by a plain temporary file of `len` bytes,
    /// standing in for a real VMM device file in these tests.
    pub fn test_vmm(len: u64) -> VmmFile {
        let file = tempfile().unwrap();
        file.set_len(len).unwrap();
        // Safety (test-only): the temp file is sized above and never
        // truncated afterwards, upholding VmmFile::new's contract.
        unsafe { VmmFile::new(file) }
    }

    // Prot values must agree with libc's PROT_* so bits() can be passed to
    // mmap directly.
    #[test]
    fn memory_protections_match_libc() {
        assert_eq!(Prot::READ.bits() as i32, libc::PROT_READ);
        assert_eq!(Prot::WRITE.bits() as i32, libc::PROT_WRITE);
        assert_eq!(Prot::EXEC.bits() as i32, libc::PROT_EXEC);
    }

    // A mapping carved out of a GuardSpace should round-trip data.
    #[test]
    fn guard_space_creates_readable_writable_regions() {
        let mut guard = GuardSpace::new(GUARD_LEN).unwrap();
        let vmm = test_vmm(GUARD_LEN as u64);
        let mapping = guard
            .mapping(GUARD_LEN, Prot::READ | Prot::WRITE, &vmm, 0)
            .unwrap();
        let input: u64 = 0xDEADBEEF;
        mapping.as_ref().write(&input).unwrap();
        let output = mapping.as_ref().read().unwrap();
        assert_eq!(input, output);
    }

    // The first-fit allocator must fail once the usable space is exhausted.
    #[test]
    fn guard_space_cannot_allocate_beyond_end() {
        let mut guard = GuardSpace::new(GUARD_LEN).unwrap();
        let vmm = test_vmm(GUARD_LEN as u64);
        let _ = guard
            .mapping(GUARD_LEN, Prot::READ | Prot::WRITE, &vmm, 0)
            .unwrap();
        // No space remaining after the first allocation.
        assert!(guard
            .mapping(GUARD_LEN, Prot::READ | Prot::WRITE, &vmm, 0)
            .is_err());
    }

    // GuardSpace::mapping rejects sizes not aligned to PAGE_SIZE.
    #[test]
    fn guard_space_must_allocate_modulo_page_size() {
        let mut guard = GuardSpace::new(GUARD_LEN).unwrap();
        let vmm = test_vmm(GUARD_LEN as u64);
        assert!(guard.mapping(PAGE_SIZE - 1, Prot::READ, &vmm, 0).is_err());
    }

    // read::<T>() must refuse a T larger than the mapping.
    #[test]
    fn mapping_denies_read_beyond_end() {
        let vmm = test_vmm(GUARD_LEN as u64);
        let mapping = Mapping::new(GUARD_LEN, Prot::READ, &vmm, 0).unwrap();
        assert!(mapping.as_ref().read::<[u8; GUARD_LEN + 1]>().is_err());
    }

    // read_bytes truncates (rather than errors) when the buffer outsizes
    // the mapping.
    #[test]
    fn mapping_shortens_read_bytes_beyond_end() {
        let vmm = test_vmm(GUARD_LEN as u64);
        let mapping = Mapping::new(GUARD_LEN, Prot::READ, &vmm, 0).unwrap();
        let mut buf: [u8; GUARD_LEN + 1] = [0; GUARD_LEN + 1];
        assert_eq!(GUARD_LEN, mapping.as_ref().read_bytes(&mut buf).unwrap());
    }

    // Zero-length mappings are permitted and report empty.
    #[test]
    fn mapping_create_empty() {
        let vmm = test_vmm(GUARD_LEN as u64);
        let mapping = Mapping::new(0, Prot::READ, &vmm, 0).unwrap();
        assert_eq!(0, mapping.as_ref().len());
        assert!(mapping.as_ref().is_empty());
    }

    // Subregions may be empty and may end exactly at the mapping's end.
    #[test]
    fn mapping_valid_subregions() {
        let vmm = test_vmm(GUARD_LEN as u64);
        let mapping = Mapping::new(GUARD_LEN, Prot::READ, &vmm, 0).unwrap();
        assert!(mapping.as_ref().subregion(0, 0).is_some());
        assert!(mapping.as_ref().subregion(0, GUARD_LEN / 2).is_some());
        assert!(mapping.as_ref().subregion(GUARD_LEN, 0).is_some());
    }

    // Subregions must not extend past the end of the mapping, even when
    // offset + length overflows usize.
    #[test]
    fn mapping_invalid_subregions() {
        let vmm = test_vmm(GUARD_LEN as u64);
        let mapping = Mapping::new(GUARD_LEN, Prot::READ, &vmm, 0).unwrap();
        // Beyond the end of the mapping.
        assert!(mapping.as_ref().subregion(GUARD_LEN + 1, 0).is_none());
        assert!(mapping.as_ref().subregion(GUARD_LEN, 1).is_none());
        // Overflow.
        assert!(mapping.as_ref().subregion(usize::MAX, 1).is_none());
        assert!(mapping.as_ref().subregion(1, usize::MAX).is_none());
    }
}