Skip to content

Commit

Permalink
Auto merge of rust-lang#124686 - saethlin:rust-file-footer, r=<try>
Browse files Browse the repository at this point in the history
Add a footer in FileEncoder and check for it in MemDecoder

We have a few reports of ICEs due to decoding failures, where the fault does not lie with the compiler. The goal of this PR is to add some very lightweight and on-by-default validation to the compiler's outputs. If validation fails, we emit a fatal error for rmeta files in general that mentions the path that didn't load, and for incremental compilation artifacts we emit a verbose warning that tries to explain the situation and treat the artifacts as outdated.

The validation currently implemented here is very crude, and yet I think we have 11 ICE reports currently open (you can find them by searching issues for `1002111927320821928687967599834759150` which this simple validation would have detected. The structure of the code changes here should permit the addition of further validation code, such as a checksum, if it is merited. I would like to have code to detect corruption such as reported in rust-lang#124719, but I'm not yet sure how to do that efficiently, and this PR is already a good size.

The ICE reports I have in mind that this PR would have smoothed over are:
rust-lang#124469
rust-lang#123352
rust-lang#123376 [^1]
rust-lang#99763
rust-lang#93900.

---

[^1]: This one might be a compiler bug, but even if it is I think the workflow described is pushing the envelope of what we can support
  • Loading branch information
bors committed May 6, 2024
2 parents 80420a6 + 36f52a4 commit 52bca7c
Show file tree
Hide file tree
Showing 16 changed files with 117 additions and 54 deletions.
5 changes: 4 additions & 1 deletion compiler/rustc_codegen_ssa/src/lib.rs
Expand Up @@ -195,6 +195,7 @@ pub enum CodegenErrors {
EmptyVersionNumber,
EncodingVersionMismatch { version_array: String, rlink_version: u32 },
RustcVersionMismatch { rustc_version: String },
CorruptFile,
}

pub fn provide(providers: &mut Providers) {
Expand Down Expand Up @@ -265,7 +266,9 @@ impl CodegenResults {
});
}

let mut decoder = MemDecoder::new(&data[4..], 0);
let Some(mut decoder) = MemDecoder::new(&data[4..], 0) else {
return Err(CodegenErrors::CorruptFile);
};
let rustc_version = decoder.read_str();
if rustc_version != sess.cfg_version {
return Err(CodegenErrors::RustcVersionMismatch {
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_driver_impl/messages.ftl
Expand Up @@ -10,6 +10,8 @@ driver_impl_ice_path_error = the ICE couldn't be written to `{$path}`: {$error}
driver_impl_ice_path_error_env = the environment variable `RUSTC_ICE` is set to `{$env_var}`
driver_impl_ice_version = rustc {$version} running on {$triple}
driver_impl_rlink_corrupt_file = corrupt metadata encountered in `{$file}`
driver_impl_rlink_empty_version_number = The input does not contain version number
driver_impl_rlink_encoding_version_mismatch = .rlink file was produced with encoding version `{$version_array}`, but the current version is `{$rlink_version}`
Expand Down
8 changes: 5 additions & 3 deletions compiler/rustc_driver_impl/src/lib.rs
Expand Up @@ -95,7 +95,7 @@ mod signal_handler {

use crate::session_diagnostics::{
RLinkEmptyVersionNumber, RLinkEncodingVersionMismatch, RLinkRustcVersionMismatch,
RLinkWrongFileType, RlinkNotAFile, RlinkUnableToRead,
RLinkWrongFileType, RlinkCorruptFile, RlinkNotAFile, RlinkUnableToRead,
};

rustc_fluent_macro::fluent_messages! { "../messages.ftl" }
Expand Down Expand Up @@ -644,15 +644,17 @@ fn process_rlink(sess: &Session, compiler: &interface::Compiler) {
match err {
CodegenErrors::WrongFileType => dcx.emit_fatal(RLinkWrongFileType),
CodegenErrors::EmptyVersionNumber => dcx.emit_fatal(RLinkEmptyVersionNumber),
CodegenErrors::EncodingVersionMismatch { version_array, rlink_version } => sess
.dcx()
CodegenErrors::EncodingVersionMismatch { version_array, rlink_version } => dcx
.emit_fatal(RLinkEncodingVersionMismatch { version_array, rlink_version }),
CodegenErrors::RustcVersionMismatch { rustc_version } => {
dcx.emit_fatal(RLinkRustcVersionMismatch {
rustc_version,
current_version: sess.cfg_version,
})
}
CodegenErrors::CorruptFile => {
dcx.emit_fatal(RlinkCorruptFile { file: file.display().to_string() });
}
};
}
};
Expand Down
6 changes: 6 additions & 0 deletions compiler/rustc_driver_impl/src/session_diagnostics.rs
Expand Up @@ -32,6 +32,12 @@ pub(crate) struct RLinkRustcVersionMismatch<'a> {
#[diag(driver_impl_rlink_no_a_file)]
pub(crate) struct RlinkNotAFile;

#[derive(Diagnostic)]
#[diag(driver_impl_rlink_corrupt_file)]
pub(crate) struct RlinkCorruptFile {
pub file: String,
}

#[derive(Diagnostic)]
#[diag(driver_impl_ice)]
pub(crate) struct Ice;
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_incremental/messages.ftl
Expand Up @@ -21,6 +21,8 @@ incremental_cargo_help_2 =
incremental_copy_workproduct_to_cache =
error copying object file `{$from}` to incremental directory as `{$to}`: {$err}
incremental_corrupt_file = corrupt incremental compilation artifact found at `{$path}`. This file will automatically be ignored and deleted. If you see this message repeatedly or can provoke it without manually manipulating the compiler's artifacts, please file an issue. The incremental compilation system relies on hardlinks and filesystem locks behaving correctly, and may not deal well with OS crashes, so whatever information you can provide about your filesystem or other state may be very relevant.
incremental_create_dep_graph = failed to create dependency graph at `{$path}`: {$err}
incremental_create_incr_comp_dir =
Expand Down
6 changes: 6 additions & 0 deletions compiler/rustc_incremental/src/errors.rs
Expand Up @@ -306,3 +306,9 @@ pub struct DeleteWorkProduct<'a> {
pub path: &'a Path,
pub err: std::io::Error,
}

#[derive(Diagnostic)]
#[diag(incremental_corrupt_file)]
pub struct CorruptFile<'a> {
pub path: &'a Path,
}
21 changes: 17 additions & 4 deletions compiler/rustc_incremental/src/persist/load.rs
Expand Up @@ -115,7 +115,12 @@ fn load_dep_graph(sess: &Session) -> LoadResult<(Arc<SerializedDepGraph>, WorkPr

if let LoadResult::Ok { data: (work_products_data, start_pos) } = load_result {
// Decode the list of work_products
let mut work_product_decoder = MemDecoder::new(&work_products_data[..], start_pos);
let Some(mut work_product_decoder) =
MemDecoder::new(&work_products_data[..], start_pos)
else {
sess.dcx().emit_warn(errors::CorruptFile { path: &work_products_path });
return LoadResult::DataOutOfDate;
};
let work_products: Vec<SerializedWorkProduct> =
Decodable::decode(&mut work_product_decoder);

Expand Down Expand Up @@ -145,7 +150,10 @@ fn load_dep_graph(sess: &Session) -> LoadResult<(Arc<SerializedDepGraph>, WorkPr
LoadResult::DataOutOfDate => LoadResult::DataOutOfDate,
LoadResult::LoadDepGraph(path, err) => LoadResult::LoadDepGraph(path, err),
LoadResult::Ok { data: (bytes, start_pos) } => {
let mut decoder = MemDecoder::new(&bytes, start_pos);
let Some(mut decoder) = MemDecoder::new(&bytes, start_pos) else {
sess.dcx().emit_warn(errors::CorruptFile { path: &path });
return LoadResult::DataOutOfDate;
};
let prev_commandline_args_hash = u64::decode(&mut decoder);

if prev_commandline_args_hash != expected_hash {
Expand Down Expand Up @@ -181,9 +189,14 @@ pub fn load_query_result_cache(sess: &Session) -> Option<OnDiskCache<'_>> {

let _prof_timer = sess.prof.generic_activity("incr_comp_load_query_result_cache");

match load_data(&query_cache_path(sess), sess) {
let path = query_cache_path(sess);
match load_data(&path, sess) {
LoadResult::Ok { data: (bytes, start_pos) } => {
Some(OnDiskCache::new(sess, bytes, start_pos))
let cache = OnDiskCache::new(sess, bytes, start_pos).unwrap_or_else(|| {
sess.dcx().emit_warn(errors::CorruptFile { path: &path });
OnDiskCache::new_empty(sess.source_map())
});
Some(cache)
}
_ => Some(OnDiskCache::new_empty(sess.source_map())),
}
Expand Down
7 changes: 6 additions & 1 deletion compiler/rustc_metadata/src/locator.rs
Expand Up @@ -853,7 +853,12 @@ fn get_metadata_section<'p>(
slice_owned(mmap, Deref::deref)
}
};
let blob = MetadataBlob(raw_bytes);
let Some(blob) = MetadataBlob::new(raw_bytes) else {
return Err(MetadataError::LoadFailure(format!(
"corrupt metadata encountered in {}",
filename.display()
)));
};
match blob.check_compatibility(cfg_version) {
Ok(()) => Ok(blob),
Err(None) => Err(MetadataError::LoadFailure(format!(
Expand Down
28 changes: 22 additions & 6 deletions compiler/rustc_metadata/src/rmeta/decoder.rs
Expand Up @@ -39,10 +39,9 @@ use rustc_span::hygiene::HygieneDecodeContext;
mod cstore_impl;

/// A reference to the raw binary version of crate metadata.
/// A `MetadataBlob` internally is just a reference counted pointer to
/// the actual data, so cloning it is cheap.
#[derive(Clone)]
pub(crate) struct MetadataBlob(pub(crate) OwnedSlice);
/// This struct applies [`MemDecoder`]'s validation when constructed
/// so that later constructions are guaranteed to succeed.
pub(crate) struct MetadataBlob(OwnedSlice);

impl std::ops::Deref for MetadataBlob {
type Target = [u8];
Expand All @@ -53,6 +52,16 @@ impl std::ops::Deref for MetadataBlob {
}
}

impl MetadataBlob {
pub fn new(slice: OwnedSlice) -> Option<Self> {
if MemDecoder::new(&*slice, 0).is_some() { Some(Self(slice)) } else { None }
}

pub fn bytes(&self) -> &OwnedSlice {
&self.0
}
}

/// A map from external crate numbers (as decoded from some crate file) to
/// local crate numbers (as generated during this session). Each external
/// crate may refer to types in other external crates, and each has their
Expand Down Expand Up @@ -164,7 +173,14 @@ pub(super) trait Metadata<'a, 'tcx>: Copy {
fn decoder(self, pos: usize) -> DecodeContext<'a, 'tcx> {
let tcx = self.tcx();
DecodeContext {
opaque: MemDecoder::new(self.blob(), pos),
// FIXME: This unwrap should never panic because we check that it won't when creating
// `MetadataBlob`. Ideally we'd just have a `MetadataDecoder` and hand out subslices of
// it as we do elsewhere in the compiler using `MetadataDecoder::split_at`. But we own
// the data for the decoder so holding onto the `MemDecoder` too would make us a
// self-referential struct which is downright goofy because `MetadataBlob` is already
// self-referential. Probably `MemDecoder` should contain an `OwnedSlice`, but that
// demands a significant refactoring due to our crate graph.
opaque: MemDecoder::new(self.blob(), pos).unwrap(),
cdata: self.cdata(),
blob: self.blob(),
sess: self.sess().or(tcx.map(|tcx| tcx.sess)),
Expand Down Expand Up @@ -392,7 +408,7 @@ impl<'a, 'tcx> TyDecoder for DecodeContext<'a, 'tcx> {
where
F: FnOnce(&mut Self) -> R,
{
let new_opaque = MemDecoder::new(self.opaque.data(), pos);
let new_opaque = self.opaque.split_at(pos);
let old_opaque = mem::replace(&mut self.opaque, new_opaque);
let old_state = mem::replace(&mut self.lazy_state, LazyState::NoNode);
let r = f(self);
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_metadata/src/rmeta/def_path_hash_map.rs
Expand Up @@ -48,7 +48,7 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for DefPathHashMapRef<'static>
fn decode(d: &mut DecodeContext<'a, 'tcx>) -> DefPathHashMapRef<'static> {
let len = d.read_usize();
let pos = d.position();
let o = d.blob().clone().0.slice(|blob| &blob[pos..pos + len]);
let o = d.blob().bytes().clone().slice(|blob| &blob[pos..pos + len]);

// Although we already have the data we need via the `OwnedSlice`, we still need
// to advance the `DecodeContext`'s position so it's in a valid state after
Expand Down
39 changes: 19 additions & 20 deletions compiler/rustc_middle/src/query/on_disk_cache.rs
Expand Up @@ -154,24 +154,22 @@ impl EncodedSourceFileId {

impl<'sess> OnDiskCache<'sess> {
/// Creates a new `OnDiskCache` instance from the serialized data in `data`.
pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Self {
debug_assert!(sess.opts.incremental.is_some());

// Wrap in a scope so we can borrow `data`.
let footer: Footer = {
let mut decoder = MemDecoder::new(&data, start_pos);

// Decode the *position* of the footer, which can be found in the
// last 8 bytes of the file.
let footer_pos = decoder
.with_position(decoder.len() - IntEncodedWithFixedSize::ENCODED_SIZE, |decoder| {
IntEncodedWithFixedSize::decode(decoder).0 as usize
});
// Decode the file footer, which contains all the lookup tables, etc.
decoder.with_position(footer_pos, |decoder| decode_tagged(decoder, TAG_FILE_FOOTER))
};
pub fn new(sess: &'sess Session, data: Mmap, start_pos: usize) -> Option<Self> {
assert!(sess.opts.incremental.is_some());

Self {
let mut decoder = MemDecoder::new(&data, start_pos)?;

// Decode the *position* of the footer, which can be found in the
// last 8 bytes of the file.
let footer_pos = decoder
.with_position(decoder.len() - IntEncodedWithFixedSize::ENCODED_SIZE, |decoder| {
IntEncodedWithFixedSize::decode(decoder).0 as usize
});
// Decode the file footer, which contains all the lookup tables, etc.
let footer: Footer =
decoder.with_position(footer_pos, |decoder| decode_tagged(decoder, TAG_FILE_FOOTER));

Some(Self {
serialized_data: RwLock::new(Some(data)),
file_index_to_stable_id: footer.file_index_to_stable_id,
file_index_to_file: Default::default(),
Expand All @@ -184,7 +182,7 @@ impl<'sess> OnDiskCache<'sess> {
expn_data: footer.expn_data,
foreign_expn_data: footer.foreign_expn_data,
hygiene_context: Default::default(),
}
})
}

pub fn new_empty(source_map: &'sess SourceMap) -> Self {
Expand Down Expand Up @@ -437,7 +435,8 @@ impl<'sess> OnDiskCache<'sess> {
let serialized_data = self.serialized_data.read();
let mut decoder = CacheDecoder {
tcx,
opaque: MemDecoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize()),
opaque: MemDecoder::new(serialized_data.as_deref().unwrap_or(&[]), pos.to_usize())
.unwrap(),
source_map: self.source_map,
file_index_to_file: &self.file_index_to_file,
file_index_to_stable_id: &self.file_index_to_stable_id,
Expand Down Expand Up @@ -558,7 +557,7 @@ impl<'a, 'tcx> TyDecoder for CacheDecoder<'a, 'tcx> {
{
debug_assert!(pos < self.opaque.len());

let new_opaque = MemDecoder::new(self.opaque.data(), pos);
let new_opaque = self.opaque.split_at(pos);
let old_opaque = mem::replace(&mut self.opaque, new_opaque);
let r = f(self);
self.opaque = old_opaque;
Expand Down
10 changes: 3 additions & 7 deletions compiler/rustc_query_system/src/dep_graph/serialized.rs
Expand Up @@ -182,15 +182,13 @@ impl SerializedDepGraph {
pub fn decode<D: Deps>(d: &mut MemDecoder<'_>) -> Arc<SerializedDepGraph> {
// The last 16 bytes are the node count and edge count.
debug!("position: {:?}", d.position());
let (node_count, edge_count, graph_size) =
d.with_position(d.len() - 3 * IntEncodedWithFixedSize::ENCODED_SIZE, |d| {
let (node_count, edge_count) =
d.with_position(d.len() - 2 * IntEncodedWithFixedSize::ENCODED_SIZE, |d| {
debug!("position: {:?}", d.position());
let node_count = IntEncodedWithFixedSize::decode(d).0 as usize;
let edge_count = IntEncodedWithFixedSize::decode(d).0 as usize;
let graph_size = IntEncodedWithFixedSize::decode(d).0 as usize;
(node_count, edge_count, graph_size)
(node_count, edge_count)
});
assert_eq!(d.len(), graph_size);
debug!("position: {:?}", d.position());

debug!(?node_count, ?edge_count);
Expand Down Expand Up @@ -606,8 +604,6 @@ impl<D: Deps> EncoderState<D> {
debug!("position: {:?}", encoder.position());
IntEncodedWithFixedSize(node_count).encode(&mut encoder);
IntEncodedWithFixedSize(edge_count).encode(&mut encoder);
let graph_size = encoder.position() + IntEncodedWithFixedSize::ENCODED_SIZE;
IntEncodedWithFixedSize(graph_size as u64).encode(&mut encoder);
debug!("position: {:?}", encoder.position());
// Drop the encoder so that nothing is written after the counts.
let result = encoder.finish();
Expand Down
16 changes: 11 additions & 5 deletions compiler/rustc_serialize/src/opaque.rs
Expand Up @@ -17,6 +17,8 @@ use crate::int_overflow::DebugStrictAdd;

pub type FileEncodeResult = Result<usize, (PathBuf, io::Error)>;

const FOOTER: &[u8] = b"rust-end-file";

/// The size of the buffer in `FileEncoder`.
const BUF_SIZE: usize = 8192;

Expand Down Expand Up @@ -181,6 +183,7 @@ impl FileEncoder {
}

pub fn finish(&mut self) -> FileEncodeResult {
self.write_all(FOOTER);
self.flush();
#[cfg(debug_assertions)]
{
Expand Down Expand Up @@ -261,15 +264,18 @@ pub struct MemDecoder<'a> {

impl<'a> MemDecoder<'a> {
#[inline]
pub fn new(data: &'a [u8], position: usize) -> MemDecoder<'a> {
pub fn new(data: &'a [u8], position: usize) -> Option<MemDecoder<'a>> {
let data = data.strip_suffix(FOOTER)?;
let Range { start, end } = data.as_ptr_range();
MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData }
Some(MemDecoder { start, current: data[position..].as_ptr(), end, _marker: PhantomData })
}

#[inline]
pub fn data(&self) -> &'a [u8] {
// SAFETY: This recovers the original slice, only using members we never modify.
unsafe { std::slice::from_raw_parts(self.start, self.len()) }
pub fn split_at(&self, position: usize) -> MemDecoder<'a> {
assert!(position <= self.len());
// SAFETY: We checked above that this offset is within the original slice
let current = unsafe { self.start.add(position) };
MemDecoder { start: self.start, current, end: self.end, _marker: PhantomData }
}

#[inline]
Expand Down
13 changes: 9 additions & 4 deletions compiler/rustc_serialize/tests/leb128.rs
@@ -1,4 +1,5 @@
use rustc_serialize::leb128::*;
use rustc_serialize::opaque::MemDecoder;
use rustc_serialize::Decoder;

macro_rules! impl_test_unsigned_leb128 {
Expand All @@ -25,13 +26,15 @@ macro_rules! impl_test_unsigned_leb128 {
let n = $write_fn_name(&mut buf, x);
stream.extend(&buf[..n]);
}
let stream_end = stream.len();
stream.extend(b"rust-end-file");

let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0);
let mut decoder = MemDecoder::new(&stream, 0).unwrap();
for &expected in &values {
let actual = $read_fn_name(&mut decoder);
assert_eq!(expected, actual);
}
assert_eq!(stream.len(), decoder.position());
assert_eq!(stream_end, decoder.position());
}
};
}
Expand Down Expand Up @@ -72,13 +75,15 @@ macro_rules! impl_test_signed_leb128 {
let n = $write_fn_name(&mut buf, x);
stream.extend(&buf[..n]);
}
let stream_end = stream.len();
stream.extend(b"rust-end-file");

let mut decoder = rustc_serialize::opaque::MemDecoder::new(&stream, 0);
let mut decoder = MemDecoder::new(&stream, 0).unwrap();
for &expected in &values {
let actual = $read_fn_name(&mut decoder);
assert_eq!(expected, actual);
}
assert_eq!(stream.len(), decoder.position());
assert_eq!(stream_end, decoder.position());
}
};
}
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_serialize/tests/opaque.rs
Expand Up @@ -42,7 +42,7 @@ fn check_round_trip<
encoder.finish().unwrap();

let data = fs::read(&tmpfile).unwrap();
let mut decoder = MemDecoder::new(&data[..], 0);
let mut decoder = MemDecoder::new(&data[..], 0).unwrap();
for value in values {
let decoded = Decodable::decode(&mut decoder);
assert_eq!(value, decoded);
Expand Down

0 comments on commit 52bca7c

Please sign in to comment.