Skip to content

Commit

Permalink
Auto merge of rust-lang#86368 - michaelwoerister:lexing-ice, r=davidtwco
Browse files Browse the repository at this point in the history
Disambiguate between SourceFiles from different crates even if they have the same path

This PR fixes an ICE that can occur when the compiler encounters a source file that is part of both the local crate and an upstream crate:

1. While importing source files from an upstream crate the compiler creates a `SourceFile` entry for `foo.rs` in the `SourceMap`. Since this is an imported source file its `src` field is `None`.
2. At a later point the parser encounters `foo.rs` again. It tells the `SourceMap` to load the file but because we already have an entry for `foo.rs` the `SourceMap` will return the existing version with `src == None`.
3. The parser proceeds under the assumption that `src.is_some()` and panics when actually trying to use the file's contents.

This PR fixes the issue by adding the source file's associated `CrateNum` to the `SourceMap`'s interning key. As a consequence the two instances of the file will each have a separate entry in the `SourceMap`. They just happen to share the same file path. This approach seemed less problematic to me than trying to mutate the `SourceFile` after it had already been created.

Another, more involved, approach might be to merge the `src` and the `external_src` field.

Fixes rust-lang#85955
  • Loading branch information
bors committed Jun 22, 2021
2 parents 3487be1 + c3c4ab5 commit 80926fc
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 15 deletions.
38 changes: 33 additions & 5 deletions compiler/rustc_middle/src/ty/query/on_disk_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ pub struct OnDiskCache<'sess> {
cnum_map: OnceCell<UnhashMap<StableCrateId, CrateNum>>,

source_map: &'sess SourceMap,
file_index_to_stable_id: FxHashMap<SourceFileIndex, StableSourceFileId>,
file_index_to_stable_id: FxHashMap<SourceFileIndex, EncodedSourceFileId>,

// Caches that are populated lazily during decoding.
file_index_to_file: Lock<FxHashMap<SourceFileIndex, Lrc<SourceFile>>>,
Expand Down Expand Up @@ -111,7 +111,7 @@ pub struct OnDiskCache<'sess> {
// This type is used only for serialization and deserialization.
#[derive(Encodable, Decodable)]
struct Footer {
file_index_to_stable_id: FxHashMap<SourceFileIndex, StableSourceFileId>,
file_index_to_stable_id: FxHashMap<SourceFileIndex, EncodedSourceFileId>,
query_result_index: EncodedQueryResultIndex,
diagnostics_index: EncodedQueryResultIndex,
// The location of all allocations.
Expand Down Expand Up @@ -157,6 +157,32 @@ crate struct RawDefId {
pub index: u32,
}

/// An `EncodedSourceFileId` is the same as a `StableSourceFileId` except that
/// the source crate is represented as a [StableCrateId] instead of as a
/// `CrateNum`. This way `EncodedSourceFileId` can be encoded and decoded
/// without any additional context, i.e. with a simple `opaque::Decoder` (which
/// is the only thing available when decoding the cache's [Footer].
#[derive(Encodable, Decodable, Clone, Debug)]
struct EncodedSourceFileId {
file_name_hash: u64,
stable_crate_id: StableCrateId,
}

impl EncodedSourceFileId {
fn translate(&self, cnum_map: &UnhashMap<StableCrateId, CrateNum>) -> StableSourceFileId {
let cnum = cnum_map[&self.stable_crate_id];
StableSourceFileId { file_name_hash: self.file_name_hash, cnum }
}

fn new(tcx: TyCtxt<'_>, file: &SourceFile) -> EncodedSourceFileId {
let source_file_id = StableSourceFileId::new(file);
EncodedSourceFileId {
file_name_hash: source_file_id.file_name_hash,
stable_crate_id: tcx.stable_crate_id(source_file_id.cnum),
}
}
}

impl<'sess> OnDiskCache<'sess> {
/// Creates a new `OnDiskCache` instance from the serialized data in `data`.
pub fn new(sess: &'sess Session, data: Vec<u8>, start_pos: usize) -> Self {
Expand Down Expand Up @@ -238,7 +264,8 @@ impl<'sess> OnDiskCache<'sess> {
let index = SourceFileIndex(index as u32);
let file_ptr: *const SourceFile = &**file as *const _;
file_to_file_index.insert(file_ptr, index);
file_index_to_stable_id.insert(index, StableSourceFileId::new(&file));
let source_file_id = EncodedSourceFileId::new(tcx, &file);
file_index_to_stable_id.insert(index, source_file_id);
}

(file_to_file_index, file_index_to_stable_id)
Expand Down Expand Up @@ -605,7 +632,7 @@ pub struct CacheDecoder<'a, 'tcx> {
source_map: &'a SourceMap,
cnum_map: &'a UnhashMap<StableCrateId, CrateNum>,
file_index_to_file: &'a Lock<FxHashMap<SourceFileIndex, Lrc<SourceFile>>>,
file_index_to_stable_id: &'a FxHashMap<SourceFileIndex, StableSourceFileId>,
file_index_to_stable_id: &'a FxHashMap<SourceFileIndex, EncodedSourceFileId>,
alloc_decoding_session: AllocDecodingSession<'a>,
syntax_contexts: &'a FxHashMap<u32, AbsoluteBytePos>,
expn_data: &'a FxHashMap<u32, AbsoluteBytePos>,
Expand All @@ -618,14 +645,15 @@ impl<'a, 'tcx> CacheDecoder<'a, 'tcx> {
ref file_index_to_file,
ref file_index_to_stable_id,
ref source_map,
ref cnum_map,
..
} = *self;

file_index_to_file
.borrow_mut()
.entry(index)
.or_insert_with(|| {
let stable_id = file_index_to_stable_id[&index];
let stable_id = file_index_to_stable_id[&index].translate(cnum_map);
source_map
.source_file_by_stable_id(stable_id)
.expect("failed to lookup `SourceFile` in new context")
Expand Down
41 changes: 31 additions & 10 deletions compiler/rustc_span/src/source_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,25 +117,42 @@ impl FileLoader for RealFileLoader {
}
}

// This is a `SourceFile` identifier that is used to correlate `SourceFile`s between
// subsequent compilation sessions (which is something we need to do during
// incremental compilation).
/// This is a [SourceFile] identifier that is used to correlate source files between
/// subsequent compilation sessions (which is something we need to do during
/// incremental compilation).
///
/// The [StableSourceFileId] also contains the CrateNum of the crate the source
/// file was originally parsed for. This way we get two separate entries in
/// the [SourceMap] if the same file is part of both the local and an upstream
/// crate. Trying to only have one entry for both cases is problematic because
/// at the point where we discover that there's a local use of the file in
/// addition to the upstream one, we might already have made decisions based on
/// the assumption that it's an upstream file. Treating the two files as
/// different has no real downsides.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Encodable, Decodable, Debug)]
pub struct StableSourceFileId(u128);
pub struct StableSourceFileId {
// A hash of the source file's FileName. This is hash so that it's size
// is more predictable than if we included the actual FileName value.
pub file_name_hash: u64,

// The CrateNum of the crate this source file was originally parsed for.
// We cannot include this information in the hash because at the time
// of hashing we don't have the context to map from the CrateNum's numeric
// value to a StableCrateId.
pub cnum: CrateNum,
}

// FIXME: we need a more globally consistent approach to the problem solved by
// StableSourceFileId, perhaps built atop source_file.name_hash.
impl StableSourceFileId {
pub fn new(source_file: &SourceFile) -> StableSourceFileId {
StableSourceFileId::new_from_name(&source_file.name)
StableSourceFileId::new_from_name(&source_file.name, source_file.cnum)
}

fn new_from_name(name: &FileName) -> StableSourceFileId {
fn new_from_name(name: &FileName, cnum: CrateNum) -> StableSourceFileId {
let mut hasher = StableHasher::new();

name.hash(&mut hasher);

StableSourceFileId(hasher.finish())
StableSourceFileId { file_name_hash: hasher.finish(), cnum }
}
}

Expand Down Expand Up @@ -274,7 +291,7 @@ impl SourceMap {
// be empty, so the working directory will be used.
let (filename, _) = self.path_mapping.map_filename_prefix(&filename);

let file_id = StableSourceFileId::new_from_name(&filename);
let file_id = StableSourceFileId::new_from_name(&filename, LOCAL_CRATE);

let lrc_sf = match self.source_file_by_stable_id(file_id) {
Some(lrc_sf) => lrc_sf,
Expand All @@ -288,6 +305,10 @@ impl SourceMap {
self.hash_kind,
));

// Let's make sure the file_id we generated above actually matches
// the ID we generate for the SourceFile we just created.
debug_assert_eq!(StableSourceFileId::new(&source_file), file_id);

let mut files = self.files.borrow_mut();

files.source_files.push(source_file.clone());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#[inline]
pub fn some_function() -> u32 {
1
}
21 changes: 21 additions & 0 deletions src/test/ui/include-macros/same-file-in-two-crates.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// This test makes sure that the compiler can handle the same source file to be
// part of the local crate *and* an upstream crate. This can happen, for example,
// when there is some auto-generated code that is part of both a library and an
// accompanying integration test.
//
// The test uses include!() to include a source file that is also part of
// an upstream crate.
//
// This is a regression test for https://github.com/rust-lang/rust/issues/85955.

// check-pass
// compile-flags: --crate-type=rlib
// aux-build:same-file-in-two-crates-aux.rs
extern crate same_file_in_two_crates_aux;

pub fn foo() -> u32 {
same_file_in_two_crates_aux::some_function() +
some_function()
}

include!("./auxiliary/same-file-in-two-crates-aux.rs");

0 comments on commit 80926fc

Please sign in to comment.