From 8d36e8b32c800e6227ab2ffa7fb64729313e10a1 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sat, 25 Dec 2021 23:45:33 +0000 Subject: [PATCH 01/46] Add weak memory config option --- src/bin/miri.rs | 3 +++ src/eval.rs | 3 +++ src/machine.rs | 4 ++++ 3 files changed, 10 insertions(+) diff --git a/src/bin/miri.rs b/src/bin/miri.rs index e3f38956da..907e620404 100644 --- a/src/bin/miri.rs +++ b/src/bin/miri.rs @@ -318,6 +318,7 @@ fn main() { miri_config.stacked_borrows = false; } else if arg == "-Zmiri-disable-data-race-detector" { miri_config.data_race_detector = false; + miri_config.weak_memory_emulation = false; } else if arg == "-Zmiri-disable-alignment-check" { miri_config.check_alignment = miri::AlignmentCheck::None; } else if arg == "-Zmiri-symbolic-alignment-check" { @@ -340,6 +341,8 @@ fn main() { isolation_enabled = Some(false); } miri_config.isolated_op = miri::IsolatedOp::Allow; + } else if arg == "-Zmiri-disable-weak-memory-emulation" { + miri_config.weak_memory_emulation = false; } else if let Some(param) = arg.strip_prefix("-Zmiri-isolation-error=") { if matches!(isolation_enabled, Some(false)) { panic!("-Zmiri-isolation-error cannot be used along with -Zmiri-disable-isolation"); diff --git a/src/eval.rs b/src/eval.rs index a782dfa3fc..bdf527a0d1 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -105,6 +105,8 @@ pub struct MiriConfig { pub tag_raw: bool, /// Determine if data race detection should be enabled pub data_race_detector: bool, + /// Determine if weak memory emulation should be enabled. Requires data race detection to be enabled + pub weak_memory_emulation: bool, /// Rate of spurious failures for compare_exchange_weak atomic operations, /// between 0.0 and 1.0, defaulting to 0.8 (80% chance of failure). pub cmpxchg_weak_failure_rate: f64, @@ -142,6 +144,7 @@ impl Default for MiriConfig { tracked_alloc_ids: HashSet::default(), tag_raw: false, data_race_detector: true, + weak_memory_emulation: true, cmpxchg_weak_failure_rate: 0.8, measureme_out: None, panic_on_unsupported: false, diff --git a/src/machine.rs b/src/machine.rs index 369bb92c6f..2060bba0b8 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -323,6 +323,9 @@ pub struct Evaluator<'mir, 'tcx> { /// Corresponds to -Zmiri-mute-stdout-stderr and doesn't write the output but acts as if it succeeded. pub(crate) mute_stdout_stderr: bool, + + /// Whether weak memory emulation is enabled + pub(crate) weak_memory: bool, } impl<'mir, 'tcx> Evaluator<'mir, 'tcx> { @@ -378,6 +381,7 @@ impl<'mir, 'tcx> Evaluator<'mir, 'tcx> { check_alignment: config.check_alignment, cmpxchg_weak_failure_rate: config.cmpxchg_weak_failure_rate, mute_stdout_stderr: config.mute_stdout_stderr, + weak_memory: config.weak_memory_emulation, } } From 16315b1540959e4834ba461471e868868c68c4bc Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Mon, 17 Jan 2022 17:40:17 +0000 Subject: [PATCH 02/46] Add test cases --- tests/run-pass/concurrency/weak_memory.rs | 257 ++++++++++++++++++ tests/run-pass/concurrency/weak_memory.stderr | 2 + 2 files changed, 259 insertions(+) create mode 100644 tests/run-pass/concurrency/weak_memory.rs create mode 100644 tests/run-pass/concurrency/weak_memory.stderr diff --git a/tests/run-pass/concurrency/weak_memory.rs b/tests/run-pass/concurrency/weak_memory.rs new file mode 100644 index 0000000000..bd3d1de7c2 --- /dev/null +++ b/tests/run-pass/concurrency/weak_memory.rs @@ -0,0 +1,257 @@ +// ignore-windows: Concurrency on Windows is not supported yet. 
+// compile-flags: -Zmiri-ignore-leaks -Zmiri-disable-stacked-borrows + +// Weak memory emulation tests. All of the following test if +// our weak memory emulation produces any inconsistent execution outcomes +// +// Due to the random nature of choosing valid stores, it is always +// possible that our tests spuriously succeeds: even though our weak +// memory emulation code has incorrectly identified a store in +// modification order as being valid, it may be never chosen by +// the RNG and never observed in our tests. +// +// To mitigate this, each test is ran enough times such that the chance +// of spurious success is very low. These tests never supriously fail. +// +// Note that we can't effectively test whether our weak memory emulation +// can produce *all* consistent execution outcomes. This may be possible +// if Miri's scheduler is sufficiently random and explores all possible +// interleavings of our small test cases after a reasonable number of runs. +// However, since Miri's scheduler is not even pre-emptive, there will +// always be possible interleavings (and possible execution outcomes), +// that can never be observed regardless of how weak memory emulation is +// implemented. + +// Test cases and their consistent outcomes are from +// http://svr-pes20-cppmem.cl.cam.ac.uk/cppmem/ +// Based on +// M. Batty, S. Owens, S. Sarkar, P. Sewell and T. Weber, +// "Mathematizing C++ concurrency", ACM SIGPLAN Notices, vol. 46, no. 1, pp. 55-66, 2011. +// Available: https://ss265.host.cs.st-andrews.ac.uk/papers/n3132.pdf. + +use std::sync::atomic::Ordering::*; +use std::sync::atomic::{fence, AtomicUsize}; +use std::thread::{spawn, yield_now}; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +// We can't create static items because we need to run each test +// multiple tests +fn static_atomic(val: usize) -> &'static AtomicUsize { + let ret = Box::leak(Box::new(AtomicUsize::new(val))); + // A workaround to put the initialisation value in the store buffer + ret.store(val, Relaxed); + ret +} + +// Spins and yields until until acquires a pre-determined value +fn acquires_value(loc: &AtomicUsize, val: usize) -> usize { + while loc.load(Acquire) != val { + yield_now(); + } + val +} + +fn reads_value(loc: &AtomicUsize, val: usize) -> usize { + while loc.load(Relaxed) != val { + yield_now(); + } + val +} + +// https://plv.mpi-sws.org/scfix/paper.pdf +// 2.2 Second Problem: SC Fences are Too Weak +fn test_rwc_syncs() { + /* + int main() { + atomic_int x = 0; + atomic_int y = 0; + + {{{ x.store(1,mo_relaxed); + ||| { r1=x.load(mo_relaxed).readsvalue(1); + fence(mo_seq_cst); + r2=y.load(mo_relaxed); } + ||| { y.store(1,mo_relaxed); + fence(mo_seq_cst); + r3=x.load(mo_relaxed); } + }}} + return 0; + } + */ + let x = static_atomic(0); + let y = static_atomic(0); + + let j1 = spawn(move || { + x.store(1, Relaxed); + }); + + let j2 = spawn(move || { + reads_value(&x, 1); + fence(SeqCst); + y.load(Relaxed) + }); + + let j3 = spawn(move || { + y.store(1, Relaxed); + fence(SeqCst); + x.load(Relaxed) + }); + + j1.join().unwrap(); + let b = j2.join().unwrap(); + let c = j3.join().unwrap(); + + assert_ne!((b, c), (0, 0)); +} + +fn test_corr() { + let x = static_atomic(0); + let y = static_atomic(0); + + let j1 = spawn(move || { + x.store(1, Relaxed); + x.store(2, Relaxed); + }); + + let j2 = spawn(move || { + let r2 = x.load(Relaxed); // -------------------------------------+ + y.store(1, Release); // ---------------------+ | + r2 // | 
| + }); // | | + // |synchronizes-with |happens-before + let j3 = spawn(move || { // | | + acquires_value(&y, 1); // <------------------+ | + x.load(Relaxed) // <----------------------------------------------+ + // The two reads on x are ordered by hb, so they cannot observe values + // differently from the modification order. If the first read observed + // 2, then the second read must observe 2 as well. + }); + + j1.join().unwrap(); + let r2 = j2.join().unwrap(); + let r3 = j3.join().unwrap(); + if r2 == 2 { + assert_eq!(r3, 2); + } +} + +fn test_wrc() { + let x = static_atomic(0); + let y = static_atomic(0); + + let j1 = spawn(move || { + x.store(1, Release); // ---------------------+---------------------+ + }); // | | + // |synchronizes-with | + let j2 = spawn(move || { // | | + acquires_value(&x, 1); // <------------------+ | + y.store(1, Release); // ---------------------+ |happens-before + }); // | | + // |synchronizes-with | + let j3 = spawn(move || { // | | + acquires_value(&y, 1); // <------------------+ | + x.load(Relaxed) // <-----------------------------------------------+ + }); + + j1.join().unwrap(); + j2.join().unwrap(); + let r3 = j3.join().unwrap(); + + assert_eq!(r3, 1); +} + +fn test_message_passing() { + let mut var = 0u32; + let ptr = &mut var as *mut u32; + let x = EvilSend(ptr); + let y = static_atomic(0); + + let j1 = spawn(move || { + unsafe { *x.0 = 1 }; // -----------------------------------------+ + y.store(1, Release); // ---------------------+ | + }); // | | + // |synchronizes-with | happens-before + let j2 = spawn(move || { // | | + acquires_value(&y, 1); // <------------------+ | + unsafe { *x.0 } // <---------------------------------------------+ + }); + + j1.join().unwrap(); + let r2 = j2.join().unwrap(); + + assert_eq!(r2, 1); +} + +// LB+acq_rel+acq_rel +fn test_load_buffering_acq_rel() { + let x = static_atomic(0); + let y = static_atomic(0); + let j1 = spawn(move || { + let r1 = x.load(Acquire); + y.store(1, Release); + r1 + }); + + let j2 = spawn(move || { + let r2 = y.load(Acquire); + x.store(1, Release); + r2 + }); + + let r1 = j1.join().unwrap(); + let r2 = j2.join().unwrap(); + + // 3 consistent outcomes: (0,0), (0,1), (1,0) + assert_ne!((r1, r2), (1, 1)); +} + +fn test_mixed_access() { + /* + int main() { + atomic_int x = 0; + {{{ + x.store(1, mo_relaxed); + }}} + + x.store(2, mo_relaxed); + + {{{ + r1 = x.load(mo_relaxed); + }}} + + return 0; + } + */ + let x = static_atomic(0); + + spawn(move || { + x.store(1, Relaxed); + }) + .join() + .unwrap(); + + x.store(2, Relaxed); + + let r2 = spawn(move || x.load(Relaxed)).join().unwrap(); + + assert_eq!(r2, 2); +} + +pub fn main() { + // TODO: does this make chances of spurious success + // "sufficiently low"? This also takes a long time to run, + // prehaps each function should be its own test case so they + // can be run in parallel + for _ in 0..500 { + test_mixed_access(); + test_load_buffering_acq_rel(); + test_message_passing(); + test_wrc(); + test_corr(); + test_rwc_syncs(); + } +} diff --git a/tests/run-pass/concurrency/weak_memory.stderr b/tests/run-pass/concurrency/weak_memory.stderr new file mode 100644 index 0000000000..03676519d4 --- /dev/null +++ b/tests/run-pass/concurrency/weak_memory.stderr @@ -0,0 +1,2 @@ +warning: thread support is experimental and incomplete: weak memory effects are not emulated. 
+ From e7698f4f07dcec9cf42b3de133f9ca171d0677f0 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Mon, 27 Dec 2021 19:07:23 +0000 Subject: [PATCH 03/46] Implement weak memory emulation --- src/data_race.rs | 170 +++++++++++-- src/lib.rs | 1 + src/machine.rs | 12 +- src/weak_memory.rs | 297 ++++++++++++++++++++++ tests/run-pass/concurrency/weak_memory.rs | 23 ++ 5 files changed, 476 insertions(+), 27 deletions(-) create mode 100644 src/weak_memory.rs diff --git a/src/data_race.rs b/src/data_race.rs index eb67a487b5..82ee32ddee 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -12,7 +12,7 @@ //! The implementation also models races with memory allocation and deallocation via treating allocation and //! deallocation as a type of write internally for detecting data-races. //! -//! This does not explore weak memory orders and so can still miss data-races +//! Weak memory orders are explored but not all weak behaviours are exhibited, so it can still miss data-races //! but should not report false-positives //! //! Data-race definition from(): @@ -29,22 +29,6 @@ //! This means that the thread-index can be safely re-used, starting on the next timestamp for the newly created //! thread. //! -//! The sequentially consistent ordering corresponds to the ordering that the threads -//! are currently scheduled, this means that the data-race detector has no additional -//! logic for sequentially consistent accesses at the moment since they are indistinguishable -//! from acquire/release operations. If weak memory orderings are explored then this -//! may need to change or be updated accordingly. -//! -//! Per the C++ spec for the memory model a sequentially consistent operation: -//! "A load operation with this memory order performs an acquire operation, -//! a store performs a release operation, and read-modify-write performs -//! both an acquire operation and a release operation, plus a single total -//! order exists in which all threads observe all modifications in the same -//! order (see Sequentially-consistent ordering below) " -//! So in the absence of weak memory effects a seq-cst load & a seq-cst store is identical -//! to an acquire load and a release store given the global sequentially consistent order -//! of the schedule. -//! //! The timestamps used in the data-race detector assign each sequence of non-atomic operations //! followed by a single atomic or concurrent operation a single timestamp. //! Write, Read, Write, ThreadJoin will be represented by a single timestamp value on a thread. @@ -67,6 +51,7 @@ use std::{ mem, }; +use rustc_const_eval::interpret::alloc_range; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_index::vec::{Idx, IndexVec}; use rustc_middle::{mir, ty::layout::TyAndLayout}; @@ -115,10 +100,10 @@ pub enum AtomicFenceOp { /// of a thread, contains the happens-before clock and /// additional metadata to model atomic fence operations. #[derive(Clone, Default, Debug)] -struct ThreadClockSet { +pub struct ThreadClockSet { /// The increasing clock representing timestamps /// that happen-before this thread. - clock: VClock, + pub clock: VClock, /// The set of timestamps that will happen-before this /// thread once it performs an acquire fence. @@ -127,6 +112,12 @@ struct ThreadClockSet { /// The last timestamp of happens-before relations that /// have been released by this thread by a fence. 
fence_release: VClock, + + pub fence_seqcst: VClock, + + pub write_seqcst: VClock, + + pub read_seqcst: VClock, } impl ThreadClockSet { @@ -169,7 +160,7 @@ pub struct DataRace; /// common case where no atomic operations /// exists on the memory cell. #[derive(Clone, PartialEq, Eq, Default, Debug)] -struct AtomicMemoryCellClocks { +pub struct AtomicMemoryCellClocks { /// The clock-vector of the timestamp of the last atomic /// read operation performed by each thread. /// This detects potential data-races between atomic read @@ -514,7 +505,32 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicReadOp, ) -> InterpResult<'tcx, ScalarMaybeUninit> { let this = self.eval_context_ref(); + // This will read from the last store in the modification order of this location. In case + // weak memory emulation is enabled, this may not be the store we will pick to actually read from and return. + // This is fine with StackedBorrow and race checks because they don't concern metadata on + // the *value* (including the associated provenance if this is an AtomicPtr) at this location. + // Only metadata on the location itself is used. let scalar = this.allow_data_races_ref(move |this| this.read_scalar(&place.into()))?; + + if let Some(global) = &this.machine.data_race { + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { + if atomic == AtomicReadOp::SeqCst { + global.sc_read(); + } + let mut rng = this.machine.rng.borrow_mut(); + let loaded = alloc_buffers.buffered_read( + alloc_range(base_offset, place.layout.size), + global, + atomic == AtomicReadOp::SeqCst, + &mut *rng, + || this.validate_atomic_load(place, atomic), + )?; + + return Ok(loaded.unwrap_or(scalar)); + } + } + this.validate_atomic_load(place, atomic)?; Ok(scalar) } @@ -528,7 +544,27 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); this.allow_data_races_mut(move |this| this.write_scalar(val, &(*dest).into()))?; - this.validate_atomic_store(dest, atomic) + + this.validate_atomic_store(dest, atomic)?; + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr)?; + if let ( + crate::AllocExtra { weak_memory: Some(alloc_buffers), .. }, + crate::Evaluator { data_race: Some(global), .. }, + ) = this.get_alloc_extra_mut(alloc_id)? + { + if atomic == AtomicWriteOp::SeqCst { + global.sc_write(); + } + let size = dest.layout.size; + alloc_buffers.buffered_write( + val, + alloc_range(base_offset, size), + global, + atomic == AtomicWriteOp::SeqCst, + )?; + } + + Ok(()) } /// Perform an atomic operation on a memory location. 
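For intuition about the buffered read path added above (an illustrative sketch, not code from this patch): once weak memory emulation is on, a Relaxed load may be served any store still sitting in the location's store buffer that the coherence rules allow, not necessarily the latest one. For example:

    // Sketch: both stores stay in x's store buffer; with no happens-before
    // edge to the reader, buffered_read may hand back either of them.
    use std::sync::atomic::{AtomicUsize, Ordering::Relaxed};
    use std::thread::spawn;

    fn main() {
        let x: &'static AtomicUsize = Box::leak(Box::new(AtomicUsize::new(0)));
        let writer = spawn(move || {
            x.store(1, Relaxed);
            x.store(2, Relaxed);
        });
        // May observe 0, 1 or 2, depending on scheduling and on which buffered
        // store is chosen; without the store buffer the load could only return
        // the latest value at that point in the interleaving.
        let reader = spawn(move || x.load(Relaxed));
        writer.join().unwrap();
        let observed = reader.join().unwrap();
        assert!([0, 1, 2].contains(&observed));
    }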
@@ -550,6 +586,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { this.allow_data_races_mut(|this| this.write_immediate(*val, &(*place).into()))?; this.validate_atomic_rmw(place, atomic)?; + + this.buffered_atomic_rmw(val.to_scalar_or_uninit(), place, atomic)?; Ok(old) } @@ -565,7 +603,10 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { let old = this.allow_data_races_mut(|this| this.read_scalar(&place.into()))?; this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?; + this.validate_atomic_rmw(place, atomic)?; + + this.buffered_atomic_rmw(new, place, atomic)?; Ok(old) } @@ -584,15 +625,25 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { let lt = this.binary_op(mir::BinOp::Lt, &old, &rhs)?.to_scalar()?.to_bool()?; let new_val = if min { - if lt { &old } else { &rhs } + if lt { + &old + } else { + &rhs + } } else { - if lt { &rhs } else { &old } + if lt { + &rhs + } else { + &old + } }; this.allow_data_races_mut(|this| this.write_immediate(**new_val, &(*place).into()))?; this.validate_atomic_rmw(place, atomic)?; + this.buffered_atomic_rmw(new_val.to_scalar_or_uninit(), place, atomic)?; + // Return the old value. Ok(old) } @@ -642,14 +693,56 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { if cmpxchg_success { this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?; this.validate_atomic_rmw(place, success)?; + this.buffered_atomic_rmw(new, place, success)?; } else { this.validate_atomic_load(place, fail)?; + // A failed compare exchange is equivalent to a load, reading from the latest store + // in the modification order. + // Since `old` is only a value and not the store element, we need to separately + // find it in our store buffer and perform load_impl on it. + if let Some(global) = &this.machine.data_race { + if fail == AtomicReadOp::SeqCst { + global.sc_read(); + } + let size = place.layout.size; + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { + if global.multi_threaded.get() { + alloc_buffers.read_from_last_store(alloc_range(base_offset, size), global); + } + } + } } // Return the old value. Ok(res) } + fn buffered_atomic_rmw( + &mut self, + new_val: ScalarMaybeUninit, + place: &MPlaceTy<'tcx, Tag>, + atomic: AtomicRwOp, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let ( + crate::AllocExtra { weak_memory: Some(alloc_buffers), .. }, + crate::Evaluator { data_race: Some(global), .. }, + ) = this.get_alloc_extra_mut(alloc_id)? + { + if atomic == AtomicRwOp::SeqCst { + global.sc_read(); + global.sc_write(); + } + let size = place.layout.size; + let range = alloc_range(base_offset, size); + alloc_buffers.read_from_last_store(range, global); + alloc_buffers.buffered_write(new_val, range, global, atomic == AtomicRwOp::SeqCst)?; + } + Ok(()) + } + /// Update the data-race detector for an atomic read occurring at the /// associated memory-place and on the current thread. 
fn validate_atomic_load( @@ -723,7 +816,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx> { let this = self.eval_context_mut(); if let Some(data_race) = &mut this.machine.data_race { - data_race.maybe_perform_sync_operation(move |index, mut clocks| { + data_race.maybe_perform_sync_operation(|index, mut clocks| { log::trace!("Atomic fence on {:?} with ordering {:?}", index, atomic); // Apply data-race detection for the current fences @@ -737,6 +830,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // Either Release | AcqRel | SeqCst clocks.apply_release_fence(); } + if atomic == AtomicFenceOp::SeqCst { + data_race.last_sc_fence.borrow_mut().set_at_index(&clocks.clock, index); + clocks.fence_seqcst.join(&data_race.last_sc_fence.borrow()); + clocks.write_seqcst.join(&data_race.last_sc_write.borrow()); + } // Increment timestamp in case of release semantics. Ok(atomic != AtomicFenceOp::Acquire) @@ -1116,6 +1214,12 @@ pub struct GlobalState { /// The associated vector index will be moved into re-use candidates /// after the join operation occurs. terminated_threads: RefCell>, + + /// The timestamp of last SC fence performed by each thread + last_sc_fence: RefCell, + + /// The timestamp of last SC write performed by each thread + last_sc_write: RefCell, } impl GlobalState { @@ -1131,6 +1235,8 @@ impl GlobalState { active_thread_count: Cell::new(1), reuse_candidates: RefCell::new(FxHashSet::default()), terminated_threads: RefCell::new(FxHashMap::default()), + last_sc_fence: RefCell::new(VClock::default()), + last_sc_write: RefCell::new(VClock::default()), }; // Setup the main-thread since it is not explicitly created: @@ -1445,7 +1551,7 @@ impl GlobalState { /// Load the current vector clock in use and the current set of thread clocks /// in use for the vector. #[inline] - fn current_thread_state(&self) -> (VectorIdx, Ref<'_, ThreadClockSet>) { + pub fn current_thread_state(&self) -> (VectorIdx, Ref<'_, ThreadClockSet>) { let index = self.current_index(); let ref_vector = self.vector_clocks.borrow(); let clocks = Ref::map(ref_vector, |vec| &vec[index]); @@ -1455,7 +1561,7 @@ impl GlobalState { /// Load the current vector clock in use and the current set of thread clocks /// in use for the vector mutably for modification. #[inline] - fn current_thread_state_mut(&self) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { + pub fn current_thread_state_mut(&self) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { let index = self.current_index(); let ref_vector = self.vector_clocks.borrow_mut(); let clocks = RefMut::map(ref_vector, |vec| &mut vec[index]); @@ -1468,4 +1574,16 @@ impl GlobalState { fn current_index(&self) -> VectorIdx { self.current_index.get() } + + // SC ATOMIC STORE rule in the paper. + fn sc_write(&self) { + let (index, clocks) = self.current_thread_state(); + self.last_sc_write.borrow_mut().set_at_index(&clocks.clock, index); + } + + // SC ATOMIC READ rule in the paper. + fn sc_read(&self) { + let (.., mut clocks) = self.current_thread_state_mut(); + clocks.read_seqcst.join(&self.last_sc_fence.borrow()); + } } diff --git a/src/lib.rs b/src/lib.rs index f7c256656a..06ab2fabab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -45,6 +45,7 @@ mod stacked_borrows; mod sync; mod thread; mod vector_clock; +mod weak_memory; // Establish a "crate-wide prelude": we often import `crate::*`. 
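As a user-visible consequence of the SC fence bookkeeping above (last_sc_fence, last_sc_write, and the SC ATOMIC READ/STORE rules), the classic store-buffering litmus test with Relaxed accesses separated by SeqCst fences should no longer be able to produce (0, 0). A sketch of that litmus test for reference (not part of the patch; the assertion encodes the expected C++11 guarantee):

    use std::sync::atomic::{fence, AtomicUsize, Ordering::*};
    use std::thread::spawn;

    fn main() {
        let x: &'static AtomicUsize = Box::leak(Box::new(AtomicUsize::new(0)));
        let y: &'static AtomicUsize = Box::leak(Box::new(AtomicUsize::new(0)));
        let t1 = spawn(move || {
            x.store(1, Relaxed);
            fence(SeqCst);
            y.load(Relaxed)
        });
        let t2 = spawn(move || {
            y.store(1, Relaxed);
            fence(SeqCst);
            x.load(Relaxed)
        });
        let (a, b) = (t1.join().unwrap(), t2.join().unwrap());
        // The two SC fences are totally ordered, so at least one of the
        // Relaxed loads must observe the other thread's store.
        assert_ne!((a, b), (0, 0));
    }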
diff --git a/src/machine.rs b/src/machine.rs index 2060bba0b8..aa2a930ccd 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -190,6 +190,9 @@ pub struct AllocExtra { /// Data race detection via the use of a vector-clock, /// this is only added if it is enabled. pub data_race: Option, + /// Weak memory emulation via the use of store buffers, + /// this is only added if it is enabled. + pub weak_memory: Option, } /// Precomputed layouts of primitive types @@ -630,9 +633,16 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { } else { None }; + let buffer_alloc = if ecx.machine.weak_memory { + // FIXME: if this is an atomic obejct, we want to supply its initial value + // while allocating the store buffer here. + Some(weak_memory::AllocExtra::new_allocation(alloc.size())) + } else { + None + }; let alloc: Allocation = alloc.convert_tag_add_extra( &ecx.tcx, - AllocExtra { stacked_borrows: stacks, data_race: race_alloc }, + AllocExtra { stacked_borrows: stacks, data_race: race_alloc, weak_memory: buffer_alloc }, |ptr| Evaluator::tag_alloc_base_pointer(ecx, ptr), ); Cow::Owned(alloc) diff --git a/src/weak_memory.rs b/src/weak_memory.rs new file mode 100644 index 0000000000..c82a31d0a8 --- /dev/null +++ b/src/weak_memory.rs @@ -0,0 +1,297 @@ +//! Implementation of C++11-consistent weak memory emulation using store buffers +//! based on Dynamic Race Detection for C++ ("the paper"): +//! https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf + +// Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in §5.3: +// 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri), +// but this is not used anywhere so it's omitted here. +// +// 2. In the operational semantics, each store element keeps the timestamp of a thread when it loads from the store. +// If the same thread loads from the same store element multiple times, then the timestamps at all loads are saved in a list of load elements. +// This is not necessary as later loads by the same thread will always have greater timetstamp values, so we only need to record the timestamp of the first +// load by each thread. This optimisation is done in tsan11 +// (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.h#L35-L37) +// and here. +// +// 3. §4.5 of the paper wants an SC store to mark all existing stores in the buffer that happens before it +// as SC. This is not done in the operational semantics but implemented correctly in tsan11 +// (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.cc#L160-L167) +// and here. +// +// 4. W_SC ; R_SC case requires the SC load to ignore all but last store maked SC (stores not marked SC are not +// affected). But this rule is applied to all loads in ReadsFromSet from the paper (last two lines of code), not just SC load. +// This is implemented correctly in tsan11 +// (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.cc#L295) +// and here. 
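// For intuition (an illustrative sketch, not taken from the paper or tsan11):
// each atomic location conceptually keeps a queue of stores in modification
// order, tagged with the storing thread and its timestamp. For example, after
// thread T1 executes
//     x.store(1, Relaxed);
//     x.store(2, SeqCst);
// the buffer for x is roughly
//     [ { val: 1, store_index: T1, timestamp: 5, is_seqcst: true },  // marked SC per deviation 3
//       { val: 2, store_index: T1, timestamp: 6, is_seqcst: true } ]
// and a later load picks one of the entries that the checks in `fetch_store`
// below still allow, instead of unconditionally the newest one.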
+ +use std::{ + cell::{Ref, RefCell, RefMut}, + collections::VecDeque, +}; + +use rustc_const_eval::interpret::{AllocRange, InterpResult, ScalarMaybeUninit}; +use rustc_data_structures::fx::FxHashMap; +use rustc_target::abi::Size; + +use crate::{ + data_race::{GlobalState, ThreadClockSet}, + RangeMap, Tag, VClock, VTimestamp, VectorIdx, +}; + +pub type AllocExtra = StoreBufferAlloc; +#[derive(Debug, Clone)] +pub struct StoreBufferAlloc { + /// Store buffer of each atomic object in this allocation + // Load may modify a StoreBuffer to record the loading thread's + // timestamp so we need interior mutability here. + store_buffer: RefCell>, +} + +impl StoreBufferAlloc { + pub fn new_allocation(len: Size) -> Self { + Self { store_buffer: RefCell::new(RangeMap::new(len, StoreBuffer::default())) } + } + + /// Gets a store buffer associated with an atomic object in this allocation + fn get_store_buffer(&self, range: AllocRange) -> Ref<'_, StoreBuffer> { + Ref::map(self.store_buffer.borrow(), |range_map| { + let (.., store_buffer) = range_map.iter(range.start, range.size).next().unwrap(); + store_buffer + }) + } + + fn get_store_buffer_mut(&self, range: AllocRange) -> RefMut<'_, StoreBuffer> { + RefMut::map(self.store_buffer.borrow_mut(), |range_map| { + let (.., store_buffer) = range_map.iter_mut(range.start, range.size).next().unwrap(); + store_buffer + }) + } + + /// Reads from the last store in modification order + pub fn read_from_last_store<'tcx>(&self, range: AllocRange, global: &GlobalState) { + let store_buffer = self.get_store_buffer(range); + let store_elem = store_buffer.buffer.back(); + if let Some(store_elem) = store_elem { + let (index, clocks) = global.current_thread_state(); + store_elem.load_impl(index, &clocks); + } + } + + pub fn buffered_read<'tcx>( + &self, + range: AllocRange, + global: &GlobalState, + is_seqcst: bool, + rng: &mut (impl rand::Rng + ?Sized), + validate: impl FnOnce() -> InterpResult<'tcx>, + ) -> InterpResult<'tcx, Option>> { + // Having a live borrow to store_buffer while calling validate_atomic_load is fine + // because the race detector doesn't touch store_buffer + let store_buffer = self.get_store_buffer(range); + + let store_elem = { + // The `clocks` we got here must be dropped before calling validate_atomic_load + // as the race detector will update it + let (.., clocks) = global.current_thread_state(); + // Load from a valid entry in the store buffer + store_buffer.fetch_store(is_seqcst, &clocks, &mut *rng) + }; + + // Unlike in write_scalar_atomic, thread clock updates have to be done + // after we've picked a store element from the store buffer, as presented + // in ATOMIC LOAD rule of the paper. 
This is because fetch_store + // requires access to ThreadClockSet.clock, which is updated by the race detector + validate()?; + + let loaded = store_elem.map(|store_elem| { + let (index, clocks) = global.current_thread_state(); + store_elem.load_impl(index, &clocks) + }); + Ok(loaded) + } + + pub fn buffered_write<'tcx>( + &mut self, + val: ScalarMaybeUninit, + range: AllocRange, + global: &GlobalState, + is_seqcst: bool, + ) -> InterpResult<'tcx> { + let (index, clocks) = global.current_thread_state(); + + let mut store_buffer = self.get_store_buffer_mut(range); + store_buffer.store_impl(val, index, &clocks.clock, is_seqcst); + Ok(()) + } +} + +const STORE_BUFFER_LIMIT: usize = 128; +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StoreBuffer { + // Stores to this location in modification order + buffer: VecDeque, +} + +impl Default for StoreBuffer { + fn default() -> Self { + let mut buffer = VecDeque::new(); + buffer.reserve(STORE_BUFFER_LIMIT); + Self { buffer } + } +} + +impl<'mir, 'tcx: 'mir> StoreBuffer { + /// Selects a valid store element in the buffer. + /// The buffer does not contain the value used to initialise the atomic object + /// so a fresh atomic object has an empty store buffer until an explicit store. + fn fetch_store( + &self, + is_seqcst: bool, + clocks: &ThreadClockSet, + rng: &mut R, + ) -> Option<&StoreElement> { + use rand::seq::IteratorRandom; + let mut found_sc = false; + // FIXME: this should be an inclusive take_while (stops after a false predicate, but + // includes the element that gave the false), but such function doesn't yet + // exist in the standard libary https://github.com/rust-lang/rust/issues/62208 + let mut keep_searching = true; + let candidates = self + .buffer + .iter() + .rev() + .take_while(move |&store_elem| { + if !keep_searching { + return false; + } + // CoWR: if a store happens-before the current load, + // then we can't read-from anything earlier in modification order. + if store_elem.timestamp <= clocks.clock[store_elem.store_index] { + log::info!("Stopped due to coherent write-read"); + keep_searching = false; + return true; + } + + // CoRR: if there was a load from this store which happened-before the current load, + // then we cannot read-from anything earlier in modification order. 
+ if store_elem.loads.borrow().iter().any(|(&load_index, &load_timestamp)| { + load_timestamp <= clocks.clock[load_index] + }) { + log::info!("Stopped due to coherent read-read"); + keep_searching = false; + return true; + } + + // The current load, which may be sequenced-after an SC fence, can only read-from + // the last store sequenced-before an SC fence in another thread (or any stores + // later than that SC fence) + if store_elem.timestamp <= clocks.fence_seqcst[store_elem.store_index] { + log::info!("Stopped due to coherent load sequenced after sc fence"); + keep_searching = false; + return true; + } + + // The current non-SC load can only read-from the latest SC store (or any stores later than that + // SC store) + if store_elem.timestamp <= clocks.write_seqcst[store_elem.store_index] + && store_elem.is_seqcst + { + log::info!("Stopped due to needing to load from the last SC store"); + keep_searching = false; + return true; + } + + // The current SC load can only read-from the last store sequenced-before + // the last SC fence (or any stores later than the SC fence) + if is_seqcst && store_elem.timestamp <= clocks.read_seqcst[store_elem.store_index] { + log::info!("Stopped due to sc load needing to load from the last SC store before an SC fence"); + keep_searching = false; + return true; + } + + true + }) + .filter(|&store_elem| { + if is_seqcst { + // An SC load needs to ignore all but last store maked SC (stores not marked SC are not + // affected) + let include = !(store_elem.is_seqcst && found_sc); + found_sc |= store_elem.is_seqcst; + include + } else { + true + } + }); + + candidates.choose(rng) + } + + /// ATOMIC STORE IMPL in the paper (except we don't need the location's vector clock) + fn store_impl( + &mut self, + val: ScalarMaybeUninit, + index: VectorIdx, + thread_clock: &VClock, + is_seqcst: bool, + ) { + let store_elem = StoreElement { + store_index: index, + timestamp: thread_clock[index], + // In the language provided in the paper, an atomic store takes the value from a + // non-atomic memory location. + // But we already have the immediate value here so we don't need to do the memory + // access + val, + is_seqcst, + loads: RefCell::new(FxHashMap::default()), + }; + self.buffer.push_back(store_elem); + if self.buffer.len() > STORE_BUFFER_LIMIT { + self.buffer.pop_front(); + } + if is_seqcst { + // Every store that happens before this needs to be marked as SC + // so that in a later SC load, only the last SC store (i.e. this one) or stores that + // aren't ordered by hb with the last SC is picked. + self.buffer.iter_mut().rev().for_each(|elem| { + if elem.timestamp <= thread_clock[elem.store_index] { + elem.is_seqcst = true; + } + }) + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StoreElement { + /// The identifier of the vector index, corresponding to a thread + /// that performed the store. + store_index: VectorIdx, + + /// Whether this store is SC. + is_seqcst: bool, + + /// The timestamp of the storing thread when it performed the store + timestamp: VTimestamp, + /// The value of this store + val: ScalarMaybeUninit, + + /// Timestamp of first loads from this store element by each thread + /// Behind a RefCell to keep load op take &self + loads: RefCell>, +} + +impl StoreElement { + /// ATOMIC LOAD IMPL in the paper + /// Unlike the operational semantics in the paper, we don't need to keep track + /// of the thread timestamp for every single load. 
Keeping track of the first (smallest) + /// timestamp of each thread that has loaded from a store is sufficient: if the earliest + /// load of another thread happens before the current one, then we must stop searching the store + /// buffer regardless of subsequent loads by the same thread; if the earliest load of another + /// thread doesn't happen before the current one, then no subsequent load by the other thread + /// can happen before the current one. + fn load_impl(&self, index: VectorIdx, clocks: &ThreadClockSet) -> ScalarMaybeUninit { + let _ = self.loads.borrow_mut().try_insert(index, clocks.clock[index]); + self.val + } +} diff --git a/tests/run-pass/concurrency/weak_memory.rs b/tests/run-pass/concurrency/weak_memory.rs index bd3d1de7c2..b8e780ade1 100644 --- a/tests/run-pass/concurrency/weak_memory.rs +++ b/tests/run-pass/concurrency/weak_memory.rs @@ -63,6 +63,28 @@ fn reads_value(loc: &AtomicUsize, val: usize) -> usize { val } +// https://plv.mpi-sws.org/scfix/paper.pdf +// Test case SB +fn test_sc_store_buffering() { + let x = static_atomic(0); + let y = static_atomic(0); + + let j1 = spawn(move || { + x.store(1, SeqCst); + y.load(SeqCst) + }); + + let j2 = spawn(move || { + y.store(1, SeqCst); + x.load(SeqCst) + }); + + let a = j1.join().unwrap(); + let b = j2.join().unwrap(); + + assert_ne!((a, b), (0, 0)); +} + // https://plv.mpi-sws.org/scfix/paper.pdf // 2.2 Second Problem: SC Fences are Too Weak fn test_rwc_syncs() { @@ -247,6 +269,7 @@ pub fn main() { // prehaps each function should be its own test case so they // can be run in parallel for _ in 0..500 { + test_sc_store_buffering(); test_mixed_access(); test_load_buffering_acq_rel(); test_message_passing(); From aca3b3a645e6d0295e9f1829c37af811a6de5251 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Fri, 15 Apr 2022 21:44:22 +0100 Subject: [PATCH 04/46] set_at_index sets the default value (0) if index doesn't exist in the other vector --- src/vector_clock.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/vector_clock.rs b/src/vector_clock.rs index e13e9c39fc..716fdba0f6 100644 --- a/src/vector_clock.rs +++ b/src/vector_clock.rs @@ -108,10 +108,8 @@ impl VClock { /// Set the element at the current index of the vector pub fn set_at_index(&mut self, other: &Self, idx: VectorIdx) { - let idx = idx.index(); - let mut_slice = self.get_mut_with_min_len(idx + 1); - let slice = other.as_slice(); - mut_slice[idx] = slice[idx]; + let mut_slice = self.get_mut_with_min_len(idx.index() + 1); + mut_slice[idx.index()] = other[idx]; } /// Set the vector to the all-zero vector From cf266584b7d9e42f6b1ba622b828e7d95c243225 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sat, 16 Apr 2022 01:01:49 +0100 Subject: [PATCH 05/46] Comment out and provide context to C++20 test --- tests/run-pass/concurrency/weak_memory.rs | 144 +++++++++++----------- 1 file changed, 75 insertions(+), 69 deletions(-) diff --git a/tests/run-pass/concurrency/weak_memory.rs b/tests/run-pass/concurrency/weak_memory.rs index b8e780ade1..efbbc45909 100644 --- a/tests/run-pass/concurrency/weak_memory.rs +++ b/tests/run-pass/concurrency/weak_memory.rs @@ -63,73 +63,6 @@ fn reads_value(loc: &AtomicUsize, val: usize) -> usize { val } -// https://plv.mpi-sws.org/scfix/paper.pdf -// Test case SB -fn test_sc_store_buffering() { - let x = static_atomic(0); - let y = static_atomic(0); - - let j1 = spawn(move || { - x.store(1, SeqCst); - y.load(SeqCst) - }); - - let j2 = spawn(move || { - y.store(1, SeqCst); - x.load(SeqCst) - }); - - let a = 
j1.join().unwrap(); - let b = j2.join().unwrap(); - - assert_ne!((a, b), (0, 0)); -} - -// https://plv.mpi-sws.org/scfix/paper.pdf -// 2.2 Second Problem: SC Fences are Too Weak -fn test_rwc_syncs() { - /* - int main() { - atomic_int x = 0; - atomic_int y = 0; - - {{{ x.store(1,mo_relaxed); - ||| { r1=x.load(mo_relaxed).readsvalue(1); - fence(mo_seq_cst); - r2=y.load(mo_relaxed); } - ||| { y.store(1,mo_relaxed); - fence(mo_seq_cst); - r3=x.load(mo_relaxed); } - }}} - return 0; - } - */ - let x = static_atomic(0); - let y = static_atomic(0); - - let j1 = spawn(move || { - x.store(1, Relaxed); - }); - - let j2 = spawn(move || { - reads_value(&x, 1); - fence(SeqCst); - y.load(Relaxed) - }); - - let j3 = spawn(move || { - y.store(1, Relaxed); - fence(SeqCst); - x.load(Relaxed) - }); - - j1.join().unwrap(); - let b = j2.join().unwrap(); - let c = j3.join().unwrap(); - - assert_ne!((b, c), (0, 0)); -} - fn test_corr() { let x = static_atomic(0); let y = static_atomic(0); @@ -263,18 +196,91 @@ fn test_mixed_access() { assert_eq!(r2, 2); } +// The following two tests are taken from Repairing Sequential Consistency in C/C++11 +// by Lahav et al. +// https://plv.mpi-sws.org/scfix/paper.pdf + +// Test case SB +fn test_sc_store_buffering() { + let x = static_atomic(0); + let y = static_atomic(0); + + let j1 = spawn(move || { + x.store(1, SeqCst); + y.load(SeqCst) + }); + + let j2 = spawn(move || { + y.store(1, SeqCst); + x.load(SeqCst) + }); + + let a = j1.join().unwrap(); + let b = j2.join().unwrap(); + + assert_ne!((a, b), (0, 0)); +} + +// 2.2 Second Problem: SC Fences are Too Weak +// This test should pass under the C++20 model Rust is using. +// Unfortunately, Miri's weak memory emulation only follows C++11 model +// as we don't know how to correctly emulate C++20's revised SC semantics +#[allow(dead_code)] +fn test_cpp20_rwc_syncs() { + /* + int main() { + atomic_int x = 0; + atomic_int y = 0; + + {{{ x.store(1,mo_relaxed); + ||| { r1=x.load(mo_relaxed).readsvalue(1); + fence(mo_seq_cst); + r2=y.load(mo_relaxed); } + ||| { y.store(1,mo_relaxed); + fence(mo_seq_cst); + r3=x.load(mo_relaxed); } + }}} + return 0; + } + */ + let x = static_atomic(0); + let y = static_atomic(0); + + let j1 = spawn(move || { + x.store(1, Relaxed); + }); + + let j2 = spawn(move || { + reads_value(&x, 1); + fence(SeqCst); + y.load(Relaxed) + }); + + let j3 = spawn(move || { + y.store(1, Relaxed); + fence(SeqCst); + x.load(Relaxed) + }); + + j1.join().unwrap(); + let b = j2.join().unwrap(); + let c = j3.join().unwrap(); + + assert_ne!((b, c), (0, 0)); +} + pub fn main() { // TODO: does this make chances of spurious success // "sufficiently low"? 
This also takes a long time to run, // prehaps each function should be its own test case so they // can be run in parallel for _ in 0..500 { - test_sc_store_buffering(); test_mixed_access(); test_load_buffering_acq_rel(); test_message_passing(); test_wrc(); test_corr(); - test_rwc_syncs(); + test_sc_store_buffering(); + // test_cpp20_rwc_syncs(); } } From ecdab5ff35297e9d70647f076b3aba656c8ad850 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sun, 1 May 2022 12:36:00 +0100 Subject: [PATCH 06/46] Clearer boundries between alloc metadata with multiple buffers and an individual store buffer --- src/data_race.rs | 20 ++++++++--------- src/weak_memory.rs | 56 +++++++++++++++++++++------------------------- 2 files changed, 36 insertions(+), 40 deletions(-) diff --git a/src/data_race.rs b/src/data_race.rs index 82ee32ddee..303cf7007e 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -519,8 +519,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { global.sc_read(); } let mut rng = this.machine.rng.borrow_mut(); - let loaded = alloc_buffers.buffered_read( - alloc_range(base_offset, place.layout.size), + let buffer = alloc_buffers.get_store_buffer(alloc_range(base_offset, place.layout.size)); + let loaded = buffer.buffered_read( global, atomic == AtomicReadOp::SeqCst, &mut *rng, @@ -555,10 +555,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { if atomic == AtomicWriteOp::SeqCst { global.sc_write(); } - let size = dest.layout.size; - alloc_buffers.buffered_write( + let mut buffer = alloc_buffers.get_store_buffer_mut(alloc_range(base_offset, dest.layout.size)); + buffer.buffered_write( val, - alloc_range(base_offset, size), global, atomic == AtomicWriteOp::SeqCst, )?; @@ -708,7 +707,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { let (alloc_id, base_offset, ..) 
= this.ptr_get_alloc_id(place.ptr)?; if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { if global.multi_threaded.get() { - alloc_buffers.read_from_last_store(alloc_range(base_offset, size), global); + let buffer = alloc_buffers.get_store_buffer(alloc_range(base_offset, size)); + buffer.read_from_last_store(global); } } } @@ -735,10 +735,10 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { global.sc_read(); global.sc_write(); } - let size = place.layout.size; - let range = alloc_range(base_offset, size); - alloc_buffers.read_from_last_store(range, global); - alloc_buffers.buffered_write(new_val, range, global, atomic == AtomicRwOp::SeqCst)?; + let range = alloc_range(base_offset, place.layout.size); + let mut buffer = alloc_buffers.get_store_buffer_mut(range); + buffer.read_from_last_store(global); + buffer.buffered_write(new_val, global, atomic == AtomicRwOp::SeqCst)?; } Ok(()) } diff --git a/src/weak_memory.rs b/src/weak_memory.rs index c82a31d0a8..2cf9a98b13 100644 --- a/src/weak_memory.rs +++ b/src/weak_memory.rs @@ -53,33 +53,49 @@ impl StoreBufferAlloc { } /// Gets a store buffer associated with an atomic object in this allocation - fn get_store_buffer(&self, range: AllocRange) -> Ref<'_, StoreBuffer> { + pub fn get_store_buffer(&self, range: AllocRange) -> Ref<'_, StoreBuffer> { Ref::map(self.store_buffer.borrow(), |range_map| { let (.., store_buffer) = range_map.iter(range.start, range.size).next().unwrap(); store_buffer }) } - fn get_store_buffer_mut(&self, range: AllocRange) -> RefMut<'_, StoreBuffer> { + pub fn get_store_buffer_mut(&self, range: AllocRange) -> RefMut<'_, StoreBuffer> { RefMut::map(self.store_buffer.borrow_mut(), |range_map| { let (.., store_buffer) = range_map.iter_mut(range.start, range.size).next().unwrap(); store_buffer }) } +} + +const STORE_BUFFER_LIMIT: usize = 128; +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StoreBuffer { + // Stores to this location in modification order + buffer: VecDeque, +} + +impl Default for StoreBuffer { + fn default() -> Self { + let mut buffer = VecDeque::new(); + buffer.reserve(STORE_BUFFER_LIMIT); + Self { buffer } + } +} + +impl<'mir, 'tcx: 'mir> StoreBuffer { /// Reads from the last store in modification order - pub fn read_from_last_store<'tcx>(&self, range: AllocRange, global: &GlobalState) { - let store_buffer = self.get_store_buffer(range); - let store_elem = store_buffer.buffer.back(); + pub fn read_from_last_store(&self, global: &GlobalState) { + let store_elem = self.buffer.back(); if let Some(store_elem) = store_elem { let (index, clocks) = global.current_thread_state(); store_elem.load_impl(index, &clocks); } } - pub fn buffered_read<'tcx>( + pub fn buffered_read( &self, - range: AllocRange, global: &GlobalState, is_seqcst: bool, rng: &mut (impl rand::Rng + ?Sized), @@ -87,14 +103,13 @@ impl StoreBufferAlloc { ) -> InterpResult<'tcx, Option>> { // Having a live borrow to store_buffer while calling validate_atomic_load is fine // because the race detector doesn't touch store_buffer - let store_buffer = self.get_store_buffer(range); let store_elem = { // The `clocks` we got here must be dropped before calling validate_atomic_load // as the race detector will update it let (.., clocks) = global.current_thread_state(); // Load from a valid entry in the store buffer - store_buffer.fetch_store(is_seqcst, &clocks, &mut *rng) + self.fetch_store(is_seqcst, &clocks, &mut *rng) }; // Unlike in write_scalar_atomic, thread clock updates have to be done @@ 
-110,37 +125,18 @@ impl StoreBufferAlloc { Ok(loaded) } - pub fn buffered_write<'tcx>( + pub fn buffered_write( &mut self, val: ScalarMaybeUninit, - range: AllocRange, global: &GlobalState, is_seqcst: bool, ) -> InterpResult<'tcx> { let (index, clocks) = global.current_thread_state(); - let mut store_buffer = self.get_store_buffer_mut(range); - store_buffer.store_impl(val, index, &clocks.clock, is_seqcst); + self.store_impl(val, index, &clocks.clock, is_seqcst); Ok(()) } -} -const STORE_BUFFER_LIMIT: usize = 128; -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct StoreBuffer { - // Stores to this location in modification order - buffer: VecDeque, -} - -impl Default for StoreBuffer { - fn default() -> Self { - let mut buffer = VecDeque::new(); - buffer.reserve(STORE_BUFFER_LIMIT); - Self { buffer } - } -} - -impl<'mir, 'tcx: 'mir> StoreBuffer { /// Selects a valid store element in the buffer. /// The buffer does not contain the value used to initialise the atomic object /// so a fresh atomic object has an empty store buffer until an explicit store. From 53f4887659fd587ca551db664c317fb15998dfd0 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Fri, 6 May 2022 23:46:29 +0100 Subject: [PATCH 07/46] Use a new AllocationMap to store store buffers in the same allocation --- src/allocation_map.rs | 272 ++++++++++++++++++++++++++++++++++++++++++ src/data_race.rs | 26 ++-- src/lib.rs | 1 + src/machine.rs | 8 +- src/weak_memory.rs | 74 +++++++++--- 5 files changed, 342 insertions(+), 39 deletions(-) create mode 100644 src/allocation_map.rs diff --git a/src/allocation_map.rs b/src/allocation_map.rs new file mode 100644 index 0000000000..6c14ce1654 --- /dev/null +++ b/src/allocation_map.rs @@ -0,0 +1,272 @@ +//! Implements a map from allocation ranges to data. +//! This is somewhat similar to RangeMap, but the ranges +//! and data are discrete and non-splittable. An allocation in the +//! map will always have the same range until explicitly removed + +use rustc_target::abi::Size; +use std::ops::{Index, IndexMut, Range}; + +use rustc_const_eval::interpret::AllocRange; + +#[derive(Clone, Debug)] +struct Elem { + /// The range covered by this element; never empty. + range: AllocRange, + /// The data stored for this element. + data: T, +} + +/// Index of an allocation within the map +type Position = usize; + +#[derive(Clone, Debug)] +pub struct AllocationMap { + v: Vec>, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum AccessType { + /// The access perfectly overlaps (same offset and range) with the exsiting allocation + PerfectlyOverlapping(Position), + /// The access does not touch any exising allocation + Empty(Position), + /// The access overlaps with one or more existing allocations + ImperfectlyOverlapping(Range), +} + +impl AllocationMap { + pub fn new() -> Self { + Self { v: Vec::new() } + } + + /// Finds the position of the allocation containing the given offset. If the offset is not + /// in an existing allocation, then returns Err containing the position + /// where such allocation should be inserted + fn find_offset(&self, offset: Size) -> Result { + // We do a binary search. + let mut left = 0usize; // inclusive + let mut right = self.v.len(); // exclusive + loop { + if left == right { + // No element contains the given offset. But the + // index is where such element should be placed at. + return Err(left); + } + let candidate = left.checked_add(right).unwrap() / 2; + let elem = &self.v[candidate]; + if offset < elem.range.start { + // We are too far right (offset is further left). 
+ debug_assert!(candidate < right); // we are making progress + right = candidate; + } else if offset >= elem.range.end() { + // We are too far left (offset is further right). + debug_assert!(candidate >= left); // we are making progress + left = candidate + 1; + } else { + // This is it! + return Ok(candidate); + } + } + } + + /// Determines whether a given access on `range` overlaps with + /// an existing allocation + pub fn access_type(&self, range: AllocRange) -> AccessType { + match self.find_offset(range.start) { + Ok(index) => { + // Start of the range belongs to an existing object, now let's check the overlapping situation + let elem = &self.v[index]; + // FIXME: derive Eq for AllocRange in rustc + if elem.range.start == range.start && elem.range.size == range.size { + // Happy case: perfectly overlapping access + AccessType::PerfectlyOverlapping(index) + } else { + // FIXME: add a last() method to AllocRange that returns the last inclusive offset (end() is exclusive) + let end_index = match self.find_offset(range.end() - Size::from_bytes(1)) { + // If the end lands in an existing object, add one to get the exclusive index + Ok(inclusive) => inclusive + 1, + Err(exclusive) => exclusive, + }; + + AccessType::ImperfectlyOverlapping(index..end_index) + } + } + Err(index) => { + // Start of the range doesn't belong to an existing object + match self.find_offset(range.end() - Size::from_bytes(1)) { + // Neither does the end + Err(end_index) => + if index == end_index { + // There's nothing between the start and the end, so the range thing is empty + AccessType::Empty(index) + } else { + // Otherwise we have entirely covered an existing object + AccessType::ImperfectlyOverlapping(index..end_index) + }, + // Otherwise at least part of it overlaps with something else + Ok(end_index) => AccessType::ImperfectlyOverlapping(index..end_index + 1), + } + } + } + } + + /// Inserts an object and its occupied range at given position + pub fn insert(&mut self, index: Position, range: AllocRange, data: T) { + self.v.insert(index, Elem { range, data }); + // If we aren't the first element, then our start must be greater than the preivous element's end + if index > 0 { + debug_assert!(self.v[index - 1].range.end() <= range.start); + } + // If we aren't the last element, then our end must be smaller than next element's start + if index < self.v.len() - 1 { + debug_assert!(range.end() <= self.v[index + 1].range.start); + } + } + + /// Removes an object at given position + pub fn remove(&mut self, index: Position) -> T { + self.v.remove(index).data + } +} + +impl Index for AllocationMap { + type Output = T; + + fn index(&self, index: usize) -> &Self::Output { + &self.v[index].data + } +} + +impl IndexMut for AllocationMap { + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + &mut self.v[index].data + } +} + +#[cfg(test)] +mod tests { + use rustc_const_eval::interpret::alloc_range; + + use super::*; + + #[test] + fn empty_map() { + // FIXME: make Size::from_bytes const + let four = Size::from_bytes(4); + let map = AllocationMap::<()>::new(); + + // Correctly tells where we should insert the first element (at index 0) + assert_eq!(map.find_offset(Size::from_bytes(3)), Err(0)); + + // Correctly tells the access type along with the supposed index + assert_eq!(map.access_type(alloc_range(Size::ZERO, four)), AccessType::Empty(0)); + } + + #[test] + #[should_panic] + fn no_overlapping_inserts() { + let four = Size::from_bytes(4); + + let mut map = AllocationMap::<&str>::new(); + + // 
|_|_|_|_|#|#|#|#|_|_|_|_|... + // 0 1 2 3 4 5 6 7 8 9 a b c d + map.insert(0, alloc_range(four, four), "#"); + // |_|_|_|_|#|#|#|#|_|_|_|_|... + // 0 ^ ^ ^ ^ 5 6 7 8 9 a b c d + map.insert(0, alloc_range(Size::from_bytes(1), four), "@"); + } + + #[test] + fn boundaries() { + let four = Size::from_bytes(4); + + let mut map = AllocationMap::<&str>::new(); + + // |#|#|#|#|_|_|... + // 0 1 2 3 4 5 + map.insert(0, alloc_range(Size::ZERO, four), "#"); + // |#|#|#|#|_|_|... + // 0 1 2 3 ^ 5 + assert_eq!(map.find_offset(four), Err(1)); + // |#|#|#|#|_|_|_|_|_|... + // 0 1 2 3 ^ ^ ^ ^ 8 + assert_eq!(map.access_type(alloc_range(four, four)), AccessType::Empty(1)); + + let eight = Size::from_bytes(8); + // |#|#|#|#|_|_|_|_|@|@|@|@|_|_|... + // 0 1 2 3 4 5 6 7 8 9 a b c d + map.insert(1, alloc_range(eight, four), "@"); + // |#|#|#|#|_|_|_|_|@|@|@|@|_|_|... + // 0 1 2 3 4 5 6 ^ 8 9 a b c d + assert_eq!(map.find_offset(Size::from_bytes(7)), Err(1)); + // |#|#|#|#|_|_|_|_|@|@|@|@|_|_|... + // 0 1 2 3 ^ ^ ^ ^ 8 9 a b c d + assert_eq!(map.access_type(alloc_range(four, four)), AccessType::Empty(1)); + } + + #[test] + fn perfectly_overlapping() { + let four = Size::from_bytes(4); + + let mut map = AllocationMap::<&str>::new(); + + // |#|#|#|#|_|_|... + // 0 1 2 3 4 5 + map.insert(0, alloc_range(Size::ZERO, four), "#"); + // |#|#|#|#|_|_|... + // ^ ^ ^ ^ 4 5 + assert_eq!(map.find_offset(Size::ZERO), Ok(0)); + assert_eq!( + map.access_type(alloc_range(Size::ZERO, four)), + AccessType::PerfectlyOverlapping(0) + ); + + // |#|#|#|#|@|@|@|@|_|... + // 0 1 2 3 4 5 6 7 8 + map.insert(1, alloc_range(four, four), "@"); + // |#|#|#|#|@|@|@|@|_|... + // 0 1 2 3 ^ ^ ^ ^ 8 + assert_eq!(map.find_offset(four), Ok(1)); + assert_eq!(map.access_type(alloc_range(four, four)), AccessType::PerfectlyOverlapping(1)); + } + + #[test] + fn straddling() { + let four = Size::from_bytes(4); + + let mut map = AllocationMap::<&str>::new(); + + // |_|_|_|_|#|#|#|#|_|_|_|_|... + // 0 1 2 3 4 5 6 7 8 9 a b c d + map.insert(0, alloc_range(four, four), "#"); + // |_|_|_|_|#|#|#|#|_|_|_|_|... + // 0 1 ^ ^ ^ ^ 6 7 8 9 a b c d + assert_eq!( + map.access_type(alloc_range(Size::from_bytes(2), four)), + AccessType::ImperfectlyOverlapping(0..1) + ); + // |_|_|_|_|#|#|#|#|_|_|_|_|... + // 0 1 2 3 4 5 ^ ^ ^ ^ a b c d + assert_eq!( + map.access_type(alloc_range(Size::from_bytes(6), four)), + AccessType::ImperfectlyOverlapping(0..1) + ); + // |_|_|_|_|#|#|#|#|_|_|_|_|... + // 0 1 ^ ^ ^ ^ ^ ^ ^ ^ a b c d + assert_eq!( + map.access_type(alloc_range(Size::from_bytes(2), Size::from_bytes(8))), + AccessType::ImperfectlyOverlapping(0..1) + ); + + // |_|_|_|_|#|#|#|#|_|_|@|@|_|_|... + // 0 1 2 3 4 5 6 7 8 9 a b c d + map.insert(1, alloc_range(Size::from_bytes(10), Size::from_bytes(2)), "@"); + // |_|_|_|_|#|#|#|#|_|_|@|@|_|_|... 
+ // 0 1 2 3 4 5 ^ ^ ^ ^ ^ ^ ^ ^ + assert_eq!( + map.access_type(alloc_range(Size::from_bytes(6), Size::from_bytes(8))), + AccessType::ImperfectlyOverlapping(0..2) + ); + } +} diff --git a/src/data_race.rs b/src/data_race.rs index 303cf7007e..d9bfbc1bdb 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -519,7 +519,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { global.sc_read(); } let mut rng = this.machine.rng.borrow_mut(); - let buffer = alloc_buffers.get_store_buffer(alloc_range(base_offset, place.layout.size)); + let buffer = + alloc_buffers.get_store_buffer(alloc_range(base_offset, place.layout.size)); let loaded = buffer.buffered_read( global, atomic == AtomicReadOp::SeqCst, @@ -555,12 +556,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { if atomic == AtomicWriteOp::SeqCst { global.sc_write(); } - let mut buffer = alloc_buffers.get_store_buffer_mut(alloc_range(base_offset, dest.layout.size)); - buffer.buffered_write( - val, - global, - atomic == AtomicWriteOp::SeqCst, - )?; + let buffer = + alloc_buffers.get_store_buffer_mut(alloc_range(base_offset, dest.layout.size)); + buffer.buffered_write(val, global, atomic == AtomicWriteOp::SeqCst)?; } Ok(()) @@ -624,17 +622,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { let lt = this.binary_op(mir::BinOp::Lt, &old, &rhs)?.to_scalar()?.to_bool()?; let new_val = if min { - if lt { - &old - } else { - &rhs - } + if lt { &old } else { &rhs } } else { - if lt { - &rhs - } else { - &old - } + if lt { &rhs } else { &old } }; this.allow_data_races_mut(|this| this.write_immediate(**new_val, &(*place).into()))?; @@ -736,7 +726,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { global.sc_write(); } let range = alloc_range(base_offset, place.layout.size); - let mut buffer = alloc_buffers.get_store_buffer_mut(range); + let buffer = alloc_buffers.get_store_buffer_mut(range); buffer.read_from_last_store(global); buffer.buffered_write(new_val, global, atomic == AtomicRwOp::SeqCst)?; } diff --git a/src/lib.rs b/src/lib.rs index 06ab2fabab..3270a57c49 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,6 +31,7 @@ extern crate rustc_session; extern crate rustc_span; extern crate rustc_target; +mod allocation_map; mod data_race; mod diagnostics; mod eval; diff --git a/src/machine.rs b/src/machine.rs index aa2a930ccd..ca7fff7b08 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -636,13 +636,17 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { let buffer_alloc = if ecx.machine.weak_memory { // FIXME: if this is an atomic obejct, we want to supply its initial value // while allocating the store buffer here. - Some(weak_memory::AllocExtra::new_allocation(alloc.size())) + Some(weak_memory::AllocExtra::new_allocation()) } else { None }; let alloc: Allocation = alloc.convert_tag_add_extra( &ecx.tcx, - AllocExtra { stacked_borrows: stacks, data_race: race_alloc, weak_memory: buffer_alloc }, + AllocExtra { + stacked_borrows: stacks, + data_race: race_alloc, + weak_memory: buffer_alloc, + }, |ptr| Evaluator::tag_alloc_base_pointer(ecx, ptr), ); Cow::Owned(alloc) diff --git a/src/weak_memory.rs b/src/weak_memory.rs index 2cf9a98b13..34c669239d 100644 --- a/src/weak_memory.rs +++ b/src/weak_memory.rs @@ -12,7 +12,7 @@ // load by each thread. This optimisation is done in tsan11 // (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.h#L35-L37) // and here. 
-// +// // 3. §4.5 of the paper wants an SC store to mark all existing stores in the buffer that happens before it // as SC. This is not done in the operational semantics but implemented correctly in tsan11 // (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.cc#L160-L167) @@ -25,48 +25,84 @@ // and here. use std::{ - cell::{Ref, RefCell, RefMut}, + cell::{Ref, RefCell}, collections::VecDeque, }; use rustc_const_eval::interpret::{AllocRange, InterpResult, ScalarMaybeUninit}; use rustc_data_structures::fx::FxHashMap; -use rustc_target::abi::Size; use crate::{ + allocation_map::{AccessType, AllocationMap}, data_race::{GlobalState, ThreadClockSet}, - RangeMap, Tag, VClock, VTimestamp, VectorIdx, + Tag, VClock, VTimestamp, VectorIdx, }; pub type AllocExtra = StoreBufferAlloc; + #[derive(Debug, Clone)] pub struct StoreBufferAlloc { /// Store buffer of each atomic object in this allocation - // Load may modify a StoreBuffer to record the loading thread's - // timestamp so we need interior mutability here. - store_buffer: RefCell>, + // Behind a RefCell because we need to allocate/remove on read access + store_buffer: RefCell>, } impl StoreBufferAlloc { - pub fn new_allocation(len: Size) -> Self { - Self { store_buffer: RefCell::new(RangeMap::new(len, StoreBuffer::default())) } + pub fn new_allocation() -> Self { + Self { store_buffer: RefCell::new(AllocationMap::new()) } } /// Gets a store buffer associated with an atomic object in this allocation pub fn get_store_buffer(&self, range: AllocRange) -> Ref<'_, StoreBuffer> { - Ref::map(self.store_buffer.borrow(), |range_map| { - let (.., store_buffer) = range_map.iter(range.start, range.size).next().unwrap(); - store_buffer - }) + let access_type = self.store_buffer.borrow().access_type(range); + let index = match access_type { + AccessType::PerfectlyOverlapping(index) => index, + AccessType::Empty(index) => { + // First atomic access on this range, allocate a new StoreBuffer + let mut buffer = self.store_buffer.borrow_mut(); + buffer.insert(index, range, StoreBuffer::default()); + index + } + AccessType::ImperfectlyOverlapping(index_range) => { + // Accesses that imperfectly overlaps with existing atomic objects + // do not have well-defined behaviours. But we don't throw a UB here + // because we have (or will) checked that all bytes in the current + // access are non-racy. + // The behaviour here is that we delete all the existing objects this + // access touches, and allocate a new and empty one for the exact range. + // A read on an empty buffer returns None, which means the program will + // observe the latest value in modification order at every byte. 
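                // For example: if a 4-byte atomic object already has a store buffer and the
                // program then performs a 2-byte atomic access overlapping it, the 4-byte
                // buffer is discarded, an empty 2-byte buffer is created for the new range,
                // and the first load through it falls back to the value currently in memory
                // (the `AtomicU16::from_mut` test added later in this series exercises
                // exactly this scenario).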
+ let mut buffer = self.store_buffer.borrow_mut(); + for index in index_range.clone() { + buffer.remove(index); + } + buffer.insert(index_range.start, range, StoreBuffer::default()); + index_range.start + } + }; + Ref::map(self.store_buffer.borrow(), |buffer| &buffer[index]) } - pub fn get_store_buffer_mut(&self, range: AllocRange) -> RefMut<'_, StoreBuffer> { - RefMut::map(self.store_buffer.borrow_mut(), |range_map| { - let (.., store_buffer) = range_map.iter_mut(range.start, range.size).next().unwrap(); - store_buffer - }) + /// Gets a mutable store buffer associated with an atomic object in this allocation + pub fn get_store_buffer_mut(&mut self, range: AllocRange) -> &mut StoreBuffer { + let buffer = self.store_buffer.get_mut(); + let access_type = buffer.access_type(range); + let index = match access_type { + AccessType::PerfectlyOverlapping(index) => index, + AccessType::Empty(index) => { + buffer.insert(index, range, StoreBuffer::default()); + index + } + AccessType::ImperfectlyOverlapping(index_range) => { + for index in index_range.clone() { + buffer.remove(index); + } + buffer.insert(index_range.start, range, StoreBuffer::default()); + index_range.start + } + }; + &mut buffer[index] } - } const STORE_BUFFER_LIMIT: usize = 128; From a71b10381e1e956e87bcd75ffc5ec7273680aba6 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sat, 7 May 2022 00:31:17 +0100 Subject: [PATCH 08/46] Add imperfectly overlapping test --- tests/run-pass/concurrency/weak_memory.rs | 24 ++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tests/run-pass/concurrency/weak_memory.rs b/tests/run-pass/concurrency/weak_memory.rs index efbbc45909..90820d4348 100644 --- a/tests/run-pass/concurrency/weak_memory.rs +++ b/tests/run-pass/concurrency/weak_memory.rs @@ -28,9 +28,10 @@ // M. Batty, S. Owens, S. Sarkar, P. Sewell and T. Weber, // "Mathematizing C++ concurrency", ACM SIGPLAN Notices, vol. 46, no. 1, pp. 55-66, 2011. // Available: https://ss265.host.cs.st-andrews.ac.uk/papers/n3132.pdf. +#![feature(atomic_from_mut)] use std::sync::atomic::Ordering::*; -use std::sync::atomic::{fence, AtomicUsize}; +use std::sync::atomic::{fence, AtomicU16, AtomicU32, AtomicUsize}; use std::thread::{spawn, yield_now}; #[derive(Copy, Clone)] @@ -196,6 +197,26 @@ fn test_mixed_access() { assert_eq!(r2, 2); } +// Strictly speaking, atomic accesses that imperfectly overlap with existing +// atomic objects are UB. Nonetheless we'd like to provide a sane value when +// the access is not racy. +fn test_imperfectly_overlapping_access() { + let mut qword = AtomicU32::new(42); + assert_eq!(qword.load(Relaxed), 42); + qword.store(u32::to_be(0xabbafafa), Relaxed); + + let qword_mut = qword.get_mut(); + + let dwords_mut = unsafe { std::mem::transmute::<&mut u32, &mut [u16; 2]>(qword_mut) }; + + let (hi_mut, lo_mut) = dwords_mut.split_at_mut(1); + + let (hi, lo) = (AtomicU16::from_mut(&mut hi_mut[0]), AtomicU16::from_mut(&mut lo_mut[0])); + + assert_eq!(u16::from_be(hi.load(Relaxed)), 0xabba); + assert_eq!(u16::from_be(lo.load(Relaxed)), 0xfafa); +} + // The following two tests are taken from Repairing Sequential Consistency in C/C++11 // by Lahav et al. // https://plv.mpi-sws.org/scfix/paper.pdf @@ -270,6 +291,7 @@ fn test_cpp20_rwc_syncs() { } pub fn main() { + test_imperfectly_overlapping_access(); // TODO: does this make chances of spurious success // "sufficiently low"? 
This also takes a long time to run, // prehaps each function should be its own test case so they From bf7fe68fba3a3bbfaef7fdbace268d0001e038cf Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sat, 7 May 2022 00:54:54 +0100 Subject: [PATCH 09/46] Add -Zmiri-disable-weak-memory-emulation to README --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a55ebcb125..e5c2465944 100644 --- a/README.md +++ b/README.md @@ -317,11 +317,13 @@ to Miri failing to detect cases of undefined behavior in a program. can focus on other failures, but it means Miri can miss bugs in your program. Using this flag is **unsound**. * `-Zmiri-disable-data-race-detector` disables checking for data races. Using - this flag is **unsound**. + this flag is **unsound**. This implies `-Zmiri-disable-weak-memory-emulation`. * `-Zmiri-disable-stacked-borrows` disables checking the experimental [Stacked Borrows] aliasing rules. This can make Miri run faster, but it also means no aliasing violations will be detected. Using this flag is **unsound** (but the affected soundness rules are experimental). +* `-Zmiri-disable-weak-memory-emulation` disables the emulation of some C++11 weak + memory effects. * `-Zmiri-disable-validation` disables enforcing validity invariants, which are enforced by default. This is mostly useful to focus on other failures (such as out-of-bounds accesses) first. Setting this flag means Miri can miss bugs From 11ca975cd83351aae3b113ceb754dde4a304a6b5 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sat, 7 May 2022 01:07:16 +0100 Subject: [PATCH 10/46] Move type definitions together and clarify fetch_store on empty buffer --- src/weak_memory.rs | 59 +++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/src/weak_memory.rs b/src/weak_memory.rs index 34c669239d..0d892a5b38 100644 --- a/src/weak_memory.rs +++ b/src/weak_memory.rs @@ -40,6 +40,8 @@ use crate::{ pub type AllocExtra = StoreBufferAlloc; +const STORE_BUFFER_LIMIT: usize = 128; + #[derive(Debug, Clone)] pub struct StoreBufferAlloc { /// Store buffer of each atomic object in this allocation @@ -47,6 +49,31 @@ pub struct StoreBufferAlloc { store_buffer: RefCell>, } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StoreBuffer { + // Stores to this location in modification order + buffer: VecDeque, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct StoreElement { + /// The identifier of the vector index, corresponding to a thread + /// that performed the store. + store_index: VectorIdx, + + /// Whether this store is SC. + is_seqcst: bool, + + /// The timestamp of the storing thread when it performed the store + timestamp: VTimestamp, + /// The value of this store + val: ScalarMaybeUninit, + + /// Timestamp of first loads from this store element by each thread + /// Behind a RefCell to keep load op take &self + loads: RefCell>, +} + impl StoreBufferAlloc { pub fn new_allocation() -> Self { Self { store_buffer: RefCell::new(AllocationMap::new()) } @@ -105,13 +132,6 @@ impl StoreBufferAlloc { } } -const STORE_BUFFER_LIMIT: usize = 128; -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct StoreBuffer { - // Stores to this location in modification order - buffer: VecDeque, -} - impl Default for StoreBuffer { fn default() -> Self { let mut buffer = VecDeque::new(); @@ -175,7 +195,11 @@ impl<'mir, 'tcx: 'mir> StoreBuffer { /// Selects a valid store element in the buffer. 
/// The buffer does not contain the value used to initialise the atomic object - /// so a fresh atomic object has an empty store buffer until an explicit store. + /// so a fresh atomic object has an empty store buffer and this function + /// will return `None`. In this case, the caller should ensure that the non-buffered + /// value from `MiriEvalContext::read_scalar()` is observed by the program, which is + /// the initial value of the atomic object. `MiriEvalContext::read_scalar()` is always + /// the latest value in modification order so it is always correct to be observed by any thread. fn fetch_store( &self, is_seqcst: bool, @@ -294,25 +318,6 @@ impl<'mir, 'tcx: 'mir> StoreBuffer { } } -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct StoreElement { - /// The identifier of the vector index, corresponding to a thread - /// that performed the store. - store_index: VectorIdx, - - /// Whether this store is SC. - is_seqcst: bool, - - /// The timestamp of the storing thread when it performed the store - timestamp: VTimestamp, - /// The value of this store - val: ScalarMaybeUninit, - - /// Timestamp of first loads from this store element by each thread - /// Behind a RefCell to keep load op take &self - loads: RefCell>, -} - impl StoreElement { /// ATOMIC LOAD IMPL in the paper /// Unlike the operational semantics in the paper, we don't need to keep track From 32627d5abb8791d2de10199964128ea8238d5c2b Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sat, 7 May 2022 01:46:19 +0100 Subject: [PATCH 11/46] Disable weak memory emulation on scheduler-dependent data race tests --- tests/fail/data_race/alloc_read_race.rs | 1 + tests/fail/data_race/alloc_write_race.rs | 1 + tests/fail/data_race/dealloc_read_race_stack.rs | 2 +- tests/fail/data_race/dealloc_write_race_stack.rs | 2 +- tests/fail/data_race/read_write_race_stack.rs | 2 +- tests/fail/data_race/relax_acquire_race.rs | 1 + tests/fail/data_race/release_seq_race.rs | 2 +- tests/fail/data_race/release_seq_race_same_thread.rs | 2 +- tests/fail/data_race/rmw_race.rs | 1 + tests/fail/data_race/write_write_race_stack.rs | 2 +- tests/pass/concurrency/data_race.rs | 1 + 11 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tests/fail/data_race/alloc_read_race.rs b/tests/fail/data_race/alloc_read_race.rs index 093c9024f2..2ddbb65724 100644 --- a/tests/fail/data_race/alloc_read_race.rs +++ b/tests/fail/data_race/alloc_read_race.rs @@ -1,4 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. +// compile-flags: -Zmiri-disable-weak-memory-emulation #![feature(new_uninit)] use std::thread::spawn; diff --git a/tests/fail/data_race/alloc_write_race.rs b/tests/fail/data_race/alloc_write_race.rs index becebe6a12..d32eb55676 100644 --- a/tests/fail/data_race/alloc_write_race.rs +++ b/tests/fail/data_race/alloc_write_race.rs @@ -1,4 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. +// compile-flags: -Zmiri-disable-weak-memory-emulation #![feature(new_uninit)] use std::thread::spawn; diff --git a/tests/fail/data_race/dealloc_read_race_stack.rs b/tests/fail/data_race/dealloc_read_race_stack.rs index 6b573121e5..b70db5f4ac 100644 --- a/tests/fail/data_race/dealloc_read_race_stack.rs +++ b/tests/fail/data_race/dealloc_read_race_stack.rs @@ -1,5 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. 
-// compile-flags: -Zmiri-disable-isolation +// compile-flags: -Zmiri-disable-isolation -Zmiri-disable-weak-memory-emulation use std::thread::{spawn, sleep}; use std::ptr::null_mut; diff --git a/tests/fail/data_race/dealloc_write_race_stack.rs b/tests/fail/data_race/dealloc_write_race_stack.rs index 34a16b00b8..f2b49fc5f3 100644 --- a/tests/fail/data_race/dealloc_write_race_stack.rs +++ b/tests/fail/data_race/dealloc_write_race_stack.rs @@ -1,5 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. -// compile-flags: -Zmiri-disable-isolation +// compile-flags: -Zmiri-disable-isolation -Zmiri-disable-weak-memory-emulation use std::thread::{spawn, sleep}; use std::ptr::null_mut; diff --git a/tests/fail/data_race/read_write_race_stack.rs b/tests/fail/data_race/read_write_race_stack.rs index 5a1c0a4b6d..9edeed0af6 100644 --- a/tests/fail/data_race/read_write_race_stack.rs +++ b/tests/fail/data_race/read_write_race_stack.rs @@ -1,5 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. -// compile-flags: -Zmiri-disable-isolation -Zmir-opt-level=0 +// compile-flags: -Zmiri-disable-isolation -Zmir-opt-level=0 -Zmiri-disable-weak-memory-emulation // Note: mir-opt-level set to 0 to prevent the read of stack_var in thread 1 // from being optimized away and preventing the detection of the data-race. diff --git a/tests/fail/data_race/relax_acquire_race.rs b/tests/fail/data_race/relax_acquire_race.rs index 8b8616431f..20e63dc4b1 100644 --- a/tests/fail/data_race/relax_acquire_race.rs +++ b/tests/fail/data_race/relax_acquire_race.rs @@ -1,4 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. +// compile-flags: -Zmiri-disable-weak-memory-emulation use std::thread::spawn; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/tests/fail/data_race/release_seq_race.rs b/tests/fail/data_race/release_seq_race.rs index 29c428b388..6ff84aa04b 100644 --- a/tests/fail/data_race/release_seq_race.rs +++ b/tests/fail/data_race/release_seq_race.rs @@ -1,5 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. -// compile-flags: -Zmiri-disable-isolation +// compile-flags: -Zmiri-disable-isolation -Zmiri-disable-weak-memory-emulation use std::thread::{spawn, sleep}; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/tests/fail/data_race/release_seq_race_same_thread.rs b/tests/fail/data_race/release_seq_race_same_thread.rs index 54b9f49937..1245fb96f4 100644 --- a/tests/fail/data_race/release_seq_race_same_thread.rs +++ b/tests/fail/data_race/release_seq_race_same_thread.rs @@ -1,5 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. -// compile-flags: -Zmiri-disable-isolation +// compile-flags: -Zmiri-disable-isolation -Zmiri-disable-weak-memory-emulation use std::thread::spawn; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/tests/fail/data_race/rmw_race.rs b/tests/fail/data_race/rmw_race.rs index fcf683a65d..c968c83422 100644 --- a/tests/fail/data_race/rmw_race.rs +++ b/tests/fail/data_race/rmw_race.rs @@ -1,4 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. 
+// compile-flags: -Zmiri-disable-weak-memory-emulation use std::thread::spawn; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/tests/fail/data_race/write_write_race_stack.rs b/tests/fail/data_race/write_write_race_stack.rs index bfe1464cb5..daa3e5f5c4 100644 --- a/tests/fail/data_race/write_write_race_stack.rs +++ b/tests/fail/data_race/write_write_race_stack.rs @@ -1,5 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. -// compile-flags: -Zmiri-disable-isolation +// compile-flags: -Zmiri-disable-isolation -Zmiri-disable-weak-memory-emulation use std::thread::{spawn, sleep}; use std::ptr::null_mut; diff --git a/tests/pass/concurrency/data_race.rs b/tests/pass/concurrency/data_race.rs index 2dc0ee3f8f..c51080f474 100644 --- a/tests/pass/concurrency/data_race.rs +++ b/tests/pass/concurrency/data_race.rs @@ -1,4 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. +// compile-flags: -Zmiri-disable-weak-memory-emulation use std::sync::atomic::{AtomicUsize, fence, Ordering}; From f729f289255bcc8d1bfad614ac74bc51d411826a Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sat, 7 May 2022 17:34:18 +0100 Subject: [PATCH 12/46] Move cpp20_rwc_syncs into compile-fail --- .../weak_memory/cpp20_rwc_syncs.rs | 85 +++++++++++++++++++ tests/run-pass/concurrency/weak_memory.rs | 58 +------------ 2 files changed, 86 insertions(+), 57 deletions(-) create mode 100644 tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs diff --git a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs new file mode 100644 index 0000000000..b9e395fd77 --- /dev/null +++ b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs @@ -0,0 +1,85 @@ +// ignore-windows: Concurrency on Windows is not supported yet. +// compile-flags: -Zmiri-ignore-leaks + +// https://plv.mpi-sws.org/scfix/paper.pdf +// 2.2 Second Problem: SC Fences are Too Weak +// This test should pass under the C++20 model Rust is using. +// Unfortunately, Miri's weak memory emulation only follows C++11 model +// as we don't know how to correctly emulate C++20's revised SC semantics, +// so we have to stick to C++11 emulation from exiting research. 
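// Concretely: under the C++11-style emulation the outcome (b, c) == (0, 0) below can still
// be observed, even though the C++20 model forbids it. The test converts that outcome into
// a detectable error (the uninitialised read at the end), which is why it lives in
// compile-fail rather than run-pass.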
+ +use std::sync::atomic::Ordering::*; +use std::thread::{spawn, yield_now}; +use std::sync::atomic::{fence, AtomicUsize}; + +// Spins and yields until until it reads value +fn reads_value(loc: &AtomicUsize, val: usize) -> usize { + while loc.load(Relaxed) != val { + yield_now(); + } + val +} + +// We can't create static items because we need to run each test +// multiple tests +fn static_atomic(val: usize) -> &'static AtomicUsize { + let ret = Box::leak(Box::new(AtomicUsize::new(val))); + // A workaround to put the initialisation value in the store buffer + ret.store(val, Relaxed); + ret +} + +fn test_cpp20_rwc_syncs() { + /* + int main() { + atomic_int x = 0; + atomic_int y = 0; + + {{{ x.store(1,mo_relaxed); + ||| { r1=x.load(mo_relaxed).readsvalue(1); + fence(mo_seq_cst); + r2=y.load(mo_relaxed); } + ||| { y.store(1,mo_relaxed); + fence(mo_seq_cst); + r3=x.load(mo_relaxed); } + }}} + return 0; + } + */ + let x = static_atomic(0); + let y = static_atomic(0); + + let j1 = spawn(move || { + x.store(1, Relaxed); + }); + + let j2 = spawn(move || { + reads_value(&x, 1); + fence(SeqCst); + y.load(Relaxed) + }); + + let j3 = spawn(move || { + y.store(1, Relaxed); + fence(SeqCst); + x.load(Relaxed) + }); + + j1.join().unwrap(); + let b = j2.join().unwrap(); + let c = j3.join().unwrap(); + + if (b, c) == (0, 0) { + // FIXME: the standalone compiletest-rs needs to support + // failure-status header to allow us to write assert_ne!((b, c), (0, 0)) + // https://rustc-dev-guide.rust-lang.org/tests/headers.html#miscellaneous-headers + // because panic exits with 101 but compile-rs expects 1 + let _ = unsafe { std::mem::MaybeUninit::<*const u32>::uninit().assume_init() }; //~ ERROR uninitialized + } +} + +pub fn main() { + for _ in 0..500 { + test_cpp20_rwc_syncs(); + } +} \ No newline at end of file diff --git a/tests/run-pass/concurrency/weak_memory.rs b/tests/run-pass/concurrency/weak_memory.rs index 90820d4348..e85c2d1960 100644 --- a/tests/run-pass/concurrency/weak_memory.rs +++ b/tests/run-pass/concurrency/weak_memory.rs @@ -31,7 +31,7 @@ #![feature(atomic_from_mut)] use std::sync::atomic::Ordering::*; -use std::sync::atomic::{fence, AtomicU16, AtomicU32, AtomicUsize}; +use std::sync::atomic::{AtomicU16, AtomicU32, AtomicUsize}; use std::thread::{spawn, yield_now}; #[derive(Copy, Clone)] @@ -57,13 +57,6 @@ fn acquires_value(loc: &AtomicUsize, val: usize) -> usize { val } -fn reads_value(loc: &AtomicUsize, val: usize) -> usize { - while loc.load(Relaxed) != val { - yield_now(); - } - val -} - fn test_corr() { let x = static_atomic(0); let y = static_atomic(0); @@ -242,54 +235,6 @@ fn test_sc_store_buffering() { assert_ne!((a, b), (0, 0)); } -// 2.2 Second Problem: SC Fences are Too Weak -// This test should pass under the C++20 model Rust is using. 
-// Unfortunately, Miri's weak memory emulation only follows C++11 model -// as we don't know how to correctly emulate C++20's revised SC semantics -#[allow(dead_code)] -fn test_cpp20_rwc_syncs() { - /* - int main() { - atomic_int x = 0; - atomic_int y = 0; - - {{{ x.store(1,mo_relaxed); - ||| { r1=x.load(mo_relaxed).readsvalue(1); - fence(mo_seq_cst); - r2=y.load(mo_relaxed); } - ||| { y.store(1,mo_relaxed); - fence(mo_seq_cst); - r3=x.load(mo_relaxed); } - }}} - return 0; - } - */ - let x = static_atomic(0); - let y = static_atomic(0); - - let j1 = spawn(move || { - x.store(1, Relaxed); - }); - - let j2 = spawn(move || { - reads_value(&x, 1); - fence(SeqCst); - y.load(Relaxed) - }); - - let j3 = spawn(move || { - y.store(1, Relaxed); - fence(SeqCst); - x.load(Relaxed) - }); - - j1.join().unwrap(); - let b = j2.join().unwrap(); - let c = j3.join().unwrap(); - - assert_ne!((b, c), (0, 0)); -} - pub fn main() { test_imperfectly_overlapping_access(); // TODO: does this make chances of spurious success @@ -303,6 +248,5 @@ pub fn main() { test_wrc(); test_corr(); test_sc_store_buffering(); - // test_cpp20_rwc_syncs(); } } From 89138a67dc2b9f7ac5363bad114984cb35033158 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Tue, 10 May 2022 23:25:18 +0100 Subject: [PATCH 13/46] Add more top-level comments --- src/weak_memory.rs | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/weak_memory.rs b/src/weak_memory.rs index 0d892a5b38..fc2e220e5e 100644 --- a/src/weak_memory.rs +++ b/src/weak_memory.rs @@ -1,6 +1,34 @@ //! Implementation of C++11-consistent weak memory emulation using store buffers //! based on Dynamic Race Detection for C++ ("the paper"): //! https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf +//! +//! This implementation will never generate weak memory behaviours forbidden by the C++11 model, +//! but it is incapable of producing all possible weak behaviours allowed by the model. There are +//! certain weak behaviours observable on real hardware but not while using this. +//! +//! Note that this implementation does not take into account of C++20's memory model revision to SC accesses +//! and fences introduced by P0668 (https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0668r5.html). +//! This implementation is not fully correct under the revised C++20 model and may generate behaviours C++20 +//! disallows. +//! +//! Rust follows the full C++20 memory model (except for the Consume ordering). It is therefore +//! possible for this implementation to generate behaviours never observable when the same program is compiled and +//! run natively. Unfortunately, no literature exists at the time of writing which proposes an implementable and C++20-compatible +//! relaxed memory model that supports all atomic operation existing in Rust. The closest one is +//! A Promising Semantics for Relaxed-Memory Concurrency by Jeehoon Kang et al. (https://www.cs.tau.ac.il/~orilahav/papers/popl17.pdf) +//! However, this model lacks SC accesses and is therefore unusable by Miri (SC accesses are everywhere in library code). +//! +//! If you find anything that proposes a relaxed memory model that is C++20-consistent, supports all orderings Rust's atomic accesses +//! and fences accept, and is implementable (with operational semanitcs), please open a GitHub issue! +//! +//! One characteristic of this implementation, in contrast to some other notable operational models such as ones proposed in +//! Taming Release-Acquire Consistency by Ori Lahav et al. 
(https://plv.mpi-sws.org/sra/paper.pdf) or Promising Semantics noted above, +//! is that this implementation does not require each thread to hold an isolated view of the entire memory. Here, store buffers are per-location +//! and shared across all threads. This is more memory efficient but does require store elements (representing writes to a location) to record +//! information about reads, whereas in the other two models it is the other way round: reads points to the write it got its value from. +//! Additionally, writes in our implementation do not have globally unique timestamps attached. In the other two models this timestamp is +//! used to make sure a value in a thread's view is not overwritten by a write that occured earlier than the one in the existing view. +//! In our implementation, this is detected using read information attached to store elements, as there is no data strucutre representing reads. // Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in §5.3: // 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri), @@ -40,6 +68,10 @@ use crate::{ pub type AllocExtra = StoreBufferAlloc; +// Each store buffer must be bounded otherwise it will grow indefinitely. +// However, bounding the store buffer means restricting the amount of weak +// behaviours observable. The author picked 128 as a good tradeoff +// so we follow them here. const STORE_BUFFER_LIMIT: usize = 128; #[derive(Debug, Clone)] From 62b514e23571beb0f051b4927e08a31460d324ac Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Tue, 10 May 2022 23:34:38 +0100 Subject: [PATCH 14/46] Update README --- README.md | 15 +++++++++------ src/weak_memory.rs | 10 +++++----- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index e5c2465944..ece45fca12 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,8 @@ for example: or an invalid enum discriminant) * **Experimental**: Violations of the [Stacked Borrows] rules governing aliasing for reference types -* **Experimental**: Data races (but no weak memory effects) +* **Experimental**: Data races +* **Experimental**: Weak memory emulation On top of that, Miri will also tell you about memory leaks: when there is memory still allocated at the end of the execution, and that memory is not reachable @@ -61,9 +62,11 @@ in your program, and cannot run all programs: not support networking. System API support varies between targets; if you run on Windows it is a good idea to use `--target x86_64-unknown-linux-gnu` to get better support. -* Threading support is not finished yet. E.g., weak memory effects are not - emulated and spin loops (without syscalls) just loop forever. There is no - threading support on Windows. +* Threading support is not finished yet. E.g. spin loops (without syscalls) just + loop forever. There is no threading support on Windows. +* Weak memory emulation may produce weak behaivours unobservable by compiled + programs running on real hardware when `SeqCst` fences are used, and it cannot + produce all behaviors possibly observable on real hardware. [rust]: https://www.rust-lang.org/ [mir]: https://github.com/rust-lang/rfcs/blob/master/text/1211-mir.md @@ -322,13 +325,13 @@ to Miri failing to detect cases of undefined behavior in a program. [Stacked Borrows] aliasing rules. This can make Miri run faster, but it also means no aliasing violations will be detected. 
Using this flag is **unsound** (but the affected soundness rules are experimental). -* `-Zmiri-disable-weak-memory-emulation` disables the emulation of some C++11 weak - memory effects. * `-Zmiri-disable-validation` disables enforcing validity invariants, which are enforced by default. This is mostly useful to focus on other failures (such as out-of-bounds accesses) first. Setting this flag means Miri can miss bugs in your program. However, this can also help to make Miri run faster. Using this flag is **unsound**. +* `-Zmiri-disable-weak-memory-emulation` disables the emulation of some C++11 weak + memory effects. * `-Zmiri-measureme=` enables `measureme` profiling for the interpreted program. This can be used to find which parts of your program are executing slowly under Miri. The profile is written out to a file with the prefix ``, and can be processed diff --git a/src/weak_memory.rs b/src/weak_memory.rs index fc2e220e5e..b9ab129231 100644 --- a/src/weak_memory.rs +++ b/src/weak_memory.rs @@ -1,26 +1,26 @@ //! Implementation of C++11-consistent weak memory emulation using store buffers //! based on Dynamic Race Detection for C++ ("the paper"): //! https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf -//! +//! //! This implementation will never generate weak memory behaviours forbidden by the C++11 model, //! but it is incapable of producing all possible weak behaviours allowed by the model. There are //! certain weak behaviours observable on real hardware but not while using this. -//! +//! //! Note that this implementation does not take into account of C++20's memory model revision to SC accesses //! and fences introduced by P0668 (https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0668r5.html). //! This implementation is not fully correct under the revised C++20 model and may generate behaviours C++20 //! disallows. -//! +//! //! Rust follows the full C++20 memory model (except for the Consume ordering). It is therefore //! possible for this implementation to generate behaviours never observable when the same program is compiled and //! run natively. Unfortunately, no literature exists at the time of writing which proposes an implementable and C++20-compatible //! relaxed memory model that supports all atomic operation existing in Rust. The closest one is //! A Promising Semantics for Relaxed-Memory Concurrency by Jeehoon Kang et al. (https://www.cs.tau.ac.il/~orilahav/papers/popl17.pdf) //! However, this model lacks SC accesses and is therefore unusable by Miri (SC accesses are everywhere in library code). -//! +//! //! If you find anything that proposes a relaxed memory model that is C++20-consistent, supports all orderings Rust's atomic accesses //! and fences accept, and is implementable (with operational semanitcs), please open a GitHub issue! -//! +//! //! One characteristic of this implementation, in contrast to some other notable operational models such as ones proposed in //! Taming Release-Acquire Consistency by Ori Lahav et al. (https://plv.mpi-sws.org/sra/paper.pdf) or Promising Semantics noted above, //! is that this implementation does not require each thread to hold an isolated view of the entire memory. 
Here, store buffers are per-location From 773131bb261a2a96426f9024271f5557bbbf5c1a Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Wed, 11 May 2022 23:52:38 +0100 Subject: [PATCH 15/46] Improve privacy and comments --- src/data_race.rs | 14 ++++++++++---- src/weak_memory.rs | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/data_race.rs b/src/data_race.rs index d9bfbc1bdb..78b9b09f16 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -103,7 +103,7 @@ pub enum AtomicFenceOp { pub struct ThreadClockSet { /// The increasing clock representing timestamps /// that happen-before this thread. - pub clock: VClock, + pub(crate) clock: VClock, /// The set of timestamps that will happen-before this /// thread once it performs an acquire fence. @@ -113,11 +113,17 @@ pub struct ThreadClockSet { /// have been released by this thread by a fence. fence_release: VClock, - pub fence_seqcst: VClock, + /// Timestamps of the last SC fence performed by each + /// thread, updated when this thread performs an SC fence + pub(crate) fence_seqcst: VClock, - pub write_seqcst: VClock, + /// Timestamps of the last SC write performed by each + /// thread, updated when this thread performs an SC fence + pub(crate) write_seqcst: VClock, - pub read_seqcst: VClock, + /// Timestamps of the last SC fence performed by each + /// thread, updated when this thread performs an SC read + pub(crate) read_seqcst: VClock, } impl ThreadClockSet { diff --git a/src/weak_memory.rs b/src/weak_memory.rs index b9ab129231..46838c5c8a 100644 --- a/src/weak_memory.rs +++ b/src/weak_memory.rs @@ -88,7 +88,7 @@ pub struct StoreBuffer { } #[derive(Debug, Clone, PartialEq, Eq)] -pub struct StoreElement { +struct StoreElement { /// The identifier of the vector index, corresponding to a thread /// that performed the store. store_index: VectorIdx, From 7d874db213cdb17f20c4960e314bcce328d2b61c Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Thu, 12 May 2022 22:04:37 +0100 Subject: [PATCH 16/46] Add tests showing weak memory behaviours --- .../consistency.rs} | 39 +--------- .../consistency.stderr} | 0 .../weak_memory/imperfectly_overlapping.rs | 29 +++++++ tests/run-pass/weak_memory/weak.rs | 77 +++++++++++++++++++ tests/run-pass/weak_memory/weak.stderr | 2 + 5 files changed, 112 insertions(+), 35 deletions(-) rename tests/run-pass/{concurrency/weak_memory.rs => weak_memory/consistency.rs} (80%) rename tests/run-pass/{concurrency/weak_memory.stderr => weak_memory/consistency.stderr} (100%) create mode 100644 tests/run-pass/weak_memory/imperfectly_overlapping.rs create mode 100644 tests/run-pass/weak_memory/weak.rs create mode 100644 tests/run-pass/weak_memory/weak.stderr diff --git a/tests/run-pass/concurrency/weak_memory.rs b/tests/run-pass/weak_memory/consistency.rs similarity index 80% rename from tests/run-pass/concurrency/weak_memory.rs rename to tests/run-pass/weak_memory/consistency.rs index e85c2d1960..d7c44f6ac2 100644 --- a/tests/run-pass/concurrency/weak_memory.rs +++ b/tests/run-pass/weak_memory/consistency.rs @@ -1,8 +1,8 @@ // ignore-windows: Concurrency on Windows is not supported yet. // compile-flags: -Zmiri-ignore-leaks -Zmiri-disable-stacked-borrows -// Weak memory emulation tests. 
All of the following test if -// our weak memory emulation produces any inconsistent execution outcomes +// The following tests check whether our weak memory emulation produces +// any inconsistent execution outcomes // // Due to the random nature of choosing valid stores, it is always // possible that our tests spuriously succeeds: even though our weak @@ -12,15 +12,6 @@ // // To mitigate this, each test is ran enough times such that the chance // of spurious success is very low. These tests never supriously fail. -// -// Note that we can't effectively test whether our weak memory emulation -// can produce *all* consistent execution outcomes. This may be possible -// if Miri's scheduler is sufficiently random and explores all possible -// interleavings of our small test cases after a reasonable number of runs. -// However, since Miri's scheduler is not even pre-emptive, there will -// always be possible interleavings (and possible execution outcomes), -// that can never be observed regardless of how weak memory emulation is -// implemented. // Test cases and their consistent outcomes are from // http://svr-pes20-cppmem.cl.cam.ac.uk/cppmem/ @@ -28,10 +19,9 @@ // M. Batty, S. Owens, S. Sarkar, P. Sewell and T. Weber, // "Mathematizing C++ concurrency", ACM SIGPLAN Notices, vol. 46, no. 1, pp. 55-66, 2011. // Available: https://ss265.host.cs.st-andrews.ac.uk/papers/n3132.pdf. -#![feature(atomic_from_mut)] +use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering::*; -use std::sync::atomic::{AtomicU16, AtomicU32, AtomicUsize}; use std::thread::{spawn, yield_now}; #[derive(Copy, Clone)] @@ -41,7 +31,7 @@ unsafe impl Send for EvilSend {} unsafe impl Sync for EvilSend {} // We can't create static items because we need to run each test -// multiple tests +// multiple times fn static_atomic(val: usize) -> &'static AtomicUsize { let ret = Box::leak(Box::new(AtomicUsize::new(val))); // A workaround to put the initialisation value in the store buffer @@ -190,26 +180,6 @@ fn test_mixed_access() { assert_eq!(r2, 2); } -// Strictly speaking, atomic accesses that imperfectly overlap with existing -// atomic objects are UB. Nonetheless we'd like to provide a sane value when -// the access is not racy. -fn test_imperfectly_overlapping_access() { - let mut qword = AtomicU32::new(42); - assert_eq!(qword.load(Relaxed), 42); - qword.store(u32::to_be(0xabbafafa), Relaxed); - - let qword_mut = qword.get_mut(); - - let dwords_mut = unsafe { std::mem::transmute::<&mut u32, &mut [u16; 2]>(qword_mut) }; - - let (hi_mut, lo_mut) = dwords_mut.split_at_mut(1); - - let (hi, lo) = (AtomicU16::from_mut(&mut hi_mut[0]), AtomicU16::from_mut(&mut lo_mut[0])); - - assert_eq!(u16::from_be(hi.load(Relaxed)), 0xabba); - assert_eq!(u16::from_be(lo.load(Relaxed)), 0xfafa); -} - // The following two tests are taken from Repairing Sequential Consistency in C/C++11 // by Lahav et al. // https://plv.mpi-sws.org/scfix/paper.pdf @@ -236,7 +206,6 @@ fn test_sc_store_buffering() { } pub fn main() { - test_imperfectly_overlapping_access(); // TODO: does this make chances of spurious success // "sufficiently low"? 
This also takes a long time to run, // prehaps each function should be its own test case so they diff --git a/tests/run-pass/concurrency/weak_memory.stderr b/tests/run-pass/weak_memory/consistency.stderr similarity index 100% rename from tests/run-pass/concurrency/weak_memory.stderr rename to tests/run-pass/weak_memory/consistency.stderr diff --git a/tests/run-pass/weak_memory/imperfectly_overlapping.rs b/tests/run-pass/weak_memory/imperfectly_overlapping.rs new file mode 100644 index 0000000000..2a8e8e5f32 --- /dev/null +++ b/tests/run-pass/weak_memory/imperfectly_overlapping.rs @@ -0,0 +1,29 @@ +// ignore-windows: Concurrency on Windows is not supported yet. +#![feature(atomic_from_mut)] + +use std::sync::atomic::Ordering::*; +use std::sync::atomic::{AtomicU16, AtomicU32}; + +// Strictly speaking, atomic accesses that imperfectly overlap with existing +// atomic objects are UB. Nonetheless we'd like to provide a sane value when +// the access is not racy. +fn test_same_thread() { + let mut qword = AtomicU32::new(42); + assert_eq!(qword.load(Relaxed), 42); + qword.store(u32::to_be(0xabbafafa), Relaxed); + + let qword_mut = qword.get_mut(); + + let dwords_mut = unsafe { std::mem::transmute::<&mut u32, &mut [u16; 2]>(qword_mut) }; + + let (hi_mut, lo_mut) = dwords_mut.split_at_mut(1); + + let (hi, lo) = (AtomicU16::from_mut(&mut hi_mut[0]), AtomicU16::from_mut(&mut lo_mut[0])); + + assert_eq!(u16::from_be(hi.load(Relaxed)), 0xabba); + assert_eq!(u16::from_be(lo.load(Relaxed)), 0xfafa); +} + +pub fn main() { + test_same_thread(); +} diff --git a/tests/run-pass/weak_memory/weak.rs b/tests/run-pass/weak_memory/weak.rs new file mode 100644 index 0000000000..ab0c20cc97 --- /dev/null +++ b/tests/run-pass/weak_memory/weak.rs @@ -0,0 +1,77 @@ +// ignore-windows: Concurrency on Windows is not supported yet. +// compile-flags: -Zmiri-ignore-leaks + +// Tests showing weak memory behaviours are exhibited. All tests +// return true when the desired behaviour is seen. +// This is scheduler and pseudo-RNG dependent, so each test is +// run multiple times until one try returns true. +// Spurious failure is possible, if you are really unlucky with +// the RNG. + +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering::*; +use std::thread::spawn; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +// We can't create static items because we need to run each test +// multiple times +fn static_atomic(val: usize) -> &'static AtomicUsize { + let ret = Box::leak(Box::new(AtomicUsize::new(val))); + // A workaround to put the initialisation value in the store buffer + ret.store(val, Relaxed); + ret +} + +fn relaxed() -> bool { + let x = static_atomic(0); + let j1 = spawn(move || { + x.store(1, Relaxed); + x.store(2, Relaxed); + }); + + let j2 = spawn(move || x.load(Relaxed)); + + j1.join().unwrap(); + let r2 = j2.join().unwrap(); + + r2 == 1 +} + +// https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf Figure 8 +fn seq_cst() -> bool { + let x = static_atomic(0); + + let j1 = spawn(move || { + x.store(1, Relaxed); + }); + + let j2 = spawn(move || { + x.store(2, SeqCst); + x.store(3, SeqCst); + }); + + let j3 = spawn(move || x.load(SeqCst)); + + j1.join().unwrap(); + j2.join().unwrap(); + let r3 = j3.join().unwrap(); + + r3 == 1 +} + +// Asserts that the function returns true at least once in 100 runs +macro_rules! 
assert_once { + ($f:ident) => { + assert!(std::iter::repeat_with(|| $f()).take(100).any(|x| x)); + }; +} + +pub fn main() { + assert_once!(relaxed); + assert_once!(seq_cst); +} diff --git a/tests/run-pass/weak_memory/weak.stderr b/tests/run-pass/weak_memory/weak.stderr new file mode 100644 index 0000000000..03676519d4 --- /dev/null +++ b/tests/run-pass/weak_memory/weak.stderr @@ -0,0 +1,2 @@ +warning: thread support is experimental and incomplete: weak memory effects are not emulated. + From 6040c9f50aa9a66f1cd6d30a92f9483dd51feebe Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Thu, 12 May 2022 18:57:03 +0100 Subject: [PATCH 17/46] Refactor store buffer search conditions --- src/weak_memory.rs | 72 +++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 42 deletions(-) diff --git a/src/weak_memory.rs b/src/weak_memory.rs index 46838c5c8a..223567d3ca 100644 --- a/src/weak_memory.rs +++ b/src/weak_memory.rs @@ -252,59 +252,47 @@ impl<'mir, 'tcx: 'mir> StoreBuffer { if !keep_searching { return false; } - // CoWR: if a store happens-before the current load, - // then we can't read-from anything earlier in modification order. - if store_elem.timestamp <= clocks.clock[store_elem.store_index] { - log::info!("Stopped due to coherent write-read"); - keep_searching = false; - return true; - } - // CoRR: if there was a load from this store which happened-before the current load, - // then we cannot read-from anything earlier in modification order. - if store_elem.loads.borrow().iter().any(|(&load_index, &load_timestamp)| { + keep_searching = if store_elem.timestamp <= clocks.clock[store_elem.store_index] { + // CoWR: if a store happens-before the current load, + // then we can't read-from anything earlier in modification order. + log::info!("Stopping due to coherent write-read"); + false + } else if store_elem.loads.borrow().iter().any(|(&load_index, &load_timestamp)| { load_timestamp <= clocks.clock[load_index] }) { - log::info!("Stopped due to coherent read-read"); - keep_searching = false; - return true; - } - - // The current load, which may be sequenced-after an SC fence, can only read-from - // the last store sequenced-before an SC fence in another thread (or any stores - // later than that SC fence) - if store_elem.timestamp <= clocks.fence_seqcst[store_elem.store_index] { - log::info!("Stopped due to coherent load sequenced after sc fence"); - keep_searching = false; - return true; - } - - // The current non-SC load can only read-from the latest SC store (or any stores later than that - // SC store) - if store_elem.timestamp <= clocks.write_seqcst[store_elem.store_index] + // CoRR: if there was a load from this store which happened-before the current load, + // then we cannot read-from anything earlier in modification order. 
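                // (e.g. if a load that already read this store happens-before us, we must
                // not go further back and return an older value, such as returning `1`
                // when `2` has already been observed: that would break read-read coherence)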
+ log::info!("Stopping due to coherent read-read"); + false + } else if store_elem.timestamp <= clocks.fence_seqcst[store_elem.store_index] { + // The current load, which may be sequenced-after an SC fence, can only read-from + // the last store sequenced-before an SC fence in another thread (or any stores + // later than that SC fence) + log::info!("Stopping due to coherent load sequenced after sc fence"); + false + } else if store_elem.timestamp <= clocks.write_seqcst[store_elem.store_index] && store_elem.is_seqcst { - log::info!("Stopped due to needing to load from the last SC store"); - keep_searching = false; - return true; - } - - // The current SC load can only read-from the last store sequenced-before - // the last SC fence (or any stores later than the SC fence) - if is_seqcst && store_elem.timestamp <= clocks.read_seqcst[store_elem.store_index] { - log::info!("Stopped due to sc load needing to load from the last SC store before an SC fence"); - keep_searching = false; - return true; - } + // The current non-SC load can only read-from the latest SC store (or any stores later than that + // SC store) + log::info!("Stopping due to needing to load from the last SC store"); + false + } else if is_seqcst && store_elem.timestamp <= clocks.read_seqcst[store_elem.store_index] { + // The current SC load can only read-from the last store sequenced-before + // the last SC fence (or any stores later than the SC fence) + log::info!("Stopping due to sc load needing to load from the last SC store before an SC fence"); + false + } else {true}; true }) .filter(|&store_elem| { - if is_seqcst { + if is_seqcst && store_elem.is_seqcst { // An SC load needs to ignore all but last store maked SC (stores not marked SC are not // affected) - let include = !(store_elem.is_seqcst && found_sc); - found_sc |= store_elem.is_seqcst; + let include = !found_sc; + found_sc = true; include } else { true From 13e34653461a274c5759f5c33b19285e3daa9df5 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Fri, 13 May 2022 00:15:57 +0100 Subject: [PATCH 18/46] Reduce the number of runs in consistency tests --- tests/run-pass/weak_memory/consistency.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/run-pass/weak_memory/consistency.rs b/tests/run-pass/weak_memory/consistency.rs index d7c44f6ac2..8705d0bc67 100644 --- a/tests/run-pass/weak_memory/consistency.rs +++ b/tests/run-pass/weak_memory/consistency.rs @@ -206,11 +206,7 @@ fn test_sc_store_buffering() { } pub fn main() { - // TODO: does this make chances of spurious success - // "sufficiently low"? 
This also takes a long time to run, - // prehaps each function should be its own test case so they - // can be run in parallel - for _ in 0..500 { + for _ in 0..100 { test_mixed_access(); test_load_buffering_acq_rel(); test_message_passing(); From 8739e45bef31493053816f350a268245b4c0b787 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Fri, 13 May 2022 23:23:58 +0100 Subject: [PATCH 19/46] Move data_race and weak_memory into a submodule --- src/{ => concurrency}/allocation_map.rs | 0 src/{ => concurrency}/data_race.rs | 16 ++++++++-------- src/concurrency/mod.rs | 3 +++ src/{ => concurrency}/weak_memory.rs | 17 +++++++++-------- src/lib.rs | 6 ++---- src/machine.rs | 6 +++++- src/thread.rs | 1 + 7 files changed, 28 insertions(+), 21 deletions(-) rename src/{ => concurrency}/allocation_map.rs (100%) rename src/{ => concurrency}/data_race.rs (99%) create mode 100644 src/concurrency/mod.rs rename src/{ => concurrency}/weak_memory.rs (97%) diff --git a/src/allocation_map.rs b/src/concurrency/allocation_map.rs similarity index 100% rename from src/allocation_map.rs rename to src/concurrency/allocation_map.rs diff --git a/src/data_race.rs b/src/concurrency/data_race.rs similarity index 99% rename from src/data_race.rs rename to src/concurrency/data_race.rs index 78b9b09f16..c0137bf86d 100644 --- a/src/data_race.rs +++ b/src/concurrency/data_race.rs @@ -100,10 +100,10 @@ pub enum AtomicFenceOp { /// of a thread, contains the happens-before clock and /// additional metadata to model atomic fence operations. #[derive(Clone, Default, Debug)] -pub struct ThreadClockSet { +pub(super) struct ThreadClockSet { /// The increasing clock representing timestamps /// that happen-before this thread. - pub(crate) clock: VClock, + pub(super) clock: VClock, /// The set of timestamps that will happen-before this /// thread once it performs an acquire fence. @@ -115,15 +115,15 @@ pub struct ThreadClockSet { /// Timestamps of the last SC fence performed by each /// thread, updated when this thread performs an SC fence - pub(crate) fence_seqcst: VClock, + pub(super) fence_seqcst: VClock, /// Timestamps of the last SC write performed by each /// thread, updated when this thread performs an SC fence - pub(crate) write_seqcst: VClock, + pub(super) write_seqcst: VClock, /// Timestamps of the last SC fence performed by each /// thread, updated when this thread performs an SC read - pub(crate) read_seqcst: VClock, + pub(super) read_seqcst: VClock, } impl ThreadClockSet { @@ -166,7 +166,7 @@ pub struct DataRace; /// common case where no atomic operations /// exists on the memory cell. #[derive(Clone, PartialEq, Eq, Default, Debug)] -pub struct AtomicMemoryCellClocks { +struct AtomicMemoryCellClocks { /// The clock-vector of the timestamp of the last atomic /// read operation performed by each thread. /// This detects potential data-races between atomic read @@ -1547,7 +1547,7 @@ impl GlobalState { /// Load the current vector clock in use and the current set of thread clocks /// in use for the vector. #[inline] - pub fn current_thread_state(&self) -> (VectorIdx, Ref<'_, ThreadClockSet>) { + pub(super) fn current_thread_state(&self) -> (VectorIdx, Ref<'_, ThreadClockSet>) { let index = self.current_index(); let ref_vector = self.vector_clocks.borrow(); let clocks = Ref::map(ref_vector, |vec| &vec[index]); @@ -1557,7 +1557,7 @@ impl GlobalState { /// Load the current vector clock in use and the current set of thread clocks /// in use for the vector mutably for modification. 
#[inline] - pub fn current_thread_state_mut(&self) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { + pub(super) fn current_thread_state_mut(&self) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { let index = self.current_index(); let ref_vector = self.vector_clocks.borrow_mut(); let clocks = RefMut::map(ref_vector, |vec| &mut vec[index]); diff --git a/src/concurrency/mod.rs b/src/concurrency/mod.rs new file mode 100644 index 0000000000..ad1586bbf0 --- /dev/null +++ b/src/concurrency/mod.rs @@ -0,0 +1,3 @@ +mod allocation_map; +pub mod data_race; +pub mod weak_memory; diff --git a/src/weak_memory.rs b/src/concurrency/weak_memory.rs similarity index 97% rename from src/weak_memory.rs rename to src/concurrency/weak_memory.rs index 223567d3ca..5bdadc804f 100644 --- a/src/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -60,10 +60,11 @@ use std::{ use rustc_const_eval::interpret::{AllocRange, InterpResult, ScalarMaybeUninit}; use rustc_data_structures::fx::FxHashMap; -use crate::{ +use crate::{Tag, VClock, VTimestamp, VectorIdx}; + +use super::{ allocation_map::{AccessType, AllocationMap}, data_race::{GlobalState, ThreadClockSet}, - Tag, VClock, VTimestamp, VectorIdx, }; pub type AllocExtra = StoreBufferAlloc; @@ -82,7 +83,7 @@ pub struct StoreBufferAlloc { } #[derive(Debug, Clone, PartialEq, Eq)] -pub struct StoreBuffer { +pub(super) struct StoreBuffer { // Stores to this location in modification order buffer: VecDeque, } @@ -112,7 +113,7 @@ impl StoreBufferAlloc { } /// Gets a store buffer associated with an atomic object in this allocation - pub fn get_store_buffer(&self, range: AllocRange) -> Ref<'_, StoreBuffer> { + pub(super) fn get_store_buffer(&self, range: AllocRange) -> Ref<'_, StoreBuffer> { let access_type = self.store_buffer.borrow().access_type(range); let index = match access_type { AccessType::PerfectlyOverlapping(index) => index, @@ -143,7 +144,7 @@ impl StoreBufferAlloc { } /// Gets a mutable store buffer associated with an atomic object in this allocation - pub fn get_store_buffer_mut(&mut self, range: AllocRange) -> &mut StoreBuffer { + pub(super) fn get_store_buffer_mut(&mut self, range: AllocRange) -> &mut StoreBuffer { let buffer = self.store_buffer.get_mut(); let access_type = buffer.access_type(range); let index = match access_type { @@ -174,7 +175,7 @@ impl Default for StoreBuffer { impl<'mir, 'tcx: 'mir> StoreBuffer { /// Reads from the last store in modification order - pub fn read_from_last_store(&self, global: &GlobalState) { + pub(super) fn read_from_last_store(&self, global: &GlobalState) { let store_elem = self.buffer.back(); if let Some(store_elem) = store_elem { let (index, clocks) = global.current_thread_state(); @@ -182,7 +183,7 @@ impl<'mir, 'tcx: 'mir> StoreBuffer { } } - pub fn buffered_read( + pub(super) fn buffered_read( &self, global: &GlobalState, is_seqcst: bool, @@ -213,7 +214,7 @@ impl<'mir, 'tcx: 'mir> StoreBuffer { Ok(loaded) } - pub fn buffered_write( + pub(super) fn buffered_write( &mut self, val: ScalarMaybeUninit, global: &GlobalState, diff --git a/src/lib.rs b/src/lib.rs index 3270a57c49..982d3873d5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,8 +31,7 @@ extern crate rustc_session; extern crate rustc_span; extern crate rustc_target; -mod allocation_map; -mod data_race; +mod concurrency; mod diagnostics; mod eval; mod helpers; @@ -46,7 +45,6 @@ mod stacked_borrows; mod sync; mod thread; mod vector_clock; -mod weak_memory; // Establish a "crate-wide prelude": we often import `crate::*`. 
@@ -65,7 +63,7 @@ pub use crate::shims::time::EvalContextExt as _; pub use crate::shims::tls::{EvalContextExt as _, TlsData}; pub use crate::shims::EvalContextExt as _; -pub use crate::data_race::{ +pub use crate::concurrency::data_race::{ AtomicFenceOp, AtomicReadOp, AtomicRwOp, AtomicWriteOp, EvalContextExt as DataRaceEvalContextExt, }; diff --git a/src/machine.rs b/src/machine.rs index ca7fff7b08..41c852747a 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -28,7 +28,11 @@ use rustc_span::Symbol; use rustc_target::abi::Size; use rustc_target::spec::abi::Abi; -use crate::{shims::unix::FileHandler, *}; +use crate::{ + concurrency::{data_race, weak_memory}, + shims::unix::FileHandler, + *, +}; // Some global facts about the emulated machine. pub const PAGE_SIZE: u64 = 4 * 1024; // FIXME: adjust to target architecture diff --git a/src/thread.rs b/src/thread.rs index b6fb866f71..0d702fd9c8 100644 --- a/src/thread.rs +++ b/src/thread.rs @@ -12,6 +12,7 @@ use rustc_hir::def_id::DefId; use rustc_index::vec::{Idx, IndexVec}; use rustc_middle::mir::Mutability; +use crate::concurrency::data_race; use crate::sync::SynchronizationState; use crate::*; From 335667c7749930f9f30e7045bc29690dd81a85f8 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sat, 14 May 2022 01:45:21 +0100 Subject: [PATCH 20/46] Move buffered functions into their own ext trait --- src/concurrency/data_race.rs | 89 +++-------------------- src/concurrency/weak_memory.rs | 124 +++++++++++++++++++++++++++++++-- 2 files changed, 128 insertions(+), 85 deletions(-) diff --git a/src/concurrency/data_race.rs b/src/concurrency/data_race.rs index c0137bf86d..22b72dadad 100644 --- a/src/concurrency/data_race.rs +++ b/src/concurrency/data_race.rs @@ -51,7 +51,6 @@ use std::{ mem, }; -use rustc_const_eval::interpret::alloc_range; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_index::vec::{Idx, IndexVec}; use rustc_middle::{mir, ty::layout::TyAndLayout}; @@ -59,6 +58,8 @@ use rustc_target::abi::Size; use crate::*; +use super::weak_memory::EvalContextExt as _; + pub type AllocExtra = VClockAlloc; /// Valid atomic read-write operations, alias of atomic::Ordering (not non-exhaustive). @@ -517,29 +518,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // the *value* (including the associated provenance if this is an AtomicPtr) at this location. // Only metadata on the location itself is used. let scalar = this.allow_data_races_ref(move |this| this.read_scalar(&place.into()))?; - - if let Some(global) = &this.machine.data_race { - let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; - if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { - if atomic == AtomicReadOp::SeqCst { - global.sc_read(); - } - let mut rng = this.machine.rng.borrow_mut(); - let buffer = - alloc_buffers.get_store_buffer(alloc_range(base_offset, place.layout.size)); - let loaded = buffer.buffered_read( - global, - atomic == AtomicReadOp::SeqCst, - &mut *rng, - || this.validate_atomic_load(place, atomic), - )?; - - return Ok(loaded.unwrap_or(scalar)); - } - } - - this.validate_atomic_load(place, atomic)?; - Ok(scalar) + this.buffered_atomic_read(place, atomic, scalar, || { + this.validate_atomic_load(place, atomic) + }) } /// Perform an atomic write operation at the memory location. 
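The shape of the call above is worth spelling out: the read path hands the weak-memory layer
the value that is latest in modification order (the plain `read_scalar` result) together with
a closure that performs the usual non-buffered validation, and gets back either a buffered
value or that fallback. A minimal, self-contained sketch of the calling pattern, using
placeholder names and a plain `u64` in place of the interpreter's scalar type (none of these
are Miri's real types):

trait BufferedRead {
    /// Return a value from this location's store buffer if it has one;
    /// otherwise fall back to `latest_in_mo`, the value that is latest in
    /// modification order. `validate` runs the ordinary non-buffered checks.
    fn buffered_read<E>(
        &self,
        latest_in_mo: u64,
        validate: impl FnOnce() -> Result<(), E>,
    ) -> Result<u64, E>;
}

/// A location with no store buffer yet: validate, then observe the latest value.
struct NoBuffer;

impl BufferedRead for NoBuffer {
    fn buffered_read<E>(
        &self,
        latest_in_mo: u64,
        validate: impl FnOnce() -> Result<(), E>,
    ) -> Result<u64, E> {
        validate()?;
        Ok(latest_in_mo)
    }
}

fn main() -> Result<(), String> {
    // The caller supplies both the fallback value and the validation closure,
    // mirroring the shape of the `read_scalar_atomic` call above.
    let loaded = NoBuffer.buffered_read(42, || Ok::<(), String>(()))?;
    assert_eq!(loaded, 42);
    Ok(())
}

Passing validation in as a closure preserves the ordering the buffered read needs: a store
element can be picked from the buffer before the race detector updates the thread's clock,
as the comments inside `buffered_read` explain.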
@@ -551,23 +532,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); this.allow_data_races_mut(move |this| this.write_scalar(val, &(*dest).into()))?; - this.validate_atomic_store(dest, atomic)?; - let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr)?; - if let ( - crate::AllocExtra { weak_memory: Some(alloc_buffers), .. }, - crate::Evaluator { data_race: Some(global), .. }, - ) = this.get_alloc_extra_mut(alloc_id)? - { - if atomic == AtomicWriteOp::SeqCst { - global.sc_write(); - } - let buffer = - alloc_buffers.get_store_buffer_mut(alloc_range(base_offset, dest.layout.size)); - buffer.buffered_write(val, global, atomic == AtomicWriteOp::SeqCst)?; - } - - Ok(()) + this.buffered_atomic_write(val, dest, atomic) } /// Perform an atomic operation on a memory location. @@ -695,50 +661,13 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // in the modification order. // Since `old` is only a value and not the store element, we need to separately // find it in our store buffer and perform load_impl on it. - if let Some(global) = &this.machine.data_race { - if fail == AtomicReadOp::SeqCst { - global.sc_read(); - } - let size = place.layout.size; - let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; - if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { - if global.multi_threaded.get() { - let buffer = alloc_buffers.get_store_buffer(alloc_range(base_offset, size)); - buffer.read_from_last_store(global); - } - } - } + this.perform_read_on_buffered_latest(place, fail)?; } // Return the old value. Ok(res) } - fn buffered_atomic_rmw( - &mut self, - new_val: ScalarMaybeUninit, - place: &MPlaceTy<'tcx, Tag>, - atomic: AtomicRwOp, - ) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); - let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; - if let ( - crate::AllocExtra { weak_memory: Some(alloc_buffers), .. }, - crate::Evaluator { data_race: Some(global), .. }, - ) = this.get_alloc_extra_mut(alloc_id)? - { - if atomic == AtomicRwOp::SeqCst { - global.sc_read(); - global.sc_write(); - } - let range = alloc_range(base_offset, place.layout.size); - let buffer = alloc_buffers.get_store_buffer_mut(range); - buffer.read_from_last_store(global); - buffer.buffered_write(new_val, global, atomic == AtomicRwOp::SeqCst)?; - } - Ok(()) - } - /// Update the data-race detector for an atomic read occurring at the /// associated memory-place and on the current thread. fn validate_atomic_load( @@ -1572,13 +1501,13 @@ impl GlobalState { } // SC ATOMIC STORE rule in the paper. - fn sc_write(&self) { + pub(super) fn sc_write(&self) { let (index, clocks) = self.current_thread_state(); self.last_sc_write.borrow_mut().set_at_index(&clocks.clock, index); } // SC ATOMIC READ rule in the paper. 
- fn sc_read(&self) { + pub(super) fn sc_read(&self) { let (.., mut clocks) = self.current_thread_state_mut(); clocks.read_seqcst.join(&self.last_sc_fence.borrow()); } diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index 5bdadc804f..fba7d18cdf 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -57,10 +57,12 @@ use std::{ collections::VecDeque, }; -use rustc_const_eval::interpret::{AllocRange, InterpResult, ScalarMaybeUninit}; +use rustc_const_eval::interpret::{ + alloc_range, AllocRange, InterpResult, MPlaceTy, ScalarMaybeUninit, +}; use rustc_data_structures::fx::FxHashMap; -use crate::{Tag, VClock, VTimestamp, VectorIdx}; +use crate::{AtomicReadOp, AtomicRwOp, AtomicWriteOp, Tag, VClock, VTimestamp, VectorIdx}; use super::{ allocation_map::{AccessType, AllocationMap}, @@ -113,7 +115,7 @@ impl StoreBufferAlloc { } /// Gets a store buffer associated with an atomic object in this allocation - pub(super) fn get_store_buffer(&self, range: AllocRange) -> Ref<'_, StoreBuffer> { + fn get_store_buffer(&self, range: AllocRange) -> Ref<'_, StoreBuffer> { let access_type = self.store_buffer.borrow().access_type(range); let index = match access_type { AccessType::PerfectlyOverlapping(index) => index, @@ -144,7 +146,7 @@ impl StoreBufferAlloc { } /// Gets a mutable store buffer associated with an atomic object in this allocation - pub(super) fn get_store_buffer_mut(&mut self, range: AllocRange) -> &mut StoreBuffer { + fn get_store_buffer_mut(&mut self, range: AllocRange) -> &mut StoreBuffer { let buffer = self.store_buffer.get_mut(); let access_type = buffer.access_type(range); let index = match access_type { @@ -201,7 +203,7 @@ impl<'mir, 'tcx: 'mir> StoreBuffer { self.fetch_store(is_seqcst, &clocks, &mut *rng) }; - // Unlike in write_scalar_atomic, thread clock updates have to be done + // Unlike in buffered_atomic_write, thread clock updates have to be done // after we've picked a store element from the store buffer, as presented // in ATOMIC LOAD rule of the paper. This is because fetch_store // requires access to ThreadClockSet.clock, which is updated by the race detector @@ -353,3 +355,115 @@ impl StoreElement { self.val } } + +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {} +pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: + crate::MiriEvalContextExt<'mir, 'tcx> +{ + fn buffered_atomic_rmw( + &mut self, + new_val: ScalarMaybeUninit, + place: &MPlaceTy<'tcx, Tag>, + atomic: AtomicRwOp, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let ( + crate::AllocExtra { weak_memory: Some(alloc_buffers), .. }, + crate::Evaluator { data_race: Some(global), .. }, + ) = this.get_alloc_extra_mut(alloc_id)? + { + if atomic == AtomicRwOp::SeqCst { + global.sc_read(); + global.sc_write(); + } + let range = alloc_range(base_offset, place.layout.size); + let buffer = alloc_buffers.get_store_buffer_mut(range); + buffer.read_from_last_store(global); + buffer.buffered_write(new_val, global, atomic == AtomicRwOp::SeqCst)?; + } + Ok(()) + } + + fn buffered_atomic_read( + &self, + place: &MPlaceTy<'tcx, Tag>, + atomic: AtomicReadOp, + latest_in_mo: ScalarMaybeUninit, + validate: impl FnOnce() -> InterpResult<'tcx>, + ) -> InterpResult<'tcx, ScalarMaybeUninit> { + let this = self.eval_context_ref(); + if let Some(global) = &this.machine.data_race { + let (alloc_id, base_offset, ..) 
= this.ptr_get_alloc_id(place.ptr)?; + if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { + if atomic == AtomicReadOp::SeqCst { + global.sc_read(); + } + let mut rng = this.machine.rng.borrow_mut(); + let buffer = + alloc_buffers.get_store_buffer(alloc_range(base_offset, place.layout.size)); + let loaded = buffer.buffered_read( + global, + atomic == AtomicReadOp::SeqCst, + &mut *rng, + validate, + )?; + + return Ok(loaded.unwrap_or(latest_in_mo)); + } + } + + // Race detector or weak memory disabled, simply read the latest value + validate()?; + Ok(latest_in_mo) + } + + fn buffered_atomic_write( + &mut self, + val: ScalarMaybeUninit, + dest: &MPlaceTy<'tcx, Tag>, + atomic: AtomicWriteOp, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr)?; + if let ( + crate::AllocExtra { weak_memory: Some(alloc_buffers), .. }, + crate::Evaluator { data_race: Some(global), .. }, + ) = this.get_alloc_extra_mut(alloc_id)? + { + if atomic == AtomicWriteOp::SeqCst { + global.sc_write(); + } + let buffer = + alloc_buffers.get_store_buffer_mut(alloc_range(base_offset, dest.layout.size)); + buffer.buffered_write(val, global, atomic == AtomicWriteOp::SeqCst)?; + } + + // Caller should've written to dest with the vanilla scalar write, we do nothing here + Ok(()) + } + + /// Caller should never need to consult the store buffer for the latest value. + /// This function is used exclusively for failed atomic_compare_exchange_scalar + /// to perform load_impl on the latest store element + fn perform_read_on_buffered_latest( + &self, + place: &MPlaceTy<'tcx, Tag>, + atomic: AtomicReadOp, + ) -> InterpResult<'tcx> { + let this = self.eval_context_ref(); + + if let Some(global) = &this.machine.data_race { + if atomic == AtomicReadOp::SeqCst { + global.sc_read(); + } + let size = place.layout.size; + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { + let buffer = alloc_buffers.get_store_buffer(alloc_range(base_offset, size)); + buffer.read_from_last_store(global); + } + } + Ok(()) + } +} From 5a4a1bfccc50e0e6c6df98e4b95837de79e01023 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sun, 15 May 2022 22:29:40 +0100 Subject: [PATCH 21/46] Remove incorrect comment --- src/concurrency/weak_memory.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index fba7d18cdf..f6466724b5 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -127,9 +127,7 @@ impl StoreBufferAlloc { } AccessType::ImperfectlyOverlapping(index_range) => { // Accesses that imperfectly overlaps with existing atomic objects - // do not have well-defined behaviours. But we don't throw a UB here - // because we have (or will) checked that all bytes in the current - // access are non-racy. + // do not have well-defined behaviours. // The behaviour here is that we delete all the existing objects this // access touches, and allocate a new and empty one for the exact range. 
// A read on an empty buffer returns None, which means the program will From 6b54c9237789c275a82eeb483052b302f490b57c Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Mon, 16 May 2022 20:00:11 +0100 Subject: [PATCH 22/46] Throw UB on imperfectly overlapping access --- src/concurrency/allocation_map.rs | 5 -- src/concurrency/weak_memory.rs | 53 ++++++++++--------- .../weak_memory/imperfectly_overlapping.rs | 15 +++--- 3 files changed, 36 insertions(+), 37 deletions(-) rename tests/{run-pass => compile-fail}/weak_memory/imperfectly_overlapping.rs (54%) diff --git a/src/concurrency/allocation_map.rs b/src/concurrency/allocation_map.rs index 6c14ce1654..5b6c06d59e 100644 --- a/src/concurrency/allocation_map.rs +++ b/src/concurrency/allocation_map.rs @@ -122,11 +122,6 @@ impl AllocationMap { debug_assert!(range.end() <= self.v[index + 1].range.start); } } - - /// Removes an object at given position - pub fn remove(&mut self, index: Position) -> T { - self.v.remove(index).data - } } impl Index for AllocationMap { diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index f6466724b5..a796b56e2c 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -85,7 +85,7 @@ pub struct StoreBufferAlloc { } #[derive(Debug, Clone, PartialEq, Eq)] -pub(super) struct StoreBuffer { +struct StoreBuffer { // Stores to this location in modification order buffer: VecDeque, } @@ -115,7 +115,10 @@ impl StoreBufferAlloc { } /// Gets a store buffer associated with an atomic object in this allocation - fn get_store_buffer(&self, range: AllocRange) -> Ref<'_, StoreBuffer> { + fn get_store_buffer<'tcx>( + &self, + range: AllocRange, + ) -> InterpResult<'tcx, Ref<'_, StoreBuffer>> { let access_type = self.store_buffer.borrow().access_type(range); let index = match access_type { AccessType::PerfectlyOverlapping(index) => index, @@ -128,23 +131,23 @@ impl StoreBufferAlloc { AccessType::ImperfectlyOverlapping(index_range) => { // Accesses that imperfectly overlaps with existing atomic objects // do not have well-defined behaviours. - // The behaviour here is that we delete all the existing objects this - // access touches, and allocate a new and empty one for the exact range. - // A read on an empty buffer returns None, which means the program will - // observe the latest value in modification order at every byte. - let mut buffer = self.store_buffer.borrow_mut(); - for index in index_range.clone() { - buffer.remove(index); + // FIXME: if this access happens before all previous accesses on every object it overlaps + // with, then we would like to tolerate it. However this is not easy to check. 
+ if index_range.start + 1 == index_range.end { + throw_ub_format!("mixed-size access on an existing atomic object"); + } else { + throw_ub_format!("access overlaps with multiple existing atomic objects"); } - buffer.insert(index_range.start, range, StoreBuffer::default()); - index_range.start } }; - Ref::map(self.store_buffer.borrow(), |buffer| &buffer[index]) + Ok(Ref::map(self.store_buffer.borrow(), |buffer| &buffer[index])) } /// Gets a mutable store buffer associated with an atomic object in this allocation - fn get_store_buffer_mut(&mut self, range: AllocRange) -> &mut StoreBuffer { + fn get_store_buffer_mut<'tcx>( + &mut self, + range: AllocRange, + ) -> InterpResult<'tcx, &mut StoreBuffer> { let buffer = self.store_buffer.get_mut(); let access_type = buffer.access_type(range); let index = match access_type { @@ -154,14 +157,14 @@ impl StoreBufferAlloc { index } AccessType::ImperfectlyOverlapping(index_range) => { - for index in index_range.clone() { - buffer.remove(index); + if index_range.start + 1 == index_range.end { + throw_ub_format!("mixed-size access on an existing atomic object"); + } else { + throw_ub_format!("access overlaps with multiple existing atomic objects"); } - buffer.insert(index_range.start, range, StoreBuffer::default()); - index_range.start } }; - &mut buffer[index] + Ok(&mut buffer[index]) } } @@ -175,7 +178,7 @@ impl Default for StoreBuffer { impl<'mir, 'tcx: 'mir> StoreBuffer { /// Reads from the last store in modification order - pub(super) fn read_from_last_store(&self, global: &GlobalState) { + fn read_from_last_store(&self, global: &GlobalState) { let store_elem = self.buffer.back(); if let Some(store_elem) = store_elem { let (index, clocks) = global.current_thread_state(); @@ -183,7 +186,7 @@ impl<'mir, 'tcx: 'mir> StoreBuffer { } } - pub(super) fn buffered_read( + fn buffered_read( &self, global: &GlobalState, is_seqcst: bool, @@ -214,7 +217,7 @@ impl<'mir, 'tcx: 'mir> StoreBuffer { Ok(loaded) } - pub(super) fn buffered_write( + fn buffered_write( &mut self, val: ScalarMaybeUninit, global: &GlobalState, @@ -376,7 +379,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: global.sc_write(); } let range = alloc_range(base_offset, place.layout.size); - let buffer = alloc_buffers.get_store_buffer_mut(range); + let buffer = alloc_buffers.get_store_buffer_mut(range)?; buffer.read_from_last_store(global); buffer.buffered_write(new_val, global, atomic == AtomicRwOp::SeqCst)?; } @@ -399,7 +402,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: } let mut rng = this.machine.rng.borrow_mut(); let buffer = - alloc_buffers.get_store_buffer(alloc_range(base_offset, place.layout.size)); + alloc_buffers.get_store_buffer(alloc_range(base_offset, place.layout.size))?; let loaded = buffer.buffered_read( global, atomic == AtomicReadOp::SeqCst, @@ -433,7 +436,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: global.sc_write(); } let buffer = - alloc_buffers.get_store_buffer_mut(alloc_range(base_offset, dest.layout.size)); + alloc_buffers.get_store_buffer_mut(alloc_range(base_offset, dest.layout.size))?; buffer.buffered_write(val, global, atomic == AtomicWriteOp::SeqCst)?; } @@ -458,7 +461,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: let size = place.layout.size; let (alloc_id, base_offset, ..) 
= this.ptr_get_alloc_id(place.ptr)?; if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { - let buffer = alloc_buffers.get_store_buffer(alloc_range(base_offset, size)); + let buffer = alloc_buffers.get_store_buffer(alloc_range(base_offset, size))?; buffer.read_from_last_store(global); } } diff --git a/tests/run-pass/weak_memory/imperfectly_overlapping.rs b/tests/compile-fail/weak_memory/imperfectly_overlapping.rs similarity index 54% rename from tests/run-pass/weak_memory/imperfectly_overlapping.rs rename to tests/compile-fail/weak_memory/imperfectly_overlapping.rs index 2a8e8e5f32..e3f89b5b68 100644 --- a/tests/run-pass/weak_memory/imperfectly_overlapping.rs +++ b/tests/compile-fail/weak_memory/imperfectly_overlapping.rs @@ -1,16 +1,15 @@ // ignore-windows: Concurrency on Windows is not supported yet. #![feature(atomic_from_mut)] +#![feature(core_intrinsics)] +use std::intrinsics::atomic_load; use std::sync::atomic::Ordering::*; use std::sync::atomic::{AtomicU16, AtomicU32}; -// Strictly speaking, atomic accesses that imperfectly overlap with existing -// atomic objects are UB. Nonetheless we'd like to provide a sane value when -// the access is not racy. fn test_same_thread() { let mut qword = AtomicU32::new(42); assert_eq!(qword.load(Relaxed), 42); - qword.store(u32::to_be(0xabbafafa), Relaxed); + qword.store(0xabbafafa, Relaxed); let qword_mut = qword.get_mut(); @@ -18,10 +17,12 @@ fn test_same_thread() { let (hi_mut, lo_mut) = dwords_mut.split_at_mut(1); - let (hi, lo) = (AtomicU16::from_mut(&mut hi_mut[0]), AtomicU16::from_mut(&mut lo_mut[0])); + let (hi, _) = (AtomicU16::from_mut(&mut hi_mut[0]), AtomicU16::from_mut(&mut lo_mut[0])); - assert_eq!(u16::from_be(hi.load(Relaxed)), 0xabba); - assert_eq!(u16::from_be(lo.load(Relaxed)), 0xfafa); + unsafe { + //Equivalent to: hi.load(Ordering::SeqCst) + atomic_load(hi.get_mut() as *mut u16); //~ ERROR: mixed-size access on an existing atomic object + } } pub fn main() { From 577054c6ded13a70ee07d6fc3397518ae1e81710 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Mon, 16 May 2022 22:26:38 +0100 Subject: [PATCH 23/46] Rename variables in AllocationMap --- src/concurrency/allocation_map.rs | 73 ++++++++++++++++--------------- src/concurrency/weak_memory.rs | 32 +++++++------- 2 files changed, 54 insertions(+), 51 deletions(-) diff --git a/src/concurrency/allocation_map.rs b/src/concurrency/allocation_map.rs index 5b6c06d59e..2524389c0b 100644 --- a/src/concurrency/allocation_map.rs +++ b/src/concurrency/allocation_map.rs @@ -49,7 +49,7 @@ impl AllocationMap { loop { if left == right { // No element contains the given offset. But the - // index is where such element should be placed at. + // position is where such element should be placed at. 
return Err(left); } let candidate = left.checked_add(right).unwrap() / 2; @@ -73,53 +73,56 @@ impl AllocationMap { /// an existing allocation pub fn access_type(&self, range: AllocRange) -> AccessType { match self.find_offset(range.start) { - Ok(index) => { + Ok(pos) => { // Start of the range belongs to an existing object, now let's check the overlapping situation - let elem = &self.v[index]; + let elem = &self.v[pos]; // FIXME: derive Eq for AllocRange in rustc if elem.range.start == range.start && elem.range.size == range.size { // Happy case: perfectly overlapping access - AccessType::PerfectlyOverlapping(index) + AccessType::PerfectlyOverlapping(pos) } else { // FIXME: add a last() method to AllocRange that returns the last inclusive offset (end() is exclusive) - let end_index = match self.find_offset(range.end() - Size::from_bytes(1)) { - // If the end lands in an existing object, add one to get the exclusive index - Ok(inclusive) => inclusive + 1, - Err(exclusive) => exclusive, + let end_pos = match self.find_offset(range.end() - Size::from_bytes(1)) { + // If the end lands in an existing object, add one to get the exclusive position + Ok(inclusive_pos) => inclusive_pos + 1, + Err(exclusive_pos) => exclusive_pos, }; - AccessType::ImperfectlyOverlapping(index..end_index) + AccessType::ImperfectlyOverlapping(pos..end_pos) } } - Err(index) => { + Err(pos) => { // Start of the range doesn't belong to an existing object match self.find_offset(range.end() - Size::from_bytes(1)) { // Neither does the end - Err(end_index) => - if index == end_index { + Err(end_pos) => + if pos == end_pos { // There's nothing between the start and the end, so the range thing is empty - AccessType::Empty(index) + AccessType::Empty(pos) } else { // Otherwise we have entirely covered an existing object - AccessType::ImperfectlyOverlapping(index..end_index) + AccessType::ImperfectlyOverlapping(pos..end_pos) }, // Otherwise at least part of it overlaps with something else - Ok(end_index) => AccessType::ImperfectlyOverlapping(index..end_index + 1), + Ok(end_pos) => AccessType::ImperfectlyOverlapping(pos..end_pos + 1), } } } } /// Inserts an object and its occupied range at given position - pub fn insert(&mut self, index: Position, range: AllocRange, data: T) { - self.v.insert(index, Elem { range, data }); + // The Position can be calculated from AllocRange, but the only user of AllocationMap + // always calls access_type before calling insert/index/index_mut, and we don't + // want to repeat the binary search on each time, so we ask the caller to supply Position + pub fn insert_at_pos(&mut self, pos: Position, range: AllocRange, data: T) { + self.v.insert(pos, Elem { range, data }); // If we aren't the first element, then our start must be greater than the preivous element's end - if index > 0 { - debug_assert!(self.v[index - 1].range.end() <= range.start); + if pos > 0 { + debug_assert!(self.v[pos - 1].range.end() <= range.start); } // If we aren't the last element, then our end must be smaller than next element's start - if index < self.v.len() - 1 { - debug_assert!(range.end() <= self.v[index + 1].range.start); + if pos < self.v.len() - 1 { + debug_assert!(range.end() <= self.v[pos + 1].range.start); } } } @@ -127,14 +130,14 @@ impl AllocationMap { impl Index for AllocationMap { type Output = T; - fn index(&self, index: usize) -> &Self::Output { - &self.v[index].data + fn index(&self, pos: Position) -> &Self::Output { + &self.v[pos].data } } impl IndexMut for AllocationMap { - fn index_mut(&mut self, index: 
usize) -> &mut Self::Output { - &mut self.v[index].data + fn index_mut(&mut self, pos: Position) -> &mut Self::Output { + &mut self.v[pos].data } } @@ -150,10 +153,10 @@ mod tests { let four = Size::from_bytes(4); let map = AllocationMap::<()>::new(); - // Correctly tells where we should insert the first element (at index 0) + // Correctly tells where we should insert the first element (at position 0) assert_eq!(map.find_offset(Size::from_bytes(3)), Err(0)); - // Correctly tells the access type along with the supposed index + // Correctly tells the access type along with the supposed position assert_eq!(map.access_type(alloc_range(Size::ZERO, four)), AccessType::Empty(0)); } @@ -166,10 +169,10 @@ mod tests { // |_|_|_|_|#|#|#|#|_|_|_|_|... // 0 1 2 3 4 5 6 7 8 9 a b c d - map.insert(0, alloc_range(four, four), "#"); + map.insert_at_pos(0, alloc_range(four, four), "#"); // |_|_|_|_|#|#|#|#|_|_|_|_|... // 0 ^ ^ ^ ^ 5 6 7 8 9 a b c d - map.insert(0, alloc_range(Size::from_bytes(1), four), "@"); + map.insert_at_pos(0, alloc_range(Size::from_bytes(1), four), "@"); } #[test] @@ -180,7 +183,7 @@ mod tests { // |#|#|#|#|_|_|... // 0 1 2 3 4 5 - map.insert(0, alloc_range(Size::ZERO, four), "#"); + map.insert_at_pos(0, alloc_range(Size::ZERO, four), "#"); // |#|#|#|#|_|_|... // 0 1 2 3 ^ 5 assert_eq!(map.find_offset(four), Err(1)); @@ -191,7 +194,7 @@ mod tests { let eight = Size::from_bytes(8); // |#|#|#|#|_|_|_|_|@|@|@|@|_|_|... // 0 1 2 3 4 5 6 7 8 9 a b c d - map.insert(1, alloc_range(eight, four), "@"); + map.insert_at_pos(1, alloc_range(eight, four), "@"); // |#|#|#|#|_|_|_|_|@|@|@|@|_|_|... // 0 1 2 3 4 5 6 ^ 8 9 a b c d assert_eq!(map.find_offset(Size::from_bytes(7)), Err(1)); @@ -208,7 +211,7 @@ mod tests { // |#|#|#|#|_|_|... // 0 1 2 3 4 5 - map.insert(0, alloc_range(Size::ZERO, four), "#"); + map.insert_at_pos(0, alloc_range(Size::ZERO, four), "#"); // |#|#|#|#|_|_|... // ^ ^ ^ ^ 4 5 assert_eq!(map.find_offset(Size::ZERO), Ok(0)); @@ -219,7 +222,7 @@ mod tests { // |#|#|#|#|@|@|@|@|_|... // 0 1 2 3 4 5 6 7 8 - map.insert(1, alloc_range(four, four), "@"); + map.insert_at_pos(1, alloc_range(four, four), "@"); // |#|#|#|#|@|@|@|@|_|... // 0 1 2 3 ^ ^ ^ ^ 8 assert_eq!(map.find_offset(four), Ok(1)); @@ -234,7 +237,7 @@ mod tests { // |_|_|_|_|#|#|#|#|_|_|_|_|... // 0 1 2 3 4 5 6 7 8 9 a b c d - map.insert(0, alloc_range(four, four), "#"); + map.insert_at_pos(0, alloc_range(four, four), "#"); // |_|_|_|_|#|#|#|#|_|_|_|_|... // 0 1 ^ ^ ^ ^ 6 7 8 9 a b c d assert_eq!( @@ -256,7 +259,7 @@ mod tests { // |_|_|_|_|#|#|#|#|_|_|@|@|_|_|... // 0 1 2 3 4 5 6 7 8 9 a b c d - map.insert(1, alloc_range(Size::from_bytes(10), Size::from_bytes(2)), "@"); + map.insert_at_pos(1, alloc_range(Size::from_bytes(10), Size::from_bytes(2)), "@"); // |_|_|_|_|#|#|#|#|_|_|@|@|_|_|... 
// 0 1 2 3 4 5 ^ ^ ^ ^ ^ ^ ^ ^ assert_eq!( diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index a796b56e2c..bd4ae06892 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -120,27 +120,27 @@ impl StoreBufferAlloc { range: AllocRange, ) -> InterpResult<'tcx, Ref<'_, StoreBuffer>> { let access_type = self.store_buffer.borrow().access_type(range); - let index = match access_type { - AccessType::PerfectlyOverlapping(index) => index, - AccessType::Empty(index) => { + let pos = match access_type { + AccessType::PerfectlyOverlapping(pos) => pos, + AccessType::Empty(pos) => { // First atomic access on this range, allocate a new StoreBuffer let mut buffer = self.store_buffer.borrow_mut(); - buffer.insert(index, range, StoreBuffer::default()); - index + buffer.insert_at_pos(pos, range, StoreBuffer::default()); + pos } - AccessType::ImperfectlyOverlapping(index_range) => { + AccessType::ImperfectlyOverlapping(pos_range) => { // Accesses that imperfectly overlaps with existing atomic objects // do not have well-defined behaviours. // FIXME: if this access happens before all previous accesses on every object it overlaps // with, then we would like to tolerate it. However this is not easy to check. - if index_range.start + 1 == index_range.end { + if pos_range.start + 1 == pos_range.end { throw_ub_format!("mixed-size access on an existing atomic object"); } else { throw_ub_format!("access overlaps with multiple existing atomic objects"); } } }; - Ok(Ref::map(self.store_buffer.borrow(), |buffer| &buffer[index])) + Ok(Ref::map(self.store_buffer.borrow(), |buffer| &buffer[pos])) } /// Gets a mutable store buffer associated with an atomic object in this allocation @@ -150,21 +150,21 @@ impl StoreBufferAlloc { ) -> InterpResult<'tcx, &mut StoreBuffer> { let buffer = self.store_buffer.get_mut(); let access_type = buffer.access_type(range); - let index = match access_type { - AccessType::PerfectlyOverlapping(index) => index, - AccessType::Empty(index) => { - buffer.insert(index, range, StoreBuffer::default()); - index + let pos = match access_type { + AccessType::PerfectlyOverlapping(pos) => pos, + AccessType::Empty(pos) => { + buffer.insert_at_pos(pos, range, StoreBuffer::default()); + pos } - AccessType::ImperfectlyOverlapping(index_range) => { - if index_range.start + 1 == index_range.end { + AccessType::ImperfectlyOverlapping(pos_range) => { + if pos_range.start + 1 == pos_range.end { throw_ub_format!("mixed-size access on an existing atomic object"); } else { throw_ub_format!("access overlaps with multiple existing atomic objects"); } } }; - Ok(&mut buffer[index]) + Ok(&mut buffer[pos]) } } From 92145373c350d1e41b852aa0332218f614a85bc3 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Mon, 16 May 2022 23:05:36 +0100 Subject: [PATCH 24/46] Put the initialisation value into the store buffer --- src/concurrency/data_race.rs | 26 ++++-- src/concurrency/weak_memory.rs | 109 ++++++++++++++-------- tests/run-pass/weak_memory/consistency.rs | 13 ++- tests/run-pass/weak_memory/weak.rs | 36 ++++++- 4 files changed, 134 insertions(+), 50 deletions(-) diff --git a/src/concurrency/data_race.rs b/src/concurrency/data_race.rs index 22b72dadad..2d69d02a10 100644 --- a/src/concurrency/data_race.rs +++ b/src/concurrency/data_race.rs @@ -533,7 +533,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { let this = self.eval_context_mut(); this.allow_data_races_mut(move |this| this.write_scalar(val, &(*dest).into()))?; 
this.validate_atomic_store(dest, atomic)?; - this.buffered_atomic_write(val, dest, atomic) + // FIXME: it's not possible to get the value before write_scalar. A read_scalar will cause + // side effects from a read the program did not perform. So we have to initialise + // the store buffer with the value currently being written + // ONCE this is fixed please remove the hack in buffered_atomic_write() in weak_memory.rs + this.buffered_atomic_write(val, dest, atomic, val) } /// Perform an atomic operation on a memory location. @@ -556,7 +560,12 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { this.validate_atomic_rmw(place, atomic)?; - this.buffered_atomic_rmw(val.to_scalar_or_uninit(), place, atomic)?; + this.buffered_atomic_rmw( + val.to_scalar_or_uninit(), + place, + atomic, + old.to_scalar_or_uninit(), + )?; Ok(old) } @@ -575,7 +584,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { this.validate_atomic_rmw(place, atomic)?; - this.buffered_atomic_rmw(new, place, atomic)?; + this.buffered_atomic_rmw(new, place, atomic, old)?; Ok(old) } @@ -603,7 +612,12 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { this.validate_atomic_rmw(place, atomic)?; - this.buffered_atomic_rmw(new_val.to_scalar_or_uninit(), place, atomic)?; + this.buffered_atomic_rmw( + new_val.to_scalar_or_uninit(), + place, + atomic, + old.to_scalar_or_uninit(), + )?; // Return the old value. Ok(old) @@ -654,14 +668,14 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { if cmpxchg_success { this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?; this.validate_atomic_rmw(place, success)?; - this.buffered_atomic_rmw(new, place, success)?; + this.buffered_atomic_rmw(new, place, success, old.to_scalar_or_uninit())?; } else { this.validate_atomic_load(place, fail)?; // A failed compare exchange is equivalent to a load, reading from the latest store // in the modification order. // Since `old` is only a value and not the store element, we need to separately // find it in our store buffer and perform load_impl on it. - this.perform_read_on_buffered_latest(place, fail)?; + this.perform_read_on_buffered_latest(place, fail, old.to_scalar_or_uninit())?; } // Return the old value. 
diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index bd4ae06892..1cb9fba715 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -81,11 +81,11 @@ const STORE_BUFFER_LIMIT: usize = 128; pub struct StoreBufferAlloc { /// Store buffer of each atomic object in this allocation // Behind a RefCell because we need to allocate/remove on read access - store_buffer: RefCell>, + store_buffers: RefCell>, } #[derive(Debug, Clone, PartialEq, Eq)] -struct StoreBuffer { +pub(super) struct StoreBuffer { // Stores to this location in modification order buffer: VecDeque, } @@ -111,21 +111,23 @@ struct StoreElement { impl StoreBufferAlloc { pub fn new_allocation() -> Self { - Self { store_buffer: RefCell::new(AllocationMap::new()) } + Self { store_buffers: RefCell::new(AllocationMap::new()) } } /// Gets a store buffer associated with an atomic object in this allocation - fn get_store_buffer<'tcx>( + /// Or creates one with the specified initial value + fn get_or_create_store_buffer<'tcx>( &self, range: AllocRange, + init: ScalarMaybeUninit, ) -> InterpResult<'tcx, Ref<'_, StoreBuffer>> { - let access_type = self.store_buffer.borrow().access_type(range); + let access_type = self.store_buffers.borrow().access_type(range); let pos = match access_type { AccessType::PerfectlyOverlapping(pos) => pos, AccessType::Empty(pos) => { - // First atomic access on this range, allocate a new StoreBuffer - let mut buffer = self.store_buffer.borrow_mut(); - buffer.insert_at_pos(pos, range, StoreBuffer::default()); + let new_buffer = StoreBuffer::new(init); + let mut buffers = self.store_buffers.borrow_mut(); + buffers.insert_at_pos(pos, range, new_buffer); pos } AccessType::ImperfectlyOverlapping(pos_range) => { @@ -140,20 +142,22 @@ impl StoreBufferAlloc { } } }; - Ok(Ref::map(self.store_buffer.borrow(), |buffer| &buffer[pos])) + Ok(Ref::map(self.store_buffers.borrow(), |buffer| &buffer[pos])) } /// Gets a mutable store buffer associated with an atomic object in this allocation - fn get_store_buffer_mut<'tcx>( + fn get_or_create_store_buffer_mut<'tcx>( &mut self, range: AllocRange, + init: ScalarMaybeUninit, ) -> InterpResult<'tcx, &mut StoreBuffer> { - let buffer = self.store_buffer.get_mut(); - let access_type = buffer.access_type(range); + let buffers = self.store_buffers.get_mut(); + let access_type = buffers.access_type(range); let pos = match access_type { AccessType::PerfectlyOverlapping(pos) => pos, AccessType::Empty(pos) => { - buffer.insert_at_pos(pos, range, StoreBuffer::default()); + let new_buffer = StoreBuffer::new(init); + buffers.insert_at_pos(pos, range, new_buffer); pos } AccessType::ImperfectlyOverlapping(pos_range) => { @@ -164,19 +168,28 @@ impl StoreBufferAlloc { } } }; - Ok(&mut buffer[pos]) + Ok(&mut buffers[pos]) } } -impl Default for StoreBuffer { - fn default() -> Self { +impl<'mir, 'tcx: 'mir> StoreBuffer { + fn new(init: ScalarMaybeUninit) -> Self { let mut buffer = VecDeque::new(); buffer.reserve(STORE_BUFFER_LIMIT); - Self { buffer } + let mut ret = Self { buffer }; + let store_elem = StoreElement { + // The thread index and timestamp of the initialisation write + // are never meaningfully used, so it's fine to leave them as 0 + store_index: VectorIdx::from(0), + timestamp: 0, + val: init, + is_seqcst: false, + loads: RefCell::new(FxHashMap::default()), + }; + ret.buffer.push_back(store_elem); + ret } -} -impl<'mir, 'tcx: 'mir> StoreBuffer { /// Reads from the last store in modification order fn read_from_last_store(&self, global: 
&GlobalState) { let store_elem = self.buffer.back(); @@ -192,7 +205,7 @@ impl<'mir, 'tcx: 'mir> StoreBuffer { is_seqcst: bool, rng: &mut (impl rand::Rng + ?Sized), validate: impl FnOnce() -> InterpResult<'tcx>, - ) -> InterpResult<'tcx, Option>> { + ) -> InterpResult<'tcx, ScalarMaybeUninit> { // Having a live borrow to store_buffer while calling validate_atomic_load is fine // because the race detector doesn't touch store_buffer @@ -210,10 +223,8 @@ impl<'mir, 'tcx: 'mir> StoreBuffer { // requires access to ThreadClockSet.clock, which is updated by the race detector validate()?; - let loaded = store_elem.map(|store_elem| { - let (index, clocks) = global.current_thread_state(); - store_elem.load_impl(index, &clocks) - }); + let (index, clocks) = global.current_thread_state(); + let loaded = store_elem.load_impl(index, &clocks); Ok(loaded) } @@ -230,23 +241,18 @@ impl<'mir, 'tcx: 'mir> StoreBuffer { } /// Selects a valid store element in the buffer. - /// The buffer does not contain the value used to initialise the atomic object - /// so a fresh atomic object has an empty store buffer and this function - /// will return `None`. In this case, the caller should ensure that the non-buffered - /// value from `MiriEvalContext::read_scalar()` is observed by the program, which is - /// the initial value of the atomic object. `MiriEvalContext::read_scalar()` is always - /// the latest value in modification order so it is always correct to be observed by any thread. fn fetch_store( &self, is_seqcst: bool, clocks: &ThreadClockSet, rng: &mut R, - ) -> Option<&StoreElement> { + ) -> &StoreElement { use rand::seq::IteratorRandom; let mut found_sc = false; - // FIXME: this should be an inclusive take_while (stops after a false predicate, but + // FIXME: we want an inclusive take_while (stops after a false predicate, but // includes the element that gave the false), but such function doesn't yet // exist in the standard libary https://github.com/rust-lang/rust/issues/62208 + // so we have to hack around it with keep_searching let mut keep_searching = true; let candidates = self .buffer @@ -303,7 +309,9 @@ impl<'mir, 'tcx: 'mir> StoreBuffer { } }); - candidates.choose(rng) + candidates + .choose(rng) + .expect("store buffer cannot be empty, an element is populated on construction") } /// ATOMIC STORE IMPL in the paper (except we don't need the location's vector clock) @@ -366,6 +374,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: new_val: ScalarMaybeUninit, place: &MPlaceTy<'tcx, Tag>, atomic: AtomicRwOp, + init: ScalarMaybeUninit, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let (alloc_id, base_offset, ..) 
= this.ptr_get_alloc_id(place.ptr)?; @@ -379,7 +388,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: global.sc_write(); } let range = alloc_range(base_offset, place.layout.size); - let buffer = alloc_buffers.get_store_buffer_mut(range)?; + let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, init)?; buffer.read_from_last_store(global); buffer.buffered_write(new_val, global, atomic == AtomicRwOp::SeqCst)?; } @@ -401,8 +410,10 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: global.sc_read(); } let mut rng = this.machine.rng.borrow_mut(); - let buffer = - alloc_buffers.get_store_buffer(alloc_range(base_offset, place.layout.size))?; + let buffer = alloc_buffers.get_or_create_store_buffer( + alloc_range(base_offset, place.layout.size), + latest_in_mo, + )?; let loaded = buffer.buffered_read( global, atomic == AtomicReadOp::SeqCst, @@ -410,7 +421,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: validate, )?; - return Ok(loaded.unwrap_or(latest_in_mo)); + return Ok(loaded); } } @@ -424,6 +435,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: val: ScalarMaybeUninit, dest: &MPlaceTy<'tcx, Tag>, atomic: AtomicWriteOp, + init: ScalarMaybeUninit, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr)?; @@ -435,8 +447,23 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: if atomic == AtomicWriteOp::SeqCst { global.sc_write(); } - let buffer = - alloc_buffers.get_store_buffer_mut(alloc_range(base_offset, dest.layout.size))?; + + // UGLY HACK: in write_scalar_atomic() we don't know the value before our write, + // so init == val always. If the buffer is fresh then we would've duplicated an entry, + // so we need to remove it. + let was_empty = matches!( + alloc_buffers + .store_buffers + .borrow() + .access_type(alloc_range(base_offset, dest.layout.size)), + AccessType::Empty(_) + ); + let buffer = alloc_buffers + .get_or_create_store_buffer_mut(alloc_range(base_offset, dest.layout.size), init)?; + if was_empty { + buffer.buffer.pop_front(); + } + buffer.buffered_write(val, global, atomic == AtomicWriteOp::SeqCst)?; } @@ -451,6 +478,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: &self, place: &MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp, + init: ScalarMaybeUninit, ) -> InterpResult<'tcx> { let this = self.eval_context_ref(); @@ -461,7 +489,8 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: let size = place.layout.size; let (alloc_id, base_offset, ..) 
= this.ptr_get_alloc_id(place.ptr)?; if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { - let buffer = alloc_buffers.get_store_buffer(alloc_range(base_offset, size))?; + let buffer = alloc_buffers + .get_or_create_store_buffer(alloc_range(base_offset, size), init)?; buffer.read_from_last_store(global); } } diff --git a/tests/run-pass/weak_memory/consistency.rs b/tests/run-pass/weak_memory/consistency.rs index 8705d0bc67..67f0e8d35d 100644 --- a/tests/run-pass/weak_memory/consistency.rs +++ b/tests/run-pass/weak_memory/consistency.rs @@ -34,8 +34,6 @@ unsafe impl Sync for EvilSend {} // multiple times fn static_atomic(val: usize) -> &'static AtomicUsize { let ret = Box::leak(Box::new(AtomicUsize::new(val))); - // A workaround to put the initialisation value in the store buffer - ret.store(val, Relaxed); ret } @@ -205,8 +203,19 @@ fn test_sc_store_buffering() { assert_ne!((a, b), (0, 0)); } +fn test_single_thread() { + let x = AtomicUsize::new(42); + + assert_eq!(x.load(Relaxed), 42); + + x.store(43, Relaxed); + + assert_eq!(x.load(Relaxed), 43); +} + pub fn main() { for _ in 0..100 { + test_single_thread(); test_mixed_access(); test_load_buffering_acq_rel(); test_message_passing(); diff --git a/tests/run-pass/weak_memory/weak.rs b/tests/run-pass/weak_memory/weak.rs index ab0c20cc97..7538038192 100644 --- a/tests/run-pass/weak_memory/weak.rs +++ b/tests/run-pass/weak_memory/weak.rs @@ -22,11 +22,17 @@ unsafe impl Sync for EvilSend {} // multiple times fn static_atomic(val: usize) -> &'static AtomicUsize { let ret = Box::leak(Box::new(AtomicUsize::new(val))); - // A workaround to put the initialisation value in the store buffer - ret.store(val, Relaxed); ret } +// Spins until it reads the given value +fn reads_value(loc: &AtomicUsize, val: usize) -> usize { + while loc.load(Relaxed) != val { + std::hint::spin_loop(); + } + val +} + fn relaxed() -> bool { let x = static_atomic(0); let j1 = spawn(move || { @@ -64,6 +70,31 @@ fn seq_cst() -> bool { r3 == 1 } +fn initialization_write() -> bool { + let x = static_atomic(11); + assert_eq!(x.load(Relaxed), 11); + + let wait = static_atomic(0); + + let j1 = spawn(move || { + x.store(22, Relaxed); + // Relaxed is intentional. We want to test if the thread 2 reads the initialisation write + // after a relaxed write + wait.store(1, Relaxed); + }); + + let j2 = spawn(move || { + reads_value(wait, 1); + x.load(Relaxed) + }); + + j1.join().unwrap(); + let r2 = j2.join().unwrap(); + + r2 == 11 +} + + // Asserts that the function returns true at least once in 100 runs macro_rules! assert_once { ($f:ident) => { @@ -74,4 +105,5 @@ macro_rules! 
assert_once { pub fn main() { assert_once!(relaxed); assert_once!(seq_cst); + assert_once!(initialization_write); } From e2002b4c657b218d3866342078b5fd0ce3118021 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Tue, 17 May 2022 00:14:30 +0100 Subject: [PATCH 25/46] Amend experimental thread support warnings --- src/shims/unix/thread.rs | 2 +- tests/pass/concurrency/channels.stderr | 2 +- tests/pass/concurrency/concurrent_caller_location.stderr | 2 +- tests/pass/concurrency/data_race.stderr | 2 +- tests/pass/concurrency/disable_data_race_detector.stderr | 2 +- tests/pass/concurrency/issue1643.stderr | 2 +- tests/pass/concurrency/linux-futex.stderr | 2 +- tests/pass/concurrency/simple.stderr | 2 +- tests/pass/concurrency/sync.stderr | 2 +- tests/pass/concurrency/thread_locals.stderr | 2 +- tests/pass/concurrency/tls_lib_drop.stderr | 2 +- tests/pass/libc.stderr | 2 +- tests/pass/panic/concurrent-panic.stderr | 2 +- tests/pass/threadleak_ignored.stderr | 2 +- tests/run-pass/weak_memory/consistency.stderr | 2 +- tests/run-pass/weak_memory/weak.stderr | 2 +- 16 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/shims/unix/thread.rs b/src/shims/unix/thread.rs index 88c3fb0bc8..812cb7376b 100644 --- a/src/shims/unix/thread.rs +++ b/src/shims/unix/thread.rs @@ -14,7 +14,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let this = self.eval_context_mut(); this.tcx.sess.warn( - "thread support is experimental and incomplete: weak memory effects are not emulated.", + "thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model.", ); // Create the new thread diff --git a/tests/pass/concurrency/channels.stderr b/tests/pass/concurrency/channels.stderr index 03676519d4..1d0ce4b385 100644 --- a/tests/pass/concurrency/channels.stderr +++ b/tests/pass/concurrency/channels.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. diff --git a/tests/pass/concurrency/concurrent_caller_location.stderr b/tests/pass/concurrency/concurrent_caller_location.stderr index 03676519d4..1d0ce4b385 100644 --- a/tests/pass/concurrency/concurrent_caller_location.stderr +++ b/tests/pass/concurrency/concurrent_caller_location.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. diff --git a/tests/pass/concurrency/data_race.stderr b/tests/pass/concurrency/data_race.stderr index 03676519d4..1d0ce4b385 100644 --- a/tests/pass/concurrency/data_race.stderr +++ b/tests/pass/concurrency/data_race.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. diff --git a/tests/pass/concurrency/disable_data_race_detector.stderr b/tests/pass/concurrency/disable_data_race_detector.stderr index 03676519d4..1d0ce4b385 100644 --- a/tests/pass/concurrency/disable_data_race_detector.stderr +++ b/tests/pass/concurrency/disable_data_race_detector.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. 
+warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. diff --git a/tests/pass/concurrency/issue1643.stderr b/tests/pass/concurrency/issue1643.stderr index 03676519d4..1d0ce4b385 100644 --- a/tests/pass/concurrency/issue1643.stderr +++ b/tests/pass/concurrency/issue1643.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. diff --git a/tests/pass/concurrency/linux-futex.stderr b/tests/pass/concurrency/linux-futex.stderr index 03676519d4..1d0ce4b385 100644 --- a/tests/pass/concurrency/linux-futex.stderr +++ b/tests/pass/concurrency/linux-futex.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. diff --git a/tests/pass/concurrency/simple.stderr b/tests/pass/concurrency/simple.stderr index bb60638bd6..386dc92269 100644 --- a/tests/pass/concurrency/simple.stderr +++ b/tests/pass/concurrency/simple.stderr @@ -1,4 +1,4 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. thread '' panicked at 'Hello!', $DIR/simple.rs:LL:CC note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace diff --git a/tests/pass/concurrency/sync.stderr b/tests/pass/concurrency/sync.stderr index 03676519d4..1d0ce4b385 100644 --- a/tests/pass/concurrency/sync.stderr +++ b/tests/pass/concurrency/sync.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. diff --git a/tests/pass/concurrency/thread_locals.stderr b/tests/pass/concurrency/thread_locals.stderr index 03676519d4..1d0ce4b385 100644 --- a/tests/pass/concurrency/thread_locals.stderr +++ b/tests/pass/concurrency/thread_locals.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. diff --git a/tests/pass/concurrency/tls_lib_drop.stderr b/tests/pass/concurrency/tls_lib_drop.stderr index 03676519d4..1d0ce4b385 100644 --- a/tests/pass/concurrency/tls_lib_drop.stderr +++ b/tests/pass/concurrency/tls_lib_drop.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. diff --git a/tests/pass/libc.stderr b/tests/pass/libc.stderr index 03676519d4..1d0ce4b385 100644 --- a/tests/pass/libc.stderr +++ b/tests/pass/libc.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. 
diff --git a/tests/pass/panic/concurrent-panic.stderr b/tests/pass/panic/concurrent-panic.stderr index ae132c9ee3..0d4a409dfe 100644 --- a/tests/pass/panic/concurrent-panic.stderr +++ b/tests/pass/panic/concurrent-panic.stderr @@ -1,4 +1,4 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. Thread 1 starting, will block on mutex Thread 1 reported it has started diff --git a/tests/pass/threadleak_ignored.stderr b/tests/pass/threadleak_ignored.stderr index aa03751185..9205eb70b2 100644 --- a/tests/pass/threadleak_ignored.stderr +++ b/tests/pass/threadleak_ignored.stderr @@ -1,3 +1,3 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. Dropping 0 diff --git a/tests/run-pass/weak_memory/consistency.stderr b/tests/run-pass/weak_memory/consistency.stderr index 03676519d4..1d0ce4b385 100644 --- a/tests/run-pass/weak_memory/consistency.stderr +++ b/tests/run-pass/weak_memory/consistency.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. diff --git a/tests/run-pass/weak_memory/weak.stderr b/tests/run-pass/weak_memory/weak.stderr index 03676519d4..1d0ce4b385 100644 --- a/tests/run-pass/weak_memory/weak.stderr +++ b/tests/run-pass/weak_memory/weak.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. From 31c01415cbb1793ae9492f40bbdd917cfe8d1fa5 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Tue, 17 May 2022 20:04:18 +0100 Subject: [PATCH 26/46] Replace yield_now() with spin loop hint --- tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs | 6 +++--- tests/run-pass/weak_memory/consistency.rs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs index b9e395fd77..423d0e0e8f 100644 --- a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs +++ b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs @@ -9,13 +9,13 @@ // so we have to stick to C++11 emulation from exiting research. 
use std::sync::atomic::Ordering::*; -use std::thread::{spawn, yield_now}; +use std::thread::spawn; use std::sync::atomic::{fence, AtomicUsize}; -// Spins and yields until until it reads value +// Spins until it reads value fn reads_value(loc: &AtomicUsize, val: usize) -> usize { while loc.load(Relaxed) != val { - yield_now(); + std::hint::spin_loop(); } val } diff --git a/tests/run-pass/weak_memory/consistency.rs b/tests/run-pass/weak_memory/consistency.rs index 67f0e8d35d..fa13803830 100644 --- a/tests/run-pass/weak_memory/consistency.rs +++ b/tests/run-pass/weak_memory/consistency.rs @@ -22,7 +22,7 @@ use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering::*; -use std::thread::{spawn, yield_now}; +use std::thread::spawn; #[derive(Copy, Clone)] struct EvilSend(pub T); @@ -37,10 +37,10 @@ fn static_atomic(val: usize) -> &'static AtomicUsize { ret } -// Spins and yields until until acquires a pre-determined value +// Spins until acquires a pre-determined value fn acquires_value(loc: &AtomicUsize, val: usize) -> usize { while loc.load(Acquire) != val { - yield_now(); + std::hint::spin_loop(); } val } From 5ddd4eff0382ba454e8123c663f9e11c14597f79 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Thu, 19 May 2022 20:14:16 +0100 Subject: [PATCH 27/46] Spelling, punctuation and grammar Co-authored-by: Ralf Jung --- README.md | 2 +- tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs | 8 ++++---- tests/run-pass/weak_memory/consistency.rs | 2 +- tests/run-pass/weak_memory/weak.rs | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index ece45fca12..938a64cd04 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ for example: * **Experimental**: Violations of the [Stacked Borrows] rules governing aliasing for reference types * **Experimental**: Data races -* **Experimental**: Weak memory emulation +* **Experimental**: Emulation of weak memory effects (i.e., reads can return outdated values) On top of that, Miri will also tell you about memory leaks: when there is memory still allocated at the end of the execution, and that memory is not reachable diff --git a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs index 423d0e0e8f..34097f4a89 100644 --- a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs +++ b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs @@ -4,15 +4,15 @@ // https://plv.mpi-sws.org/scfix/paper.pdf // 2.2 Second Problem: SC Fences are Too Weak // This test should pass under the C++20 model Rust is using. -// Unfortunately, Miri's weak memory emulation only follows C++11 model +// Unfortunately, Miri's weak memory emulation only follows the C++11 model // as we don't know how to correctly emulate C++20's revised SC semantics, -// so we have to stick to C++11 emulation from exiting research. +// so we have to stick to C++11 emulation from existing research. use std::sync::atomic::Ordering::*; use std::thread::spawn; use std::sync::atomic::{fence, AtomicUsize}; -// Spins until it reads value +// Spins until it reads the given value fn reads_value(loc: &AtomicUsize, val: usize) -> usize { while loc.load(Relaxed) != val { std::hint::spin_loop(); @@ -24,7 +24,7 @@ fn reads_value(loc: &AtomicUsize, val: usize) -> usize { // multiple tests fn static_atomic(val: usize) -> &'static AtomicUsize { let ret = Box::leak(Box::new(AtomicUsize::new(val))); - // A workaround to put the initialisation value in the store buffer + // A workaround to put the initialization value in the store buffer. 
ret.store(val, Relaxed); ret } diff --git a/tests/run-pass/weak_memory/consistency.rs b/tests/run-pass/weak_memory/consistency.rs index fa13803830..8a7c1340cc 100644 --- a/tests/run-pass/weak_memory/consistency.rs +++ b/tests/run-pass/weak_memory/consistency.rs @@ -37,7 +37,7 @@ fn static_atomic(val: usize) -> &'static AtomicUsize { ret } -// Spins until acquires a pre-determined value +// Spins until it acquires a pre-determined value. fn acquires_value(loc: &AtomicUsize, val: usize) -> usize { while loc.load(Acquire) != val { std::hint::spin_loop(); diff --git a/tests/run-pass/weak_memory/weak.rs b/tests/run-pass/weak_memory/weak.rs index 7538038192..70e1bf00f4 100644 --- a/tests/run-pass/weak_memory/weak.rs +++ b/tests/run-pass/weak_memory/weak.rs @@ -6,7 +6,7 @@ // This is scheduler and pseudo-RNG dependent, so each test is // run multiple times until one try returns true. // Spurious failure is possible, if you are really unlucky with -// the RNG. +// the RNG and always read the latest value from the store buffer. use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering::*; From 6d27f188c21f98ff9d8f2f252becb18df27d6c4a Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sun, 22 May 2022 22:07:50 +0100 Subject: [PATCH 28/46] Update src/concurrency/weak_memory.rs Co-authored-by: Ralf Jung --- src/concurrency/weak_memory.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index 1cb9fba715..51092478c3 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -102,6 +102,9 @@ struct StoreElement { /// The timestamp of the storing thread when it performed the store timestamp: VTimestamp, /// The value of this store + // FIXME: this means the store is either fully initialized or fully uninitialized; + // we will have to change this if we want to support atomics on + // partially initialized data. 
val: ScalarMaybeUninit, /// Timestamp of first loads from this store element by each thread From dafd813c16231834d3bce7875a8c616d1e49a8c1 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sun, 22 May 2022 22:18:22 +0100 Subject: [PATCH 29/46] Move transmute into a separate function --- .../weak_memory/imperfectly_overlapping.rs | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/tests/compile-fail/weak_memory/imperfectly_overlapping.rs b/tests/compile-fail/weak_memory/imperfectly_overlapping.rs index e3f89b5b68..6f91e147fa 100644 --- a/tests/compile-fail/weak_memory/imperfectly_overlapping.rs +++ b/tests/compile-fail/weak_memory/imperfectly_overlapping.rs @@ -6,21 +6,26 @@ use std::intrinsics::atomic_load; use std::sync::atomic::Ordering::*; use std::sync::atomic::{AtomicU16, AtomicU32}; +fn split_u32(dword: &mut u32) -> &mut [u16; 2] { + unsafe { std::mem::transmute::<&mut u32, &mut [u16; 2]>(dword) } +} + fn test_same_thread() { - let mut qword = AtomicU32::new(42); - assert_eq!(qword.load(Relaxed), 42); - qword.store(0xabbafafa, Relaxed); + let mut dword = AtomicU32::new(42); + assert_eq!(dword.load(Relaxed), 42); + dword.store(0xabbafafa, Relaxed); - let qword_mut = qword.get_mut(); + let dword_mut = dword.get_mut(); - let dwords_mut = unsafe { std::mem::transmute::<&mut u32, &mut [u16; 2]>(qword_mut) }; + let words_mut = split_u32(dword_mut); - let (hi_mut, lo_mut) = dwords_mut.split_at_mut(1); + let (hi_mut, lo_mut) = words_mut.split_at_mut(1); let (hi, _) = (AtomicU16::from_mut(&mut hi_mut[0]), AtomicU16::from_mut(&mut lo_mut[0])); unsafe { - //Equivalent to: hi.load(Ordering::SeqCst) + // Equivalent to: hi.load(Ordering::SeqCst) + // We need to use intrisics to for precise error location atomic_load(hi.get_mut() as *mut u16); //~ ERROR: mixed-size access on an existing atomic object } } From 7dcb19ead429e526afac7800a0b90f2b39cfa4f1 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Mon, 23 May 2022 22:05:16 +0100 Subject: [PATCH 30/46] Add rust-only operation tests --- .../weak_memory/imperfectly_overlapping.rs | 35 -------- tests/run-pass/weak_memory/extra_cpp.rs | 79 +++++++++++++++++++ tests/run-pass/weak_memory/extra_cpp.stderr | 2 + 3 files changed, 81 insertions(+), 35 deletions(-) delete mode 100644 tests/compile-fail/weak_memory/imperfectly_overlapping.rs create mode 100644 tests/run-pass/weak_memory/extra_cpp.rs create mode 100644 tests/run-pass/weak_memory/extra_cpp.stderr diff --git a/tests/compile-fail/weak_memory/imperfectly_overlapping.rs b/tests/compile-fail/weak_memory/imperfectly_overlapping.rs deleted file mode 100644 index 6f91e147fa..0000000000 --- a/tests/compile-fail/weak_memory/imperfectly_overlapping.rs +++ /dev/null @@ -1,35 +0,0 @@ -// ignore-windows: Concurrency on Windows is not supported yet. 
-#![feature(atomic_from_mut)] -#![feature(core_intrinsics)] - -use std::intrinsics::atomic_load; -use std::sync::atomic::Ordering::*; -use std::sync::atomic::{AtomicU16, AtomicU32}; - -fn split_u32(dword: &mut u32) -> &mut [u16; 2] { - unsafe { std::mem::transmute::<&mut u32, &mut [u16; 2]>(dword) } -} - -fn test_same_thread() { - let mut dword = AtomicU32::new(42); - assert_eq!(dword.load(Relaxed), 42); - dword.store(0xabbafafa, Relaxed); - - let dword_mut = dword.get_mut(); - - let words_mut = split_u32(dword_mut); - - let (hi_mut, lo_mut) = words_mut.split_at_mut(1); - - let (hi, _) = (AtomicU16::from_mut(&mut hi_mut[0]), AtomicU16::from_mut(&mut lo_mut[0])); - - unsafe { - // Equivalent to: hi.load(Ordering::SeqCst) - // We need to use intrisics to for precise error location - atomic_load(hi.get_mut() as *mut u16); //~ ERROR: mixed-size access on an existing atomic object - } -} - -pub fn main() { - test_same_thread(); -} diff --git a/tests/run-pass/weak_memory/extra_cpp.rs b/tests/run-pass/weak_memory/extra_cpp.rs new file mode 100644 index 0000000000..b20ec58349 --- /dev/null +++ b/tests/run-pass/weak_memory/extra_cpp.rs @@ -0,0 +1,79 @@ +// compile-flags: -Zmiri-ignore-leaks + +// Tests operations not perfomable through C++'s atomic API +// but doable in safe (at least sound) Rust. + +#![feature(atomic_from_mut)] + +use std::sync::atomic::Ordering::*; +use std::sync::atomic::{AtomicU16, AtomicU32, AtomicUsize}; +use std::thread::spawn; + +fn static_atomic_mut(val: usize) -> &'static mut AtomicUsize { + let ret = Box::leak(Box::new(AtomicUsize::new(val))); + ret +} + +fn split_u32(dword: &mut u32) -> &mut [u16; 2] { + unsafe { std::mem::transmute::<&mut u32, &mut [u16; 2]>(dword) } +} + +fn mem_replace() { + let mut x = AtomicU32::new(0); + + let old_x = std::mem::replace(&mut x, AtomicU32::new(42)); + + assert_eq!(x.load(Relaxed), 42); + assert_eq!(old_x.load(Relaxed), 0); +} + +fn assign_to_mut() { + let x = static_atomic_mut(0); + x.store(1, Relaxed); + + *x = AtomicUsize::new(2); + + assert_eq!(x.load(Relaxed), 2); +} + +fn get_mut_write() { + let x = static_atomic_mut(0); + x.store(1, Relaxed); + { + let x_mut = x.get_mut(); + *x_mut = 2; + } + + let j1 = spawn(move || x.load(Relaxed)); + + let r1 = j1.join().unwrap(); + assert_eq!(r1, 2); +} + +// This is technically doable in C++ with atomic_ref +// but little literature exists atm on its involvement +// in mixed size/atomicity accesses +fn from_mut_split() { + let mut x: u32 = 0; + + { + let x_atomic = AtomicU32::from_mut(&mut x); + x_atomic.store(u32::from_be(0xabbafafa), Relaxed); + } + + let (x_hi, x_lo) = split_u32(&mut x).split_at_mut(1); + + let x_hi_atomic = AtomicU16::from_mut(&mut x_hi[0]); + let x_lo_atomic = AtomicU16::from_mut(&mut x_lo[0]); + + assert_eq!(x_hi_atomic.load(Relaxed), u16::from_be(0xabba)); + assert_eq!(x_lo_atomic.load(Relaxed), u16::from_be(0xfafa)); +} + + +pub fn main() { + get_mut_write(); + from_mut_split(); + assign_to_mut(); + mem_replace(); +} diff --git a/tests/run-pass/weak_memory/extra_cpp.stderr b/tests/run-pass/weak_memory/extra_cpp.stderr new file mode 100644 index 0000000000..1d0ce4b385 --- /dev/null +++ b/tests/run-pass/weak_memory/extra_cpp.stderr @@ -0,0 +1,2 @@ +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. 
+ From 2321b15342b05445ab7d7ac07a28382b454e0206 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Tue, 24 May 2022 21:07:22 +0100 Subject: [PATCH 31/46] Differentiate between not multithreading and temp disabling race detection --- src/concurrency/data_race.rs | 44 +++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/concurrency/data_race.rs b/src/concurrency/data_race.rs index 2d69d02a10..7ac2ed615a 100644 --- a/src/concurrency/data_race.rs +++ b/src/concurrency/data_race.rs @@ -445,14 +445,12 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { #[inline] fn allow_data_races_ref(&self, op: impl FnOnce(&MiriEvalContext<'mir, 'tcx>) -> R) -> R { let this = self.eval_context_ref(); - let old = if let Some(data_race) = &this.machine.data_race { - data_race.multi_threaded.replace(false) - } else { - false - }; + if let Some(data_race) = &this.machine.data_race { + data_race.ongoing_atomic_access.set(true); + } let result = op(this); if let Some(data_race) = &this.machine.data_race { - data_race.multi_threaded.set(old); + data_race.ongoing_atomic_access.set(false); } result } @@ -466,14 +464,12 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { op: impl FnOnce(&mut MiriEvalContext<'mir, 'tcx>) -> R, ) -> R { let this = self.eval_context_mut(); - let old = if let Some(data_race) = &this.machine.data_race { - data_race.multi_threaded.replace(false) - } else { - false - }; + if let Some(data_race) = &this.machine.data_race { + data_race.ongoing_atomic_access.set(true); + } let result = op(this); if let Some(data_race) = &this.machine.data_race { - data_race.multi_threaded.set(old); + data_race.ongoing_atomic_access.set(false); } result } @@ -923,7 +919,7 @@ impl VClockAlloc { } /// Detect data-races for an unsynchronized read operation, will not perform - /// data-race detection if `multi-threaded` is false, either due to no threads + /// data-race detection if `race_detecting()` is false, either due to no threads /// being created or if it is temporarily disabled during a racy read or write /// operation for which data-race detection is handled separately, for example /// atomic read operations. 
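The switch from temporarily toggling `multi_threaded` to a dedicated `ongoing_atomic_access` flag boils down to a small flag-guard pattern. A minimal, self-contained sketch of the idea (illustrative names only, not Miri's actual types or API) might look like this:

use std::cell::Cell;

// Illustrative stand-in for the per-machine race-detector state.
struct RaceDetector {
    multi_threaded: Cell<bool>,
    ongoing_atomic_access: Cell<bool>,
}

impl RaceDetector {
    // Run `op` with plain-access race checks suppressed, mirroring the
    // set/clear of `ongoing_atomic_access` in the hunks above.
    fn allow_data_races<R>(&self, op: impl FnOnce() -> R) -> R {
        self.ongoing_atomic_access.set(true);
        let result = op();
        self.ongoing_atomic_access.set(false);
        result
    }

    // Races are only reported once a second thread exists and we are not
    // currently inside an atomic access.
    fn race_detecting(&self) -> bool {
        self.multi_threaded.get() && !self.ongoing_atomic_access.get()
    }
}

fn main() {
    let detector = RaceDetector {
        multi_threaded: Cell::new(true),
        ongoing_atomic_access: Cell::new(false),
    };
    assert!(detector.race_detecting());
    detector.allow_data_races(|| {
        // Inside the closure, non-atomic race checks are skipped.
        assert!(!detector.race_detecting());
    });
    assert!(detector.race_detecting());
}

Keeping the two flags separate means `multi_threaded` only answers "has more than one thread ever run?", while the temporary suppression during an atomic access is tracked independently.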
@@ -933,7 +929,7 @@ impl VClockAlloc {
         range: AllocRange,
         global: &GlobalState,
     ) -> InterpResult<'tcx> {
-        if global.multi_threaded.get() {
+        if global.race_detecting() {
             let (index, clocks) = global.current_thread_state();
             let mut alloc_ranges = self.alloc_ranges.borrow_mut();
             for (offset, range) in alloc_ranges.iter_mut(range.start, range.size) {
@@ -962,7 +958,7 @@ impl VClockAlloc {
         write_type: WriteType,
         global: &mut GlobalState,
     ) -> InterpResult<'tcx> {
-        if global.multi_threaded.get() {
+        if global.race_detecting() {
             let (index, clocks) = global.current_thread_state();
             for (offset, range) in self.alloc_ranges.get_mut().iter_mut(range.start, range.size) {
                 if let Err(DataRace) = range.write_race_detect(&*clocks, index, write_type) {
@@ -983,7 +979,7 @@ impl VClockAlloc {
     }
 
     /// Detect data-races for an unsynchronized write operation, will not perform
-    /// data-race threads if `multi-threaded` is false, either due to no threads
+    /// data-race detection if `race_detecting()` is false, either due to no threads
     /// being created or if it is temporarily disabled during a racy read or write
     /// operation
     pub fn write<'tcx>(
@@ -996,7 +992,7 @@ impl VClockAlloc {
     }
 
     /// Detect data-races for an unsynchronized deallocate operation, will not perform
-    /// data-race threads if `multi-threaded` is false, either due to no threads
+    /// data-race detection if `race_detecting()` is false, either due to no threads
     /// being created or if it is temporarily disabled during a racy read or write
     /// operation
     pub fn deallocate<'tcx>(
@@ -1026,7 +1022,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> {
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_ref();
         if let Some(data_race) = &this.machine.data_race {
-            if data_race.multi_threaded.get() {
+            if data_race.race_detecting() {
                 let size = place.layout.size;
                 let (alloc_id, base_offset, _tag) = this.ptr_get_alloc_id(place.ptr)?;
                 // Load and log the atomic operation.
@@ -1116,6 +1112,10 @@ pub struct GlobalState {
     /// any data-races.
     multi_threaded: Cell<bool>,
 
+    /// A flag to mark that we are currently performing
+    /// an atomic access, to suppress data race detection.
+    ongoing_atomic_access: Cell<bool>,
+
     /// Mapping of a vector index to a known set of thread
     /// clocks, this is not directly mapping from a thread id
     /// since it may refer to multiple threads.
@@ -1167,6 +1167,7 @@ impl GlobalState {
     pub fn new() -> Self {
         let mut global_state = GlobalState {
             multi_threaded: Cell::new(false),
+            ongoing_atomic_access: Cell::new(false),
             vector_clocks: RefCell::new(IndexVec::new()),
             vector_info: RefCell::new(IndexVec::new()),
             thread_info: RefCell::new(IndexVec::new()),
@@ -1192,6 +1193,13 @@ impl GlobalState {
         global_state
     }
 
+    // We perform data race detection when there is more than one active thread
+    // and we are not currently in the middle of an atomic access, where a data
+    // race is impossible.
+    fn race_detecting(&self) -> bool {
+        self.multi_threaded.get() && !self.ongoing_atomic_access.get()
+    }
+
     // Try to find vector index values that can potentially be re-used
     // by a new thread instead of a new vector index being created.
fn find_vector_index_reuse_candidate(&self) -> Option { From 226ed41cca4cf83cd6afc18f0d3ce4ef2cdc8691 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Tue, 24 May 2022 22:03:04 +0100 Subject: [PATCH 32/46] Destroy store buffers on non-racy non-atomic accesses --- src/concurrency/allocation_map.rs | 8 ++++++++ src/concurrency/data_race.rs | 4 ++++ src/concurrency/weak_memory.rs | 28 ++++++++++++++++++++++++---- src/machine.rs | 23 +++++++++++++++++------ 4 files changed, 53 insertions(+), 10 deletions(-) diff --git a/src/concurrency/allocation_map.rs b/src/concurrency/allocation_map.rs index 2524389c0b..62469dcaf4 100644 --- a/src/concurrency/allocation_map.rs +++ b/src/concurrency/allocation_map.rs @@ -125,6 +125,14 @@ impl AllocationMap { debug_assert!(range.end() <= self.v[pos + 1].range.start); } } + + pub fn remove_pos_range(&mut self, pos_range: Range) { + self.v.drain(pos_range); + } + + pub fn remove_from_pos(&mut self, pos: Position) { + self.v.remove(pos); + } } impl Index for AllocationMap { diff --git a/src/concurrency/data_race.rs b/src/concurrency/data_race.rs index 7ac2ed615a..2483bcdf49 100644 --- a/src/concurrency/data_race.rs +++ b/src/concurrency/data_race.rs @@ -1200,6 +1200,10 @@ impl GlobalState { self.multi_threaded.get() && !self.ongoing_atomic_access.get() } + pub fn ongoing_atomic_access(&self) -> bool { + self.ongoing_atomic_access.get() + } + // Try to find vector index values that can potentially be re-used // by a new thread instead of a new vector index being created. fn find_vector_index_reuse_candidate(&self) -> Option { diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index 51092478c3..7d8d7da6dc 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -11,10 +11,10 @@ //! This implementation is not fully correct under the revised C++20 model and may generate behaviours C++20 //! disallows. //! -//! Rust follows the full C++20 memory model (except for the Consume ordering). It is therefore -//! possible for this implementation to generate behaviours never observable when the same program is compiled and -//! run natively. Unfortunately, no literature exists at the time of writing which proposes an implementable and C++20-compatible -//! relaxed memory model that supports all atomic operation existing in Rust. The closest one is +//! Rust follows the C++20 memory model (except for the Consume ordering and some operations not performable through C++'s +//! std::atomic API). It is therefore possible for this implementation to generate behaviours never observable when the +//! same program is compiled and run natively. Unfortunately, no literature exists at the time of writing which proposes +//! an implementable and C++20-compatible relaxed memory model that supports all atomic operation existing in Rust. The closest one is //! A Promising Semantics for Relaxed-Memory Concurrency by Jeehoon Kang et al. (https://www.cs.tau.ac.il/~orilahav/papers/popl17.pdf) //! However, this model lacks SC accesses and is therefore unusable by Miri (SC accesses are everywhere in library code). //! @@ -117,6 +117,26 @@ impl StoreBufferAlloc { Self { store_buffers: RefCell::new(AllocationMap::new()) } } + /// When a non-atomic access happens on a location that has been atomically accessed + /// before without data race, we can determine that the non-atomic access fully happens + /// before all the prior atomic accesses so the location no longer needs to exhibit + /// any weak memory behaviours until further atomic accesses. 
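    /// (A concrete example, for illustration: once every thread that used an
    /// `AtomicU32` has been joined, a write through `AtomicU32::get_mut` is such a
    /// non-racing non-atomic access, so the store buffer for that location can
    /// simply be dropped.)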
+ pub fn destroy_atomicity<'tcx>(&self, range: AllocRange) { + let mut buffers = self.store_buffers.borrow_mut(); + let access_type = buffers.access_type(range); + match access_type { + AccessType::PerfectlyOverlapping(pos) => { + buffers.remove_from_pos(pos); + } + AccessType::ImperfectlyOverlapping(pos_range) => { + buffers.remove_pos_range(pos_range); + } + AccessType::Empty(_) => { + // Do nothing + } + } + } + /// Gets a store buffer associated with an atomic object in this allocation /// Or creates one with the specified initial value fn get_or_create_store_buffer<'tcx>( diff --git a/src/machine.rs b/src/machine.rs index 41c852747a..6dc2a75b69 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -738,10 +738,17 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { range, machine.stacked_borrows.as_ref().unwrap(), machine.current_span(), - ) - } else { - Ok(()) + )?; } + if let Some(weak_memory) = &alloc_extra.weak_memory { + if !machine.data_race.as_ref().unwrap().ongoing_atomic_access() { + // This is a non-atomic access. And if we are accessing a previously atomically + // accessed location without racing with them, then the location no longer needs + // to exhibit weak-memory behaviours until a fresh atomic access happens + weak_memory.destroy_atomicity(range); + } + } + Ok(()) } #[inline(always)] @@ -762,10 +769,14 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { range, machine.stacked_borrows.as_ref().unwrap(), machine.current_span(), - ) - } else { - Ok(()) + )?; } + if let Some(weak_memory) = &alloc_extra.weak_memory { + if !machine.data_race.as_ref().unwrap().ongoing_atomic_access() { + weak_memory.destroy_atomicity(range); + } + } + Ok(()) } #[inline(always)] From 613d60db0bc0bfac9c3ad57245fc09e08795a550 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Wed, 25 May 2022 20:46:08 +0100 Subject: [PATCH 33/46] Allow non-racy mixed size accesses --- src/concurrency/data_race.rs | 63 ++++++++++++- src/concurrency/weak_memory.rs | 92 +++++++++++++++---- .../weak_memory/cpp20_rwc_syncs.rs | 6 +- .../weak_memory/racing_mixed_size.rs | 38 ++++++++ tests/run-pass/weak_memory/extra_cpp.rs | 89 +++++++++++++++++- 5 files changed, 262 insertions(+), 26 deletions(-) create mode 100644 tests/compile-fail/weak_memory/racing_mixed_size.rs diff --git a/src/concurrency/data_race.rs b/src/concurrency/data_race.rs index 2483bcdf49..8b8694ac18 100644 --- a/src/concurrency/data_race.rs +++ b/src/concurrency/data_race.rs @@ -287,6 +287,20 @@ impl MemoryCellClocks { Ok(()) } + /// Checks if the memory cell write races with any prior atomic read or write + fn write_race_free_with_atomic(&mut self, clocks: &ThreadClockSet) -> bool { + if let Some(atomic) = self.atomic() { + atomic.read_vector <= clocks.clock && atomic.write_vector <= clocks.clock + } else { + true + } + } + + /// Checks if the memory cell read races with any prior atomic write + fn read_race_free_with_atomic(&self, clocks: &ThreadClockSet) -> bool { + if let Some(atomic) = self.atomic() { atomic.write_vector <= clocks.clock } else { true } + } + /// Update memory cell data-race tracking for atomic /// load relaxed semantics, is a no-op if this memory was /// not used previously as atomic memory. @@ -514,6 +528,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // the *value* (including the associated provenance if this is an AtomicPtr) at this location. // Only metadata on the location itself is used. 
let scalar = this.allow_data_races_ref(move |this| this.read_scalar(&place.into()))?; + this.validate_overlapping_atomic_read(place)?; this.buffered_atomic_read(place, atomic, scalar, || { this.validate_atomic_load(place, atomic) }) @@ -527,6 +542,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicWriteOp, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(dest)?; this.allow_data_races_mut(move |this| this.write_scalar(val, &(*dest).into()))?; this.validate_atomic_store(dest, atomic)?; // FIXME: it's not possible to get the value before write_scalar. A read_scalar will cause @@ -547,6 +563,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?; // Atomics wrap around on overflow. @@ -575,6 +592,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ScalarMaybeUninit> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; let old = this.allow_data_races_mut(|this| this.read_scalar(&place.into()))?; this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?; @@ -595,6 +613,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?; let lt = this.binary_op(mir::BinOp::Lt, &old, &rhs)?.to_scalar()?.to_bool()?; @@ -637,6 +656,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { use rand::Rng as _; let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; // Failure ordering cannot be stronger than success ordering, therefore first attempt // to read with the failure ordering and if successful then try again with the success // read ordering and write in the success case. 
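For reference, the success/failure pair mentioned in the comment above is the one user code passes to the standard library's `compare_exchange`; a small example of that API (ordinary `std` usage, not Miri internals):

use std::sync::atomic::{AtomicUsize, Ordering};

fn main() {
    let x = AtomicUsize::new(0);
    // Success uses AcqRel; the failure path is just a load and uses Acquire.
    // The failure ordering may not be stronger than the success ordering.
    match x.compare_exchange(0, 1, Ordering::AcqRel, Ordering::Acquire) {
        Ok(prev) => assert_eq!(prev, 0),                // the store happened
        Err(prev) => println!("lost the race: {prev}"), // the store did not happen
    }
    assert_eq!(x.load(Ordering::Relaxed), 1);
}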
@@ -686,6 +706,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicReadOp, ) -> InterpResult<'tcx> { let this = self.eval_context_ref(); + this.validate_overlapping_atomic_read(place)?; this.validate_atomic_op( place, atomic, @@ -708,6 +729,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicWriteOp, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; this.validate_atomic_op( place, atomic, @@ -733,6 +755,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); let release = matches!(atomic, Release | AcqRel | SeqCst); let this = self.eval_context_mut(); + this.validate_overlapping_atomic_write(place)?; this.validate_atomic_op(place, atomic, "Atomic RMW", move |memory, clocks, index, _| { if acquire { memory.load_acquire(clocks, index)?; @@ -918,6 +941,44 @@ impl VClockAlloc { ) } + /// Detect racing atomic writes (not data races) + /// on every byte of the current access range + pub(super) fn read_race_free_with_atomic<'tcx>( + &self, + range: AllocRange, + global: &GlobalState, + ) -> bool { + if global.race_detecting() { + let (_, clocks) = global.current_thread_state(); + let alloc_ranges = self.alloc_ranges.borrow(); + for (_, range) in alloc_ranges.iter(range.start, range.size) { + if !range.read_race_free_with_atomic(&clocks) { + return false; + } + } + } + true + } + + /// Detect racing atomic read and writes (not data races) + /// on every byte of the current access range + pub(super) fn write_race_free_with_atomic<'tcx>( + &mut self, + range: AllocRange, + global: &GlobalState, + ) -> bool { + if global.race_detecting() { + let (_, clocks) = global.current_thread_state(); + let alloc_ranges = self.alloc_ranges.get_mut(); + for (_, range) in alloc_ranges.iter_mut(range.start, range.size) { + if !range.write_race_free_with_atomic(&clocks) { + return false; + } + } + } + true + } + /// Detect data-races for an unsynchronized read operation, will not perform /// data-race detection if `race_detecting()` is false, either due to no threads /// being created or if it is temporarily disabled during a racy read or write @@ -1027,7 +1088,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { let (alloc_id, base_offset, _tag) = this.ptr_get_alloc_id(place.ptr)?; // Load and log the atomic operation. // Note that atomic loads are possible even from read-only allocations, so `get_alloc_extra_mut` is not an option. - let alloc_meta = &this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap(); + let alloc_meta = this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap(); log::trace!( "Atomic op({}) with ordering {:?} on {:?} (size={})", description, diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index 7d8d7da6dc..a4fbd14f43 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -29,6 +29,13 @@ //! Additionally, writes in our implementation do not have globally unique timestamps attached. In the other two models this timestamp is //! used to make sure a value in a thread's view is not overwritten by a write that occured earlier than the one in the existing view. //! In our implementation, this is detected using read information attached to store elements, as there is no data strucutre representing reads. +//! +//! 
Safe/sound Rust allows for more operations on atomic locations than the C++20 atomic API was intended to allow, such as non-atomically accessing +//! a previously atomically accessed location, or accessing previously atomically accessed locations with a differently sized operation +//! (such as accessing the top 16 bits of an AtomicU32). These senarios are generally undiscussed in formalisations of C++ memory model. +//! In Rust, these operations can only be done through a `&mut AtomicFoo` reference or one derived from it, therefore these operations +//! can only happen after all previous accesses on the same locations. This implementation is adapted to allow these operations. +//! A mixed size/atomicity read that races with writes, or a write that races with reads or writes will still cause UBs to be thrown. // Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in §5.3: // 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri), @@ -117,6 +124,14 @@ impl StoreBufferAlloc { Self { store_buffers: RefCell::new(AllocationMap::new()) } } + /// Checks if the range imperfectly overlaps with existing buffers + /// Used to determine if mixed-size atomic accesses + fn is_overlapping(&self, range: AllocRange) -> bool { + let buffers = self.store_buffers.borrow(); + let access_type = buffers.access_type(range); + matches!(access_type, AccessType::ImperfectlyOverlapping(_)) + } + /// When a non-atomic access happens on a location that has been atomically accessed /// before without data race, we can determine that the non-atomic access fully happens /// before all the prior atomic accesses so the location no longer needs to exhibit @@ -148,21 +163,16 @@ impl StoreBufferAlloc { let pos = match access_type { AccessType::PerfectlyOverlapping(pos) => pos, AccessType::Empty(pos) => { - let new_buffer = StoreBuffer::new(init); let mut buffers = self.store_buffers.borrow_mut(); - buffers.insert_at_pos(pos, range, new_buffer); + buffers.insert_at_pos(pos, range, StoreBuffer::new(init)); pos } AccessType::ImperfectlyOverlapping(pos_range) => { - // Accesses that imperfectly overlaps with existing atomic objects - // do not have well-defined behaviours. - // FIXME: if this access happens before all previous accesses on every object it overlaps - // with, then we would like to tolerate it. However this is not easy to check. 
- if pos_range.start + 1 == pos_range.end { - throw_ub_format!("mixed-size access on an existing atomic object"); - } else { - throw_ub_format!("access overlaps with multiple existing atomic objects"); - } + // Once we reach here we would've already checked that this access is not racy + let mut buffers = self.store_buffers.borrow_mut(); + buffers.remove_pos_range(pos_range.clone()); + buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init)); + pos_range.start } }; Ok(Ref::map(self.store_buffers.borrow(), |buffer| &buffer[pos])) @@ -179,16 +189,13 @@ impl StoreBufferAlloc { let pos = match access_type { AccessType::PerfectlyOverlapping(pos) => pos, AccessType::Empty(pos) => { - let new_buffer = StoreBuffer::new(init); - buffers.insert_at_pos(pos, range, new_buffer); + buffers.insert_at_pos(pos, range, StoreBuffer::new(init)); pos } AccessType::ImperfectlyOverlapping(pos_range) => { - if pos_range.start + 1 == pos_range.end { - throw_ub_format!("mixed-size access on an existing atomic object"); - } else { - throw_ub_format!("access overlaps with multiple existing atomic objects"); - } + buffers.remove_pos_range(pos_range.clone()); + buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init)); + pos_range.start } }; Ok(&mut buffers[pos]) @@ -392,6 +399,55 @@ impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mi pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> { + // If weak memory emulation is enabled, check if this atomic op imperfectly overlaps with a previous + // atomic write. If it does, then we require it to be ordered (non-racy) with all previous atomic + // writes on all the bytes in range + fn validate_overlapping_atomic_read(&self, place: &MPlaceTy<'tcx, Tag>) -> InterpResult<'tcx> { + let this = self.eval_context_ref(); + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let crate::AllocExtra { + weak_memory: Some(alloc_buffers), + data_race: Some(alloc_clocks), + .. + } = this.get_alloc_extra(alloc_id)? + { + let range = alloc_range(base_offset, place.layout.size); + if alloc_buffers.is_overlapping(range) + && !alloc_clocks + .read_race_free_with_atomic(range, this.machine.data_race.as_ref().unwrap()) + { + throw_ub_format!("racy imperfectly overlapping atomic access"); + } + } + Ok(()) + } + + // Same as above but needs to be ordered with all previous atomic read or writes + fn validate_overlapping_atomic_write( + &mut self, + place: &MPlaceTy<'tcx, Tag>, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let ( + crate::AllocExtra { + weak_memory: Some(alloc_buffers), + data_race: Some(alloc_clocks), + .. + }, + crate::Evaluator { data_race: Some(global), .. }, + ) = this.get_alloc_extra_mut(alloc_id)? + { + let range = alloc_range(base_offset, place.layout.size); + if alloc_buffers.is_overlapping(range) + && !alloc_clocks.write_race_free_with_atomic(range, global) + { + throw_ub_format!("racy imperfectly overlapping atomic access"); + } + } + Ok(()) + } + fn buffered_atomic_rmw( &mut self, new_val: ScalarMaybeUninit, diff --git a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs index 34097f4a89..7dad0a12e5 100644 --- a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs +++ b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs @@ -9,8 +9,8 @@ // so we have to stick to C++11 emulation from existing research. 
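// (Rough background on the model difference: in the C++11 formulation, SC fences
// constrain the total SC order more weakly than one might expect, so the outcome
// this test looks for -- both fenced readers observing 0 -- is allowed; the
// strengthened SC semantics proposed in the "SC fixes" paper and adopted for
// C++20 forbid it. Since Miri emulates the C++11-style semantics, the outcome
// can still be observed here.)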
use std::sync::atomic::Ordering::*; -use std::thread::spawn; use std::sync::atomic::{fence, AtomicUsize}; +use std::thread::spawn; // Spins until it reads the given value fn reads_value(loc: &AtomicUsize, val: usize) -> usize { @@ -25,7 +25,7 @@ fn reads_value(loc: &AtomicUsize, val: usize) -> usize { fn static_atomic(val: usize) -> &'static AtomicUsize { let ret = Box::leak(Box::new(AtomicUsize::new(val))); // A workaround to put the initialization value in the store buffer. - ret.store(val, Relaxed); + ret.load(Relaxed); ret } @@ -82,4 +82,4 @@ pub fn main() { for _ in 0..500 { test_cpp20_rwc_syncs(); } -} \ No newline at end of file +} diff --git a/tests/compile-fail/weak_memory/racing_mixed_size.rs b/tests/compile-fail/weak_memory/racing_mixed_size.rs new file mode 100644 index 0000000000..d4ba48afe3 --- /dev/null +++ b/tests/compile-fail/weak_memory/racing_mixed_size.rs @@ -0,0 +1,38 @@ +// compile-flags: -Zmiri-ignore-leaks + +#![feature(core_intrinsics)] + +use std::sync::atomic::AtomicU32; +use std::sync::atomic::Ordering::*; +use std::thread::spawn; + +fn static_atomic_u32(val: u32) -> &'static AtomicU32 { + let ret = Box::leak(Box::new(AtomicU32::new(val))); + ret +} + +fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] { + unsafe { std::mem::transmute::<*const u32, *const [u16; 2]>(dword) } +} + +// Wine's SRWLock implementation does this, which is definitely undefined in C++ memory model +// https://github.com/wine-mirror/wine/blob/303f8042f9db508adaca02ef21f8de4992cb9c03/dlls/ntdll/sync.c#L543-L566 +// Though it probably works just fine on x86 +pub fn main() { + let x = static_atomic_u32(0); + let j1 = spawn(move || { + x.store(1, Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: imperfectly overlapping + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); +} diff --git a/tests/run-pass/weak_memory/extra_cpp.rs b/tests/run-pass/weak_memory/extra_cpp.rs index b20ec58349..b1a683798b 100644 --- a/tests/run-pass/weak_memory/extra_cpp.rs +++ b/tests/run-pass/weak_memory/extra_cpp.rs @@ -4,13 +4,19 @@ // but doable in safe (at least sound) Rust. 
#![feature(atomic_from_mut)] +#![feature(core_intrinsics)] use std::sync::atomic::Ordering::*; -use std::sync::atomic::{AtomicU16, AtomicU32, AtomicUsize}; +use std::sync::atomic::{AtomicU16, AtomicU32}; use std::thread::spawn; -fn static_atomic_mut(val: usize) -> &'static mut AtomicUsize { - let ret = Box::leak(Box::new(AtomicUsize::new(val))); +fn static_atomic_mut(val: u32) -> &'static mut AtomicU32 { + let ret = Box::leak(Box::new(AtomicU32::new(val))); + ret +} + +fn static_atomic(val: u32) -> &'static AtomicU32 { + let ret = Box::leak(Box::new(AtomicU32::new(val))); ret } @@ -18,6 +24,10 @@ fn split_u32(dword: &mut u32) -> &mut [u16; 2] { unsafe { std::mem::transmute::<&mut u32, &mut [u16; 2]>(dword) } } +fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] { + unsafe { std::mem::transmute::<*const u32, *const [u16; 2]>(dword) } +} + fn mem_replace() { let mut x = AtomicU32::new(0); @@ -31,7 +41,7 @@ fn assign_to_mut() { let x = static_atomic_mut(0); x.store(1, Relaxed); - *x = AtomicUsize::new(2); + *x = AtomicU32::new(2); assert_eq!(x.load(Relaxed), 2); } @@ -70,10 +80,81 @@ fn from_mut_split() { assert_eq!(x_lo_atomic.load(Relaxed), u16::from_be(0xfafa)); } +// Although not possible to do in safe Rust, +// we allow non-atomic and atomic reads to race +// as this should be sound +fn racing_mixed_atomicity_read() { + let x = static_atomic(0); + x.store(42, Relaxed); + + let j1 = spawn(move || x.load(Relaxed)); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + unsafe { std::intrinsics::atomic_load_relaxed(x_ptr) } + }); + + let r1 = j1.join().unwrap(); + let r2 = j2.join().unwrap(); + + assert_eq!(r1, 42); + assert_eq!(r2, 42); +} + +fn racing_mixed_size_read() { + let x = static_atomic(0); + + let j1 = spawn(move || { + x.load(Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: imperfectly overlapping + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); +} + +fn racing_mixed_atomicity_and_size_read() { + let x = static_atomic(u32::from_be(0xabbafafa)); + + let j1 = spawn(move || { + x.load(Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + unsafe { *x_ptr }; + }); + + let j3 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi) + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); + let r3 = j3.join().unwrap(); + + assert_eq!(r3, u16::from_be(0xabba)); +} pub fn main() { get_mut_write(); from_mut_split(); assign_to_mut(); mem_replace(); + racing_mixed_atomicity_read(); + racing_mixed_size_read(); + racing_mixed_atomicity_and_size_read(); } From bfa56454e9544278be9f5de7abad54bcee9af51c Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Wed, 25 May 2022 21:10:00 +0100 Subject: [PATCH 34/46] Split extra_cpp tests into sound and unsafe --- src/concurrency/weak_memory.rs | 1 + tests/run-pass/weak_memory/extra_cpp.rs | 83 +--------------- .../run-pass/weak_memory/extra_cpp_unsafe.rs | 97 +++++++++++++++++++ .../weak_memory/extra_cpp_unsafe.stderr | 2 + 4 files changed, 102 insertions(+), 81 deletions(-) create mode 100644 tests/run-pass/weak_memory/extra_cpp_unsafe.rs create mode 100644 tests/run-pass/weak_memory/extra_cpp_unsafe.stderr diff --git 
a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index a4fbd14f43..942d71a52f 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -36,6 +36,7 @@ //! In Rust, these operations can only be done through a `&mut AtomicFoo` reference or one derived from it, therefore these operations //! can only happen after all previous accesses on the same locations. This implementation is adapted to allow these operations. //! A mixed size/atomicity read that races with writes, or a write that races with reads or writes will still cause UBs to be thrown. +//! You can refer to test cases in weak_memory/extra_cpp.rs and weak_memory/extra_cpp_unsafe.rs for examples of these operations. // Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in §5.3: // 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri), diff --git a/tests/run-pass/weak_memory/extra_cpp.rs b/tests/run-pass/weak_memory/extra_cpp.rs index b1a683798b..3edac581c3 100644 --- a/tests/run-pass/weak_memory/extra_cpp.rs +++ b/tests/run-pass/weak_memory/extra_cpp.rs @@ -15,19 +15,10 @@ fn static_atomic_mut(val: u32) -> &'static mut AtomicU32 { ret } -fn static_atomic(val: u32) -> &'static AtomicU32 { - let ret = Box::leak(Box::new(AtomicU32::new(val))); - ret -} - fn split_u32(dword: &mut u32) -> &mut [u16; 2] { unsafe { std::mem::transmute::<&mut u32, &mut [u16; 2]>(dword) } } -fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] { - unsafe { std::mem::transmute::<*const u32, *const [u16; 2]>(dword) } -} - fn mem_replace() { let mut x = AtomicU32::new(0); @@ -71,6 +62,8 @@ fn from_mut_split() { x_atomic.store(u32::from_be(0xabbafafa), Relaxed); } + // Split the `AtomicU32` into two `AtomicU16`. + // Crucially, there is no non-atomic access to `x`! All accesses are atomic, but of different size. 
let (x_hi, x_lo) = split_u32(&mut x).split_at_mut(1); let x_hi_atomic = AtomicU16::from_mut(&mut x_hi[0]); @@ -80,81 +73,9 @@ fn from_mut_split() { assert_eq!(x_lo_atomic.load(Relaxed), u16::from_be(0xfafa)); } -// Although not possible to do in safe Rust, -// we allow non-atomic and atomic reads to race -// as this should be sound -fn racing_mixed_atomicity_read() { - let x = static_atomic(0); - x.store(42, Relaxed); - - let j1 = spawn(move || x.load(Relaxed)); - - let j2 = spawn(move || { - let x_ptr = x as *const AtomicU32 as *const u32; - unsafe { std::intrinsics::atomic_load_relaxed(x_ptr) } - }); - - let r1 = j1.join().unwrap(); - let r2 = j2.join().unwrap(); - - assert_eq!(r1, 42); - assert_eq!(r2, 42); -} - -fn racing_mixed_size_read() { - let x = static_atomic(0); - - let j1 = spawn(move || { - x.load(Relaxed); - }); - - let j2 = spawn(move || { - let x_ptr = x as *const AtomicU32 as *const u32; - let x_split = split_u32_ptr(x_ptr); - unsafe { - let hi = &(*x_split)[0] as *const u16; - std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: imperfectly overlapping - } - }); - - j1.join().unwrap(); - j2.join().unwrap(); -} - -fn racing_mixed_atomicity_and_size_read() { - let x = static_atomic(u32::from_be(0xabbafafa)); - - let j1 = spawn(move || { - x.load(Relaxed); - }); - - let j2 = spawn(move || { - let x_ptr = x as *const AtomicU32 as *const u32; - unsafe { *x_ptr }; - }); - - let j3 = spawn(move || { - let x_ptr = x as *const AtomicU32 as *const u32; - let x_split = split_u32_ptr(x_ptr); - unsafe { - let hi = &(*x_split)[0] as *const u16; - std::intrinsics::atomic_load_relaxed(hi) - } - }); - - j1.join().unwrap(); - j2.join().unwrap(); - let r3 = j3.join().unwrap(); - - assert_eq!(r3, u16::from_be(0xabba)); -} - pub fn main() { get_mut_write(); from_mut_split(); assign_to_mut(); mem_replace(); - racing_mixed_atomicity_read(); - racing_mixed_size_read(); - racing_mixed_atomicity_and_size_read(); } diff --git a/tests/run-pass/weak_memory/extra_cpp_unsafe.rs b/tests/run-pass/weak_memory/extra_cpp_unsafe.rs new file mode 100644 index 0000000000..95cc97d4db --- /dev/null +++ b/tests/run-pass/weak_memory/extra_cpp_unsafe.rs @@ -0,0 +1,97 @@ +// compile-flags: -Zmiri-ignore-leaks + +// Tests operations not perfomable through C++'s atomic API +// but doable in unsafe Rust which we think *should* be fine. +// Nonetheless they may be determined as inconsistent with the +// memory model in the future. 
+ +#![feature(atomic_from_mut)] +#![feature(core_intrinsics)] + +use std::sync::atomic::AtomicU32; +use std::sync::atomic::Ordering::*; +use std::thread::spawn; + +fn static_atomic(val: u32) -> &'static AtomicU32 { + let ret = Box::leak(Box::new(AtomicU32::new(val))); + ret +} + +fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] { + unsafe { std::mem::transmute::<*const u32, *const [u16; 2]>(dword) } +} + +// We allow non-atomic and atomic reads to race +fn racing_mixed_atomicity_read() { + let x = static_atomic(0); + x.store(42, Relaxed); + + let j1 = spawn(move || x.load(Relaxed)); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + unsafe { std::intrinsics::atomic_load_relaxed(x_ptr) } + }); + + let r1 = j1.join().unwrap(); + let r2 = j2.join().unwrap(); + + assert_eq!(r1, 42); + assert_eq!(r2, 42); +} + +// We allow mixed-size atomic reads to race +fn racing_mixed_size_read() { + let x = static_atomic(0); + + let j1 = spawn(move || { + x.load(Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi); + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); +} + +// And the combination of both of above +fn racing_mixed_atomicity_and_size_read() { + let x = static_atomic(u32::from_be(0xabbafafa)); + + let j1 = spawn(move || { + x.load(Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + unsafe { *x_ptr }; + }); + + let j3 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi) + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); + let r3 = j3.join().unwrap(); + + assert_eq!(r3, u16::from_be(0xabba)); +} + +pub fn main() { + racing_mixed_atomicity_read(); + racing_mixed_size_read(); + racing_mixed_atomicity_and_size_read(); +} diff --git a/tests/run-pass/weak_memory/extra_cpp_unsafe.stderr b/tests/run-pass/weak_memory/extra_cpp_unsafe.stderr new file mode 100644 index 0000000000..1d0ce4b385 --- /dev/null +++ b/tests/run-pass/weak_memory/extra_cpp_unsafe.stderr @@ -0,0 +1,2 @@ +warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. 
+ From 6a73dedb36516c89914bbdf7f97c425d8615e1ae Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Wed, 25 May 2022 21:54:30 +0100 Subject: [PATCH 35/46] Update experimental threading warning --- src/shims/unix/thread.rs | 2 +- .../weak_memory/cpp20_rwc_syncs.stderr | 23 +++++++++++++++++++ .../weak_memory/racing_mixed_size.rs | 1 + .../weak_memory/racing_mixed_size.stderr | 18 +++++++++++++++ .../libc_pthread_create_main_terminate.stderr | 3 ++- .../libc_pthread_join_detached.stderr | 3 ++- .../libc_pthread_join_joined.stderr | 3 ++- .../concurrency/libc_pthread_join_main.stderr | 3 ++- .../libc_pthread_join_multiple.stderr | 3 ++- .../concurrency/libc_pthread_join_self.stderr | 3 ++- .../thread_local_static_dealloc.stderr | 3 ++- tests/fail/concurrency/too_few_args.stderr | 3 ++- tests/fail/concurrency/too_many_args.stderr | 3 ++- .../concurrency/unwind_top_of_stack.stderr | 3 ++- tests/fail/data_race/alloc_read_race.stderr | 3 ++- tests/fail/data_race/alloc_write_race.stderr | 3 ++- .../atomic_read_na_write_race1.stderr | 3 ++- .../atomic_read_na_write_race2.stderr | 3 ++- .../atomic_write_na_read_race1.stderr | 3 ++- .../atomic_write_na_read_race2.stderr | 3 ++- .../atomic_write_na_write_race1.stderr | 3 ++- .../atomic_write_na_write_race2.stderr | 3 ++- .../dangling_thread_async_race.stderr | 3 ++- .../data_race/dangling_thread_race.stderr | 3 ++- .../fail/data_race/dealloc_read_race1.stderr | 3 ++- .../fail/data_race/dealloc_read_race2.stderr | 3 ++- .../data_race/dealloc_read_race_stack.stderr | 3 ++- .../fail/data_race/dealloc_write_race1.stderr | 3 ++- .../fail/data_race/dealloc_write_race2.stderr | 3 ++- .../data_race/dealloc_write_race_stack.stderr | 3 ++- .../enable_after_join_to_main.stderr | 3 ++- tests/fail/data_race/fence_after_load.stderr | 3 ++- tests/fail/data_race/read_write_race.stderr | 3 ++- .../data_race/read_write_race_stack.stderr | 3 ++- .../fail/data_race/relax_acquire_race.stderr | 3 ++- tests/fail/data_race/release_seq_race.stderr | 3 ++- .../release_seq_race_same_thread.stderr | 3 ++- tests/fail/data_race/rmw_race.stderr | 3 ++- tests/fail/data_race/write_write_race.stderr | 3 ++- .../data_race/write_write_race_stack.stderr | 3 ++- .../sync/libc_pthread_mutex_deadlock.stderr | 3 ++- .../libc_pthread_mutex_wrong_owner.stderr | 3 ++- ...ibc_pthread_rwlock_read_wrong_owner.stderr | 3 ++- ..._pthread_rwlock_write_read_deadlock.stderr | 3 ++- ...pthread_rwlock_write_write_deadlock.stderr | 3 ++- ...bc_pthread_rwlock_write_wrong_owner.stderr | 3 ++- tests/pass/concurrency/channels.stderr | 3 ++- .../concurrent_caller_location.stderr | 3 ++- tests/pass/concurrency/data_race.stderr | 3 ++- .../disable_data_race_detector.stderr | 3 ++- tests/pass/concurrency/issue1643.stderr | 3 ++- tests/pass/concurrency/linux-futex.stderr | 3 ++- tests/pass/concurrency/simple.stderr | 3 ++- tests/pass/concurrency/spin_loops.stderr | 3 ++- tests/pass/concurrency/sync.stderr | 3 ++- tests/pass/concurrency/thread_locals.stderr | 3 ++- tests/pass/concurrency/tls_lib_drop.stderr | 3 ++- tests/pass/libc.stderr | 3 ++- tests/pass/panic/concurrent-panic.stderr | 3 ++- tests/pass/threadleak_ignored.stderr | 3 ++- tests/run-pass/weak_memory/consistency.stderr | 3 ++- tests/run-pass/weak_memory/extra_cpp.rs | 1 + tests/run-pass/weak_memory/extra_cpp.stderr | 3 ++- .../run-pass/weak_memory/extra_cpp_unsafe.rs | 1 + .../weak_memory/extra_cpp_unsafe.stderr | 3 ++- tests/run-pass/weak_memory/weak.stderr | 3 ++- 66 files changed, 165 insertions(+), 61 deletions(-) create mode 100644 
tests/compile-fail/weak_memory/cpp20_rwc_syncs.stderr create mode 100644 tests/compile-fail/weak_memory/racing_mixed_size.stderr diff --git a/src/shims/unix/thread.rs b/src/shims/unix/thread.rs index 812cb7376b..4dc40cf2fe 100644 --- a/src/shims/unix/thread.rs +++ b/src/shims/unix/thread.rs @@ -14,7 +14,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let this = self.eval_context_mut(); this.tcx.sess.warn( - "thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model.", + "thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops.\n(see https://github.com/rust-lang/miri/issues/1388)", ); // Create the new thread diff --git a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.stderr b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.stderr new file mode 100644 index 0000000000..f4f467120e --- /dev/null +++ b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.stderr @@ -0,0 +1,23 @@ +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) + +error: Undefined Behavior: type validation failed at .value: encountered uninitialized raw pointer + --> $DIR/cpp20_rwc_syncs.rs:LL:CC + | +LL | let _ = unsafe { std::mem::MaybeUninit::<*const u32>::uninit().assume_init() }; + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ type validation failed at .value: encountered uninitialized raw pointer + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + + = note: inside `test_cpp20_rwc_syncs` at $DIR/cpp20_rwc_syncs.rs:LL:CC +note: inside `main` at $DIR/cpp20_rwc_syncs.rs:LL:CC + --> $DIR/cpp20_rwc_syncs.rs:LL:CC + | +LL | test_cpp20_rwc_syncs(); + | ^^^^^^^^^^^^^^^^^^^^^^ + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to previous error; 1 warning emitted + diff --git a/tests/compile-fail/weak_memory/racing_mixed_size.rs b/tests/compile-fail/weak_memory/racing_mixed_size.rs index d4ba48afe3..513d97edb5 100644 --- a/tests/compile-fail/weak_memory/racing_mixed_size.rs +++ b/tests/compile-fail/weak_memory/racing_mixed_size.rs @@ -1,3 +1,4 @@ +// ignore-windows: Concurrency on Windows is not supported yet. // compile-flags: -Zmiri-ignore-leaks #![feature(core_intrinsics)] diff --git a/tests/compile-fail/weak_memory/racing_mixed_size.stderr b/tests/compile-fail/weak_memory/racing_mixed_size.stderr new file mode 100644 index 0000000000..b3074d93c9 --- /dev/null +++ b/tests/compile-fail/weak_memory/racing_mixed_size.stderr @@ -0,0 +1,18 @@ +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) + +error: Undefined Behavior: racy imperfectly overlapping atomic access + --> $DIR/racing_mixed_size.rs:LL:CC + | +LL | std::intrinsics::atomic_load_relaxed(hi); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ racy imperfectly overlapping atomic access + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + + = note: inside closure at $DIR/racing_mixed_size.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to previous error; 1 warning emitted + diff --git a/tests/fail/concurrency/libc_pthread_create_main_terminate.stderr b/tests/fail/concurrency/libc_pthread_create_main_terminate.stderr index 0f7fbefe0a..2ce73fdaae 100644 --- a/tests/fail/concurrency/libc_pthread_create_main_terminate.stderr +++ b/tests/fail/concurrency/libc_pthread_create_main_terminate.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: the main thread terminated without waiting for all remaining threads diff --git a/tests/fail/concurrency/libc_pthread_join_detached.stderr b/tests/fail/concurrency/libc_pthread_join_detached.stderr index 688f61a98b..b106cc4c95 100644 --- a/tests/fail/concurrency/libc_pthread_join_detached.stderr +++ b/tests/fail/concurrency/libc_pthread_join_detached.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: trying to join a detached or already joined thread --> $DIR/libc_pthread_join_detached.rs:LL:CC diff --git a/tests/fail/concurrency/libc_pthread_join_joined.stderr b/tests/fail/concurrency/libc_pthread_join_joined.stderr index 518f72de5b..438998208d 100644 --- a/tests/fail/concurrency/libc_pthread_join_joined.stderr +++ b/tests/fail/concurrency/libc_pthread_join_joined.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: trying to join a detached or already joined thread --> $DIR/libc_pthread_join_joined.rs:LL:CC diff --git a/tests/fail/concurrency/libc_pthread_join_main.stderr b/tests/fail/concurrency/libc_pthread_join_main.stderr index 5d9ec148e0..04f2ab0740 100644 --- a/tests/fail/concurrency/libc_pthread_join_main.stderr +++ b/tests/fail/concurrency/libc_pthread_join_main.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: trying to join a detached or already joined thread --> $DIR/libc_pthread_join_main.rs:LL:CC diff --git a/tests/fail/concurrency/libc_pthread_join_multiple.stderr b/tests/fail/concurrency/libc_pthread_join_multiple.stderr index 57126a14ae..daf18c50e0 100644 --- a/tests/fail/concurrency/libc_pthread_join_multiple.stderr +++ b/tests/fail/concurrency/libc_pthread_join_multiple.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: trying to join a detached or already joined thread --> $DIR/libc_pthread_join_multiple.rs:LL:CC diff --git a/tests/fail/concurrency/libc_pthread_join_self.stderr b/tests/fail/concurrency/libc_pthread_join_self.stderr index d638d08939..b2e0779f5f 100644 --- a/tests/fail/concurrency/libc_pthread_join_self.stderr +++ b/tests/fail/concurrency/libc_pthread_join_self.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: trying to join itself --> $DIR/libc_pthread_join_self.rs:LL:CC diff --git a/tests/fail/concurrency/thread_local_static_dealloc.stderr b/tests/fail/concurrency/thread_local_static_dealloc.stderr index cdeb22fb31..ad5528dc55 100644 --- a/tests/fail/concurrency/thread_local_static_dealloc.stderr +++ b/tests/fail/concurrency/thread_local_static_dealloc.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: pointer to ALLOC was dereferenced after this allocation got freed --> $DIR/thread_local_static_dealloc.rs:LL:CC diff --git a/tests/fail/concurrency/too_few_args.stderr b/tests/fail/concurrency/too_few_args.stderr index 7401b2902e..1ed8c5a510 100644 --- a/tests/fail/concurrency/too_few_args.stderr +++ b/tests/fail/concurrency/too_few_args.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: callee has fewer arguments than expected --> $DIR/too_few_args.rs:LL:CC diff --git a/tests/fail/concurrency/too_many_args.stderr b/tests/fail/concurrency/too_many_args.stderr index 951b76317f..5602dab993 100644 --- a/tests/fail/concurrency/too_many_args.stderr +++ b/tests/fail/concurrency/too_many_args.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: callee has more arguments than expected --> $DIR/too_many_args.rs:LL:CC diff --git a/tests/fail/concurrency/unwind_top_of_stack.stderr b/tests/fail/concurrency/unwind_top_of_stack.stderr index 600b8443d2..26a196a559 100644 --- a/tests/fail/concurrency/unwind_top_of_stack.stderr +++ b/tests/fail/concurrency/unwind_top_of_stack.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) thread '' panicked at 'explicit panic', $DIR/unwind_top_of_stack.rs:LL:CC note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace diff --git a/tests/fail/data_race/alloc_read_race.stderr b/tests/fail/data_race/alloc_read_race.stderr index 9d9006966b..0b247fb19b 100644 --- a/tests/fail/data_race/alloc_read_race.stderr +++ b/tests/fail/data_race/alloc_read_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 2) and Allocate on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/alloc_read_race.rs:LL:CC diff --git a/tests/fail/data_race/alloc_write_race.stderr b/tests/fail/data_race/alloc_write_race.stderr index 318895cae6..3594980ef9 100644 --- a/tests/fail/data_race/alloc_write_race.stderr +++ b/tests/fail/data_race/alloc_write_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 2) and Allocate on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/alloc_write_race.rs:LL:CC diff --git a/tests/fail/data_race/atomic_read_na_write_race1.stderr b/tests/fail/data_race/atomic_read_na_write_race1.stderr index 09d7accb05..0c9aaf5a00 100644 --- a/tests/fail/data_race/atomic_read_na_write_race1.stderr +++ b/tests/fail/data_race/atomic_read_na_write_race1.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Atomic Load on Thread(id = 2) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/atomic_read_na_write_race1.rs:LL:CC diff --git a/tests/fail/data_race/atomic_read_na_write_race2.stderr b/tests/fail/data_race/atomic_read_na_write_race2.stderr index 739ce83d0b..6e3a1330f9 100644 --- a/tests/fail/data_race/atomic_read_na_write_race2.stderr +++ b/tests/fail/data_race/atomic_read_na_write_race2.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. 
+warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 2) and Atomic Load on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/atomic_read_na_write_race2.rs:LL:CC diff --git a/tests/fail/data_race/atomic_write_na_read_race1.stderr b/tests/fail/data_race/atomic_write_na_read_race1.stderr index 6d67f58aae..4dc4ac1e67 100644 --- a/tests/fail/data_race/atomic_write_na_read_race1.stderr +++ b/tests/fail/data_race/atomic_write_na_read_race1.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 2) and Atomic Store on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/atomic_write_na_read_race1.rs:LL:CC diff --git a/tests/fail/data_race/atomic_write_na_read_race2.stderr b/tests/fail/data_race/atomic_write_na_read_race2.stderr index d9950ebcb7..e665073c53 100644 --- a/tests/fail/data_race/atomic_write_na_read_race2.stderr +++ b/tests/fail/data_race/atomic_write_na_read_race2.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Atomic Store on Thread(id = 2) and Read on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/atomic_write_na_read_race2.rs:LL:CC diff --git a/tests/fail/data_race/atomic_write_na_write_race1.stderr b/tests/fail/data_race/atomic_write_na_write_race1.stderr index 29ccf70212..a70c3b52de 100644 --- a/tests/fail/data_race/atomic_write_na_write_race1.stderr +++ b/tests/fail/data_race/atomic_write_na_write_race1.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Atomic Store on Thread(id = 2) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/atomic_write_na_write_race1.rs:LL:CC diff --git a/tests/fail/data_race/atomic_write_na_write_race2.stderr b/tests/fail/data_race/atomic_write_na_write_race2.stderr index 5488f05de0..79730d5079 100644 --- a/tests/fail/data_race/atomic_write_na_write_race2.stderr +++ b/tests/fail/data_race/atomic_write_na_write_race2.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 2) and Atomic Store on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/atomic_write_na_write_race2.rs:LL:CC diff --git a/tests/fail/data_race/dangling_thread_async_race.stderr b/tests/fail/data_race/dangling_thread_async_race.stderr index eccc243d69..21b3eefc5e 100644 --- a/tests/fail/data_race/dangling_thread_async_race.stderr +++ b/tests/fail/data_race/dangling_thread_async_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 3) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/dangling_thread_async_race.rs:LL:CC diff --git a/tests/fail/data_race/dangling_thread_race.stderr b/tests/fail/data_race/dangling_thread_race.stderr index 4dffeb1423..3ca8862a58 100644 --- a/tests/fail/data_race/dangling_thread_race.stderr +++ b/tests/fail/data_race/dangling_thread_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 0, name = "main") and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/dangling_thread_race.rs:LL:CC diff --git a/tests/fail/data_race/dealloc_read_race1.stderr b/tests/fail/data_race/dealloc_read_race1.stderr index 37196021ea..10b32003ff 100644 --- a/tests/fail/data_race/dealloc_read_race1.stderr +++ b/tests/fail/data_race/dealloc_read_race1.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Deallocate on Thread(id = 2) and Read on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/dealloc_read_race1.rs:LL:CC diff --git a/tests/fail/data_race/dealloc_read_race2.stderr b/tests/fail/data_race/dealloc_read_race2.stderr index 03fb5dbea9..a21de1d9f7 100644 --- a/tests/fail/data_race/dealloc_read_race2.stderr +++ b/tests/fail/data_race/dealloc_read_race2.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: pointer to ALLOC was dereferenced after this allocation got freed --> $DIR/dealloc_read_race2.rs:LL:CC diff --git a/tests/fail/data_race/dealloc_read_race_stack.stderr b/tests/fail/data_race/dealloc_read_race_stack.stderr index 055724fe29..0f7213eb8d 100644 --- a/tests/fail/data_race/dealloc_read_race_stack.stderr +++ b/tests/fail/data_race/dealloc_read_race_stack.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Deallocate on Thread(id = 1) and Read on Thread(id = 2) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/dealloc_read_race_stack.rs:LL:CC diff --git a/tests/fail/data_race/dealloc_write_race1.stderr b/tests/fail/data_race/dealloc_write_race1.stderr index 7160f49af6..76258e9d8f 100644 --- a/tests/fail/data_race/dealloc_write_race1.stderr +++ b/tests/fail/data_race/dealloc_write_race1.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Deallocate on Thread(id = 2) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/dealloc_write_race1.rs:LL:CC diff --git a/tests/fail/data_race/dealloc_write_race2.stderr b/tests/fail/data_race/dealloc_write_race2.stderr index cb0d0af867..d9aef72118 100644 --- a/tests/fail/data_race/dealloc_write_race2.stderr +++ b/tests/fail/data_race/dealloc_write_race2.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: pointer to ALLOC was dereferenced after this allocation got freed --> $DIR/dealloc_write_race2.rs:LL:CC diff --git a/tests/fail/data_race/dealloc_write_race_stack.stderr b/tests/fail/data_race/dealloc_write_race_stack.stderr index 05a8e1a8b7..70533f654b 100644 --- a/tests/fail/data_race/dealloc_write_race_stack.stderr +++ b/tests/fail/data_race/dealloc_write_race_stack.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Deallocate on Thread(id = 1) and Write on Thread(id = 2) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/dealloc_write_race_stack.rs:LL:CC diff --git a/tests/fail/data_race/enable_after_join_to_main.stderr b/tests/fail/data_race/enable_after_join_to_main.stderr index e612e08ade..58d33ffa8c 100644 --- a/tests/fail/data_race/enable_after_join_to_main.stderr +++ b/tests/fail/data_race/enable_after_join_to_main.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. 
+warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 6) and Write on Thread(id = 5) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/enable_after_join_to_main.rs:LL:CC diff --git a/tests/fail/data_race/fence_after_load.stderr b/tests/fail/data_race/fence_after_load.stderr index 1445239132..1e3186b08f 100644 --- a/tests/fail/data_race/fence_after_load.stderr +++ b/tests/fail/data_race/fence_after_load.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 0, name = "main") and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/fence_after_load.rs:LL:CC diff --git a/tests/fail/data_race/read_write_race.stderr b/tests/fail/data_race/read_write_race.stderr index fc04141830..5078e66254 100644 --- a/tests/fail/data_race/read_write_race.stderr +++ b/tests/fail/data_race/read_write_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 2) and Read on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/read_write_race.rs:LL:CC diff --git a/tests/fail/data_race/read_write_race_stack.stderr b/tests/fail/data_race/read_write_race_stack.stderr index aad63731ca..843bea753b 100644 --- a/tests/fail/data_race/read_write_race_stack.stderr +++ b/tests/fail/data_race/read_write_race_stack.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 1) and Write on Thread(id = 2) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/read_write_race_stack.rs:LL:CC diff --git a/tests/fail/data_race/relax_acquire_race.stderr b/tests/fail/data_race/relax_acquire_race.stderr index a437120c89..d2423ff916 100644 --- a/tests/fail/data_race/relax_acquire_race.stderr +++ b/tests/fail/data_race/relax_acquire_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 3) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/relax_acquire_race.rs:LL:CC diff --git a/tests/fail/data_race/release_seq_race.stderr b/tests/fail/data_race/release_seq_race.stderr index 1a1c7ac64f..ffbf50c091 100644 --- a/tests/fail/data_race/release_seq_race.stderr +++ b/tests/fail/data_race/release_seq_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 3) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/release_seq_race.rs:LL:CC diff --git a/tests/fail/data_race/release_seq_race_same_thread.stderr b/tests/fail/data_race/release_seq_race_same_thread.stderr index f357c0647d..b760215146 100644 --- a/tests/fail/data_race/release_seq_race_same_thread.stderr +++ b/tests/fail/data_race/release_seq_race_same_thread.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 2) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/release_seq_race_same_thread.rs:LL:CC diff --git a/tests/fail/data_race/rmw_race.stderr b/tests/fail/data_race/rmw_race.stderr index dd3692c6dc..c6b09ba5f0 100644 --- a/tests/fail/data_race/rmw_race.stderr +++ b/tests/fail/data_race/rmw_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 3) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/rmw_race.rs:LL:CC diff --git a/tests/fail/data_race/write_write_race.stderr b/tests/fail/data_race/write_write_race.stderr index dafee7dbf8..5acba97486 100644 --- a/tests/fail/data_race/write_write_race.stderr +++ b/tests/fail/data_race/write_write_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 2) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/write_write_race.rs:LL:CC diff --git a/tests/fail/data_race/write_write_race_stack.stderr b/tests/fail/data_race/write_write_race_stack.stderr index 8d113673ac..d052206f4c 100644 --- a/tests/fail/data_race/write_write_race_stack.stderr +++ b/tests/fail/data_race/write_write_race_stack.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 1) and Write on Thread(id = 2) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/write_write_race_stack.rs:LL:CC diff --git a/tests/fail/sync/libc_pthread_mutex_deadlock.stderr b/tests/fail/sync/libc_pthread_mutex_deadlock.stderr index ac37096ad8..d1f9ee6cdd 100644 --- a/tests/fail/sync/libc_pthread_mutex_deadlock.stderr +++ b/tests/fail/sync/libc_pthread_mutex_deadlock.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: deadlock: the evaluated program deadlocked --> $DIR/libc_pthread_mutex_deadlock.rs:LL:CC diff --git a/tests/fail/sync/libc_pthread_mutex_wrong_owner.stderr b/tests/fail/sync/libc_pthread_mutex_wrong_owner.stderr index 6603b264d9..e9f0e2d4c1 100644 --- a/tests/fail/sync/libc_pthread_mutex_wrong_owner.stderr +++ b/tests/fail/sync/libc_pthread_mutex_wrong_owner.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: unlocked a default mutex that was not locked by the current thread --> $DIR/libc_pthread_mutex_wrong_owner.rs:LL:CC diff --git a/tests/fail/sync/libc_pthread_rwlock_read_wrong_owner.stderr b/tests/fail/sync/libc_pthread_rwlock_read_wrong_owner.stderr index d3820f0dcb..c25ab25a3d 100644 --- a/tests/fail/sync/libc_pthread_rwlock_read_wrong_owner.stderr +++ b/tests/fail/sync/libc_pthread_rwlock_read_wrong_owner.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: unlocked an rwlock that was not locked by the active thread --> $DIR/libc_pthread_rwlock_read_wrong_owner.rs:LL:CC diff --git a/tests/fail/sync/libc_pthread_rwlock_write_read_deadlock.stderr b/tests/fail/sync/libc_pthread_rwlock_write_read_deadlock.stderr index 748a363a27..8fc2ae4c82 100644 --- a/tests/fail/sync/libc_pthread_rwlock_write_read_deadlock.stderr +++ b/tests/fail/sync/libc_pthread_rwlock_write_read_deadlock.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. 
+warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: deadlock: the evaluated program deadlocked --> $DIR/libc_pthread_rwlock_write_read_deadlock.rs:LL:CC diff --git a/tests/fail/sync/libc_pthread_rwlock_write_write_deadlock.stderr b/tests/fail/sync/libc_pthread_rwlock_write_write_deadlock.stderr index c6a03ff9af..86c67925fb 100644 --- a/tests/fail/sync/libc_pthread_rwlock_write_write_deadlock.stderr +++ b/tests/fail/sync/libc_pthread_rwlock_write_write_deadlock.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: deadlock: the evaluated program deadlocked --> $DIR/libc_pthread_rwlock_write_write_deadlock.rs:LL:CC diff --git a/tests/fail/sync/libc_pthread_rwlock_write_wrong_owner.stderr b/tests/fail/sync/libc_pthread_rwlock_write_wrong_owner.stderr index 02a6cf11c0..8965d55a48 100644 --- a/tests/fail/sync/libc_pthread_rwlock_write_wrong_owner.stderr +++ b/tests/fail/sync/libc_pthread_rwlock_write_wrong_owner.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: unlocked an rwlock that was not locked by the active thread --> $DIR/libc_pthread_rwlock_write_wrong_owner.rs:LL:CC diff --git a/tests/pass/concurrency/channels.stderr b/tests/pass/concurrency/channels.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/pass/concurrency/channels.stderr +++ b/tests/pass/concurrency/channels.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/concurrent_caller_location.stderr b/tests/pass/concurrency/concurrent_caller_location.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/pass/concurrency/concurrent_caller_location.stderr +++ b/tests/pass/concurrency/concurrent_caller_location.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/data_race.stderr b/tests/pass/concurrency/data_race.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/pass/concurrency/data_race.stderr +++ b/tests/pass/concurrency/data_race.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/disable_data_race_detector.stderr b/tests/pass/concurrency/disable_data_race_detector.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/pass/concurrency/disable_data_race_detector.stderr +++ b/tests/pass/concurrency/disable_data_race_detector.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/issue1643.stderr b/tests/pass/concurrency/issue1643.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/pass/concurrency/issue1643.stderr +++ b/tests/pass/concurrency/issue1643.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/linux-futex.stderr b/tests/pass/concurrency/linux-futex.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/pass/concurrency/linux-futex.stderr +++ b/tests/pass/concurrency/linux-futex.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/simple.stderr b/tests/pass/concurrency/simple.stderr index 386dc92269..0ba9e8645b 100644 --- a/tests/pass/concurrency/simple.stderr +++ b/tests/pass/concurrency/simple.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) thread '' panicked at 'Hello!', $DIR/simple.rs:LL:CC note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace diff --git a/tests/pass/concurrency/spin_loops.stderr b/tests/pass/concurrency/spin_loops.stderr index 03676519d4..9fe6daa778 100644 --- a/tests/pass/concurrency/spin_loops.stderr +++ b/tests/pass/concurrency/spin_loops.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/sync.stderr b/tests/pass/concurrency/sync.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/pass/concurrency/sync.stderr +++ b/tests/pass/concurrency/sync.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/thread_locals.stderr b/tests/pass/concurrency/thread_locals.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/pass/concurrency/thread_locals.stderr +++ b/tests/pass/concurrency/thread_locals.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/tls_lib_drop.stderr b/tests/pass/concurrency/tls_lib_drop.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/pass/concurrency/tls_lib_drop.stderr +++ b/tests/pass/concurrency/tls_lib_drop.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/libc.stderr b/tests/pass/libc.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/pass/libc.stderr +++ b/tests/pass/libc.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/panic/concurrent-panic.stderr b/tests/pass/panic/concurrent-panic.stderr index 0d4a409dfe..b90cc01bb8 100644 --- a/tests/pass/panic/concurrent-panic.stderr +++ b/tests/pass/panic/concurrent-panic.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) Thread 1 starting, will block on mutex Thread 1 reported it has started diff --git a/tests/pass/threadleak_ignored.stderr b/tests/pass/threadleak_ignored.stderr index 9205eb70b2..af327a3012 100644 --- a/tests/pass/threadleak_ignored.stderr +++ b/tests/pass/threadleak_ignored.stderr @@ -1,3 +1,4 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) Dropping 0 diff --git a/tests/run-pass/weak_memory/consistency.stderr b/tests/run-pass/weak_memory/consistency.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/run-pass/weak_memory/consistency.stderr +++ b/tests/run-pass/weak_memory/consistency.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/run-pass/weak_memory/extra_cpp.rs b/tests/run-pass/weak_memory/extra_cpp.rs index 3edac581c3..750c628458 100644 --- a/tests/run-pass/weak_memory/extra_cpp.rs +++ b/tests/run-pass/weak_memory/extra_cpp.rs @@ -1,3 +1,4 @@ +// ignore-windows: Concurrency on Windows is not supported yet. // compile-flags: -Zmiri-ignore-leaks // Tests operations not perfomable through C++'s atomic API diff --git a/tests/run-pass/weak_memory/extra_cpp.stderr b/tests/run-pass/weak_memory/extra_cpp.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/run-pass/weak_memory/extra_cpp.stderr +++ b/tests/run-pass/weak_memory/extra_cpp.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/run-pass/weak_memory/extra_cpp_unsafe.rs b/tests/run-pass/weak_memory/extra_cpp_unsafe.rs index 95cc97d4db..7c375d7345 100644 --- a/tests/run-pass/weak_memory/extra_cpp_unsafe.rs +++ b/tests/run-pass/weak_memory/extra_cpp_unsafe.rs @@ -1,3 +1,4 @@ +// ignore-windows: Concurrency on Windows is not supported yet. // compile-flags: -Zmiri-ignore-leaks // Tests operations not perfomable through C++'s atomic API diff --git a/tests/run-pass/weak_memory/extra_cpp_unsafe.stderr b/tests/run-pass/weak_memory/extra_cpp_unsafe.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/run-pass/weak_memory/extra_cpp_unsafe.stderr +++ b/tests/run-pass/weak_memory/extra_cpp_unsafe.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/run-pass/weak_memory/weak.stderr b/tests/run-pass/weak_memory/weak.stderr index 1d0ce4b385..9fe6daa778 100644 --- a/tests/run-pass/weak_memory/weak.stderr +++ b/tests/run-pass/weak_memory/weak.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental: weak memory effects are not fully compatible with the Rust atomics memory model. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) From a7c832b04a2e11c39c58606182d2666b853c3602 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sun, 29 May 2022 09:57:24 +0100 Subject: [PATCH 36/46] Wording improvements Co-authored-by: Ralf Jung --- src/concurrency/weak_memory.rs | 4 +++- src/machine.rs | 4 ++-- tests/compile-fail/weak_memory/racing_mixed_size.stderr | 4 ++-- tests/run-pass/weak_memory/extra_cpp_unsafe.rs | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index 942d71a52f..888f9edceb 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -417,7 +417,9 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: && !alloc_clocks .read_race_free_with_atomic(range, this.machine.data_race.as_ref().unwrap()) { - throw_ub_format!("racy imperfectly overlapping atomic access"); + throw_ub_format!( + "racy imperfectly overlapping atomic access is not possible in the C++20 memory model" + ); } } Ok(()) diff --git a/src/machine.rs b/src/machine.rs index 6dc2a75b69..b8cb890870 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -743,8 +743,8 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { if let Some(weak_memory) = &alloc_extra.weak_memory { if !machine.data_race.as_ref().unwrap().ongoing_atomic_access() { // This is a non-atomic access. And if we are accessing a previously atomically - // accessed location without racing with them, then the location no longer needs - // to exhibit weak-memory behaviours until a fresh atomic access happens + // accessed location without racing with them, then the location no longer + // exhibits weak-memory behaviors until a fresh atomic access happens. weak_memory.destroy_atomicity(range); } } diff --git a/tests/compile-fail/weak_memory/racing_mixed_size.stderr b/tests/compile-fail/weak_memory/racing_mixed_size.stderr index b3074d93c9..b03424a861 100644 --- a/tests/compile-fail/weak_memory/racing_mixed_size.stderr +++ b/tests/compile-fail/weak_memory/racing_mixed_size.stderr @@ -1,11 +1,11 @@ warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. (see https://github.com/rust-lang/miri/issues/1388) -error: Undefined Behavior: racy imperfectly overlapping atomic access +error: Undefined Behavior: racy imperfectly overlapping atomic access is not possible in the C++20 memory model --> $DIR/racing_mixed_size.rs:LL:CC | LL | std::intrinsics::atomic_load_relaxed(hi); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ racy imperfectly overlapping atomic access + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ racy imperfectly overlapping atomic access is not possible in the C++20 memory model | = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information diff --git a/tests/run-pass/weak_memory/extra_cpp_unsafe.rs b/tests/run-pass/weak_memory/extra_cpp_unsafe.rs index 7c375d7345..de9a3af3fd 100644 --- a/tests/run-pass/weak_memory/extra_cpp_unsafe.rs +++ b/tests/run-pass/weak_memory/extra_cpp_unsafe.rs @@ -62,7 +62,7 @@ fn racing_mixed_size_read() { j2.join().unwrap(); } -// And the combination of both of above +// And we allow the combination of both of the above. 
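// (Here "both of the above" means the two races exercised earlier in this file: a read that
// differs from the racing atomic access in atomicity, and one that differs in size; the test
// below combines them on a single location.)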
fn racing_mixed_atomicity_and_size_read() { let x = static_atomic(u32::from_be(0xabbafafa)); From ceb173d64773736e0c60ba6104912c725f07c2c9 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sun, 29 May 2022 12:03:45 +0100 Subject: [PATCH 37/46] Move logic out of machine.rs --- src/concurrency/weak_memory.rs | 28 +++++++++++++++------------- src/machine.rs | 11 ++--------- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index 888f9edceb..3c692783d1 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -135,20 +135,22 @@ impl StoreBufferAlloc { /// When a non-atomic access happens on a location that has been atomically accessed /// before without data race, we can determine that the non-atomic access fully happens - /// before all the prior atomic accesses so the location no longer needs to exhibit + /// after all the prior atomic accesses so the location no longer needs to exhibit /// any weak memory behaviours until further atomic accesses. - pub fn destroy_atomicity<'tcx>(&self, range: AllocRange) { - let mut buffers = self.store_buffers.borrow_mut(); - let access_type = buffers.access_type(range); - match access_type { - AccessType::PerfectlyOverlapping(pos) => { - buffers.remove_from_pos(pos); - } - AccessType::ImperfectlyOverlapping(pos_range) => { - buffers.remove_pos_range(pos_range); - } - AccessType::Empty(_) => { - // Do nothing + pub fn memory_accessed<'tcx>(&self, range: AllocRange, global: &GlobalState) { + if !global.ongoing_atomic_access() { + let mut buffers = self.store_buffers.borrow_mut(); + let access_type = buffers.access_type(range); + match access_type { + AccessType::PerfectlyOverlapping(pos) => { + buffers.remove_from_pos(pos); + } + AccessType::ImperfectlyOverlapping(pos_range) => { + buffers.remove_pos_range(pos_range); + } + AccessType::Empty(_) => { + // The range had no weak behaivours attached, do nothing + } } } } diff --git a/src/machine.rs b/src/machine.rs index b8cb890870..5ee2d9a9ab 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -741,12 +741,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { )?; } if let Some(weak_memory) = &alloc_extra.weak_memory { - if !machine.data_race.as_ref().unwrap().ongoing_atomic_access() { - // This is a non-atomic access. And if we are accessing a previously atomically - // accessed location without racing with them, then the location no longer - // exhibits weak-memory behaviors until a fresh atomic access happens. 
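// A minimal sketch (illustration only, not part of this diff; the function name is ours) of the
// guarantee the comment above describes: once a location is accessed non-atomically without a
// race (here through `&mut`, which cannot race), its buffered store history is discarded, so a
// later relaxed load can only return the latest value until fresh atomic stores are buffered.
use std::sync::atomic::{AtomicU32, Ordering::Relaxed};

fn after_non_atomic_access(x: &mut AtomicU32) {
    x.store(1, Relaxed); // an atomic store, added to the location's store buffer
    *x.get_mut() = 2; // a non-atomic access: the weak-memory history for this location is dropped
    assert_eq!(x.load(Relaxed), 2); // guaranteed: no stale buffered value can be observed here
}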
- weak_memory.destroy_atomicity(range); - } + weak_memory.memory_accessed(range, machine.data_race.as_ref().unwrap()); } Ok(()) } @@ -772,9 +767,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { )?; } if let Some(weak_memory) = &alloc_extra.weak_memory { - if !machine.data_race.as_ref().unwrap().ongoing_atomic_access() { - weak_memory.destroy_atomicity(range); - } + weak_memory.memory_accessed(range, machine.data_race.as_ref().unwrap()); } Ok(()) } From 4a07f78dadd3e5608157486b204fd4be2cde15a7 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sun, 29 May 2022 15:05:07 +0100 Subject: [PATCH 38/46] Forbade all racing mixed size atomic accesses --- src/concurrency/data_race.rs | 52 +++++------------ src/concurrency/weak_memory.rs | 41 +++----------- .../weak_memory/racing_mixed_size.rs | 1 - .../weak_memory/racing_mixed_size_read.rs | 39 +++++++++++++ .../weak_memory/racing_mixed_size_read.stderr | 18 ++++++ .../run-pass/weak_memory/extra_cpp_unsafe.rs | 56 ------------------- 6 files changed, 78 insertions(+), 129 deletions(-) create mode 100644 tests/compile-fail/weak_memory/racing_mixed_size_read.rs create mode 100644 tests/compile-fail/weak_memory/racing_mixed_size_read.stderr diff --git a/src/concurrency/data_race.rs b/src/concurrency/data_race.rs index 8b8694ac18..61cd6a3c0c 100644 --- a/src/concurrency/data_race.rs +++ b/src/concurrency/data_race.rs @@ -287,8 +287,8 @@ impl MemoryCellClocks { Ok(()) } - /// Checks if the memory cell write races with any prior atomic read or write - fn write_race_free_with_atomic(&mut self, clocks: &ThreadClockSet) -> bool { + /// Checks if the memory cell access is ordered with all prior atomic reads and writes + fn race_free_with_atomic(&self, clocks: &ThreadClockSet) -> bool { if let Some(atomic) = self.atomic() { atomic.read_vector <= clocks.clock && atomic.write_vector <= clocks.clock } else { @@ -296,11 +296,6 @@ impl MemoryCellClocks { } } - /// Checks if the memory cell read races with any prior atomic write - fn read_race_free_with_atomic(&self, clocks: &ThreadClockSet) -> bool { - if let Some(atomic) = self.atomic() { atomic.write_vector <= clocks.clock } else { true } - } - /// Update memory cell data-race tracking for atomic /// load relaxed semantics, is a no-op if this memory was /// not used previously as atomic memory. @@ -528,7 +523,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // the *value* (including the associated provenance if this is an AtomicPtr) at this location. // Only metadata on the location itself is used. let scalar = this.allow_data_races_ref(move |this| this.read_scalar(&place.into()))?; - this.validate_overlapping_atomic_read(place)?; + this.validate_overlapping_atomic(place)?; this.buffered_atomic_read(place, atomic, scalar, || { this.validate_atomic_load(place, atomic) }) @@ -542,7 +537,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicWriteOp, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - this.validate_overlapping_atomic_write(dest)?; + this.validate_overlapping_atomic(dest)?; this.allow_data_races_mut(move |this| this.write_scalar(val, &(*dest).into()))?; this.validate_atomic_store(dest, atomic)?; // FIXME: it's not possible to get the value before write_scalar. 
A read_scalar will cause @@ -563,7 +558,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { let this = self.eval_context_mut(); - this.validate_overlapping_atomic_write(place)?; + this.validate_overlapping_atomic(place)?; let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?; // Atomics wrap around on overflow. @@ -592,7 +587,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ScalarMaybeUninit> { let this = self.eval_context_mut(); - this.validate_overlapping_atomic_write(place)?; + this.validate_overlapping_atomic(place)?; let old = this.allow_data_races_mut(|this| this.read_scalar(&place.into()))?; this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?; @@ -613,7 +608,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { let this = self.eval_context_mut(); - this.validate_overlapping_atomic_write(place)?; + this.validate_overlapping_atomic(place)?; let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?; let lt = this.binary_op(mir::BinOp::Lt, &old, &rhs)?.to_scalar()?.to_bool()?; @@ -656,7 +651,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { use rand::Rng as _; let this = self.eval_context_mut(); - this.validate_overlapping_atomic_write(place)?; + this.validate_overlapping_atomic(place)?; // Failure ordering cannot be stronger than success ordering, therefore first attempt // to read with the failure ordering and if successful then try again with the success // read ordering and write in the success case. @@ -706,7 +701,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicReadOp, ) -> InterpResult<'tcx> { let this = self.eval_context_ref(); - this.validate_overlapping_atomic_read(place)?; + this.validate_overlapping_atomic(place)?; this.validate_atomic_op( place, atomic, @@ -729,7 +724,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicWriteOp, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - this.validate_overlapping_atomic_write(place)?; + this.validate_overlapping_atomic(place)?; this.validate_atomic_op( place, atomic, @@ -755,7 +750,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); let release = matches!(atomic, Release | AcqRel | SeqCst); let this = self.eval_context_mut(); - this.validate_overlapping_atomic_write(place)?; + this.validate_overlapping_atomic(place)?; this.validate_atomic_op(place, atomic, "Atomic RMW", move |memory, clocks, index, _| { if acquire { memory.load_acquire(clocks, index)?; @@ -941,9 +936,9 @@ impl VClockAlloc { ) } - /// Detect racing atomic writes (not data races) + /// Detect racing atomic read and writes (not data races) /// on every byte of the current access range - pub(super) fn read_race_free_with_atomic<'tcx>( + pub(super) fn race_free_with_atomic<'tcx>( &self, range: AllocRange, global: &GlobalState, @@ -952,26 +947,7 @@ impl VClockAlloc { let (_, clocks) = global.current_thread_state(); let alloc_ranges = self.alloc_ranges.borrow(); for (_, range) in alloc_ranges.iter(range.start, range.size) { - if !range.read_race_free_with_atomic(&clocks) { - return false; - } - } - } - true - } - - /// Detect racing atomic read and writes (not data races) - /// on 
every byte of the current access range - pub(super) fn write_race_free_with_atomic<'tcx>( - &mut self, - range: AllocRange, - global: &GlobalState, - ) -> bool { - if global.race_detecting() { - let (_, clocks) = global.current_thread_state(); - let alloc_ranges = self.alloc_ranges.get_mut(); - for (_, range) in alloc_ranges.iter_mut(range.start, range.size) { - if !range.write_race_free_with_atomic(&clocks) { + if !range.race_free_with_atomic(&clocks) { return false; } } diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index 3c692783d1..9bf46bb23b 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -35,7 +35,8 @@ //! (such as accessing the top 16 bits of an AtomicU32). These senarios are generally undiscussed in formalisations of C++ memory model. //! In Rust, these operations can only be done through a `&mut AtomicFoo` reference or one derived from it, therefore these operations //! can only happen after all previous accesses on the same locations. This implementation is adapted to allow these operations. -//! A mixed size/atomicity read that races with writes, or a write that races with reads or writes will still cause UBs to be thrown. +//! A mixed atomicity read that races with writes, or a write that races with reads or writes will still cause UBs to be thrown. +//! Mixed size atomic accesses must not race with any other atomic access, whether read or write, or a UB will be thrown. //! You can refer to test cases in weak_memory/extra_cpp.rs and weak_memory/extra_cpp_unsafe.rs for examples of these operations. // Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in §5.3: @@ -403,9 +404,9 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> { // If weak memory emulation is enabled, check if this atomic op imperfectly overlaps with a previous - // atomic write. If it does, then we require it to be ordered (non-racy) with all previous atomic - // writes on all the bytes in range - fn validate_overlapping_atomic_read(&self, place: &MPlaceTy<'tcx, Tag>) -> InterpResult<'tcx> { + // atomic read or write. If it does, then we require it to be ordered (non-racy) with all previous atomic + // accesses on all the bytes in range + fn validate_overlapping_atomic(&self, place: &MPlaceTy<'tcx, Tag>) -> InterpResult<'tcx> { let this = self.eval_context_ref(); let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; if let crate::AllocExtra { @@ -417,37 +418,9 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: let range = alloc_range(base_offset, place.layout.size); if alloc_buffers.is_overlapping(range) && !alloc_clocks - .read_race_free_with_atomic(range, this.machine.data_race.as_ref().unwrap()) + .race_free_with_atomic(range, this.machine.data_race.as_ref().unwrap()) { - throw_ub_format!( - "racy imperfectly overlapping atomic access is not possible in the C++20 memory model" - ); - } - } - Ok(()) - } - - // Same as above but needs to be ordered with all previous atomic read or writes - fn validate_overlapping_atomic_write( - &mut self, - place: &MPlaceTy<'tcx, Tag>, - ) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); - let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; - if let ( - crate::AllocExtra { - weak_memory: Some(alloc_buffers), - data_race: Some(alloc_clocks), - .. - }, - crate::Evaluator { data_race: Some(global), .. }, - ) = this.get_alloc_extra_mut(alloc_id)? 
- { - let range = alloc_range(base_offset, place.layout.size); - if alloc_buffers.is_overlapping(range) - && !alloc_clocks.write_race_free_with_atomic(range, global) - { - throw_ub_format!("racy imperfectly overlapping atomic access"); + throw_ub_format!("racy imperfectly overlapping atomic access is not possible in the C++20 memory model"); } } Ok(()) diff --git a/tests/compile-fail/weak_memory/racing_mixed_size.rs b/tests/compile-fail/weak_memory/racing_mixed_size.rs index 513d97edb5..6d53670a4e 100644 --- a/tests/compile-fail/weak_memory/racing_mixed_size.rs +++ b/tests/compile-fail/weak_memory/racing_mixed_size.rs @@ -1,5 +1,4 @@ // ignore-windows: Concurrency on Windows is not supported yet. -// compile-flags: -Zmiri-ignore-leaks #![feature(core_intrinsics)] diff --git a/tests/compile-fail/weak_memory/racing_mixed_size_read.rs b/tests/compile-fail/weak_memory/racing_mixed_size_read.rs new file mode 100644 index 0000000000..0129b55aff --- /dev/null +++ b/tests/compile-fail/weak_memory/racing_mixed_size_read.rs @@ -0,0 +1,39 @@ +// ignore-windows: Concurrency on Windows is not supported yet. + +#![feature(core_intrinsics)] + +use std::sync::atomic::AtomicU32; +use std::sync::atomic::Ordering::*; +use std::thread::spawn; + +fn static_atomic(val: u32) -> &'static AtomicU32 { + let ret = Box::leak(Box::new(AtomicU32::new(val))); + ret +} + +fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] { + unsafe { std::mem::transmute::<*const u32, *const [u16; 2]>(dword) } +} + +// Racing mixed size reads may cause two loads to read-from +// the same store but observe different values, which doesn't make +// sense under the formal model so we forbade this. +pub fn main() { + let x = static_atomic(0); + + let j1 = spawn(move || { + x.load(Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: imperfectly overlapping + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); +} diff --git a/tests/compile-fail/weak_memory/racing_mixed_size_read.stderr b/tests/compile-fail/weak_memory/racing_mixed_size_read.stderr new file mode 100644 index 0000000000..80cc2fe756 --- /dev/null +++ b/tests/compile-fail/weak_memory/racing_mixed_size_read.stderr @@ -0,0 +1,18 @@ +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) + +error: Undefined Behavior: racy imperfectly overlapping atomic access is not possible in the C++20 memory model + --> $DIR/racing_mixed_size_read.rs:LL:CC + | +LL | std::intrinsics::atomic_load_relaxed(hi); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ racy imperfectly overlapping atomic access is not possible in the C++20 memory model + | + = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior + = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + + = note: inside closure at $DIR/racing_mixed_size_read.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to previous error; 1 warning emitted + diff --git a/tests/run-pass/weak_memory/extra_cpp_unsafe.rs b/tests/run-pass/weak_memory/extra_cpp_unsafe.rs index de9a3af3fd..478e436e59 100644 --- a/tests/run-pass/weak_memory/extra_cpp_unsafe.rs +++ b/tests/run-pass/weak_memory/extra_cpp_unsafe.rs @@ -18,10 +18,6 @@ fn static_atomic(val: u32) -> &'static AtomicU32 { ret } -fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] { - unsafe { std::mem::transmute::<*const u32, *const [u16; 2]>(dword) } -} - // We allow non-atomic and atomic reads to race fn racing_mixed_atomicity_read() { let x = static_atomic(0); @@ -41,58 +37,6 @@ fn racing_mixed_atomicity_read() { assert_eq!(r2, 42); } -// We allow mixed-size atomic reads to race -fn racing_mixed_size_read() { - let x = static_atomic(0); - - let j1 = spawn(move || { - x.load(Relaxed); - }); - - let j2 = spawn(move || { - let x_ptr = x as *const AtomicU32 as *const u32; - let x_split = split_u32_ptr(x_ptr); - unsafe { - let hi = &(*x_split)[0] as *const u16; - std::intrinsics::atomic_load_relaxed(hi); - } - }); - - j1.join().unwrap(); - j2.join().unwrap(); -} - -// And we allow the combination of both of the above. -fn racing_mixed_atomicity_and_size_read() { - let x = static_atomic(u32::from_be(0xabbafafa)); - - let j1 = spawn(move || { - x.load(Relaxed); - }); - - let j2 = spawn(move || { - let x_ptr = x as *const AtomicU32 as *const u32; - unsafe { *x_ptr }; - }); - - let j3 = spawn(move || { - let x_ptr = x as *const AtomicU32 as *const u32; - let x_split = split_u32_ptr(x_ptr); - unsafe { - let hi = &(*x_split)[0] as *const u16; - std::intrinsics::atomic_load_relaxed(hi) - } - }); - - j1.join().unwrap(); - j2.join().unwrap(); - let r3 = j3.join().unwrap(); - - assert_eq!(r3, u16::from_be(0xabba)); -} - pub fn main() { racing_mixed_atomicity_read(); - racing_mixed_size_read(); - racing_mixed_atomicity_and_size_read(); } From 8215702d5a27c77197cf8d12e03caca6c3884783 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sun, 29 May 2022 19:48:36 +0100 Subject: [PATCH 39/46] Refer to GitHub issue on overwritten init value --- src/concurrency/data_race.rs | 1 + src/concurrency/weak_memory.rs | 1 + src/machine.rs | 2 -- tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs | 1 + 4 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/concurrency/data_race.rs b/src/concurrency/data_race.rs index 61cd6a3c0c..35baf97b72 100644 --- a/src/concurrency/data_race.rs +++ b/src/concurrency/data_race.rs @@ -544,6 +544,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // side effects from a read the program did not perform. 
So we have to initialise // the store buffer with the value currently being written // ONCE this is fixed please remove the hack in buffered_atomic_write() in weak_memory.rs + // https://github.com/rust-lang/miri/issues/2164 this.buffered_atomic_write(val, dest, atomic, val) } diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index 9bf46bb23b..237a13ea86 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -508,6 +508,7 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: // UGLY HACK: in write_scalar_atomic() we don't know the value before our write, // so init == val always. If the buffer is fresh then we would've duplicated an entry, // so we need to remove it. + // See https://github.com/rust-lang/miri/issues/2164 let was_empty = matches!( alloc_buffers .store_buffers diff --git a/src/machine.rs b/src/machine.rs index 5ee2d9a9ab..1ae49edd60 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -638,8 +638,6 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { None }; let buffer_alloc = if ecx.machine.weak_memory { - // FIXME: if this is an atomic obejct, we want to supply its initial value - // while allocating the store buffer here. Some(weak_memory::AllocExtra::new_allocation()) } else { None diff --git a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs index 7dad0a12e5..7fe24d6383 100644 --- a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs +++ b/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs @@ -25,6 +25,7 @@ fn reads_value(loc: &AtomicUsize, val: usize) -> usize { fn static_atomic(val: usize) -> &'static AtomicUsize { let ret = Box::leak(Box::new(AtomicUsize::new(val))); // A workaround to put the initialization value in the store buffer. 
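    // (The store buffer for a location is only populated by atomic accesses, so the plain
    // initialisation done by `AtomicUsize::new` above never reaches it; the explicit atomic
    // access below seeds the buffer with the initial value. See the issue linked below.)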
+ // See https://github.com/rust-lang/miri/issues/2164 ret.load(Relaxed); ret } From c73107164089249477d56fbc580104231459a2a6 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sun, 29 May 2022 21:10:36 +0100 Subject: [PATCH 40/46] Give flag temp disabling race detector a better name --- src/concurrency/data_race.rs | 25 +++++++++++++------------ src/concurrency/weak_memory.rs | 6 ++++-- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/concurrency/data_race.rs b/src/concurrency/data_race.rs index 35baf97b72..f6f0ce528e 100644 --- a/src/concurrency/data_race.rs +++ b/src/concurrency/data_race.rs @@ -455,11 +455,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { fn allow_data_races_ref(&self, op: impl FnOnce(&MiriEvalContext<'mir, 'tcx>) -> R) -> R { let this = self.eval_context_ref(); if let Some(data_race) = &this.machine.data_race { - data_race.ongoing_atomic_access.set(true); + data_race.ongoing_action_data_race_free.set(true); } let result = op(this); if let Some(data_race) = &this.machine.data_race { - data_race.ongoing_atomic_access.set(false); + data_race.ongoing_action_data_race_free.set(false); } result } @@ -474,11 +474,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> R { let this = self.eval_context_mut(); if let Some(data_race) = &this.machine.data_race { - data_race.ongoing_atomic_access.set(true); + data_race.ongoing_action_data_race_free.set(true); } let result = op(this); if let Some(data_race) = &this.machine.data_race { - data_race.ongoing_atomic_access.set(false); + data_race.ongoing_action_data_race_free.set(false); } result } @@ -1151,8 +1151,9 @@ pub struct GlobalState { multi_threaded: Cell, /// A flag to mark we are currently performing - /// an atomic access to supress data race detection - ongoing_atomic_access: Cell, + /// a data race free action (such as atomic access) + /// to supress the race detector + ongoing_action_data_race_free: Cell, /// Mapping of a vector index to a known set of thread /// clocks, this is not directly mapping from a thread id @@ -1205,7 +1206,7 @@ impl GlobalState { pub fn new() -> Self { let mut global_state = GlobalState { multi_threaded: Cell::new(false), - ongoing_atomic_access: Cell::new(false), + ongoing_action_data_race_free: Cell::new(false), vector_clocks: RefCell::new(IndexVec::new()), vector_info: RefCell::new(IndexVec::new()), thread_info: RefCell::new(IndexVec::new()), @@ -1232,14 +1233,14 @@ impl GlobalState { } // We perform data race detection when there are more than 1 active thread - // and we are not currently in the middle of an atomic acces where data race - // is impossible + // and we have not temporarily disabled race detection to perform something + // data race free fn race_detecting(&self) -> bool { - self.multi_threaded.get() && !self.ongoing_atomic_access.get() + self.multi_threaded.get() && !self.ongoing_action_data_race_free.get() } - pub fn ongoing_atomic_access(&self) -> bool { - self.ongoing_atomic_access.get() + pub fn ongoing_action_data_race_free(&self) -> bool { + self.ongoing_action_data_race_free.get() } // Try to find vector index values that can potentially be re-used diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index 237a13ea86..dc32a3ddca 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -139,7 +139,7 @@ impl StoreBufferAlloc { /// after all the prior atomic accesses so the location no longer needs to exhibit /// any weak memory behaviours 
until further atomic accesses. pub fn memory_accessed<'tcx>(&self, range: AllocRange, global: &GlobalState) { - if !global.ongoing_atomic_access() { + if !global.ongoing_action_data_race_free() { let mut buffers = self.store_buffers.borrow_mut(); let access_type = buffers.access_type(range); match access_type { @@ -420,7 +420,9 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: && !alloc_clocks .race_free_with_atomic(range, this.machine.data_race.as_ref().unwrap()) { - throw_ub_format!("racy imperfectly overlapping atomic access is not possible in the C++20 memory model"); + throw_ub_format!( + "racy imperfectly overlapping atomic access is not possible in the C++20 memory model" + ); } } Ok(()) From 6d0c76ea1b5021ec526421825a4c62b467c5b1e2 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sun, 29 May 2022 22:53:57 +0100 Subject: [PATCH 41/46] Specify only perfectly overlapping accesses can race --- tests/run-pass/weak_memory/extra_cpp_unsafe.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/run-pass/weak_memory/extra_cpp_unsafe.rs b/tests/run-pass/weak_memory/extra_cpp_unsafe.rs index 478e436e59..d77a090e6e 100644 --- a/tests/run-pass/weak_memory/extra_cpp_unsafe.rs +++ b/tests/run-pass/weak_memory/extra_cpp_unsafe.rs @@ -18,7 +18,7 @@ fn static_atomic(val: u32) -> &'static AtomicU32 { ret } -// We allow non-atomic and atomic reads to race +// We allow perfectly overlapping non-atomic and atomic reads to race fn racing_mixed_atomicity_read() { let x = static_atomic(0); x.store(42, Relaxed); From 65f39bd5cf708c5d934ccb6d20efa7b9e54bab2a Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sat, 4 Jun 2022 18:18:46 +0100 Subject: [PATCH 42/46] Move tests to new directories --- tests/{compile-fail => fail}/weak_memory/cpp20_rwc_syncs.rs | 0 tests/{compile-fail => fail}/weak_memory/cpp20_rwc_syncs.stderr | 0 tests/{compile-fail => fail}/weak_memory/racing_mixed_size.rs | 0 tests/{compile-fail => fail}/weak_memory/racing_mixed_size.stderr | 0 .../{compile-fail => fail}/weak_memory/racing_mixed_size_read.rs | 0 .../weak_memory/racing_mixed_size_read.stderr | 0 tests/{run-pass => pass}/weak_memory/consistency.rs | 0 tests/{run-pass => pass}/weak_memory/consistency.stderr | 0 tests/{run-pass => pass}/weak_memory/extra_cpp.rs | 0 tests/{run-pass => pass}/weak_memory/extra_cpp.stderr | 0 tests/{run-pass => pass}/weak_memory/extra_cpp_unsafe.rs | 0 tests/{run-pass => pass}/weak_memory/extra_cpp_unsafe.stderr | 0 tests/{run-pass => pass}/weak_memory/weak.rs | 0 tests/{run-pass => pass}/weak_memory/weak.stderr | 0 14 files changed, 0 insertions(+), 0 deletions(-) rename tests/{compile-fail => fail}/weak_memory/cpp20_rwc_syncs.rs (100%) rename tests/{compile-fail => fail}/weak_memory/cpp20_rwc_syncs.stderr (100%) rename tests/{compile-fail => fail}/weak_memory/racing_mixed_size.rs (100%) rename tests/{compile-fail => fail}/weak_memory/racing_mixed_size.stderr (100%) rename tests/{compile-fail => fail}/weak_memory/racing_mixed_size_read.rs (100%) rename tests/{compile-fail => fail}/weak_memory/racing_mixed_size_read.stderr (100%) rename tests/{run-pass => pass}/weak_memory/consistency.rs (100%) rename tests/{run-pass => pass}/weak_memory/consistency.stderr (100%) rename tests/{run-pass => pass}/weak_memory/extra_cpp.rs (100%) rename tests/{run-pass => pass}/weak_memory/extra_cpp.stderr (100%) rename tests/{run-pass => pass}/weak_memory/extra_cpp_unsafe.rs (100%) rename tests/{run-pass => pass}/weak_memory/extra_cpp_unsafe.stderr (100%) rename tests/{run-pass => 
pass}/weak_memory/weak.rs (100%) rename tests/{run-pass => pass}/weak_memory/weak.stderr (100%) diff --git a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs b/tests/fail/weak_memory/cpp20_rwc_syncs.rs similarity index 100% rename from tests/compile-fail/weak_memory/cpp20_rwc_syncs.rs rename to tests/fail/weak_memory/cpp20_rwc_syncs.rs diff --git a/tests/compile-fail/weak_memory/cpp20_rwc_syncs.stderr b/tests/fail/weak_memory/cpp20_rwc_syncs.stderr similarity index 100% rename from tests/compile-fail/weak_memory/cpp20_rwc_syncs.stderr rename to tests/fail/weak_memory/cpp20_rwc_syncs.stderr diff --git a/tests/compile-fail/weak_memory/racing_mixed_size.rs b/tests/fail/weak_memory/racing_mixed_size.rs similarity index 100% rename from tests/compile-fail/weak_memory/racing_mixed_size.rs rename to tests/fail/weak_memory/racing_mixed_size.rs diff --git a/tests/compile-fail/weak_memory/racing_mixed_size.stderr b/tests/fail/weak_memory/racing_mixed_size.stderr similarity index 100% rename from tests/compile-fail/weak_memory/racing_mixed_size.stderr rename to tests/fail/weak_memory/racing_mixed_size.stderr diff --git a/tests/compile-fail/weak_memory/racing_mixed_size_read.rs b/tests/fail/weak_memory/racing_mixed_size_read.rs similarity index 100% rename from tests/compile-fail/weak_memory/racing_mixed_size_read.rs rename to tests/fail/weak_memory/racing_mixed_size_read.rs diff --git a/tests/compile-fail/weak_memory/racing_mixed_size_read.stderr b/tests/fail/weak_memory/racing_mixed_size_read.stderr similarity index 100% rename from tests/compile-fail/weak_memory/racing_mixed_size_read.stderr rename to tests/fail/weak_memory/racing_mixed_size_read.stderr diff --git a/tests/run-pass/weak_memory/consistency.rs b/tests/pass/weak_memory/consistency.rs similarity index 100% rename from tests/run-pass/weak_memory/consistency.rs rename to tests/pass/weak_memory/consistency.rs diff --git a/tests/run-pass/weak_memory/consistency.stderr b/tests/pass/weak_memory/consistency.stderr similarity index 100% rename from tests/run-pass/weak_memory/consistency.stderr rename to tests/pass/weak_memory/consistency.stderr diff --git a/tests/run-pass/weak_memory/extra_cpp.rs b/tests/pass/weak_memory/extra_cpp.rs similarity index 100% rename from tests/run-pass/weak_memory/extra_cpp.rs rename to tests/pass/weak_memory/extra_cpp.rs diff --git a/tests/run-pass/weak_memory/extra_cpp.stderr b/tests/pass/weak_memory/extra_cpp.stderr similarity index 100% rename from tests/run-pass/weak_memory/extra_cpp.stderr rename to tests/pass/weak_memory/extra_cpp.stderr diff --git a/tests/run-pass/weak_memory/extra_cpp_unsafe.rs b/tests/pass/weak_memory/extra_cpp_unsafe.rs similarity index 100% rename from tests/run-pass/weak_memory/extra_cpp_unsafe.rs rename to tests/pass/weak_memory/extra_cpp_unsafe.rs diff --git a/tests/run-pass/weak_memory/extra_cpp_unsafe.stderr b/tests/pass/weak_memory/extra_cpp_unsafe.stderr similarity index 100% rename from tests/run-pass/weak_memory/extra_cpp_unsafe.stderr rename to tests/pass/weak_memory/extra_cpp_unsafe.stderr diff --git a/tests/run-pass/weak_memory/weak.rs b/tests/pass/weak_memory/weak.rs similarity index 100% rename from tests/run-pass/weak_memory/weak.rs rename to tests/pass/weak_memory/weak.rs diff --git a/tests/run-pass/weak_memory/weak.stderr b/tests/pass/weak_memory/weak.stderr similarity index 100% rename from tests/run-pass/weak_memory/weak.stderr rename to tests/pass/weak_memory/weak.stderr From 137903671305d30ede80586388cd4db535aa92be Mon Sep 17 00:00:00 2001 From: Andy Wang Date: 
Sun, 5 Jun 2022 10:37:40 +0100 Subject: [PATCH 43/46] Simplify known C++20 inconsistency test --- .../cpp20_rwc_syncs.rs | 11 +++++---- tests/fail/should-pass/cpp20_rwc_syncs.stderr | 3 +++ tests/fail/weak_memory/cpp20_rwc_syncs.stderr | 23 ------------------- 3 files changed, 9 insertions(+), 28 deletions(-) rename tests/fail/{weak_memory => should-pass}/cpp20_rwc_syncs.rs (83%) create mode 100644 tests/fail/should-pass/cpp20_rwc_syncs.stderr delete mode 100644 tests/fail/weak_memory/cpp20_rwc_syncs.stderr diff --git a/tests/fail/weak_memory/cpp20_rwc_syncs.rs b/tests/fail/should-pass/cpp20_rwc_syncs.rs similarity index 83% rename from tests/fail/weak_memory/cpp20_rwc_syncs.rs rename to tests/fail/should-pass/cpp20_rwc_syncs.rs index 7fe24d6383..e5192cd0d6 100644 --- a/tests/fail/weak_memory/cpp20_rwc_syncs.rs +++ b/tests/fail/should-pass/cpp20_rwc_syncs.rs @@ -1,5 +1,6 @@ // ignore-windows: Concurrency on Windows is not supported yet. // compile-flags: -Zmiri-ignore-leaks +// error-pattern: // https://plv.mpi-sws.org/scfix/paper.pdf // 2.2 Second Problem: SC Fences are Too Weak @@ -70,12 +71,12 @@ fn test_cpp20_rwc_syncs() { let b = j2.join().unwrap(); let c = j3.join().unwrap(); + // We cannot write assert_ne!() since ui_test's fail + // tests expect exit status 1, whereas panics produce 101. + // Our ui_test does not yet support overriding failure status codes. if (b, c) == (0, 0) { - // FIXME: the standalone compiletest-rs needs to support - // failure-status header to allow us to write assert_ne!((b, c), (0, 0)) - // https://rustc-dev-guide.rust-lang.org/tests/headers.html#miscellaneous-headers - // because panic exits with 101 but compile-rs expects 1 - let _ = unsafe { std::mem::MaybeUninit::<*const u32>::uninit().assume_init() }; //~ ERROR uninitialized + // This *should* be unreachable, but Miri will reach it. + std::process::exit(1); } } diff --git a/tests/fail/should-pass/cpp20_rwc_syncs.stderr b/tests/fail/should-pass/cpp20_rwc_syncs.stderr new file mode 100644 index 0000000000..9fe6daa778 --- /dev/null +++ b/tests/fail/should-pass/cpp20_rwc_syncs.stderr @@ -0,0 +1,3 @@ +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) + diff --git a/tests/fail/weak_memory/cpp20_rwc_syncs.stderr b/tests/fail/weak_memory/cpp20_rwc_syncs.stderr deleted file mode 100644 index f4f467120e..0000000000 --- a/tests/fail/weak_memory/cpp20_rwc_syncs.stderr +++ /dev/null @@ -1,23 +0,0 @@ -warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
- (see https://github.com/rust-lang/miri/issues/1388) - -error: Undefined Behavior: type validation failed at .value: encountered uninitialized raw pointer - --> $DIR/cpp20_rwc_syncs.rs:LL:CC - | -LL | let _ = unsafe { std::mem::MaybeUninit::<*const u32>::uninit().assume_init() }; - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ type validation failed at .value: encountered uninitialized raw pointer - | - = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior - = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information - - = note: inside `test_cpp20_rwc_syncs` at $DIR/cpp20_rwc_syncs.rs:LL:CC -note: inside `main` at $DIR/cpp20_rwc_syncs.rs:LL:CC - --> $DIR/cpp20_rwc_syncs.rs:LL:CC - | -LL | test_cpp20_rwc_syncs(); - | ^^^^^^^^^^^^^^^^^^^^^^ - -note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace - -error: aborting due to previous error; 1 warning emitted - From 6fb7c131ed48d33b8290ce2bf970c37fd9781828 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sun, 5 Jun 2022 20:47:01 +0100 Subject: [PATCH 44/46] Remove unused lifetimes --- src/concurrency/data_race.rs | 6 +----- src/concurrency/weak_memory.rs | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/concurrency/data_race.rs b/src/concurrency/data_race.rs index f6f0ce528e..28b09d2f90 100644 --- a/src/concurrency/data_race.rs +++ b/src/concurrency/data_race.rs @@ -939,11 +939,7 @@ impl VClockAlloc { /// Detect racing atomic read and writes (not data races) /// on every byte of the current access range - pub(super) fn race_free_with_atomic<'tcx>( - &self, - range: AllocRange, - global: &GlobalState, - ) -> bool { + pub(super) fn race_free_with_atomic(&self, range: AllocRange, global: &GlobalState) -> bool { if global.race_detecting() { let (_, clocks) = global.current_thread_state(); let alloc_ranges = self.alloc_ranges.borrow(); diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index dc32a3ddca..a771a09ed1 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -138,7 +138,7 @@ impl StoreBufferAlloc { /// before without data race, we can determine that the non-atomic access fully happens /// after all the prior atomic accesses so the location no longer needs to exhibit /// any weak memory behaviours until further atomic accesses. - pub fn memory_accessed<'tcx>(&self, range: AllocRange, global: &GlobalState) { + pub fn memory_accessed(&self, range: AllocRange, global: &GlobalState) { if !global.ongoing_action_data_race_free() { let mut buffers = self.store_buffers.borrow_mut(); let access_type = buffers.access_type(range); From bf7a5c41540a4005eb0f8d66afda205691fbb14b Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sun, 5 Jun 2022 21:48:07 +0100 Subject: [PATCH 45/46] Add more backgrounds on lazy store buffers Co-authored-by: Ralf Jung --- src/concurrency/weak_memory.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index a771a09ed1..02af0efe9f 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -30,7 +30,18 @@ //! used to make sure a value in a thread's view is not overwritten by a write that occured earlier than the one in the existing view. //! 
In our implementation, this is detected using read information attached to store elements, as there is no data strucutre representing reads. //! -//! Safe/sound Rust allows for more operations on atomic locations than the C++20 atomic API was intended to allow, such as non-atomically accessing +//! The C++ memory model is built around the notion of an 'atomic object', so it would be natural +//! to attach store buffers to atomic objects. However, Rust follows LLVM in that it only has +//! 'atomic accesses'. Therefore Miri cannot know when and where atomic 'objects' are being +//! created or destroyed, to manage its store buffers. Instead, we hence lazily create an +//! atomic object on the first atomic access to a given region, and we destroy that object +//! on the next non-atomic or imperfectly overlapping atomic access to that region. +//! These lazy (de)allocations happen in memory_accessed() on non-atomic accesses, and +//! get_or_create_store_buffer() on atomic accesses. This mostly works well, but it does +//! lead to some issues (https://github.com/rust-lang/miri/issues/2164). +//! +//! One consequence of this difference is that safe/sound Rust allows for more operations on atomic locations +//! than the C++20 atomic API was intended to allow, such as non-atomically accessing //! a previously atomically accessed location, or accessing previously atomically accessed locations with a differently sized operation //! (such as accessing the top 16 bits of an AtomicU32). These senarios are generally undiscussed in formalisations of C++ memory model. //! In Rust, these operations can only be done through a `&mut AtomicFoo` reference or one derived from it, therefore these operations @@ -156,8 +167,8 @@ impl StoreBufferAlloc { } } - /// Gets a store buffer associated with an atomic object in this allocation - /// Or creates one with the specified initial value + /// Gets a store buffer associated with an atomic object in this allocation, + /// or creates one with the specified initial value if no atomic object exists yet. 
fn get_or_create_store_buffer<'tcx>( &self, range: AllocRange, From 1b32d14255aed79e1ff308e8c47b8cb884ed9703 Mon Sep 17 00:00:00 2001 From: Andy Wang Date: Sun, 5 Jun 2022 22:11:55 +0100 Subject: [PATCH 46/46] Make racy imperfectly overlapping atomic access unsupported instead of UB Co-authored-by: Ralf Jung --- src/concurrency/weak_memory.rs | 4 ++-- tests/fail/weak_memory/racing_mixed_size.stderr | 7 +++---- tests/fail/weak_memory/racing_mixed_size_read.stderr | 7 +++---- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs index 02af0efe9f..da36fcd2fb 100644 --- a/src/concurrency/weak_memory.rs +++ b/src/concurrency/weak_memory.rs @@ -431,8 +431,8 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: && !alloc_clocks .race_free_with_atomic(range, this.machine.data_race.as_ref().unwrap()) { - throw_ub_format!( - "racy imperfectly overlapping atomic access is not possible in the C++20 memory model" + throw_unsup_format!( + "racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation" ); } } diff --git a/tests/fail/weak_memory/racing_mixed_size.stderr b/tests/fail/weak_memory/racing_mixed_size.stderr index b03424a861..fc6be84315 100644 --- a/tests/fail/weak_memory/racing_mixed_size.stderr +++ b/tests/fail/weak_memory/racing_mixed_size.stderr @@ -1,14 +1,13 @@ warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. (see https://github.com/rust-lang/miri/issues/1388) -error: Undefined Behavior: racy imperfectly overlapping atomic access is not possible in the C++20 memory model +error: unsupported operation: racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation --> $DIR/racing_mixed_size.rs:LL:CC | LL | std::intrinsics::atomic_load_relaxed(hi); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ racy imperfectly overlapping atomic access is not possible in the C++20 memory model + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation | - = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior - = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = help: this is likely not a bug in the program; it indicates that the program performed an operation that the interpreter does not support = note: inside closure at $DIR/racing_mixed_size.rs:LL:CC diff --git a/tests/fail/weak_memory/racing_mixed_size_read.stderr b/tests/fail/weak_memory/racing_mixed_size_read.stderr index 80cc2fe756..846d03f544 100644 --- a/tests/fail/weak_memory/racing_mixed_size_read.stderr +++ b/tests/fail/weak_memory/racing_mixed_size_read.stderr @@ -1,14 +1,13 @@ warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
(see https://github.com/rust-lang/miri/issues/1388) -error: Undefined Behavior: racy imperfectly overlapping atomic access is not possible in the C++20 memory model +error: unsupported operation: racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation --> $DIR/racing_mixed_size_read.rs:LL:CC | LL | std::intrinsics::atomic_load_relaxed(hi); - | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ racy imperfectly overlapping atomic access is not possible in the C++20 memory model + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation | - = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior - = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information + = help: this is likely not a bug in the program; it indicates that the program performed an operation that the interpreter does not support = note: inside closure at $DIR/racing_mixed_size_read.rs:LL:CC
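
Editor's note on the final patch: "imperfectly overlapping" means two atomic accesses that touch some of the same bytes but not the same exact range, so no single C++20 atomic object's modification order can explain both. Below is a minimal, hypothetical sketch of such a program. It is not the repository's racing_mixed_size tests, which call std::intrinsics::atomic_load_relaxed through a raw pointer as the .stderr excerpts above show; the names and the AtomicU32/AtomicU16 cast here are illustrative only. Under Miri's weak memory emulation, a race between the two differently sized accesses is now reported via throw_unsup_format! as an unsupported operation rather than as Undefined Behavior.

use std::sync::atomic::{AtomicU16, AtomicU32, Ordering::Relaxed};
use std::thread::spawn;

// A 4-byte atomic location shared between two threads.
static WORD: AtomicU32 = AtomicU32::new(0);

fn main() {
    let writer = spawn(|| {
        // 4-byte atomic store covering all of WORD.
        WORD.store(0x0001_0002, Relaxed);
    });
    let reader = spawn(|| {
        // 2-byte atomic load of one half of WORD: an atomic access that
        // imperfectly overlaps the 4-byte store above. If the two race,
        // Miri's weak memory emulation rejects the program as unsupported.
        let half: &AtomicU16 = unsafe { &*(&WORD as *const AtomicU32).cast::<AtomicU16>() };
        half.load(Relaxed)
    });
    writer.join().unwrap();
    let _ = reader.join().unwrap();
}

Since neither thread synchronizes with the other, the two accesses are unordered by the data race detector's vector clocks, which is exactly the condition race_free_with_atomic checks before the weak memory code raises the new diagnostic.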