diff --git a/README.md b/README.md
index a55ebcb125..938a64cd04 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,8 @@ for example:
or an invalid enum discriminant)
* **Experimental**: Violations of the [Stacked Borrows] rules governing aliasing for reference types
-* **Experimental**: Data races (but no weak memory effects)
+* **Experimental**: Data races
+* **Experimental**: Emulation of weak memory effects (i.e., reads can return outdated values)
On top of that, Miri will also tell you about memory leaks: when there is memory still allocated at the end of the execution, and that memory is not reachable
@@ -61,9 +62,11 @@ in your program, and cannot run all programs:
not support networking. System API support varies between targets; if you run on Windows it is a good idea to use `--target x86_64-unknown-linux-gnu` to get better support.
-* Threading support is not finished yet. E.g., weak memory effects are not
- emulated and spin loops (without syscalls) just loop forever. There is no
- threading support on Windows.
+* Threading support is not finished yet. E.g., spin loops (without syscalls) just
+ loop forever. There is no threading support on Windows.
+* Weak memory emulation may produce weak behaviours unobservable by compiled
+ programs running on real hardware when `SeqCst` fences are used, and it cannot
+ produce all behaviours possibly observable on real hardware.
[rust]: https://www.rust-lang.org/
[mir]: https://github.com/rust-lang/rfcs/blob/master/text/1211-mir.md
@@ -317,7 +320,7 @@ to Miri failing to detect cases of undefined behavior in a program.
can focus on other failures, but it means Miri can miss bugs in your program. Using this flag is **unsound**.
* `-Zmiri-disable-data-race-detector` disables checking for data races. Using
- this flag is **unsound**.
+ this flag is **unsound**. This implies `-Zmiri-disable-weak-memory-emulation`.
* `-Zmiri-disable-stacked-borrows` disables checking the experimental [Stacked Borrows] aliasing rules. This can make Miri run faster, but it also means no aliasing violations will be detected. Using this flag is **unsound**
@@ -327,6 +330,8 @@ to Miri failing to detect cases of undefined behavior in a program.
as out-of-bounds accesses) first. Setting this flag means Miri can miss bugs in your program. However, this can also help to make Miri run faster. Using this flag is **unsound**.
+* `-Zmiri-disable-weak-memory-emulation` disables the emulation of some C++11 weak
+ memory effects.
* `-Zmiri-measureme=` enables `measureme` profiling for the interpreted program. This can be used to find which parts of your program are executing slowly under Miri.
The profile is written out to a file with the prefix ``, and can be processed
diff --git a/src/bin/miri.rs b/src/bin/miri.rs
index e3f38956da..907e620404 100644
--- a/src/bin/miri.rs
+++ b/src/bin/miri.rs
@@ -318,6 +318,7 @@ fn main() {
miri_config.stacked_borrows = false;
} else if arg == "-Zmiri-disable-data-race-detector" {
miri_config.data_race_detector = false;
+ miri_config.weak_memory_emulation = false;
} else if arg == "-Zmiri-disable-alignment-check" {
miri_config.check_alignment = miri::AlignmentCheck::None;
} else if arg == "-Zmiri-symbolic-alignment-check" {
@@ -340,6 +341,8 @@ fn main() {
isolation_enabled = Some(false);
}
miri_config.isolated_op = miri::IsolatedOp::Allow;
+ } else if arg == "-Zmiri-disable-weak-memory-emulation" {
+ miri_config.weak_memory_emulation = false;
} else if let Some(param) = arg.strip_prefix("-Zmiri-isolation-error=") {
if matches!(isolation_enabled, Some(false)) {
panic!("-Zmiri-isolation-error cannot be used along with -Zmiri-disable-isolation");
}
diff --git a/src/concurrency/allocation_map.rs b/src/concurrency/allocation_map.rs
new file mode 100644
index 0000000000..62469dcaf4
--- /dev/null
+++ b/src/concurrency/allocation_map.rs
@@ -0,0 +1,278 @@
+//! Implements a map from allocation ranges to data.
+//! This is somewhat similar to RangeMap, but the ranges
+//! and data are discrete and non-splittable. An allocation in the
+//! map will always have the same range until explicitly removed
+
+use rustc_target::abi::Size;
+use std::ops::{Index, IndexMut, Range};
+
+use rustc_const_eval::interpret::AllocRange;
+
+#[derive(Clone, Debug)]
+struct Elem {
+ /// The range covered by this element; never empty.
+ range: AllocRange,
+ /// The data stored for this element.
+ data: T,
+}
+
+/// Index of an allocation within the map
+type Position = usize;
+
+#[derive(Clone, Debug)]
+pub struct AllocationMap {
+ v: Vec>,
+}
+
+#[derive(Clone, Debug, PartialEq)]
+pub enum AccessType {
+ /// The access perfectly overlaps (same offset and range) with the existing allocation
+ PerfectlyOverlapping(Position),
+ /// The access does not touch any existing allocation
+ Empty(Position),
+ /// The access overlaps with one or more existing allocations
+ ImperfectlyOverlapping(Range),
+}
+
+impl AllocationMap {
+ pub fn new() -> Self {
+ Self { v: Vec::new() }
+ }
+
+ /// Finds the position of the allocation containing the given offset. If the offset is not
+ /// in an existing allocation, then returns Err containing the position
+ /// where such an allocation should be inserted
+ fn find_offset(&self, offset: Size) -> Result {
+ // We do a binary search.
+ let mut left = 0usize; // inclusive
+ let mut right = self.v.len(); // exclusive
+ loop {
+ if left == right {
+ // No element contains the given offset. But the
+ // position is where such an element should be placed.
+ return Err(left);
+ }
+ let candidate = left.checked_add(right).unwrap() / 2;
+ let elem = &self.v[candidate];
+ if offset < elem.range.start {
+ // We are too far right (offset is further left).
+ debug_assert!(candidate < right); // we are making progress
+ right = candidate;
+ } else if offset >= elem.range.end() {
+ // We are too far left (offset is further right).
+ debug_assert!(candidate >= left); // we are making progress
+ left = candidate + 1;
+ } else {
+ // This is it!
+ return Ok(candidate);
+ }
+ }
+ }
+
+ /// Determines whether a given access on `range` overlaps with
+ /// an existing allocation
+ pub fn access_type(&self, range: AllocRange) -> AccessType {
+ match self.find_offset(range.start) {
+ Ok(pos) => {
+ // Start of the range belongs to an existing object, now let's check the overlapping situation
+ let elem = &self.v[pos];
+ // FIXME: derive Eq for AllocRange in rustc
+ if elem.range.start == range.start && elem.range.size == range.size {
+ // Happy case: perfectly overlapping access
+ AccessType::PerfectlyOverlapping(pos)
+ } else {
+ // FIXME: add a last() method to AllocRange that returns the last inclusive offset (end() is exclusive)
+ let end_pos = match self.find_offset(range.end() - Size::from_bytes(1)) {
+ // If the end lands in an existing object, add one to get the exclusive position
+ Ok(inclusive_pos) => inclusive_pos + 1,
+ Err(exclusive_pos) => exclusive_pos,
+ };
+
+ AccessType::ImperfectlyOverlapping(pos..end_pos)
+ }
+ }
+ Err(pos) => {
+ // Start of the range doesn't belong to an existing object
+ match self.find_offset(range.end() - Size::from_bytes(1)) {
+ // Neither does the end
+ Err(end_pos) =>
+ if pos == end_pos {
+ // There's nothing between the start and the end, so the whole range is empty
+ AccessType::Empty(pos)
+ } else {
+ // Otherwise we have entirely covered one or more existing objects
+ AccessType::ImperfectlyOverlapping(pos..end_pos)
+ },
+ // Otherwise at least part of it overlaps with something else
+ Ok(end_pos) => AccessType::ImperfectlyOverlapping(pos..end_pos + 1),
+ }
+ }
+ }
+ }
+
+ /// Inserts an object and its occupied range at the given position
+ // The Position can be calculated from AllocRange, but the only user of AllocationMap
+ // always calls access_type before calling insert/index/index_mut, and we don't
+ // want to repeat the binary search each time, so we ask the caller to supply Position
+ pub fn insert_at_pos(&mut self, pos: Position, range: AllocRange, data: T) {
+ self.v.insert(pos, Elem { range, data });
+ // If we aren't the first element, then our start must be at or after the previous element's end
+ if pos > 0 {
+ debug_assert!(self.v[pos - 1].range.end() <= range.start);
+ }
+ // If we aren't the last element, then our end must be at or before the next element's start
+ if pos < self.v.len() - 1 {
+ debug_assert!(range.end() <= self.v[pos + 1].range.start);
+ }
+ }
+
+ pub fn remove_pos_range(&mut self, pos_range: Range) {
+ self.v.drain(pos_range);
+ }
+
+ pub fn remove_from_pos(&mut self, pos: Position) {
+ self.v.remove(pos);
+ }
+}
+
+impl Index for AllocationMap {
+ type Output = T;
+
+ fn index(&self, pos: Position) -> &Self::Output {
+ &self.v[pos].data
+ }
+}
+
+impl IndexMut for AllocationMap {
+ fn index_mut(&mut self, pos: Position) -> &mut Self::Output {
+ &mut self.v[pos].data
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use rustc_const_eval::interpret::alloc_range;
+
+ use super::*;
+
+ #[test]
+ fn empty_map() {
+ // FIXME: make Size::from_bytes const
+ let four = Size::from_bytes(4);
+ let map = AllocationMap::<()>::new();
+
+ // Correctly tells where we should insert the first element (at position 0)
+ assert_eq!(map.find_offset(Size::from_bytes(3)), Err(0));
+
+ // Correctly tells the access type along with the supposed position
+ assert_eq!(map.access_type(alloc_range(Size::ZERO, four)), AccessType::Empty(0));
+ }
+
+ #[test]
+ #[should_panic]
+ fn no_overlapping_inserts() {
+ let four = Size::from_bytes(4);
+
+ let mut map = AllocationMap::<&str>::new();
+
+ //
|_|_|_|_|#|#|#|#|_|_|_|_|... + // 0 1 2 3 4 5 6 7 8 9 a b c d + map.insert_at_pos(0, alloc_range(four, four), "#"); + // |_|_|_|_|#|#|#|#|_|_|_|_|... + // 0 ^ ^ ^ ^ 5 6 7 8 9 a b c d + map.insert_at_pos(0, alloc_range(Size::from_bytes(1), four), "@"); + } + + #[test] + fn boundaries() { + let four = Size::from_bytes(4); + + let mut map = AllocationMap::<&str>::new(); + + // |#|#|#|#|_|_|... + // 0 1 2 3 4 5 + map.insert_at_pos(0, alloc_range(Size::ZERO, four), "#"); + // |#|#|#|#|_|_|... + // 0 1 2 3 ^ 5 + assert_eq!(map.find_offset(four), Err(1)); + // |#|#|#|#|_|_|_|_|_|... + // 0 1 2 3 ^ ^ ^ ^ 8 + assert_eq!(map.access_type(alloc_range(four, four)), AccessType::Empty(1)); + + let eight = Size::from_bytes(8); + // |#|#|#|#|_|_|_|_|@|@|@|@|_|_|... + // 0 1 2 3 4 5 6 7 8 9 a b c d + map.insert_at_pos(1, alloc_range(eight, four), "@"); + // |#|#|#|#|_|_|_|_|@|@|@|@|_|_|... + // 0 1 2 3 4 5 6 ^ 8 9 a b c d + assert_eq!(map.find_offset(Size::from_bytes(7)), Err(1)); + // |#|#|#|#|_|_|_|_|@|@|@|@|_|_|... + // 0 1 2 3 ^ ^ ^ ^ 8 9 a b c d + assert_eq!(map.access_type(alloc_range(four, four)), AccessType::Empty(1)); + } + + #[test] + fn perfectly_overlapping() { + let four = Size::from_bytes(4); + + let mut map = AllocationMap::<&str>::new(); + + // |#|#|#|#|_|_|... + // 0 1 2 3 4 5 + map.insert_at_pos(0, alloc_range(Size::ZERO, four), "#"); + // |#|#|#|#|_|_|... + // ^ ^ ^ ^ 4 5 + assert_eq!(map.find_offset(Size::ZERO), Ok(0)); + assert_eq!( + map.access_type(alloc_range(Size::ZERO, four)), + AccessType::PerfectlyOverlapping(0) + ); + + // |#|#|#|#|@|@|@|@|_|... + // 0 1 2 3 4 5 6 7 8 + map.insert_at_pos(1, alloc_range(four, four), "@"); + // |#|#|#|#|@|@|@|@|_|... + // 0 1 2 3 ^ ^ ^ ^ 8 + assert_eq!(map.find_offset(four), Ok(1)); + assert_eq!(map.access_type(alloc_range(four, four)), AccessType::PerfectlyOverlapping(1)); + } + + #[test] + fn straddling() { + let four = Size::from_bytes(4); + + let mut map = AllocationMap::<&str>::new(); + + // |_|_|_|_|#|#|#|#|_|_|_|_|... + // 0 1 2 3 4 5 6 7 8 9 a b c d + map.insert_at_pos(0, alloc_range(four, four), "#"); + // |_|_|_|_|#|#|#|#|_|_|_|_|... + // 0 1 ^ ^ ^ ^ 6 7 8 9 a b c d + assert_eq!( + map.access_type(alloc_range(Size::from_bytes(2), four)), + AccessType::ImperfectlyOverlapping(0..1) + ); + // |_|_|_|_|#|#|#|#|_|_|_|_|... + // 0 1 2 3 4 5 ^ ^ ^ ^ a b c d + assert_eq!( + map.access_type(alloc_range(Size::from_bytes(6), four)), + AccessType::ImperfectlyOverlapping(0..1) + ); + // |_|_|_|_|#|#|#|#|_|_|_|_|... + // 0 1 ^ ^ ^ ^ ^ ^ ^ ^ a b c d + assert_eq!( + map.access_type(alloc_range(Size::from_bytes(2), Size::from_bytes(8))), + AccessType::ImperfectlyOverlapping(0..1) + ); + + // |_|_|_|_|#|#|#|#|_|_|@|@|_|_|... + // 0 1 2 3 4 5 6 7 8 9 a b c d + map.insert_at_pos(1, alloc_range(Size::from_bytes(10), Size::from_bytes(2)), "@"); + // |_|_|_|_|#|#|#|#|_|_|@|@|_|_|... + // 0 1 2 3 4 5 ^ ^ ^ ^ ^ ^ ^ ^ + assert_eq!( + map.access_type(alloc_range(Size::from_bytes(6), Size::from_bytes(8))), + AccessType::ImperfectlyOverlapping(0..2) + ); + } +} diff --git a/src/data_race.rs b/src/concurrency/data_race.rs similarity index 89% rename from src/data_race.rs rename to src/concurrency/data_race.rs index eb67a487b5..28b09d2f90 100644 --- a/src/data_race.rs +++ b/src/concurrency/data_race.rs @@ -12,7 +12,7 @@ //! The implementation also models races with memory allocation and deallocation via treating allocation and //! deallocation as a type of write internally for detecting data-races. //! -//! 
This does not explore weak memory orders and so can still miss data-races +//! Weak memory orders are explored but not all weak behaviours are exhibited, so it can still miss data-races //! but should not report false-positives //! //! Data-race definition from(): @@ -29,22 +29,6 @@ //! This means that the thread-index can be safely re-used, starting on the next timestamp for the newly created //! thread. //! -//! The sequentially consistent ordering corresponds to the ordering that the threads -//! are currently scheduled, this means that the data-race detector has no additional -//! logic for sequentially consistent accesses at the moment since they are indistinguishable -//! from acquire/release operations. If weak memory orderings are explored then this -//! may need to change or be updated accordingly. -//! -//! Per the C++ spec for the memory model a sequentially consistent operation: -//! "A load operation with this memory order performs an acquire operation, -//! a store performs a release operation, and read-modify-write performs -//! both an acquire operation and a release operation, plus a single total -//! order exists in which all threads observe all modifications in the same -//! order (see Sequentially-consistent ordering below) " -//! So in the absence of weak memory effects a seq-cst load & a seq-cst store is identical -//! to an acquire load and a release store given the global sequentially consistent order -//! of the schedule. -//! //! The timestamps used in the data-race detector assign each sequence of non-atomic operations //! followed by a single atomic or concurrent operation a single timestamp. //! Write, Read, Write, ThreadJoin will be represented by a single timestamp value on a thread. @@ -74,6 +58,8 @@ use rustc_target::abi::Size; use crate::*; +use super::weak_memory::EvalContextExt as _; + pub type AllocExtra = VClockAlloc; /// Valid atomic read-write operations, alias of atomic::Ordering (not non-exhaustive). @@ -115,10 +101,10 @@ pub enum AtomicFenceOp { /// of a thread, contains the happens-before clock and /// additional metadata to model atomic fence operations. #[derive(Clone, Default, Debug)] -struct ThreadClockSet { +pub(super) struct ThreadClockSet { /// The increasing clock representing timestamps /// that happen-before this thread. - clock: VClock, + pub(super) clock: VClock, /// The set of timestamps that will happen-before this /// thread once it performs an acquire fence. @@ -127,6 +113,18 @@ struct ThreadClockSet { /// The last timestamp of happens-before relations that /// have been released by this thread by a fence. 
fence_release: VClock, + + /// Timestamps of the last SC fence performed by each + /// thread, updated when this thread performs an SC fence + pub(super) fence_seqcst: VClock, + + /// Timestamps of the last SC write performed by each + /// thread, updated when this thread performs an SC fence + pub(super) write_seqcst: VClock, + + /// Timestamps of the last SC fence performed by each + /// thread, updated when this thread performs an SC read + pub(super) read_seqcst: VClock, } impl ThreadClockSet { @@ -289,6 +287,15 @@ impl MemoryCellClocks { Ok(()) } + /// Checks if the memory cell access is ordered with all prior atomic reads and writes + fn race_free_with_atomic(&self, clocks: &ThreadClockSet) -> bool { + if let Some(atomic) = self.atomic() { + atomic.read_vector <= clocks.clock && atomic.write_vector <= clocks.clock + } else { + true + } + } + /// Update memory cell data-race tracking for atomic /// load relaxed semantics, is a no-op if this memory was /// not used previously as atomic memory. @@ -447,14 +454,12 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { #[inline] fn allow_data_races_ref(&self, op: impl FnOnce(&MiriEvalContext<'mir, 'tcx>) -> R) -> R { let this = self.eval_context_ref(); - let old = if let Some(data_race) = &this.machine.data_race { - data_race.multi_threaded.replace(false) - } else { - false - }; + if let Some(data_race) = &this.machine.data_race { + data_race.ongoing_action_data_race_free.set(true); + } let result = op(this); if let Some(data_race) = &this.machine.data_race { - data_race.multi_threaded.set(old); + data_race.ongoing_action_data_race_free.set(false); } result } @@ -468,14 +473,12 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { op: impl FnOnce(&mut MiriEvalContext<'mir, 'tcx>) -> R, ) -> R { let this = self.eval_context_mut(); - let old = if let Some(data_race) = &this.machine.data_race { - data_race.multi_threaded.replace(false) - } else { - false - }; + if let Some(data_race) = &this.machine.data_race { + data_race.ongoing_action_data_race_free.set(true); + } let result = op(this); if let Some(data_race) = &this.machine.data_race { - data_race.multi_threaded.set(old); + data_race.ongoing_action_data_race_free.set(false); } result } @@ -514,9 +517,16 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicReadOp, ) -> InterpResult<'tcx, ScalarMaybeUninit> { let this = self.eval_context_ref(); + // This will read from the last store in the modification order of this location. In case + // weak memory emulation is enabled, this may not be the store we will pick to actually read from and return. + // This is fine with StackedBorrow and race checks because they don't concern metadata on + // the *value* (including the associated provenance if this is an AtomicPtr) at this location. + // Only metadata on the location itself is used. let scalar = this.allow_data_races_ref(move |this| this.read_scalar(&place.into()))?; - this.validate_atomic_load(place, atomic)?; - Ok(scalar) + this.validate_overlapping_atomic(place)?; + this.buffered_atomic_read(place, atomic, scalar, || { + this.validate_atomic_load(place, atomic) + }) } /// Perform an atomic write operation at the memory location. 
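// ----------------------------------------------------------------------------
// Illustrative aside (not part of this patch): the hunk above routes atomic
// loads through `buffered_atomic_read`, so a load may legitimately return a
// value other than the latest one in modification order. A minimal sketch of
// the kind of program this affects is below; the static names and the printed
// result are hypothetical, only the std atomics API is assumed. With all
// accesses to DATA and FLAG being Relaxed, Miri's weak memory emulation may
// report r == 0 even when FLAG was observed as 1; switching the FLAG store to
// Release and the FLAG load to Acquire rules that outcome out.
//
//     use std::sync::atomic::{AtomicU32, Ordering::Relaxed};
//     use std::thread;
//
//     static DATA: AtomicU32 = AtomicU32::new(0);
//     static FLAG: AtomicU32 = AtomicU32::new(0);
//
//     fn main() {
//         let t = thread::spawn(|| {
//             DATA.store(1, Relaxed);
//             FLAG.store(1, Relaxed); // no release: does not publish DATA
//         });
//         if FLAG.load(Relaxed) == 1 {
//             let r = DATA.load(Relaxed); // may still read the outdated 0
//             println!("{r}");
//         }
//         t.join().unwrap();
//     }
// ----------------------------------------------------------------------------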
@@ -527,8 +537,15 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicWriteOp, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic(dest)?; this.allow_data_races_mut(move |this| this.write_scalar(val, &(*dest).into()))?; - this.validate_atomic_store(dest, atomic) + this.validate_atomic_store(dest, atomic)?; + // FIXME: it's not possible to get the value before write_scalar. A read_scalar will cause + // side effects from a read the program did not perform. So we have to initialise + // the store buffer with the value currently being written + // ONCE this is fixed please remove the hack in buffered_atomic_write() in weak_memory.rs + // https://github.com/rust-lang/miri/issues/2164 + this.buffered_atomic_write(val, dest, atomic, val) } /// Perform an atomic operation on a memory location. @@ -542,6 +559,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic(place)?; let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?; // Atomics wrap around on overflow. @@ -550,6 +568,13 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { this.allow_data_races_mut(|this| this.write_immediate(*val, &(*place).into()))?; this.validate_atomic_rmw(place, atomic)?; + + this.buffered_atomic_rmw( + val.to_scalar_or_uninit(), + place, + atomic, + old.to_scalar_or_uninit(), + )?; Ok(old) } @@ -563,9 +588,13 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ScalarMaybeUninit> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic(place)?; let old = this.allow_data_races_mut(|this| this.read_scalar(&place.into()))?; this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?; + this.validate_atomic_rmw(place, atomic)?; + + this.buffered_atomic_rmw(new, place, atomic, old)?; Ok(old) } @@ -580,6 +609,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic(place)?; let old = this.allow_data_races_mut(|this| this.read_immediate(&place.into()))?; let lt = this.binary_op(mir::BinOp::Lt, &old, &rhs)?.to_scalar()?.to_bool()?; @@ -593,6 +623,13 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { this.validate_atomic_rmw(place, atomic)?; + this.buffered_atomic_rmw( + new_val.to_scalar_or_uninit(), + place, + atomic, + old.to_scalar_or_uninit(), + )?; + // Return the old value. Ok(old) } @@ -615,6 +652,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { use rand::Rng as _; let this = self.eval_context_mut(); + this.validate_overlapping_atomic(place)?; // Failure ordering cannot be stronger than success ordering, therefore first attempt // to read with the failure ordering and if successful then try again with the success // read ordering and write in the success case. 
@@ -642,8 +680,14 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { if cmpxchg_success { this.allow_data_races_mut(|this| this.write_scalar(new, &(*place).into()))?; this.validate_atomic_rmw(place, success)?; + this.buffered_atomic_rmw(new, place, success, old.to_scalar_or_uninit())?; } else { this.validate_atomic_load(place, fail)?; + // A failed compare exchange is equivalent to a load, reading from the latest store + // in the modification order. + // Since `old` is only a value and not the store element, we need to separately + // find it in our store buffer and perform load_impl on it. + this.perform_read_on_buffered_latest(place, fail, old.to_scalar_or_uninit())?; } // Return the old value. @@ -658,6 +702,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicReadOp, ) -> InterpResult<'tcx> { let this = self.eval_context_ref(); + this.validate_overlapping_atomic(place)?; this.validate_atomic_op( place, atomic, @@ -680,6 +725,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { atomic: AtomicWriteOp, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); + this.validate_overlapping_atomic(place)?; this.validate_atomic_op( place, atomic, @@ -705,6 +751,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); let release = matches!(atomic, Release | AcqRel | SeqCst); let this = self.eval_context_mut(); + this.validate_overlapping_atomic(place)?; this.validate_atomic_op(place, atomic, "Atomic RMW", move |memory, clocks, index, _| { if acquire { memory.load_acquire(clocks, index)?; @@ -723,7 +770,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx> { let this = self.eval_context_mut(); if let Some(data_race) = &mut this.machine.data_race { - data_race.maybe_perform_sync_operation(move |index, mut clocks| { + data_race.maybe_perform_sync_operation(|index, mut clocks| { log::trace!("Atomic fence on {:?} with ordering {:?}", index, atomic); // Apply data-race detection for the current fences @@ -737,6 +784,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // Either Release | AcqRel | SeqCst clocks.apply_release_fence(); } + if atomic == AtomicFenceOp::SeqCst { + data_race.last_sc_fence.borrow_mut().set_at_index(&clocks.clock, index); + clocks.fence_seqcst.join(&data_race.last_sc_fence.borrow()); + clocks.write_seqcst.join(&data_race.last_sc_write.borrow()); + } // Increment timestamp in case of release semantics. 
Ok(atomic != AtomicFenceOp::Acquire) @@ -885,8 +937,23 @@ impl VClockAlloc { ) } + /// Detect racing atomic read and writes (not data races) + /// on every byte of the current access range + pub(super) fn race_free_with_atomic(&self, range: AllocRange, global: &GlobalState) -> bool { + if global.race_detecting() { + let (_, clocks) = global.current_thread_state(); + let alloc_ranges = self.alloc_ranges.borrow(); + for (_, range) in alloc_ranges.iter(range.start, range.size) { + if !range.race_free_with_atomic(&clocks) { + return false; + } + } + } + true + } + /// Detect data-races for an unsynchronized read operation, will not perform - /// data-race detection if `multi-threaded` is false, either due to no threads + /// data-race detection if `race_detecting()` is false, either due to no threads /// being created or if it is temporarily disabled during a racy read or write /// operation for which data-race detection is handled separately, for example /// atomic read operations. @@ -896,7 +963,7 @@ impl VClockAlloc { range: AllocRange, global: &GlobalState, ) -> InterpResult<'tcx> { - if global.multi_threaded.get() { + if global.race_detecting() { let (index, clocks) = global.current_thread_state(); let mut alloc_ranges = self.alloc_ranges.borrow_mut(); for (offset, range) in alloc_ranges.iter_mut(range.start, range.size) { @@ -925,7 +992,7 @@ impl VClockAlloc { write_type: WriteType, global: &mut GlobalState, ) -> InterpResult<'tcx> { - if global.multi_threaded.get() { + if global.race_detecting() { let (index, clocks) = global.current_thread_state(); for (offset, range) in self.alloc_ranges.get_mut().iter_mut(range.start, range.size) { if let Err(DataRace) = range.write_race_detect(&*clocks, index, write_type) { @@ -946,7 +1013,7 @@ impl VClockAlloc { } /// Detect data-races for an unsynchronized write operation, will not perform - /// data-race threads if `multi-threaded` is false, either due to no threads + /// data-race threads if `race_detecting()` is false, either due to no threads /// being created or if it is temporarily disabled during a racy read or write /// operation pub fn write<'tcx>( @@ -959,7 +1026,7 @@ impl VClockAlloc { } /// Detect data-races for an unsynchronized deallocate operation, will not perform - /// data-race threads if `multi-threaded` is false, either due to no threads + /// data-race threads if `race_detecting()` is false, either due to no threads /// being created or if it is temporarily disabled during a racy read or write /// operation pub fn deallocate<'tcx>( @@ -989,12 +1056,12 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { ) -> InterpResult<'tcx> { let this = self.eval_context_ref(); if let Some(data_race) = &this.machine.data_race { - if data_race.multi_threaded.get() { + if data_race.race_detecting() { let size = place.layout.size; let (alloc_id, base_offset, _tag) = this.ptr_get_alloc_id(place.ptr)?; // Load and log the atomic operation. // Note that atomic loads are possible even from read-only allocations, so `get_alloc_extra_mut` is not an option. - let alloc_meta = &this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap(); + let alloc_meta = this.get_alloc_extra(alloc_id)?.data_race.as_ref().unwrap(); log::trace!( "Atomic op({}) with ordering {:?} on {:?} (size={})", description, @@ -1079,6 +1146,11 @@ pub struct GlobalState { /// any data-races. 
multi_threaded: Cell, + /// A flag to mark we are currently performing + /// a data race free action (such as atomic access) + /// to supress the race detector + ongoing_action_data_race_free: Cell, + /// Mapping of a vector index to a known set of thread /// clocks, this is not directly mapping from a thread id /// since it may refer to multiple threads. @@ -1116,6 +1188,12 @@ pub struct GlobalState { /// The associated vector index will be moved into re-use candidates /// after the join operation occurs. terminated_threads: RefCell>, + + /// The timestamp of last SC fence performed by each thread + last_sc_fence: RefCell, + + /// The timestamp of last SC write performed by each thread + last_sc_write: RefCell, } impl GlobalState { @@ -1124,6 +1202,7 @@ impl GlobalState { pub fn new() -> Self { let mut global_state = GlobalState { multi_threaded: Cell::new(false), + ongoing_action_data_race_free: Cell::new(false), vector_clocks: RefCell::new(IndexVec::new()), vector_info: RefCell::new(IndexVec::new()), thread_info: RefCell::new(IndexVec::new()), @@ -1131,6 +1210,8 @@ impl GlobalState { active_thread_count: Cell::new(1), reuse_candidates: RefCell::new(FxHashSet::default()), terminated_threads: RefCell::new(FxHashMap::default()), + last_sc_fence: RefCell::new(VClock::default()), + last_sc_write: RefCell::new(VClock::default()), }; // Setup the main-thread since it is not explicitly created: @@ -1147,6 +1228,17 @@ impl GlobalState { global_state } + // We perform data race detection when there are more than 1 active thread + // and we have not temporarily disabled race detection to perform something + // data race free + fn race_detecting(&self) -> bool { + self.multi_threaded.get() && !self.ongoing_action_data_race_free.get() + } + + pub fn ongoing_action_data_race_free(&self) -> bool { + self.ongoing_action_data_race_free.get() + } + // Try to find vector index values that can potentially be re-used // by a new thread instead of a new vector index being created. fn find_vector_index_reuse_candidate(&self) -> Option { @@ -1445,7 +1537,7 @@ impl GlobalState { /// Load the current vector clock in use and the current set of thread clocks /// in use for the vector. #[inline] - fn current_thread_state(&self) -> (VectorIdx, Ref<'_, ThreadClockSet>) { + pub(super) fn current_thread_state(&self) -> (VectorIdx, Ref<'_, ThreadClockSet>) { let index = self.current_index(); let ref_vector = self.vector_clocks.borrow(); let clocks = Ref::map(ref_vector, |vec| &vec[index]); @@ -1455,7 +1547,7 @@ impl GlobalState { /// Load the current vector clock in use and the current set of thread clocks /// in use for the vector mutably for modification. #[inline] - fn current_thread_state_mut(&self) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { + pub(super) fn current_thread_state_mut(&self) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { let index = self.current_index(); let ref_vector = self.vector_clocks.borrow_mut(); let clocks = RefMut::map(ref_vector, |vec| &mut vec[index]); @@ -1468,4 +1560,16 @@ impl GlobalState { fn current_index(&self) -> VectorIdx { self.current_index.get() } + + // SC ATOMIC STORE rule in the paper. + pub(super) fn sc_write(&self) { + let (index, clocks) = self.current_thread_state(); + self.last_sc_write.borrow_mut().set_at_index(&clocks.clock, index); + } + + // SC ATOMIC READ rule in the paper. 
+ pub(super) fn sc_read(&self) {
+ let (.., mut clocks) = self.current_thread_state_mut();
+ clocks.read_seqcst.join(&self.last_sc_fence.borrow());
+ }
}
diff --git a/src/concurrency/mod.rs b/src/concurrency/mod.rs
new file mode 100644
index 0000000000..ad1586bbf0
--- /dev/null
+++ b/src/concurrency/mod.rs
@@ -0,0 +1,3 @@
+mod allocation_map;
+pub mod data_race;
+pub mod weak_memory;
diff --git a/src/concurrency/weak_memory.rs b/src/concurrency/weak_memory.rs
new file mode 100644
index 0000000000..da36fcd2fb
--- /dev/null
+++ b/src/concurrency/weak_memory.rs
@@ -0,0 +1,570 @@
+//! Implementation of C++11-consistent weak memory emulation using store buffers
+//! based on Dynamic Race Detection for C++ ("the paper"):
+//! https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf
+//!
+//! This implementation will never generate weak memory behaviours forbidden by the C++11 model,
+//! but it is incapable of producing all possible weak behaviours allowed by the model. There are
+//! certain weak behaviours observable on real hardware but not while using this emulation.
+//!
+//! Note that this implementation does not take into account C++20's memory model revision to SC accesses
+//! and fences introduced by P0668 (https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0668r5.html).
+//! This implementation is not fully correct under the revised C++20 model and may generate behaviours C++20
+//! disallows.
+//!
+//! Rust follows the C++20 memory model (except for the Consume ordering and some operations not performable through C++'s
+//! std::atomic API). It is therefore possible for this implementation to generate behaviours never observable when the
+//! same program is compiled and run natively. Unfortunately, no literature exists at the time of writing which proposes
+//! an implementable and C++20-compatible relaxed memory model that supports all atomic operations existing in Rust. The closest one is
+//! A Promising Semantics for Relaxed-Memory Concurrency by Jeehoon Kang et al. (https://www.cs.tau.ac.il/~orilahav/papers/popl17.pdf)
+//! However, this model lacks SC accesses and is therefore unusable by Miri (SC accesses are everywhere in library code).
+//!
+//! If you find anything that proposes a relaxed memory model that is C++20-consistent, supports all orderings Rust's atomic accesses
+//! and fences accept, and is implementable (with operational semantics), please open a GitHub issue!
+//!
+//! One characteristic of this implementation, in contrast to some other notable operational models such as ones proposed in
+//! Taming Release-Acquire Consistency by Ori Lahav et al. (https://plv.mpi-sws.org/sra/paper.pdf) or Promising Semantics noted above,
+//! is that this implementation does not require each thread to hold an isolated view of the entire memory. Here, store buffers are per-location
+//! and shared across all threads. This is more memory efficient but does require store elements (representing writes to a location) to record
+//! information about reads, whereas in the other two models it is the other way round: reads point to the write they got their value from.
+//! Additionally, writes in our implementation do not have globally unique timestamps attached. In the other two models this timestamp is
+//! used to make sure a value in a thread's view is not overwritten by a write that occurred earlier than the one in the existing view.
+//!
In our implementation, this is detected using read information attached to store elements, as there is no data structure representing reads.
+//!
+//! The C++ memory model is built around the notion of an 'atomic object', so it would be natural
+//! to attach store buffers to atomic objects. However, Rust follows LLVM in that it only has
+//! 'atomic accesses'. Therefore Miri cannot know when and where atomic 'objects' are being
+//! created or destroyed, to manage its store buffers. Instead, we hence lazily create an
+//! atomic object on the first atomic access to a given region, and we destroy that object
+//! on the next non-atomic or imperfectly overlapping atomic access to that region.
+//! These lazy (de)allocations happen in memory_accessed() on non-atomic accesses, and
+//! get_or_create_store_buffer() on atomic accesses. This mostly works well, but it does
+//! lead to some issues (https://github.com/rust-lang/miri/issues/2164).
+//!
+//! One consequence of this difference is that safe/sound Rust allows for more operations on atomic locations
+//! than the C++20 atomic API was intended to allow, such as non-atomically accessing
+//! a previously atomically accessed location, or accessing previously atomically accessed locations with a differently sized operation
+//! (such as accessing the top 16 bits of an AtomicU32). These scenarios are generally undiscussed in formalisations of the C++ memory model.
+//! In Rust, these operations can only be done through a `&mut AtomicFoo` reference or one derived from it, therefore these operations
+//! can only happen after all previous accesses on the same locations. This implementation is adapted to allow these operations.
+//! A mixed-atomicity read that races with writes, or a write that races with reads or writes, will still be reported as UB.
+//! Mixed-size atomic accesses must not race with any other atomic access, whether read or write, or UB will be reported.
+//! You can refer to test cases in weak_memory/extra_cpp.rs and weak_memory/extra_cpp_unsafe.rs for examples of these operations.
+
+// Our and the author's own implementation (tsan11) of the paper have some deviations from the provided operational semantics in §5.3:
+// 1. In the operational semantics, store elements keep a copy of the atomic object's vector clock (AtomicCellClocks::sync_vector in miri),
+// but this is not used anywhere so it's omitted here.
+//
+// 2. In the operational semantics, each store element keeps the timestamp of a thread when it loads from the store.
+// If the same thread loads from the same store element multiple times, then the timestamps at all loads are saved in a list of load elements.
+// This is not necessary as later loads by the same thread will always have greater timestamp values, so we only need to record the timestamp of the first
+// load by each thread. This optimisation is done in tsan11
+// (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.h#L35-L37)
+// and here.
+//
+// 3. §4.5 of the paper wants an SC store to mark all existing stores in the buffer that happen before it
+// as SC. This is not done in the operational semantics but implemented correctly in tsan11
+// (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.cc#L160-L167)
+// and here.
+//
+// 4. W_SC ; R_SC case requires the SC load to ignore all but the last store marked SC (stores not marked SC are not
+// affected).
But this rule is applied to all loads in ReadsFromSet from the paper (last two lines of code), not just SC load. +// This is implemented correctly in tsan11 +// (https://github.com/ChrisLidbury/tsan11/blob/ecbd6b81e9b9454e01cba78eb9d88684168132c7/lib/tsan/rtl/tsan_relaxed.cc#L295) +// and here. + +use std::{ + cell::{Ref, RefCell}, + collections::VecDeque, +}; + +use rustc_const_eval::interpret::{ + alloc_range, AllocRange, InterpResult, MPlaceTy, ScalarMaybeUninit, +}; +use rustc_data_structures::fx::FxHashMap; + +use crate::{AtomicReadOp, AtomicRwOp, AtomicWriteOp, Tag, VClock, VTimestamp, VectorIdx}; + +use super::{ + allocation_map::{AccessType, AllocationMap}, + data_race::{GlobalState, ThreadClockSet}, +}; + +pub type AllocExtra = StoreBufferAlloc; + +// Each store buffer must be bounded otherwise it will grow indefinitely. +// However, bounding the store buffer means restricting the amount of weak +// behaviours observable. The author picked 128 as a good tradeoff +// so we follow them here. +const STORE_BUFFER_LIMIT: usize = 128; + +#[derive(Debug, Clone)] +pub struct StoreBufferAlloc { + /// Store buffer of each atomic object in this allocation + // Behind a RefCell because we need to allocate/remove on read access + store_buffers: RefCell>, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(super) struct StoreBuffer { + // Stores to this location in modification order + buffer: VecDeque, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct StoreElement { + /// The identifier of the vector index, corresponding to a thread + /// that performed the store. + store_index: VectorIdx, + + /// Whether this store is SC. + is_seqcst: bool, + + /// The timestamp of the storing thread when it performed the store + timestamp: VTimestamp, + /// The value of this store + // FIXME: this means the store is either fully initialized or fully uninitialized; + // we will have to change this if we want to support atomics on + // partially initialized data. + val: ScalarMaybeUninit, + + /// Timestamp of first loads from this store element by each thread + /// Behind a RefCell to keep load op take &self + loads: RefCell>, +} + +impl StoreBufferAlloc { + pub fn new_allocation() -> Self { + Self { store_buffers: RefCell::new(AllocationMap::new()) } + } + + /// Checks if the range imperfectly overlaps with existing buffers + /// Used to determine if mixed-size atomic accesses + fn is_overlapping(&self, range: AllocRange) -> bool { + let buffers = self.store_buffers.borrow(); + let access_type = buffers.access_type(range); + matches!(access_type, AccessType::ImperfectlyOverlapping(_)) + } + + /// When a non-atomic access happens on a location that has been atomically accessed + /// before without data race, we can determine that the non-atomic access fully happens + /// after all the prior atomic accesses so the location no longer needs to exhibit + /// any weak memory behaviours until further atomic accesses. 
+ pub fn memory_accessed(&self, range: AllocRange, global: &GlobalState) {
+ if !global.ongoing_action_data_race_free() {
+ let mut buffers = self.store_buffers.borrow_mut();
+ let access_type = buffers.access_type(range);
+ match access_type {
+ AccessType::PerfectlyOverlapping(pos) => {
+ buffers.remove_from_pos(pos);
+ }
+ AccessType::ImperfectlyOverlapping(pos_range) => {
+ buffers.remove_pos_range(pos_range);
+ }
+ AccessType::Empty(_) => {
+ // The range had no weak behaviours attached, do nothing
+ }
+ }
+ }
+ }
+
+ /// Gets a store buffer associated with an atomic object in this allocation,
+ /// or creates one with the specified initial value if no atomic object exists yet.
+ fn get_or_create_store_buffer<'tcx>(
+ &self,
+ range: AllocRange,
+ init: ScalarMaybeUninit,
+ ) -> InterpResult<'tcx, Ref<'_, StoreBuffer>> {
+ let access_type = self.store_buffers.borrow().access_type(range);
+ let pos = match access_type {
+ AccessType::PerfectlyOverlapping(pos) => pos,
+ AccessType::Empty(pos) => {
+ let mut buffers = self.store_buffers.borrow_mut();
+ buffers.insert_at_pos(pos, range, StoreBuffer::new(init));
+ pos
+ }
+ AccessType::ImperfectlyOverlapping(pos_range) => {
+ // Once we reach here we would've already checked that this access is not racy
+ let mut buffers = self.store_buffers.borrow_mut();
+ buffers.remove_pos_range(pos_range.clone());
+ buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init));
+ pos_range.start
+ }
+ };
+ Ok(Ref::map(self.store_buffers.borrow(), |buffer| &buffer[pos]))
+ }
+
+ /// Gets a mutable store buffer associated with an atomic object in this allocation
+ fn get_or_create_store_buffer_mut<'tcx>(
+ &mut self,
+ range: AllocRange,
+ init: ScalarMaybeUninit,
+ ) -> InterpResult<'tcx, &mut StoreBuffer> {
+ let buffers = self.store_buffers.get_mut();
+ let access_type = buffers.access_type(range);
+ let pos = match access_type {
+ AccessType::PerfectlyOverlapping(pos) => pos,
+ AccessType::Empty(pos) => {
+ buffers.insert_at_pos(pos, range, StoreBuffer::new(init));
+ pos
+ }
+ AccessType::ImperfectlyOverlapping(pos_range) => {
+ buffers.remove_pos_range(pos_range.clone());
+ buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init));
+ pos_range.start
+ }
+ };
+ Ok(&mut buffers[pos])
+ }
+}
+
+impl<'mir, 'tcx: 'mir> StoreBuffer {
+ fn new(init: ScalarMaybeUninit) -> Self {
+ let mut buffer = VecDeque::new();
+ buffer.reserve(STORE_BUFFER_LIMIT);
+ let mut ret = Self { buffer };
+ let store_elem = StoreElement {
+ // The thread index and timestamp of the initialisation write
+ // are never meaningfully used, so it's fine to leave them as 0
+ store_index: VectorIdx::from(0),
+ timestamp: 0,
+ val: init,
+ is_seqcst: false,
+ loads: RefCell::new(FxHashMap::default()),
+ };
+ ret.buffer.push_back(store_elem);
+ ret
+ }
+
+ /// Reads from the last store in modification order
+ fn read_from_last_store(&self, global: &GlobalState) {
+ let store_elem = self.buffer.back();
+ if let Some(store_elem) = store_elem {
+ let (index, clocks) = global.current_thread_state();
+ store_elem.load_impl(index, &clocks);
+ }
+ }
+
+ fn buffered_read(
+ &self,
+ global: &GlobalState,
+ is_seqcst: bool,
+ rng: &mut (impl rand::Rng + ?Sized),
+ validate: impl FnOnce() -> InterpResult<'tcx>,
+ ) -> InterpResult<'tcx, ScalarMaybeUninit> {
+ // Having a live borrow to store_buffer while calling validate_atomic_load is fine
+ // because the race detector doesn't touch store_buffer
+
+ let store_elem = {
+ // The `clocks` we got here must be dropped before
calling validate_atomic_load
+ // as the race detector will update it
+ let (.., clocks) = global.current_thread_state();
+ // Load from a valid entry in the store buffer
+ self.fetch_store(is_seqcst, &clocks, &mut *rng)
+ };
+
+ // Unlike in buffered_atomic_write, thread clock updates have to be done
+ // after we've picked a store element from the store buffer, as presented
+ // in ATOMIC LOAD rule of the paper. This is because fetch_store
+ // requires access to ThreadClockSet.clock, which is updated by the race detector
+ validate()?;
+
+ let (index, clocks) = global.current_thread_state();
+ let loaded = store_elem.load_impl(index, &clocks);
+ Ok(loaded)
+ }
+
+ fn buffered_write(
+ &mut self,
+ val: ScalarMaybeUninit,
+ global: &GlobalState,
+ is_seqcst: bool,
+ ) -> InterpResult<'tcx> {
+ let (index, clocks) = global.current_thread_state();
+
+ self.store_impl(val, index, &clocks.clock, is_seqcst);
+ Ok(())
+ }
+
+ /// Selects a valid store element in the buffer.
+ fn fetch_store(
+ &self,
+ is_seqcst: bool,
+ clocks: &ThreadClockSet,
+ rng: &mut R,
+ ) -> &StoreElement {
+ use rand::seq::IteratorRandom;
+ let mut found_sc = false;
+ // FIXME: we want an inclusive take_while (stops after a false predicate, but
+ // includes the element that gave the false), but such a function doesn't yet
+ // exist in the standard library https://github.com/rust-lang/rust/issues/62208
+ // so we have to hack around it with keep_searching
+ let mut keep_searching = true;
+ let candidates = self
+ .buffer
+ .iter()
+ .rev()
+ .take_while(move |&store_elem| {
+ if !keep_searching {
+ return false;
+ }
+
+ keep_searching = if store_elem.timestamp <= clocks.clock[store_elem.store_index] {
+ // CoWR: if a store happens-before the current load,
+ // then we can't read-from anything earlier in modification order.
+ log::info!("Stopping due to coherent write-read");
+ false
+ } else if store_elem.loads.borrow().iter().any(|(&load_index, &load_timestamp)| {
+ load_timestamp <= clocks.clock[load_index]
+ }) {
+ // CoRR: if there was a load from this store which happened-before the current load,
+ // then we cannot read-from anything earlier in modification order.
+ log::info!("Stopping due to coherent read-read"); + false + } else if store_elem.timestamp <= clocks.fence_seqcst[store_elem.store_index] { + // The current load, which may be sequenced-after an SC fence, can only read-from + // the last store sequenced-before an SC fence in another thread (or any stores + // later than that SC fence) + log::info!("Stopping due to coherent load sequenced after sc fence"); + false + } else if store_elem.timestamp <= clocks.write_seqcst[store_elem.store_index] + && store_elem.is_seqcst + { + // The current non-SC load can only read-from the latest SC store (or any stores later than that + // SC store) + log::info!("Stopping due to needing to load from the last SC store"); + false + } else if is_seqcst && store_elem.timestamp <= clocks.read_seqcst[store_elem.store_index] { + // The current SC load can only read-from the last store sequenced-before + // the last SC fence (or any stores later than the SC fence) + log::info!("Stopping due to sc load needing to load from the last SC store before an SC fence"); + false + } else {true}; + + true + }) + .filter(|&store_elem| { + if is_seqcst && store_elem.is_seqcst { + // An SC load needs to ignore all but last store maked SC (stores not marked SC are not + // affected) + let include = !found_sc; + found_sc = true; + include + } else { + true + } + }); + + candidates + .choose(rng) + .expect("store buffer cannot be empty, an element is populated on construction") + } + + /// ATOMIC STORE IMPL in the paper (except we don't need the location's vector clock) + fn store_impl( + &mut self, + val: ScalarMaybeUninit, + index: VectorIdx, + thread_clock: &VClock, + is_seqcst: bool, + ) { + let store_elem = StoreElement { + store_index: index, + timestamp: thread_clock[index], + // In the language provided in the paper, an atomic store takes the value from a + // non-atomic memory location. + // But we already have the immediate value here so we don't need to do the memory + // access + val, + is_seqcst, + loads: RefCell::new(FxHashMap::default()), + }; + self.buffer.push_back(store_elem); + if self.buffer.len() > STORE_BUFFER_LIMIT { + self.buffer.pop_front(); + } + if is_seqcst { + // Every store that happens before this needs to be marked as SC + // so that in a later SC load, only the last SC store (i.e. this one) or stores that + // aren't ordered by hb with the last SC is picked. + self.buffer.iter_mut().rev().for_each(|elem| { + if elem.timestamp <= thread_clock[elem.store_index] { + elem.is_seqcst = true; + } + }) + } + } +} + +impl StoreElement { + /// ATOMIC LOAD IMPL in the paper + /// Unlike the operational semantics in the paper, we don't need to keep track + /// of the thread timestamp for every single load. Keeping track of the first (smallest) + /// timestamp of each thread that has loaded from a store is sufficient: if the earliest + /// load of another thread happens before the current one, then we must stop searching the store + /// buffer regardless of subsequent loads by the same thread; if the earliest load of another + /// thread doesn't happen before the current one, then no subsequent load by the other thread + /// can happen before the current one. 
+ fn load_impl(&self, index: VectorIdx, clocks: &ThreadClockSet) -> ScalarMaybeUninit { + let _ = self.loads.borrow_mut().try_insert(index, clocks.clock[index]); + self.val + } +} + +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {} +pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>: + crate::MiriEvalContextExt<'mir, 'tcx> +{ + // If weak memory emulation is enabled, check if this atomic op imperfectly overlaps with a previous + // atomic read or write. If it does, then we require it to be ordered (non-racy) with all previous atomic + // accesses on all the bytes in range + fn validate_overlapping_atomic(&self, place: &MPlaceTy<'tcx, Tag>) -> InterpResult<'tcx> { + let this = self.eval_context_ref(); + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let crate::AllocExtra { + weak_memory: Some(alloc_buffers), + data_race: Some(alloc_clocks), + .. + } = this.get_alloc_extra(alloc_id)? + { + let range = alloc_range(base_offset, place.layout.size); + if alloc_buffers.is_overlapping(range) + && !alloc_clocks + .race_free_with_atomic(range, this.machine.data_race.as_ref().unwrap()) + { + throw_unsup_format!( + "racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation" + ); + } + } + Ok(()) + } + + fn buffered_atomic_rmw( + &mut self, + new_val: ScalarMaybeUninit, + place: &MPlaceTy<'tcx, Tag>, + atomic: AtomicRwOp, + init: ScalarMaybeUninit, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let ( + crate::AllocExtra { weak_memory: Some(alloc_buffers), .. }, + crate::Evaluator { data_race: Some(global), .. }, + ) = this.get_alloc_extra_mut(alloc_id)? + { + if atomic == AtomicRwOp::SeqCst { + global.sc_read(); + global.sc_write(); + } + let range = alloc_range(base_offset, place.layout.size); + let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, init)?; + buffer.read_from_last_store(global); + buffer.buffered_write(new_val, global, atomic == AtomicRwOp::SeqCst)?; + } + Ok(()) + } + + fn buffered_atomic_read( + &self, + place: &MPlaceTy<'tcx, Tag>, + atomic: AtomicReadOp, + latest_in_mo: ScalarMaybeUninit, + validate: impl FnOnce() -> InterpResult<'tcx>, + ) -> InterpResult<'tcx, ScalarMaybeUninit> { + let this = self.eval_context_ref(); + if let Some(global) = &this.machine.data_race { + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { + if atomic == AtomicReadOp::SeqCst { + global.sc_read(); + } + let mut rng = this.machine.rng.borrow_mut(); + let buffer = alloc_buffers.get_or_create_store_buffer( + alloc_range(base_offset, place.layout.size), + latest_in_mo, + )?; + let loaded = buffer.buffered_read( + global, + atomic == AtomicReadOp::SeqCst, + &mut *rng, + validate, + )?; + + return Ok(loaded); + } + } + + // Race detector or weak memory disabled, simply read the latest value + validate()?; + Ok(latest_in_mo) + } + + fn buffered_atomic_write( + &mut self, + val: ScalarMaybeUninit, + dest: &MPlaceTy<'tcx, Tag>, + atomic: AtomicWriteOp, + init: ScalarMaybeUninit, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr)?; + if let ( + crate::AllocExtra { weak_memory: Some(alloc_buffers), .. }, + crate::Evaluator { data_race: Some(global), .. 
}, + ) = this.get_alloc_extra_mut(alloc_id)? + { + if atomic == AtomicWriteOp::SeqCst { + global.sc_write(); + } + + // UGLY HACK: in write_scalar_atomic() we don't know the value before our write, + // so init == val always. If the buffer is fresh then we would've duplicated an entry, + // so we need to remove it. + // See https://github.com/rust-lang/miri/issues/2164 + let was_empty = matches!( + alloc_buffers + .store_buffers + .borrow() + .access_type(alloc_range(base_offset, dest.layout.size)), + AccessType::Empty(_) + ); + let buffer = alloc_buffers + .get_or_create_store_buffer_mut(alloc_range(base_offset, dest.layout.size), init)?; + if was_empty { + buffer.buffer.pop_front(); + } + + buffer.buffered_write(val, global, atomic == AtomicWriteOp::SeqCst)?; + } + + // Caller should've written to dest with the vanilla scalar write, we do nothing here + Ok(()) + } + + /// Caller should never need to consult the store buffer for the latest value. + /// This function is used exclusively for failed atomic_compare_exchange_scalar + /// to perform load_impl on the latest store element + fn perform_read_on_buffered_latest( + &self, + place: &MPlaceTy<'tcx, Tag>, + atomic: AtomicReadOp, + init: ScalarMaybeUninit, + ) -> InterpResult<'tcx> { + let this = self.eval_context_ref(); + + if let Some(global) = &this.machine.data_race { + if atomic == AtomicReadOp::SeqCst { + global.sc_read(); + } + let size = place.layout.size; + let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr)?; + if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() { + let buffer = alloc_buffers + .get_or_create_store_buffer(alloc_range(base_offset, size), init)?; + buffer.read_from_last_store(global); + } + } + Ok(()) + } +} diff --git a/src/eval.rs b/src/eval.rs index a782dfa3fc..bdf527a0d1 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -105,6 +105,8 @@ pub struct MiriConfig { pub tag_raw: bool, /// Determine if data race detection should be enabled pub data_race_detector: bool, + /// Determine if weak memory emulation should be enabled. Requires data race detection to be enabled + pub weak_memory_emulation: bool, /// Rate of spurious failures for compare_exchange_weak atomic operations, /// between 0.0 and 1.0, defaulting to 0.8 (80% chance of failure). 
pub cmpxchg_weak_failure_rate: f64, @@ -142,6 +144,7 @@ impl Default for MiriConfig { tracked_alloc_ids: HashSet::default(), tag_raw: false, data_race_detector: true, + weak_memory_emulation: true, cmpxchg_weak_failure_rate: 0.8, measureme_out: None, panic_on_unsupported: false, diff --git a/src/lib.rs b/src/lib.rs index f7c256656a..982d3873d5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,7 +31,7 @@ extern crate rustc_session; extern crate rustc_span; extern crate rustc_target; -mod data_race; +mod concurrency; mod diagnostics; mod eval; mod helpers; @@ -63,7 +63,7 @@ pub use crate::shims::time::EvalContextExt as _; pub use crate::shims::tls::{EvalContextExt as _, TlsData}; pub use crate::shims::EvalContextExt as _; -pub use crate::data_race::{ +pub use crate::concurrency::data_race::{ AtomicFenceOp, AtomicReadOp, AtomicRwOp, AtomicWriteOp, EvalContextExt as DataRaceEvalContextExt, }; diff --git a/src/machine.rs b/src/machine.rs index 369bb92c6f..1ae49edd60 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -28,7 +28,11 @@ use rustc_span::Symbol; use rustc_target::abi::Size; use rustc_target::spec::abi::Abi; -use crate::{shims::unix::FileHandler, *}; +use crate::{ + concurrency::{data_race, weak_memory}, + shims::unix::FileHandler, + *, +}; // Some global facts about the emulated machine. pub const PAGE_SIZE: u64 = 4 * 1024; // FIXME: adjust to target architecture @@ -190,6 +194,9 @@ pub struct AllocExtra { /// Data race detection via the use of a vector-clock, /// this is only added if it is enabled. pub data_race: Option, + /// Weak memory emulation via the use of store buffers, + /// this is only added if it is enabled. + pub weak_memory: Option, } /// Precomputed layouts of primitive types @@ -323,6 +330,9 @@ pub struct Evaluator<'mir, 'tcx> { /// Corresponds to -Zmiri-mute-stdout-stderr and doesn't write the output but acts as if it succeeded. 
pub(crate) mute_stdout_stderr: bool, + + /// Whether weak memory emulation is enabled + pub(crate) weak_memory: bool, } impl<'mir, 'tcx> Evaluator<'mir, 'tcx> { @@ -378,6 +388,7 @@ impl<'mir, 'tcx> Evaluator<'mir, 'tcx> { check_alignment: config.check_alignment, cmpxchg_weak_failure_rate: config.cmpxchg_weak_failure_rate, mute_stdout_stderr: config.mute_stdout_stderr, + weak_memory: config.weak_memory_emulation, } } @@ -626,9 +637,18 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { } else { None }; + let buffer_alloc = if ecx.machine.weak_memory { + Some(weak_memory::AllocExtra::new_allocation()) + } else { + None + }; let alloc: Allocation = alloc.convert_tag_add_extra( &ecx.tcx, - AllocExtra { stacked_borrows: stacks, data_race: race_alloc }, + AllocExtra { + stacked_borrows: stacks, + data_race: race_alloc, + weak_memory: buffer_alloc, + }, |ptr| Evaluator::tag_alloc_base_pointer(ecx, ptr), ); Cow::Owned(alloc) @@ -716,10 +736,12 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { range, machine.stacked_borrows.as_ref().unwrap(), machine.current_span(), - ) - } else { - Ok(()) + )?; + } + if let Some(weak_memory) = &alloc_extra.weak_memory { + weak_memory.memory_accessed(range, machine.data_race.as_ref().unwrap()); } + Ok(()) } #[inline(always)] @@ -740,10 +762,12 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { range, machine.stacked_borrows.as_ref().unwrap(), machine.current_span(), - ) - } else { - Ok(()) + )?; } + if let Some(weak_memory) = &alloc_extra.weak_memory { + weak_memory.memory_accessed(range, machine.data_race.as_ref().unwrap()); + } + Ok(()) } #[inline(always)] diff --git a/src/shims/unix/thread.rs b/src/shims/unix/thread.rs index 88c3fb0bc8..4dc40cf2fe 100644 --- a/src/shims/unix/thread.rs +++ b/src/shims/unix/thread.rs @@ -14,7 +14,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let this = self.eval_context_mut(); this.tcx.sess.warn( - "thread support is experimental and incomplete: weak memory effects are not emulated.", + "thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops.\n(see https://github.com/rust-lang/miri/issues/1388)", ); // Create the new thread diff --git a/src/thread.rs b/src/thread.rs index b6fb866f71..0d702fd9c8 100644 --- a/src/thread.rs +++ b/src/thread.rs @@ -12,6 +12,7 @@ use rustc_hir::def_id::DefId; use rustc_index::vec::{Idx, IndexVec}; use rustc_middle::mir::Mutability; +use crate::concurrency::data_race; use crate::sync::SynchronizationState; use crate::*; diff --git a/src/vector_clock.rs b/src/vector_clock.rs index e13e9c39fc..716fdba0f6 100644 --- a/src/vector_clock.rs +++ b/src/vector_clock.rs @@ -108,10 +108,8 @@ impl VClock { /// Set the element at the current index of the vector pub fn set_at_index(&mut self, other: &Self, idx: VectorIdx) { - let idx = idx.index(); - let mut_slice = self.get_mut_with_min_len(idx + 1); - let slice = other.as_slice(); - mut_slice[idx] = slice[idx]; + let mut_slice = self.get_mut_with_min_len(idx.index() + 1); + mut_slice[idx.index()] = other[idx]; } /// Set the vector to the all-zero vector diff --git a/tests/fail/concurrency/libc_pthread_create_main_terminate.stderr b/tests/fail/concurrency/libc_pthread_create_main_terminate.stderr index 0f7fbefe0a..2ce73fdaae 100644 --- a/tests/fail/concurrency/libc_pthread_create_main_terminate.stderr +++ b/tests/fail/concurrency/libc_pthread_create_main_terminate.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and 
incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: the main thread terminated without waiting for all remaining threads diff --git a/tests/fail/concurrency/libc_pthread_join_detached.stderr b/tests/fail/concurrency/libc_pthread_join_detached.stderr index 688f61a98b..b106cc4c95 100644 --- a/tests/fail/concurrency/libc_pthread_join_detached.stderr +++ b/tests/fail/concurrency/libc_pthread_join_detached.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: trying to join a detached or already joined thread --> $DIR/libc_pthread_join_detached.rs:LL:CC diff --git a/tests/fail/concurrency/libc_pthread_join_joined.stderr b/tests/fail/concurrency/libc_pthread_join_joined.stderr index 518f72de5b..438998208d 100644 --- a/tests/fail/concurrency/libc_pthread_join_joined.stderr +++ b/tests/fail/concurrency/libc_pthread_join_joined.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: trying to join a detached or already joined thread --> $DIR/libc_pthread_join_joined.rs:LL:CC diff --git a/tests/fail/concurrency/libc_pthread_join_main.stderr b/tests/fail/concurrency/libc_pthread_join_main.stderr index 5d9ec148e0..04f2ab0740 100644 --- a/tests/fail/concurrency/libc_pthread_join_main.stderr +++ b/tests/fail/concurrency/libc_pthread_join_main.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: trying to join a detached or already joined thread --> $DIR/libc_pthread_join_main.rs:LL:CC diff --git a/tests/fail/concurrency/libc_pthread_join_multiple.stderr b/tests/fail/concurrency/libc_pthread_join_multiple.stderr index 57126a14ae..daf18c50e0 100644 --- a/tests/fail/concurrency/libc_pthread_join_multiple.stderr +++ b/tests/fail/concurrency/libc_pthread_join_multiple.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: trying to join a detached or already joined thread --> $DIR/libc_pthread_join_multiple.rs:LL:CC diff --git a/tests/fail/concurrency/libc_pthread_join_self.stderr b/tests/fail/concurrency/libc_pthread_join_self.stderr index d638d08939..b2e0779f5f 100644 --- a/tests/fail/concurrency/libc_pthread_join_self.stderr +++ b/tests/fail/concurrency/libc_pthread_join_self.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: trying to join itself --> $DIR/libc_pthread_join_self.rs:LL:CC diff --git a/tests/fail/concurrency/thread_local_static_dealloc.stderr b/tests/fail/concurrency/thread_local_static_dealloc.stderr index cdeb22fb31..ad5528dc55 100644 --- a/tests/fail/concurrency/thread_local_static_dealloc.stderr +++ b/tests/fail/concurrency/thread_local_static_dealloc.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: pointer to ALLOC was dereferenced after this allocation got freed --> $DIR/thread_local_static_dealloc.rs:LL:CC diff --git a/tests/fail/concurrency/too_few_args.stderr b/tests/fail/concurrency/too_few_args.stderr index 7401b2902e..1ed8c5a510 100644 --- a/tests/fail/concurrency/too_few_args.stderr +++ b/tests/fail/concurrency/too_few_args.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: callee has fewer arguments than expected --> $DIR/too_few_args.rs:LL:CC diff --git a/tests/fail/concurrency/too_many_args.stderr b/tests/fail/concurrency/too_many_args.stderr index 951b76317f..5602dab993 100644 --- a/tests/fail/concurrency/too_many_args.stderr +++ b/tests/fail/concurrency/too_many_args.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: callee has more arguments than expected --> $DIR/too_many_args.rs:LL:CC diff --git a/tests/fail/concurrency/unwind_top_of_stack.stderr b/tests/fail/concurrency/unwind_top_of_stack.stderr index 600b8443d2..26a196a559 100644 --- a/tests/fail/concurrency/unwind_top_of_stack.stderr +++ b/tests/fail/concurrency/unwind_top_of_stack.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) thread '' panicked at 'explicit panic', $DIR/unwind_top_of_stack.rs:LL:CC note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace diff --git a/tests/fail/data_race/alloc_read_race.rs b/tests/fail/data_race/alloc_read_race.rs index 093c9024f2..2ddbb65724 100644 --- a/tests/fail/data_race/alloc_read_race.rs +++ b/tests/fail/data_race/alloc_read_race.rs @@ -1,4 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. +// compile-flags: -Zmiri-disable-weak-memory-emulation #![feature(new_uninit)] use std::thread::spawn; diff --git a/tests/fail/data_race/alloc_read_race.stderr b/tests/fail/data_race/alloc_read_race.stderr index 9d9006966b..0b247fb19b 100644 --- a/tests/fail/data_race/alloc_read_race.stderr +++ b/tests/fail/data_race/alloc_read_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. 
+warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 2) and Allocate on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/alloc_read_race.rs:LL:CC diff --git a/tests/fail/data_race/alloc_write_race.rs b/tests/fail/data_race/alloc_write_race.rs index becebe6a12..d32eb55676 100644 --- a/tests/fail/data_race/alloc_write_race.rs +++ b/tests/fail/data_race/alloc_write_race.rs @@ -1,4 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. +// compile-flags: -Zmiri-disable-weak-memory-emulation #![feature(new_uninit)] use std::thread::spawn; diff --git a/tests/fail/data_race/alloc_write_race.stderr b/tests/fail/data_race/alloc_write_race.stderr index 318895cae6..3594980ef9 100644 --- a/tests/fail/data_race/alloc_write_race.stderr +++ b/tests/fail/data_race/alloc_write_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 2) and Allocate on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/alloc_write_race.rs:LL:CC diff --git a/tests/fail/data_race/atomic_read_na_write_race1.stderr b/tests/fail/data_race/atomic_read_na_write_race1.stderr index 09d7accb05..0c9aaf5a00 100644 --- a/tests/fail/data_race/atomic_read_na_write_race1.stderr +++ b/tests/fail/data_race/atomic_read_na_write_race1.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Atomic Load on Thread(id = 2) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/atomic_read_na_write_race1.rs:LL:CC diff --git a/tests/fail/data_race/atomic_read_na_write_race2.stderr b/tests/fail/data_race/atomic_read_na_write_race2.stderr index 739ce83d0b..6e3a1330f9 100644 --- a/tests/fail/data_race/atomic_read_na_write_race2.stderr +++ b/tests/fail/data_race/atomic_read_na_write_race2.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 2) and Atomic Load on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/atomic_read_na_write_race2.rs:LL:CC diff --git a/tests/fail/data_race/atomic_write_na_read_race1.stderr b/tests/fail/data_race/atomic_write_na_read_race1.stderr index 6d67f58aae..4dc4ac1e67 100644 --- a/tests/fail/data_race/atomic_write_na_read_race1.stderr +++ b/tests/fail/data_race/atomic_write_na_read_race1.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. 
+warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 2) and Atomic Store on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/atomic_write_na_read_race1.rs:LL:CC diff --git a/tests/fail/data_race/atomic_write_na_read_race2.stderr b/tests/fail/data_race/atomic_write_na_read_race2.stderr index d9950ebcb7..e665073c53 100644 --- a/tests/fail/data_race/atomic_write_na_read_race2.stderr +++ b/tests/fail/data_race/atomic_write_na_read_race2.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Atomic Store on Thread(id = 2) and Read on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/atomic_write_na_read_race2.rs:LL:CC diff --git a/tests/fail/data_race/atomic_write_na_write_race1.stderr b/tests/fail/data_race/atomic_write_na_write_race1.stderr index 29ccf70212..a70c3b52de 100644 --- a/tests/fail/data_race/atomic_write_na_write_race1.stderr +++ b/tests/fail/data_race/atomic_write_na_write_race1.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Atomic Store on Thread(id = 2) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/atomic_write_na_write_race1.rs:LL:CC diff --git a/tests/fail/data_race/atomic_write_na_write_race2.stderr b/tests/fail/data_race/atomic_write_na_write_race2.stderr index 5488f05de0..79730d5079 100644 --- a/tests/fail/data_race/atomic_write_na_write_race2.stderr +++ b/tests/fail/data_race/atomic_write_na_write_race2.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 2) and Atomic Store on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/atomic_write_na_write_race2.rs:LL:CC diff --git a/tests/fail/data_race/dangling_thread_async_race.stderr b/tests/fail/data_race/dangling_thread_async_race.stderr index eccc243d69..21b3eefc5e 100644 --- a/tests/fail/data_race/dangling_thread_async_race.stderr +++ b/tests/fail/data_race/dangling_thread_async_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 3) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/dangling_thread_async_race.rs:LL:CC diff --git a/tests/fail/data_race/dangling_thread_race.stderr b/tests/fail/data_race/dangling_thread_race.stderr index 4dffeb1423..3ca8862a58 100644 --- a/tests/fail/data_race/dangling_thread_race.stderr +++ b/tests/fail/data_race/dangling_thread_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 0, name = "main") and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/dangling_thread_race.rs:LL:CC diff --git a/tests/fail/data_race/dealloc_read_race1.stderr b/tests/fail/data_race/dealloc_read_race1.stderr index 37196021ea..10b32003ff 100644 --- a/tests/fail/data_race/dealloc_read_race1.stderr +++ b/tests/fail/data_race/dealloc_read_race1.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Deallocate on Thread(id = 2) and Read on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/dealloc_read_race1.rs:LL:CC diff --git a/tests/fail/data_race/dealloc_read_race2.stderr b/tests/fail/data_race/dealloc_read_race2.stderr index 03fb5dbea9..a21de1d9f7 100644 --- a/tests/fail/data_race/dealloc_read_race2.stderr +++ b/tests/fail/data_race/dealloc_read_race2.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: pointer to ALLOC was dereferenced after this allocation got freed --> $DIR/dealloc_read_race2.rs:LL:CC diff --git a/tests/fail/data_race/dealloc_read_race_stack.rs b/tests/fail/data_race/dealloc_read_race_stack.rs index 6b573121e5..b70db5f4ac 100644 --- a/tests/fail/data_race/dealloc_read_race_stack.rs +++ b/tests/fail/data_race/dealloc_read_race_stack.rs @@ -1,5 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. -// compile-flags: -Zmiri-disable-isolation +// compile-flags: -Zmiri-disable-isolation -Zmiri-disable-weak-memory-emulation use std::thread::{spawn, sleep}; use std::ptr::null_mut; diff --git a/tests/fail/data_race/dealloc_read_race_stack.stderr b/tests/fail/data_race/dealloc_read_race_stack.stderr index 055724fe29..0f7213eb8d 100644 --- a/tests/fail/data_race/dealloc_read_race_stack.stderr +++ b/tests/fail/data_race/dealloc_read_race_stack.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Deallocate on Thread(id = 1) and Read on Thread(id = 2) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/dealloc_read_race_stack.rs:LL:CC diff --git a/tests/fail/data_race/dealloc_write_race1.stderr b/tests/fail/data_race/dealloc_write_race1.stderr index 7160f49af6..76258e9d8f 100644 --- a/tests/fail/data_race/dealloc_write_race1.stderr +++ b/tests/fail/data_race/dealloc_write_race1.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Deallocate on Thread(id = 2) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/dealloc_write_race1.rs:LL:CC diff --git a/tests/fail/data_race/dealloc_write_race2.stderr b/tests/fail/data_race/dealloc_write_race2.stderr index cb0d0af867..d9aef72118 100644 --- a/tests/fail/data_race/dealloc_write_race2.stderr +++ b/tests/fail/data_race/dealloc_write_race2.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: pointer to ALLOC was dereferenced after this allocation got freed --> $DIR/dealloc_write_race2.rs:LL:CC diff --git a/tests/fail/data_race/dealloc_write_race_stack.rs b/tests/fail/data_race/dealloc_write_race_stack.rs index 34a16b00b8..f2b49fc5f3 100644 --- a/tests/fail/data_race/dealloc_write_race_stack.rs +++ b/tests/fail/data_race/dealloc_write_race_stack.rs @@ -1,5 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. -// compile-flags: -Zmiri-disable-isolation +// compile-flags: -Zmiri-disable-isolation -Zmiri-disable-weak-memory-emulation use std::thread::{spawn, sleep}; use std::ptr::null_mut; diff --git a/tests/fail/data_race/dealloc_write_race_stack.stderr b/tests/fail/data_race/dealloc_write_race_stack.stderr index 05a8e1a8b7..70533f654b 100644 --- a/tests/fail/data_race/dealloc_write_race_stack.stderr +++ b/tests/fail/data_race/dealloc_write_race_stack.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Deallocate on Thread(id = 1) and Write on Thread(id = 2) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/dealloc_write_race_stack.rs:LL:CC diff --git a/tests/fail/data_race/enable_after_join_to_main.stderr b/tests/fail/data_race/enable_after_join_to_main.stderr index e612e08ade..58d33ffa8c 100644 --- a/tests/fail/data_race/enable_after_join_to_main.stderr +++ b/tests/fail/data_race/enable_after_join_to_main.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 6) and Write on Thread(id = 5) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/enable_after_join_to_main.rs:LL:CC diff --git a/tests/fail/data_race/fence_after_load.stderr b/tests/fail/data_race/fence_after_load.stderr index 1445239132..1e3186b08f 100644 --- a/tests/fail/data_race/fence_after_load.stderr +++ b/tests/fail/data_race/fence_after_load.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 0, name = "main") and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/fence_after_load.rs:LL:CC diff --git a/tests/fail/data_race/read_write_race.stderr b/tests/fail/data_race/read_write_race.stderr index fc04141830..5078e66254 100644 --- a/tests/fail/data_race/read_write_race.stderr +++ b/tests/fail/data_race/read_write_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 2) and Read on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/read_write_race.rs:LL:CC diff --git a/tests/fail/data_race/read_write_race_stack.rs b/tests/fail/data_race/read_write_race_stack.rs index 5a1c0a4b6d..9edeed0af6 100644 --- a/tests/fail/data_race/read_write_race_stack.rs +++ b/tests/fail/data_race/read_write_race_stack.rs @@ -1,5 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. -// compile-flags: -Zmiri-disable-isolation -Zmir-opt-level=0 +// compile-flags: -Zmiri-disable-isolation -Zmir-opt-level=0 -Zmiri-disable-weak-memory-emulation // Note: mir-opt-level set to 0 to prevent the read of stack_var in thread 1 // from being optimized away and preventing the detection of the data-race. diff --git a/tests/fail/data_race/read_write_race_stack.stderr b/tests/fail/data_race/read_write_race_stack.stderr index aad63731ca..843bea753b 100644 --- a/tests/fail/data_race/read_write_race_stack.stderr +++ b/tests/fail/data_race/read_write_race_stack.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 1) and Write on Thread(id = 2) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/read_write_race_stack.rs:LL:CC diff --git a/tests/fail/data_race/relax_acquire_race.rs b/tests/fail/data_race/relax_acquire_race.rs index 8b8616431f..20e63dc4b1 100644 --- a/tests/fail/data_race/relax_acquire_race.rs +++ b/tests/fail/data_race/relax_acquire_race.rs @@ -1,4 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. 
+// compile-flags: -Zmiri-disable-weak-memory-emulation use std::thread::spawn; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/tests/fail/data_race/relax_acquire_race.stderr b/tests/fail/data_race/relax_acquire_race.stderr index a437120c89..d2423ff916 100644 --- a/tests/fail/data_race/relax_acquire_race.stderr +++ b/tests/fail/data_race/relax_acquire_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 3) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/relax_acquire_race.rs:LL:CC diff --git a/tests/fail/data_race/release_seq_race.rs b/tests/fail/data_race/release_seq_race.rs index 29c428b388..6ff84aa04b 100644 --- a/tests/fail/data_race/release_seq_race.rs +++ b/tests/fail/data_race/release_seq_race.rs @@ -1,5 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. -// compile-flags: -Zmiri-disable-isolation +// compile-flags: -Zmiri-disable-isolation -Zmiri-disable-weak-memory-emulation use std::thread::{spawn, sleep}; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/tests/fail/data_race/release_seq_race.stderr b/tests/fail/data_race/release_seq_race.stderr index 1a1c7ac64f..ffbf50c091 100644 --- a/tests/fail/data_race/release_seq_race.stderr +++ b/tests/fail/data_race/release_seq_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 3) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/release_seq_race.rs:LL:CC diff --git a/tests/fail/data_race/release_seq_race_same_thread.rs b/tests/fail/data_race/release_seq_race_same_thread.rs index 54b9f49937..1245fb96f4 100644 --- a/tests/fail/data_race/release_seq_race_same_thread.rs +++ b/tests/fail/data_race/release_seq_race_same_thread.rs @@ -1,5 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. -// compile-flags: -Zmiri-disable-isolation +// compile-flags: -Zmiri-disable-isolation -Zmiri-disable-weak-memory-emulation use std::thread::spawn; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/tests/fail/data_race/release_seq_race_same_thread.stderr b/tests/fail/data_race/release_seq_race_same_thread.stderr index f357c0647d..b760215146 100644 --- a/tests/fail/data_race/release_seq_race_same_thread.stderr +++ b/tests/fail/data_race/release_seq_race_same_thread.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 2) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/release_seq_race_same_thread.rs:LL:CC diff --git a/tests/fail/data_race/rmw_race.rs b/tests/fail/data_race/rmw_race.rs index fcf683a65d..c968c83422 100644 --- a/tests/fail/data_race/rmw_race.rs +++ b/tests/fail/data_race/rmw_race.rs @@ -1,4 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. +// compile-flags: -Zmiri-disable-weak-memory-emulation use std::thread::spawn; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/tests/fail/data_race/rmw_race.stderr b/tests/fail/data_race/rmw_race.stderr index dd3692c6dc..c6b09ba5f0 100644 --- a/tests/fail/data_race/rmw_race.stderr +++ b/tests/fail/data_race/rmw_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Read on Thread(id = 3) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/rmw_race.rs:LL:CC diff --git a/tests/fail/data_race/write_write_race.stderr b/tests/fail/data_race/write_write_race.stderr index dafee7dbf8..5acba97486 100644 --- a/tests/fail/data_race/write_write_race.stderr +++ b/tests/fail/data_race/write_write_race.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 2) and Write on Thread(id = 1) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/write_write_race.rs:LL:CC diff --git a/tests/fail/data_race/write_write_race_stack.rs b/tests/fail/data_race/write_write_race_stack.rs index bfe1464cb5..daa3e5f5c4 100644 --- a/tests/fail/data_race/write_write_race_stack.rs +++ b/tests/fail/data_race/write_write_race_stack.rs @@ -1,5 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. -// compile-flags: -Zmiri-disable-isolation +// compile-flags: -Zmiri-disable-isolation -Zmiri-disable-weak-memory-emulation use std::thread::{spawn, sleep}; use std::ptr::null_mut; diff --git a/tests/fail/data_race/write_write_race_stack.stderr b/tests/fail/data_race/write_write_race_stack.stderr index 8d113673ac..d052206f4c 100644 --- a/tests/fail/data_race/write_write_race_stack.stderr +++ b/tests/fail/data_race/write_write_race_stack.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: Data race detected between Write on Thread(id = 1) and Write on Thread(id = 2) at ALLOC (current vector clock = VClock, conflicting timestamp = VClock) --> $DIR/write_write_race_stack.rs:LL:CC diff --git a/tests/fail/should-pass/cpp20_rwc_syncs.rs b/tests/fail/should-pass/cpp20_rwc_syncs.rs new file mode 100644 index 0000000000..e5192cd0d6 --- /dev/null +++ b/tests/fail/should-pass/cpp20_rwc_syncs.rs @@ -0,0 +1,87 @@ +// ignore-windows: Concurrency on Windows is not supported yet. +// compile-flags: -Zmiri-ignore-leaks +// error-pattern: + +// https://plv.mpi-sws.org/scfix/paper.pdf +// 2.2 Second Problem: SC Fences are Too Weak +// This test should pass under the C++20 model Rust is using. +// Unfortunately, Miri's weak memory emulation only follows the C++11 model +// as we don't know how to correctly emulate C++20's revised SC semantics, +// so we have to stick to C++11 emulation from existing research. + +use std::sync::atomic::Ordering::*; +use std::sync::atomic::{fence, AtomicUsize}; +use std::thread::spawn; + +// Spins until it reads the given value +fn reads_value(loc: &AtomicUsize, val: usize) -> usize { + while loc.load(Relaxed) != val { + std::hint::spin_loop(); + } + val +} + +// We can't create static items because we need to run each test +// multiple tests +fn static_atomic(val: usize) -> &'static AtomicUsize { + let ret = Box::leak(Box::new(AtomicUsize::new(val))); + // A workaround to put the initialization value in the store buffer. + // See https://github.com/rust-lang/miri/issues/2164 + ret.load(Relaxed); + ret +} + +fn test_cpp20_rwc_syncs() { + /* + int main() { + atomic_int x = 0; + atomic_int y = 0; + + {{{ x.store(1,mo_relaxed); + ||| { r1=x.load(mo_relaxed).readsvalue(1); + fence(mo_seq_cst); + r2=y.load(mo_relaxed); } + ||| { y.store(1,mo_relaxed); + fence(mo_seq_cst); + r3=x.load(mo_relaxed); } + }}} + return 0; + } + */ + let x = static_atomic(0); + let y = static_atomic(0); + + let j1 = spawn(move || { + x.store(1, Relaxed); + }); + + let j2 = spawn(move || { + reads_value(&x, 1); + fence(SeqCst); + y.load(Relaxed) + }); + + let j3 = spawn(move || { + y.store(1, Relaxed); + fence(SeqCst); + x.load(Relaxed) + }); + + j1.join().unwrap(); + let b = j2.join().unwrap(); + let c = j3.join().unwrap(); + + // We cannot write assert_ne!() since ui_test's fail + // tests expect exit status 1, whereas panics produce 101. + // Our ui_test does not yet support overriding failure status codes. + if (b, c) == (0, 0) { + // This *should* be unreachable, but Miri will reach it. + std::process::exit(1); + } +} + +pub fn main() { + for _ in 0..500 { + test_cpp20_rwc_syncs(); + } +} diff --git a/tests/fail/should-pass/cpp20_rwc_syncs.stderr b/tests/fail/should-pass/cpp20_rwc_syncs.stderr new file mode 100644 index 0000000000..9fe6daa778 --- /dev/null +++ b/tests/fail/should-pass/cpp20_rwc_syncs.stderr @@ -0,0 +1,3 @@ +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) + diff --git a/tests/fail/sync/libc_pthread_mutex_deadlock.stderr b/tests/fail/sync/libc_pthread_mutex_deadlock.stderr index ac37096ad8..d1f9ee6cdd 100644 --- a/tests/fail/sync/libc_pthread_mutex_deadlock.stderr +++ b/tests/fail/sync/libc_pthread_mutex_deadlock.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. 
+warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: deadlock: the evaluated program deadlocked --> $DIR/libc_pthread_mutex_deadlock.rs:LL:CC diff --git a/tests/fail/sync/libc_pthread_mutex_wrong_owner.stderr b/tests/fail/sync/libc_pthread_mutex_wrong_owner.stderr index 6603b264d9..e9f0e2d4c1 100644 --- a/tests/fail/sync/libc_pthread_mutex_wrong_owner.stderr +++ b/tests/fail/sync/libc_pthread_mutex_wrong_owner.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: unlocked a default mutex that was not locked by the current thread --> $DIR/libc_pthread_mutex_wrong_owner.rs:LL:CC diff --git a/tests/fail/sync/libc_pthread_rwlock_read_wrong_owner.stderr b/tests/fail/sync/libc_pthread_rwlock_read_wrong_owner.stderr index d3820f0dcb..c25ab25a3d 100644 --- a/tests/fail/sync/libc_pthread_rwlock_read_wrong_owner.stderr +++ b/tests/fail/sync/libc_pthread_rwlock_read_wrong_owner.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: unlocked an rwlock that was not locked by the active thread --> $DIR/libc_pthread_rwlock_read_wrong_owner.rs:LL:CC diff --git a/tests/fail/sync/libc_pthread_rwlock_write_read_deadlock.stderr b/tests/fail/sync/libc_pthread_rwlock_write_read_deadlock.stderr index 748a363a27..8fc2ae4c82 100644 --- a/tests/fail/sync/libc_pthread_rwlock_write_read_deadlock.stderr +++ b/tests/fail/sync/libc_pthread_rwlock_write_read_deadlock.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: deadlock: the evaluated program deadlocked --> $DIR/libc_pthread_rwlock_write_read_deadlock.rs:LL:CC diff --git a/tests/fail/sync/libc_pthread_rwlock_write_write_deadlock.stderr b/tests/fail/sync/libc_pthread_rwlock_write_write_deadlock.stderr index c6a03ff9af..86c67925fb 100644 --- a/tests/fail/sync/libc_pthread_rwlock_write_write_deadlock.stderr +++ b/tests/fail/sync/libc_pthread_rwlock_write_write_deadlock.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: deadlock: the evaluated program deadlocked --> $DIR/libc_pthread_rwlock_write_write_deadlock.rs:LL:CC diff --git a/tests/fail/sync/libc_pthread_rwlock_write_wrong_owner.stderr b/tests/fail/sync/libc_pthread_rwlock_write_wrong_owner.stderr index 02a6cf11c0..8965d55a48 100644 --- a/tests/fail/sync/libc_pthread_rwlock_write_wrong_owner.stderr +++ b/tests/fail/sync/libc_pthread_rwlock_write_wrong_owner.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. 
+warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) error: Undefined Behavior: unlocked an rwlock that was not locked by the active thread --> $DIR/libc_pthread_rwlock_write_wrong_owner.rs:LL:CC diff --git a/tests/fail/weak_memory/racing_mixed_size.rs b/tests/fail/weak_memory/racing_mixed_size.rs new file mode 100644 index 0000000000..6d53670a4e --- /dev/null +++ b/tests/fail/weak_memory/racing_mixed_size.rs @@ -0,0 +1,38 @@ +// ignore-windows: Concurrency on Windows is not supported yet. + +#![feature(core_intrinsics)] + +use std::sync::atomic::AtomicU32; +use std::sync::atomic::Ordering::*; +use std::thread::spawn; + +fn static_atomic_u32(val: u32) -> &'static AtomicU32 { + let ret = Box::leak(Box::new(AtomicU32::new(val))); + ret +} + +fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] { + unsafe { std::mem::transmute::<*const u32, *const [u16; 2]>(dword) } +} + +// Wine's SRWLock implementation does this, which is definitely undefined in C++ memory model +// https://github.com/wine-mirror/wine/blob/303f8042f9db508adaca02ef21f8de4992cb9c03/dlls/ntdll/sync.c#L543-L566 +// Though it probably works just fine on x86 +pub fn main() { + let x = static_atomic_u32(0); + let j1 = spawn(move || { + x.store(1, Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: imperfectly overlapping + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); +} diff --git a/tests/fail/weak_memory/racing_mixed_size.stderr b/tests/fail/weak_memory/racing_mixed_size.stderr new file mode 100644 index 0000000000..fc6be84315 --- /dev/null +++ b/tests/fail/weak_memory/racing_mixed_size.stderr @@ -0,0 +1,17 @@ +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) + +error: unsupported operation: racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation + --> $DIR/racing_mixed_size.rs:LL:CC + | +LL | std::intrinsics::atomic_load_relaxed(hi); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation + | + = help: this is likely not a bug in the program; it indicates that the program performed an operation that the interpreter does not support + + = note: inside closure at $DIR/racing_mixed_size.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to previous error; 1 warning emitted + diff --git a/tests/fail/weak_memory/racing_mixed_size_read.rs b/tests/fail/weak_memory/racing_mixed_size_read.rs new file mode 100644 index 0000000000..0129b55aff --- /dev/null +++ b/tests/fail/weak_memory/racing_mixed_size_read.rs @@ -0,0 +1,39 @@ +// ignore-windows: Concurrency on Windows is not supported yet. 
+ +#![feature(core_intrinsics)] + +use std::sync::atomic::AtomicU32; +use std::sync::atomic::Ordering::*; +use std::thread::spawn; + +fn static_atomic(val: u32) -> &'static AtomicU32 { + let ret = Box::leak(Box::new(AtomicU32::new(val))); + ret +} + +fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] { + unsafe { std::mem::transmute::<*const u32, *const [u16; 2]>(dword) } +} + +// Racing mixed size reads may cause two loads to read-from +// the same store but observe different values, which doesn't make +// sense under the formal model so we forbade this. +pub fn main() { + let x = static_atomic(0); + + let j1 = spawn(move || { + x.load(Relaxed); + }); + + let j2 = spawn(move || { + let x_ptr = x as *const AtomicU32 as *const u32; + let x_split = split_u32_ptr(x_ptr); + unsafe { + let hi = &(*x_split)[0] as *const u16; + std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: imperfectly overlapping + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); +} diff --git a/tests/fail/weak_memory/racing_mixed_size_read.stderr b/tests/fail/weak_memory/racing_mixed_size_read.stderr new file mode 100644 index 0000000000..846d03f544 --- /dev/null +++ b/tests/fail/weak_memory/racing_mixed_size_read.stderr @@ -0,0 +1,17 @@ +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) + +error: unsupported operation: racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation + --> $DIR/racing_mixed_size_read.rs:LL:CC + | +LL | std::intrinsics::atomic_load_relaxed(hi); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation + | + = help: this is likely not a bug in the program; it indicates that the program performed an operation that the interpreter does not support + + = note: inside closure at $DIR/racing_mixed_size_read.rs:LL:CC + +note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace + +error: aborting due to previous error; 1 warning emitted + diff --git a/tests/pass/concurrency/channels.stderr b/tests/pass/concurrency/channels.stderr index 03676519d4..9fe6daa778 100644 --- a/tests/pass/concurrency/channels.stderr +++ b/tests/pass/concurrency/channels.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/concurrent_caller_location.stderr b/tests/pass/concurrency/concurrent_caller_location.stderr index 03676519d4..9fe6daa778 100644 --- a/tests/pass/concurrency/concurrent_caller_location.stderr +++ b/tests/pass/concurrency/concurrent_caller_location.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/data_race.rs b/tests/pass/concurrency/data_race.rs index 2dc0ee3f8f..c51080f474 100644 --- a/tests/pass/concurrency/data_race.rs +++ b/tests/pass/concurrency/data_race.rs @@ -1,4 +1,5 @@ // ignore-windows: Concurrency on Windows is not supported yet. +// compile-flags: -Zmiri-disable-weak-memory-emulation use std::sync::atomic::{AtomicUsize, fence, Ordering}; diff --git a/tests/pass/concurrency/data_race.stderr b/tests/pass/concurrency/data_race.stderr index 03676519d4..9fe6daa778 100644 --- a/tests/pass/concurrency/data_race.stderr +++ b/tests/pass/concurrency/data_race.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/disable_data_race_detector.stderr b/tests/pass/concurrency/disable_data_race_detector.stderr index 03676519d4..9fe6daa778 100644 --- a/tests/pass/concurrency/disable_data_race_detector.stderr +++ b/tests/pass/concurrency/disable_data_race_detector.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/issue1643.stderr b/tests/pass/concurrency/issue1643.stderr index 03676519d4..9fe6daa778 100644 --- a/tests/pass/concurrency/issue1643.stderr +++ b/tests/pass/concurrency/issue1643.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/linux-futex.stderr b/tests/pass/concurrency/linux-futex.stderr index 03676519d4..9fe6daa778 100644 --- a/tests/pass/concurrency/linux-futex.stderr +++ b/tests/pass/concurrency/linux-futex.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/simple.stderr b/tests/pass/concurrency/simple.stderr index bb60638bd6..0ba9e8645b 100644 --- a/tests/pass/concurrency/simple.stderr +++ b/tests/pass/concurrency/simple.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) thread '' panicked at 'Hello!', $DIR/simple.rs:LL:CC note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace diff --git a/tests/pass/concurrency/spin_loops.stderr b/tests/pass/concurrency/spin_loops.stderr index 03676519d4..9fe6daa778 100644 --- a/tests/pass/concurrency/spin_loops.stderr +++ b/tests/pass/concurrency/spin_loops.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. 
+warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/sync.stderr b/tests/pass/concurrency/sync.stderr index 03676519d4..9fe6daa778 100644 --- a/tests/pass/concurrency/sync.stderr +++ b/tests/pass/concurrency/sync.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/thread_locals.stderr b/tests/pass/concurrency/thread_locals.stderr index 03676519d4..9fe6daa778 100644 --- a/tests/pass/concurrency/thread_locals.stderr +++ b/tests/pass/concurrency/thread_locals.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/concurrency/tls_lib_drop.stderr b/tests/pass/concurrency/tls_lib_drop.stderr index 03676519d4..9fe6daa778 100644 --- a/tests/pass/concurrency/tls_lib_drop.stderr +++ b/tests/pass/concurrency/tls_lib_drop.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/libc.stderr b/tests/pass/libc.stderr index 03676519d4..9fe6daa778 100644 --- a/tests/pass/libc.stderr +++ b/tests/pass/libc.stderr @@ -1,2 +1,3 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) diff --git a/tests/pass/panic/concurrent-panic.stderr b/tests/pass/panic/concurrent-panic.stderr index ae132c9ee3..b90cc01bb8 100644 --- a/tests/pass/panic/concurrent-panic.stderr +++ b/tests/pass/panic/concurrent-panic.stderr @@ -1,4 +1,5 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) Thread 1 starting, will block on mutex Thread 1 reported it has started diff --git a/tests/pass/threadleak_ignored.stderr b/tests/pass/threadleak_ignored.stderr index aa03751185..af327a3012 100644 --- a/tests/pass/threadleak_ignored.stderr +++ b/tests/pass/threadleak_ignored.stderr @@ -1,3 +1,4 @@ -warning: thread support is experimental and incomplete: weak memory effects are not emulated. +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. + (see https://github.com/rust-lang/miri/issues/1388) Dropping 0 diff --git a/tests/pass/weak_memory/consistency.rs b/tests/pass/weak_memory/consistency.rs new file mode 100644 index 0000000000..8a7c1340cc --- /dev/null +++ b/tests/pass/weak_memory/consistency.rs @@ -0,0 +1,226 @@ +// ignore-windows: Concurrency on Windows is not supported yet. 
+// compile-flags: -Zmiri-ignore-leaks -Zmiri-disable-stacked-borrows + +// The following tests check whether our weak memory emulation produces +// any inconsistent execution outcomes +// +// Due to the random nature of choosing valid stores, it is always +// possible that our tests spuriously succeed: even though our weak +// memory emulation code has incorrectly identified a store in +// modification order as being valid, it may never be chosen by +// the RNG and never observed in our tests. +// +// To mitigate this, each test is run enough times such that the chance +// of spurious success is very low. These tests never spuriously fail. + +// Test cases and their consistent outcomes are from +// http://svr-pes20-cppmem.cl.cam.ac.uk/cppmem/ +// Based on +// M. Batty, S. Owens, S. Sarkar, P. Sewell and T. Weber, +// "Mathematizing C++ concurrency", ACM SIGPLAN Notices, vol. 46, no. 1, pp. 55-66, 2011. +// Available: https://ss265.host.cs.st-andrews.ac.uk/papers/n3132.pdf. + +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering::*; +use std::thread::spawn; + +#[derive(Copy, Clone)] +struct EvilSend<T>(pub T); + +unsafe impl<T> Send for EvilSend<T> {} +unsafe impl<T> Sync for EvilSend<T> {} + +// We can't create static items because we need to run each test +// multiple times +fn static_atomic(val: usize) -> &'static AtomicUsize { + let ret = Box::leak(Box::new(AtomicUsize::new(val))); + ret +} + +// Spins until it acquires a pre-determined value. +fn acquires_value(loc: &AtomicUsize, val: usize) -> usize { + while loc.load(Acquire) != val { + std::hint::spin_loop(); + } + val +} + +fn test_corr() { + let x = static_atomic(0); + let y = static_atomic(0); + + let j1 = spawn(move || { + x.store(1, Relaxed); + x.store(2, Relaxed); + }); + + let j2 = spawn(move || { + let r2 = x.load(Relaxed); // -------------------------------------+ + y.store(1, Release); // ---------------------+ | + r2 // | | + }); // | | + // |synchronizes-with |happens-before + let j3 = spawn(move || { // | | + acquires_value(&y, 1); // <------------------+ | + x.load(Relaxed) // <----------------------------------------------+ + // The two reads on x are ordered by hb, so they cannot observe values + // differently from the modification order. If the first read observed + // 2, then the second read must observe 2 as well.
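+ // Put differently: with x's modification order 0 -> 1 -> 2, the outcomes + // (r2, r3) = (1, 0), (2, 0) and (2, 1) can never be observed; the check + // after the joins asserts the r2 == 2 case.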
+ }); + + j1.join().unwrap(); + let r2 = j2.join().unwrap(); + let r3 = j3.join().unwrap(); + if r2 == 2 { + assert_eq!(r3, 2); + } +} + +fn test_wrc() { + let x = static_atomic(0); + let y = static_atomic(0); + + let j1 = spawn(move || { + x.store(1, Release); // ---------------------+---------------------+ + }); // | | + // |synchronizes-with | + let j2 = spawn(move || { // | | + acquires_value(&x, 1); // <------------------+ | + y.store(1, Release); // ---------------------+ |happens-before + }); // | | + // |synchronizes-with | + let j3 = spawn(move || { // | | + acquires_value(&y, 1); // <------------------+ | + x.load(Relaxed) // <-----------------------------------------------+ + }); + + j1.join().unwrap(); + j2.join().unwrap(); + let r3 = j3.join().unwrap(); + + assert_eq!(r3, 1); +} + +fn test_message_passing() { + let mut var = 0u32; + let ptr = &mut var as *mut u32; + let x = EvilSend(ptr); + let y = static_atomic(0); + + let j1 = spawn(move || { + unsafe { *x.0 = 1 }; // -----------------------------------------+ + y.store(1, Release); // ---------------------+ | + }); // | | + // |synchronizes-with | happens-before + let j2 = spawn(move || { // | | + acquires_value(&y, 1); // <------------------+ | + unsafe { *x.0 } // <---------------------------------------------+ + }); + + j1.join().unwrap(); + let r2 = j2.join().unwrap(); + + assert_eq!(r2, 1); +} + +// LB+acq_rel+acq_rel +fn test_load_buffering_acq_rel() { + let x = static_atomic(0); + let y = static_atomic(0); + let j1 = spawn(move || { + let r1 = x.load(Acquire); + y.store(1, Release); + r1 + }); + + let j2 = spawn(move || { + let r2 = y.load(Acquire); + x.store(1, Release); + r2 + }); + + let r1 = j1.join().unwrap(); + let r2 = j2.join().unwrap(); + + // 3 consistent outcomes: (0,0), (0,1), (1,0) + assert_ne!((r1, r2), (1, 1)); +} + +fn test_mixed_access() { + /* + int main() { + atomic_int x = 0; + {{{ + x.store(1, mo_relaxed); + }}} + + x.store(2, mo_relaxed); + + {{{ + r1 = x.load(mo_relaxed); + }}} + + return 0; + } + */ + let x = static_atomic(0); + + spawn(move || { + x.store(1, Relaxed); + }) + .join() + .unwrap(); + + x.store(2, Relaxed); + + let r2 = spawn(move || x.load(Relaxed)).join().unwrap(); + + assert_eq!(r2, 2); +} + +// The following two tests are taken from Repairing Sequential Consistency in C/C++11 +// by Lahav et al. +// https://plv.mpi-sws.org/scfix/paper.pdf + +// Test case SB +fn test_sc_store_buffering() { + let x = static_atomic(0); + let y = static_atomic(0); + + let j1 = spawn(move || { + x.store(1, SeqCst); + y.load(SeqCst) + }); + + let j2 = spawn(move || { + y.store(1, SeqCst); + x.load(SeqCst) + }); + + let a = j1.join().unwrap(); + let b = j2.join().unwrap(); + + assert_ne!((a, b), (0, 0)); +} + +fn test_single_thread() { + let x = AtomicUsize::new(42); + + assert_eq!(x.load(Relaxed), 42); + + x.store(43, Relaxed); + + assert_eq!(x.load(Relaxed), 43); +} + +pub fn main() { + for _ in 0..100 { + test_single_thread(); + test_mixed_access(); + test_load_buffering_acq_rel(); + test_message_passing(); + test_wrc(); + test_corr(); + test_sc_store_buffering(); + } +} diff --git a/tests/pass/weak_memory/consistency.stderr b/tests/pass/weak_memory/consistency.stderr new file mode 100644 index 0000000000..9fe6daa778 --- /dev/null +++ b/tests/pass/weak_memory/consistency.stderr @@ -0,0 +1,3 @@ +warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops. 
+ (see https://github.com/rust-lang/miri/issues/1388)
+
diff --git a/tests/pass/weak_memory/extra_cpp.rs b/tests/pass/weak_memory/extra_cpp.rs
new file mode 100644
index 0000000000..750c628458
--- /dev/null
+++ b/tests/pass/weak_memory/extra_cpp.rs
@@ -0,0 +1,82 @@
+// ignore-windows: Concurrency on Windows is not supported yet.
+// compile-flags: -Zmiri-ignore-leaks
+
+// Tests operations not performable through C++'s atomic API
+// but doable in safe (at least sound) Rust.
+
+#![feature(atomic_from_mut)]
+#![feature(core_intrinsics)]
+
+use std::sync::atomic::Ordering::*;
+use std::sync::atomic::{AtomicU16, AtomicU32};
+use std::thread::spawn;
+
+fn static_atomic_mut(val: u32) -> &'static mut AtomicU32 {
+    let ret = Box::leak(Box::new(AtomicU32::new(val)));
+    ret
+}
+
+fn split_u32(dword: &mut u32) -> &mut [u16; 2] {
+    unsafe { std::mem::transmute::<&mut u32, &mut [u16; 2]>(dword) }
+}
+
+fn mem_replace() {
+    let mut x = AtomicU32::new(0);
+
+    let old_x = std::mem::replace(&mut x, AtomicU32::new(42));
+
+    assert_eq!(x.load(Relaxed), 42);
+    assert_eq!(old_x.load(Relaxed), 0);
+}
+
+fn assign_to_mut() {
+    let x = static_atomic_mut(0);
+    x.store(1, Relaxed);
+
+    *x = AtomicU32::new(2);
+
+    assert_eq!(x.load(Relaxed), 2);
+}
+
+fn get_mut_write() {
+    let x = static_atomic_mut(0);
+    x.store(1, Relaxed);
+    {
+        let x_mut = x.get_mut();
+        *x_mut = 2;
+    }
+
+    let j1 = spawn(move || x.load(Relaxed));
+
+    let r1 = j1.join().unwrap();
+    assert_eq!(r1, 2);
+}
+
+// This is technically doable in C++ with atomic_ref,
+// but little literature exists at the moment on its involvement
+// in mixed size/atomicity accesses.
+fn from_mut_split() {
+    let mut x: u32 = 0;
+
+    {
+        let x_atomic = AtomicU32::from_mut(&mut x);
+        x_atomic.store(u32::from_be(0xabbafafa), Relaxed);
+    }
+
+    // Split the `AtomicU32` into two `AtomicU16`.
+    // Crucially, there is no non-atomic access to `x`! All accesses are atomic, but of different size.
+    let (x_hi, x_lo) = split_u32(&mut x).split_at_mut(1);
+
+    let x_hi_atomic = AtomicU16::from_mut(&mut x_hi[0]);
+    let x_lo_atomic = AtomicU16::from_mut(&mut x_lo[0]);
+
+    assert_eq!(x_hi_atomic.load(Relaxed), u16::from_be(0xabba));
+    assert_eq!(x_lo_atomic.load(Relaxed), u16::from_be(0xfafa));
+}
+
+pub fn main() {
+    get_mut_write();
+    from_mut_split();
+    assign_to_mut();
+    mem_replace();
+}
diff --git a/tests/pass/weak_memory/extra_cpp.stderr b/tests/pass/weak_memory/extra_cpp.stderr
new file mode 100644
index 0000000000..9fe6daa778
--- /dev/null
+++ b/tests/pass/weak_memory/extra_cpp.stderr
@@ -0,0 +1,3 @@
+warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops.
+ (see https://github.com/rust-lang/miri/issues/1388)
+
diff --git a/tests/pass/weak_memory/extra_cpp_unsafe.rs b/tests/pass/weak_memory/extra_cpp_unsafe.rs
new file mode 100644
index 0000000000..d77a090e6e
--- /dev/null
+++ b/tests/pass/weak_memory/extra_cpp_unsafe.rs
@@ -0,0 +1,42 @@
+// ignore-windows: Concurrency on Windows is not supported yet.
+// compile-flags: -Zmiri-ignore-leaks
+
+// Tests operations not performable through C++'s atomic API
+// but doable in unsafe Rust, which we think *should* be fine.
+// Nonetheless, they may be deemed inconsistent with the
+// memory model in the future.
+
+#![feature(atomic_from_mut)]
+#![feature(core_intrinsics)]
+
+use std::sync::atomic::AtomicU32;
+use std::sync::atomic::Ordering::*;
+use std::thread::spawn;
+
+fn static_atomic(val: u32) -> &'static AtomicU32 {
+    let ret = Box::leak(Box::new(AtomicU32::new(val)));
+    ret
+}
+
+// We allow perfectly overlapping non-atomic and atomic reads to race
+fn racing_mixed_atomicity_read() {
+    let x = static_atomic(0);
+    x.store(42, Relaxed);
+
+    let j1 = spawn(move || x.load(Relaxed));
+
+    let j2 = spawn(move || {
+        let x_ptr = x as *const AtomicU32 as *const u32;
+        unsafe { std::intrinsics::atomic_load_relaxed(x_ptr) }
+    });
+
+    let r1 = j1.join().unwrap();
+    let r2 = j2.join().unwrap();
+
+    assert_eq!(r1, 42);
+    assert_eq!(r2, 42);
+}
+
+pub fn main() {
+    racing_mixed_atomicity_read();
+}
diff --git a/tests/pass/weak_memory/extra_cpp_unsafe.stderr b/tests/pass/weak_memory/extra_cpp_unsafe.stderr
new file mode 100644
index 0000000000..9fe6daa778
--- /dev/null
+++ b/tests/pass/weak_memory/extra_cpp_unsafe.stderr
@@ -0,0 +1,3 @@
+warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops.
+ (see https://github.com/rust-lang/miri/issues/1388)
+
diff --git a/tests/pass/weak_memory/weak.rs b/tests/pass/weak_memory/weak.rs
new file mode 100644
index 0000000000..70e1bf00f4
--- /dev/null
+++ b/tests/pass/weak_memory/weak.rs
@@ -0,0 +1,109 @@
+// ignore-windows: Concurrency on Windows is not supported yet.
+// compile-flags: -Zmiri-ignore-leaks
+
+// Tests showing that weak memory behaviours are exhibited. All tests
+// return true when the desired behaviour is seen.
+// This is scheduler- and pseudo-RNG-dependent, so each test is
+// run multiple times until one try returns true.
+// Spurious failure is possible if you are really unlucky with
+// the RNG and always read the latest value from the store buffer.
+
+use std::sync::atomic::AtomicUsize;
+use std::sync::atomic::Ordering::*;
+use std::thread::spawn;
+
+#[derive(Copy, Clone)]
+struct EvilSend<T>(pub T);
+
+unsafe impl<T> Send for EvilSend<T> {}
+unsafe impl<T> Sync for EvilSend<T> {}
+
+// We can't create static items because we need to run each test
+// multiple times
+fn static_atomic(val: usize) -> &'static AtomicUsize {
+    let ret = Box::leak(Box::new(AtomicUsize::new(val)));
+    ret
+}
+
+// Spins until it reads the given value
+fn reads_value(loc: &AtomicUsize, val: usize) -> usize {
+    while loc.load(Relaxed) != val {
+        std::hint::spin_loop();
+    }
+    val
+}
+
+fn relaxed() -> bool {
+    let x = static_atomic(0);
+    let j1 = spawn(move || {
+        x.store(1, Relaxed);
+        x.store(2, Relaxed);
+    });
+
+    let j2 = spawn(move || x.load(Relaxed));
+
+    j1.join().unwrap();
+    let r2 = j2.join().unwrap();
+
+    r2 == 1
+}
+
+// https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf Figure 8
+fn seq_cst() -> bool {
+    let x = static_atomic(0);
+
+    let j1 = spawn(move || {
+        x.store(1, Relaxed);
+    });
+
+    let j2 = spawn(move || {
+        x.store(2, SeqCst);
+        x.store(3, SeqCst);
+    });
+
+    let j3 = spawn(move || x.load(SeqCst));
+
+    j1.join().unwrap();
+    j2.join().unwrap();
+    let r3 = j3.join().unwrap();
+
+    r3 == 1
+}
+
+fn initialization_write() -> bool {
+    let x = static_atomic(11);
+    assert_eq!(x.load(Relaxed), 11);
+
+    let wait = static_atomic(0);
+
+    let j1 = spawn(move || {
+        x.store(22, Relaxed);
+        // Relaxed is intentional.
+        // We want to test whether thread 2 reads the initialisation write
+        // after a relaxed write
+        wait.store(1, Relaxed);
+    });
+
+    let j2 = spawn(move || {
+        reads_value(wait, 1);
+        x.load(Relaxed)
+    });
+
+    j1.join().unwrap();
+    let r2 = j2.join().unwrap();
+
+    r2 == 11
+}
+
+// Asserts that the function returns true at least once in 100 runs
+macro_rules! assert_once {
+    ($f:ident) => {
+        assert!(std::iter::repeat_with(|| $f()).take(100).any(|x| x));
+    };
+}
+
+pub fn main() {
+    assert_once!(relaxed);
+    assert_once!(seq_cst);
+    assert_once!(initialization_write);
+}
diff --git a/tests/pass/weak_memory/weak.stderr b/tests/pass/weak_memory/weak.stderr
new file mode 100644
index 0000000000..9fe6daa778
--- /dev/null
+++ b/tests/pass/weak_memory/weak.stderr
@@ -0,0 +1,3 @@
+warning: thread support is experimental: the scheduler is not preemptive, and can get stuck in spin loops.
+ (see https://github.com/rust-lang/miri/issues/1388)
+
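
For reference, here is a minimal sketch (not part of the diff above) of how another classic litmus test could be written in the same style as `weak.rs`: store buffering with `Relaxed` orderings. The `(0, 0)` outcome is permitted by the C++11 model for this program, and it corresponds to each thread reading an outdated value, which is exactly what the store-buffer emulation is meant to exhibit. The helper and retry loop mirror the ones in `weak.rs`; the function name `relaxed_store_buffering` is hypothetical and the test would likewise need `-Zmiri-ignore-leaks` because of the leaked atomics.

```rust
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering::*;
use std::thread::spawn;

// Same leaking helper as in weak.rs: each run needs a fresh &'static atomic.
fn static_atomic(val: usize) -> &'static AtomicUsize {
    Box::leak(Box::new(AtomicUsize::new(val)))
}

// SB with Relaxed orderings: returns true when the weak (0, 0) outcome is seen.
fn relaxed_store_buffering() -> bool {
    let x = static_atomic(0);
    let y = static_atomic(0);

    let j1 = spawn(move || {
        x.store(1, Relaxed);
        y.load(Relaxed)
    });
    let j2 = spawn(move || {
        y.store(1, Relaxed);
        x.load(Relaxed)
    });

    let a = j1.join().unwrap();
    let b = j2.join().unwrap();
    (a, b) == (0, 0)
}

fn main() {
    // As with assert_once! in weak.rs, retry until the weak outcome shows up at least once.
    assert!(std::iter::repeat_with(relaxed_store_buffering).take(100).any(|x| x));
}
```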