From 89814f1b3f6c239f472dea4798a1189a30d7efa2 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Mon, 2 Nov 2020 00:23:27 +0000 Subject: [PATCH 01/17] Initial data-race detector, passes all current tests but additional tests are required --- Cargo.lock | 7 + Cargo.toml | 1 + src/data_race.rs | 1406 +++++++++++++++++++++++++++++++++++++ src/lib.rs | 5 + src/machine.rs | 11 +- src/shims/intrinsics.rs | 425 +++++++---- src/shims/posix/sync.rs | 16 +- src/shims/posix/thread.rs | 17 +- src/sync.rs | 64 +- src/thread.rs | 30 +- 10 files changed, 1802 insertions(+), 180 deletions(-) create mode 100644 src/data_race.rs diff --git a/Cargo.lock b/Cargo.lock index 8c73cb0553..78838acb2a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -282,6 +282,7 @@ dependencies = [ "rustc-workspace-hack", "rustc_version", "shell-escape", + "smallvec", ] [[package]] @@ -496,6 +497,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45bb67a18fa91266cc7807181f62f9178a6873bfad7dc788c42e6430db40184f" +[[package]] +name = "smallvec" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbee7696b84bbf3d89a1c2eccff0850e3047ed46bfcd2e92c29a2d074d57e252" + [[package]] name = "socket2" version = "0.3.15" diff --git a/Cargo.toml b/Cargo.toml index c36a97bb0a..4413dab321 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,6 +30,7 @@ log = "0.4" shell-escape = "0.1.4" hex = "0.4.0" rand = "0.7" +smallvec = "1.4.2" # A noop dependency that changes in the Rust repository, it's a bit of a hack. # See the `src/tools/rustc-workspace-hack/README.md` file in `rust-lang/rust` diff --git a/src/data_race.rs b/src/data_race.rs new file mode 100644 index 0000000000..5952606394 --- /dev/null +++ b/src/data_race.rs @@ -0,0 +1,1406 @@ +//! Implementation of a data-race detector +//! uses Lamport Timestamps / Vector-clocks +//! base on the Dyamic Race Detection for C++: +//! - https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf +//! to extend data-race detection to work correctly with fences +//! and RMW operations +//! This does not explore weak memory orders and so can still miss data-races +//! 
but should not report false-positives + +use std::{fmt::{self, Debug}, cmp::Ordering, rc::Rc, cell::{Cell, RefCell, Ref, RefMut}, ops::Index}; + +use rustc_index::vec::{Idx, IndexVec}; +use rustc_target::abi::Size; +use rustc_middle::ty::layout::TyAndLayout; +use rustc_data_structures::fx::FxHashMap; + +use smallvec::SmallVec; + +use crate::*; + +pub type AllocExtra = VClockAlloc; +pub type MemoryExtra = Rc; + +/// Valid atomic read-write operations, alias of atomic::Ordering (not non-exhaustive) +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum AtomicRWOp { + Relaxed, + Acquire, + Release, + AcqRel, + SeqCst, +} + +/// Valid atomic read operations, subset of atomic::Ordering +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum AtomicReadOp { + Relaxed, + Acquire, + SeqCst, +} + +/// Valid atomic write operations, subset of atomic::Ordering +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum AtomicWriteOp { + Relaxed, + Release, + SeqCst, +} + + +/// Valid atomic fence operations, subset of atomic::Ordering +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum AtomicFenceOp { + Acquire, + Release, + AcqRel, + SeqCst, +} + +/// Evaluation context extensions +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {} +pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> { + + /// Variant of `read_immediate` that does not perform `data-race` checks. + fn read_immediate_racy(&self, op: MPlaceTy<'tcx, Tag>) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { + let this = self.eval_context_ref(); + let data_race = &*this.memory.extra.data_race; + let old = data_race.multi_threaded.get(); + + data_race.multi_threaded.set(false); + let res = this.read_immediate(op.into()); + + data_race.multi_threaded.set(old); + res + } + + /// Variant of `write_immediate` that does not perform `data-race` checks. + fn write_immediate_racy( + &mut self, src: Immediate, dest: MPlaceTy<'tcx, Tag> + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let data_race = &*this.memory.extra.data_race; + let old = data_race.multi_threaded.get(); + + data_race.multi_threaded.set(false); + let imm = this.write_immediate(src, dest.into()); + + let data_race = &*this.memory.extra.data_race; + data_race.multi_threaded.set(old); + imm + } + + /// Variant of `read_scalar` that does not perform data-race checks. + fn read_scalar_racy( + &self, op: MPlaceTy<'tcx, Tag> + )-> InterpResult<'tcx, ScalarMaybeUninit> { + Ok(self.read_immediate_racy(op)?.to_scalar_or_uninit()) + } + + /// Variant of `write_scalar` that does not perform data-race checks. + fn write_scalar_racy( + &mut self, val: ScalarMaybeUninit, dest: MPlaceTy<'tcx, Tag> + ) -> InterpResult<'tcx> { + self.write_immediate_racy(Immediate::Scalar(val.into()), dest) + } + + /// Variant of `read_scalar_at_offset` helper function that does not perform + /// `data-race checks. 
+ fn read_scalar_at_offset_racy( + &self, + op: OpTy<'tcx, Tag>, + offset: u64, + layout: TyAndLayout<'tcx>, + ) -> InterpResult<'tcx, ScalarMaybeUninit> { + let this = self.eval_context_ref(); + let op_place = this.deref_operand(op)?; + let offset = Size::from_bytes(offset); + // Ensure that the following read at an offset is within bounds + assert!(op_place.layout.size >= offset + layout.size); + let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; + this.read_scalar_racy(value_place.into()) + } + + /// Variant of `write_scalar_at_offfset` helper function that does not perform + /// data-race checks. + fn write_scalar_at_offset_racy( + &mut self, + op: OpTy<'tcx, Tag>, + offset: u64, + value: impl Into>, + layout: TyAndLayout<'tcx>, + ) -> InterpResult<'tcx, ()> { + let this = self.eval_context_mut(); + let op_place = this.deref_operand(op)?; + let offset = Size::from_bytes(offset); + // Ensure that the following read at an offset is within bounds + assert!(op_place.layout.size >= offset + layout.size); + let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; + this.write_scalar_racy(value.into(), value_place.into()) + } + + /// Load the data race allocation state for a given memory place + /// also returns the size and the offset of the result in the allocation + /// metadata + fn load_data_race_state<'a>( + &'a mut self, place: MPlaceTy<'tcx, Tag> + ) -> InterpResult<'tcx, (&'a mut VClockAlloc, Size, Size)> where 'mir: 'a { + let this = self.eval_context_mut(); + + let ptr = place.ptr.assert_ptr(); + let size = place.layout.size; + let data_race = &mut this.memory.get_raw_mut(ptr.alloc_id)?.extra.data_race; + + Ok((data_race, size, ptr.offset)) + } + + /// Update the data-race detector for an atomic read occuring at the + /// associated memory-place and on the current thread + fn validate_atomic_load( + &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let data_race = &*this.memory.extra.data_race; + if data_race.multi_threaded.get() { + data_race.advance_vector_clock(); + + let ( + alloc, size, offset + ) = this.load_data_race_state(place)?; + log::trace!( + "Atomic load on {:?} with ordering {:?}, in memory({:?}, offset={}, size={})", + alloc.global.current_thread(), atomic, + place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes() + ); + + let mut current_state = alloc.global.current_thread_state_mut(); + if atomic == AtomicReadOp::Relaxed { + // Perform relaxed atomic load + for range in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { + range.load_relaxed(&mut *current_state); + } + }else{ + // Perform acquire(or seq-cst) atomic load + for range in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { + range.acquire(&mut *current_state); + } + } + + // Log changes to atomic memory + if log::log_enabled!(log::Level::Trace) { + for range in alloc.alloc_ranges.get_mut().iter(offset, size) { + log::trace!( + " updated atomic memory({:?}, offset={}, size={}) to {:#?}", + place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), + range.atomic_ops + ); + } + } + + std::mem::drop(current_state); + let data_race = &*this.memory.extra.data_race; + data_race.advance_vector_clock(); + } + Ok(()) + } + + /// Update the data-race detector for an atomic write occuring at the + /// associated memory-place and on the current thread + fn validate_atomic_store( + &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicWriteOp + ) -> InterpResult<'tcx> { + let 
this = self.eval_context_mut(); + let data_race = &*this.memory.extra.data_race; + if data_race.multi_threaded.get() { + data_race.advance_vector_clock(); + + let ( + alloc, size, offset + ) = this.load_data_race_state(place)?; + let current_thread = alloc.global.current_thread(); + let mut current_state = alloc.global.current_thread_state_mut(); + log::trace!( + "Atomic store on {:?} with ordering {:?}, in memory({:?}, offset={}, size={})", + current_thread, atomic, + place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes() + ); + + if atomic == AtomicWriteOp::Relaxed { + // Perform relaxed atomic store + for range in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { + range.store_relaxed(&mut *current_state, current_thread); + } + }else{ + // Perform release(or seq-cst) atomic store + for range in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { + range.release(&mut *current_state, current_thread); + } + } + + // Log changes to atomic memory + if log::log_enabled!(log::Level::Trace) { + for range in alloc.alloc_ranges.get_mut().iter(offset, size) { + log::trace!( + " updated atomic memory({:?}, offset={}, size={}) to {:#?}", + place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), + range.atomic_ops + ); + } + } + + std::mem::drop(current_state); + let data_race = &*this.memory.extra.data_race; + data_race.advance_vector_clock(); + } + Ok(()) + } + + /// Update the data-race detector for an atomic read-modify-write occuring + /// at the associated memory place and on the current thread + fn validate_atomic_rmw( + &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicRWOp + ) -> InterpResult<'tcx> { + use AtomicRWOp::*; + let this = self.eval_context_mut(); + let data_race = &*this.memory.extra.data_race; + if data_race.multi_threaded.get() { + data_race.advance_vector_clock(); + + let ( + alloc, size, offset + ) = this.load_data_race_state(place)?; + let current_thread = alloc.global.current_thread(); + let mut current_state = alloc.global.current_thread_state_mut(); + log::trace!( + "Atomic RMW on {:?} with ordering {:?}, in memory({:?}, offset={}, size={})", + current_thread, atomic, + place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes() + ); + + let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); + let release = matches!(atomic, Release | AcqRel | SeqCst); + for range in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { + //FIXME: this is probably still slightly wrong due to the quirks + // in the c++11 memory model + if acquire { + // Atomic RW-Op acquire + range.acquire(&mut *current_state); + }else{ + range.load_relaxed(&mut *current_state); + } + if release { + // Atomic RW-Op release + range.rmw_release(&mut *current_state, current_thread); + }else{ + range.rmw_relaxed(&mut *current_state); + } + } + + // Log changes to atomic memory + if log::log_enabled!(log::Level::Trace) { + for range in alloc.alloc_ranges.get_mut().iter(offset, size) { + log::trace!( + " updated atomic memory({:?}, offset={}, size={}) to {:#?}", + place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), + range.atomic_ops + ); + } + } + + std::mem::drop(current_state); + let data_race = &*this.memory.extra.data_race; + data_race.advance_vector_clock(); + } + Ok(()) + } + + /// Update the data-race detector for an atomic fence on the current thread + fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let data_race = &*this.memory.extra.data_race; + if data_race.multi_threaded.get() { + 
data_race.advance_vector_clock(); + + log::trace!("Atomic fence on {:?} with ordering {:?}", data_race.current_thread(), atomic); + // Apply data-race detection for the current fences + // this treats AcqRel and SeqCst as the same as a acquire + // and release fence applied in the same timestamp. + if atomic != AtomicFenceOp::Release { + // Either Acquire | AcqRel | SeqCst + data_race.current_thread_state_mut().apply_acquire_fence(); + } + if atomic != AtomicFenceOp::Acquire { + // Either Release | AcqRel | SeqCst + data_race.current_thread_state_mut().apply_release_fence(); + } + + data_race.advance_vector_clock(); + } + Ok(()) + } +} + +/// Handle for locks to express their +/// acquire-release semantics +#[derive(Clone, Debug, Default)] +pub struct DataRaceLockHandle { + + /// Internal acquire-release clock + /// to express the acquire release sync + /// found in concurrency primitives + clock: VClock, +} +impl DataRaceLockHandle { + pub fn set_values(&mut self, other: &Self) { + self.clock.set_values(&other.clock) + } + pub fn reset(&mut self) { + self.clock.set_zero_vector(); + } +} + + +/// Avoid an atomic allocation for the common +/// case with atomic operations where the number +/// of active release sequences is small +#[derive(Clone, PartialEq, Eq)] +enum AtomicReleaseSequences { + + /// Contains one or no values + /// if empty: (None, reset vector clock) + /// if one: (Some(thread), thread_clock) + ReleaseOneOrEmpty(Option, VClock), + + /// Contains two or more values + /// stored in a hash-map of thread id to + /// vector clocks + ReleaseMany(FxHashMap) +} +impl AtomicReleaseSequences { + + /// Return an empty set of atomic release sequences + #[inline] + fn new() -> AtomicReleaseSequences { + Self::ReleaseOneOrEmpty(None, VClock::default()) + } + + /// Remove all values except for the value stored at `thread` and set + /// the vector clock to the associated `clock` value + #[inline] + fn clear_and_set(&mut self, thread: ThreadId, clock: &VClock) { + match self { + Self::ReleaseOneOrEmpty(id, rel_clock) => { + *id = Some(thread); + rel_clock.set_values(clock); + } + Self::ReleaseMany(_) => { + *self = Self::ReleaseOneOrEmpty(Some(thread), clock.clone()); + } + } + } + + /// Remove all values except for the value stored at `thread` + #[inline] + fn clear_and_retain(&mut self, thread: ThreadId) { + match self { + Self::ReleaseOneOrEmpty(id, rel_clock) => { + // Keep or forget depending on id + if *id == Some(thread) { + *id = None; + rel_clock.set_zero_vector(); + } + }, + Self::ReleaseMany(hash_map) => { + // Retain only the thread element, so reduce to size + // of 1 or 0, and move to smaller format + if let Some(clock) = hash_map.remove(&thread) { + *self = Self::ReleaseOneOrEmpty(Some(thread), clock); + }else{ + *self = Self::new(); + } + } + } + } + + /// Insert a release sequence at `thread` with values `clock` + fn insert(&mut self, thread: ThreadId, clock: &VClock) { + match self { + Self::ReleaseOneOrEmpty(id, rel_clock) => { + if id.map_or(true, |id| id == thread) { + *id = Some(thread); + rel_clock.set_values(clock); + }else{ + let mut hash_map = FxHashMap::default(); + hash_map.insert(thread, clock.clone()); + hash_map.insert(id.unwrap(), rel_clock.clone()); + *self = Self::ReleaseMany(hash_map); + } + }, + Self::ReleaseMany(hash_map) => { + hash_map.insert(thread, clock.clone()); + } + } + } + + /// Return the release sequence at `thread` if one exists + #[inline] + fn load(&self, thread: ThreadId) -> Option<&VClock> { + match self { + Self::ReleaseOneOrEmpty(id, 
clock) => { + if *id == Some(thread) { + Some(clock) + }else{ + None + } + }, + Self::ReleaseMany(hash_map) => { + hash_map.get(&thread) + } + } + } +} + +/// Custom debug implementation to correctly +/// print debug as a logical mapping from threads +/// to vector-clocks +impl Debug for AtomicReleaseSequences { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::ReleaseOneOrEmpty(None,_) => { + f.debug_map().finish() + }, + Self::ReleaseOneOrEmpty(Some(id), clock) => { + f.debug_map().entry(&id, &clock).finish() + }, + Self::ReleaseMany(hash_map) => { + Debug::fmt(hash_map, f) + } + } + } +} + +/// Externally stored memory cell clocks +/// explicitly to reduce memory usage for the +/// common case where no atomic operations +/// exists on the memory cell +#[derive(Clone, PartialEq, Eq, Debug)] +struct AtomicMemoryCellClocks { + + /// Synchronization vector for acquire-release semantics + sync_vector: VClock, + + /// The Hash-Map of all threads for which a release + /// sequence exists in the memory cell + release_sequences: AtomicReleaseSequences, +} + +/// Memory Cell vector clock metadata +/// for data-race detection +#[derive(Clone, PartialEq, Eq, Debug)] +struct MemoryCellClocks { + + /// The vector-clock of the last write + write: Timestamp, + + /// The id of the thread that performed the last write to this memory location + write_thread: ThreadId, + + /// The vector-clock of the set of previous reads + /// each index is set to the timestamp that the associated + /// thread last read this value. + read: VClock, + + /// Atomic acquire & release sequence tracking clocks + /// for non-atomic memory in the common case this + /// value is set to None + atomic_ops: Option>, +} + +/// Create a default memory cell clocks instance +/// for uninitialized memory +impl Default for MemoryCellClocks { + fn default() -> Self { + MemoryCellClocks { + read: VClock::default(), + write: 0, + write_thread: ThreadId::new(u32::MAX as usize), + atomic_ops: None + } + } +} + +impl MemoryCellClocks { + + /// Load the internal atomic memory cells if they exist + #[inline] + fn atomic(&mut self) -> Option<&AtomicMemoryCellClocks> { + match &self.atomic_ops { + Some(op) => Some(&*op), + None => None + } + } + + /// Load or create the internal atomic memory metadata + /// if it does not exist + #[inline] + fn atomic_mut(&mut self) -> &mut AtomicMemoryCellClocks { + self.atomic_ops.get_or_insert_with(|| { + Box::new(AtomicMemoryCellClocks { + sync_vector: VClock::default(), + release_sequences: AtomicReleaseSequences::new() + }) + }) + } + + /// Update memory cell data-race tracking for atomic + /// load acquire semantics, is a no-op if this memory was + /// not used previously as atomic memory + fn acquire(&mut self, clocks: &mut ThreadClockSet) { + if let Some(atomic) = self.atomic() { + clocks.clock.join(&atomic.sync_vector); + } + } + /// Update memory cell data-race tracking for atomic + /// load relaxed semantics, is a no-op if this memory was + /// not used previously as atomic memory + fn load_relaxed(&mut self, clocks: &mut ThreadClockSet) { + if let Some(atomic) = self.atomic() { + clocks.fence_acquire.join(&atomic.sync_vector); + } + } + + + /// Update the memory cell data-race tracking for atomic + /// store release semantics + fn release(&mut self, clocks: &ThreadClockSet, thread: ThreadId) { + let atomic = self.atomic_mut(); + atomic.sync_vector.set_values(&clocks.clock); + atomic.release_sequences.clear_and_set(thread, &clocks.clock); + } + /// Update the memory cell 
data-race tracking for atomic + /// store relaxed semantics + fn store_relaxed(&mut self, clocks: &ThreadClockSet, thread: ThreadId) { + let atomic = self.atomic_mut(); + atomic.sync_vector.set_values(&clocks.fence_release); + if let Some(release) = atomic.release_sequences.load(thread) { + atomic.sync_vector.join(release); + } + atomic.release_sequences.clear_and_retain(thread); + } + /// Update the memory cell data-race tracking for atomic + /// store release semantics for RMW operations + fn rmw_release(&mut self, clocks: &ThreadClockSet, thread: ThreadId) { + let atomic = self.atomic_mut(); + atomic.sync_vector.join(&clocks.clock); + atomic.release_sequences.insert(thread, &clocks.clock); + } + /// Update the memory cell data-race tracking for atomic + /// store relaxed semantics for RMW operations + fn rmw_relaxed(&mut self, clocks: &ThreadClockSet) { + let atomic = self.atomic_mut(); + atomic.sync_vector.join(&clocks.fence_release); + } + + + + /// Detect races for non-atomic read operations at the current memory cell + /// returns true if a data-race is detected + fn read_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> bool { + if self.write <= clocks.clock[self.write_thread] { + self.read.set_at_thread(&clocks.clock, thread); + false + }else{ + true + } + } + + /// Detect races for non-atomic write operations at the current memory cell + /// returns true if a data-race is detected + fn write_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> bool { + if self.write <= clocks.clock[self.write_thread] && self.read <= clocks.clock { + self.write = clocks.clock[thread]; + self.write_thread = thread; + self.read.set_zero_vector(); + false + }else{ + true + } + } +} + +/// Vector clock metadata for a logical memory allocation +#[derive(Debug, Clone)] +pub struct VClockAlloc { + + /// Range of Vector clocks, mapping to the vector-clock + /// index of the last write to the bytes in this allocation + alloc_ranges: RefCell>, + + // Pointer to global state + global: MemoryExtra, +} + +impl VClockAlloc { + + /// Create a new data-race allocation detector + pub fn new_allocation(global: &MemoryExtra, len: Size) -> VClockAlloc { + VClockAlloc { + global: Rc::clone(global), + alloc_ranges: RefCell::new( + RangeMap::new(len, MemoryCellClocks::default()) + ) + } + } + + /// Report a data-race found in the program + /// this finds the two racing threads and the type + /// of data-race that occured, this will also + /// return info about the memory location the data-race + /// occured in + #[cold] + #[inline(never)] + fn report_data_race<'tcx>( + global: &MemoryExtra, range: &MemoryCellClocks, action: &str, + pointer: Pointer, len: Size + ) -> InterpResult<'tcx> { + let current_thread = global.current_thread(); + let current_state = global.current_thread_state(); + let mut write_clock = VClock::default(); + let ( + other_action, other_thread, other_clock + ) = if range.write > current_state.clock[range.write_thread] { + + // Create effective write-clock that the data-race occured with + let wclock = write_clock.get_mut_with_min_len( + current_state.clock.as_slice().len() + .max(range.write_thread.to_u32() as usize + 1) + ); + wclock[range.write_thread.to_u32() as usize] = range.write; + ("WRITE", range.write_thread, write_clock.as_slice()) + }else{ + + // Find index in the read-clock that the data-race occured with + let read_slice = range.read.as_slice(); + let clock_slice = current_state.clock.as_slice(); + let conflicting_index = read_slice.iter() + 
.zip(clock_slice.iter()) + .enumerate().find_map(|(idx,(&read, &clock))| { + if read > clock { + Some(idx) + }else{ + None + } + }).unwrap_or_else(|| { + assert!(read_slice.len() > clock_slice.len(), "BUG: cannot find read race yet reported data-race"); + let rest_read = &read_slice[clock_slice.len()..]; + rest_read.iter().enumerate().find_map(|(idx, &val)| { + if val > 0 { + Some(idx + clock_slice.len()) + }else{ + None + } + }).expect("Invariant broken for read-slice, no 0 element at the tail") + }); + ("READ", ThreadId::new(conflicting_index), range.read.as_slice()) + }; + + let current_thread_info = global.print_thread_metadata(current_thread); + let other_thread_info = global.print_thread_metadata(other_thread); + + // Throw the data-race detection + throw_ub_format!( + "Data race detected between {} on {} and {} on {}, memory({:?},offset={},size={})\ + \n\t\t -current vector clock = {:?}\ + \n\t\t -conflicting timestamp = {:?}", + action, current_thread_info, + other_action, other_thread_info, + pointer.alloc_id, pointer.offset.bytes(), len.bytes(), + current_state.clock, + other_clock + ) + } + + /// Detect data-races for an unsychronized read operation, will not perform + /// data-race threads if `multi-threaded` is false, either due to no threads + /// being created or if it is temporarily disabled during a racy read or write + /// operation + pub fn read<'tcx>(&self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { + if self.global.multi_threaded.get() { + let current_thread = self.global.current_thread(); + let current_state = self.global.current_thread_state(); + + // The alloc-ranges are not split, however changes are not going to be made + // to the ranges being tested, so this is ok + let mut alloc_ranges = self.alloc_ranges.borrow_mut(); + for range in alloc_ranges.iter_mut(pointer.offset, len) { + if range.read_race_detect(&*current_state, current_thread) { + // Report data-race + return Self::report_data_race( + &self.global,range, "READ", pointer, len + ); + } + } + Ok(()) + }else{ + Ok(()) + } + } + /// Detect data-races for an unsychronized write operation, will not perform + /// data-race threads if `multi-threaded` is false, either due to no threads + /// being created or if it is temporarily disabled during a racy read or write + /// operation + pub fn write<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { + if self.global.multi_threaded.get() { + let current_thread = self.global.current_thread(); + let current_state = self.global.current_thread_state(); + for range in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { + if range.write_race_detect(&*current_state, current_thread) { + // Report data-race + return Self::report_data_race( + &self.global, range, "WRITE", pointer, len + ); + } + } + Ok(()) + }else{ + Ok(()) + } + } + /// Detect data-races for an unsychronized deallocate operation, will not perform + /// data-race threads if `multi-threaded` is false, either due to no threads + /// being created or if it is temporarily disabled during a racy read or write + /// operation + pub fn deallocate<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { + if self.global.multi_threaded.get() { + let current_thread = self.global.current_thread(); + let current_state = self.global.current_thread_state(); + for range in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { + if range.write_race_detect(&*current_state, current_thread) { + // Report data-race + return Self::report_data_race( + &self.global, range, 
"DEALLOCATE", pointer, len + ); + } + } + Ok(()) + }else{ + Ok(()) + } + } +} + +/// The current set of vector clocks describing the state +/// of a thread, contains the happens-before clock and +/// additional metadata to model atomic fence operations +#[derive(Clone, Default, Debug)] +struct ThreadClockSet { + /// The increasing clock representing timestamps + /// that happen-before this thread. + clock: VClock, + + /// The set of timestamps that will happen-before this + /// thread once it performs an acquire fence + fence_acquire: VClock, + + /// The last timesamp of happens-before relations that + /// have been released by this thread by a fence + fence_release: VClock, +} + +impl ThreadClockSet { + + /// Apply the effects of a release fence to this + /// set of thread vector clocks + #[inline] + fn apply_release_fence(&mut self) { + self.fence_release.set_values(&self.clock); + } + + /// Apply the effects of a acquire fence to this + /// set of thread vector clocks + #[inline] + fn apply_acquire_fence(&mut self) { + self.clock.join(&self.fence_acquire); + } + + /// Increment the happens-before clock at a + /// known index + #[inline] + fn increment_clock(&mut self, thread: ThreadId) { + self.clock.increment_thread(thread); + } + + /// Join the happens-before clock with that of + /// another thread, used to model thread join + /// operations + fn join_with(&mut self, other: &ThreadClockSet) { + self.clock.join(&other.clock); + } +} + +/// Global data-race detection state, contains the currently +/// executing thread as well as the vector-clocks associated +/// with each of the threads. +#[derive(Debug, Clone)] +pub struct GlobalState { + + /// Set to true once the first additional + /// thread has launched, due to the dependency + /// between before and after a thread launch + /// Any data-races must be recorded after this + /// so concurrent execution can ignore recording + /// any data-races + multi_threaded: Cell, + + /// The current vector clock for all threads + /// this includes threads that have terminated + /// execution + thread_clocks: RefCell>, + + /// Thread name cache for better diagnostics on the reporting + /// of a data-race + thread_names: RefCell>>>, + + /// The current thread being executed, + /// this is mirrored from the scheduler since + /// it is required for loading the current vector + /// clock for data-race detection + current_thread_id: Cell, +} +impl GlobalState { + + /// Create a new global state, setup with just thread-id=0 + /// advanced to timestamp = 1 + pub fn new() -> Self { + let mut vec = IndexVec::new(); + let thread_id = vec.push(ThreadClockSet::default()); + vec[thread_id].increment_clock(thread_id); + GlobalState { + multi_threaded: Cell::new(false), + thread_clocks: RefCell::new(vec), + thread_names: RefCell::new(IndexVec::new()), + current_thread_id: Cell::new(thread_id), + } + } + + + // Hook for thread creation, enabled multi-threaded execution and marks + // the current thread timestamp as happening-before the current thread + #[inline] + pub fn thread_created(&self, thread: ThreadId) { + + // Enable multi-threaded execution mode now that there are at least + // two threads + self.multi_threaded.set(true); + let current_thread = self.current_thread_id.get(); + let mut vectors = self.thread_clocks.borrow_mut(); + vectors.ensure_contains_elem(thread, Default::default); + let (current, created) = vectors.pick2_mut(current_thread, thread); + + // Pre increment clocks before atomic operation + current.increment_clock(current_thread); + + // 
The current thread happens-before the created thread + // so update the created vector clock + created.join_with(current); + + // Post increment clocks after atomic operation + current.increment_clock(current_thread); + created.increment_clock(thread); + } + + /// Hook on a thread join to update the implicit happens-before relation + /// between the joined thead and the current thread + #[inline] + pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) { + let mut vectors = self.thread_clocks.borrow_mut(); + let (current, join) = vectors.pick2_mut(current_thread, join_thread); + + // Pre increment clocks before atomic operation + current.increment_clock(current_thread); + join.increment_clock(join_thread); + + // The join thread happens-before the current thread + // so update the current vector clock + current.join_with(join); + + // Post increment clocks after atomic operation + current.increment_clock(current_thread); + join.increment_clock(join_thread); + } + + /// Hook for updating the local tracker of the currently + /// enabled thread, should always be updated whenever + /// `active_thread` in thread.rs is updated + #[inline] + pub fn thread_set_active(&self, thread: ThreadId) { + self.current_thread_id.set(thread); + } + + /// Hook for updating the local tracker of the threads name + /// this should always mirror the local value in thread.rs + /// the thread name is used for improved diagnostics + /// during a data-race + #[inline] + pub fn thread_set_name(&self, name: String) { + let name = name.into_boxed_str(); + let mut names = self.thread_names.borrow_mut(); + let thread = self.current_thread_id.get(); + names.ensure_contains_elem(thread, Default::default); + names[thread] = Some(name); + } + + + /// Advance the vector clock for a thread + /// this is called before and after any atomic/synchronizing operations + /// that may manipulate state + #[inline] + fn advance_vector_clock(&self) { + let thread = self.current_thread_id.get(); + let mut vectors = self.thread_clocks.borrow_mut(); + vectors[thread].increment_clock(thread); + + // Log the increment in the atomic vector clock + log::trace!("Atomic vector clock increase for {:?} to {:?}",thread, vectors[thread].clock); + } + + + /// Internal utility to identify a thread stored internally + /// returns the id and the name for better diagnostics + fn print_thread_metadata(&self, thread: ThreadId) -> String { + if let Some(Some(name)) = self.thread_names.borrow().get(thread) { + let name: &str = name; + format!("Thread(id = {:?}, name = {:?})", thread.to_u32(), &*name) + }else{ + format!("Thread(id = {:?})", thread.to_u32()) + } + } + + + /// Acquire a lock, express that the previous call of + /// `validate_lock_release` must happen before this + pub fn validate_lock_acquire(&self, lock: &DataRaceLockHandle, thread: ThreadId) { + let mut ref_vector = self.thread_clocks.borrow_mut(); + ref_vector[thread].increment_clock(thread); + + let clocks = &mut ref_vector[thread]; + clocks.clock.join(&lock.clock); + + ref_vector[thread].increment_clock(thread); + } + + /// Release a lock handle, express that this happens-before + /// any subsequent calls to `validate_lock_acquire` + pub fn validate_lock_release(&self, lock: &mut DataRaceLockHandle, thread: ThreadId) { + let mut ref_vector = self.thread_clocks.borrow_mut(); + ref_vector[thread].increment_clock(thread); + + let clocks = &ref_vector[thread]; + lock.clock.set_values(&clocks.clock); + + ref_vector[thread].increment_clock(thread); + } + + /// Release a lock 
handle, express that this happens-before + /// any subsequent calls to `validate_lock_acquire` as well + /// as any previous calls to this function after any + /// `validate_lock_release` calls + pub fn validate_lock_release_shared(&self, lock: &mut DataRaceLockHandle, thread: ThreadId) { + let mut ref_vector = self.thread_clocks.borrow_mut(); + ref_vector[thread].increment_clock(thread); + + let clocks = &ref_vector[thread]; + lock.clock.join(&clocks.clock); + + ref_vector[thread].increment_clock(thread); + } + + /// Load the thread clock set associated with the current thread + #[inline] + fn current_thread_state(&self) -> Ref<'_, ThreadClockSet> { + let ref_vector = self.thread_clocks.borrow(); + let thread = self.current_thread_id.get(); + Ref::map(ref_vector, |vector| &vector[thread]) + } + + /// Load the thread clock set associated with the current thread + /// mutably for modification + #[inline] + fn current_thread_state_mut(&self) -> RefMut<'_, ThreadClockSet> { + let ref_vector = self.thread_clocks.borrow_mut(); + let thread = self.current_thread_id.get(); + RefMut::map(ref_vector, |vector| &mut vector[thread]) + } + + /// Return the current thread, should be the same + /// as the data-race active thread + #[inline] + fn current_thread(&self) -> ThreadId { + self.current_thread_id.get() + } +} + + +/// The size of the vector-clock to store inline +/// clock vectors larger than this will be stored on the heap +const SMALL_VECTOR: usize = 4; + +/// The type of the time-stamps recorded in the data-race detector +/// set to a type of unsigned integer +type Timestamp = u32; + +/// A vector clock for detecting data-races +/// invariants: +/// - the last element in a VClock must not be 0 +/// -- this means that derive(PartialEq & Eq) is correct +/// -- as there is no implicit zero tail that might be equal +/// -- also simplifies the implementation of PartialOrd +#[derive(Clone, PartialEq, Eq, Default, Debug)] +pub struct VClock(SmallVec<[Timestamp; SMALL_VECTOR]>); + +impl VClock { + + /// Load the backing slice behind the clock vector. + #[inline] + fn as_slice(&self) -> &[Timestamp] { + self.0.as_slice() + } + + /// Get a mutable slice to the internal vector with minimum `min_len` + /// elements, to preserve invariants this vector must modify + /// the `min_len`-1 nth element to a non-zero value + #[inline] + fn get_mut_with_min_len(&mut self, min_len: usize) -> &mut [Timestamp] { + if self.0.len() < min_len { + self.0.resize(min_len, 0); + } + assert!(self.0.len() >= min_len); + self.0.as_mut_slice() + } + + /// Increment the vector clock at a known index + #[inline] + fn increment_index(&mut self, idx: usize) { + let mut_slice = self.get_mut_with_min_len(idx + 1); + let idx_ref = &mut mut_slice[idx]; + *idx_ref = idx_ref.checked_add(1).expect("Vector clock overflow") + } + + // Increment the vector element representing the progress + // of execution in the given thread + #[inline] + pub fn increment_thread(&mut self, thread: ThreadId) { + self.increment_index(thread.to_u32() as usize); + } + + // Join the two vector-clocks together, this + // sets each vector-element to the maximum value + // of that element in either of the two source elements. + pub fn join(&mut self, other: &Self) { + let rhs_slice = other.as_slice(); + let lhs_slice = self.get_mut_with_min_len(rhs_slice.len()); + + // Element-wise set to maximum. 
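+        // Illustrative example (not part of the algorithm): joining
+        // self = [2, 5] with other = [3, 1, 4] first grows self to
+        // [2, 5, 0] and then takes the element-wise maximum, giving [3, 5, 4].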
+ for (l, &r) in lhs_slice.iter_mut().zip(rhs_slice.iter()) { + *l = r.max(*l); + } + } + + /// Joins with a thread at a known index + fn set_at_index(&mut self, other: &Self, idx: usize){ + let mut_slice = self.get_mut_with_min_len(idx + 1); + let slice = other.as_slice(); + mut_slice[idx] = slice[idx]; + } + + /// Join with a threads vector clock only at the desired index + /// returns true if the value updated + #[inline] + pub fn set_at_thread(&mut self, other: &Self, thread: ThreadId){ + self.set_at_index(other, thread.to_u32() as usize); + } + + /// Clear the vector to all zeros, stored as an empty internal + /// vector + #[inline] + pub fn set_zero_vector(&mut self) { + self.0.clear(); + } + + /// Set the values stored in this vector clock + /// to the values stored in another. + pub fn set_values(&mut self, new_value: &VClock) { + let new_slice = new_value.as_slice(); + self.0.resize(new_slice.len(), 0); + self.0.copy_from_slice(new_slice); + } +} + + +impl PartialOrd for VClock { + fn partial_cmp(&self, other: &VClock) -> Option { + + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // Iterate through the combined vector slice + // keeping track of the order that is currently possible to satisfy. + // If an ordering relation is detected to be impossible, then bail and + // directly return None + let mut iter = lhs_slice.iter().zip(rhs_slice.iter()); + let mut order = match iter.next() { + Some((lhs, rhs)) => lhs.cmp(rhs), + None => Ordering::Equal + }; + for (l, r) in iter { + match order { + Ordering::Equal => order = l.cmp(r), + Ordering::Less => if l > r { + return None + }, + Ordering::Greater => if l < r { + return None + } + } + } + + //Now test if either left or right have trailing elements + // by the invariant the trailing elements have at least 1 + // non zero value, so no additional calculation is required + // to determine the result of the PartialOrder + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + match l_len.cmp(&r_len) { + // Equal has no additional elements: return current order + Ordering::Equal => Some(order), + // Right has at least 1 element > than the implicit 0, + // so the only valid values are Ordering::Less or None + Ordering::Less => match order { + Ordering::Less | Ordering::Equal => Some(Ordering::Less), + Ordering::Greater => None + } + // Left has at least 1 element > than the implicit 0, + // so the only valid values are Ordering::Greater or None + Ordering::Greater => match order { + Ordering::Greater | Ordering::Equal => Some(Ordering::Greater), + Ordering::Less => None + } + } + } + + fn lt(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If l_len > r_len then at least one element + // in l_len is > than r_len, therefore the result + // is either Some(Greater) or None, so return false + // early. + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len <= r_len { + // If any elements on the left are greater than the right + // then the result is None or Some(Greater), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. 
Otherwise l <= r, finally + // the case where the values are potentially equal needs to be considered + // and false returned as well + let mut equal = l_len == r_len; + for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { + if l > r { + return false + }else if l < r { + equal = false; + } + } + !equal + }else{ + false + } + } + + fn le(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If l_len > r_len then at least one element + // in l_len is > than r_len, therefore the result + // is either Some(Greater) or None, so return false + // early. + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len <= r_len { + // If any elements on the left are greater than the right + // then the result is None or Some(Greater), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l <= r + !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l > r) + }else{ + false + } + } + + fn gt(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If r_len > l_len then at least one element + // in r_len is > than l_len, therefore the result + // is either Some(Less) or None, so return false + // early. + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len >= r_len { + // If any elements on the left are less than the right + // then the result is None or Some(Less), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l >=, finally + // the case where the values are potentially equal needs to be considered + // and false returned as well + let mut equal = l_len == r_len; + for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { + if l < r { + return false + }else if l > r { + equal = false; + } + } + !equal + }else{ + false + } + } + + fn ge(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If r_len > l_len then at least one element + // in r_len is > than l_len, therefore the result + // is either Some(Less) or None, so return false + // early. + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len >= r_len { + // If any elements on the left are less than the right + // then the result is None or Some(Less), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. 
Otherwise l >= r + !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l < r) + }else{ + false + } + } +} + +impl Index for VClock { + type Output = Timestamp; + + #[inline] + fn index(&self, index: ThreadId) -> &Timestamp { + self.as_slice().get(index.to_u32() as usize).unwrap_or(&0) + } +} + + +/// Test vector clock ordering operations +/// data-race detection is tested in the external +/// test suite +#[cfg(test)] +mod tests { + use super::{VClock, Timestamp}; + use std::cmp::Ordering; + + #[test] + fn test_equal() { + let mut c1 = VClock::default(); + let mut c2 = VClock::default(); + assert_eq!(c1, c2); + c1.increment_index(5); + assert_ne!(c1, c2); + c2.increment_index(53); + assert_ne!(c1, c2); + c1.increment_index(53); + assert_ne!(c1, c2); + c2.increment_index(5); + assert_eq!(c1, c2); + } + + #[test] + fn test_partial_order() { + // Small test + assert_order(&[1], &[1], Some(Ordering::Equal)); + assert_order(&[1], &[2], Some(Ordering::Less)); + assert_order(&[2], &[1], Some(Ordering::Greater)); + assert_order(&[1], &[1,2], Some(Ordering::Less)); + assert_order(&[2], &[1,2], None); + + // Misc tests + assert_order(&[400], &[0, 1], None); + + // Large test + assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Equal)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Greater)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], None); + assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Less)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); + } + + fn from_slice(mut slice: &[Timestamp]) -> VClock { + while let Some(0) = slice.last() { + slice = &slice[..slice.len() - 1] + } + VClock(smallvec::SmallVec::from_slice(slice)) + } + + fn assert_order(l: &[Timestamp], r: &[Timestamp], o: Option) { + let l = from_slice(l); + let r = from_slice(r); + + //Test partial_cmp + let compare = l.partial_cmp(&r); + assert_eq!(compare, o, "Invalid comparison\n l: {:?}\n r: {:?}",l,r); + let alt_compare = r.partial_cmp(&l); + assert_eq!(alt_compare, o.map(Ordering::reverse), "Invalid alt comparison\n l: {:?}\n r: {:?}",l,r); + + //Test operatorsm with faster implementations + assert_eq!( + matches!(compare,Some(Ordering::Less)), l < r, + "Invalid (<):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(compare,Some(Ordering::Less) | Some(Ordering::Equal)), l <= r, + "Invalid (<=):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(compare,Some(Ordering::Greater)), l > r, + "Invalid (>):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(compare,Some(Ordering::Greater) | Some(Ordering::Equal)), l >= r, + "Invalid (>=):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Less)), r < l, + "Invalid alt (<):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Less) | Some(Ordering::Equal)), r <= l, + "Invalid alt (<=):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Greater)), r > l, + "Invalid alt (>):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Greater) | Some(Ordering::Equal)), r >= l, + "Invalid alt (>=):\n l: {:?}\n r: {:?}",l,r + ); + } +} \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index d4802f3b11..f384787e4c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ 
-22,6 +22,7 @@ extern crate rustc_mir; extern crate rustc_span; extern crate rustc_target; +mod data_race; mod diagnostics; mod eval; mod helpers; @@ -52,6 +53,10 @@ pub use crate::shims::panic::{CatchUnwindData, EvalContextExt as _}; pub use crate::shims::tls::{EvalContextExt as _, TlsData}; pub use crate::shims::EvalContextExt as _; +pub use crate::data_race::{ + AtomicReadOp, AtomicWriteOp, AtomicRWOp, AtomicFenceOp, DataRaceLockHandle, + EvalContextExt as DataRaceEvalContextExt +}; pub use crate::diagnostics::{ register_diagnostic, report_error, EvalContextExt as DiagnosticsEvalContextExt, TerminationInfo, NonHaltingDiagnostic, diff --git a/src/machine.rs b/src/machine.rs index e9f9298e56..363513f636 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -109,12 +109,15 @@ impl fmt::Display for MiriMemoryKind { pub struct AllocExtra { /// Stacked Borrows state is only added if it is enabled. pub stacked_borrows: Option, + /// Data race detection via the use of a vector-clock. + pub data_race: data_race::AllocExtra, } /// Extra global memory data #[derive(Clone, Debug)] pub struct MemoryExtra { pub stacked_borrows: Option, + pub data_race: data_race::MemoryExtra, pub intptrcast: intptrcast::MemoryExtra, /// Mapping extern static names to their canonical allocation. @@ -144,8 +147,10 @@ impl MemoryExtra { } else { None }; + let data_race = Rc::new(data_race::GlobalState::new()); MemoryExtra { stacked_borrows, + data_race, intptrcast: Default::default(), extern_statics: FxHashMap::default(), rng: RefCell::new(rng), @@ -467,6 +472,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { // No stacks, no tag. (None, Tag::Untagged) }; + let race_alloc = data_race::AllocExtra::new_allocation(&memory_extra.data_race, alloc.size); let mut stacked_borrows = memory_extra.stacked_borrows.as_ref().map(|sb| sb.borrow_mut()); let alloc: Allocation = alloc.with_tags_and_extra( |alloc| { @@ -478,7 +484,7 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { Tag::Untagged } }, - AllocExtra { stacked_borrows: stacks }, + AllocExtra { stacked_borrows: stacks, data_race: race_alloc }, ); (Cow::Owned(alloc), base_tag) } @@ -584,6 +590,7 @@ impl AllocationExtra for AllocExtra { ptr: Pointer, size: Size, ) -> InterpResult<'tcx> { + alloc.extra.data_race.read(ptr, size)?; if let Some(stacked_borrows) = &alloc.extra.stacked_borrows { stacked_borrows.memory_read(ptr, size) } else { @@ -597,6 +604,7 @@ impl AllocationExtra for AllocExtra { ptr: Pointer, size: Size, ) -> InterpResult<'tcx> { + alloc.extra.data_race.write(ptr, size)?; if let Some(stacked_borrows) = &mut alloc.extra.stacked_borrows { stacked_borrows.memory_written(ptr, size) } else { @@ -610,6 +618,7 @@ impl AllocationExtra for AllocExtra { ptr: Pointer, size: Size, ) -> InterpResult<'tcx> { + alloc.extra.data_race.deallocate(ptr, size)?; if let Some(stacked_borrows) = &mut alloc.extra.stacked_borrows { stacked_borrows.memory_deallocated(ptr, size) } else { diff --git a/src/shims/intrinsics.rs b/src/shims/intrinsics.rs index b401bd8ada..2bb15e712c 100644 --- a/src/shims/intrinsics.rs +++ b/src/shims/intrinsics.rs @@ -4,7 +4,7 @@ use log::trace; use rustc_attr as attr; use rustc_ast::ast::FloatTy; -use rustc_middle::{mir, ty}; +use rustc_middle::{mir, mir::BinOp, ty}; use rustc_middle::ty::layout::IntegerExt; use rustc_apfloat::{Float, Round}; use rustc_target::abi::{Align, Integer, LayoutOf}; @@ -306,157 +306,117 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx } // Atomic operations - 
#[rustfmt::skip] - | "atomic_load" - | "atomic_load_relaxed" - | "atomic_load_acq" - => { - let &[place] = check_arg_count(args)?; - let place = this.deref_operand(place)?; - let val = this.read_scalar(place.into())?; // make sure it fits into a scalar; otherwise it cannot be atomic - - // Check alignment requirements. Atomics must always be aligned to their size, - // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must - // be 8-aligned). - let align = Align::from_bytes(place.layout.size.bytes()).unwrap(); - this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; - - this.write_scalar(val, dest)?; - } - - #[rustfmt::skip] - | "atomic_store" - | "atomic_store_relaxed" - | "atomic_store_rel" - => { - let &[place, val] = check_arg_count(args)?; - let place = this.deref_operand(place)?; - let val = this.read_scalar(val)?; // make sure it fits into a scalar; otherwise it cannot be atomic + "atomic_load" => this.atomic_load(args, dest, AtomicReadOp::SeqCst)?, + "atomic_load_relaxed" => this.atomic_load(args, dest, AtomicReadOp::Relaxed)?, + "atomic_load_acq" => this.atomic_load(args, dest, AtomicReadOp::Acquire)?, + + "atomic_store" => this.atomic_store(args, AtomicWriteOp::SeqCst)?, + "atomic_store_relaxed" => this.atomic_store(args, AtomicWriteOp::Relaxed)?, + "atomic_store_rel" => this.atomic_store(args, AtomicWriteOp::Release)?, + + "atomic_fence_acq" => this.atomic_fence(args, AtomicFenceOp::Acquire)?, + "atomic_fence_rel" => this.atomic_fence(args, AtomicFenceOp::Release)?, + "atomic_fence_acqrel" => this.atomic_fence(args, AtomicFenceOp::AcqRel)?, + "atomic_fence" => this.atomic_fence(args, AtomicFenceOp::SeqCst)?, + + "atomic_singlethreadfence_acq" => this.compiler_fence(args, AtomicFenceOp::Acquire)?, + "atomic_singlethreadfence_rel" => this.compiler_fence(args, AtomicFenceOp::Release)?, + "atomic_singlethreadfence_acqrel" => this.compiler_fence(args, AtomicFenceOp::AcqRel)?, + "atomic_singlethreadfence" => this.compiler_fence(args, AtomicFenceOp::SeqCst)?, + + "atomic_xchg" => this.atomic_exchange(args, dest, AtomicRWOp::SeqCst)?, + "atomic_xchg_acq" => this.atomic_exchange(args, dest, AtomicRWOp::Acquire)?, + "atomic_xchg_rel" => this.atomic_exchange(args, dest, AtomicRWOp::Release)?, + "atomic_xchg_acqrel" => this.atomic_exchange(args, dest, AtomicRWOp::AcqRel)?, + "atomic_xchg_relaxed" => this.atomic_exchange(args, dest, AtomicRWOp::Relaxed)?, + + "atomic_cxchg" => this.atomic_compare_exchange( + args, dest, AtomicRWOp::SeqCst, AtomicReadOp::SeqCst + )?, + "atomic_cxchg_acq" => this.atomic_compare_exchange( + args, dest, AtomicRWOp::Acquire, AtomicReadOp::Acquire + )?, + "atomic_cxchg_rel" => this.atomic_compare_exchange( + args, dest, AtomicRWOp::Release, AtomicReadOp::Relaxed + )?, + "atomic_cxchg_acqrel" => this.atomic_compare_exchange + (args, dest, AtomicRWOp::AcqRel, AtomicReadOp::Acquire + )?, + "atomic_cxchg_relaxed" => this.atomic_compare_exchange( + args, dest, AtomicRWOp::Relaxed, AtomicReadOp::Relaxed + )?, + "atomic_cxchg_acq_failrelaxed" => this.atomic_compare_exchange( + args, dest, AtomicRWOp::Acquire, AtomicReadOp::Relaxed + )?, + "atomic_cxchg_acqrel_failrelaxed" => this.atomic_compare_exchange( + args, dest, AtomicRWOp::AcqRel, AtomicReadOp::Relaxed + )?, + "atomic_cxchg_failrelaxed" => this.atomic_compare_exchange( + args, dest, AtomicRWOp::SeqCst, AtomicReadOp::Relaxed + )?, + "atomic_cxchg_failacq" => this.atomic_compare_exchange( + args, dest, AtomicRWOp::SeqCst, AtomicReadOp::Acquire + )?, + + "atomic_cxchgweak" => 
this.atomic_compare_exchange_weak( + args, dest, AtomicRWOp::SeqCst, AtomicReadOp::SeqCst + )?, + "atomic_cxchgweak_acq" => this.atomic_compare_exchange_weak( + args, dest, AtomicRWOp::Acquire, AtomicReadOp::Acquire + )?, + "atomic_cxchgweak_rel" => this.atomic_compare_exchange_weak( + args, dest, AtomicRWOp::Release, AtomicReadOp::Relaxed + )?, + "atomic_cxchgweak_acqrel" => this.atomic_compare_exchange_weak( + args, dest, AtomicRWOp::AcqRel, AtomicReadOp::Acquire + )?, + "atomic_cxchgweak_relaxed" => this.atomic_compare_exchange_weak( + args, dest, AtomicRWOp::Relaxed, AtomicReadOp::Relaxed + )?, + "atomic_cxchgweak_acq_failrelaxed" => this.atomic_compare_exchange_weak( + args, dest, AtomicRWOp::Acquire, AtomicReadOp::Relaxed + )?, + "atomic_cxchgweak_acqrel_failrelaxed" => this.atomic_compare_exchange_weak( + args, dest, AtomicRWOp::AcqRel, AtomicReadOp::Relaxed + )?, + "atomic_cxchgweak_failrelaxed" => this.atomic_compare_exchange_weak( + args, dest, AtomicRWOp::SeqCst, AtomicReadOp::Relaxed + )?, + "atomic_cxchgweak_failacq" => this.atomic_compare_exchange_weak( + args, dest, AtomicRWOp::SeqCst, AtomicReadOp::Acquire + )?, + + "atomic_or" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRWOp::SeqCst)?, + "atomic_or_acq" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRWOp::Acquire)?, + "atomic_or_rel" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRWOp::Release)?, + "atomic_or_acqrel" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRWOp::AcqRel)?, + "atomic_or_relaxed" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRWOp::Relaxed)?, + "atomic_xor" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRWOp::SeqCst)?, + "atomic_xor_acq" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRWOp::Acquire)?, + "atomic_xor_rel" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRWOp::Release)?, + "atomic_xor_acqrel" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRWOp::AcqRel)?, + "atomic_xor_relaxed" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRWOp::Relaxed)?, + "atomic_and" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRWOp::SeqCst)?, + "atomic_and_acq" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRWOp::Acquire)?, + "atomic_and_rel" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRWOp::Release)?, + "atomic_and_acqrel" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRWOp::AcqRel)?, + "atomic_and_relaxed" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRWOp::Relaxed)?, + "atomic_nand" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRWOp::SeqCst)?, + "atomic_nand_acq" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRWOp::Acquire)?, + "atomic_nand_rel" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRWOp::Release)?, + "atomic_nand_acqrel" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRWOp::AcqRel)?, + "atomic_nand_relaxed" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRWOp::Relaxed)?, + "atomic_xadd" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRWOp::SeqCst)?, + "atomic_xadd_acq" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRWOp::Acquire)?, + "atomic_xadd_rel" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRWOp::Release)?, + "atomic_xadd_acqrel" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRWOp::AcqRel)?, + "atomic_xadd_relaxed" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRWOp::Relaxed)?, + "atomic_xsub" => this.atomic_op(args, dest, BinOp::Sub, false, 
AtomicRWOp::SeqCst)?, + "atomic_xsub_acq" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRWOp::Acquire)?, + "atomic_xsub_rel" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRWOp::Release)?, + "atomic_xsub_acqrel" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRWOp::AcqRel)?, + "atomic_xsub_relaxed" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRWOp::Relaxed)?, - // Check alignment requirements. Atomics must always be aligned to their size, - // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must - // be 8-aligned). - let align = Align::from_bytes(place.layout.size.bytes()).unwrap(); - this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; - - this.write_scalar(val, place.into())?; - } - - #[rustfmt::skip] - | "atomic_fence_acq" - | "atomic_fence_rel" - | "atomic_fence_acqrel" - | "atomic_fence" - | "atomic_singlethreadfence_acq" - | "atomic_singlethreadfence_rel" - | "atomic_singlethreadfence_acqrel" - | "atomic_singlethreadfence" - => { - let &[] = check_arg_count(args)?; - // FIXME: this will become relevant once we try to detect data races. - } - - _ if intrinsic_name.starts_with("atomic_xchg") => { - let &[place, new] = check_arg_count(args)?; - let place = this.deref_operand(place)?; - let new = this.read_scalar(new)?; - let old = this.read_scalar(place.into())?; - - // Check alignment requirements. Atomics must always be aligned to their size, - // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must - // be 8-aligned). - let align = Align::from_bytes(place.layout.size.bytes()).unwrap(); - this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; - - this.write_scalar(old, dest)?; // old value is returned - this.write_scalar(new, place.into())?; - } - - _ if intrinsic_name.starts_with("atomic_cxchg") => { - let &[place, expect_old, new] = check_arg_count(args)?; - let place = this.deref_operand(place)?; - let expect_old = this.read_immediate(expect_old)?; // read as immediate for the sake of `binary_op()` - let new = this.read_scalar(new)?; - let old = this.read_immediate(place.into())?; // read as immediate for the sake of `binary_op()` - - // Check alignment requirements. Atomics must always be aligned to their size, - // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must - // be 8-aligned). - let align = Align::from_bytes(place.layout.size.bytes()).unwrap(); - this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; - - // `binary_op` will bail if either of them is not a scalar. - let eq = this.overflowing_binary_op(mir::BinOp::Eq, old, expect_old)?.0; - let res = Immediate::ScalarPair(old.to_scalar_or_uninit(), eq.into()); - // Return old value. - this.write_immediate(res, dest)?; - // Update ptr depending on comparison. - if eq.to_bool()? 
{ - this.write_scalar(new, place.into())?; - } - } - - #[rustfmt::skip] - | "atomic_or" - | "atomic_or_acq" - | "atomic_or_rel" - | "atomic_or_acqrel" - | "atomic_or_relaxed" - | "atomic_xor" - | "atomic_xor_acq" - | "atomic_xor_rel" - | "atomic_xor_acqrel" - | "atomic_xor_relaxed" - | "atomic_and" - | "atomic_and_acq" - | "atomic_and_rel" - | "atomic_and_acqrel" - | "atomic_and_relaxed" - | "atomic_nand" - | "atomic_nand_acq" - | "atomic_nand_rel" - | "atomic_nand_acqrel" - | "atomic_nand_relaxed" - | "atomic_xadd" - | "atomic_xadd_acq" - | "atomic_xadd_rel" - | "atomic_xadd_acqrel" - | "atomic_xadd_relaxed" - | "atomic_xsub" - | "atomic_xsub_acq" - | "atomic_xsub_rel" - | "atomic_xsub_acqrel" - | "atomic_xsub_relaxed" - => { - let &[place, rhs] = check_arg_count(args)?; - let place = this.deref_operand(place)?; - if !place.layout.ty.is_integral() { - bug!("Atomic arithmetic operations only work on integer types"); - } - let rhs = this.read_immediate(rhs)?; - let old = this.read_immediate(place.into())?; - - // Check alignment requirements. Atomics must always be aligned to their size, - // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must - // be 8-aligned). - let align = Align::from_bytes(place.layout.size.bytes()).unwrap(); - this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; - - this.write_immediate(*old, dest)?; // old value is returned - let (op, neg) = match intrinsic_name.split('_').nth(1).unwrap() { - "or" => (mir::BinOp::BitOr, false), - "xor" => (mir::BinOp::BitXor, false), - "and" => (mir::BinOp::BitAnd, false), - "xadd" => (mir::BinOp::Add, false), - "xsub" => (mir::BinOp::Sub, false), - "nand" => (mir::BinOp::BitAnd, true), - _ => bug!(), - }; - // Atomics wrap around on overflow. - let val = this.binary_op(op, old, rhs)?; - let val = if neg { this.unary_op(mir::UnOp::Not, val)? } else { val }; - this.write_immediate(*val, place.into())?; - } // Query type information "assert_inhabited" | @@ -498,6 +458,169 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx Ok(()) } + fn atomic_load( + &mut self, args: &[OpTy<'tcx, Tag>], dest: PlaceTy<'tcx, Tag>, + atomic: AtomicReadOp + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + + let &[place] = check_arg_count(args)?; + let place = this.deref_operand(place)?; + + // make sure it fits into a scalar; otherwise it cannot be atomic + let val = this.read_scalar_racy(place)?; + this.validate_atomic_load(place, atomic)?; + + // Check alignment requirements. Atomics must always be aligned to their size, + // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must + // be 8-aligned). + let align = Align::from_bytes(place.layout.size.bytes()).unwrap(); + this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; + this.write_scalar(val, dest)?; + Ok(()) + } + + fn atomic_store(&mut self, args: &[OpTy<'tcx, Tag>], atomic: AtomicWriteOp) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + let &[place, val] = check_arg_count(args)?; + let place = this.deref_operand(place)?; + let val = this.read_scalar(val)?; // make sure it fits into a scalar; otherwise it cannot be atomic + + // Check alignment requirements. Atomics must always be aligned to their size, + // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must + // be 8-aligned). 
+ let align = Align::from_bytes(place.layout.size.bytes()).unwrap(); + this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; + + // Perform atomic store + this.write_scalar_racy(val, place)?; + + this.validate_atomic_store(place, atomic)?; + Ok(()) + } + + fn compiler_fence(&mut self, args: &[OpTy<'tcx, Tag>], atomic: AtomicFenceOp) -> InterpResult<'tcx> { + let &[] = check_arg_count(args)?; + let _ = atomic; + //FIXME: compiler fences are currently ignored + Ok(()) + } + + fn atomic_fence(&mut self, args: &[OpTy<'tcx, Tag>], atomic: AtomicFenceOp) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let &[] = check_arg_count(args)?; + this.validate_atomic_fence(atomic)?; + Ok(()) + } + + fn atomic_op( + &mut self, args: &[OpTy<'tcx, Tag>], dest: PlaceTy<'tcx, Tag>, + op: mir::BinOp, neg: bool, atomic: AtomicRWOp + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + let &[place, rhs] = check_arg_count(args)?; + let place = this.deref_operand(place)?; + if !place.layout.ty.is_integral() { + bug!("Atomic arithmetic operations only work on integer types"); + } + let rhs = this.read_immediate(rhs)?; + let old = this.read_immediate_racy(place)?; + + // Check alignment requirements. Atomics must always be aligned to their size, + // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must + // be 8-aligned). + let align = Align::from_bytes(place.layout.size.bytes()).unwrap(); + this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; + this.write_immediate(*old, dest)?; // old value is returned + + // Atomics wrap around on overflow. + let val = this.binary_op(op, old, rhs)?; + let val = if neg { this.unary_op(mir::UnOp::Not, val)? } else { val }; + this.write_immediate_racy(*val, place)?; + + this.validate_atomic_rmw(place, atomic)?; + Ok(()) + } + + fn atomic_exchange( + &mut self, args: &[OpTy<'tcx, Tag>], dest: PlaceTy<'tcx, Tag>, atomic: AtomicRWOp + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + let &[place, new] = check_arg_count(args)?; + let place = this.deref_operand(place)?; + let new = this.read_scalar(new)?; + let old = this.read_scalar_racy(place)?; + + // Check alignment requirements. Atomics must always be aligned to their size, + // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must + // be 8-aligned). + let align = Align::from_bytes(place.layout.size.bytes()).unwrap(); + this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; + + this.write_scalar(old, dest)?; // old value is returned + this.write_scalar_racy(new, place)?; + + this.validate_atomic_rmw(place, atomic)?; + Ok(()) + } + + fn atomic_compare_exchange( + &mut self, args: &[OpTy<'tcx, Tag>], dest: PlaceTy<'tcx, Tag>, + success: AtomicRWOp, fail: AtomicReadOp + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + let &[place, expect_old, new] = check_arg_count(args)?; + let place = this.deref_operand(place)?; + let expect_old = this.read_immediate(expect_old)?; // read as immediate for the sake of `binary_op()` + let new = this.read_scalar(new)?; + + // Failure ordering cannot be stronger than success ordering, therefore first attempt + // to read with the failure ordering and if successfull then try again with the success + // read ordering and write in the success case. + // Read as immediate for the sake of `binary_op()` + let old = this.read_immediate_racy(place)?; + + // Check alignment requirements. 
Atomics must always be aligned to their size, + // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must + // be 8-aligned). + let align = Align::from_bytes(place.layout.size.bytes()).unwrap(); + this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; + + // `binary_op` will bail if either of them is not a scalar. + let eq = this.overflowing_binary_op(mir::BinOp::Eq, old, expect_old)?.0; + let res = Immediate::ScalarPair(old.to_scalar_or_uninit(), eq.into()); + + // Return old value. + this.write_immediate(res, dest)?; + + // Update ptr depending on comparison. + // if successful, perform a full rw-atomic validation + // otherwise treat this as an atomic load with the fail ordering + if eq.to_bool()? { + this.write_scalar_racy(new, place)?; + this.validate_atomic_rmw(place, success)?; + } else { + this.validate_atomic_load(place, fail)?; + } + + Ok(()) + } + + fn atomic_compare_exchange_weak( + &mut self, args: &[OpTy<'tcx, Tag>], dest: PlaceTy<'tcx, Tag>, + success: AtomicRWOp, fail: AtomicReadOp + ) -> InterpResult<'tcx> { + + // FIXME: the weak part of this is currently not modelled, + // it is assumed to always succeed unconditionally. + self.atomic_compare_exchange(args, dest, success, fail) + } + fn float_to_int_unchecked( &self, f: F, diff --git a/src/shims/posix/sync.rs b/src/shims/posix/sync.rs index a0b5db42ed..332e79071a 100644 --- a/src/shims/posix/sync.rs +++ b/src/shims/posix/sync.rs @@ -62,7 +62,7 @@ fn mutex_get_kind<'mir, 'tcx: 'mir>( mutex_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; - ecx.read_scalar_at_offset(mutex_op, offset, ecx.machine.layouts.i32) + ecx.read_scalar_at_offset_racy(mutex_op, offset, ecx.machine.layouts.i32) } fn mutex_set_kind<'mir, 'tcx: 'mir>( @@ -71,14 +71,14 @@ fn mutex_set_kind<'mir, 'tcx: 'mir>( kind: impl Into>, ) -> InterpResult<'tcx, ()> { let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; - ecx.write_scalar_at_offset(mutex_op, offset, kind, ecx.machine.layouts.i32) + ecx.write_scalar_at_offset_racy(mutex_op, offset, kind, ecx.machine.layouts.i32) } fn mutex_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, mutex_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - ecx.read_scalar_at_offset(mutex_op, 4, ecx.machine.layouts.u32) + ecx.read_scalar_at_offset_racy(mutex_op, 4, ecx.machine.layouts.u32) } fn mutex_set_id<'mir, 'tcx: 'mir>( @@ -86,7 +86,7 @@ fn mutex_set_id<'mir, 'tcx: 'mir>( mutex_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - ecx.write_scalar_at_offset(mutex_op, 4, id, ecx.machine.layouts.u32) + ecx.write_scalar_at_offset_racy(mutex_op, 4, id, ecx.machine.layouts.u32) } fn mutex_get_or_create_id<'mir, 'tcx: 'mir>( @@ -116,7 +116,7 @@ fn rwlock_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, rwlock_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - ecx.read_scalar_at_offset(rwlock_op, 4, ecx.machine.layouts.u32) + ecx.read_scalar_at_offset_racy(rwlock_op, 4, ecx.machine.layouts.u32) } fn rwlock_set_id<'mir, 'tcx: 'mir>( @@ -124,7 +124,7 @@ fn rwlock_set_id<'mir, 'tcx: 'mir>( rwlock_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - ecx.write_scalar_at_offset(rwlock_op, 4, id, ecx.machine.layouts.u32) + ecx.write_scalar_at_offset_racy(rwlock_op, 4, id, ecx.machine.layouts.u32) } fn rwlock_get_or_create_id<'mir, 'tcx: 'mir>( @@ -177,7 +177,7 @@ fn cond_get_id<'mir, 'tcx: 'mir>( ecx: 
&MiriEvalContext<'mir, 'tcx>, cond_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - ecx.read_scalar_at_offset(cond_op, 4, ecx.machine.layouts.u32) + ecx.read_scalar_at_offset_racy(cond_op, 4, ecx.machine.layouts.u32) } fn cond_set_id<'mir, 'tcx: 'mir>( @@ -185,7 +185,7 @@ fn cond_set_id<'mir, 'tcx: 'mir>( cond_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - ecx.write_scalar_at_offset(cond_op, 4, id, ecx.machine.layouts.u32) + ecx.write_scalar_at_offset_racy(cond_op, 4, id, ecx.machine.layouts.u32) } fn cond_get_or_create_id<'mir, 'tcx: 'mir>( diff --git a/src/shims/posix/thread.rs b/src/shims/posix/thread.rs index 7c9c489e6f..e420457765 100644 --- a/src/shims/posix/thread.rs +++ b/src/shims/posix/thread.rs @@ -19,21 +19,29 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx For example, Miri does not detect data races yet.", ); + // Create the new thread let new_thread_id = this.create_thread(); - // Also switch to new thread so that we can push the first stackframe. - let old_thread_id = this.set_active_thread(new_thread_id); + // Write the current thread-id, switch to the next thread later + // to treat this write operation as occuring on this thread index let thread_info_place = this.deref_operand(thread)?; this.write_scalar( Scalar::from_uint(new_thread_id.to_u32(), thread_info_place.layout.size), thread_info_place.into(), )?; + // Read the function argument that will be sent to the new thread + // again perform the read before the thread starts executing. let fn_ptr = this.read_scalar(start_routine)?.check_init()?; - let instance = this.memory.get_fn(fn_ptr)?.as_instance()?; - let func_arg = this.read_immediate(arg)?; + // Also switch to new thread so that we can push the first stackframe. + // after this all accesses will be treated as occuring in the new thread + let old_thread_id = this.set_active_thread(new_thread_id); + + // Perform the function pointer load in the new thread frame + let instance = this.memory.get_fn(fn_ptr)?.as_instance()?; + // Note: the returned value is currently ignored (see the FIXME in // pthread_join below) because the Rust standard library does not use // it. @@ -47,6 +55,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx StackPopCleanup::None { cleanup: true }, )?; + // Restore the old active thread frame this.set_active_thread(old_thread_id); Ok(0) diff --git a/src/sync.rs b/src/sync.rs index 0c12da8d68..3469afdcd2 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -61,6 +61,8 @@ struct Mutex { lock_count: usize, /// The queue of threads waiting for this mutex. queue: VecDeque, + /// Data race handle + data_race: DataRaceLockHandle } declare_id!(RwLockId); @@ -77,6 +79,10 @@ struct RwLock { writer_queue: VecDeque, /// The queue of reader threads waiting for this lock. reader_queue: VecDeque, + /// Data race handle for writers + data_race: DataRaceLockHandle, + /// Data race handle for readers + data_race_reader: DataRaceLockHandle, } declare_id!(CondvarId); @@ -94,12 +100,14 @@ struct CondvarWaiter { #[derive(Default, Debug)] struct Condvar { waiters: VecDeque, + data_race: DataRaceLockHandle, } /// The futex state. #[derive(Default, Debug)] struct Futex { waiters: VecDeque, + data_race: DataRaceLockHandle, } /// A thread waiting on a futex. 
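The DataRaceLockHandle fields added to Mutex, RwLock, Condvar and Futex above, together with the validate_lock_acquire/validate_lock_release calls in the hunks below, give each synchronization object its own vector clock so that an unlock happens-before the next acquisition. A minimal, self-contained sketch of that bookkeeping follows; the names and signatures are illustrative stand-ins, not the patch's actual types.

use std::collections::HashMap;

type ThreadId = u32;
/// Per-thread vector clock: thread id -> logical time.
type VClock = HashMap<ThreadId, u64>;

/// Pointwise maximum: `into` now happens-after everything recorded in `from`.
fn join(into: &mut VClock, from: &VClock) {
    for (&tid, &time) in from {
        let entry = into.entry(tid).or_insert(0);
        *entry = (*entry).max(time);
    }
}

/// Simplified stand-in for the patch's `DataRaceLockHandle`.
#[derive(Default)]
struct LockHandle {
    clock: VClock,
}

/// Releasing a lock publishes the releasing thread's clock into the handle.
fn validate_lock_release(handle: &mut LockHandle, releaser: &VClock) {
    join(&mut handle.clock, releaser);
}

/// Acquiring a lock joins the handle's clock into the acquiring thread,
/// so the acquirer happens-after the previous releaser.
fn validate_lock_acquire(handle: &LockHandle, acquirer: &mut VClock) {
    join(acquirer, &handle.clock);
}

fn main() {
    let mut handle = LockHandle::default();
    let thread_a: VClock = HashMap::from([(0, 3)]);
    let mut thread_b: VClock = HashMap::from([(1, 5)]);

    validate_lock_release(&mut handle, &thread_a); // thread A unlocks
    validate_lock_acquire(&handle, &mut thread_b); // thread B locks next
    assert_eq!(thread_b.get(&0), Some(&3)); // B now happens-after A's unlock
}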
@@ -205,6 +213,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx mutex.owner = Some(thread); } mutex.lock_count = mutex.lock_count.checked_add(1).unwrap(); + this.memory.extra.data_race.validate_lock_acquire(&mutex.data_race, thread); } /// Try unlocking by decreasing the lock count and returning the old lock @@ -232,6 +241,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx mutex.owner = None; // The mutex is completely unlocked. Try transfering ownership // to another thread. + this.memory.extra.data_race.validate_lock_release(&mut mutex.data_race, current_owner); this.mutex_dequeue_and_lock(id); } Some(old_lock_count) @@ -284,15 +294,18 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let this = self.eval_context_mut(); assert!(!this.rwlock_is_write_locked(id), "the lock is write locked"); trace!("rwlock_reader_lock: {:?} now also held (one more time) by {:?}", id, reader); - let count = this.machine.threads.sync.rwlocks[id].readers.entry(reader).or_insert(0); + let rwlock = &mut this.machine.threads.sync.rwlocks[id]; + let count = rwlock.readers.entry(reader).or_insert(0); *count = count.checked_add(1).expect("the reader counter overflowed"); + this.memory.extra.data_race.validate_lock_acquire(&rwlock.data_race, reader); } /// Try read-unlock the lock for `reader` and potentially give the lock to a new owner. /// Returns `true` if succeeded, `false` if this `reader` did not hold the lock. fn rwlock_reader_unlock(&mut self, id: RwLockId, reader: ThreadId) -> bool { let this = self.eval_context_mut(); - match this.machine.threads.sync.rwlocks[id].readers.entry(reader) { + let rwlock = &mut this.machine.threads.sync.rwlocks[id]; + match rwlock.readers.entry(reader) { Entry::Occupied(mut entry) => { let count = entry.get_mut(); assert!(*count > 0, "rwlock locked with count == 0"); @@ -306,8 +319,16 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx } Entry::Vacant(_) => return false, // we did not even own this lock } + this.memory.extra.data_race.validate_lock_release_shared(&mut rwlock.data_race_reader, reader); + // The thread was a reader. If the lock is not held any more, give it to a writer. 
if this.rwlock_is_locked(id).not() { + + // All the readers are finished, so set the writer data-race handle to the value + // of the union of all reader data race handles, since the set of readers + // happen-before the writers + let rwlock = &mut this.machine.threads.sync.rwlocks[id]; + rwlock.data_race.set_values(&rwlock.data_race_reader); this.rwlock_dequeue_and_lock_writer(id); } true @@ -332,7 +353,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let this = self.eval_context_mut(); assert!(!this.rwlock_is_locked(id), "the rwlock is already locked"); trace!("rwlock_writer_lock: {:?} now held by {:?}", id, writer); - this.machine.threads.sync.rwlocks[id].writer = Some(writer); + let rwlock = &mut this.machine.threads.sync.rwlocks[id]; + rwlock.writer = Some(writer); + this.memory.extra.data_race.validate_lock_acquire(&rwlock.data_race, writer); } #[inline] @@ -347,6 +370,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx } rwlock.writer = None; trace!("rwlock_writer_unlock: {:?} unlocked by {:?}", id, expected_writer); + // Release memory to both reader and writer vector clocks + // since this writer happens-before both the union of readers once they are finished + // and the next writer + this.memory.extra.data_race.validate_lock_release(&mut rwlock.data_race, current_writer); + this.memory.extra.data_race.validate_lock_release(&mut rwlock.data_race_reader, current_writer); // The thread was a writer. // // We are prioritizing writers here against the readers. As a @@ -405,10 +433,18 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx /// variable. fn condvar_signal(&mut self, id: CondvarId) -> Option<(ThreadId, MutexId)> { let this = self.eval_context_mut(); - this.machine.threads.sync.condvars[id] - .waiters + let current_thread = this.get_active_thread(); + let condvar = &mut this.machine.threads.sync.condvars[id]; + let data_race = &mut this.memory.extra.data_race; + + // Each condvar signal happens-before the end of the condvar wake + data_race.validate_lock_release(&mut condvar.data_race, current_thread); + condvar.waiters .pop_front() - .map(|waiter| (waiter.thread, waiter.mutex)) + .map(|waiter| { + data_race.validate_lock_acquire(&mut condvar.data_race, waiter.thread); + (waiter.thread, waiter.mutex) + }) } #[inline] @@ -420,15 +456,25 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx fn futex_wait(&mut self, addr: Pointer, thread: ThreadId) { let this = self.eval_context_mut(); - let waiters = &mut this.machine.threads.sync.futexes.entry(addr.erase_tag()).or_default().waiters; + let futex = &mut this.machine.threads.sync.futexes.entry(addr.erase_tag()).or_default(); + let waiters = &mut futex.waiters; assert!(waiters.iter().all(|waiter| waiter.thread != thread), "thread is already waiting"); waiters.push_back(FutexWaiter { thread }); } fn futex_wake(&mut self, addr: Pointer) -> Option { let this = self.eval_context_mut(); - let waiters = &mut this.machine.threads.sync.futexes.get_mut(&addr.erase_tag())?.waiters; - waiters.pop_front().map(|waiter| waiter.thread) + let current_thread = this.get_active_thread(); + let futex = &mut this.machine.threads.sync.futexes.get_mut(&addr.erase_tag())?; + let data_race = &mut this.memory.extra.data_race; + + // Each futex-wake happens-before the end of the futex wait + data_race.validate_lock_release(&mut futex.data_race, current_thread); + let res = futex.waiters.pop_front().map(|waiter| { + 
data_race.validate_lock_acquire(&futex.data_race, waiter.thread); + waiter.thread + }); + res } fn futex_remove_waiter(&mut self, addr: Pointer, thread: ThreadId) { diff --git a/src/thread.rs b/src/thread.rs index eeaee7dc44..08aeaa4fd0 100644 --- a/src/thread.rs +++ b/src/thread.rs @@ -327,7 +327,7 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { } /// Mark that the active thread tries to join the thread with `joined_thread_id`. - fn join_thread(&mut self, joined_thread_id: ThreadId) -> InterpResult<'tcx> { + fn join_thread(&mut self, joined_thread_id: ThreadId, data_race: &data_race::GlobalState) -> InterpResult<'tcx> { if self.threads[joined_thread_id].join_status != ThreadJoinStatus::Joinable { throw_ub_format!("trying to join a detached or already joined thread"); } @@ -351,6 +351,9 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { self.active_thread, joined_thread_id ); + }else{ + // The thread has already terminated - mark join happens-before + data_race.thread_joined(self.active_thread, joined_thread_id); } Ok(()) } @@ -425,7 +428,7 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { /// Wakes up threads joining on the active one and deallocates thread-local statics. /// The `AllocId` that can now be freed is returned. - fn thread_terminated(&mut self) -> Vec { + fn thread_terminated(&mut self, data_race: &data_race::GlobalState) -> Vec { let mut free_tls_statics = Vec::new(); { let mut thread_local_statics = self.thread_local_alloc_ids.borrow_mut(); @@ -443,6 +446,8 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { // Check if we need to unblock any threads. for (i, thread) in self.threads.iter_enumerated_mut() { if thread.state == ThreadState::BlockedOnJoin(self.active_thread) { + // The thread has terminated, mark happens-before edge to joining thread + data_race.thread_joined(i, self.active_thread); trace!("unblocking {:?} because {:?} terminated", i, self.active_thread); thread.state = ThreadState::Enabled; } @@ -456,7 +461,7 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { /// used in stateless model checkers such as Loom: run the active thread as /// long as we can and switch only when we have to (the active thread was /// blocked, terminated, or has explicitly asked to be preempted). - fn schedule(&mut self) -> InterpResult<'tcx, SchedulingAction> { + fn schedule(&mut self, data_race: &data_race::GlobalState) -> InterpResult<'tcx, SchedulingAction> { // Check whether the thread has **just** terminated (`check_terminated` // checks whether the thread has popped all its stack and if yes, sets // the thread state to terminated). 
@@ -501,6 +506,7 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { if thread.state == ThreadState::Enabled { if !self.yield_active_thread || id != self.active_thread { self.active_thread = id; + data_race.thread_set_active(self.active_thread); break; } } @@ -554,7 +560,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx #[inline] fn create_thread(&mut self) -> ThreadId { let this = self.eval_context_mut(); - this.machine.threads.create_thread() + let id = this.machine.threads.create_thread(); + this.memory.extra.data_race.thread_created(id); + id } #[inline] @@ -566,12 +574,15 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx #[inline] fn join_thread(&mut self, joined_thread_id: ThreadId) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - this.machine.threads.join_thread(joined_thread_id) + let data_race = &*this.memory.extra.data_race; + this.machine.threads.join_thread(joined_thread_id, data_race)?; + Ok(()) } #[inline] fn set_active_thread(&mut self, thread_id: ThreadId) -> ThreadId { let this = self.eval_context_mut(); + this.memory.extra.data_race.thread_set_active(thread_id); this.machine.threads.set_active_thread_id(thread_id) } @@ -626,6 +637,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx #[inline] fn set_active_thread_name(&mut self, new_thread_name: Vec) { let this = self.eval_context_mut(); + if let Ok(string) = String::from_utf8(new_thread_name.clone()) { + this.memory.extra.data_race.thread_set_name(string); + } this.machine.threads.set_thread_name(new_thread_name); } @@ -695,7 +709,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx #[inline] fn schedule(&mut self) -> InterpResult<'tcx, SchedulingAction> { let this = self.eval_context_mut(); - this.machine.threads.schedule() + let data_race = &*this.memory.extra.data_race; + this.machine.threads.schedule(data_race) } /// Handles thread termination of the active thread: wakes up threads joining on this one, @@ -705,7 +720,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx #[inline] fn thread_terminated(&mut self) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - for alloc_id in this.machine.threads.thread_terminated() { + let data_race = &*this.memory.extra.data_race; + for alloc_id in this.machine.threads.thread_terminated(data_race) { let ptr = this.memory.global_base_pointer(alloc_id.into())?; this.memory.deallocate(ptr, None, MiriMemoryKind::Tls.into())?; } From ed32b260f9243b1ebb08c033acd3d5079407a309 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Mon, 2 Nov 2020 01:46:42 +0000 Subject: [PATCH 02/17] Add tests, and fix bug in atomic RMW relaxed stores --- src/data_race.rs | 5 +- src/shims/posix/thread.rs | 3 +- .../compile-fail/data_race/read_write_race.rs | 26 ++++ .../data_race/relax_acquire_race.rs | 42 +++++++ .../data_race/release_seq_race.rs | 46 +++++++ tests/compile-fail/data_race/rmw_race.rs | 43 +++++++ .../data_race/write_write_race.rs | 26 ++++ tests/run-pass/concurrency/data_race.rs | 119 ++++++++++++++++++ tests/run-pass/concurrency/data_race.stderr | 2 + tests/run-pass/concurrency/linux-futex.stderr | 2 +- tests/run-pass/concurrency/simple.stderr | 2 +- tests/run-pass/concurrency/sync.stderr | 2 +- .../run-pass/concurrency/thread_locals.stderr | 2 +- .../run-pass/concurrency/tls_lib_drop.stderr | 2 +- tests/run-pass/libc.stderr | 2 +- tests/run-pass/panic/concurrent-panic.stderr | 2 +- 16 files changed, 315 
insertions(+), 11 deletions(-) create mode 100644 tests/compile-fail/data_race/read_write_race.rs create mode 100644 tests/compile-fail/data_race/relax_acquire_race.rs create mode 100644 tests/compile-fail/data_race/release_seq_race.rs create mode 100644 tests/compile-fail/data_race/rmw_race.rs create mode 100644 tests/compile-fail/data_race/write_write_race.rs create mode 100644 tests/run-pass/concurrency/data_race.rs create mode 100644 tests/run-pass/concurrency/data_race.stderr diff --git a/src/data_race.rs b/src/data_race.rs index 5952606394..ac928071be 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -404,8 +404,9 @@ impl AtomicReleaseSequences { fn clear_and_retain(&mut self, thread: ThreadId) { match self { Self::ReleaseOneOrEmpty(id, rel_clock) => { - // Keep or forget depending on id - if *id == Some(thread) { + // If the id is the same, then reatin the value + // otherwise delete and clear the release vector clock + if *id != Some(thread) { *id = None; rel_clock.set_zero_vector(); } diff --git a/src/shims/posix/thread.rs b/src/shims/posix/thread.rs index e420457765..e823a7d88d 100644 --- a/src/shims/posix/thread.rs +++ b/src/shims/posix/thread.rs @@ -15,8 +15,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let this = self.eval_context_mut(); this.tcx.sess.warn( - "thread support is experimental. \ - For example, Miri does not detect data races yet.", + "thread support is experimental.", ); // Create the new thread diff --git a/tests/compile-fail/data_race/read_write_race.rs b/tests/compile-fail/data_race/read_write_race.rs new file mode 100644 index 0000000000..ece99b4a87 --- /dev/null +++ b/tests/compile-fail/data_race/read_write_race.rs @@ -0,0 +1,26 @@ + +use std::thread::spawn; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +pub fn main() { + let mut a = 0u32; + let b = &mut a as *mut u32; + let c = EvilSend(b); + unsafe { + let j1 = spawn(move || { + *c.0 + }); + + let j2 = spawn(move || { + *c.0 = 64; //~ ERROR Data race + }); + + j1.join().unwrap(); + j2.join().unwrap(); + } +} \ No newline at end of file diff --git a/tests/compile-fail/data_race/relax_acquire_race.rs b/tests/compile-fail/data_race/relax_acquire_race.rs new file mode 100644 index 0000000000..cc96083546 --- /dev/null +++ b/tests/compile-fail/data_race/relax_acquire_race.rs @@ -0,0 +1,42 @@ + +use std::thread::spawn; +use std::sync::atomic::{AtomicUsize, Ordering}; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +static SYNC: AtomicUsize = AtomicUsize::new(0); + +pub fn main() { + let mut a = 0u32; + let b = &mut a as *mut u32; + let c = EvilSend(b); + + unsafe { + let j1 = spawn(move || { + *c.0 = 1; + SYNC.store(1, Ordering::Release); + }); + + let j2 = spawn(move || { + if SYNC.load(Ordering::Acquire) == 1 { + SYNC.store(2, Ordering::Relaxed); + } + }); + + let j3 = spawn(move || { + if SYNC.load(Ordering::Acquire) == 2 { + *c.0 //~ ERROR Data race + }else{ + 0 + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); + j3.join().unwrap(); + } +} \ No newline at end of file diff --git a/tests/compile-fail/data_race/release_seq_race.rs b/tests/compile-fail/data_race/release_seq_race.rs new file mode 100644 index 0000000000..8b3ffbcd9d --- /dev/null +++ b/tests/compile-fail/data_race/release_seq_race.rs @@ -0,0 +1,46 @@ +// compile-flags: -Zmiri-disable-isolation + +use std::thread::{spawn, sleep}; +use 
std::sync::atomic::{AtomicUsize, Ordering}; +use std::time::Duration; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +static SYNC: AtomicUsize = AtomicUsize::new(0); + +pub fn main() { + let mut a = 0u32; + let b = &mut a as *mut u32; + let c = EvilSend(b); + + unsafe { + let j1 = spawn(move || { + *c.0 = 1; + SYNC.store(1, Ordering::Release); + sleep(Duration::from_millis(100)); + SYNC.store(3, Ordering::Relaxed); + }); + + let j2 = spawn(move || { + // Blocks the acquire-release sequence + SYNC.store(2, Ordering::Relaxed); + }); + + let j3 = spawn(move || { + sleep(Duration::from_millis(1000)); + if SYNC.load(Ordering::Acquire) == 3 { + *c.0 //~ ERROR Data race + }else{ + 0 + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); + j3.join().unwrap(); + } +} \ No newline at end of file diff --git a/tests/compile-fail/data_race/rmw_race.rs b/tests/compile-fail/data_race/rmw_race.rs new file mode 100644 index 0000000000..9c31c79ebf --- /dev/null +++ b/tests/compile-fail/data_race/rmw_race.rs @@ -0,0 +1,43 @@ + +use std::thread::spawn; +use std::sync::atomic::{AtomicUsize, Ordering}; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +static SYNC: AtomicUsize = AtomicUsize::new(0); + +pub fn main() { + let mut a = 0u32; + let b = &mut a as *mut u32; + let c = EvilSend(b); + + unsafe { + let j1 = spawn(move || { + *c.0 = 1; + SYNC.store(1, Ordering::Release); + }); + + let j2 = spawn(move || { + if SYNC.swap(2, Ordering::Relaxed) == 1 { + // Blocks the acquire-release sequence + SYNC.store(3, Ordering::Relaxed); + } + }); + + let j3 = spawn(move || { + if SYNC.load(Ordering::Acquire) == 3 { + *c.0 //~ ERROR Data race + }else{ + 0 + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); + j3.join().unwrap(); + } +} \ No newline at end of file diff --git a/tests/compile-fail/data_race/write_write_race.rs b/tests/compile-fail/data_race/write_write_race.rs new file mode 100644 index 0000000000..22caf5f0f7 --- /dev/null +++ b/tests/compile-fail/data_race/write_write_race.rs @@ -0,0 +1,26 @@ + +use std::thread::spawn; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +pub fn main() { + let mut a = 0u32; + let b = &mut a as *mut u32; + let c = EvilSend(b); + unsafe { + let j1 = spawn(move || { + *c.0 = 32; + }); + + let j2 = spawn(move || { + *c.0 = 64; //~ ERROR Data race + }); + + j1.join().unwrap(); + j2.join().unwrap(); + } +} \ No newline at end of file diff --git a/tests/run-pass/concurrency/data_race.rs b/tests/run-pass/concurrency/data_race.rs new file mode 100644 index 0000000000..bc4a4e30e8 --- /dev/null +++ b/tests/run-pass/concurrency/data_race.rs @@ -0,0 +1,119 @@ +use std::sync::atomic::{AtomicUsize, fence, Ordering}; +use std::thread::spawn; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +static SYNC: AtomicUsize = AtomicUsize::new(0); + +fn test_fence_sync() { + let mut var = 0u32; + let ptr = &mut var as *mut u32; + let evil_ptr = EvilSend(ptr); + + + let j1 = spawn(move || { + unsafe { *evil_ptr.0 = 1; } + fence(Ordering::Release); + SYNC.store(1, Ordering::Relaxed) + }); + + let j2 = spawn(move || { + if SYNC.load(Ordering::Relaxed) == 1 { + fence(Ordering::Acquire); + unsafe { *evil_ptr.0 } + }else{ + 0 + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); +} + + +fn 
test_multiple_reads() { + let mut var = 42u32; + let ptr = &mut var as *mut u32; + let evil_ptr = EvilSend(ptr); + + let j1 = spawn(move || unsafe {*evil_ptr.0}); + let j2 = spawn(move || unsafe {*evil_ptr.0}); + let j3 = spawn(move || unsafe {*evil_ptr.0}); + let j4 = spawn(move || unsafe {*evil_ptr.0}); + + assert_eq!(j1.join().unwrap(), 42); + assert_eq!(j2.join().unwrap(), 42); + assert_eq!(j3.join().unwrap(), 42); + assert_eq!(j4.join().unwrap(), 42); + + var = 10; + assert_eq!(var, 10); +} + +pub fn test_rmw_no_block() { + let mut a = 0u32; + let b = &mut a as *mut u32; + let c = EvilSend(b); + + unsafe { + let j1 = spawn(move || { + *c.0 = 1; + SYNC.store(1, Ordering::Release); + }); + + let j2 = spawn(move || { + if SYNC.swap(2, Ordering::Relaxed) == 1 { + //No op, blocking store removed + } + }); + + let j3 = spawn(move || { + if SYNC.load(Ordering::Acquire) == 2 { + *c.0 + }else{ + 0 + } + }); + + j1.join().unwrap(); + j2.join().unwrap(); + let v = j3.join().unwrap(); + assert!(v == 1 || v == 2); + } +} + +pub fn test_release_no_block() { + let mut a = 0u32; + let b = &mut a as *mut u32; + let c = EvilSend(b); + + unsafe { + let j1 = spawn(move || { + *c.0 = 1; + SYNC.store(1, Ordering::Release); + SYNC.store(3, Ordering::Relaxed); + }); + + let j2 = spawn(move || { + if SYNC.load(Ordering::Acquire) == 3 { + *c.0 + }else{ + 0 + } + }); + + j1.join().unwrap(); + assert_eq!(j2.join().unwrap(),1); + } +} + +pub fn main() { + test_fence_sync(); + test_multiple_reads(); + test_rmw_no_block(); + test_release_no_block(); +} \ No newline at end of file diff --git a/tests/run-pass/concurrency/data_race.stderr b/tests/run-pass/concurrency/data_race.stderr new file mode 100644 index 0000000000..b01247aea4 --- /dev/null +++ b/tests/run-pass/concurrency/data_race.stderr @@ -0,0 +1,2 @@ +warning: thread support is experimental. + diff --git a/tests/run-pass/concurrency/linux-futex.stderr b/tests/run-pass/concurrency/linux-futex.stderr index 2dbfb7721d..b01247aea4 100644 --- a/tests/run-pass/concurrency/linux-futex.stderr +++ b/tests/run-pass/concurrency/linux-futex.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental. For example, Miri does not detect data races yet. +warning: thread support is experimental. diff --git a/tests/run-pass/concurrency/simple.stderr b/tests/run-pass/concurrency/simple.stderr index 7060411278..f1550dd25a 100644 --- a/tests/run-pass/concurrency/simple.stderr +++ b/tests/run-pass/concurrency/simple.stderr @@ -1,4 +1,4 @@ -warning: thread support is experimental. For example, Miri does not detect data races yet. +warning: thread support is experimental. thread '' panicked at 'Hello!', $DIR/simple.rs:54:9 note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace diff --git a/tests/run-pass/concurrency/sync.stderr b/tests/run-pass/concurrency/sync.stderr index 2dbfb7721d..b01247aea4 100644 --- a/tests/run-pass/concurrency/sync.stderr +++ b/tests/run-pass/concurrency/sync.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental. For example, Miri does not detect data races yet. +warning: thread support is experimental. diff --git a/tests/run-pass/concurrency/thread_locals.stderr b/tests/run-pass/concurrency/thread_locals.stderr index 2dbfb7721d..b01247aea4 100644 --- a/tests/run-pass/concurrency/thread_locals.stderr +++ b/tests/run-pass/concurrency/thread_locals.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental. For example, Miri does not detect data races yet. +warning: thread support is experimental. 
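For contrast with the compile-fail tests added above, the pattern below is the kind of program the detector is expected to accept: a release store synchronising with an acquire load orders the surrounding non-atomic accesses. This is a standalone sketch that mirrors the EvilSend helper used in the tests; it is not a file added by this patch.

use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread::spawn;

#[derive(Copy, Clone)]
struct EvilSend<T>(pub T);

unsafe impl<T> Send for EvilSend<T> {}
unsafe impl<T> Sync for EvilSend<T> {}

static SYNC: AtomicUsize = AtomicUsize::new(0);

fn main() {
    let mut data = 0u32;
    let ptr = EvilSend(&mut data as *mut u32);

    let j1 = spawn(move || unsafe {
        *ptr.0 = 1;                        // plain (non-atomic) write
        SYNC.store(1, Ordering::Release);  // publish it with a release store
    });

    let j2 = spawn(move || unsafe {
        // The acquire load synchronises with the release store, so when it
        // observes 1 the non-atomic read happens-after the write above and
        // no race is reported; with Relaxed orderings on both sides the
        // detector is expected to flag the read instead.
        if SYNC.load(Ordering::Acquire) == 1 { *ptr.0 } else { 0 }
    });

    j1.join().unwrap();
    let v = j2.join().unwrap();
    assert!(v == 0 || v == 1);
}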
diff --git a/tests/run-pass/concurrency/tls_lib_drop.stderr b/tests/run-pass/concurrency/tls_lib_drop.stderr index 2dbfb7721d..b01247aea4 100644 --- a/tests/run-pass/concurrency/tls_lib_drop.stderr +++ b/tests/run-pass/concurrency/tls_lib_drop.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental. For example, Miri does not detect data races yet. +warning: thread support is experimental. diff --git a/tests/run-pass/libc.stderr b/tests/run-pass/libc.stderr index 2dbfb7721d..b01247aea4 100644 --- a/tests/run-pass/libc.stderr +++ b/tests/run-pass/libc.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental. For example, Miri does not detect data races yet. +warning: thread support is experimental. diff --git a/tests/run-pass/panic/concurrent-panic.stderr b/tests/run-pass/panic/concurrent-panic.stderr index eb5b5f59a0..ca6031e57b 100644 --- a/tests/run-pass/panic/concurrent-panic.stderr +++ b/tests/run-pass/panic/concurrent-panic.stderr @@ -1,4 +1,4 @@ -warning: thread support is experimental. For example, Miri does not detect data races yet. +warning: thread support is experimental. Thread 1 starting, will block on mutex Thread 1 reported it has started From cae4302003f32e808b822b1b7b351894548c6c0e Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Mon, 2 Nov 2020 03:06:30 +0000 Subject: [PATCH 03/17] Fix & rebase --- src/data_race.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/data_race.rs b/src/data_race.rs index ac928071be..35898f1d93 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -176,19 +176,19 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let mut current_state = alloc.global.current_thread_state_mut(); if atomic == AtomicReadOp::Relaxed { // Perform relaxed atomic load - for range in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { + for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { range.load_relaxed(&mut *current_state); } }else{ // Perform acquire(or seq-cst) atomic load - for range in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { + for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { range.acquire(&mut *current_state); } } // Log changes to atomic memory if log::log_enabled!(log::Level::Trace) { - for range in alloc.alloc_ranges.get_mut().iter(offset, size) { + for (_,range) in alloc.alloc_ranges.get_mut().iter(offset, size) { log::trace!( " updated atomic memory({:?}, offset={}, size={}) to {:#?}", place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), @@ -227,19 +227,19 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx if atomic == AtomicWriteOp::Relaxed { // Perform relaxed atomic store - for range in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { + for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { range.store_relaxed(&mut *current_state, current_thread); } }else{ // Perform release(or seq-cst) atomic store - for range in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { + for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { range.release(&mut *current_state, current_thread); } } // Log changes to atomic memory if log::log_enabled!(log::Level::Trace) { - for range in alloc.alloc_ranges.get_mut().iter(offset, size) { + for (_,range) in alloc.alloc_ranges.get_mut().iter(offset, size) { log::trace!( " updated atomic memory({:?}, offset={}, size={}) to {:#?}", place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), @@ -279,7 
+279,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); let release = matches!(atomic, Release | AcqRel | SeqCst); - for range in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { + for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { //FIXME: this is probably still slightly wrong due to the quirks // in the c++11 memory model if acquire { @@ -298,7 +298,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx // Log changes to atomic memory if log::log_enabled!(log::Level::Trace) { - for range in alloc.alloc_ranges.get_mut().iter(offset, size) { + for (_,range) in alloc.alloc_ranges.get_mut().iter(offset, size) { log::trace!( " updated atomic memory({:?}, offset={}, size={}) to {:#?}", place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), @@ -733,7 +733,7 @@ impl VClockAlloc { // The alloc-ranges are not split, however changes are not going to be made // to the ranges being tested, so this is ok let mut alloc_ranges = self.alloc_ranges.borrow_mut(); - for range in alloc_ranges.iter_mut(pointer.offset, len) { + for (_,range) in alloc_ranges.iter_mut(pointer.offset, len) { if range.read_race_detect(&*current_state, current_thread) { // Report data-race return Self::report_data_race( @@ -754,7 +754,7 @@ impl VClockAlloc { if self.global.multi_threaded.get() { let current_thread = self.global.current_thread(); let current_state = self.global.current_thread_state(); - for range in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { + for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { if range.write_race_detect(&*current_state, current_thread) { // Report data-race return Self::report_data_race( @@ -775,7 +775,7 @@ impl VClockAlloc { if self.global.multi_threaded.get() { let current_thread = self.global.current_thread(); let current_state = self.global.current_thread_state(); - for range in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { + for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { if range.write_race_detect(&*current_state, current_thread) { // Report data-race return Self::report_data_race( From 2e75de5ac9c2805d25f6b5452d8397f99ee5e342 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Mon, 2 Nov 2020 12:40:12 +0000 Subject: [PATCH 04/17] Mark all new tests as disabled on windows --- tests/compile-fail/data_race/read_write_race.rs | 1 + tests/compile-fail/data_race/relax_acquire_race.rs | 1 + tests/compile-fail/data_race/release_seq_race.rs | 1 + tests/compile-fail/data_race/rmw_race.rs | 1 + tests/compile-fail/data_race/write_write_race.rs | 1 + tests/run-pass/concurrency/data_race.rs | 3 +++ 6 files changed, 8 insertions(+) diff --git a/tests/compile-fail/data_race/read_write_race.rs b/tests/compile-fail/data_race/read_write_race.rs index ece99b4a87..6a5f3f7d20 100644 --- a/tests/compile-fail/data_race/read_write_race.rs +++ b/tests/compile-fail/data_race/read_write_race.rs @@ -1,3 +1,4 @@ +// ignore-windows: Concurrency on Windows is not supported yet. use std::thread::spawn; diff --git a/tests/compile-fail/data_race/relax_acquire_race.rs b/tests/compile-fail/data_race/relax_acquire_race.rs index cc96083546..753d30b8f5 100644 --- a/tests/compile-fail/data_race/relax_acquire_race.rs +++ b/tests/compile-fail/data_race/relax_acquire_race.rs @@ -1,3 +1,4 @@ +// ignore-windows: Concurrency on Windows is not supported yet. 
use std::thread::spawn; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/tests/compile-fail/data_race/release_seq_race.rs b/tests/compile-fail/data_race/release_seq_race.rs index 8b3ffbcd9d..44360f70d5 100644 --- a/tests/compile-fail/data_race/release_seq_race.rs +++ b/tests/compile-fail/data_race/release_seq_race.rs @@ -1,3 +1,4 @@ +// ignore-windows: Concurrency on Windows is not supported yet. // compile-flags: -Zmiri-disable-isolation use std::thread::{spawn, sleep}; diff --git a/tests/compile-fail/data_race/rmw_race.rs b/tests/compile-fail/data_race/rmw_race.rs index 9c31c79ebf..9dfa2751d5 100644 --- a/tests/compile-fail/data_race/rmw_race.rs +++ b/tests/compile-fail/data_race/rmw_race.rs @@ -1,3 +1,4 @@ +// ignore-windows: Concurrency on Windows is not supported yet. use std::thread::spawn; use std::sync::atomic::{AtomicUsize, Ordering}; diff --git a/tests/compile-fail/data_race/write_write_race.rs b/tests/compile-fail/data_race/write_write_race.rs index 22caf5f0f7..0c46e5c925 100644 --- a/tests/compile-fail/data_race/write_write_race.rs +++ b/tests/compile-fail/data_race/write_write_race.rs @@ -1,3 +1,4 @@ +// ignore-windows: Concurrency on Windows is not supported yet. use std::thread::spawn; diff --git a/tests/run-pass/concurrency/data_race.rs b/tests/run-pass/concurrency/data_race.rs index bc4a4e30e8..40a7c162a0 100644 --- a/tests/run-pass/concurrency/data_race.rs +++ b/tests/run-pass/concurrency/data_race.rs @@ -1,3 +1,6 @@ +// ignore-windows: Concurrency on Windows is not supported yet. + + use std::sync::atomic::{AtomicUsize, fence, Ordering}; use std::thread::spawn; From fe2e857cc3744a69b1d1dc2fe77f94da10978091 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Mon, 2 Nov 2020 13:08:09 +0000 Subject: [PATCH 05/17] Add newlines at end of file + use replace. 
add dangling thread variant of one of the benchmarks --- .../mse_and_dangling_thread/Cargo.toml | 7 +++++ .../mse_and_dangling_thread/src/main.rs | 30 +++++++++++++++++++ src/data_race.rs | 10 +++---- .../compile-fail/data_race/read_write_race.rs | 2 +- .../data_race/relax_acquire_race.rs | 2 +- .../data_race/release_seq_race.rs | 2 +- tests/compile-fail/data_race/rmw_race.rs | 2 +- .../data_race/write_write_race.rs | 2 +- tests/run-pass/concurrency/data_race.rs | 2 +- 9 files changed, 47 insertions(+), 12 deletions(-) create mode 100644 bench-cargo-miri/mse_and_dangling_thread/Cargo.toml create mode 100644 bench-cargo-miri/mse_and_dangling_thread/src/main.rs diff --git a/bench-cargo-miri/mse_and_dangling_thread/Cargo.toml b/bench-cargo-miri/mse_and_dangling_thread/Cargo.toml new file mode 100644 index 0000000000..7b4c2dc758 --- /dev/null +++ b/bench-cargo-miri/mse_and_dangling_thread/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "mse" +version = "0.1.0" +authors = ["Ralf Jung "] +edition = "2018" + +[dependencies] diff --git a/bench-cargo-miri/mse_and_dangling_thread/src/main.rs b/bench-cargo-miri/mse_and_dangling_thread/src/main.rs new file mode 100644 index 0000000000..008e9c80ef --- /dev/null +++ b/bench-cargo-miri/mse_and_dangling_thread/src/main.rs @@ -0,0 +1,30 @@ +static EXPECTED: &[u8] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 1, 0, 255, 255, 1, 0, 255, 255, 1, 0, 255, 255, 1, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 254, 255, 0, 0, 254, 255, 0, 0, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 255, 255, 1, 0, 255, 255, 1, 0, 0, 0, 1, 0, 255, 255, 2, 0, 255, 255, 2, 0, 255, 255, 2, 0, 255, 255, 2, 0, 255, 255, 2, 0, 255, 255, 1, 0, 255, 255, 1, 0, 255, 255, 0, 0, 254, 255, 0, 0, 254, 255, 0, 0, 254, 255, 255, 255, 254, 255, 254, 255, 254, 255, 253, 255, 253, 255, 253, 255, 253, 255, 252, 255, 254, 255, 251, 255, 254, 255, 251, 255, 254, 255, 252, 255, 255, 255, 252, 255, 0, 0, 252, 255, 0, 0, 252, 255, 1, 0, 252, 255, 1, 0, 252, 255, 2, 0, 252, 255, 2, 0, 252, 255, 2, 0, 252, 255, 2, 0, 252, 255, 2, 0, 253, 255, 1, 0, 252, 255, 0, 0, 252, 255, 255, 255, 251, 255, 0, 0, 251, 255, 0, 0, 251, 255, 0, 0, 252, 255, 2, 0, 252, 255, 3, 0, 252, 255, 4, 0, 253, 255, 5, 0, 254, 255, 5, 0, 253, 255, 6, 0, 253, 255, 6, 0, 253, 255, 5, 0, 253, 255, 5, 0, 254, 255, 4, 0, 254, 255, 3, 0, 251, 255, 0, 0, 250, 255, 255, 255, 253, 255, 254, 255, 252, 255, 252, 255, 247, 255, 251, 255, 247, 255, 252, 255, 252, 255, 254, 255, 252, 255, 254, 255, 252, 255, 255, 255, 254, 255, 1, 0, 1, 0, 1, 0, 4, 0, 2, 0, 8, 0, 2, 0, 12, 0, 1, 0, 13, 0, 0, 0, 12, 0, 0, 0, 11, 0, 255, 255, 8, 0, 254, 255, 7, 0, 0, 0, 7, 0, 253, 255, 11, 0, 248, 255, 15, 0, 247, 255, 17, 0, 250, 255, 17, 0, 251, 255, 13, 0, 253, 255, 7, 0, 255, 255, 3, 0, 255, 255, 254, 255, 255, 255, 252, 255, 255, 255, 252, 255, 254, 255, 250, 255, 255, 255, 242, 255, 254, 255, 239, 255, 252, 255, 248, 255, 255, 255, 249, 255, 5, 0, 239, 255, 7, 0, 238, 255, 10, 0, 249, 255, 18, 0, 254, 255, 25, 0, 253, 255, 27, 0, 0, 0, 31, 0, 4, 0, 34, 0, 4, 0, 34, 0, 8, 0, 36, 0, 8, 0, 37, 0, 2, 0, 36, 0, 4, 0, 34, 0, 8, 0, 28, 0, 3, 0, 15, 0, 255, 255, 11, 0, 0, 0, 12, 0, 251, 255, 8, 0, 252, 255, 10, 0, 0, 0, 23, 0, 252, 255, 31, 0, 248, 255, 30, 0, 254, 255, 30, 0, 255, 255, 26, 0, 250, 255, 22, 0, 250, 255, 20, 0, 244, 255, 15, 0, 237, 255, 10, 0, 246, 255, 13, 0, 242, 255, 6, 0, 213, 255, 243, 255, 213, 255, 240, 255, 247, 255, 244, 255, 246, 255, 227, 255, 214, 255, 216, 255, 219, 255, 228, 255, 251, 255, 235, 255, 1, 0, 
232, 255, 248, 255, 236, 255, 4, 0, 238, 255, 26, 0, 232, 255, 44, 0, 230, 255, 66, 0, 226, 255, 86, 0, 219, 255, 88, 0, 215, 255, 72, 0, 210, 255, 50, 0, 225, 255, 28, 0, 23, 0, 14, 0, 64, 0, 16, 0, 51, 0, 26, 0, 32, 0, 34, 0, 39, 0, 42, 0, 48, 0, 35, 0, 58, 0, 255, 255, 72, 0, 220, 255, 69, 0, 197, 255, 58, 0, 158, 255, 54, 0, 132, 255, 36, 0, 153, 255, 12, 0, 146, 255, 5, 0, 83, 255, 237, 255, 110, 255, 197, 255, 252, 255, 214, 255, 51, 0, 1, 0, 233, 255, 250, 255, 226, 255, 250, 255, 45, 0, 46, 0, 47, 0, 70, 0, 6, 0, 55, 0, 19, 0, 60, 0, 38, 0, 62, 0, 42, 0, 47, 0, 61, 0, 46, 0, 40, 0, 42, 0, 237, 255, 22, 0, 222, 255, 6, 0, 221, 255, 206, 255, 195, 255, 115, 255, 219, 255, 85, 255, 17, 0, 93, 255, 26, 0, 76, 255, 46, 0, 102, 255, 80, 0, 193, 255, 48, 0, 252, 255, 18, 0, 20, 0, 50, 0, 47, 0, 58, 0, 53, 0, 44, 0, 61, 0, 57, 0, 85, 0, 37, 0, 80, 0, 0, 0, 86, 0, 248, 255, 106, 0, 161, 255, 49, 0, 43, 255, 248, 255, 125, 255, 47, 0, 49, 0, 63, 0, 40, 0, 217, 255, 187, 255, 182, 255, 219, 255, 236, 255, 63, 0, 244, 255, 58, 0, 242, 255, 244, 255, 25, 0, 225, 255, 41, 0, 11, 0, 45, 0, 76, 0, 47, 0, 167, 0, 5, 0, 5, 1, 219, 255, 21, 1, 173, 255, 183, 0, 84, 255, 35, 0, 134, 255, 177, 255, 138, 0, 186, 255, 10, 1, 69, 0, 124, 0, 228, 0, 0, 0, 135, 1, 227, 255, 82, 2, 172, 255, 190, 2, 178, 255, 115, 2, 248, 255, 39, 2, 243, 255, 253, 1, 13, 0, 116, 1, 120, 0, 96, 1, 125, 0, 110, 2, 127, 0, 179, 2, 223, 0, 106, 1, 126, 0, 130, 1, 223, 255, 147, 3, 198, 0, 190, 3, 201, 1, 200, 1, 42, 1, 244, 1, 233, 0, 3, 4, 213, 1, 72, 4, 170, 1, 150, 3, 160, 0, 43, 4, 141, 0, 196, 4, 189, 0, 221, 4, 164, 0, 95, 5, 41, 1, 98, 5, 247, 1, 19, 5, 190, 2, 14, 6, 161, 3, 7, 7, 87, 3, 216, 6, 35, 2, 38, 7, 90, 2, 136, 7, 64, 3, 200, 6, 28, 3, 199, 6, 165, 3, 169, 7, 105, 5, 143, 7, 26, 6, 57, 8, 205, 5, 156, 10, 169, 5, 132, 11, 25, 5, 208, 10, 181, 4, 156, 10, 66, 5, 227, 9, 170, 5, 166, 9, 117, 6, 45, 12, 63, 8, 42, 13, 128, 8, 136, 10, 155, 7, 109, 11, 1, 9, 6, 15, 98, 10, 121, 9, 136, 8, 147, 252, 189, 7, 43, 247, 63, 10, 147, 249, 92, 11, 172, 248, 172, 10, 112, 245, 137, 11, 76, 246, 44, 12, 184, 247, 138, 11, 118, 246, 144, 12, 94, 246, 171, 13, 112, 247, 162, 12, 168, 246, 33, 13, 63, 246, 29, 15, 226, 247, 188, 14, 190, 248, 75, 15, 238, 247, 86, 17, 19, 247, 118, 11, 10, 247, 232, 254, 238, 247, 30, 249, 56, 248, 124, 250, 6, 247, 1, 250, 161, 246, 3, 249, 81, 247, 117, 250, 60, 247, 202, 250, 212, 247, 60, 250, 15, 249, 140, 250, 34, 248, 221, 249, 105, 247, 218, 249, 205, 248, 113, 251, 138, 248, 90, 250, 41, 248, 230, 248]; +static PCM: &[i16] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1, 0, 1, 0, 0, -2, 0, -2, 0, -2, 0, -2, -2, -2, -3, -3, -3, -3, -4, -2, -5, -2, -5, -2, -4, 0, -4, 0, -4, 0, -4, 1, -4, 1, -4, 2, -4, 2, -4, 2, -4, 2, -4, 2, -3, 1, -4, 0, -4, 0, -5, 0, -5, 0, -5, 0, -4, 2, -4, 3, -4, 4, -3, 5, -2, 5, -3, 6, -3, 6, -3, 5, -3, 5, -2, 4, -2, 3, -5, 0, -6, 0, -3, -2, -4, -4, -9, -5, -9, -4, -4, -2, -4, -2, -4, 0, -2, 1, 1, 1, 4, 2, 8, 2, 12, 1, 13, 0, 12, 0, 11, 0, 8, -2, 7, 0, 7, -3, 11, -8, 15, -9, 17, -6, 17, -5, 13, -3, 7, 0, 3, 0, -2, 0, -4, 0, -4, -2, -6, 0, -14, -2, -17, -4, -8, 0, -7, 5, -17, 7, -18, 10, -7, 18, -2, 25, -3, 27, 0, 31, 4, 34, 4, 34, 8, 36, 8, 37, 2, 36, 4, 34, 8, 28, 3, 15, 0, 11, 0, 12, -5, 8, -4, 10, 0, 23, -4, 31, -8, 30, -2, 30, 0, 26, -6, 22, -6, 20, -12, 15, -19, 10, -10, 13, -14, 6, -43, -13, -43, -16, -9, -12, -10, -29, -42, -40, -37, -28, -5, -21, 1, -24, -8, -20, 4, -18, 26, -24, 44, -26, 66, -30, 86, -37, 88, -41, 72, -46, 50, -31, 28, 23, 14, 64, 16, 51, 26, 32, 34, 39, 42, 48, 35, 58, 0, 72, -36, 69, -59, 58, -98, 54, -124, 36, -103, 12, -110, 5, -173, -19, -146, -59, -4, -42, 51, 1, -23, -6, -30, -6, 45, 46, 47, 70, 6, 55, 19, 60, 38, 62, 42, 47, 61, 46, 40, 42, -19, 22, -34, 6, -35, -50, -61, -141, -37, -171, 17, -163, 26, -180, 46, -154, 80, -63, 48, -4, 18, 20, 50, 47, 58, 53, 44, 61, 57, 85, 37, 80, 0, 86, -8, 106, -95, 49, -213, -8, -131, 47, 49, 63, 40, -39, -69, -74, -37, -20, 63, -12, 58, -14, -12, 25, -31, 41, 11, 45, 76, 47, 167, 5, 261, -37, 277, -83, 183, -172, 35, -122, -79, 138, -70, 266, 69, 124, 228, 0, 391, -29, 594, -84, 702, -78, 627, -8, 551, -13, 509, 13, 372, 120, 352, 125, 622, 127, 691, 223, 362, 126, 386, -33, 915, 198, 958, 457, 456, 298, 500, 233, 1027, 469, 1096, 426, 918, 160, 1067, 141, 1220, 189, 1245, 164, 1375, 297, 1378, 503, 1299, 702, 1550, 929, 1799, 855, 1752, 547, 1830, 602, 1928, 832, 1736, 796, 1735, 933, 1961, 1385, 1935, 1562, 2105, 1485, 2716, 1449, 2948, 1305, 2768, 1205, 2716, 1346, 2531, 1450, 2470, 1653, 3117, 2111, 3370, 2176, 2696, 1947, 2925, 2305, 3846, 2658, 2425, 2184, -877, 1981, -2261, 2623, -1645, 2908, -1876, 2732, -2704, 2953, -2484, 3116, -2120, 2954, -2442, 3216, -2466, 3499, -2192, 3234, -2392, 3361, -2497, 3869, -2078, 3772, -1858, 3915, -2066, 4438, -2285, 2934, -2294, -280, -2066, -1762, -1992, -1412, -2298, -1535, -2399, -1789, -2223, -1419, -2244, -1334, -2092, -1476, -1777, -1396, -2014, -1571, -2199, -1574, -1843, -1167, -1910, -1446, -2007, -1818]; + +fn main() { + let thread = std::thread::spawn(|| 4); + for _ in 0..2 { + mse(PCM.len(), PCM, EXPECTED); + } + assert_eq!(4, thread.join().unwrap()); +} + +fn read_i16(buffer: &[u8], index: usize) -> i16 { + const SIZE: usize = 
std::mem::size_of::(); + let mut bytes: [u8; SIZE] = [0u8; SIZE]; + bytes.copy_from_slice(&buffer[(index * SIZE)..(index * SIZE + SIZE)]); + unsafe { std::mem::transmute(bytes) } +} + +fn mse(samples: usize, frame_buf: &[i16], buf_ref: &[u8]) -> f64 { + let mut mse = 0.0; + let max_samples = std::cmp::min(buf_ref.len() / 2, samples as usize); + for i in 0..max_samples { + let ref_res = read_i16(buf_ref, i); + let info_res = frame_buf[i as usize]; + let diff = (ref_res - info_res).abs(); + mse += f64::from(diff.pow(2)); + } + mse / max_samples as f64 +} + diff --git a/src/data_race.rs b/src/data_race.rs index 35898f1d93..bd75299af4 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -65,12 +65,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx fn read_immediate_racy(&self, op: MPlaceTy<'tcx, Tag>) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { let this = self.eval_context_ref(); let data_race = &*this.memory.extra.data_race; - let old = data_race.multi_threaded.get(); - data_race.multi_threaded.set(false); + let old = data_race.multi_threaded.replace(false); let res = this.read_immediate(op.into()); - data_race.multi_threaded.set(old); + res } @@ -80,9 +79,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let data_race = &*this.memory.extra.data_race; - let old = data_race.multi_threaded.get(); + let old = data_race.multi_threaded.replace(false); - data_race.multi_threaded.set(false); let imm = this.write_immediate(src, dest.into()); let data_race = &*this.memory.extra.data_race; @@ -1404,4 +1402,4 @@ mod tests { "Invalid alt (>=):\n l: {:?}\n r: {:?}",l,r ); } -} \ No newline at end of file +} diff --git a/tests/compile-fail/data_race/read_write_race.rs b/tests/compile-fail/data_race/read_write_race.rs index 6a5f3f7d20..42fd7a51ff 100644 --- a/tests/compile-fail/data_race/read_write_race.rs +++ b/tests/compile-fail/data_race/read_write_race.rs @@ -24,4 +24,4 @@ pub fn main() { j1.join().unwrap(); j2.join().unwrap(); } -} \ No newline at end of file +} diff --git a/tests/compile-fail/data_race/relax_acquire_race.rs b/tests/compile-fail/data_race/relax_acquire_race.rs index 753d30b8f5..f7d44c30b6 100644 --- a/tests/compile-fail/data_race/relax_acquire_race.rs +++ b/tests/compile-fail/data_race/relax_acquire_race.rs @@ -40,4 +40,4 @@ pub fn main() { j2.join().unwrap(); j3.join().unwrap(); } -} \ No newline at end of file +} diff --git a/tests/compile-fail/data_race/release_seq_race.rs b/tests/compile-fail/data_race/release_seq_race.rs index 44360f70d5..dc852cdb4d 100644 --- a/tests/compile-fail/data_race/release_seq_race.rs +++ b/tests/compile-fail/data_race/release_seq_race.rs @@ -44,4 +44,4 @@ pub fn main() { j2.join().unwrap(); j3.join().unwrap(); } -} \ No newline at end of file +} diff --git a/tests/compile-fail/data_race/rmw_race.rs b/tests/compile-fail/data_race/rmw_race.rs index 9dfa2751d5..bebd01efa1 100644 --- a/tests/compile-fail/data_race/rmw_race.rs +++ b/tests/compile-fail/data_race/rmw_race.rs @@ -41,4 +41,4 @@ pub fn main() { j2.join().unwrap(); j3.join().unwrap(); } -} \ No newline at end of file +} diff --git a/tests/compile-fail/data_race/write_write_race.rs b/tests/compile-fail/data_race/write_write_race.rs index 0c46e5c925..aca19a46c1 100644 --- a/tests/compile-fail/data_race/write_write_race.rs +++ b/tests/compile-fail/data_race/write_write_race.rs @@ -24,4 +24,4 @@ pub fn main() { j1.join().unwrap(); j2.join().unwrap(); } -} \ No newline 
at end of file +} diff --git a/tests/run-pass/concurrency/data_race.rs b/tests/run-pass/concurrency/data_race.rs index 40a7c162a0..75e56e8c8d 100644 --- a/tests/run-pass/concurrency/data_race.rs +++ b/tests/run-pass/concurrency/data_race.rs @@ -119,4 +119,4 @@ pub fn main() { test_multiple_reads(); test_rmw_no_block(); test_release_no_block(); -} \ No newline at end of file +} From 95c99b2044a75f27e691308ebbb7ed0d4e2cbf3b Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Wed, 4 Nov 2020 21:35:48 +0000 Subject: [PATCH 06/17] Detect races between atomic and non-atomic accesses of a variable, previously only data races between two non-atomic accesses were detected. --- src/data_race.rs | 350 ++++++++++++++---- src/shims/posix/linux/sync.rs | 5 +- .../data_race/atomic_read_write_race.rs | 31 ++ .../data_race/atomic_read_write_race_alt.rs | 31 ++ .../data_race/atomic_write_read_race.rs | 31 ++ .../data_race/atomic_write_read_race_alt.rs | 31 ++ .../data_race/atomic_write_write_race.rs | 31 ++ .../data_race/atomic_write_write_race_alt.rs | 31 ++ 8 files changed, 463 insertions(+), 78 deletions(-) create mode 100644 tests/compile-fail/data_race/atomic_read_write_race.rs create mode 100644 tests/compile-fail/data_race/atomic_read_write_race_alt.rs create mode 100644 tests/compile-fail/data_race/atomic_write_read_race.rs create mode 100644 tests/compile-fail/data_race/atomic_write_read_race_alt.rs create mode 100644 tests/compile-fail/data_race/atomic_write_write_race.rs create mode 100644 tests/compile-fail/data_race/atomic_write_write_race_alt.rs diff --git a/src/data_race.rs b/src/data_race.rs index bd75299af4..8e7a3548f5 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -6,8 +6,16 @@ //! and RMW operations //! This does not explore weak memory orders and so can still miss data-races //! but should not report false-positives +//! Data-race definiton from(https://en.cppreference.com/w/cpp/language/memory_model#Threads_and_data_races): +//! - if a memory location is accessed by twice is a data-race unless: +//! - both operations execute on the same thread/signal-handler +//! - both conflicting operations are atomic operations (1 atomic and 1 non-atomic race) +//! - 1 of the operations happens-before the other operation (see link for definition) -use std::{fmt::{self, Debug}, cmp::Ordering, rc::Rc, cell::{Cell, RefCell, Ref, RefMut}, ops::Index}; +use std::{ + fmt::{self, Debug}, cmp::Ordering, rc::Rc, + cell::{Cell, RefCell, Ref, RefMut}, ops::Index, mem +}; use rustc_index::vec::{Idx, IndexVec}; use rustc_target::abi::Size; @@ -16,7 +24,11 @@ use rustc_data_structures::fx::FxHashMap; use smallvec::SmallVec; -use crate::*; +use crate::{ + MiriEvalContext, ThreadId, Tag, MiriEvalContextExt, RangeMap, + MPlaceTy, ImmTy, InterpResult, Pointer, ScalarMaybeUninit, + OpTy, Immediate, MemPlaceMeta +}; pub type AllocExtra = VClockAlloc; pub type MemoryExtra = Rc; @@ -58,8 +70,8 @@ pub enum AtomicFenceOp { } /// Evaluation context extensions -impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {} -pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> { +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} +pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { /// Variant of `read_immediate` that does not perform `data-race` checks. 
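To make the happens-before rule from the comment above concrete: an access races with the last write to a location unless the writer's timestamp is already contained in the accessing thread's vector clock, which is the `write <= clocks.clock[write_thread]` test performed by the per-cell detection functions later in this patch. A minimal sketch of that comparison in plain Rust, using a toy vector-clock type rather than Miri's vector_clock types:

#[derive(Clone, Default, Debug)]
struct VClock(Vec<u64>);

impl VClock {
    // Timestamp this clock has observed for `thread` (0 if never observed).
    fn get(&self, thread: usize) -> u64 {
        self.0.get(thread).copied().unwrap_or(0)
    }
    // Record that `thread` has been observed up to `timestamp`.
    fn set(&mut self, thread: usize, timestamp: u64) {
        if self.0.len() <= thread {
            self.0.resize(thread + 1, 0);
        }
        self.0[thread] = timestamp;
    }
}

// The last write (by `write_thread` at `write_ts`) happens-before the current
// access exactly when the accessing thread's clock already contains it.
fn write_happens_before(write_thread: usize, write_ts: u64, reader: &VClock) -> bool {
    write_ts <= reader.get(write_thread)
}

fn main() {
    let mut reader = VClock::default();
    // Thread 0 wrote at timestamp 3, but the reader has not observed it:
    // the two accesses are unordered, which is what gets reported as a race.
    assert!(!write_happens_before(0, 3, &reader));
    // After synchronization (join, mutex, or an acquire of a release store)
    // the reader's clock advances past the write and the race disappears.
    reader.set(0, 3);
    assert!(write_happens_before(0, 3, &reader));
}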
fn read_immediate_racy(&self, op: MPlaceTy<'tcx, Tag>) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { @@ -119,6 +131,26 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx this.read_scalar_racy(value_place.into()) } + /// Variant of `write_scalar_at_offfset` helper function that performs + /// an atomic load operation with verification instead + fn read_scalar_at_offset_atomic( + &mut self, + op: OpTy<'tcx, Tag>, + offset: u64, + layout: TyAndLayout<'tcx>, + atomic: AtomicReadOp + ) -> InterpResult<'tcx, ScalarMaybeUninit> { + let this = self.eval_context_mut(); + let op_place = this.deref_operand(op)?; + let offset = Size::from_bytes(offset); + // Ensure that the following read at an offset is within bounds + assert!(op_place.layout.size >= offset + layout.size); + let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; + let res = this.read_scalar_racy(value_place.into())?; + this.validate_atomic_load(value_place, atomic)?; + Ok(res) + } + /// Variant of `write_scalar_at_offfset` helper function that does not perform /// data-race checks. fn write_scalar_at_offset_racy( @@ -137,10 +169,28 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx this.write_scalar_racy(value.into(), value_place.into()) } + /// Load the data race allocation state for a given memory place + /// also returns the size and offset of the result in the allocation + /// metadata + /// This is used for atomic loads since unconditionally requesteing + /// mutable access causes issues for read-only memory, which will + /// fail validation on mutable access + fn load_data_race_state_ref<'a>( + &'a self, place: MPlaceTy<'tcx, Tag> + ) -> InterpResult<'tcx, (&'a VClockAlloc, Size, Size)> where 'mir: 'a { + let this = self.eval_context_ref(); + + let ptr = place.ptr.assert_ptr(); + let size = place.layout.size; + let data_race = &this.memory.get_raw(ptr.alloc_id)?.extra.data_race; + + Ok((data_race, size, ptr.offset)) + } + /// Load the data race allocation state for a given memory place /// also returns the size and the offset of the result in the allocation /// metadata - fn load_data_race_state<'a>( + fn load_data_race_state_mut<'a>( &'a mut self, place: MPlaceTy<'tcx, Tag> ) -> InterpResult<'tcx, (&'a mut VClockAlloc, Size, Size)> where 'mir: 'a { let this = self.eval_context_mut(); @@ -164,29 +214,42 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let ( alloc, size, offset - ) = this.load_data_race_state(place)?; + ) = this.load_data_race_state_ref(place)?; log::trace!( "Atomic load on {:?} with ordering {:?}, in memory({:?}, offset={}, size={})", alloc.global.current_thread(), atomic, place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes() ); + let current_thread = alloc.global.current_thread(); let mut current_state = alloc.global.current_thread_state_mut(); if atomic == AtomicReadOp::Relaxed { // Perform relaxed atomic load - for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { - range.load_relaxed(&mut *current_state); + for (_,range) in alloc.alloc_ranges.borrow_mut().iter_mut(offset, size) { + if range.load_relaxed(&mut *current_state, current_thread) == Err(DataRace) { + mem::drop(current_state); + return VClockAlloc::report_data_race( + &alloc.global, range, "ATOMIC_LOAD", true, + place.ptr.assert_ptr(), size + ); + } } }else{ // Perform acquire(or seq-cst) atomic load - for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { - range.acquire(&mut 
*current_state); + for (_,range) in alloc.alloc_ranges.borrow_mut().iter_mut(offset, size) { + if range.acquire(&mut *current_state, current_thread) == Err(DataRace) { + mem::drop(current_state); + return VClockAlloc::report_data_race( + &alloc.global, range, "ATOMIC_LOAD", true, + place.ptr.assert_ptr(), size + ); + } } } // Log changes to atomic memory if log::log_enabled!(log::Level::Trace) { - for (_,range) in alloc.alloc_ranges.get_mut().iter(offset, size) { + for (_,range) in alloc.alloc_ranges.borrow_mut().iter(offset, size) { log::trace!( " updated atomic memory({:?}, offset={}, size={}) to {:#?}", place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), @@ -195,7 +258,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx } } - std::mem::drop(current_state); + mem::drop(current_state); let data_race = &*this.memory.extra.data_race; data_race.advance_vector_clock(); } @@ -214,7 +277,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let ( alloc, size, offset - ) = this.load_data_race_state(place)?; + ) = this.load_data_race_state_mut(place)?; let current_thread = alloc.global.current_thread(); let mut current_state = alloc.global.current_thread_state_mut(); log::trace!( @@ -226,12 +289,24 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx if atomic == AtomicWriteOp::Relaxed { // Perform relaxed atomic store for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { - range.store_relaxed(&mut *current_state, current_thread); + if range.store_relaxed(&mut *current_state, current_thread) == Err(DataRace) { + mem::drop(current_state); + return VClockAlloc::report_data_race( + &alloc.global, range, "ATOMIC_STORE", true, + place.ptr.assert_ptr(), size + ); + } } }else{ // Perform release(or seq-cst) atomic store for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { - range.release(&mut *current_state, current_thread); + if range.release(&mut *current_state, current_thread) == Err(DataRace) { + mem::drop(current_state); + return VClockAlloc::report_data_race( + &alloc.global, range, "ATOMIC_STORE", true, + place.ptr.assert_ptr(), size + ); + } } } @@ -246,7 +321,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx } } - std::mem::drop(current_state); + mem::drop(current_state); let data_race = &*this.memory.extra.data_race; data_race.advance_vector_clock(); } @@ -266,7 +341,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let ( alloc, size, offset - ) = this.load_data_race_state(place)?; + ) = this.load_data_race_state_mut(place)?; let current_thread = alloc.global.current_thread(); let mut current_state = alloc.global.current_thread_state_mut(); log::trace!( @@ -280,17 +355,31 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { //FIXME: this is probably still slightly wrong due to the quirks // in the c++11 memory model - if acquire { + let maybe_race = if acquire { // Atomic RW-Op acquire - range.acquire(&mut *current_state); + range.acquire(&mut *current_state, current_thread) }else{ - range.load_relaxed(&mut *current_state); + range.load_relaxed(&mut *current_state, current_thread) + }; + if maybe_race == Err(DataRace) { + mem::drop(current_state); + return VClockAlloc::report_data_race( + &alloc.global, range, "ATOMIC_RMW(LOAD)", true, + place.ptr.assert_ptr(), size + ); } - if 
release { + let maybe_race = if release { // Atomic RW-Op release - range.rmw_release(&mut *current_state, current_thread); + range.rmw_release(&mut *current_state, current_thread) }else{ - range.rmw_relaxed(&mut *current_state); + range.rmw_relaxed(&mut *current_state, current_thread) + }; + if maybe_race == Err(DataRace) { + mem::drop(current_state); + return VClockAlloc::report_data_race( + &alloc.global, range, "ATOMIC_RMW(STORE)", true, + place.ptr.assert_ptr(), size + ); } } @@ -305,7 +394,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx } } - std::mem::drop(current_state); + mem::drop(current_state); let data_race = &*this.memory.extra.data_race; data_race.advance_vector_clock(); } @@ -478,6 +567,11 @@ impl Debug for AtomicReleaseSequences { } } +/// Error returned by finding a data race +/// should be elaborated upon +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +pub struct DataRace; + /// Externally stored memory cell clocks /// explicitly to reduce memory usage for the /// common case where no atomic operations @@ -485,11 +579,26 @@ impl Debug for AtomicReleaseSequences { #[derive(Clone, PartialEq, Eq, Debug)] struct AtomicMemoryCellClocks { + /// The clock-vector for the set of atomic read operations + /// used for detecting data-races with non-atomic write + /// operations + read_vector: VClock, + + /// The clock-vector for the set of atomic write operations + /// used for detecting data-races with non-atomic read or + /// write operations + write_vector: VClock, + /// Synchronization vector for acquire-release semantics + /// contains the vector of timestamps that will + /// happen-before a thread if an acquire-load is + /// performed on the data sync_vector: VClock, /// The Hash-Map of all threads for which a release - /// sequence exists in the memory cell + /// sequence exists in the memory cell, required + /// since read-modify-write operations do not + /// invalidate existing release sequences release_sequences: AtomicReleaseSequences, } @@ -498,10 +607,12 @@ struct AtomicMemoryCellClocks { #[derive(Clone, PartialEq, Eq, Debug)] struct MemoryCellClocks { - /// The vector-clock of the last write + /// The vector-clock of the last write, only one value is stored + /// since all previous writes happened-before the current write write: Timestamp, - /// The id of the thread that performed the last write to this memory location + /// The identifier of the thread that performed the last write + /// operation write_thread: ThreadId, /// The vector-clock of the set of previous reads @@ -532,7 +643,7 @@ impl MemoryCellClocks { /// Load the internal atomic memory cells if they exist #[inline] - fn atomic(&mut self) -> Option<&AtomicMemoryCellClocks> { + fn atomic(&self) -> Option<&AtomicMemoryCellClocks> { match &self.atomic_ops { Some(op) => Some(&*op), None => None @@ -545,6 +656,8 @@ impl MemoryCellClocks { fn atomic_mut(&mut self) -> &mut AtomicMemoryCellClocks { self.atomic_ops.get_or_insert_with(|| { Box::new(AtomicMemoryCellClocks { + read_vector: VClock::default(), + write_vector: VClock::default(), sync_vector: VClock::default(), release_sequences: AtomicReleaseSequences::new() }) @@ -554,75 +667,131 @@ impl MemoryCellClocks { /// Update memory cell data-race tracking for atomic /// load acquire semantics, is a no-op if this memory was /// not used previously as atomic memory - fn acquire(&mut self, clocks: &mut ThreadClockSet) { + fn acquire(&mut self, clocks: &mut ThreadClockSet, thread: ThreadId) -> Result<(), 
DataRace> { + self.atomic_read_detect(clocks, thread)?; if let Some(atomic) = self.atomic() { clocks.clock.join(&atomic.sync_vector); } + Ok(()) } /// Update memory cell data-race tracking for atomic /// load relaxed semantics, is a no-op if this memory was /// not used previously as atomic memory - fn load_relaxed(&mut self, clocks: &mut ThreadClockSet) { + fn load_relaxed(&mut self, clocks: &mut ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + self.atomic_read_detect(clocks, thread)?; if let Some(atomic) = self.atomic() { clocks.fence_acquire.join(&atomic.sync_vector); } + Ok(()) } /// Update the memory cell data-race tracking for atomic /// store release semantics - fn release(&mut self, clocks: &ThreadClockSet, thread: ThreadId) { + fn release(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, thread)?; let atomic = self.atomic_mut(); atomic.sync_vector.set_values(&clocks.clock); atomic.release_sequences.clear_and_set(thread, &clocks.clock); + Ok(()) } /// Update the memory cell data-race tracking for atomic /// store relaxed semantics - fn store_relaxed(&mut self, clocks: &ThreadClockSet, thread: ThreadId) { + fn store_relaxed(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, thread)?; let atomic = self.atomic_mut(); atomic.sync_vector.set_values(&clocks.fence_release); if let Some(release) = atomic.release_sequences.load(thread) { atomic.sync_vector.join(release); } atomic.release_sequences.clear_and_retain(thread); + Ok(()) } /// Update the memory cell data-race tracking for atomic /// store release semantics for RMW operations - fn rmw_release(&mut self, clocks: &ThreadClockSet, thread: ThreadId) { + fn rmw_release(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, thread)?; let atomic = self.atomic_mut(); atomic.sync_vector.join(&clocks.clock); atomic.release_sequences.insert(thread, &clocks.clock); + Ok(()) } /// Update the memory cell data-race tracking for atomic /// store relaxed semantics for RMW operations - fn rmw_relaxed(&mut self, clocks: &ThreadClockSet) { + fn rmw_relaxed(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, thread)?; let atomic = self.atomic_mut(); atomic.sync_vector.join(&clocks.fence_release); + Ok(()) } - + /// Detect data-races with an atomic read, caused by a non-atomic write that does + /// not happen-before the atomic-read + fn atomic_read_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + log::trace!("Atomic read with vectors: {:#?} :: {:#?}", self, clocks); + if self.write <= clocks.clock[self.write_thread] { + let atomic = self.atomic_mut(); + atomic.read_vector.set_at_thread(&clocks.clock, thread); + Ok(()) + }else{ + Err(DataRace) + } + } + + /// Detect data-races with an atomic write, either with a non-atomic read or with + /// a non-atomic write: + fn atomic_write_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + log::trace!("Atomic write with vectors: {:#?} :: {:#?}", self, clocks); + if self.write <= clocks.clock[self.write_thread] && self.read <= clocks.clock { + let atomic = self.atomic_mut(); + atomic.write_vector.set_at_thread(&clocks.clock, thread); + Ok(()) + }else{ + Err(DataRace) + } + } /// Detect races for non-atomic read operations at the current memory cell /// returns true if a 
data-race is detected - fn read_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> bool { + fn read_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + log::trace!("Unsynchronized read with vectors: {:#?} :: {:#?}", self, clocks); if self.write <= clocks.clock[self.write_thread] { - self.read.set_at_thread(&clocks.clock, thread); - false + let race_free = if let Some(atomic) = self.atomic() { + atomic.write_vector <= clocks.clock + }else{ + true + }; + if race_free { + self.read.set_at_thread(&clocks.clock, thread); + Ok(()) + }else{ + Err(DataRace) + } }else{ - true + Err(DataRace) } } /// Detect races for non-atomic write operations at the current memory cell /// returns true if a data-race is detected - fn write_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> bool { + fn write_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + log::trace!("Unsynchronized write with vectors: {:#?} :: {:#?}", self, clocks); if self.write <= clocks.clock[self.write_thread] && self.read <= clocks.clock { - self.write = clocks.clock[thread]; - self.write_thread = thread; - self.read.set_zero_vector(); - false + let race_free = if let Some(atomic) = self.atomic() { + atomic.write_vector <= clocks.clock && atomic.read_vector <= clocks.clock + }else{ + true + }; + if race_free { + self.write = clocks.clock[thread]; + self.write_thread = thread; + self.read.set_zero_vector(); + Ok(()) + }else{ + Err(DataRace) + } }else{ - true + Err(DataRace) } } } @@ -651,6 +820,33 @@ impl VClockAlloc { } } + // Find an index, if one exists where the value + // in `l` is greater than the value in `r` + fn find_gt_index(l: &VClock, r: &VClock) -> Option { + let l_slice = l.as_slice(); + let r_slice = r.as_slice(); + l_slice.iter().zip(r_slice.iter()) + .enumerate() + .find_map(|(idx, (&l, &r))| { + if l > r { Some(idx) } else { None } + }).or_else(|| { + if l_slice.len() > r_slice.len() { + // By invariant, if l_slice is longer + // then one element must be larger + // This just validates that this is true + // and reports earlier elements first + let l_remainder_slice = &l_slice[r_slice.len()..]; + let idx = l_remainder_slice.iter().enumerate() + .find_map(|(idx, &r)| { + if r == 0 { None } else { Some(idx) } + }).expect("Invalid VClock Invariant"); + Some(idx) + }else{ + None + } + }) + } + /// Report a data-race found in the program /// this finds the two racing threads and the type /// of data-race that occured, this will also @@ -659,7 +855,8 @@ impl VClockAlloc { #[cold] #[inline(never)] fn report_data_race<'tcx>( - global: &MemoryExtra, range: &MemoryCellClocks, action: &str, + global: &MemoryExtra, range: &MemoryCellClocks, + action: &str, is_atomic: bool, pointer: Pointer, len: Size ) -> InterpResult<'tcx> { let current_thread = global.current_thread(); @@ -669,40 +866,39 @@ impl VClockAlloc { other_action, other_thread, other_clock ) = if range.write > current_state.clock[range.write_thread] { - // Create effective write-clock that the data-race occured with + // Convert the write action into the vector clock it + // represents for diagnostic purposes let wclock = write_clock.get_mut_with_min_len( current_state.clock.as_slice().len() .max(range.write_thread.to_u32() as usize + 1) ); wclock[range.write_thread.to_u32() as usize] = range.write; ("WRITE", range.write_thread, write_clock.as_slice()) + }else if let Some(idx) = Self::find_gt_index( + &range.read, ¤t_state.clock + ){ + ("READ", 
ThreadId::new(idx), range.read.as_slice()) + }else if !is_atomic { + if let Some(atomic) = range.atomic() { + if let Some(idx) = Self::find_gt_index( + &atomic.write_vector, ¤t_state.clock + ) { + ("ATOMIC_STORE", ThreadId::new(idx), atomic.write_vector.as_slice()) + }else if let Some(idx) = Self::find_gt_index( + &atomic.read_vector, ¤t_state.clock + ) { + ("ATOMIC_LOAD", ThreadId::new(idx), atomic.read_vector.as_slice()) + }else{ + unreachable!("Failed to find report data-race for non-atomic operation: no race found") + } + }else{ + unreachable!("Failed to report data-race for non-atomic operation: no atomic component") + } }else{ - - // Find index in the read-clock that the data-race occured with - let read_slice = range.read.as_slice(); - let clock_slice = current_state.clock.as_slice(); - let conflicting_index = read_slice.iter() - .zip(clock_slice.iter()) - .enumerate().find_map(|(idx,(&read, &clock))| { - if read > clock { - Some(idx) - }else{ - None - } - }).unwrap_or_else(|| { - assert!(read_slice.len() > clock_slice.len(), "BUG: cannot find read race yet reported data-race"); - let rest_read = &read_slice[clock_slice.len()..]; - rest_read.iter().enumerate().find_map(|(idx, &val)| { - if val > 0 { - Some(idx + clock_slice.len()) - }else{ - None - } - }).expect("Invariant broken for read-slice, no 0 element at the tail") - }); - ("READ", ThreadId::new(conflicting_index), range.read.as_slice()) + unreachable!("Failed to report data-race for atomic operation") }; + // Load elaborated thread information about the racing thread actions let current_thread_info = global.print_thread_metadata(current_thread); let other_thread_info = global.print_thread_metadata(other_thread); @@ -732,10 +928,10 @@ impl VClockAlloc { // to the ranges being tested, so this is ok let mut alloc_ranges = self.alloc_ranges.borrow_mut(); for (_,range) in alloc_ranges.iter_mut(pointer.offset, len) { - if range.read_race_detect(&*current_state, current_thread) { + if range.read_race_detect(&*current_state, current_thread) == Err(DataRace) { // Report data-race return Self::report_data_race( - &self.global,range, "READ", pointer, len + &self.global,range, "READ", false, pointer, len ); } } @@ -753,10 +949,10 @@ impl VClockAlloc { let current_thread = self.global.current_thread(); let current_state = self.global.current_thread_state(); for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { - if range.write_race_detect(&*current_state, current_thread) { + if range.write_race_detect(&*current_state, current_thread) == Err(DataRace) { // Report data-race return Self::report_data_race( - &self.global, range, "WRITE", pointer, len + &self.global, range, "WRITE", false, pointer, len ); } } @@ -774,10 +970,10 @@ impl VClockAlloc { let current_thread = self.global.current_thread(); let current_state = self.global.current_thread_state(); for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { - if range.write_race_detect(&*current_state, current_thread) { + if range.write_race_detect(&*current_state, current_thread) == Err(DataRace) { // Report data-race return Self::report_data_race( - &self.global, range, "DEALLOCATE", pointer, len + &self.global, range, "DEALLOCATE", false, pointer, len ); } } diff --git a/src/shims/posix/linux/sync.rs b/src/shims/posix/linux/sync.rs index 9d124872f5..67cea55077 100644 --- a/src/shims/posix/linux/sync.rs +++ b/src/shims/posix/linux/sync.rs @@ -78,7 +78,10 @@ pub fn futex<'tcx>( // Read an `i32` through the pointer, regardless of any wrapper 
types. // It's not uncommon for `addr` to be passed as another type than `*mut i32`, such as `*const AtomicI32`. // FIXME: this fails if `addr` is not a pointer type. - let futex_val = this.read_scalar_at_offset(addr.into(), 0, this.machine.layouts.i32)?.to_i32()?; + // FIXME: what form of atomic operation should the `futex` use to load the value? + let futex_val = this.read_scalar_at_offset_atomic( + addr.into(), 0, this.machine.layouts.i32, AtomicReadOp::Acquire + )?.to_i32()?; if val == futex_val { // The value still matches, so we block the trait make it wait for FUTEX_WAKE. this.block_thread(thread); diff --git a/tests/compile-fail/data_race/atomic_read_write_race.rs b/tests/compile-fail/data_race/atomic_read_write_race.rs new file mode 100644 index 0000000000..0b9610edc6 --- /dev/null +++ b/tests/compile-fail/data_race/atomic_read_write_race.rs @@ -0,0 +1,31 @@ +// ignore-windows: Concurrency on Windows is not supported yet. +#![feature(core_intrinsics)] + +use std::thread::spawn; +use std::sync::atomic::AtomicUsize; +use std::intrinsics::atomic_load; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +pub fn main() { + let mut a = AtomicUsize::new(0); + let b = &mut a as *mut AtomicUsize; + let c = EvilSend(b); + unsafe { + let j1 = spawn(move || { + *(c.0 as *mut usize) = 32; + }); + + let j2 = spawn(move || { + //Equivalent to: (&*c.0).load(Ordering::SeqCst) + atomic_load(c.0 as *mut usize) //~ ERROR Data race + }); + + j1.join().unwrap(); + j2.join().unwrap(); + } +} diff --git a/tests/compile-fail/data_race/atomic_read_write_race_alt.rs b/tests/compile-fail/data_race/atomic_read_write_race_alt.rs new file mode 100644 index 0000000000..779babefd8 --- /dev/null +++ b/tests/compile-fail/data_race/atomic_read_write_race_alt.rs @@ -0,0 +1,31 @@ +// ignore-windows: Concurrency on Windows is not supported yet. + +use std::thread::spawn; +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +pub fn main() { + let mut a = AtomicUsize::new(0); + let b = &mut a as *mut AtomicUsize; + let c = EvilSend(b); + unsafe { + let j1 = spawn(move || { + let atomic_ref = &mut *c.0; + atomic_ref.load(Ordering::SeqCst) + }); + + let j2 = spawn(move || { + let atomic_ref = &mut *c.0; + *atomic_ref.get_mut() = 32; //~ ERROR Data race + }); + + j1.join().unwrap(); + j2.join().unwrap(); + } +} diff --git a/tests/compile-fail/data_race/atomic_write_read_race.rs b/tests/compile-fail/data_race/atomic_write_read_race.rs new file mode 100644 index 0000000000..3211a5ae53 --- /dev/null +++ b/tests/compile-fail/data_race/atomic_write_read_race.rs @@ -0,0 +1,31 @@ +// ignore-windows: Concurrency on Windows is not supported yet. 
+ +use std::thread::spawn; +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +pub fn main() { + let mut a = AtomicUsize::new(0); + let b = &mut a as *mut AtomicUsize; + let c = EvilSend(b); + unsafe { + let j1 = spawn(move || { + let atomic_ref = &mut *c.0; + atomic_ref.store(32, Ordering::SeqCst) + }); + + let j2 = spawn(move || { + let atomic_ref = &mut *c.0; + *atomic_ref.get_mut() //~ ERROR Data race + }); + + j1.join().unwrap(); + j2.join().unwrap(); + } +} diff --git a/tests/compile-fail/data_race/atomic_write_read_race_alt.rs b/tests/compile-fail/data_race/atomic_write_read_race_alt.rs new file mode 100644 index 0000000000..131d4e07b8 --- /dev/null +++ b/tests/compile-fail/data_race/atomic_write_read_race_alt.rs @@ -0,0 +1,31 @@ +// ignore-windows: Concurrency on Windows is not supported yet. +#![feature(core_intrinsics)] + +use std::thread::spawn; +use std::sync::atomic::AtomicUsize; +use std::intrinsics::atomic_store; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +pub fn main() { + let mut a = AtomicUsize::new(0); + let b = &mut a as *mut AtomicUsize; + let c = EvilSend(b); + unsafe { + let j1 = spawn(move || { + *(c.0 as *mut usize) + }); + + let j2 = spawn(move || { + //Equivalent to: (&*c.0).store(32, Ordering::SeqCst) + atomic_store(c.0 as *mut usize, 32); //~ ERROR Data race + }); + + j1.join().unwrap(); + j2.join().unwrap(); + } +} diff --git a/tests/compile-fail/data_race/atomic_write_write_race.rs b/tests/compile-fail/data_race/atomic_write_write_race.rs new file mode 100644 index 0000000000..74adf7ae4b --- /dev/null +++ b/tests/compile-fail/data_race/atomic_write_write_race.rs @@ -0,0 +1,31 @@ +// ignore-windows: Concurrency on Windows is not supported yet. +#![feature(core_intrinsics)] + +use std::thread::spawn; +use std::sync::atomic::AtomicUsize; +use std::intrinsics::atomic_store; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +pub fn main() { + let mut a = AtomicUsize::new(0); + let b = &mut a as *mut AtomicUsize; + let c = EvilSend(b); + unsafe { + let j1 = spawn(move || { + *(c.0 as *mut usize) = 32; + }); + + let j2 = spawn(move || { + //Equivalent to: (&*c.0).store(64, Ordering::SeqCst) + atomic_store(c.0 as *mut usize, 64); //~ ERROR Data race + }); + + j1.join().unwrap(); + j2.join().unwrap(); + } +} diff --git a/tests/compile-fail/data_race/atomic_write_write_race_alt.rs b/tests/compile-fail/data_race/atomic_write_write_race_alt.rs new file mode 100644 index 0000000000..75ad755fbd --- /dev/null +++ b/tests/compile-fail/data_race/atomic_write_write_race_alt.rs @@ -0,0 +1,31 @@ +// ignore-windows: Concurrency on Windows is not supported yet. 
+ +use std::thread::spawn; +use std::sync::atomic::AtomicUsize; +use std::sync::atomic::Ordering; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +pub fn main() { + let mut a = AtomicUsize::new(0); + let b = &mut a as *mut AtomicUsize; + let c = EvilSend(b); + unsafe { + let j1 = spawn(move || { + let atomic_ref = &mut *c.0; + atomic_ref.store(64, Ordering::SeqCst); + }); + + let j2 = spawn(move || { + let atomic_ref = &mut *c.0; + *atomic_ref.get_mut() = 32; //~ ERROR Data race + }); + + j1.join().unwrap(); + j2.join().unwrap(); + } +} From 9cb6b8da3f6ca2da1139c91754d520bf2d354f31 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Thu, 5 Nov 2020 03:54:39 +0000 Subject: [PATCH 07/17] Split out vector_clock to separate file, general tidy up of some of the code & add support for vector index re-use for multiple threads after termination. --- src/data_race.rs | 1333 +++++++++++++-------------------------- src/lib.rs | 4 + src/shims/intrinsics.rs | 31 +- src/shims/posix/sync.rs | 39 +- src/thread.rs | 4 +- src/vector_clock.rs | 602 ++++++++++++++++++ 6 files changed, 1084 insertions(+), 929 deletions(-) create mode 100644 src/vector_clock.rs diff --git a/src/data_race.rs b/src/data_race.rs index 8e7a3548f5..e992c5a1d5 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -13,21 +13,21 @@ //! - 1 of the operations happens-before the other operation (see link for definition) use std::{ - fmt::{self, Debug}, cmp::Ordering, rc::Rc, - cell::{Cell, RefCell, Ref, RefMut}, ops::Index, mem + fmt::Debug, rc::Rc, + cell::{Cell, RefCell, Ref, RefMut}, mem }; use rustc_index::vec::{Idx, IndexVec}; use rustc_target::abi::Size; use rustc_middle::ty::layout::TyAndLayout; -use rustc_data_structures::fx::FxHashMap; - -use smallvec::SmallVec; +use rustc_data_structures::fx::FxHashSet; use crate::{ - MiriEvalContext, ThreadId, Tag, MiriEvalContextExt, RangeMap, - MPlaceTy, ImmTy, InterpResult, Pointer, ScalarMaybeUninit, - OpTy, Immediate, MemPlaceMeta + MiriEvalContext, MiriEvalContextExt, + ThreadId, Tag, RangeMap, + InterpResult, Pointer, ScalarMaybeUninit, + MPlaceTy, OpTy, MemPlaceMeta, + VClock, VSmallClockSet, VectorIdx, VTimestamp }; pub type AllocExtra = VClockAlloc; @@ -73,194 +73,136 @@ pub enum AtomicFenceOp { impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { - /// Variant of `read_immediate` that does not perform `data-race` checks. - fn read_immediate_racy(&self, op: MPlaceTy<'tcx, Tag>) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { + // Temporarily allow data-races to occur, this should only be + // used if either one of the appropiate `validate_atomic` functions + // will be called to treat a memory access as atomic or if the memory + // being accessed should be treated as internal state, that cannot be + // accessed by the interpreted program. + #[inline] + fn allow_data_races_ref(&self, op: impl FnOnce(&MiriEvalContext<'mir, 'tcx>) -> R) -> R { let this = self.eval_context_ref(); let data_race = &*this.memory.extra.data_race; - let old = data_race.multi_threaded.replace(false); - let res = this.read_immediate(op.into()); + let result = op(this); data_race.multi_threaded.set(old); - - res + result } - - /// Variant of `write_immediate` that does not perform `data-race` checks. 
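The `allow_data_races_ref`/`allow_data_races_mut` wrappers above use `Cell::replace` to save the `multi_threaded` flag, run the closure, and restore the previous value afterwards. A self-contained sketch of that save-and-restore pattern, with a hypothetical `Detector` type standing in for the global data-race state:

use std::cell::Cell;

struct Detector {
    // Stand-in for the detector's `multi_threaded` flag.
    multi_threaded: Cell<bool>,
}

impl Detector {
    fn allow_data_races<R>(&self, op: impl FnOnce() -> R) -> R {
        // `Cell::replace` swaps in `false` and hands back the old value in one step.
        let old = self.multi_threaded.replace(false);
        let result = op();
        // Restore whatever state the flag had before the suppressed region.
        self.multi_threaded.set(old);
        result
    }
}

fn main() {
    let detector = Detector { multi_threaded: Cell::new(true) };
    let value = detector.allow_data_races(|| 42);
    assert_eq!(value, 42);
    assert!(detector.multi_threaded.get());
}

Restoring the saved value rather than unconditionally setting the flag back to true keeps nested suppressed regions well-behaved.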
- fn write_immediate_racy( - &mut self, src: Immediate, dest: MPlaceTy<'tcx, Tag> - ) -> InterpResult<'tcx> { + + /// Same as `allow_data_races_ref`, this temporarily disables any data-race detection and + /// so should only be used for atomic operations or internal state that the program cannot + /// access + #[inline] + fn allow_data_races_mut(&mut self, op: impl FnOnce(&mut MiriEvalContext<'mir, 'tcx>) -> R) -> R { let this = self.eval_context_mut(); let data_race = &*this.memory.extra.data_race; let old = data_race.multi_threaded.replace(false); - - let imm = this.write_immediate(src, dest.into()); - + let result = op(this); let data_race = &*this.memory.extra.data_race; data_race.multi_threaded.set(old); - imm + result } - /// Variant of `read_scalar` that does not perform data-race checks. - fn read_scalar_racy( - &self, op: MPlaceTy<'tcx, Tag> - )-> InterpResult<'tcx, ScalarMaybeUninit> { - Ok(self.read_immediate_racy(op)?.to_scalar_or_uninit()) - } - /// Variant of `write_scalar` that does not perform data-race checks. - fn write_scalar_racy( - &mut self, val: ScalarMaybeUninit, dest: MPlaceTy<'tcx, Tag> - ) -> InterpResult<'tcx> { - self.write_immediate_racy(Immediate::Scalar(val.into()), dest) - } - - /// Variant of `read_scalar_at_offset` helper function that does not perform - /// `data-race checks. - fn read_scalar_at_offset_racy( - &self, - op: OpTy<'tcx, Tag>, - offset: u64, - layout: TyAndLayout<'tcx>, - ) -> InterpResult<'tcx, ScalarMaybeUninit> { - let this = self.eval_context_ref(); - let op_place = this.deref_operand(op)?; - let offset = Size::from_bytes(offset); - // Ensure that the following read at an offset is within bounds - assert!(op_place.layout.size >= offset + layout.size); - let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; - this.read_scalar_racy(value_place.into()) - } - - /// Variant of `write_scalar_at_offfset` helper function that performs - /// an atomic load operation with verification instead fn read_scalar_at_offset_atomic( - &mut self, + &self, op: OpTy<'tcx, Tag>, offset: u64, layout: TyAndLayout<'tcx>, atomic: AtomicReadOp ) -> InterpResult<'tcx, ScalarMaybeUninit> { - let this = self.eval_context_mut(); + let this = self.eval_context_ref(); let op_place = this.deref_operand(op)?; let offset = Size::from_bytes(offset); // Ensure that the following read at an offset is within bounds assert!(op_place.layout.size >= offset + layout.size); let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; - let res = this.read_scalar_racy(value_place.into())?; - this.validate_atomic_load(value_place, atomic)?; - Ok(res) + this.read_scalar_atomic(value_place, atomic) } - - /// Variant of `write_scalar_at_offfset` helper function that does not perform - /// data-race checks. 
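The replacement helpers all follow one composition: perform the underlying read or write with race checks suppressed, then validate the same location as an atomic access, so a conflict with an earlier non-atomic access surfaces as a `DataRace`. A rough sketch of that shape and of the `Result<(), DataRace>` error style, using hypothetical stand-in types rather than Miri's interpreter context:

// Unit error type mirroring the detector's `DataRace` marker.
#[derive(Debug)]
struct DataRace;

struct MemCell {
    value: i32,
}

fn plain_read(cell: &MemCell) -> i32 {
    // Stand-in for the access performed while race checks are suppressed.
    cell.value
}

fn validate_atomic_load(_cell: &MemCell) -> Result<(), DataRace> {
    // Stand-in for recording the access as an atomic load; in the real detector
    // this is where a race with an earlier non-atomic write would be reported.
    Ok(())
}

fn read_atomic(cell: &MemCell) -> Result<i32, DataRace> {
    // Composition used by the new helpers: plain read first,
    // then validate the same location as an atomic access.
    let value = plain_read(cell);
    validate_atomic_load(cell)?;
    Ok(value)
}

fn main() -> Result<(), DataRace> {
    let cell = MemCell { value: 7 };
    assert_eq!(read_atomic(&cell)?, 7);
    Ok(())
}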
- fn write_scalar_at_offset_racy( + fn write_scalar_at_offset_atomic( &mut self, op: OpTy<'tcx, Tag>, offset: u64, value: impl Into>, layout: TyAndLayout<'tcx>, - ) -> InterpResult<'tcx, ()> { + atomic: AtomicWriteOp + ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let op_place = this.deref_operand(op)?; let offset = Size::from_bytes(offset); // Ensure that the following read at an offset is within bounds assert!(op_place.layout.size >= offset + layout.size); let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; - this.write_scalar_racy(value.into(), value_place.into()) + this.write_scalar_atomic(value.into(), value_place, atomic) } - - /// Load the data race allocation state for a given memory place - /// also returns the size and offset of the result in the allocation - /// metadata - /// This is used for atomic loads since unconditionally requesteing - /// mutable access causes issues for read-only memory, which will - /// fail validation on mutable access - fn load_data_race_state_ref<'a>( - &'a self, place: MPlaceTy<'tcx, Tag> - ) -> InterpResult<'tcx, (&'a VClockAlloc, Size, Size)> where 'mir: 'a { - let this = self.eval_context_ref(); - - let ptr = place.ptr.assert_ptr(); - let size = place.layout.size; - let data_race = &this.memory.get_raw(ptr.alloc_id)?.extra.data_race; - - Ok((data_race, size, ptr.offset)) - } - - /// Load the data race allocation state for a given memory place - /// also returns the size and the offset of the result in the allocation - /// metadata - fn load_data_race_state_mut<'a>( - &'a mut self, place: MPlaceTy<'tcx, Tag> - ) -> InterpResult<'tcx, (&'a mut VClockAlloc, Size, Size)> where 'mir: 'a { - let this = self.eval_context_mut(); - - let ptr = place.ptr.assert_ptr(); - let size = place.layout.size; - let data_race = &mut this.memory.get_raw_mut(ptr.alloc_id)?.extra.data_race; - - Ok((data_race, size, ptr.offset)) + fn read_scalar_atomic( + &self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp + ) -> InterpResult<'tcx, ScalarMaybeUninit> { + let scalar = self.allow_data_races_ref(move |this| { + this.read_scalar(place.into()) + })?; + self.validate_atomic_load(place, atomic)?; + Ok(scalar) + } + fn write_scalar_atomic( + &mut self, val: ScalarMaybeUninit, dest: MPlaceTy<'tcx, Tag>, + atomic: AtomicWriteOp + ) -> InterpResult<'tcx> { + self.allow_data_races_mut(move |this| { + this.write_scalar(val, dest.into()) + })?; + self.validate_atomic_store(dest, atomic) } /// Update the data-race detector for an atomic read occuring at the /// associated memory-place and on the current thread fn validate_atomic_load( - &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp + &self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp ) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); + let this = self.eval_context_ref(); let data_race = &*this.memory.extra.data_race; if data_race.multi_threaded.get() { - data_race.advance_vector_clock(); - let ( - alloc, size, offset - ) = this.load_data_race_state_ref(place)?; + // Load an log the atomic operation + // the memory access has to be `get_raw` since otherwise this despite only + // mutating MemoryExtra will still trigger errors on read-only memory + let place_ptr = place.ptr.assert_ptr(); + let size = place.layout.size; + let alloc_meta = &this.memory.get_raw(place_ptr.alloc_id)?.extra.data_race; log::trace!( - "Atomic load on {:?} with ordering {:?}, in memory({:?}, offset={}, size={})", - alloc.global.current_thread(), atomic, - 
place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes() + "Atomic op({}) with ordering {:?} on memory({:?}, offset={}, size={})", + "Atomic load", &atomic, place_ptr.alloc_id, place_ptr.offset.bytes(), size.bytes() ); - let current_thread = alloc.global.current_thread(); - let mut current_state = alloc.global.current_thread_state_mut(); - if atomic == AtomicReadOp::Relaxed { - // Perform relaxed atomic load - for (_,range) in alloc.alloc_ranges.borrow_mut().iter_mut(offset, size) { - if range.load_relaxed(&mut *current_state, current_thread) == Err(DataRace) { - mem::drop(current_state); + // Perform the atomic operation + let data_race = &alloc_meta.global; + data_race.maybe_perform_sync_operation(move |index, mut clocks| { + for (_,range) in alloc_meta.alloc_ranges.borrow_mut().iter_mut(place_ptr.offset, size) { + let res = if atomic == AtomicReadOp::Relaxed { + range.load_relaxed(&mut *clocks, index) + }else{ + range.acquire(&mut *clocks, index) + }; + if let Err(DataRace) = res { + mem::drop(clocks); return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_LOAD", true, - place.ptr.assert_ptr(), size + &alloc_meta.global, range, "Atomic load", true, + place_ptr, size ); } } - }else{ - // Perform acquire(or seq-cst) atomic load - for (_,range) in alloc.alloc_ranges.borrow_mut().iter_mut(offset, size) { - if range.acquire(&mut *current_state, current_thread) == Err(DataRace) { - mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_LOAD", true, - place.ptr.assert_ptr(), size - ); - } - } - } + Ok(()) + })?; // Log changes to atomic memory if log::log_enabled!(log::Level::Trace) { - for (_,range) in alloc.alloc_ranges.borrow_mut().iter(offset, size) { + for (_,range) in alloc_meta.alloc_ranges.borrow().iter(place_ptr.offset, size) { log::trace!( - " updated atomic memory({:?}, offset={}, size={}) to {:#?}", - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), + "Updated atomic memory({:?}, offset={}, size={}) to {:#?}", + place.ptr.assert_ptr().alloc_id, place_ptr.offset.bytes(), size.bytes(), range.atomic_ops ); } } - - mem::drop(current_state); - let data_race = &*this.memory.extra.data_race; - data_race.advance_vector_clock(); } Ok(()) } @@ -271,61 +213,16 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicWriteOp ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - let data_race = &*this.memory.extra.data_race; - if data_race.multi_threaded.get() { - data_race.advance_vector_clock(); - - let ( - alloc, size, offset - ) = this.load_data_race_state_mut(place)?; - let current_thread = alloc.global.current_thread(); - let mut current_state = alloc.global.current_thread_state_mut(); - log::trace!( - "Atomic store on {:?} with ordering {:?}, in memory({:?}, offset={}, size={})", - current_thread, atomic, - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes() - ); - - if atomic == AtomicWriteOp::Relaxed { - // Perform relaxed atomic store - for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { - if range.store_relaxed(&mut *current_state, current_thread) == Err(DataRace) { - mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_STORE", true, - place.ptr.assert_ptr(), size - ); - } - } - }else{ - // Perform release(or seq-cst) atomic store - for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { - if range.release(&mut *current_state, 
current_thread) == Err(DataRace) { - mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_STORE", true, - place.ptr.assert_ptr(), size - ); - } - } - } - - // Log changes to atomic memory - if log::log_enabled!(log::Level::Trace) { - for (_,range) in alloc.alloc_ranges.get_mut().iter(offset, size) { - log::trace!( - " updated atomic memory({:?}, offset={}, size={}) to {:#?}", - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), - range.atomic_ops - ); + this.validate_atomic_op_mut( + place, atomic, "Atomic Store", + move |memory, clocks, index, atomic| { + if atomic == AtomicWriteOp::Relaxed { + memory.store_relaxed(clocks, index) + }else{ + memory.release(clocks, index) } } - - mem::drop(current_state); - let data_race = &*this.memory.extra.data_race; - data_race.advance_vector_clock(); - } - Ok(()) + ) } /// Update the data-race detector for an atomic read-modify-write occuring @@ -334,97 +231,104 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicRWOp ) -> InterpResult<'tcx> { use AtomicRWOp::*; + let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); + let release = matches!(atomic, Release | AcqRel | SeqCst); let this = self.eval_context_mut(); - let data_race = &*this.memory.extra.data_race; - if data_race.multi_threaded.get() { - data_race.advance_vector_clock(); - - let ( - alloc, size, offset - ) = this.load_data_race_state_mut(place)?; - let current_thread = alloc.global.current_thread(); - let mut current_state = alloc.global.current_thread_state_mut(); - log::trace!( - "Atomic RMW on {:?} with ordering {:?}, in memory({:?}, offset={}, size={})", - current_thread, atomic, - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes() - ); - - let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); - let release = matches!(atomic, Release | AcqRel | SeqCst); - for (_,range) in alloc.alloc_ranges.get_mut().iter_mut(offset, size) { - //FIXME: this is probably still slightly wrong due to the quirks - // in the c++11 memory model - let maybe_race = if acquire { - // Atomic RW-Op acquire - range.acquire(&mut *current_state, current_thread) + this.validate_atomic_op_mut( + place, atomic, "Atomic RMW", + move |memory, clocks, index, _| { + if acquire { + memory.acquire(clocks, index)?; }else{ - range.load_relaxed(&mut *current_state, current_thread) - }; - if maybe_race == Err(DataRace) { - mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_RMW(LOAD)", true, - place.ptr.assert_ptr(), size - ); + memory.load_relaxed(clocks, index)?; } - let maybe_race = if release { - // Atomic RW-Op release - range.rmw_release(&mut *current_state, current_thread) + if release { + memory.rmw_release(clocks, index) }else{ - range.rmw_relaxed(&mut *current_state, current_thread) - }; - if maybe_race == Err(DataRace) { - mem::drop(current_state); - return VClockAlloc::report_data_race( - &alloc.global, range, "ATOMIC_RMW(STORE)", true, - place.ptr.assert_ptr(), size - ); + memory.rmw_relaxed(clocks, index) } } - - // Log changes to atomic memory - if log::log_enabled!(log::Level::Trace) { - for (_,range) in alloc.alloc_ranges.get_mut().iter(offset, size) { - log::trace!( - " updated atomic memory({:?}, offset={}, size={}) to {:#?}", - place.ptr.assert_ptr().alloc_id, offset.bytes(), size.bytes(), - range.atomic_ops - ); - } - } - - mem::drop(current_state); - let data_race = &*this.memory.extra.data_race; - 
data_race.advance_vector_clock(); - } - Ok(()) + ) } /// Update the data-race detector for an atomic fence on the current thread fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let data_race = &*this.memory.extra.data_race; - if data_race.multi_threaded.get() { - data_race.advance_vector_clock(); - - log::trace!("Atomic fence on {:?} with ordering {:?}", data_race.current_thread(), atomic); + data_race.maybe_perform_sync_operation(move |index, mut clocks| { + log::trace!("Atomic fence on {:?} with ordering {:?}", index, atomic); // Apply data-race detection for the current fences // this treats AcqRel and SeqCst as the same as a acquire // and release fence applied in the same timestamp. if atomic != AtomicFenceOp::Release { // Either Acquire | AcqRel | SeqCst - data_race.current_thread_state_mut().apply_acquire_fence(); + clocks.apply_acquire_fence(); } if atomic != AtomicFenceOp::Acquire { // Either Release | AcqRel | SeqCst - data_race.current_thread_state_mut().apply_release_fence(); + clocks.apply_release_fence(); } + Ok(()) + }) + } +} - data_race.advance_vector_clock(); +impl<'mir, 'tcx: 'mir> EvalContextPrivExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} +trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { + + /// Generic atomic operation implementation, this however + /// cannot be used for the atomic read operation since + /// that requires non mutable memory access to not trigger + /// the writing to read-only memory errors during `get_raw_mut` + fn validate_atomic_op_mut( + &mut self, place: MPlaceTy<'tcx, Tag>, + atomic: A, description: &str, + mut op: impl FnMut( + &mut MemoryCellClocks, &mut ThreadClockSet, VectorIdx, A + ) -> Result<(), DataRace> + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let data_race = &*this.memory.extra.data_race; + if data_race.multi_threaded.get() { + + // Load an log the atomic operation + let place_ptr = place.ptr.assert_ptr(); + let size = place.layout.size; + let alloc_meta = &mut this.memory.get_raw_mut(place_ptr.alloc_id)?.extra.data_race; + log::trace!( + "Atomic op({}) with ordering {:?} on memory({:?}, offset={}, size={})", + description, &atomic, place_ptr.alloc_id, place_ptr.offset.bytes(), size.bytes() + ); + + // Perform the atomic operation + let data_race = &alloc_meta.global; + data_race.maybe_perform_sync_operation(|index, mut clocks| { + for (_,range) in alloc_meta.alloc_ranges.borrow_mut().iter_mut(place_ptr.offset, size) { + if let Err(DataRace) = op(range, &mut *clocks, index, atomic) { + mem::drop(clocks); + return VClockAlloc::report_data_race( + &alloc_meta.global, range, description, true, + place_ptr, size + ); + } + } + Ok(()) + })?; + + // Log changes to atomic memory + if log::log_enabled!(log::Level::Trace) { + for (_,range) in alloc_meta.alloc_ranges.borrow().iter(place_ptr.offset, size) { + log::trace!( + "Updated atomic memory({:?}, offset={}, size={}) to {:#?}", + place.ptr.assert_ptr().alloc_id, place_ptr.offset.bytes(), size.bytes(), + range.atomic_ops + ); + } + } } Ok(()) } + } /// Handle for locks to express their @@ -439,7 +343,7 @@ pub struct DataRaceLockHandle { } impl DataRaceLockHandle { pub fn set_values(&mut self, other: &Self) { - self.clock.set_values(&other.clock) + self.clock.clone_from(&other.clock) } pub fn reset(&mut self) { self.clock.set_zero_vector(); @@ -447,126 +351,6 @@ impl DataRaceLockHandle { } -/// Avoid an atomic allocation for the common -/// case with atomic 
operations where the number -/// of active release sequences is small -#[derive(Clone, PartialEq, Eq)] -enum AtomicReleaseSequences { - - /// Contains one or no values - /// if empty: (None, reset vector clock) - /// if one: (Some(thread), thread_clock) - ReleaseOneOrEmpty(Option, VClock), - - /// Contains two or more values - /// stored in a hash-map of thread id to - /// vector clocks - ReleaseMany(FxHashMap) -} -impl AtomicReleaseSequences { - - /// Return an empty set of atomic release sequences - #[inline] - fn new() -> AtomicReleaseSequences { - Self::ReleaseOneOrEmpty(None, VClock::default()) - } - - /// Remove all values except for the value stored at `thread` and set - /// the vector clock to the associated `clock` value - #[inline] - fn clear_and_set(&mut self, thread: ThreadId, clock: &VClock) { - match self { - Self::ReleaseOneOrEmpty(id, rel_clock) => { - *id = Some(thread); - rel_clock.set_values(clock); - } - Self::ReleaseMany(_) => { - *self = Self::ReleaseOneOrEmpty(Some(thread), clock.clone()); - } - } - } - - /// Remove all values except for the value stored at `thread` - #[inline] - fn clear_and_retain(&mut self, thread: ThreadId) { - match self { - Self::ReleaseOneOrEmpty(id, rel_clock) => { - // If the id is the same, then reatin the value - // otherwise delete and clear the release vector clock - if *id != Some(thread) { - *id = None; - rel_clock.set_zero_vector(); - } - }, - Self::ReleaseMany(hash_map) => { - // Retain only the thread element, so reduce to size - // of 1 or 0, and move to smaller format - if let Some(clock) = hash_map.remove(&thread) { - *self = Self::ReleaseOneOrEmpty(Some(thread), clock); - }else{ - *self = Self::new(); - } - } - } - } - - /// Insert a release sequence at `thread` with values `clock` - fn insert(&mut self, thread: ThreadId, clock: &VClock) { - match self { - Self::ReleaseOneOrEmpty(id, rel_clock) => { - if id.map_or(true, |id| id == thread) { - *id = Some(thread); - rel_clock.set_values(clock); - }else{ - let mut hash_map = FxHashMap::default(); - hash_map.insert(thread, clock.clone()); - hash_map.insert(id.unwrap(), rel_clock.clone()); - *self = Self::ReleaseMany(hash_map); - } - }, - Self::ReleaseMany(hash_map) => { - hash_map.insert(thread, clock.clone()); - } - } - } - - /// Return the release sequence at `thread` if one exists - #[inline] - fn load(&self, thread: ThreadId) -> Option<&VClock> { - match self { - Self::ReleaseOneOrEmpty(id, clock) => { - if *id == Some(thread) { - Some(clock) - }else{ - None - } - }, - Self::ReleaseMany(hash_map) => { - hash_map.get(&thread) - } - } - } -} - -/// Custom debug implementation to correctly -/// print debug as a logical mapping from threads -/// to vector-clocks -impl Debug for AtomicReleaseSequences { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::ReleaseOneOrEmpty(None,_) => { - f.debug_map().finish() - }, - Self::ReleaseOneOrEmpty(Some(id), clock) => { - f.debug_map().entry(&id, &clock).finish() - }, - Self::ReleaseMany(hash_map) => { - Debug::fmt(hash_map, f) - } - } - } -} - /// Error returned by finding a data race /// should be elaborated upon #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] @@ -576,7 +360,7 @@ pub struct DataRace; /// explicitly to reduce memory usage for the /// common case where no atomic operations /// exists on the memory cell -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Clone, PartialEq, Eq, Default, Debug)] struct AtomicMemoryCellClocks { /// The clock-vector for the set of atomic read 
operations @@ -599,7 +383,7 @@ struct AtomicMemoryCellClocks { /// sequence exists in the memory cell, required /// since read-modify-write operations do not /// invalidate existing release sequences - release_sequences: AtomicReleaseSequences, + release_sequences: VSmallClockSet, } /// Memory Cell vector clock metadata @@ -609,11 +393,11 @@ struct MemoryCellClocks { /// The vector-clock of the last write, only one value is stored /// since all previous writes happened-before the current write - write: Timestamp, + write: VTimestamp, /// The identifier of the thread that performed the last write /// operation - write_thread: ThreadId, + write_index: VectorIdx, /// The vector-clock of the set of previous reads /// each index is set to the timestamp that the associated @@ -633,7 +417,7 @@ impl Default for MemoryCellClocks { MemoryCellClocks { read: VClock::default(), write: 0, - write_thread: ThreadId::new(u32::MAX as usize), + write_index: VectorIdx::MAX_INDEX, atomic_ops: None } } @@ -654,21 +438,14 @@ impl MemoryCellClocks { /// if it does not exist #[inline] fn atomic_mut(&mut self) -> &mut AtomicMemoryCellClocks { - self.atomic_ops.get_or_insert_with(|| { - Box::new(AtomicMemoryCellClocks { - read_vector: VClock::default(), - write_vector: VClock::default(), - sync_vector: VClock::default(), - release_sequences: AtomicReleaseSequences::new() - }) - }) + self.atomic_ops.get_or_insert_with(Default::default) } /// Update memory cell data-race tracking for atomic /// load acquire semantics, is a no-op if this memory was /// not used previously as atomic memory - fn acquire(&mut self, clocks: &mut ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_read_detect(clocks, thread)?; + fn acquire(&mut self, clocks: &mut ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_read_detect(clocks, index)?; if let Some(atomic) = self.atomic() { clocks.clock.join(&atomic.sync_vector); } @@ -677,8 +454,8 @@ impl MemoryCellClocks { /// Update memory cell data-race tracking for atomic /// load relaxed semantics, is a no-op if this memory was /// not used previously as atomic memory - fn load_relaxed(&mut self, clocks: &mut ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_read_detect(clocks, thread)?; + fn load_relaxed(&mut self, clocks: &mut ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_read_detect(clocks, index)?; if let Some(atomic) = self.atomic() { clocks.fence_acquire.join(&atomic.sync_vector); } @@ -688,38 +465,39 @@ impl MemoryCellClocks { /// Update the memory cell data-race tracking for atomic /// store release semantics - fn release(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_write_detect(clocks, thread)?; + fn release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); - atomic.sync_vector.set_values(&clocks.clock); - atomic.release_sequences.clear_and_set(thread, &clocks.clock); + atomic.sync_vector.clone_from(&clocks.clock); + atomic.release_sequences.clear(); + atomic.release_sequences.insert(index, &clocks.clock); Ok(()) } /// Update the memory cell data-race tracking for atomic /// store relaxed semantics - fn store_relaxed(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_write_detect(clocks, thread)?; + fn store_relaxed(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + 
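+        // Descriptive note: a relaxed store publishes only the clock captured by
+        // this thread's last release fence (`fence_release`), joined with any
+        // release sequence already headed by this vector index; release sequences
+        // headed by other indices are discarded by `retain_index` below.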
self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); - atomic.sync_vector.set_values(&clocks.fence_release); - if let Some(release) = atomic.release_sequences.load(thread) { + atomic.sync_vector.clone_from(&clocks.fence_release); + if let Some(release) = atomic.release_sequences.get(index) { atomic.sync_vector.join(release); } - atomic.release_sequences.clear_and_retain(thread); + atomic.release_sequences.retain_index(index); Ok(()) } /// Update the memory cell data-race tracking for atomic /// store release semantics for RMW operations - fn rmw_release(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_write_detect(clocks, thread)?; + fn rmw_release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); atomic.sync_vector.join(&clocks.clock); - atomic.release_sequences.insert(thread, &clocks.clock); + atomic.release_sequences.insert(index, &clocks.clock); Ok(()) } /// Update the memory cell data-race tracking for atomic /// store relaxed semantics for RMW operations - fn rmw_relaxed(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { - self.atomic_write_detect(clocks, thread)?; + fn rmw_relaxed(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); atomic.sync_vector.join(&clocks.fence_release); Ok(()) @@ -727,11 +505,11 @@ impl MemoryCellClocks { /// Detect data-races with an atomic read, caused by a non-atomic write that does /// not happen-before the atomic-read - fn atomic_read_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + fn atomic_read_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Atomic read with vectors: {:#?} :: {:#?}", self, clocks); - if self.write <= clocks.clock[self.write_thread] { + if self.write <= clocks.clock[self.write_index] { let atomic = self.atomic_mut(); - atomic.read_vector.set_at_thread(&clocks.clock, thread); + atomic.read_vector.set_at_index(&clocks.clock, index); Ok(()) }else{ Err(DataRace) @@ -740,11 +518,11 @@ impl MemoryCellClocks { /// Detect data-races with an atomic write, either with a non-atomic read or with /// a non-atomic write: - fn atomic_write_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + fn atomic_write_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Atomic write with vectors: {:#?} :: {:#?}", self, clocks); - if self.write <= clocks.clock[self.write_thread] && self.read <= clocks.clock { + if self.write <= clocks.clock[self.write_index] && self.read <= clocks.clock { let atomic = self.atomic_mut(); - atomic.write_vector.set_at_thread(&clocks.clock, thread); + atomic.write_vector.set_at_index(&clocks.clock, index); Ok(()) }else{ Err(DataRace) @@ -753,16 +531,16 @@ impl MemoryCellClocks { /// Detect races for non-atomic read operations at the current memory cell /// returns true if a data-race is detected - fn read_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + fn read_race_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Unsynchronized read with vectors: {:#?} :: {:#?}", self, clocks); - if self.write <= clocks.clock[self.write_thread] { + if self.write <= 
clocks.clock[self.write_index] { let race_free = if let Some(atomic) = self.atomic() { atomic.write_vector <= clocks.clock }else{ true }; if race_free { - self.read.set_at_thread(&clocks.clock, thread); + self.read.set_at_index(&clocks.clock, index); Ok(()) }else{ Err(DataRace) @@ -774,17 +552,17 @@ impl MemoryCellClocks { /// Detect races for non-atomic write operations at the current memory cell /// returns true if a data-race is detected - fn write_race_detect(&mut self, clocks: &ThreadClockSet, thread: ThreadId) -> Result<(), DataRace> { + fn write_race_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Unsynchronized write with vectors: {:#?} :: {:#?}", self, clocks); - if self.write <= clocks.clock[self.write_thread] && self.read <= clocks.clock { + if self.write <= clocks.clock[self.write_index] && self.read <= clocks.clock { let race_free = if let Some(atomic) = self.atomic() { atomic.write_vector <= clocks.clock && atomic.read_vector <= clocks.clock }else{ true }; if race_free { - self.write = clocks.clock[thread]; - self.write_thread = thread; + self.write = clocks.clock[index]; + self.write_index = index; self.read.set_zero_vector(); Ok(()) }else{ @@ -822,7 +600,7 @@ impl VClockAlloc { // Find an index, if one exists where the value // in `l` is greater than the value in `r` - fn find_gt_index(l: &VClock, r: &VClock) -> Option { + fn find_gt_index(l: &VClock, r: &VClock) -> Option { let l_slice = l.as_slice(); let r_slice = r.as_slice(); l_slice.iter().zip(r_slice.iter()) @@ -844,7 +622,7 @@ impl VClockAlloc { }else{ None } - }) + }).map(|idx| VectorIdx::new(idx)) } /// Report a data-race found in the program @@ -859,35 +637,29 @@ impl VClockAlloc { action: &str, is_atomic: bool, pointer: Pointer, len: Size ) -> InterpResult<'tcx> { - let current_thread = global.current_thread(); - let current_state = global.current_thread_state(); - let mut write_clock = VClock::default(); + let (current_index, current_clocks) = global.current_thread_state(); + let write_clock; let ( other_action, other_thread, other_clock - ) = if range.write > current_state.clock[range.write_thread] { - + ) = if range.write > current_clocks.clock[range.write_index] { // Convert the write action into the vector clock it // represents for diagnostic purposes - let wclock = write_clock.get_mut_with_min_len( - current_state.clock.as_slice().len() - .max(range.write_thread.to_u32() as usize + 1) - ); - wclock[range.write_thread.to_u32() as usize] = range.write; - ("WRITE", range.write_thread, write_clock.as_slice()) + write_clock = VClock::new_with_index(range.write_index, range.write); + ("WRITE", range.write_index, &write_clock) }else if let Some(idx) = Self::find_gt_index( - &range.read, ¤t_state.clock + &range.read, ¤t_clocks.clock ){ - ("READ", ThreadId::new(idx), range.read.as_slice()) + ("READ", idx, &range.read) }else if !is_atomic { if let Some(atomic) = range.atomic() { if let Some(idx) = Self::find_gt_index( - &atomic.write_vector, ¤t_state.clock + &atomic.write_vector, ¤t_clocks.clock ) { - ("ATOMIC_STORE", ThreadId::new(idx), atomic.write_vector.as_slice()) + ("ATOMIC_STORE", idx, &atomic.write_vector) }else if let Some(idx) = Self::find_gt_index( - &atomic.read_vector, ¤t_state.clock + &atomic.read_vector, ¤t_clocks.clock ) { - ("ATOMIC_LOAD", ThreadId::new(idx), atomic.read_vector.as_slice()) + ("ATOMIC_LOAD", idx, &atomic.read_vector) }else{ unreachable!("Failed to find report data-race for non-atomic operation: no race found") } @@ -899,7 +671,7 @@ 
impl VClockAlloc { }; // Load elaborated thread information about the racing thread actions - let current_thread_info = global.print_thread_metadata(current_thread); + let current_thread_info = global.print_thread_metadata(current_index); let other_thread_info = global.print_thread_metadata(other_thread); // Throw the data-race detection @@ -910,7 +682,7 @@ impl VClockAlloc { action, current_thread_info, other_action, other_thread_info, pointer.alloc_id, pointer.offset.bytes(), len.bytes(), - current_state.clock, + current_clocks.clock, other_clock ) } @@ -921,14 +693,10 @@ impl VClockAlloc { /// operation pub fn read<'tcx>(&self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { if self.global.multi_threaded.get() { - let current_thread = self.global.current_thread(); - let current_state = self.global.current_thread_state(); - - // The alloc-ranges are not split, however changes are not going to be made - // to the ranges being tested, so this is ok + let (index, clocks) = self.global.current_thread_state(); let mut alloc_ranges = self.alloc_ranges.borrow_mut(); for (_,range) in alloc_ranges.iter_mut(pointer.offset, len) { - if range.read_race_detect(&*current_state, current_thread) == Err(DataRace) { + if range.read_race_detect(&*clocks, index) == Err(DataRace) { // Report data-race return Self::report_data_race( &self.global,range, "READ", false, pointer, len @@ -946,10 +714,9 @@ impl VClockAlloc { /// operation pub fn write<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { if self.global.multi_threaded.get() { - let current_thread = self.global.current_thread(); - let current_state = self.global.current_thread_state(); + let (index, clocks) = self.global.current_thread_state(); for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { - if range.write_race_detect(&*current_state, current_thread) == Err(DataRace) { + if range.write_race_detect(&*clocks, index) == Err(DataRace) { // Report data-race return Self::report_data_race( &self.global, range, "WRITE", false, pointer, len @@ -967,10 +734,9 @@ impl VClockAlloc { /// operation pub fn deallocate<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { if self.global.multi_threaded.get() { - let current_thread = self.global.current_thread(); - let current_state = self.global.current_thread_state(); + let (index, clocks) = self.global.current_thread_state(); for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { - if range.write_race_detect(&*current_state, current_thread) == Err(DataRace) { + if range.write_race_detect(&*clocks, index) == Err(DataRace) { // Report data-race return Self::report_data_race( &self.global, range, "DEALLOCATE", false, pointer, len @@ -989,6 +755,7 @@ impl VClockAlloc { /// additional metadata to model atomic fence operations #[derive(Clone, Default, Debug)] struct ThreadClockSet { + /// The increasing clock representing timestamps /// that happen-before this thread. 
clock: VClock, @@ -1008,7 +775,7 @@ impl ThreadClockSet { /// set of thread vector clocks #[inline] fn apply_release_fence(&mut self) { - self.fence_release.set_values(&self.clock); + self.fence_release.clone_from(&self.clock); } /// Apply the effects of a acquire fence to this @@ -1021,8 +788,8 @@ impl ThreadClockSet { /// Increment the happens-before clock at a /// known index #[inline] - fn increment_clock(&mut self, thread: ThreadId) { - self.clock.increment_thread(thread); + fn increment_clock(&mut self, index: VectorIdx) { + self.clock.increment_index(index); } /// Join the happens-before clock with that of @@ -1047,81 +814,178 @@ pub struct GlobalState { /// any data-races multi_threaded: Cell, - /// The current vector clock for all threads - /// this includes threads that have terminated - /// execution - thread_clocks: RefCell>, + /// Mapping of a vector index to a known set of thread + /// clocks, this is not directly mapping from a thread id + /// since it may refer to multiple threads + vector_clocks: RefCell>, + + /// Mapping of a given vector index to the current thread + /// that the execution is representing, this may change + /// if a vector index is re-assigned to a new thread + vector_info: RefCell>, //FIXME: make option + + /// The mapping of a given thread to a known vector clock + thread_info: RefCell, Option>)>>, - /// Thread name cache for better diagnostics on the reporting - /// of a data-race - thread_names: RefCell>>>, + /// The current vector index being executed + current_index: Cell, - /// The current thread being executed, - /// this is mirrored from the scheduler since - /// it is required for loading the current vector - /// clock for data-race detection - current_thread_id: Cell, + /// Potential vector indices that could be re-used on thread creation + /// values are inserted here on thread join events, and can be + /// re-used once the vector clocks of all current threads + /// are equal to the vector clock of the joined thread + reuse_candidates: RefCell>, } impl GlobalState { /// Create a new global state, setup with just thread-id=0 /// advanced to timestamp = 1 pub fn new() -> Self { - let mut vec = IndexVec::new(); - let thread_id = vec.push(ThreadClockSet::default()); - vec[thread_id].increment_clock(thread_id); - GlobalState { + let global_state = GlobalState { multi_threaded: Cell::new(false), - thread_clocks: RefCell::new(vec), - thread_names: RefCell::new(IndexVec::new()), - current_thread_id: Cell::new(thread_id), - } + vector_clocks: RefCell::new(IndexVec::new()), + vector_info: RefCell::new(IndexVec::new()), + thread_info: RefCell::new(IndexVec::new()), + current_index: Cell::new(VectorIdx::new(0)), + reuse_candidates: RefCell::new(FxHashSet::default()), + }; + + // Setup the main-thread since it is not explicitly created: + // uses vector index and thread-id 0, also the rust runtime gives + // the main-thread a name of "main". 
+ let index = global_state.vector_clocks.borrow_mut().push(ThreadClockSet::default()); + global_state.vector_info.borrow_mut().push(ThreadId::new(0)); + global_state.thread_info.borrow_mut().push( + (Some(index), Some("main".to_string().into_boxed_str()) + )); + + global_state } + // Try to find vector index values that can potentially be re-used + // by a new thread instead of a new vector index being created + fn find_vector_index_reuse_candidate(&self) -> Option { + let mut reuse = self.reuse_candidates.borrow_mut(); + let vector_clocks = self.vector_clocks.borrow(); + for &candidate in reuse.iter() { + let target_timestamp = vector_clocks[candidate].clock[candidate]; + if vector_clocks.iter().all(|clock| { + clock.clock[candidate] == target_timestamp + }) { + // All vector clocks for each vector index are equal to + // the target timestamp, therefore since the thread has + // terminated and cannot update the vector clock. + // No more data-races involving this vector index are possible + // so it can be re-used + assert!(reuse.remove(&candidate)); + return Some(candidate) + } + } + None + } // Hook for thread creation, enabled multi-threaded execution and marks // the current thread timestamp as happening-before the current thread #[inline] pub fn thread_created(&self, thread: ThreadId) { + let current_index = self.current_index(); - // Enable multi-threaded execution mode now that there are at least - // two threads + // Enable multi-threaded execution, there are now two threads + // so data-races are now possible. self.multi_threaded.set(true); - let current_thread = self.current_thread_id.get(); - let mut vectors = self.thread_clocks.borrow_mut(); - vectors.ensure_contains_elem(thread, Default::default); - let (current, created) = vectors.pick2_mut(current_thread, thread); - // Pre increment clocks before atomic operation - current.increment_clock(current_thread); + // Load and setup the associated thread metadata + let mut thread_info = self.thread_info.borrow_mut(); + thread_info.ensure_contains_elem(thread, Default::default); + + // Assign a vector index for the thread, attempting to re-use an old + // vector index that can no longer report any data-races if possible + let created_index = if let Some( + reuse_index + ) = self.find_vector_index_reuse_candidate() { + // Now re-configure the re-use candidate, increment the clock + // for the new sync use of the vector + let mut vector_clocks = self.vector_clocks.borrow_mut(); + vector_clocks[reuse_index].increment_clock(reuse_index); + + // Locate the old thread the vector was associated with and update + // it to represent the new thread instead + let mut vector_info = self.vector_info.borrow_mut(); + let old_thread = vector_info[reuse_index]; + vector_info[reuse_index] = thread; + + // Mark the thread the vector index was associated with as no longer + // representing a thread index + thread_info[old_thread].0 = None; + + reuse_index + }else{ + // No vector re-use candidates available, instead create + // a new vector index + let mut vector_info = self.vector_info.borrow_mut(); + vector_info.push(thread) + }; + + // Mark the chosen vector index as in use by the thread + thread_info[thread].0 = Some(created_index); + + // Create a thread clock set if applicable + let mut vector_clocks = self.vector_clocks.borrow_mut(); + if created_index == vector_clocks.next_index() { + vector_clocks.push(ThreadClockSet::default()); + } - // The current thread happens-before the created thread - // so update the created vector clock + // Now load 
the two clocks and configure the initial state + let (current, created) = vector_clocks.pick2_mut(current_index, created_index); + + // Advance the current thread before the synchronized operation + current.increment_clock(current_index); + + // Join the created with current, since the current threads + // previous actions happen-before the created thread created.join_with(current); - // Post increment clocks after atomic operation - current.increment_clock(current_thread); - created.increment_clock(thread); + // Advance both threads after the synchronized operation + current.increment_clock(current_index); + created.increment_clock(created_index); } /// Hook on a thread join to update the implicit happens-before relation - /// between the joined thead and the current thread + /// between the joined thead and the current thread. + /// Called after the join has occured, and hence implicitly also states + /// that the thread must have terminated as well #[inline] pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) { - let mut vectors = self.thread_clocks.borrow_mut(); - let (current, join) = vectors.pick2_mut(current_thread, join_thread); + let (current_index, join_index) = { + let thread_info = self.thread_info.borrow(); + let current_index = thread_info[current_thread].0 + .expect("Joining into thread with no assigned vector"); + let join_index = thread_info[join_thread].0 + .expect("Joining thread with no assigned vector"); + (current_index, join_index) + }; + let mut clocks_vec = self.vector_clocks.borrow_mut(); + let (current, join) = clocks_vec.pick2_mut(current_index, join_index); // Pre increment clocks before atomic operation - current.increment_clock(current_thread); - join.increment_clock(join_thread); + current.increment_clock(current_index); + join.increment_clock(join_index); // The join thread happens-before the current thread // so update the current vector clock current.join_with(join); // Post increment clocks after atomic operation - current.increment_clock(current_thread); - join.increment_clock(join_thread); + current.increment_clock(current_index); + join.increment_clock(join_index); + + // The joined thread vector clock is a potential candidate + // for re-use given sufficient time, mark as available once + // threads have been created. 
This is because this function + // is called once join_thread has terminated and such cannot + // update any-more + let mut reuse = self.reuse_candidates.borrow_mut(); + reuse.insert(join_index); } /// Hook for updating the local tracker of the currently @@ -1129,7 +993,10 @@ impl GlobalState { /// `active_thread` in thread.rs is updated #[inline] pub fn thread_set_active(&self, thread: ThreadId) { - self.current_thread_id.set(thread); + let thread_info = self.thread_info.borrow(); + let vector_idx = thread_info[thread].0 + .expect("Setting thread active with no assigned vector"); + self.current_index.set(vector_idx); } /// Hook for updating the local tracker of the threads name @@ -1137,33 +1004,40 @@ impl GlobalState { /// the thread name is used for improved diagnostics /// during a data-race #[inline] - pub fn thread_set_name(&self, name: String) { + pub fn thread_set_name(&self, thread: ThreadId, name: String) { let name = name.into_boxed_str(); - let mut names = self.thread_names.borrow_mut(); - let thread = self.current_thread_id.get(); - names.ensure_contains_elem(thread, Default::default); - names[thread] = Some(name); + let mut thread_info = self.thread_info.borrow_mut(); + thread_info[thread].1 = Some(name); } - /// Advance the vector clock for a thread - /// this is called before and after any atomic/synchronizing operations - /// that may manipulate state - #[inline] - fn advance_vector_clock(&self) { - let thread = self.current_thread_id.get(); - let mut vectors = self.thread_clocks.borrow_mut(); - vectors[thread].increment_clock(thread); - - // Log the increment in the atomic vector clock - log::trace!("Atomic vector clock increase for {:?} to {:?}",thread, vectors[thread].clock); + /// Attempt to perform a synchronized operation, this + /// will perform no operation if multi-threading is + /// not currently enabled. 
+ /// Otherwise it will increment the clock for the current + /// vector before and after the operation for data-race + /// detection between any happens-before edges the + /// operation may create + fn maybe_perform_sync_operation<'tcx>( + &self, op: impl FnOnce(VectorIdx, RefMut<'_,ThreadClockSet>) -> InterpResult<'tcx>, + ) -> InterpResult<'tcx> { + if self.multi_threaded.get() { + let (index, mut clocks) = self.current_thread_state_mut(); + clocks.increment_clock(index); + op(index, clocks)?; + let (_, mut clocks) = self.current_thread_state_mut(); + clocks.increment_clock(index); + } + Ok(()) } /// Internal utility to identify a thread stored internally /// returns the id and the name for better diagnostics - fn print_thread_metadata(&self, thread: ThreadId) -> String { - if let Some(Some(name)) = self.thread_names.borrow().get(thread) { + fn print_thread_metadata(&self, vector: VectorIdx) -> String { + let thread = self.vector_info.borrow()[vector]; + let thread_name = &self.thread_info.borrow()[thread].1; + if let Some(name) = thread_name { let name: &str = name; format!("Thread(id = {:?}, name = {:?})", thread.to_u32(), &*name) }else{ @@ -1175,25 +1049,19 @@ impl GlobalState { /// Acquire a lock, express that the previous call of /// `validate_lock_release` must happen before this pub fn validate_lock_acquire(&self, lock: &DataRaceLockHandle, thread: ThreadId) { - let mut ref_vector = self.thread_clocks.borrow_mut(); - ref_vector[thread].increment_clock(thread); - - let clocks = &mut ref_vector[thread]; + let (index, mut clocks) = self.load_thread_state_mut(thread); + clocks.increment_clock(index); clocks.clock.join(&lock.clock); - - ref_vector[thread].increment_clock(thread); + clocks.increment_clock(index); } /// Release a lock handle, express that this happens-before /// any subsequent calls to `validate_lock_acquire` pub fn validate_lock_release(&self, lock: &mut DataRaceLockHandle, thread: ThreadId) { - let mut ref_vector = self.thread_clocks.borrow_mut(); - ref_vector[thread].increment_clock(thread); - - let clocks = &ref_vector[thread]; - lock.clock.set_values(&clocks.clock); - - ref_vector[thread].increment_clock(thread); + let (index, mut clocks) = self.load_thread_state_mut(thread); + clocks.increment_clock(index); + lock.clock.clone_from(&clocks.clock); + clocks.increment_clock(index); } /// Release a lock handle, express that this happens-before @@ -1201,401 +1069,48 @@ impl GlobalState { /// as any previous calls to this function after any /// `validate_lock_release` calls pub fn validate_lock_release_shared(&self, lock: &mut DataRaceLockHandle, thread: ThreadId) { - let mut ref_vector = self.thread_clocks.borrow_mut(); - ref_vector[thread].increment_clock(thread); - - let clocks = &ref_vector[thread]; + let (index, mut clocks) = self.load_thread_state_mut(thread); + clocks.increment_clock(index); lock.clock.join(&clocks.clock); - - ref_vector[thread].increment_clock(thread); - } - - /// Load the thread clock set associated with the current thread - #[inline] - fn current_thread_state(&self) -> Ref<'_, ThreadClockSet> { - let ref_vector = self.thread_clocks.borrow(); - let thread = self.current_thread_id.get(); - Ref::map(ref_vector, |vector| &vector[thread]) - } - - /// Load the thread clock set associated with the current thread - /// mutably for modification - #[inline] - fn current_thread_state_mut(&self) -> RefMut<'_, ThreadClockSet> { - let ref_vector = self.thread_clocks.borrow_mut(); - let thread = self.current_thread_id.get(); - RefMut::map(ref_vector, 
|vector| &mut vector[thread]) - } - - /// Return the current thread, should be the same - /// as the data-race active thread - #[inline] - fn current_thread(&self) -> ThreadId { - self.current_thread_id.get() + clocks.increment_clock(index); } -} - - -/// The size of the vector-clock to store inline -/// clock vectors larger than this will be stored on the heap -const SMALL_VECTOR: usize = 4; - -/// The type of the time-stamps recorded in the data-race detector -/// set to a type of unsigned integer -type Timestamp = u32; - -/// A vector clock for detecting data-races -/// invariants: -/// - the last element in a VClock must not be 0 -/// -- this means that derive(PartialEq & Eq) is correct -/// -- as there is no implicit zero tail that might be equal -/// -- also simplifies the implementation of PartialOrd -#[derive(Clone, PartialEq, Eq, Default, Debug)] -pub struct VClock(SmallVec<[Timestamp; SMALL_VECTOR]>); - -impl VClock { - /// Load the backing slice behind the clock vector. + /// Load the vector index used by the given thread as well as the set of vector clocks + /// used by the thread #[inline] - fn as_slice(&self) -> &[Timestamp] { - self.0.as_slice() + fn load_thread_state_mut(&self, thread: ThreadId) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { + let index = self.thread_info.borrow()[thread].0 + .expect("Loading thread state for thread with no assigned vector"); + let ref_vector = self.vector_clocks.borrow_mut(); + let clocks = RefMut::map(ref_vector, |vec| &mut vec[index]); + (index, clocks) } - /// Get a mutable slice to the internal vector with minimum `min_len` - /// elements, to preserve invariants this vector must modify - /// the `min_len`-1 nth element to a non-zero value + /// Load the current vector clock in use and the current set of thread clocks + /// in use for the vector #[inline] - fn get_mut_with_min_len(&mut self, min_len: usize) -> &mut [Timestamp] { - if self.0.len() < min_len { - self.0.resize(min_len, 0); - } - assert!(self.0.len() >= min_len); - self.0.as_mut_slice() + fn current_thread_state(&self) -> (VectorIdx, Ref<'_, ThreadClockSet>) { + let index = self.current_index(); + let ref_vector = self.vector_clocks.borrow(); + let clocks = Ref::map(ref_vector, |vec| &vec[index]); + (index, clocks) } - /// Increment the vector clock at a known index + /// Load the current vector clock in use and the current set of thread clocks + /// in use for the vector mutably for modification #[inline] - fn increment_index(&mut self, idx: usize) { - let mut_slice = self.get_mut_with_min_len(idx + 1); - let idx_ref = &mut mut_slice[idx]; - *idx_ref = idx_ref.checked_add(1).expect("Vector clock overflow") + fn current_thread_state_mut(&self) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { + let index = self.current_index(); + let ref_vector = self.vector_clocks.borrow_mut(); + let clocks = RefMut::map(ref_vector, |vec| &mut vec[index]); + (index, clocks) } - // Increment the vector element representing the progress - // of execution in the given thread - #[inline] - pub fn increment_thread(&mut self, thread: ThreadId) { - self.increment_index(thread.to_u32() as usize); - } - - // Join the two vector-clocks together, this - // sets each vector-element to the maximum value - // of that element in either of the two source elements. - pub fn join(&mut self, other: &Self) { - let rhs_slice = other.as_slice(); - let lhs_slice = self.get_mut_with_min_len(rhs_slice.len()); - - // Element-wise set to maximum. 
- for (l, &r) in lhs_slice.iter_mut().zip(rhs_slice.iter()) { - *l = r.max(*l); - } - } - - /// Joins with a thread at a known index - fn set_at_index(&mut self, other: &Self, idx: usize){ - let mut_slice = self.get_mut_with_min_len(idx + 1); - let slice = other.as_slice(); - mut_slice[idx] = slice[idx]; - } - - /// Join with a threads vector clock only at the desired index - /// returns true if the value updated - #[inline] - pub fn set_at_thread(&mut self, other: &Self, thread: ThreadId){ - self.set_at_index(other, thread.to_u32() as usize); - } - - /// Clear the vector to all zeros, stored as an empty internal - /// vector - #[inline] - pub fn set_zero_vector(&mut self) { - self.0.clear(); - } - - /// Set the values stored in this vector clock - /// to the values stored in another. - pub fn set_values(&mut self, new_value: &VClock) { - let new_slice = new_value.as_slice(); - self.0.resize(new_slice.len(), 0); - self.0.copy_from_slice(new_slice); - } -} - - -impl PartialOrd for VClock { - fn partial_cmp(&self, other: &VClock) -> Option { - - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // Iterate through the combined vector slice - // keeping track of the order that is currently possible to satisfy. - // If an ordering relation is detected to be impossible, then bail and - // directly return None - let mut iter = lhs_slice.iter().zip(rhs_slice.iter()); - let mut order = match iter.next() { - Some((lhs, rhs)) => lhs.cmp(rhs), - None => Ordering::Equal - }; - for (l, r) in iter { - match order { - Ordering::Equal => order = l.cmp(r), - Ordering::Less => if l > r { - return None - }, - Ordering::Greater => if l < r { - return None - } - } - } - - //Now test if either left or right have trailing elements - // by the invariant the trailing elements have at least 1 - // non zero value, so no additional calculation is required - // to determine the result of the PartialOrder - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - match l_len.cmp(&r_len) { - // Equal has no additional elements: return current order - Ordering::Equal => Some(order), - // Right has at least 1 element > than the implicit 0, - // so the only valid values are Ordering::Less or None - Ordering::Less => match order { - Ordering::Less | Ordering::Equal => Some(Ordering::Less), - Ordering::Greater => None - } - // Left has at least 1 element > than the implicit 0, - // so the only valid values are Ordering::Greater or None - Ordering::Greater => match order { - Ordering::Greater | Ordering::Equal => Some(Ordering::Greater), - Ordering::Less => None - } - } - } - - fn lt(&self, other: &VClock) -> bool { - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // If l_len > r_len then at least one element - // in l_len is > than r_len, therefore the result - // is either Some(Greater) or None, so return false - // early. - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - if l_len <= r_len { - // If any elements on the left are greater than the right - // then the result is None or Some(Greater), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. 
Otherwise l <= r, finally - // the case where the values are potentially equal needs to be considered - // and false returned as well - let mut equal = l_len == r_len; - for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { - if l > r { - return false - }else if l < r { - equal = false; - } - } - !equal - }else{ - false - } - } - - fn le(&self, other: &VClock) -> bool { - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // If l_len > r_len then at least one element - // in l_len is > than r_len, therefore the result - // is either Some(Greater) or None, so return false - // early. - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - if l_len <= r_len { - // If any elements on the left are greater than the right - // then the result is None or Some(Greater), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. Otherwise l <= r - !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l > r) - }else{ - false - } - } - - fn gt(&self, other: &VClock) -> bool { - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // If r_len > l_len then at least one element - // in r_len is > than l_len, therefore the result - // is either Some(Less) or None, so return false - // early. - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - if l_len >= r_len { - // If any elements on the left are less than the right - // then the result is None or Some(Less), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. Otherwise l >=, finally - // the case where the values are potentially equal needs to be considered - // and false returned as well - let mut equal = l_len == r_len; - for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { - if l < r { - return false - }else if l > r { - equal = false; - } - } - !equal - }else{ - false - } - } - - fn ge(&self, other: &VClock) -> bool { - // Load the values as slices - let lhs_slice = self.as_slice(); - let rhs_slice = other.as_slice(); - - // If r_len > l_len then at least one element - // in r_len is > than l_len, therefore the result - // is either Some(Less) or None, so return false - // early. - let l_len = lhs_slice.len(); - let r_len = rhs_slice.len(); - if l_len >= r_len { - // If any elements on the left are less than the right - // then the result is None or Some(Less), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. 
Otherwise l >= r - !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l < r) - }else{ - false - } - } -} - -impl Index for VClock { - type Output = Timestamp; - + /// Return the current thread, should be the same + /// as the data-race active thread #[inline] - fn index(&self, index: ThreadId) -> &Timestamp { - self.as_slice().get(index.to_u32() as usize).unwrap_or(&0) + fn current_index(&self) -> VectorIdx { + self.current_index.get() } } - -/// Test vector clock ordering operations -/// data-race detection is tested in the external -/// test suite -#[cfg(test)] -mod tests { - use super::{VClock, Timestamp}; - use std::cmp::Ordering; - - #[test] - fn test_equal() { - let mut c1 = VClock::default(); - let mut c2 = VClock::default(); - assert_eq!(c1, c2); - c1.increment_index(5); - assert_ne!(c1, c2); - c2.increment_index(53); - assert_ne!(c1, c2); - c1.increment_index(53); - assert_ne!(c1, c2); - c2.increment_index(5); - assert_eq!(c1, c2); - } - - #[test] - fn test_partial_order() { - // Small test - assert_order(&[1], &[1], Some(Ordering::Equal)); - assert_order(&[1], &[2], Some(Ordering::Less)); - assert_order(&[2], &[1], Some(Ordering::Greater)); - assert_order(&[1], &[1,2], Some(Ordering::Less)); - assert_order(&[2], &[1,2], None); - - // Misc tests - assert_order(&[400], &[0, 1], None); - - // Large test - assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Equal)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Greater)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], None); - assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Less)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); - } - - fn from_slice(mut slice: &[Timestamp]) -> VClock { - while let Some(0) = slice.last() { - slice = &slice[..slice.len() - 1] - } - VClock(smallvec::SmallVec::from_slice(slice)) - } - - fn assert_order(l: &[Timestamp], r: &[Timestamp], o: Option) { - let l = from_slice(l); - let r = from_slice(r); - - //Test partial_cmp - let compare = l.partial_cmp(&r); - assert_eq!(compare, o, "Invalid comparison\n l: {:?}\n r: {:?}",l,r); - let alt_compare = r.partial_cmp(&l); - assert_eq!(alt_compare, o.map(Ordering::reverse), "Invalid alt comparison\n l: {:?}\n r: {:?}",l,r); - - //Test operatorsm with faster implementations - assert_eq!( - matches!(compare,Some(Ordering::Less)), l < r, - "Invalid (<):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(compare,Some(Ordering::Less) | Some(Ordering::Equal)), l <= r, - "Invalid (<=):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(compare,Some(Ordering::Greater)), l > r, - "Invalid (>):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(compare,Some(Ordering::Greater) | Some(Ordering::Equal)), l >= r, - "Invalid (>=):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(alt_compare,Some(Ordering::Less)), r < l, - "Invalid alt (<):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(alt_compare,Some(Ordering::Less) | Some(Ordering::Equal)), r <= l, - "Invalid alt (<=):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(alt_compare,Some(Ordering::Greater)), r > l, - "Invalid alt (>):\n l: {:?}\n r: {:?}",l,r - ); - assert_eq!( - matches!(alt_compare,Some(Ordering::Greater) | Some(Ordering::Equal)), r >= l, - "Invalid alt (>=):\n l: {:?}\n r: {:?}",l,r 
- ); - } -} diff --git a/src/lib.rs b/src/lib.rs index f384787e4c..c8c9e70ec3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,6 +35,7 @@ mod shims; mod stacked_borrows; mod sync; mod thread; +mod vector_clock; // Establish a "crate-wide prelude": we often import `crate::*`. @@ -79,6 +80,9 @@ pub use crate::thread::{ pub use crate::sync::{ EvalContextExt as SyncEvalContextExt, CondvarId, MutexId, RwLockId }; +pub use crate::vector_clock::{ + VClock, VSmallClockSet, VectorIdx, VTimestamp +}; /// Insert rustc arguments at the beginning of the argument list that Miri wants to be /// set per default, for maximal validation power. diff --git a/src/shims/intrinsics.rs b/src/shims/intrinsics.rs index 2bb15e712c..50f97af845 100644 --- a/src/shims/intrinsics.rs +++ b/src/shims/intrinsics.rs @@ -469,8 +469,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let place = this.deref_operand(place)?; // make sure it fits into a scalar; otherwise it cannot be atomic - let val = this.read_scalar_racy(place)?; - this.validate_atomic_load(place, atomic)?; + let val = this.read_scalar_atomic(place, atomic)?; // Check alignment requirements. Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must @@ -495,9 +494,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; // Perform atomic store - this.write_scalar_racy(val, place)?; - - this.validate_atomic_store(place, atomic)?; + this.write_scalar_atomic(val, place, atomic)?; Ok(()) } @@ -527,7 +524,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx bug!("Atomic arithmetic operations only work on integer types"); } let rhs = this.read_immediate(rhs)?; - let old = this.read_immediate_racy(place)?; + let old = this.allow_data_races_mut(|this| { + this.read_immediate(place. into()) + })?; // Check alignment requirements. Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must @@ -539,7 +538,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx // Atomics wrap around on overflow. let val = this.binary_op(op, old, rhs)?; let val = if neg { this.unary_op(mir::UnOp::Not, val)? } else { val }; - this.write_immediate_racy(*val, place)?; + this.allow_data_races_mut(|this| { + this.write_immediate(*val, place.into()) + })?; this.validate_atomic_rmw(place, atomic)?; Ok(()) @@ -553,7 +554,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let &[place, new] = check_arg_count(args)?; let place = this.deref_operand(place)?; let new = this.read_scalar(new)?; - let old = this.read_scalar_racy(place)?; + let old = this.allow_data_races_mut(|this| { + this.read_scalar(place.into()) + })?; // Check alignment requirements. Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. 
AtomicU64 on 32bit must @@ -562,7 +565,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; this.write_scalar(old, dest)?; // old value is returned - this.write_scalar_racy(new, place)?; + this.allow_data_races_mut(|this| { + this.write_scalar(new, place.into()) + })?; this.validate_atomic_rmw(place, atomic)?; Ok(()) @@ -583,7 +588,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx // to read with the failure ordering and if successfull then try again with the success // read ordering and write in the success case. // Read as immediate for the sake of `binary_op()` - let old = this.read_immediate_racy(place)?; + let old = this.allow_data_races_mut(|this| { + this.read_immediate(place.into()) + })?; // Check alignment requirements. Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must @@ -602,7 +609,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx // if successful, perform a full rw-atomic validation // otherwise treat this as an atomic load with the fail ordering if eq.to_bool()? { - this.write_scalar_racy(new, place)?; + this.allow_data_races_mut(|this| { + this.write_scalar(new, place.into()) + })?; this.validate_atomic_rmw(place, success)?; } else { this.validate_atomic_load(place, fail)?; diff --git a/src/shims/posix/sync.rs b/src/shims/posix/sync.rs index 332e79071a..d741ef346e 100644 --- a/src/shims/posix/sync.rs +++ b/src/shims/posix/sync.rs @@ -62,7 +62,10 @@ fn mutex_get_kind<'mir, 'tcx: 'mir>( mutex_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; - ecx.read_scalar_at_offset_racy(mutex_op, offset, ecx.machine.layouts.i32) + ecx.read_scalar_at_offset_atomic( + mutex_op, offset, ecx.machine.layouts.i32, + AtomicReadOp::SeqCst + ) } fn mutex_set_kind<'mir, 'tcx: 'mir>( @@ -71,14 +74,19 @@ fn mutex_set_kind<'mir, 'tcx: 'mir>( kind: impl Into>, ) -> InterpResult<'tcx, ()> { let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; - ecx.write_scalar_at_offset_racy(mutex_op, offset, kind, ecx.machine.layouts.i32) + ecx.write_scalar_at_offset_atomic( + mutex_op, offset, kind, ecx.machine.layouts.i32, + AtomicWriteOp::SeqCst + ) } fn mutex_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, mutex_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - ecx.read_scalar_at_offset_racy(mutex_op, 4, ecx.machine.layouts.u32) + ecx.read_scalar_at_offset_atomic( + mutex_op, 4, ecx.machine.layouts.u32, AtomicReadOp::SeqCst + ) } fn mutex_set_id<'mir, 'tcx: 'mir>( @@ -86,7 +94,10 @@ fn mutex_set_id<'mir, 'tcx: 'mir>( mutex_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - ecx.write_scalar_at_offset_racy(mutex_op, 4, id, ecx.machine.layouts.u32) + ecx.write_scalar_at_offset_atomic( + mutex_op, 4, id, ecx.machine.layouts.u32, + AtomicWriteOp::SeqCst + ) } fn mutex_get_or_create_id<'mir, 'tcx: 'mir>( @@ -116,7 +127,10 @@ fn rwlock_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, rwlock_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - ecx.read_scalar_at_offset_racy(rwlock_op, 4, ecx.machine.layouts.u32) + ecx.read_scalar_at_offset_atomic( + rwlock_op, 4, ecx.machine.layouts.u32, + AtomicReadOp::SeqCst + ) } fn rwlock_set_id<'mir, 'tcx: 'mir>( @@ -124,7 +138,10 @@ fn rwlock_set_id<'mir, 'tcx: 
'mir>( rwlock_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - ecx.write_scalar_at_offset_racy(rwlock_op, 4, id, ecx.machine.layouts.u32) + ecx.write_scalar_at_offset_atomic( + rwlock_op, 4, id, ecx.machine.layouts.u32, + AtomicWriteOp::SeqCst + ) } fn rwlock_get_or_create_id<'mir, 'tcx: 'mir>( @@ -177,7 +194,10 @@ fn cond_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, cond_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - ecx.read_scalar_at_offset_racy(cond_op, 4, ecx.machine.layouts.u32) + ecx.read_scalar_at_offset_atomic( + cond_op, 4, ecx.machine.layouts.u32, + AtomicReadOp::SeqCst + ) } fn cond_set_id<'mir, 'tcx: 'mir>( @@ -185,7 +205,10 @@ fn cond_set_id<'mir, 'tcx: 'mir>( cond_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - ecx.write_scalar_at_offset_racy(cond_op, 4, id, ecx.machine.layouts.u32) + ecx.write_scalar_at_offset_atomic( + cond_op, 4, id, ecx.machine.layouts.u32, + AtomicWriteOp::SeqCst + ) } fn cond_get_or_create_id<'mir, 'tcx: 'mir>( diff --git a/src/thread.rs b/src/thread.rs index 08aeaa4fd0..f94805ae02 100644 --- a/src/thread.rs +++ b/src/thread.rs @@ -638,7 +638,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx fn set_active_thread_name(&mut self, new_thread_name: Vec) { let this = self.eval_context_mut(); if let Ok(string) = String::from_utf8(new_thread_name.clone()) { - this.memory.extra.data_race.thread_set_name(string); + this.memory.extra.data_race.thread_set_name( + this.machine.threads.active_thread, string + ); } this.machine.threads.set_thread_name(new_thread_name); } diff --git a/src/vector_clock.rs b/src/vector_clock.rs new file mode 100644 index 0000000000..8d05eb1b99 --- /dev/null +++ b/src/vector_clock.rs @@ -0,0 +1,602 @@ +use std::{ + fmt::{self, Debug}, cmp::Ordering, ops::Index, + num::TryFromIntError, convert::TryFrom, mem +}; +use smallvec::SmallVec; +use rustc_index::vec::Idx; +use rustc_data_structures::fx::FxHashMap; + +/// A vector clock index, this is associated with a thread id +/// but in some cases one vector index may be shared with +/// multiple thread ids. +#[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)] +pub struct VectorIdx(u32); + +impl VectorIdx{ + pub fn to_u32(self) -> u32 { + self.0 + } + pub const MAX_INDEX: VectorIdx = VectorIdx(u32::MAX); +} + +impl Idx for VectorIdx { + fn new(idx: usize) -> Self { + VectorIdx(u32::try_from(idx).unwrap()) + } + + fn index(self) -> usize { + usize::try_from(self.0).unwrap() + } +} + +impl TryFrom for VectorIdx { + type Error = TryFromIntError; + fn try_from(id: u64) -> Result { + u32::try_from(id).map(|id_u32| Self(id_u32)) + } +} + +impl From for VectorIdx { + fn from(id: u32) -> Self { + Self(id) + } +} + + +/// A sparse set of vector clocks, where each vector index +/// is associated with a vector clock. +/// This treats all vector clocks that have not been assigned +/// as equal to the all zero vector clocks +/// Is optimized for the common case where only 1 element is stored +/// in the set and the rest can be ignored, falling-back to +/// using an internal hash-map once more than 1 element is assigned +/// at any one time +#[derive(Clone)] +pub struct VSmallClockSet(VSmallClockSetInner); + +#[derive(Clone)] +enum VSmallClockSetInner { + /// Zero or 1 vector elements, common + /// case for the sparse set. 
+ /// The all zero vector clock is treated + /// as equal to the empty element + Small(VectorIdx, VClock), + + /// Hash-map of vector clocks + Large(FxHashMap) +} + +impl VSmallClockSet { + + /// Remove all clock vectors from the map, setting them + /// to the zero vector + pub fn clear(&mut self) { + match &mut self.0 { + VSmallClockSetInner::Small(_, clock) => { + clock.set_zero_vector() + } + VSmallClockSetInner::Large(hash_map) => { + hash_map.clear(); + } + } + } + + /// Remove all clock vectors except for the clock vector + /// stored at the given index, which is retained + pub fn retain_index(&mut self, index: VectorIdx) { + match &mut self.0 { + VSmallClockSetInner::Small(small_idx, clock) => { + if index != *small_idx { + // The zero-vector is considered to equal + // the empty element + clock.set_zero_vector() + } + }, + VSmallClockSetInner::Large(hash_map) => { + hash_map.retain(|idx,_| { + *idx == index + }); + } + } + } + + /// Insert the vector clock into the associated vector + /// index + pub fn insert(&mut self, index: VectorIdx, clock: &VClock) { + match &mut self.0 { + VSmallClockSetInner::Small(small_idx, small_clock) => { + if small_clock.is_zero_vector() { + *small_idx = index; + small_clock.clone_from(clock); + }else if !clock.is_zero_vector() { + let mut hash_map = FxHashMap::default(); + hash_map.insert(*small_idx, mem::take(small_clock)); + hash_map.insert(index, clock.clone()); + self.0 = VSmallClockSetInner::Large(hash_map); + } + }, + VSmallClockSetInner::Large(hash_map) => { + if !clock.is_zero_vector() { + hash_map.insert(index, clock.clone()); + } + } + } + } + + /// Try to load the vector clock associated with the current + /// vector index. + pub fn get(&self, index: VectorIdx) -> Option<&VClock> { + match &self.0 { + VSmallClockSetInner::Small(small_idx, small_clock) => { + if *small_idx == index && !small_clock.is_zero_vector() { + Some(small_clock) + }else{ + None + } + }, + VSmallClockSetInner::Large(hash_map) => { + hash_map.get(&index) + } + } + } +} + +impl Default for VSmallClockSet { + #[inline] + fn default() -> Self { + VSmallClockSet( + VSmallClockSetInner::Small(VectorIdx::new(0), VClock::default()) + ) + } +} + +impl Debug for VSmallClockSet { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Print the contents of the small vector clock set as the map + // of vector index to vector clock that they represent + let mut map = f.debug_map(); + match &self.0 { + VSmallClockSetInner::Small(small_idx, small_clock) => { + if !small_clock.is_zero_vector() { + map.entry(&small_idx, &small_clock); + } + }, + VSmallClockSetInner::Large(hash_map) => { + for (idx, elem) in hash_map.iter() { + map.entry(idx, elem); + } + } + } + map.finish() + } +} +impl PartialEq for VSmallClockSet { + fn eq(&self, other: &Self) -> bool { + use VSmallClockSetInner::*; + match (&self.0, &other.0) { + (Small(i1, c1), Small(i2, c2)) => { + if c1.is_zero_vector() { + // Either they are both zero or they are non-equal + c2.is_zero_vector() + }else{ + // At least one is non-zero, so the full comparison is correct + i1 == i2 && c1 == c2 + } + } + (VSmallClockSetInner::Small(idx, clock), VSmallClockSetInner::Large(hash_map)) | + (VSmallClockSetInner::Large(hash_map), VSmallClockSetInner::Small(idx, clock)) => { + if hash_map.len() == 0 { + // Equal to the empty hash-map + clock.is_zero_vector() + }else if hash_map.len() == 1 { + // Equal to the hash-map with one element + let (hash_idx, hash_clock) = hash_map.iter().next().unwrap(); + hash_idx == idx && hash_clock 
== clock + }else{ + false + } + } + (Large(map1), Large(map2)) => { + map1 == map2 + } + } + } +} +impl Eq for VSmallClockSet {} + + + +/// The size of the vector-clock to store inline +/// clock vectors larger than this will be stored on the heap +const SMALL_VECTOR: usize = 4; + +/// The type of the time-stamps recorded in the data-race detector +/// set to a type of unsigned integer +pub type VTimestamp = u32; + +/// A vector clock for detecting data-races +/// invariants: +/// - the last element in a VClock must not be 0 +/// -- this means that derive(PartialEq & Eq) is correct +/// -- as there is no implicit zero tail that might be equal +/// -- also simplifies the implementation of PartialOrd +#[derive(PartialEq, Eq, Default, Debug)] +pub struct VClock(SmallVec<[VTimestamp; SMALL_VECTOR]>); + +impl VClock { + + /// Create a new vector-clock containing all zeros except + /// for a value at the given index + pub fn new_with_index(index: VectorIdx, timestamp: VTimestamp) -> VClock { + let len = index.index() + 1; + let mut vec = smallvec::smallvec![0; len]; + vec[index.index()] = timestamp; + VClock(vec) + } + + /// Load the internal timestamp slice in the vector clock + #[inline] + pub fn as_slice(&self) -> &[VTimestamp] { + self.0.as_slice() + } + + /// Get a mutable slice to the internal vector with minimum `min_len` + /// elements, to preserve invariants this vector must modify + /// the `min_len`-1 nth element to a non-zero value + #[inline] + fn get_mut_with_min_len(&mut self, min_len: usize) -> &mut [VTimestamp] { + if self.0.len() < min_len { + self.0.resize(min_len, 0); + } + assert!(self.0.len() >= min_len); + self.0.as_mut_slice() + } + + /// Increment the vector clock at a known index + /// this will panic if the vector index overflows + #[inline] + pub fn increment_index(&mut self, idx: VectorIdx) { + let idx = idx.index(); + let mut_slice = self.get_mut_with_min_len(idx + 1); + let idx_ref = &mut mut_slice[idx]; + *idx_ref = idx_ref.checked_add(1).expect("Vector clock overflow") + } + + // Join the two vector-clocks together, this + // sets each vector-element to the maximum value + // of that element in either of the two source elements. + pub fn join(&mut self, other: &Self) { + let rhs_slice = other.as_slice(); + let lhs_slice = self.get_mut_with_min_len(rhs_slice.len()); + for (l, &r) in lhs_slice.iter_mut().zip(rhs_slice.iter()) { + *l = r.max(*l); + } + } + + /// Set the element at the current index of the vector + pub fn set_at_index(&mut self, other: &Self, idx: VectorIdx) { + let idx = idx.index(); + let mut_slice = self.get_mut_with_min_len(idx + 1); + let slice = other.as_slice(); + mut_slice[idx] = slice[idx]; + } + + /// Set the vector to the all-zero vector + #[inline] + pub fn set_zero_vector(&mut self) { + self.0.clear(); + } + + /// Return if this vector is the all-zero vector + pub fn is_zero_vector(&self) -> bool { + self.0.is_empty() + } +} + +impl Clone for VClock { + fn clone(&self) -> Self { + VClock(self.0.clone()) + } + fn clone_from(&mut self, source: &Self) { + let source_slice = source.as_slice(); + self.0.clear(); + self.0.extend_from_slice(source_slice); + } +} + +impl PartialOrd for VClock { + fn partial_cmp(&self, other: &VClock) -> Option { + + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // Iterate through the combined vector slice + // keeping track of the order that is currently possible to satisfy. 
+ // If an ordering relation is detected to be impossible, then bail and + // directly return None + let mut iter = lhs_slice.iter().zip(rhs_slice.iter()); + let mut order = match iter.next() { + Some((lhs, rhs)) => lhs.cmp(rhs), + None => Ordering::Equal + }; + for (l, r) in iter { + match order { + Ordering::Equal => order = l.cmp(r), + Ordering::Less => if l > r { + return None + }, + Ordering::Greater => if l < r { + return None + } + } + } + + //Now test if either left or right have trailing elements + // by the invariant the trailing elements have at least 1 + // non zero value, so no additional calculation is required + // to determine the result of the PartialOrder + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + match l_len.cmp(&r_len) { + // Equal has no additional elements: return current order + Ordering::Equal => Some(order), + // Right has at least 1 element > than the implicit 0, + // so the only valid values are Ordering::Less or None + Ordering::Less => match order { + Ordering::Less | Ordering::Equal => Some(Ordering::Less), + Ordering::Greater => None + } + // Left has at least 1 element > than the implicit 0, + // so the only valid values are Ordering::Greater or None + Ordering::Greater => match order { + Ordering::Greater | Ordering::Equal => Some(Ordering::Greater), + Ordering::Less => None + } + } + } + + fn lt(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If l_len > r_len then at least one element + // in l_len is > than r_len, therefore the result + // is either Some(Greater) or None, so return false + // early. + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len <= r_len { + // If any elements on the left are greater than the right + // then the result is None or Some(Greater), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l <= r, finally + // the case where the values are potentially equal needs to be considered + // and false returned as well + let mut equal = l_len == r_len; + for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { + if l > r { + return false + }else if l < r { + equal = false; + } + } + !equal + }else{ + false + } + } + + fn le(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If l_len > r_len then at least one element + // in l_len is > than r_len, therefore the result + // is either Some(Greater) or None, so return false + // early. + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len <= r_len { + // If any elements on the left are greater than the right + // then the result is None or Some(Greater), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l <= r + !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l > r) + }else{ + false + } + } + + fn gt(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If r_len > l_len then at least one element + // in r_len is > than l_len, therefore the result + // is either Some(Less) or None, so return false + // early. 
+ let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len >= r_len { + // If any elements on the left are less than the right + // then the result is None or Some(Less), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l >=, finally + // the case where the values are potentially equal needs to be considered + // and false returned as well + let mut equal = l_len == r_len; + for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { + if l < r { + return false + }else if l > r { + equal = false; + } + } + !equal + }else{ + false + } + } + + fn ge(&self, other: &VClock) -> bool { + // Load the values as slices + let lhs_slice = self.as_slice(); + let rhs_slice = other.as_slice(); + + // If r_len > l_len then at least one element + // in r_len is > than l_len, therefore the result + // is either Some(Less) or None, so return false + // early. + let l_len = lhs_slice.len(); + let r_len = rhs_slice.len(); + if l_len >= r_len { + // If any elements on the left are less than the right + // then the result is None or Some(Less), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l >= r + !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l < r) + }else{ + false + } + } +} + +impl Index for VClock { + type Output = VTimestamp; + + #[inline] + fn index(&self, index: VectorIdx) -> &VTimestamp { + self.as_slice().get(index.to_u32() as usize).unwrap_or(&0) + } +} + + +/// Test vector clock ordering operations +/// data-race detection is tested in the external +/// test suite +#[cfg(test)] +mod tests { + use super::{VClock, VTimestamp, VectorIdx, VSmallClockSet}; + use std::cmp::Ordering; + + #[test] + fn test_equal() { + let mut c1 = VClock::default(); + let mut c2 = VClock::default(); + assert_eq!(c1, c2); + c1.increment_index(VectorIdx(5)); + assert_ne!(c1, c2); + c2.increment_index(VectorIdx(53)); + assert_ne!(c1, c2); + c1.increment_index(VectorIdx(53)); + assert_ne!(c1, c2); + c2.increment_index(VectorIdx(5)); + assert_eq!(c1, c2); + } + + #[test] + fn test_partial_order() { + // Small test + assert_order(&[1], &[1], Some(Ordering::Equal)); + assert_order(&[1], &[2], Some(Ordering::Less)); + assert_order(&[2], &[1], Some(Ordering::Greater)); + assert_order(&[1], &[1,2], Some(Ordering::Less)); + assert_order(&[2], &[1,2], None); + + // Misc tests + assert_order(&[400], &[0, 1], None); + + // Large test + assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Equal)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Greater)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], None); + assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Less)); + assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); + } + + fn from_slice(mut slice: &[VTimestamp]) -> VClock { + while let Some(0) = slice.last() { + slice = &slice[..slice.len() - 1] + } + VClock(smallvec::SmallVec::from_slice(slice)) + } + + fn assert_order(l: &[VTimestamp], r: &[VTimestamp], o: Option) { + let l = from_slice(l); + let r = from_slice(r); + + //Test partial_cmp + let compare = l.partial_cmp(&r); + assert_eq!(compare, o, "Invalid comparison\n l: {:?}\n r: {:?}",l,r); + let 
alt_compare = r.partial_cmp(&l); + assert_eq!(alt_compare, o.map(Ordering::reverse), "Invalid alt comparison\n l: {:?}\n r: {:?}",l,r); + + //Test operatorsm with faster implementations + assert_eq!( + matches!(compare,Some(Ordering::Less)), l < r, + "Invalid (<):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(compare,Some(Ordering::Less) | Some(Ordering::Equal)), l <= r, + "Invalid (<=):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(compare,Some(Ordering::Greater)), l > r, + "Invalid (>):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(compare,Some(Ordering::Greater) | Some(Ordering::Equal)), l >= r, + "Invalid (>=):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Less)), r < l, + "Invalid alt (<):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Less) | Some(Ordering::Equal)), r <= l, + "Invalid alt (<=):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Greater)), r > l, + "Invalid alt (>):\n l: {:?}\n r: {:?}",l,r + ); + assert_eq!( + matches!(alt_compare,Some(Ordering::Greater) | Some(Ordering::Equal)), r >= l, + "Invalid alt (>=):\n l: {:?}\n r: {:?}",l,r + ); + } + + #[test] + pub fn test_vclock_set() { + let mut set = VSmallClockSet::default(); + let v1 = from_slice(&[3,0,1]); + let v2 = from_slice(&[4,2,3]); + let v3 = from_slice(&[4,8,3]); + set.insert(VectorIdx(0), &v1); + assert_eq!(set.get(VectorIdx(0)), Some(&v1)); + set.insert(VectorIdx(5), &v2); + assert_eq!(set.get(VectorIdx(0)), Some(&v1)); + assert_eq!(set.get(VectorIdx(5)), Some(&v2)); + set.insert(VectorIdx(53), &v3); + assert_eq!(set.get(VectorIdx(0)), Some(&v1)); + assert_eq!(set.get(VectorIdx(5)), Some(&v2)); + assert_eq!(set.get(VectorIdx(53)), Some(&v3)); + set.retain_index(VectorIdx(53)); + assert_eq!(set.get(VectorIdx(0)), None); + assert_eq!(set.get(VectorIdx(5)), None); + assert_eq!(set.get(VectorIdx(53)), Some(&v3)); + set.clear(); + assert_eq!(set.get(VectorIdx(0)), None); + assert_eq!(set.get(VectorIdx(5)), None); + assert_eq!(set.get(VectorIdx(53)), None); + set.insert(VectorIdx(53), &v3); + assert_eq!(set.get(VectorIdx(0)), None); + assert_eq!(set.get(VectorIdx(5)), None); + assert_eq!(set.get(VectorIdx(53)), Some(&v3)); + } +} From c70bbea257d43242fa9f700d099c4a8a8eae2971 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Thu, 5 Nov 2020 14:18:28 +0000 Subject: [PATCH 08/17] Move to use of thread termination hook for vector re-use, convert validate atomic op to use shared reference and get_raw instead of get_raw_mut so it can be used for validate_atomic_load as well --- src/data_race.rs | 175 ++++++++++++++++++++++++----------------------- src/thread.rs | 1 + 2 files changed, 91 insertions(+), 85 deletions(-) diff --git a/src/data_race.rs b/src/data_race.rs index e992c5a1d5..153e63b77d 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -159,52 +159,16 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { &self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp ) -> InterpResult<'tcx> { let this = self.eval_context_ref(); - let data_race = &*this.memory.extra.data_race; - if data_race.multi_threaded.get() { - - // Load an log the atomic operation - // the memory access has to be `get_raw` since otherwise this despite only - // mutating MemoryExtra will still trigger errors on read-only memory - let place_ptr = place.ptr.assert_ptr(); - let size = place.layout.size; - let alloc_meta = &this.memory.get_raw(place_ptr.alloc_id)?.extra.data_race; - log::trace!( - 
"Atomic op({}) with ordering {:?} on memory({:?}, offset={}, size={})", - "Atomic load", &atomic, place_ptr.alloc_id, place_ptr.offset.bytes(), size.bytes() - ); - - // Perform the atomic operation - let data_race = &alloc_meta.global; - data_race.maybe_perform_sync_operation(move |index, mut clocks| { - for (_,range) in alloc_meta.alloc_ranges.borrow_mut().iter_mut(place_ptr.offset, size) { - let res = if atomic == AtomicReadOp::Relaxed { - range.load_relaxed(&mut *clocks, index) - }else{ - range.acquire(&mut *clocks, index) - }; - if let Err(DataRace) = res { - mem::drop(clocks); - return VClockAlloc::report_data_race( - &alloc_meta.global, range, "Atomic load", true, - place_ptr, size - ); - } - } - Ok(()) - })?; - - // Log changes to atomic memory - if log::log_enabled!(log::Level::Trace) { - for (_,range) in alloc_meta.alloc_ranges.borrow().iter(place_ptr.offset, size) { - log::trace!( - "Updated atomic memory({:?}, offset={}, size={}) to {:#?}", - place.ptr.assert_ptr().alloc_id, place_ptr.offset.bytes(), size.bytes(), - range.atomic_ops - ); + this.validate_atomic_op( + place, atomic, "Atomic Load", + move |memory, clocks, index, atomic| { + if atomic == AtomicReadOp::Relaxed { + memory.load_relaxed(&mut *clocks, index) + }else{ + memory.acquire(&mut *clocks, index) } } - } - Ok(()) + ) } /// Update the data-race detector for an atomic write occuring at the @@ -212,8 +176,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { fn validate_atomic_store( &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicWriteOp ) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); - this.validate_atomic_op_mut( + let this = self.eval_context_ref(); + this.validate_atomic_op( place, atomic, "Atomic Store", move |memory, clocks, index, atomic| { if atomic == AtomicWriteOp::Relaxed { @@ -233,8 +197,8 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { use AtomicRWOp::*; let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); let release = matches!(atomic, Release | AcqRel | SeqCst); - let this = self.eval_context_mut(); - this.validate_atomic_op_mut( + let this = self.eval_context_ref(); + this.validate_atomic_op( place, atomic, "Atomic RMW", move |memory, clocks, index, _| { if acquire { @@ -276,25 +240,27 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { impl<'mir, 'tcx: 'mir> EvalContextPrivExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { - /// Generic atomic operation implementation, this however - /// cannot be used for the atomic read operation since - /// that requires non mutable memory access to not trigger - /// the writing to read-only memory errors during `get_raw_mut` - fn validate_atomic_op_mut( - &mut self, place: MPlaceTy<'tcx, Tag>, + /// Generic atomic operation implementation, + /// this accesses memory via get_raw instead of + /// get_raw_mut, due to issues calling get_raw_mut + /// for atomic loads from read-only memory + /// FIXME: is this valid, or should get_raw_mut be used for + /// atomic-stores/atomic-rmw? 
+ fn validate_atomic_op( + &self, place: MPlaceTy<'tcx, Tag>, atomic: A, description: &str, mut op: impl FnMut( &mut MemoryCellClocks, &mut ThreadClockSet, VectorIdx, A ) -> Result<(), DataRace> ) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); + let this = self.eval_context_ref(); let data_race = &*this.memory.extra.data_race; if data_race.multi_threaded.get() { // Load an log the atomic operation let place_ptr = place.ptr.assert_ptr(); let size = place.layout.size; - let alloc_meta = &mut this.memory.get_raw_mut(place_ptr.alloc_id)?.extra.data_race; + let alloc_meta = &this.memory.get_raw(place_ptr.alloc_id)?.extra.data_race; log::trace!( "Atomic op({}) with ordering {:?} on memory({:?}, offset={}, size={})", description, &atomic, place_ptr.alloc_id, place_ptr.offset.bytes(), size.bytes() @@ -800,6 +766,29 @@ impl ThreadClockSet { } } +/// Extra metadata associated with a thread +#[derive(Debug, Clone, Default)] +struct ThreadExtraState { + + /// The current vector index in use by the + /// thread currently, this is set to None + /// after the vector index has been re-used + vector_index: Option, + + /// The name of the thread, updated for better + /// diagnostics when reporting detected data + /// races + thread_name: Option>, + + /// Thread termination vector clock, this + /// is set on thread termination and is used + /// for joining on threads that have already + /// terminated. This should be used first + /// on joining as there is the possibility + /// that `vector_index` is None in some cases + termination_vector_clock: Option, +} + /// Global data-race detection state, contains the currently /// executing thread as well as the vector-clocks associated /// with each of the threads. @@ -822,18 +811,18 @@ pub struct GlobalState { /// Mapping of a given vector index to the current thread /// that the execution is representing, this may change /// if a vector index is re-assigned to a new thread - vector_info: RefCell>, //FIXME: make option + vector_info: RefCell>, - /// The mapping of a given thread to a known vector clock - thread_info: RefCell, Option>)>>, + /// The mapping of a given thread to assocaited thread metadata + thread_info: RefCell>, /// The current vector index being executed current_index: Cell, /// Potential vector indices that could be re-used on thread creation - /// values are inserted here on thread join events, and can be - /// re-used once the vector clocks of all current threads - /// are equal to the vector clock of the joined thread + /// values are inserted here on thread termination, vector index values + /// are then re-used once all the termination event happens-before all + /// existing thread-clocks reuse_candidates: RefCell>, } impl GlobalState { @@ -856,8 +845,12 @@ impl GlobalState { let index = global_state.vector_clocks.borrow_mut().push(ThreadClockSet::default()); global_state.vector_info.borrow_mut().push(ThreadId::new(0)); global_state.thread_info.borrow_mut().push( - (Some(index), Some("main".to_string().into_boxed_str()) - )); + ThreadExtraState { + vector_index: Some(index), + thread_name: Some("main".to_string().into_boxed_str()), + termination_vector_clock: None + } + ); global_state } @@ -873,10 +866,9 @@ impl GlobalState { clock.clock[candidate] == target_timestamp }) { // All vector clocks for each vector index are equal to - // the target timestamp, therefore since the thread has - // terminated and cannot update the vector clock. 
- // No more data-races involving this vector index are possible - // so it can be re-used + // the target timestamp, and the thread is known to have + // terminated, therefore this vector clock index cannot + // report any more data-races assert!(reuse.remove(&candidate)); return Some(candidate) } @@ -916,7 +908,7 @@ impl GlobalState { // Mark the thread the vector index was associated with as no longer // representing a thread index - thread_info[old_thread].0 = None; + thread_info[old_thread].vector_index = None; reuse_index }else{ @@ -927,7 +919,7 @@ impl GlobalState { }; // Mark the chosen vector index as in use by the thread - thread_info[thread].0 = Some(created_index); + thread_info[thread].vector_index = Some(created_index); // Create a thread clock set if applicable let mut vector_clocks = self.vector_clocks.borrow_mut(); @@ -952,15 +944,13 @@ impl GlobalState { /// Hook on a thread join to update the implicit happens-before relation /// between the joined thead and the current thread. - /// Called after the join has occured, and hence implicitly also states - /// that the thread must have terminated as well #[inline] pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) { let (current_index, join_index) = { let thread_info = self.thread_info.borrow(); - let current_index = thread_info[current_thread].0 + let current_index = thread_info[current_thread].vector_index .expect("Joining into thread with no assigned vector"); - let join_index = thread_info[join_thread].0 + let join_index = thread_info[join_thread].vector_index .expect("Joining thread with no assigned vector"); (current_index, join_index) }; @@ -976,16 +966,31 @@ impl GlobalState { current.join_with(join); // Post increment clocks after atomic operation + // the join clock is not incremented, since there will + // be no future events, also if it was incremented + // the thread re-use condition would never pass current.increment_clock(current_index); - join.increment_clock(join_index); + } + + /// On thread termination, the vector-clock may re-used + /// in the future once all remaining thread-clocks catch + /// up with the time index of the terminated thread + #[inline] + pub fn thread_terminated(&self, terminated_thread: ThreadId) { + let mut thread_info = self.thread_info.borrow_mut(); + let termination_meta = &mut thread_info[terminated_thread]; + + // Find the terminated index & setup the termination vector-clock + // in case thread join is called in the future after the thread + // has been re-used + let terminated_index = termination_meta.vector_index + .expect("Joining into thread with no assigned vector"); + let vector_clocks = self.vector_clocks.borrow(); + termination_meta.termination_vector_clock = Some(vector_clocks[terminated_index].clock.clone()); - // The joined thread vector clock is a potential candidate - // for re-use given sufficient time, mark as available once - // threads have been created. 
This is because this function - // is called once join_thread has terminated and such cannot - // update any-more + // Add this thread as a candidate for re-use let mut reuse = self.reuse_candidates.borrow_mut(); - reuse.insert(join_index); + reuse.insert(terminated_index); } /// Hook for updating the local tracker of the currently @@ -994,7 +999,7 @@ impl GlobalState { #[inline] pub fn thread_set_active(&self, thread: ThreadId) { let thread_info = self.thread_info.borrow(); - let vector_idx = thread_info[thread].0 + let vector_idx = thread_info[thread].vector_index .expect("Setting thread active with no assigned vector"); self.current_index.set(vector_idx); } @@ -1007,7 +1012,7 @@ impl GlobalState { pub fn thread_set_name(&self, thread: ThreadId, name: String) { let name = name.into_boxed_str(); let mut thread_info = self.thread_info.borrow_mut(); - thread_info[thread].1 = Some(name); + thread_info[thread].thread_name = Some(name); } @@ -1036,7 +1041,7 @@ impl GlobalState { /// returns the id and the name for better diagnostics fn print_thread_metadata(&self, vector: VectorIdx) -> String { let thread = self.vector_info.borrow()[vector]; - let thread_name = &self.thread_info.borrow()[thread].1; + let thread_name = &self.thread_info.borrow()[thread].thread_name; if let Some(name) = thread_name { let name: &str = name; format!("Thread(id = {:?}, name = {:?})", thread.to_u32(), &*name) @@ -1079,7 +1084,7 @@ impl GlobalState { /// used by the thread #[inline] fn load_thread_state_mut(&self, thread: ThreadId) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { - let index = self.thread_info.borrow()[thread].0 + let index = self.thread_info.borrow()[thread].vector_index .expect("Loading thread state for thread with no assigned vector"); let ref_vector = self.vector_clocks.borrow_mut(); let clocks = RefMut::map(ref_vector, |vec| &mut vec[index]); diff --git a/src/thread.rs b/src/thread.rs index f94805ae02..976ac816a0 100644 --- a/src/thread.rs +++ b/src/thread.rs @@ -452,6 +452,7 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { thread.state = ThreadState::Enabled; } } + data_race.thread_terminated(self.active_thread); return free_tls_statics; } From 2a40d9b7a07f9a770455de26e46b766bdb395206 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Fri, 6 Nov 2020 17:29:54 +0000 Subject: [PATCH 09/17] More aggressive vector index re-use, and added some tests. 
--- src/data_race.rs | 190 ++++++++++++------ src/thread.rs | 3 +- .../data_race/dangling_thread_async_race.rs | 44 ++++ .../data_race/dangling_thread_race.rs | 41 ++++ .../data_race/enable_after_join_to_main.rs | 38 ++++ 5 files changed, 254 insertions(+), 62 deletions(-) create mode 100644 tests/compile-fail/data_race/dangling_thread_async_race.rs create mode 100644 tests/compile-fail/data_race/dangling_thread_race.rs create mode 100644 tests/compile-fail/data_race/enable_after_join_to_main.rs diff --git a/src/data_race.rs b/src/data_race.rs index 153e63b77d..57f09146d6 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -20,7 +20,7 @@ use std::{ use rustc_index::vec::{Idx, IndexVec}; use rustc_target::abi::Size; use rustc_middle::ty::layout::TyAndLayout; -use rustc_data_structures::fx::FxHashSet; +use rustc_data_structures::fx::{FxHashSet, FxHashMap}; use crate::{ MiriEvalContext, MiriEvalContextExt, @@ -662,7 +662,7 @@ impl VClockAlloc { let (index, clocks) = self.global.current_thread_state(); let mut alloc_ranges = self.alloc_ranges.borrow_mut(); for (_,range) in alloc_ranges.iter_mut(pointer.offset, len) { - if range.read_race_detect(&*clocks, index) == Err(DataRace) { + if let Err(DataRace) = range.read_race_detect(&*clocks, index) { // Report data-race return Self::report_data_race( &self.global,range, "READ", false, pointer, len @@ -674,18 +674,17 @@ impl VClockAlloc { Ok(()) } } - /// Detect data-races for an unsychronized write operation, will not perform - /// data-race threads if `multi-threaded` is false, either due to no threads - /// being created or if it is temporarily disabled during a racy read or write - /// operation - pub fn write<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { + + + // Shared code for detecting data-races on unique access to a section of memory + fn unique_access<'tcx>(&mut self, pointer: Pointer, len: Size, action: &str) -> InterpResult<'tcx> { if self.global.multi_threaded.get() { let (index, clocks) = self.global.current_thread_state(); for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { - if range.write_race_detect(&*clocks, index) == Err(DataRace) { + if let Err(DataRace) = range.write_race_detect(&*clocks, index) { // Report data-race return Self::report_data_race( - &self.global, range, "WRITE", false, pointer, len + &self.global, range, action, false, pointer, len ); } } @@ -694,25 +693,20 @@ impl VClockAlloc { Ok(()) } } + + /// Detect data-races for an unsychronized write operation, will not perform + /// data-race threads if `multi-threaded` is false, either due to no threads + /// being created or if it is temporarily disabled during a racy read or write + /// operation + pub fn write<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { + self.unique_access(pointer, len, "Write") + } /// Detect data-races for an unsychronized deallocate operation, will not perform /// data-race threads if `multi-threaded` is false, either due to no threads /// being created or if it is temporarily disabled during a racy read or write /// operation pub fn deallocate<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { - if self.global.multi_threaded.get() { - let (index, clocks) = self.global.current_thread_state(); - for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { - if range.write_race_detect(&*clocks, index) == Err(DataRace) { - // Report data-race - return Self::report_data_race( - &self.global, range, "DEALLOCATE", false, pointer, len - 
); - } - } - Ok(()) - }else{ - Ok(()) - } + self.unique_access(pointer, len, "Deallocate") } } @@ -773,6 +767,8 @@ struct ThreadExtraState { /// The current vector index in use by the /// thread currently, this is set to None /// after the vector index has been re-used + /// and hence the value will never need to be + /// read during data-race reporting vector_index: Option, /// The name of the thread, updated for better @@ -782,10 +778,8 @@ struct ThreadExtraState { /// Thread termination vector clock, this /// is set on thread termination and is used - /// for joining on threads that have already - /// terminated. This should be used first - /// on joining as there is the possibility - /// that `vector_index` is None in some cases + /// for joining on threads since the vector_index + /// may be re-used when the join operation occurs termination_vector_clock: Option, } @@ -820,10 +814,26 @@ pub struct GlobalState { current_index: Cell, /// Potential vector indices that could be re-used on thread creation - /// values are inserted here on thread termination, vector index values - /// are then re-used once all the termination event happens-before all - /// existing thread-clocks + /// values are inserted here on after the thread has terminated and + /// been joined with, and hence may potentially become free + /// for use as the index for a new thread. + /// Elements in this set may still require the vector index to + /// report data-races, and can only be re-used after all + /// active vector-clocks catch up with the threads timestamp. reuse_candidates: RefCell>, + + /// Counts the number of threads that are currently active + /// if the number of active threads reduces to 1 and then + /// a join operation occures with the remaining main thread + /// then multi-threaded execution may be disabled + active_thread_count: Cell, + + /// This contains threads that have terminated, but not yet joined + /// and so cannot become re-use candidates until a join operation + /// occurs. 
+ /// The associated vector index will be moved into re-use candidates + /// after the join operation occurs + terminated_threads: RefCell>, } impl GlobalState { @@ -836,7 +846,9 @@ impl GlobalState { vector_info: RefCell::new(IndexVec::new()), thread_info: RefCell::new(IndexVec::new()), current_index: Cell::new(VectorIdx::new(0)), + active_thread_count: Cell::new(1), reuse_candidates: RefCell::new(FxHashSet::default()), + terminated_threads: RefCell::new(FxHashMap::default()) }; // Setup the main-thread since it is not explicitly created: @@ -860,10 +872,24 @@ impl GlobalState { fn find_vector_index_reuse_candidate(&self) -> Option { let mut reuse = self.reuse_candidates.borrow_mut(); let vector_clocks = self.vector_clocks.borrow(); + let vector_info = self.vector_info.borrow(); + let terminated_threads = self.terminated_threads.borrow(); for &candidate in reuse.iter() { let target_timestamp = vector_clocks[candidate].clock[candidate]; - if vector_clocks.iter().all(|clock| { - clock.clock[candidate] == target_timestamp + if vector_clocks.iter_enumerated().all(|(clock_idx, clock)| { + // The thread happens before the clock, and hence cannot report + // a data-race with this the candidate index + let no_data_race = clock.clock[candidate] >= target_timestamp; + + // The vector represents a thread that has terminated and hence cannot + // report a data-race with the candidate index + let thread_id = vector_info[clock_idx]; + let vector_terminated = reuse.contains(&clock_idx) + || terminated_threads.contains_key(&thread_id); + + // The vector index cannot report a race with the candidate index + // and hence allows the candidate index to be re-used + no_data_race || vector_terminated }) { // All vector clocks for each vector index are equal to // the target timestamp, and the thread is known to have @@ -882,6 +908,10 @@ impl GlobalState { pub fn thread_created(&self, thread: ThreadId) { let current_index = self.current_index(); + // Increment the number of active threads + let active_threads = self.active_thread_count.get(); + self.active_thread_count.set(active_threads + 1); + // Enable multi-threaded execution, there are now two threads // so data-races are now possible. self.multi_threaded.set(true); @@ -946,51 +976,90 @@ impl GlobalState { /// between the joined thead and the current thread. 
#[inline] pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) { - let (current_index, join_index) = { - let thread_info = self.thread_info.borrow(); - let current_index = thread_info[current_thread].vector_index - .expect("Joining into thread with no assigned vector"); - let join_index = thread_info[join_thread].vector_index - .expect("Joining thread with no assigned vector"); - (current_index, join_index) - }; let mut clocks_vec = self.vector_clocks.borrow_mut(); - let (current, join) = clocks_vec.pick2_mut(current_index, join_index); + let thread_info = self.thread_info.borrow(); + + // Load the vector clock of the current thread + let current_index = thread_info[current_thread].vector_index + .expect("Performed thread join on thread with no assigned vector"); + let current = &mut clocks_vec[current_index]; + + // Load the associated vector clock for the terminated thread + let join_clock = thread_info[join_thread].termination_vector_clock + .as_ref().expect("Joined with thread but thread has not terminated"); // Pre increment clocks before atomic operation current.increment_clock(current_index); - join.increment_clock(join_index); // The join thread happens-before the current thread // so update the current vector clock - current.join_with(join); + current.clock.join(join_clock); // Post increment clocks after atomic operation - // the join clock is not incremented, since there will - // be no future events, also if it was incremented - // the thread re-use condition would never pass current.increment_clock(current_index); + + // Check the number of active threads, if the value is 1 + // then test for potentially disabling multi-threaded execution + let active_threads = self.active_thread_count.get(); + if active_threads == 1 { + // May potentially be able to disable multi-threaded execution + let current_clock = &clocks_vec[current_index]; + if clocks_vec.iter_enumerated().all(|(idx, clocks)| { + clocks.clock[idx] <= current_clock.clock[idx] + }) { + // The all thread termations happen-before the current clock + // therefore no data-races can be reported until a new thread + // is created, so disable multi-threaded execution + self.multi_threaded.set(false); + } + } + + // If the thread is marked as terminated but not joined + // then move the thread to the re-use set + let mut termination = self.terminated_threads.borrow_mut(); + if let Some(index) = termination.remove(&join_thread) { + let mut reuse = self.reuse_candidates.borrow_mut(); + reuse.insert(index); + } } /// On thread termination, the vector-clock may re-used /// in the future once all remaining thread-clocks catch - /// up with the time index of the terminated thread + /// up with the time index of the terminated thread. 
+ /// This assiges thread termination with a unique index + /// which will be used to join the thread + /// This should be called strictly before any calls to + /// `thread_joined` #[inline] - pub fn thread_terminated(&self, terminated_thread: ThreadId) { - let mut thread_info = self.thread_info.borrow_mut(); - let termination_meta = &mut thread_info[terminated_thread]; + pub fn thread_terminated(&self) { + let current_index = self.current_index(); + + // Increment the clock to a unique termination timestamp + let mut vector_clocks = self.vector_clocks.borrow_mut(); + let current_clocks = &mut vector_clocks[current_index]; + current_clocks.increment_clock(current_index); - // Find the terminated index & setup the termination vector-clock - // in case thread join is called in the future after the thread - // has been re-used - let terminated_index = termination_meta.vector_index - .expect("Joining into thread with no assigned vector"); - let vector_clocks = self.vector_clocks.borrow(); - termination_meta.termination_vector_clock = Some(vector_clocks[terminated_index].clock.clone()); + // Load the current thread id for the executing vector + let vector_info = self.vector_info.borrow(); + let current_thread = vector_info[current_index]; - // Add this thread as a candidate for re-use - let mut reuse = self.reuse_candidates.borrow_mut(); - reuse.insert(terminated_index); + // Load the current thread metadata, and move to a terminated + // vector state. Setting up the vector clock all join operations + // will use. + let mut thread_info = self.thread_info.borrow_mut(); + let current = &mut thread_info[current_thread]; + current.termination_vector_clock = Some(current_clocks.clock.clone()); + + // Add this thread as a candidate for re-use after a thread join + // occurs + let mut termination = self.terminated_threads.borrow_mut(); + termination.insert(current_thread, current_index); + + // Reduce the number of active threads, now that a thread has + // terminated + let mut active_threads = self.active_thread_count.get(); + active_threads -= 1; + self.active_thread_count.set(active_threads); } /// Hook for updating the local tracker of the currently @@ -1118,4 +1187,3 @@ impl GlobalState { self.current_index.get() } } - diff --git a/src/thread.rs b/src/thread.rs index 976ac816a0..40cfd04d79 100644 --- a/src/thread.rs +++ b/src/thread.rs @@ -443,6 +443,8 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { return false; }); } + // Set the thread into a terminated state in the data-race detector + data_race.thread_terminated(); // Check if we need to unblock any threads. for (i, thread) in self.threads.iter_enumerated_mut() { if thread.state == ThreadState::BlockedOnJoin(self.active_thread) { @@ -452,7 +454,6 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { thread.state = ThreadState::Enabled; } } - data_race.thread_terminated(self.active_thread); return free_tls_statics; } diff --git a/tests/compile-fail/data_race/dangling_thread_async_race.rs b/tests/compile-fail/data_race/dangling_thread_async_race.rs new file mode 100644 index 0000000000..6af5706835 --- /dev/null +++ b/tests/compile-fail/data_race/dangling_thread_async_race.rs @@ -0,0 +1,44 @@ +// ignore-windows: Concurrency on Windows is not supported yet. 
+// compile-flags: -Zmiri-disable-isolation + +use std::thread::{spawn, sleep}; +use std::time::Duration; +use std::mem; + + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + + +fn main() { + let mut a = 0u32; + let b = &mut a as *mut u32; + let c = EvilSend(b); + + let join = unsafe { + spawn(move || { + *c.0 = 32; + }) + }; + + // Detatch the thread and sleep until it terminates + mem::drop(join); + sleep(Duration::from_millis(100)); + + // Spawn and immediately join a thread + // to execute the join code-path + // and ensure that data-race detection + // remains enabled + spawn(|| ()).join().unwrap(); + + let join2 = unsafe { + spawn(move || { + *c.0 = 64; //~ ERROR Data race + }) + }; + + join2.join().unwrap(); +} diff --git a/tests/compile-fail/data_race/dangling_thread_race.rs b/tests/compile-fail/data_race/dangling_thread_race.rs new file mode 100644 index 0000000000..c37f303bba --- /dev/null +++ b/tests/compile-fail/data_race/dangling_thread_race.rs @@ -0,0 +1,41 @@ +// ignore-windows: Concurrency on Windows is not supported yet. +// compile-flags: -Zmiri-disable-isolation + +use std::thread::{spawn, sleep}; +use std::time::Duration; +use std::mem; + + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + + +fn main() { + let mut a = 0u32; + let b = &mut a as *mut u32; + let c = EvilSend(b); + + let join = unsafe { + spawn(move || { + *c.0 = 32; + }) + }; + + // Detatch the thread and sleep until it terminates + mem::drop(join); + sleep(Duration::from_millis(100)); + + // Spawn and immediately join a thread + // to execute the join code-path + // and ensure that data-race detection + // remains enabled + spawn(|| ()).join().unwrap(); + + + unsafe { + *c.0 = 64; //~ ERROR Data race + } +} diff --git a/tests/compile-fail/data_race/enable_after_join_to_main.rs b/tests/compile-fail/data_race/enable_after_join_to_main.rs new file mode 100644 index 0000000000..fba7ba4841 --- /dev/null +++ b/tests/compile-fail/data_race/enable_after_join_to_main.rs @@ -0,0 +1,38 @@ +// ignore-windows: Concurrency on Windows is not supported yet. + +use std::thread::spawn; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +pub fn main() { + // Enable and the join with multiple threads + let t1 = spawn(|| ()); + let t2 = spawn(|| ()); + let t3 = spawn(|| ()); + let t4 = spawn(|| ()); + t1.join().unwrap(); + t2.join().unwrap(); + t3.join().unwrap(); + t4.join().unwrap(); + + // Perform write-write data race detection + let mut a = 0u32; + let b = &mut a as *mut u32; + let c = EvilSend(b); + unsafe { + let j1 = spawn(move || { + *c.0 = 32; + }); + + let j2 = spawn(move || { + *c.0 = 64; //~ ERROR Data race + }); + + j1.join().unwrap(); + j2.join().unwrap(); + } +} From 69fb6413ddc5b7fd5d9cb0a68ebf58ee513bf9d5 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Sun, 15 Nov 2020 18:30:26 +0000 Subject: [PATCH 10/17] Tidy up comments and function layout, should fix most of the review notes. 
--- src/bin/miri.rs | 3 + src/data_race.rs | 1104 +++++++++-------- src/eval.rs | 3 + src/lib.rs | 4 +- src/machine.rs | 31 +- src/shims/intrinsics.rs | 166 +-- src/shims/posix/linux/sync.rs | 12 +- src/shims/posix/sync.rs | 16 +- src/shims/posix/thread.rs | 15 +- src/sync.rs | 58 +- src/thread.rs | 49 +- src/vector_clock.rs | 323 ++--- tests/run-pass/concurrency/data_race.stderr | 2 +- tests/run-pass/concurrency/linux-futex.stderr | 2 +- tests/run-pass/concurrency/simple.stderr | 2 +- tests/run-pass/concurrency/sync.stderr | 2 +- .../run-pass/concurrency/thread_locals.stderr | 2 +- .../run-pass/concurrency/tls_lib_drop.stderr | 2 +- tests/run-pass/libc.stderr | 2 +- tests/run-pass/panic/concurrent-panic.stderr | 2 +- 20 files changed, 1005 insertions(+), 795 deletions(-) diff --git a/src/bin/miri.rs b/src/bin/miri.rs index ef1429a350..1117b69116 100644 --- a/src/bin/miri.rs +++ b/src/bin/miri.rs @@ -195,6 +195,9 @@ fn main() { "-Zmiri-disable-stacked-borrows" => { miri_config.stacked_borrows = false; } + "-Zmiri-disable-data-race-detector" => { + miri_config.data_race_detector = false; + } "-Zmiri-disable-alignment-check" => { miri_config.check_alignment = miri::AlignmentCheck::None; } diff --git a/src/data_race.rs b/src/data_race.rs index 57f09146d6..822ceab8fa 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -1,16 +1,36 @@ -//! Implementation of a data-race detector -//! uses Lamport Timestamps / Vector-clocks -//! base on the Dyamic Race Detection for C++: -//! - https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf -//! to extend data-race detection to work correctly with fences -//! and RMW operations +//! Implementation of a data-race detector using Lamport Timestamps / Vector-clocks +//! based on the Dyamic Race Detection for C++: +//! https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf +//! which does not report false-positives when fences are used, and gives better +//! accuracy in presence of read-modify-write operations. +//! //! This does not explore weak memory orders and so can still miss data-races -//! but should not report false-positives +//! but should not report false-positives +//! //! Data-race definiton from(https://en.cppreference.com/w/cpp/language/memory_model#Threads_and_data_races): -//! - if a memory location is accessed by twice is a data-race unless: -//! - both operations execute on the same thread/signal-handler -//! - both conflicting operations are atomic operations (1 atomic and 1 non-atomic race) -//! - 1 of the operations happens-before the other operation (see link for definition) +//! a data race occurs between two memory accesses if they are on different threads, at least one operation +//! is non-atomic, at least one operation is a write and neither access happens-before the other. Read the link +//! for full definition. +//! +//! This re-uses vector indexes for threads that are known to be unable to report data-races, this is valid +//! because it only re-uses vector indexes once all currently-active (not-terminated) threads have an internal +//! vector clock that happens-after the join operation of the candidate thread. Threads that have not been joined +//! on are not considered. Since the thread's vector clock will only increase and a data-race implies that +//! there is some index x where clock[x] > thread_clock, when this is true clock[candidate-idx] > thread_clock +//! can never hold and hence a data-race can never be reported in that vector index again. +//! 
This means that the thread-index can be safely re-used, starting on the next timestamp for the newly created +//! thread. +//! +//! The sequentially consistant ordering corresponds to the ordering that the threads +//! are currently scheduled, this means that the data-race detector has no additional +//! logic for sequentially consistent accesses at the moment since they are indistinguishable +//! from acquire/release operations. If weak memory orderings are explored then this +//! may need to change or be updated accordingly. +//! +//! FIXME: +//! currently we have our own local copy of the currently active thread index and names, this is due +//! in part to the inability to access the current location of threads.active_thread inside the AllocExtra +//! read, write and deallocate functions and should be cleaned up in the future. use std::{ fmt::Debug, rc::Rc, @@ -19,23 +39,23 @@ use std::{ use rustc_index::vec::{Idx, IndexVec}; use rustc_target::abi::Size; -use rustc_middle::ty::layout::TyAndLayout; +use rustc_middle::{mir, ty::layout::TyAndLayout}; use rustc_data_structures::fx::{FxHashSet, FxHashMap}; use crate::{ MiriEvalContext, MiriEvalContextExt, ThreadId, Tag, RangeMap, InterpResult, Pointer, ScalarMaybeUninit, - MPlaceTy, OpTy, MemPlaceMeta, - VClock, VSmallClockSet, VectorIdx, VTimestamp + MPlaceTy, OpTy, MemPlaceMeta, ImmTy, Immediate, + VClock, VSmallClockMap, VectorIdx, VTimestamp }; pub type AllocExtra = VClockAlloc; pub type MemoryExtra = Rc; -/// Valid atomic read-write operations, alias of atomic::Ordering (not non-exhaustive) +/// Valid atomic read-write operations, alias of atomic::Ordering (not non-exhaustive). #[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub enum AtomicRWOp { +pub enum AtomicRwOp { Relaxed, Acquire, Release, @@ -43,7 +63,7 @@ pub enum AtomicRWOp { SeqCst, } -/// Valid atomic read operations, subset of atomic::Ordering +/// Valid atomic read operations, subset of atomic::Ordering. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum AtomicReadOp { Relaxed, @@ -51,7 +71,7 @@ pub enum AtomicReadOp { SeqCst, } -/// Valid atomic write operations, subset of atomic::Ordering +/// Valid atomic write operations, subset of atomic::Ordering. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum AtomicWriteOp { Relaxed, @@ -60,7 +80,7 @@ pub enum AtomicWriteOp { } -/// Valid atomic fence operations, subset of atomic::Ordering +/// Valid atomic fence operations, subset of atomic::Ordering. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum AtomicFenceOp { Acquire, @@ -69,315 +89,124 @@ pub enum AtomicFenceOp { SeqCst, } -/// Evaluation context extensions -impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} -pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { - // Temporarily allow data-races to occur, this should only be - // used if either one of the appropiate `validate_atomic` functions - // will be called to treat a memory access as atomic or if the memory - // being accessed should be treated as internal state, that cannot be - // accessed by the interpreted program. 
- #[inline] - fn allow_data_races_ref(&self, op: impl FnOnce(&MiriEvalContext<'mir, 'tcx>) -> R) -> R { - let this = self.eval_context_ref(); - let data_race = &*this.memory.extra.data_race; - let old = data_race.multi_threaded.replace(false); - let result = op(this); - data_race.multi_threaded.set(old); - result - } - /// Same as `allow_data_races_ref`, this temporarily disables any data-race detection and - /// so should only be used for atomic operations or internal state that the program cannot - /// access - #[inline] - fn allow_data_races_mut(&mut self, op: impl FnOnce(&mut MiriEvalContext<'mir, 'tcx>) -> R) -> R { - let this = self.eval_context_mut(); - let data_race = &*this.memory.extra.data_race; - let old = data_race.multi_threaded.replace(false); - let result = op(this); - let data_race = &*this.memory.extra.data_race; - data_race.multi_threaded.set(old); - result - } - - - fn read_scalar_at_offset_atomic( - &self, - op: OpTy<'tcx, Tag>, - offset: u64, - layout: TyAndLayout<'tcx>, - atomic: AtomicReadOp - ) -> InterpResult<'tcx, ScalarMaybeUninit> { - let this = self.eval_context_ref(); - let op_place = this.deref_operand(op)?; - let offset = Size::from_bytes(offset); - // Ensure that the following read at an offset is within bounds - assert!(op_place.layout.size >= offset + layout.size); - let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; - this.read_scalar_atomic(value_place, atomic) - } - fn write_scalar_at_offset_atomic( - &mut self, - op: OpTy<'tcx, Tag>, - offset: u64, - value: impl Into>, - layout: TyAndLayout<'tcx>, - atomic: AtomicWriteOp - ) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); - let op_place = this.deref_operand(op)?; - let offset = Size::from_bytes(offset); - // Ensure that the following read at an offset is within bounds - assert!(op_place.layout.size >= offset + layout.size); - let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; - this.write_scalar_atomic(value.into(), value_place, atomic) - } - fn read_scalar_atomic( - &self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp - ) -> InterpResult<'tcx, ScalarMaybeUninit> { - let scalar = self.allow_data_races_ref(move |this| { - this.read_scalar(place.into()) - })?; - self.validate_atomic_load(place, atomic)?; - Ok(scalar) - } - fn write_scalar_atomic( - &mut self, val: ScalarMaybeUninit, dest: MPlaceTy<'tcx, Tag>, - atomic: AtomicWriteOp - ) -> InterpResult<'tcx> { - self.allow_data_races_mut(move |this| { - this.write_scalar(val, dest.into()) - })?; - self.validate_atomic_store(dest, atomic) - } - - /// Update the data-race detector for an atomic read occuring at the - /// associated memory-place and on the current thread - fn validate_atomic_load( - &self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp - ) -> InterpResult<'tcx> { - let this = self.eval_context_ref(); - this.validate_atomic_op( - place, atomic, "Atomic Load", - move |memory, clocks, index, atomic| { - if atomic == AtomicReadOp::Relaxed { - memory.load_relaxed(&mut *clocks, index) - }else{ - memory.acquire(&mut *clocks, index) - } - } - ) - } +/// The current set of vector clocks describing the state +/// of a thread, contains the happens-before clock and +/// additional metadata to model atomic fence operations. 
+#[derive(Clone, Default, Debug)] +struct ThreadClockSet { - /// Update the data-race detector for an atomic write occuring at the - /// associated memory-place and on the current thread - fn validate_atomic_store( - &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicWriteOp - ) -> InterpResult<'tcx> { - let this = self.eval_context_ref(); - this.validate_atomic_op( - place, atomic, "Atomic Store", - move |memory, clocks, index, atomic| { - if atomic == AtomicWriteOp::Relaxed { - memory.store_relaxed(clocks, index) - }else{ - memory.release(clocks, index) - } - } - ) - } + /// The increasing clock representing timestamps + /// that happen-before this thread. + clock: VClock, - /// Update the data-race detector for an atomic read-modify-write occuring - /// at the associated memory place and on the current thread - fn validate_atomic_rmw( - &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicRWOp - ) -> InterpResult<'tcx> { - use AtomicRWOp::*; - let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); - let release = matches!(atomic, Release | AcqRel | SeqCst); - let this = self.eval_context_ref(); - this.validate_atomic_op( - place, atomic, "Atomic RMW", - move |memory, clocks, index, _| { - if acquire { - memory.acquire(clocks, index)?; - }else{ - memory.load_relaxed(clocks, index)?; - } - if release { - memory.rmw_release(clocks, index) - }else{ - memory.rmw_relaxed(clocks, index) - } - } - ) - } + /// The set of timestamps that will happen-before this + /// thread once it performs an acquire fence. + fence_acquire: VClock, - /// Update the data-race detector for an atomic fence on the current thread - fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); - let data_race = &*this.memory.extra.data_race; - data_race.maybe_perform_sync_operation(move |index, mut clocks| { - log::trace!("Atomic fence on {:?} with ordering {:?}", index, atomic); - // Apply data-race detection for the current fences - // this treats AcqRel and SeqCst as the same as a acquire - // and release fence applied in the same timestamp. - if atomic != AtomicFenceOp::Release { - // Either Acquire | AcqRel | SeqCst - clocks.apply_acquire_fence(); - } - if atomic != AtomicFenceOp::Acquire { - // Either Release | AcqRel | SeqCst - clocks.apply_release_fence(); - } - Ok(()) - }) - } + /// The last timesamp of happens-before relations that + /// have been released by this thread by a fence. + fence_release: VClock, } -impl<'mir, 'tcx: 'mir> EvalContextPrivExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} -trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { - /// Generic atomic operation implementation, - /// this accesses memory via get_raw instead of - /// get_raw_mut, due to issues calling get_raw_mut - /// for atomic loads from read-only memory - /// FIXME: is this valid, or should get_raw_mut be used for - /// atomic-stores/atomic-rmw? 
- fn validate_atomic_op( - &self, place: MPlaceTy<'tcx, Tag>, - atomic: A, description: &str, - mut op: impl FnMut( - &mut MemoryCellClocks, &mut ThreadClockSet, VectorIdx, A - ) -> Result<(), DataRace> - ) -> InterpResult<'tcx> { - let this = self.eval_context_ref(); - let data_race = &*this.memory.extra.data_race; - if data_race.multi_threaded.get() { - - // Load an log the atomic operation - let place_ptr = place.ptr.assert_ptr(); - let size = place.layout.size; - let alloc_meta = &this.memory.get_raw(place_ptr.alloc_id)?.extra.data_race; - log::trace!( - "Atomic op({}) with ordering {:?} on memory({:?}, offset={}, size={})", - description, &atomic, place_ptr.alloc_id, place_ptr.offset.bytes(), size.bytes() - ); - - // Perform the atomic operation - let data_race = &alloc_meta.global; - data_race.maybe_perform_sync_operation(|index, mut clocks| { - for (_,range) in alloc_meta.alloc_ranges.borrow_mut().iter_mut(place_ptr.offset, size) { - if let Err(DataRace) = op(range, &mut *clocks, index, atomic) { - mem::drop(clocks); - return VClockAlloc::report_data_race( - &alloc_meta.global, range, description, true, - place_ptr, size - ); - } - } - Ok(()) - })?; +impl ThreadClockSet { - // Log changes to atomic memory - if log::log_enabled!(log::Level::Trace) { - for (_,range) in alloc_meta.alloc_ranges.borrow().iter(place_ptr.offset, size) { - log::trace!( - "Updated atomic memory({:?}, offset={}, size={}) to {:#?}", - place.ptr.assert_ptr().alloc_id, place_ptr.offset.bytes(), size.bytes(), - range.atomic_ops - ); - } - } - } - Ok(()) + /// Apply the effects of a release fence to this + /// set of thread vector clocks. + #[inline] + fn apply_release_fence(&mut self) { + self.fence_release.clone_from(&self.clock); } -} - -/// Handle for locks to express their -/// acquire-release semantics -#[derive(Clone, Debug, Default)] -pub struct DataRaceLockHandle { + /// Apply the effects of a acquire fence to this + /// set of thread vector clocks. + #[inline] + fn apply_acquire_fence(&mut self) { + self.clock.join(&self.fence_acquire); + } - /// Internal acquire-release clock - /// to express the acquire release sync - /// found in concurrency primitives - clock: VClock, -} -impl DataRaceLockHandle { - pub fn set_values(&mut self, other: &Self) { - self.clock.clone_from(&other.clock) + /// Increment the happens-before clock at a + /// known index. + #[inline] + fn increment_clock(&mut self, index: VectorIdx) { + self.clock.increment_index(index); } - pub fn reset(&mut self) { - self.clock.set_zero_vector(); + + /// Join the happens-before clock with that of + /// another thread, used to model thread join + /// operations. + fn join_with(&mut self, other: &ThreadClockSet) { + self.clock.join(&other.clock); } } /// Error returned by finding a data race -/// should be elaborated upon +/// should be elaborated upon. #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] pub struct DataRace; /// Externally stored memory cell clocks -/// explicitly to reduce memory usage for the -/// common case where no atomic operations -/// exists on the memory cell +/// explicitly to reduce memory usage for the +/// common case where no atomic operations +/// exists on the memory cell. #[derive(Clone, PartialEq, Eq, Default, Debug)] struct AtomicMemoryCellClocks { - /// The clock-vector for the set of atomic read operations - /// used for detecting data-races with non-atomic write - /// operations + /// The clock-vector of the timestamp of the last atomic + /// read operation performed by each thread. 
+ /// This detects potential data-races between atomic read + /// and non-atomic write operations. read_vector: VClock, - /// The clock-vector for the set of atomic write operations - /// used for detecting data-races with non-atomic read or - /// write operations + /// The clock-vector of the timestamp of the last atomic + /// write operation performed by each thread. + /// This detects potential data-races between atomic write + /// and non-atomic read or write operations. write_vector: VClock, /// Synchronization vector for acquire-release semantics - /// contains the vector of timestamps that will - /// happen-before a thread if an acquire-load is - /// performed on the data + /// contains the vector of timestamps that will + /// happen-before a thread if an acquire-load is + /// performed on the data. sync_vector: VClock, /// The Hash-Map of all threads for which a release - /// sequence exists in the memory cell, required - /// since read-modify-write operations do not - /// invalidate existing release sequences - release_sequences: VSmallClockSet, + /// sequence exists in the memory cell, required + /// since read-modify-write operations do not + /// invalidate existing release sequences. + /// See page 6 of linked paper. + release_sequences: VSmallClockMap, } /// Memory Cell vector clock metadata -/// for data-race detection +/// for data-race detection. #[derive(Clone, PartialEq, Eq, Debug)] struct MemoryCellClocks { - /// The vector-clock of the last write, only one value is stored - /// since all previous writes happened-before the current write + /// The vector-clock timestamp of the last write + /// corresponding to the writing threads timestamp. write: VTimestamp, - /// The identifier of the thread that performed the last write - /// operation + /// The identifier of the vector index, corresponding to a thread + /// that performed the last write operation. write_index: VectorIdx, - /// The vector-clock of the set of previous reads - /// each index is set to the timestamp that the associated - /// thread last read this value. + /// The vector-clock of the timestamp of the last read operation + /// performed by a thread since the last write operation occured. read: VClock, - /// Atomic acquire & release sequence tracking clocks - /// for non-atomic memory in the common case this - /// value is set to None + /// Atomic acquire & release sequence tracking clocks. + /// For non-atomic memory in the common case this + /// value is set to None. atomic_ops: Option>, } + /// Create a default memory cell clocks instance -/// for uninitialized memory +/// for uninitialized memory. impl Default for MemoryCellClocks { fn default() -> Self { MemoryCellClocks { @@ -389,9 +218,10 @@ impl Default for MemoryCellClocks { } } + impl MemoryCellClocks { - /// Load the internal atomic memory cells if they exist + /// Load the internal atomic memory cells if they exist. #[inline] fn atomic(&self) -> Option<&AtomicMemoryCellClocks> { match &self.atomic_ops { @@ -401,25 +231,26 @@ impl MemoryCellClocks { } /// Load or create the internal atomic memory metadata - /// if it does not exist + /// if it does not exist. 
#[inline] fn atomic_mut(&mut self) -> &mut AtomicMemoryCellClocks { self.atomic_ops.get_or_insert_with(Default::default) } /// Update memory cell data-race tracking for atomic - /// load acquire semantics, is a no-op if this memory was - /// not used previously as atomic memory - fn acquire(&mut self, clocks: &mut ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + /// load acquire semantics, is a no-op if this memory was + /// not used previously as atomic memory. + fn load_acquire(&mut self, clocks: &mut ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { self.atomic_read_detect(clocks, index)?; if let Some(atomic) = self.atomic() { clocks.clock.join(&atomic.sync_vector); } Ok(()) } + /// Update memory cell data-race tracking for atomic - /// load relaxed semantics, is a no-op if this memory was - /// not used previously as atomic memory + /// load relaxed semantics, is a no-op if this memory was + /// not used previously as atomic memory. fn load_relaxed(&mut self, clocks: &mut ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { self.atomic_read_detect(clocks, index)?; if let Some(atomic) = self.atomic() { @@ -430,8 +261,8 @@ impl MemoryCellClocks { /// Update the memory cell data-race tracking for atomic - /// store release semantics - fn release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + /// store release semantics. + fn store_release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); atomic.sync_vector.clone_from(&clocks.clock); @@ -439,8 +270,9 @@ impl MemoryCellClocks { atomic.release_sequences.insert(index, &clocks.clock); Ok(()) } + /// Update the memory cell data-race tracking for atomic - /// store relaxed semantics + /// store relaxed semantics. fn store_relaxed(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); @@ -451,8 +283,9 @@ impl MemoryCellClocks { atomic.release_sequences.retain_index(index); Ok(()) } + /// Update the memory cell data-race tracking for atomic - /// store release semantics for RMW operations + /// store release semantics for RMW operations. fn rmw_release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); @@ -460,8 +293,9 @@ impl MemoryCellClocks { atomic.release_sequences.insert(index, &clocks.clock); Ok(()) } + /// Update the memory cell data-race tracking for atomic - /// store relaxed semantics for RMW operations + /// store relaxed semantics for RMW operations. fn rmw_relaxed(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { self.atomic_write_detect(clocks, index)?; let atomic = self.atomic_mut(); @@ -470,60 +304,60 @@ impl MemoryCellClocks { } /// Detect data-races with an atomic read, caused by a non-atomic write that does - /// not happen-before the atomic-read + /// not happen-before the atomic-read. 
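Every check in this area reduces to one vector-clock question: is the last write, stamped with the writing thread's timestamp, already contained in the current thread's view? Below is a minimal, self-contained sketch of that test and of how joining an acquired clock removes the race. The `ToyClock`, `Timestamp` and `write_happens_before` names are invented for illustration only and are not the patch's `VClock`/`VTimestamp` types.

    // Toy model of the happens-before check used by the detectors in this file.
    type Timestamp = u64;

    #[derive(Clone, Debug, Default)]
    struct ToyClock(Vec<Timestamp>);

    impl ToyClock {
        /// Component-wise maximum: absorb everything the other clock has seen.
        fn join(&mut self, other: &ToyClock) {
            if self.0.len() < other.0.len() {
                self.0.resize(other.0.len(), 0);
            }
            for (l, &r) in self.0.iter_mut().zip(&other.0) {
                *l = (*l).max(r);
            }
        }

        fn get(&self, idx: usize) -> Timestamp {
            self.0.get(idx).copied().unwrap_or(0)
        }
    }

    /// Does a write at `write_ts` by thread `write_idx` happen-before `clock`?
    fn write_happens_before(write_ts: Timestamp, write_idx: usize, clock: &ToyClock) -> bool {
        write_ts <= clock.get(write_idx)
    }

    fn main() {
        // Thread 0 is at local time 3 and has observed nothing of thread 1.
        let mut reader = ToyClock(vec![3, 0]);
        // Thread 1 wrote the location at its local time 2.
        let writer_ts = 2;

        // Unsynchronized: the write is not in the reader's clock, i.e. a race.
        assert!(!write_happens_before(writer_ts, 1, &reader));

        // After acquiring a clock the writer released (lock, release-store, join),
        // the reader's clock absorbs the write and the access is ordered.
        reader.join(&ToyClock(vec![1, 2]));
        assert!(write_happens_before(writer_ts, 1, &reader));
    }

An acquire of something the writer previously released performs exactly this kind of join, which is how the acquire variants below differ from the relaxed ones.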
fn atomic_read_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Atomic read with vectors: {:#?} :: {:#?}", self, clocks); if self.write <= clocks.clock[self.write_index] { let atomic = self.atomic_mut(); atomic.read_vector.set_at_index(&clocks.clock, index); Ok(()) - }else{ + } else { Err(DataRace) } } /// Detect data-races with an atomic write, either with a non-atomic read or with - /// a non-atomic write: + /// a non-atomic write. fn atomic_write_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Atomic write with vectors: {:#?} :: {:#?}", self, clocks); if self.write <= clocks.clock[self.write_index] && self.read <= clocks.clock { let atomic = self.atomic_mut(); atomic.write_vector.set_at_index(&clocks.clock, index); Ok(()) - }else{ + } else { Err(DataRace) } } /// Detect races for non-atomic read operations at the current memory cell - /// returns true if a data-race is detected + /// returns true if a data-race is detected. fn read_race_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Unsynchronized read with vectors: {:#?} :: {:#?}", self, clocks); if self.write <= clocks.clock[self.write_index] { let race_free = if let Some(atomic) = self.atomic() { atomic.write_vector <= clocks.clock - }else{ + } else { true }; if race_free { self.read.set_at_index(&clocks.clock, index); Ok(()) - }else{ + } else { Err(DataRace) } - }else{ + } else { Err(DataRace) } } /// Detect races for non-atomic write operations at the current memory cell - /// returns true if a data-race is detected + /// returns true if a data-race is detected. fn write_race_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { log::trace!("Unsynchronized write with vectors: {:#?} :: {:#?}", self, clocks); if self.write <= clocks.clock[self.write_index] && self.read <= clocks.clock { let race_free = if let Some(atomic) = self.atomic() { atomic.write_vector <= clocks.clock && atomic.read_vector <= clocks.clock - }else{ + } else { true }; if race_free { @@ -531,30 +365,269 @@ impl MemoryCellClocks { self.write_index = index; self.read.set_zero_vector(); Ok(()) - }else{ + } else { Err(DataRace) } - }else{ + } else { Err(DataRace) } } } -/// Vector clock metadata for a logical memory allocation + +/// Evaluation context extensions. +impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} +pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { + + /// Atomic variant of read_scalar_at_offset. + fn read_scalar_at_offset_atomic( + &self, + op: OpTy<'tcx, Tag>, + offset: u64, + layout: TyAndLayout<'tcx>, + atomic: AtomicReadOp + ) -> InterpResult<'tcx, ScalarMaybeUninit> { + let this = self.eval_context_ref(); + let op_place = this.deref_operand(op)?; + let offset = Size::from_bytes(offset); + + // Ensure that the following read at an offset is within bounds. + assert!(op_place.layout.size >= offset + layout.size); + let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; + this.read_scalar_atomic(value_place, atomic) + } + + /// Atomic variant of write_scalar_at_offset. 
+ fn write_scalar_at_offset_atomic( + &mut self, + op: OpTy<'tcx, Tag>, + offset: u64, + value: impl Into>, + layout: TyAndLayout<'tcx>, + atomic: AtomicWriteOp + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + let op_place = this.deref_operand(op)?; + let offset = Size::from_bytes(offset); + + // Ensure that the following read at an offset is within bounds. + assert!(op_place.layout.size >= offset + layout.size); + let value_place = op_place.offset(offset, MemPlaceMeta::None, layout, this)?; + this.write_scalar_atomic(value.into(), value_place, atomic) + } + + /// Perform an atomic read operation at the memory location. + fn read_scalar_atomic( + &self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp + ) -> InterpResult<'tcx, ScalarMaybeUninit> { + let this = self.eval_context_ref(); + let scalar = this.allow_data_races_ref(move |this| { + this.read_scalar(place.into()) + })?; + self.validate_atomic_load(place, atomic)?; + Ok(scalar) + } + + /// Perform an atomic write operation at the memory location. + fn write_scalar_atomic( + &mut self, val: ScalarMaybeUninit, dest: MPlaceTy<'tcx, Tag>, + atomic: AtomicWriteOp + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + this.allow_data_races_mut(move |this| { + this.write_scalar(val, dest.into()) + })?; + self.validate_atomic_store(dest, atomic) + } + + /// Perform a atomic operation on a memory location. + fn atomic_op_immediate( + &mut self, + place: MPlaceTy<'tcx, Tag>, rhs: ImmTy<'tcx, Tag>, + op: mir::BinOp, neg: bool, atomic: AtomicRwOp + ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { + let this = self.eval_context_mut(); + + let old = this.allow_data_races_mut(|this| { + this.read_immediate(place. into()) + })?; + + // Atomics wrap around on overflow. + let val = this.binary_op(op, old, rhs)?; + let val = if neg { this.unary_op(mir::UnOp::Not, val)? } else { val }; + this.allow_data_races_mut(|this| { + this.write_immediate(*val, place.into()) + })?; + + this.validate_atomic_rmw(place, atomic)?; + Ok(old) + } + + /// Perform an atomic exchange with a memory place and a new + /// scalar value, the old value is returned. + fn atomic_exchange_scalar( + &mut self, + place: MPlaceTy<'tcx, Tag>, new: ScalarMaybeUninit, + atomic: AtomicRwOp + ) -> InterpResult<'tcx, ScalarMaybeUninit> { + let this = self.eval_context_mut(); + + let old = this.allow_data_races_mut(|this| { + this.read_scalar(place.into()) + })?; + this.allow_data_races_mut(|this| { + this.write_scalar(new, place.into()) + })?; + this.validate_atomic_rmw(place, atomic)?; + Ok(old) + } + + /// Perform an atomic compare and exchange at a given memory location + /// on success an atomic RMW operation is performed and on failure + /// only an atomic read occurs. + fn atomic_compare_exchange_scalar( + &mut self, place: MPlaceTy<'tcx, Tag>, + expect_old: ImmTy<'tcx, Tag>, new: ScalarMaybeUninit, + success: AtomicRwOp, fail: AtomicReadOp + ) -> InterpResult<'tcx, Immediate> { + let this = self.eval_context_mut(); + + // Failure ordering cannot be stronger than success ordering, therefore first attempt + // to read with the failure ordering and if successfull then try again with the success + // read ordering and write in the success case. + // Read as immediate for the sake of `binary_op()` + let old = this.allow_data_races_mut(|this| { + this.read_immediate(place.into()) + })?; + + // `binary_op` will bail if either of them is not a scalar. 
+ let eq = this.overflowing_binary_op(mir::BinOp::Eq, old, expect_old)?.0; + let res = Immediate::ScalarPair(old.to_scalar_or_uninit(), eq.into()); + + // Update ptr depending on comparison. + // if successful, perform a full rw-atomic validation + // otherwise treat this as an atomic load with the fail ordering. + if eq.to_bool()? { + this.allow_data_races_mut(|this| { + this.write_scalar(new, place.into()) + })?; + this.validate_atomic_rmw(place, success)?; + } else { + this.validate_atomic_load(place, fail)?; + } + + // Return the old value. + Ok(res) + } + + + /// Update the data-race detector for an atomic read occuring at the + /// associated memory-place and on the current thread. + fn validate_atomic_load( + &self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp + ) -> InterpResult<'tcx> { + let this = self.eval_context_ref(); + this.validate_atomic_op( + place, atomic, "Atomic Load", + move |memory, clocks, index, atomic| { + if atomic == AtomicReadOp::Relaxed { + memory.load_relaxed(&mut *clocks, index) + } else { + memory.load_acquire(&mut *clocks, index) + } + } + ) + } + + /// Update the data-race detector for an atomic write occuring at the + /// associated memory-place and on the current thread. + fn validate_atomic_store( + &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicWriteOp + ) -> InterpResult<'tcx> { + let this = self.eval_context_ref(); + this.validate_atomic_op( + place, atomic, "Atomic Store", + move |memory, clocks, index, atomic| { + if atomic == AtomicWriteOp::Relaxed { + memory.store_relaxed(clocks, index) + } else { + memory.store_release(clocks, index) + } + } + ) + } + + /// Update the data-race detector for an atomic read-modify-write occuring + /// at the associated memory place and on the current thread. + fn validate_atomic_rmw( + &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicRwOp + ) -> InterpResult<'tcx> { + use AtomicRwOp::*; + let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); + let release = matches!(atomic, Release | AcqRel | SeqCst); + let this = self.eval_context_ref(); + this.validate_atomic_op( + place, atomic, "Atomic RMW", + move |memory, clocks, index, _| { + if acquire { + memory.load_acquire(clocks, index)?; + } else { + memory.load_relaxed(clocks, index)?; + } + if release { + memory.rmw_release(clocks, index) + } else { + memory.rmw_relaxed(clocks, index) + } + } + ) + } + + /// Update the data-race detector for an atomic fence on the current thread. + fn validate_atomic_fence(&mut self, atomic: AtomicFenceOp) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + if let Some(data_race) = &this.memory.extra.data_race { + data_race.maybe_perform_sync_operation(move |index, mut clocks| { + log::trace!("Atomic fence on {:?} with ordering {:?}", index, atomic); + + // Apply data-race detection for the current fences + // this treats AcqRel and SeqCst as the same as a acquire + // and release fence applied in the same timestamp. + if atomic != AtomicFenceOp::Release { + // Either Acquire | AcqRel | SeqCst + clocks.apply_acquire_fence(); + } + if atomic != AtomicFenceOp::Acquire { + // Either Release | AcqRel | SeqCst + clocks.apply_release_fence(); + } + Ok(()) + }) + } else { + Ok(()) + } + } +} + + + +/// Vector clock metadata for a logical memory allocation. 
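In user terms, the acquire/release plumbing above is what makes release/acquire message passing race-free for a non-atomic payload: `store_release` publishes the writer's clock into `sync_vector`, and `load_acquire` joins that clock into the reader. A small illustration in ordinary Rust follows; the statics and names exist only for this example, and with `Ordering::Relaxed` on both sides the payload accesses would be flagged, since `load_relaxed` never joins the synchronization clock.

    use std::sync::atomic::{AtomicBool, Ordering};
    use std::thread;

    // Illustrative only: a non-atomic payload published through an atomic flag.
    static READY: AtomicBool = AtomicBool::new(false);
    static mut PAYLOAD: u32 = 0;

    fn main() {
        let producer = thread::spawn(|| {
            unsafe { PAYLOAD = 42 };              // non-atomic write
            READY.store(true, Ordering::Release); // publishes the writer's clock
        });

        // The acquire load joins the released clock, ordering the read below.
        while !READY.load(Ordering::Acquire) {
            thread::yield_now();
        }
        assert_eq!(unsafe { PAYLOAD }, 42);

        producer.join().unwrap();
    }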
#[derive(Debug, Clone)] pub struct VClockAlloc { - /// Range of Vector clocks, mapping to the vector-clock - /// index of the last write to the bytes in this allocation + /// Range of Vector clocks, this gives each byte a potentially + /// unqiue set of vector clocks, but merges identical information + /// together for improved efficiency. alloc_ranges: RefCell>, - // Pointer to global state + // Pointer to global state. global: MemoryExtra, } + impl VClockAlloc { - /// Create a new data-race allocation detector + /// Create a new data-race allocation detector. pub fn new_allocation(global: &MemoryExtra, len: Size) -> VClockAlloc { VClockAlloc { global: Rc::clone(global), @@ -565,7 +638,7 @@ impl VClockAlloc { } // Find an index, if one exists where the value - // in `l` is greater than the value in `r` + // in `l` is greater than the value in `r`. fn find_gt_index(l: &VClock, r: &VClock) -> Option { let l_slice = l.as_slice(); let r_slice = r.as_slice(); @@ -575,27 +648,28 @@ impl VClockAlloc { if l > r { Some(idx) } else { None } }).or_else(|| { if l_slice.len() > r_slice.len() { + // By invariant, if l_slice is longer - // then one element must be larger + // then one element must be larger. // This just validates that this is true - // and reports earlier elements first + // and reports earlier elements first. let l_remainder_slice = &l_slice[r_slice.len()..]; let idx = l_remainder_slice.iter().enumerate() .find_map(|(idx, &r)| { if r == 0 { None } else { Some(idx) } }).expect("Invalid VClock Invariant"); Some(idx) - }else{ + } else { None } }).map(|idx| VectorIdx::new(idx)) } - /// Report a data-race found in the program - /// this finds the two racing threads and the type - /// of data-race that occured, this will also - /// return info about the memory location the data-race - /// occured in + /// Report a data-race found in the program. + /// This finds the two racing threads and the type + /// of data-race that occured. This will also + /// return info about the memory location the data-race + /// occured in. #[cold] #[inline(never)] fn report_data_race<'tcx>( @@ -608,39 +682,40 @@ impl VClockAlloc { let ( other_action, other_thread, other_clock ) = if range.write > current_clocks.clock[range.write_index] { + // Convert the write action into the vector clock it - // represents for diagnostic purposes + // represents for diagnostic purposes. 
write_clock = VClock::new_with_index(range.write_index, range.write); ("WRITE", range.write_index, &write_clock) - }else if let Some(idx) = Self::find_gt_index( + } else if let Some(idx) = Self::find_gt_index( &range.read, ¤t_clocks.clock ){ ("READ", idx, &range.read) - }else if !is_atomic { + } else if !is_atomic { if let Some(atomic) = range.atomic() { if let Some(idx) = Self::find_gt_index( &atomic.write_vector, ¤t_clocks.clock ) { ("ATOMIC_STORE", idx, &atomic.write_vector) - }else if let Some(idx) = Self::find_gt_index( + } else if let Some(idx) = Self::find_gt_index( &atomic.read_vector, ¤t_clocks.clock ) { ("ATOMIC_LOAD", idx, &atomic.read_vector) - }else{ - unreachable!("Failed to find report data-race for non-atomic operation: no race found") + } else { + unreachable!("Failed to report data-race for non-atomic operation: no race found") } - }else{ + } else { unreachable!("Failed to report data-race for non-atomic operation: no atomic component") } - }else{ + } else { unreachable!("Failed to report data-race for atomic operation") }; - // Load elaborated thread information about the racing thread actions + // Load elaborated thread information about the racing thread actions. let current_thread_info = global.print_thread_metadata(current_index); let other_thread_info = global.print_thread_metadata(other_thread); - // Throw the data-race detection + // Throw the data-race detection. throw_ub_format!( "Data race detected between {} on {} and {} on {}, memory({:?},offset={},size={})\ \n\t\t -current vector clock = {:?}\ @@ -654,23 +729,25 @@ impl VClockAlloc { } /// Detect data-races for an unsychronized read operation, will not perform - /// data-race threads if `multi-threaded` is false, either due to no threads - /// being created or if it is temporarily disabled during a racy read or write - /// operation + /// data-race detection if `multi-threaded` is false, either due to no threads + /// being created or if it is temporarily disabled during a racy read or write + /// operation for which data-race detection is handled separately, for example + /// atomic read operations. pub fn read<'tcx>(&self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { if self.global.multi_threaded.get() { let (index, clocks) = self.global.current_thread_state(); let mut alloc_ranges = self.alloc_ranges.borrow_mut(); for (_,range) in alloc_ranges.iter_mut(pointer.offset, len) { if let Err(DataRace) = range.read_race_detect(&*clocks, index) { - // Report data-race + + // Report data-race. 
return Self::report_data_race( &self.global,range, "READ", false, pointer, len ); } } Ok(()) - }else{ + } else { Ok(()) } } @@ -682,6 +759,7 @@ impl VClockAlloc { let (index, clocks) = self.global.current_thread_state(); for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { if let Err(DataRace) = range.write_race_detect(&*clocks, index) { + // Report data-race return Self::report_data_race( &self.global, range, action, false, pointer, len @@ -689,156 +767,208 @@ impl VClockAlloc { } } Ok(()) - }else{ + } else { Ok(()) } } /// Detect data-races for an unsychronized write operation, will not perform - /// data-race threads if `multi-threaded` is false, either due to no threads - /// being created or if it is temporarily disabled during a racy read or write - /// operation + /// data-race threads if `multi-threaded` is false, either due to no threads + /// being created or if it is temporarily disabled during a racy read or write + /// operation pub fn write<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { self.unique_access(pointer, len, "Write") } + /// Detect data-races for an unsychronized deallocate operation, will not perform - /// data-race threads if `multi-threaded` is false, either due to no threads - /// being created or if it is temporarily disabled during a racy read or write - /// operation + /// data-race threads if `multi-threaded` is false, either due to no threads + /// being created or if it is temporarily disabled during a racy read or write + /// operation pub fn deallocate<'tcx>(&mut self, pointer: Pointer, len: Size) -> InterpResult<'tcx> { self.unique_access(pointer, len, "Deallocate") } } -/// The current set of vector clocks describing the state -/// of a thread, contains the happens-before clock and -/// additional metadata to model atomic fence operations -#[derive(Clone, Default, Debug)] -struct ThreadClockSet { - - /// The increasing clock representing timestamps - /// that happen-before this thread. - clock: VClock, - - /// The set of timestamps that will happen-before this - /// thread once it performs an acquire fence - fence_acquire: VClock, - - /// The last timesamp of happens-before relations that - /// have been released by this thread by a fence - fence_release: VClock, -} - -impl ThreadClockSet { +impl<'mir, 'tcx: 'mir> EvalContextPrivExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} +trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { - /// Apply the effects of a release fence to this - /// set of thread vector clocks + // Temporarily allow data-races to occur, this should only be + // used if either one of the appropiate `validate_atomic` functions + // will be called to treat a memory access as atomic or if the memory + // being accessed should be treated as internal state, that cannot be + // accessed by the interpreted program. 
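Before the helpers below, a quick illustration of what `VClockAlloc::read` and `VClockAlloc::write` above actually catch. In the program that follows neither increment happens-before the other: the spawn edge only orders the parent's earlier actions before the child, and the join edge only orders the child before the parent's later actions. The `write <= clocks.clock[write_index]` check therefore fails and a write/write race is reported. This is plain Rust with illustrative names; it needs `unsafe` precisely because it is a data race.

    use std::thread;

    // Illustrative only: two unsynchronized writers to the same location.
    static mut RACY: u32 = 0;

    fn main() {
        let t = thread::spawn(|| unsafe { RACY += 1 });
        // Concurrent with the write in `t`: no lock, no atomics, no ordering edge.
        unsafe { RACY += 1 };
        t.join().unwrap();
    }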
#[inline] - fn apply_release_fence(&mut self) { - self.fence_release.clone_from(&self.clock); + fn allow_data_races_ref(&self, op: impl FnOnce(&MiriEvalContext<'mir, 'tcx>) -> R) -> R { + let this = self.eval_context_ref(); + let old = if let Some(data_race) = &this.memory.extra.data_race { + data_race.multi_threaded.replace(false) + } else { + false + }; + let result = op(this); + if let Some(data_race) = &this.memory.extra.data_race { + data_race.multi_threaded.set(old); + } + result } - /// Apply the effects of a acquire fence to this - /// set of thread vector clocks + /// Same as `allow_data_races_ref`, this temporarily disables any data-race detection and + /// so should only be used for atomic operations or internal state that the program cannot + /// access. #[inline] - fn apply_acquire_fence(&mut self) { - self.clock.join(&self.fence_acquire); + fn allow_data_races_mut(&mut self, op: impl FnOnce(&mut MiriEvalContext<'mir, 'tcx>) -> R) -> R { + let this = self.eval_context_mut(); + let old = if let Some(data_race) = &this.memory.extra.data_race { + data_race.multi_threaded.replace(false) + } else { + false + }; + let result = op(this); + if let Some(data_race) = &this.memory.extra.data_race { + data_race.multi_threaded.set(old); + } + result } - /// Increment the happens-before clock at a - /// known index - #[inline] - fn increment_clock(&mut self, index: VectorIdx) { - self.clock.increment_index(index); + /// Generic atomic operation implementation, + /// this accesses memory via get_raw instead of + /// get_raw_mut, due to issues calling get_raw_mut + /// for atomic loads from read-only memory. + /// FIXME: is this valid, or should get_raw_mut be used for + /// atomic-stores/atomic-rmw? + fn validate_atomic_op( + &self, place: MPlaceTy<'tcx, Tag>, + atomic: A, description: &str, + mut op: impl FnMut( + &mut MemoryCellClocks, &mut ThreadClockSet, VectorIdx, A + ) -> Result<(), DataRace> + ) -> InterpResult<'tcx> { + let this = self.eval_context_ref(); + if let Some(data_race) = &this.memory.extra.data_race { + if data_race.multi_threaded.get() { + + // Load and log the atomic operation. + let place_ptr = place.ptr.assert_ptr(); + let size = place.layout.size; + let alloc_meta = &this.memory.get_raw(place_ptr.alloc_id)?.extra.data_race.as_ref().unwrap(); + log::trace!( + "Atomic op({}) with ordering {:?} on memory({:?}, offset={}, size={})", + description, &atomic, place_ptr.alloc_id, place_ptr.offset.bytes(), size.bytes() + ); + + // Perform the atomic operation. + let data_race = &alloc_meta.global; + data_race.maybe_perform_sync_operation(|index, mut clocks| { + for (_,range) in alloc_meta.alloc_ranges.borrow_mut().iter_mut(place_ptr.offset, size) { + if let Err(DataRace) = op(range, &mut *clocks, index, atomic) { + mem::drop(clocks); + return VClockAlloc::report_data_race( + &alloc_meta.global, range, description, true, + place_ptr, size + ); + } + } + Ok(()) + })?; + + // Log changes to atomic memory. 
+ if log::log_enabled!(log::Level::Trace) { + for (_,range) in alloc_meta.alloc_ranges.borrow().iter(place_ptr.offset, size) { + log::trace!( + "Updated atomic memory({:?}, offset={}, size={}) to {:#?}", + place.ptr.assert_ptr().alloc_id, place_ptr.offset.bytes(), size.bytes(), + range.atomic_ops + ); + } + } + } + } + Ok(()) } - /// Join the happens-before clock with that of - /// another thread, used to model thread join - /// operations - fn join_with(&mut self, other: &ThreadClockSet) { - self.clock.join(&other.clock); - } } -/// Extra metadata associated with a thread + +/// Extra metadata associated with a thread. #[derive(Debug, Clone, Default)] struct ThreadExtraState { /// The current vector index in use by the - /// thread currently, this is set to None - /// after the vector index has been re-used - /// and hence the value will never need to be - /// read during data-race reporting + /// thread currently, this is set to None + /// after the vector index has been re-used + /// and hence the value will never need to be + /// read during data-race reporting. vector_index: Option, /// The name of the thread, updated for better - /// diagnostics when reporting detected data - /// races + /// diagnostics when reporting detected data + /// races. thread_name: Option>, /// Thread termination vector clock, this - /// is set on thread termination and is used - /// for joining on threads since the vector_index - /// may be re-used when the join operation occurs + /// is set on thread termination and is used + /// for joining on threads since the vector_index + /// may be re-used when the join operation occurs. termination_vector_clock: Option, } /// Global data-race detection state, contains the currently -/// executing thread as well as the vector-clocks associated -/// with each of the threads. +/// executing thread as well as the vector-clocks associated +/// with each of the threads. #[derive(Debug, Clone)] pub struct GlobalState { /// Set to true once the first additional - /// thread has launched, due to the dependency - /// between before and after a thread launch + /// thread has launched, due to the dependency + /// between before and after a thread launch. /// Any data-races must be recorded after this - /// so concurrent execution can ignore recording - /// any data-races + /// so concurrent execution can ignore recording + /// any data-races. multi_threaded: Cell, /// Mapping of a vector index to a known set of thread - /// clocks, this is not directly mapping from a thread id - /// since it may refer to multiple threads + /// clocks, this is not directly mapping from a thread id + /// since it may refer to multiple threads. vector_clocks: RefCell>, /// Mapping of a given vector index to the current thread - /// that the execution is representing, this may change - /// if a vector index is re-assigned to a new thread + /// that the execution is representing, this may change + /// if a vector index is re-assigned to a new thread. vector_info: RefCell>, - /// The mapping of a given thread to assocaited thread metadata + /// The mapping of a given thread to assocaited thread metadata. thread_info: RefCell>, - /// The current vector index being executed + /// The current vector index being executed. current_index: Cell, /// Potential vector indices that could be re-used on thread creation - /// values are inserted here on after the thread has terminated and - /// been joined with, and hence may potentially become free - /// for use as the index for a new thread. 
+ /// values are inserted here on after the thread has terminated and + /// been joined with, and hence may potentially become free + /// for use as the index for a new thread. /// Elements in this set may still require the vector index to - /// report data-races, and can only be re-used after all - /// active vector-clocks catch up with the threads timestamp. + /// report data-races, and can only be re-used after all + /// active vector-clocks catch up with the threads timestamp. reuse_candidates: RefCell>, /// Counts the number of threads that are currently active - /// if the number of active threads reduces to 1 and then - /// a join operation occures with the remaining main thread - /// then multi-threaded execution may be disabled + /// if the number of active threads reduces to 1 and then + /// a join operation occures with the remaining main thread + /// then multi-threaded execution may be disabled. active_thread_count: Cell, /// This contains threads that have terminated, but not yet joined - /// and so cannot become re-use candidates until a join operation - /// occurs. + /// and so cannot become re-use candidates until a join operation + /// occurs. /// The associated vector index will be moved into re-use candidates - /// after the join operation occurs + /// after the join operation occurs. terminated_threads: RefCell>, } + impl GlobalState { /// Create a new global state, setup with just thread-id=0 - /// advanced to timestamp = 1 + /// advanced to timestamp = 1. pub fn new() -> Self { let global_state = GlobalState { multi_threaded: Cell::new(false), @@ -852,8 +982,8 @@ impl GlobalState { }; // Setup the main-thread since it is not explicitly created: - // uses vector index and thread-id 0, also the rust runtime gives - // the main-thread a name of "main". + // uses vector index and thread-id 0, also the rust runtime gives + // the main-thread a name of "main". let index = global_state.vector_clocks.borrow_mut().push(ThreadClockSet::default()); global_state.vector_info.borrow_mut().push(ThreadId::new(0)); global_state.thread_info.borrow_mut().push( @@ -868,7 +998,7 @@ impl GlobalState { } // Try to find vector index values that can potentially be re-used - // by a new thread instead of a new vector index being created + // by a new thread instead of a new vector index being created. fn find_vector_index_reuse_candidate(&self) -> Option { let mut reuse = self.reuse_candidates.borrow_mut(); let vector_clocks = self.vector_clocks.borrow(); @@ -877,24 +1007,26 @@ impl GlobalState { for &candidate in reuse.iter() { let target_timestamp = vector_clocks[candidate].clock[candidate]; if vector_clocks.iter_enumerated().all(|(clock_idx, clock)| { + // The thread happens before the clock, and hence cannot report - // a data-race with this the candidate index + // a data-race with this the candidate index. let no_data_race = clock.clock[candidate] >= target_timestamp; // The vector represents a thread that has terminated and hence cannot - // report a data-race with the candidate index + // report a data-race with the candidate index. let thread_id = vector_info[clock_idx]; let vector_terminated = reuse.contains(&clock_idx) || terminated_threads.contains_key(&thread_id); // The vector index cannot report a race with the candidate index - // and hence allows the candidate index to be re-used + // and hence allows the candidate index to be re-used. 
no_data_race || vector_terminated }) { + // All vector clocks for each vector index are equal to - // the target timestamp, and the thread is known to have - // terminated, therefore this vector clock index cannot - // report any more data-races + // the target timestamp, and the thread is known to have + // terminated, therefore this vector clock index cannot + // report any more data-races. assert!(reuse.remove(&candidate)); return Some(candidate) } @@ -903,17 +1035,17 @@ impl GlobalState { } // Hook for thread creation, enabled multi-threaded execution and marks - // the current thread timestamp as happening-before the current thread + // the current thread timestamp as happening-before the current thread. #[inline] pub fn thread_created(&self, thread: ThreadId) { let current_index = self.current_index(); - // Increment the number of active threads + // Increment the number of active threads. let active_threads = self.active_thread_count.get(); self.active_thread_count.set(active_threads + 1); // Enable multi-threaded execution, there are now two threads - // so data-races are now possible. + // so data-races are now possible. self.multi_threaded.set(true); // Load and setup the associated thread metadata @@ -921,101 +1053,105 @@ impl GlobalState { thread_info.ensure_contains_elem(thread, Default::default); // Assign a vector index for the thread, attempting to re-use an old - // vector index that can no longer report any data-races if possible + // vector index that can no longer report any data-races if possible. let created_index = if let Some( reuse_index ) = self.find_vector_index_reuse_candidate() { + // Now re-configure the re-use candidate, increment the clock - // for the new sync use of the vector + // for the new sync use of the vector. let mut vector_clocks = self.vector_clocks.borrow_mut(); vector_clocks[reuse_index].increment_clock(reuse_index); // Locate the old thread the vector was associated with and update - // it to represent the new thread instead + // it to represent the new thread instead. let mut vector_info = self.vector_info.borrow_mut(); let old_thread = vector_info[reuse_index]; vector_info[reuse_index] = thread; // Mark the thread the vector index was associated with as no longer - // representing a thread index + // representing a thread index. thread_info[old_thread].vector_index = None; reuse_index - }else{ + } else { + // No vector re-use candidates available, instead create - // a new vector index + // a new vector index. let mut vector_info = self.vector_info.borrow_mut(); vector_info.push(thread) }; - // Mark the chosen vector index as in use by the thread + // Mark the chosen vector index as in use by the thread. thread_info[thread].vector_index = Some(created_index); - // Create a thread clock set if applicable + // Create a thread clock set if applicable. let mut vector_clocks = self.vector_clocks.borrow_mut(); if created_index == vector_clocks.next_index() { vector_clocks.push(ThreadClockSet::default()); } - // Now load the two clocks and configure the initial state + // Now load the two clocks and configure the initial state. let (current, created) = vector_clocks.pick2_mut(current_index, created_index); - // Advance the current thread before the synchronized operation + // Advance the current thread before the synchronized operation. current.increment_clock(current_index); // Join the created with current, since the current threads - // previous actions happen-before the created thread + // previous actions happen-before the created thread. 
created.join_with(current); - // Advance both threads after the synchronized operation + // Advance both threads after the synchronized operation. current.increment_clock(current_index); created.increment_clock(created_index); } /// Hook on a thread join to update the implicit happens-before relation - /// between the joined thead and the current thread. + /// between the joined thead and the current thread. #[inline] pub fn thread_joined(&self, current_thread: ThreadId, join_thread: ThreadId) { let mut clocks_vec = self.vector_clocks.borrow_mut(); let thread_info = self.thread_info.borrow(); - // Load the vector clock of the current thread + // Load the vector clock of the current thread. let current_index = thread_info[current_thread].vector_index .expect("Performed thread join on thread with no assigned vector"); let current = &mut clocks_vec[current_index]; - // Load the associated vector clock for the terminated thread + // Load the associated vector clock for the terminated thread. let join_clock = thread_info[join_thread].termination_vector_clock .as_ref().expect("Joined with thread but thread has not terminated"); - // Pre increment clocks before atomic operation + // Pre increment clocks before atomic operation. current.increment_clock(current_index); // The join thread happens-before the current thread - // so update the current vector clock + // so update the current vector clock. current.clock.join(join_clock); - // Post increment clocks after atomic operation + // Post increment clocks after atomic operation. current.increment_clock(current_index); // Check the number of active threads, if the value is 1 - // then test for potentially disabling multi-threaded execution + // then test for potentially disabling multi-threaded execution. let active_threads = self.active_thread_count.get(); if active_threads == 1 { - // May potentially be able to disable multi-threaded execution + + // May potentially be able to disable multi-threaded execution. let current_clock = &clocks_vec[current_index]; if clocks_vec.iter_enumerated().all(|(idx, clocks)| { clocks.clock[idx] <= current_clock.clock[idx] }) { + // The all thread termations happen-before the current clock - // therefore no data-races can be reported until a new thread - // is created, so disable multi-threaded execution + // therefore no data-races can be reported until a new thread + // is created, so disable multi-threaded execution. self.multi_threaded.set(false); } } // If the thread is marked as terminated but not joined - // then move the thread to the re-use set + // then move the thread to the re-use set. let mut termination = self.terminated_threads.borrow_mut(); if let Some(index) = termination.remove(&join_thread) { let mut reuse = self.reuse_candidates.borrow_mut(); @@ -1024,47 +1160,47 @@ impl GlobalState { } /// On thread termination, the vector-clock may re-used - /// in the future once all remaining thread-clocks catch - /// up with the time index of the terminated thread. + /// in the future once all remaining thread-clocks catch + /// up with the time index of the terminated thread. /// This assiges thread termination with a unique index - /// which will be used to join the thread + /// which will be used to join the thread /// This should be called strictly before any calls to - /// `thread_joined` + /// `thread_joined`. 
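The spawn and join edges created by `thread_created` and `thread_joined` above are what make the following ordinary program race-free even though it uses no locks or atomics: the parent's write happens-before the child via the spawn edge, and the child's write happens-before the final read via the join edge. Names are illustrative; `static mut` is used purely to keep the borrow checker out of the picture, since it is the detector and not the type system that must accept this pattern.

    use std::thread;

    // Illustrative only.
    static mut COUNTER: u32 = 0;

    fn main() {
        unsafe { COUNTER = 1 }; // ordered before the child by the spawn edge

        let child = thread::spawn(|| {
            unsafe { COUNTER += 1 }; // no race: happens-after the parent's write
        });

        child.join().unwrap(); // the join edge orders the child's write before this read
        assert_eq!(unsafe { COUNTER }, 2);
    }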
#[inline] pub fn thread_terminated(&self) { let current_index = self.current_index(); - // Increment the clock to a unique termination timestamp + // Increment the clock to a unique termination timestamp. let mut vector_clocks = self.vector_clocks.borrow_mut(); let current_clocks = &mut vector_clocks[current_index]; current_clocks.increment_clock(current_index); - // Load the current thread id for the executing vector + // Load the current thread id for the executing vector. let vector_info = self.vector_info.borrow(); let current_thread = vector_info[current_index]; // Load the current thread metadata, and move to a terminated - // vector state. Setting up the vector clock all join operations - // will use. + // vector state. Setting up the vector clock all join operations + // will use. let mut thread_info = self.thread_info.borrow_mut(); let current = &mut thread_info[current_thread]; current.termination_vector_clock = Some(current_clocks.clock.clone()); // Add this thread as a candidate for re-use after a thread join - // occurs + // occurs. let mut termination = self.terminated_threads.borrow_mut(); termination.insert(current_thread, current_index); // Reduce the number of active threads, now that a thread has - // terminated + // terminated. let mut active_threads = self.active_thread_count.get(); active_threads -= 1; self.active_thread_count.set(active_threads); } /// Hook for updating the local tracker of the currently - /// enabled thread, should always be updated whenever - /// `active_thread` in thread.rs is updated + /// enabled thread, should always be updated whenever + /// `active_thread` in thread.rs is updated. #[inline] pub fn thread_set_active(&self, thread: ThreadId) { let thread_info = self.thread_info.borrow(); @@ -1074,9 +1210,9 @@ impl GlobalState { } /// Hook for updating the local tracker of the threads name - /// this should always mirror the local value in thread.rs - /// the thread name is used for improved diagnostics - /// during a data-race + /// this should always mirror the local value in thread.rs + /// the thread name is used for improved diagnostics + /// during a data-race. #[inline] pub fn thread_set_name(&self, thread: ThreadId, name: String) { let name = name.into_boxed_str(); @@ -1086,12 +1222,12 @@ impl GlobalState { /// Attempt to perform a synchronized operation, this - /// will perform no operation if multi-threading is - /// not currently enabled. + /// will perform no operation if multi-threading is + /// not currently enabled. /// Otherwise it will increment the clock for the current - /// vector before and after the operation for data-race - /// detection between any happens-before edges the - /// operation may create + /// vector before and after the operation for data-race + /// detection between any happens-before edges the + /// operation may create. fn maybe_perform_sync_operation<'tcx>( &self, op: impl FnOnce(VectorIdx, RefMut<'_,ThreadClockSet>) -> InterpResult<'tcx>, ) -> InterpResult<'tcx> { @@ -1107,50 +1243,50 @@ impl GlobalState { /// Internal utility to identify a thread stored internally - /// returns the id and the name for better diagnostics + /// returns the id and the name for better diagnostics. 
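Because `thread_set_name` mirrors the interpreted program's thread names, the diagnostics built from this state can identify a racing thread by name rather than only by id. At the user level it should be enough to give the thread a name, for example via `std::thread::Builder`; the snippet below is plain std code shown only as an illustration.

    use std::thread;

    fn main() {
        // A named thread can appear in diagnostics with its name; an unnamed one
        // falls back to its numeric id.
        let worker = thread::Builder::new()
            .name("worker".to_string())
            .spawn(|| {
                // ... work that may participate in a reported race ...
            })
            .expect("failed to spawn worker");
        worker.join().unwrap();
    }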
fn print_thread_metadata(&self, vector: VectorIdx) -> String { let thread = self.vector_info.borrow()[vector]; let thread_name = &self.thread_info.borrow()[thread].thread_name; if let Some(name) = thread_name { let name: &str = name; format!("Thread(id = {:?}, name = {:?})", thread.to_u32(), &*name) - }else{ + } else { format!("Thread(id = {:?})", thread.to_u32()) } } /// Acquire a lock, express that the previous call of - /// `validate_lock_release` must happen before this - pub fn validate_lock_acquire(&self, lock: &DataRaceLockHandle, thread: ThreadId) { + /// `validate_lock_release` must happen before this. + pub fn validate_lock_acquire(&self, lock: &VClock, thread: ThreadId) { let (index, mut clocks) = self.load_thread_state_mut(thread); clocks.increment_clock(index); - clocks.clock.join(&lock.clock); + clocks.clock.join(&lock); clocks.increment_clock(index); } /// Release a lock handle, express that this happens-before - /// any subsequent calls to `validate_lock_acquire` - pub fn validate_lock_release(&self, lock: &mut DataRaceLockHandle, thread: ThreadId) { + /// any subsequent calls to `validate_lock_acquire`. + pub fn validate_lock_release(&self, lock: &mut VClock, thread: ThreadId) { let (index, mut clocks) = self.load_thread_state_mut(thread); clocks.increment_clock(index); - lock.clock.clone_from(&clocks.clock); + lock.clone_from(&clocks.clock); clocks.increment_clock(index); } /// Release a lock handle, express that this happens-before - /// any subsequent calls to `validate_lock_acquire` as well - /// as any previous calls to this function after any - /// `validate_lock_release` calls - pub fn validate_lock_release_shared(&self, lock: &mut DataRaceLockHandle, thread: ThreadId) { + /// any subsequent calls to `validate_lock_acquire` as well + /// as any previous calls to this function after any + /// `validate_lock_release` calls. + pub fn validate_lock_release_shared(&self, lock: &mut VClock, thread: ThreadId) { let (index, mut clocks) = self.load_thread_state_mut(thread); clocks.increment_clock(index); - lock.clock.join(&clocks.clock); + lock.join(&clocks.clock); clocks.increment_clock(index); } /// Load the vector index used by the given thread as well as the set of vector clocks - /// used by the thread + /// used by the thread. #[inline] fn load_thread_state_mut(&self, thread: ThreadId) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { let index = self.thread_info.borrow()[thread].vector_index @@ -1161,7 +1297,7 @@ impl GlobalState { } /// Load the current vector clock in use and the current set of thread clocks - /// in use for the vector + /// in use for the vector. #[inline] fn current_thread_state(&self) -> (VectorIdx, Ref<'_, ThreadClockSet>) { let index = self.current_index(); @@ -1171,7 +1307,7 @@ impl GlobalState { } /// Load the current vector clock in use and the current set of thread clocks - /// in use for the vector mutably for modification + /// in use for the vector mutably for modification. #[inline] fn current_thread_state_mut(&self) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { let index = self.current_index(); @@ -1181,7 +1317,7 @@ impl GlobalState { } /// Return the current thread, should be the same - /// as the data-race active thread + /// as the data-race active thread. 
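`validate_lock_acquire` and `validate_lock_release` let the synchronization shims thread a vector clock through a lock: a release copies the owner's clock into the lock's `VClock`, and the next acquire joins it back into the acquiring thread. That edge is what makes an ordinary mutex-protected counter race-free; the sketch below is plain std code for illustration only.

    use std::sync::{Arc, Mutex};
    use std::thread;

    fn main() {
        let counter = Arc::new(Mutex::new(0u32));
        let shared = Arc::clone(&counter);

        let t = thread::spawn(move || {
            // Acquiring the lock joins the clock stored by the last release,
            // so this increment is ordered after any previous critical section.
            *shared.lock().unwrap() += 1;
        });

        *counter.lock().unwrap() += 1;
        t.join().unwrap();

        assert_eq!(*counter.lock().unwrap(), 2);
    }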
#[inline] fn current_index(&self) -> VectorIdx { self.current_index.get() diff --git a/src/eval.rs b/src/eval.rs index 54d06feec3..0a62f14dd3 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -48,6 +48,8 @@ pub struct MiriConfig { pub tracked_alloc_id: Option, /// Whether to track raw pointers in stacked borrows. pub track_raw: bool, + /// Determine if data race detection should be enabled + pub data_race_detector: bool, } impl Default for MiriConfig { @@ -65,6 +67,7 @@ impl Default for MiriConfig { tracked_call_id: None, tracked_alloc_id: None, track_raw: false, + data_race_detector: true, } } } diff --git a/src/lib.rs b/src/lib.rs index c8c9e70ec3..87effe9c68 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -55,7 +55,7 @@ pub use crate::shims::tls::{EvalContextExt as _, TlsData}; pub use crate::shims::EvalContextExt as _; pub use crate::data_race::{ - AtomicReadOp, AtomicWriteOp, AtomicRWOp, AtomicFenceOp, DataRaceLockHandle, + AtomicReadOp, AtomicWriteOp, AtomicRwOp, AtomicFenceOp, EvalContextExt as DataRaceEvalContextExt }; pub use crate::diagnostics::{ @@ -81,7 +81,7 @@ pub use crate::sync::{ EvalContextExt as SyncEvalContextExt, CondvarId, MutexId, RwLockId }; pub use crate::vector_clock::{ - VClock, VSmallClockSet, VectorIdx, VTimestamp + VClock, VSmallClockMap, VectorIdx, VTimestamp }; /// Insert rustc arguments at the beginning of the argument list that Miri wants to be diff --git a/src/machine.rs b/src/machine.rs index 363513f636..9612d9e191 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -109,15 +109,16 @@ impl fmt::Display for MiriMemoryKind { pub struct AllocExtra { /// Stacked Borrows state is only added if it is enabled. pub stacked_borrows: Option, - /// Data race detection via the use of a vector-clock. - pub data_race: data_race::AllocExtra, + /// Data race detection via the use of a vector-clock, + /// this is only added if it is enabled. + pub data_race: Option, } /// Extra global memory data #[derive(Clone, Debug)] pub struct MemoryExtra { pub stacked_borrows: Option, - pub data_race: data_race::MemoryExtra, + pub data_race: Option, pub intptrcast: intptrcast::MemoryExtra, /// Mapping extern static names to their canonical allocation. @@ -147,7 +148,11 @@ impl MemoryExtra { } else { None }; - let data_race = Rc::new(data_race::GlobalState::new()); + let data_race = if config.data_race_detector { + Some(Rc::new(data_race::GlobalState::new())) + }else{ + None + }; MemoryExtra { stacked_borrows, data_race, @@ -472,7 +477,11 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for Evaluator<'mir, 'tcx> { // No stacks, no tag. 
(None, Tag::Untagged) }; - let race_alloc = data_race::AllocExtra::new_allocation(&memory_extra.data_race, alloc.size); + let race_alloc = if let Some(data_race) = &memory_extra.data_race { + Some(data_race::AllocExtra::new_allocation(&data_race, alloc.size)) + } else { + None + }; let mut stacked_borrows = memory_extra.stacked_borrows.as_ref().map(|sb| sb.borrow_mut()); let alloc: Allocation = alloc.with_tags_and_extra( |alloc| { @@ -590,7 +599,9 @@ impl AllocationExtra for AllocExtra { ptr: Pointer, size: Size, ) -> InterpResult<'tcx> { - alloc.extra.data_race.read(ptr, size)?; + if let Some(data_race) = &alloc.extra.data_race { + data_race.read(ptr, size)?; + } if let Some(stacked_borrows) = &alloc.extra.stacked_borrows { stacked_borrows.memory_read(ptr, size) } else { @@ -604,7 +615,9 @@ impl AllocationExtra for AllocExtra { ptr: Pointer, size: Size, ) -> InterpResult<'tcx> { - alloc.extra.data_race.write(ptr, size)?; + if let Some(data_race) = &mut alloc.extra.data_race { + data_race.write(ptr, size)?; + } if let Some(stacked_borrows) = &mut alloc.extra.stacked_borrows { stacked_borrows.memory_written(ptr, size) } else { @@ -618,7 +631,9 @@ impl AllocationExtra for AllocExtra { ptr: Pointer, size: Size, ) -> InterpResult<'tcx> { - alloc.extra.data_race.deallocate(ptr, size)?; + if let Some(data_race) = &mut alloc.extra.data_race { + data_race.deallocate(ptr, size)?; + } if let Some(stacked_borrows) = &mut alloc.extra.stacked_borrows { stacked_borrows.memory_deallocated(ptr, size) } else { diff --git a/src/shims/intrinsics.rs b/src/shims/intrinsics.rs index 50f97af845..8f7ae6bebb 100644 --- a/src/shims/intrinsics.rs +++ b/src/shims/intrinsics.rs @@ -324,98 +324,98 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx "atomic_singlethreadfence_acqrel" => this.compiler_fence(args, AtomicFenceOp::AcqRel)?, "atomic_singlethreadfence" => this.compiler_fence(args, AtomicFenceOp::SeqCst)?, - "atomic_xchg" => this.atomic_exchange(args, dest, AtomicRWOp::SeqCst)?, - "atomic_xchg_acq" => this.atomic_exchange(args, dest, AtomicRWOp::Acquire)?, - "atomic_xchg_rel" => this.atomic_exchange(args, dest, AtomicRWOp::Release)?, - "atomic_xchg_acqrel" => this.atomic_exchange(args, dest, AtomicRWOp::AcqRel)?, - "atomic_xchg_relaxed" => this.atomic_exchange(args, dest, AtomicRWOp::Relaxed)?, + "atomic_xchg" => this.atomic_exchange(args, dest, AtomicRwOp::SeqCst)?, + "atomic_xchg_acq" => this.atomic_exchange(args, dest, AtomicRwOp::Acquire)?, + "atomic_xchg_rel" => this.atomic_exchange(args, dest, AtomicRwOp::Release)?, + "atomic_xchg_acqrel" => this.atomic_exchange(args, dest, AtomicRwOp::AcqRel)?, + "atomic_xchg_relaxed" => this.atomic_exchange(args, dest, AtomicRwOp::Relaxed)?, "atomic_cxchg" => this.atomic_compare_exchange( - args, dest, AtomicRWOp::SeqCst, AtomicReadOp::SeqCst + args, dest, AtomicRwOp::SeqCst, AtomicReadOp::SeqCst )?, "atomic_cxchg_acq" => this.atomic_compare_exchange( - args, dest, AtomicRWOp::Acquire, AtomicReadOp::Acquire + args, dest, AtomicRwOp::Acquire, AtomicReadOp::Acquire )?, "atomic_cxchg_rel" => this.atomic_compare_exchange( - args, dest, AtomicRWOp::Release, AtomicReadOp::Relaxed + args, dest, AtomicRwOp::Release, AtomicReadOp::Relaxed )?, "atomic_cxchg_acqrel" => this.atomic_compare_exchange - (args, dest, AtomicRWOp::AcqRel, AtomicReadOp::Acquire + (args, dest, AtomicRwOp::AcqRel, AtomicReadOp::Acquire )?, "atomic_cxchg_relaxed" => this.atomic_compare_exchange( - args, dest, AtomicRWOp::Relaxed, AtomicReadOp::Relaxed + args, dest, 
AtomicRwOp::Relaxed, AtomicReadOp::Relaxed )?, "atomic_cxchg_acq_failrelaxed" => this.atomic_compare_exchange( - args, dest, AtomicRWOp::Acquire, AtomicReadOp::Relaxed + args, dest, AtomicRwOp::Acquire, AtomicReadOp::Relaxed )?, "atomic_cxchg_acqrel_failrelaxed" => this.atomic_compare_exchange( - args, dest, AtomicRWOp::AcqRel, AtomicReadOp::Relaxed + args, dest, AtomicRwOp::AcqRel, AtomicReadOp::Relaxed )?, "atomic_cxchg_failrelaxed" => this.atomic_compare_exchange( - args, dest, AtomicRWOp::SeqCst, AtomicReadOp::Relaxed + args, dest, AtomicRwOp::SeqCst, AtomicReadOp::Relaxed )?, "atomic_cxchg_failacq" => this.atomic_compare_exchange( - args, dest, AtomicRWOp::SeqCst, AtomicReadOp::Acquire + args, dest, AtomicRwOp::SeqCst, AtomicReadOp::Acquire )?, "atomic_cxchgweak" => this.atomic_compare_exchange_weak( - args, dest, AtomicRWOp::SeqCst, AtomicReadOp::SeqCst + args, dest, AtomicRwOp::SeqCst, AtomicReadOp::SeqCst )?, "atomic_cxchgweak_acq" => this.atomic_compare_exchange_weak( - args, dest, AtomicRWOp::Acquire, AtomicReadOp::Acquire + args, dest, AtomicRwOp::Acquire, AtomicReadOp::Acquire )?, "atomic_cxchgweak_rel" => this.atomic_compare_exchange_weak( - args, dest, AtomicRWOp::Release, AtomicReadOp::Relaxed + args, dest, AtomicRwOp::Release, AtomicReadOp::Relaxed )?, "atomic_cxchgweak_acqrel" => this.atomic_compare_exchange_weak( - args, dest, AtomicRWOp::AcqRel, AtomicReadOp::Acquire + args, dest, AtomicRwOp::AcqRel, AtomicReadOp::Acquire )?, "atomic_cxchgweak_relaxed" => this.atomic_compare_exchange_weak( - args, dest, AtomicRWOp::Relaxed, AtomicReadOp::Relaxed + args, dest, AtomicRwOp::Relaxed, AtomicReadOp::Relaxed )?, "atomic_cxchgweak_acq_failrelaxed" => this.atomic_compare_exchange_weak( - args, dest, AtomicRWOp::Acquire, AtomicReadOp::Relaxed + args, dest, AtomicRwOp::Acquire, AtomicReadOp::Relaxed )?, "atomic_cxchgweak_acqrel_failrelaxed" => this.atomic_compare_exchange_weak( - args, dest, AtomicRWOp::AcqRel, AtomicReadOp::Relaxed + args, dest, AtomicRwOp::AcqRel, AtomicReadOp::Relaxed )?, "atomic_cxchgweak_failrelaxed" => this.atomic_compare_exchange_weak( - args, dest, AtomicRWOp::SeqCst, AtomicReadOp::Relaxed + args, dest, AtomicRwOp::SeqCst, AtomicReadOp::Relaxed )?, "atomic_cxchgweak_failacq" => this.atomic_compare_exchange_weak( - args, dest, AtomicRWOp::SeqCst, AtomicReadOp::Acquire + args, dest, AtomicRwOp::SeqCst, AtomicReadOp::Acquire )?, - "atomic_or" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRWOp::SeqCst)?, - "atomic_or_acq" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRWOp::Acquire)?, - "atomic_or_rel" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRWOp::Release)?, - "atomic_or_acqrel" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRWOp::AcqRel)?, - "atomic_or_relaxed" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRWOp::Relaxed)?, - "atomic_xor" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRWOp::SeqCst)?, - "atomic_xor_acq" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRWOp::Acquire)?, - "atomic_xor_rel" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRWOp::Release)?, - "atomic_xor_acqrel" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRWOp::AcqRel)?, - "atomic_xor_relaxed" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRWOp::Relaxed)?, - "atomic_and" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRWOp::SeqCst)?, - "atomic_and_acq" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRWOp::Acquire)?, - "atomic_and_rel" => 
this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRWOp::Release)?, - "atomic_and_acqrel" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRWOp::AcqRel)?, - "atomic_and_relaxed" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRWOp::Relaxed)?, - "atomic_nand" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRWOp::SeqCst)?, - "atomic_nand_acq" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRWOp::Acquire)?, - "atomic_nand_rel" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRWOp::Release)?, - "atomic_nand_acqrel" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRWOp::AcqRel)?, - "atomic_nand_relaxed" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRWOp::Relaxed)?, - "atomic_xadd" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRWOp::SeqCst)?, - "atomic_xadd_acq" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRWOp::Acquire)?, - "atomic_xadd_rel" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRWOp::Release)?, - "atomic_xadd_acqrel" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRWOp::AcqRel)?, - "atomic_xadd_relaxed" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRWOp::Relaxed)?, - "atomic_xsub" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRWOp::SeqCst)?, - "atomic_xsub_acq" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRWOp::Acquire)?, - "atomic_xsub_rel" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRWOp::Release)?, - "atomic_xsub_acqrel" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRWOp::AcqRel)?, - "atomic_xsub_relaxed" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRWOp::Relaxed)?, + "atomic_or" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRwOp::SeqCst)?, + "atomic_or_acq" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRwOp::Acquire)?, + "atomic_or_rel" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRwOp::Release)?, + "atomic_or_acqrel" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRwOp::AcqRel)?, + "atomic_or_relaxed" => this.atomic_op(args, dest, BinOp::BitOr, false, AtomicRwOp::Relaxed)?, + "atomic_xor" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRwOp::SeqCst)?, + "atomic_xor_acq" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRwOp::Acquire)?, + "atomic_xor_rel" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRwOp::Release)?, + "atomic_xor_acqrel" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRwOp::AcqRel)?, + "atomic_xor_relaxed" => this.atomic_op(args, dest, BinOp::BitXor, false, AtomicRwOp::Relaxed)?, + "atomic_and" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRwOp::SeqCst)?, + "atomic_and_acq" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRwOp::Acquire)?, + "atomic_and_rel" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRwOp::Release)?, + "atomic_and_acqrel" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRwOp::AcqRel)?, + "atomic_and_relaxed" => this.atomic_op(args, dest, BinOp::BitAnd, false, AtomicRwOp::Relaxed)?, + "atomic_nand" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRwOp::SeqCst)?, + "atomic_nand_acq" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRwOp::Acquire)?, + "atomic_nand_rel" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRwOp::Release)?, + "atomic_nand_acqrel" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRwOp::AcqRel)?, + "atomic_nand_relaxed" => this.atomic_op(args, dest, BinOp::BitAnd, true, AtomicRwOp::Relaxed)?, + "atomic_xadd" => 
this.atomic_op(args, dest, BinOp::Add, false, AtomicRwOp::SeqCst)?, + "atomic_xadd_acq" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRwOp::Acquire)?, + "atomic_xadd_rel" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRwOp::Release)?, + "atomic_xadd_acqrel" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRwOp::AcqRel)?, + "atomic_xadd_relaxed" => this.atomic_op(args, dest, BinOp::Add, false, AtomicRwOp::Relaxed)?, + "atomic_xsub" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRwOp::SeqCst)?, + "atomic_xsub_acq" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRwOp::Acquire)?, + "atomic_xsub_rel" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRwOp::Release)?, + "atomic_xsub_acqrel" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRwOp::AcqRel)?, + "atomic_xsub_relaxed" => this.atomic_op(args, dest, BinOp::Sub, false, AtomicRwOp::Relaxed)?, // Query type information @@ -514,7 +514,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx fn atomic_op( &mut self, args: &[OpTy<'tcx, Tag>], dest: PlaceTy<'tcx, Tag>, - op: mir::BinOp, neg: bool, atomic: AtomicRWOp + op: mir::BinOp, neg: bool, atomic: AtomicRwOp ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); @@ -524,39 +524,26 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx bug!("Atomic arithmetic operations only work on integer types"); } let rhs = this.read_immediate(rhs)?; - let old = this.allow_data_races_mut(|this| { - this.read_immediate(place. into()) - })?; // Check alignment requirements. Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must // be 8-aligned). let align = Align::from_bytes(place.layout.size.bytes()).unwrap(); this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; + + let old = this.atomic_op_immediate(place, rhs, op, neg, atomic)?; this.write_immediate(*old, dest)?; // old value is returned - - // Atomics wrap around on overflow. - let val = this.binary_op(op, old, rhs)?; - let val = if neg { this.unary_op(mir::UnOp::Not, val)? } else { val }; - this.allow_data_races_mut(|this| { - this.write_immediate(*val, place.into()) - })?; - - this.validate_atomic_rmw(place, atomic)?; Ok(()) } fn atomic_exchange( - &mut self, args: &[OpTy<'tcx, Tag>], dest: PlaceTy<'tcx, Tag>, atomic: AtomicRWOp + &mut self, args: &[OpTy<'tcx, Tag>], dest: PlaceTy<'tcx, Tag>, atomic: AtomicRwOp ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let &[place, new] = check_arg_count(args)?; let place = this.deref_operand(place)?; let new = this.read_scalar(new)?; - let old = this.allow_data_races_mut(|this| { - this.read_scalar(place.into()) - })?; // Check alignment requirements. Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. 
AtomicU64 on 32bit must @@ -564,18 +551,14 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let align = Align::from_bytes(place.layout.size.bytes()).unwrap(); this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; + let old = this.atomic_exchange_scalar(place, new, atomic)?; this.write_scalar(old, dest)?; // old value is returned - this.allow_data_races_mut(|this| { - this.write_scalar(new, place.into()) - })?; - - this.validate_atomic_rmw(place, atomic)?; Ok(()) } fn atomic_compare_exchange( &mut self, args: &[OpTy<'tcx, Tag>], dest: PlaceTy<'tcx, Tag>, - success: AtomicRWOp, fail: AtomicReadOp + success: AtomicRwOp, fail: AtomicReadOp ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); @@ -584,13 +567,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let expect_old = this.read_immediate(expect_old)?; // read as immediate for the sake of `binary_op()` let new = this.read_scalar(new)?; - // Failure ordering cannot be stronger than success ordering, therefore first attempt - // to read with the failure ordering and if successfull then try again with the success - // read ordering and write in the success case. - // Read as immediate for the sake of `binary_op()` - let old = this.allow_data_races_mut(|this| { - this.read_immediate(place.into()) - })?; // Check alignment requirements. Atomics must always be aligned to their size, // even if the type they wrap would be less aligned (e.g. AtomicU64 on 32bit must @@ -598,31 +574,19 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let align = Align::from_bytes(place.layout.size.bytes()).unwrap(); this.memory.check_ptr_access(place.ptr, place.layout.size, align)?; - // `binary_op` will bail if either of them is not a scalar. - let eq = this.overflowing_binary_op(mir::BinOp::Eq, old, expect_old)?.0; - let res = Immediate::ScalarPair(old.to_scalar_or_uninit(), eq.into()); + + let old = this.atomic_compare_exchange_scalar( + place, expect_old, new, success, fail + )?; // Return old value. - this.write_immediate(res, dest)?; - - // Update ptr depending on comparison. - // if successful, perform a full rw-atomic validation - // otherwise treat this as an atomic load with the fail ordering - if eq.to_bool()? { - this.allow_data_races_mut(|this| { - this.write_scalar(new, place.into()) - })?; - this.validate_atomic_rmw(place, success)?; - } else { - this.validate_atomic_load(place, fail)?; - } - + this.write_immediate(old, dest)?; Ok(()) } fn atomic_compare_exchange_weak( &mut self, args: &[OpTy<'tcx, Tag>], dest: PlaceTy<'tcx, Tag>, - success: AtomicRWOp, fail: AtomicReadOp + success: AtomicRwOp, fail: AtomicReadOp ) -> InterpResult<'tcx> { // FIXME: the weak part of this is currently not modelled, diff --git a/src/shims/posix/linux/sync.rs b/src/shims/posix/linux/sync.rs index 67cea55077..78244ab7b8 100644 --- a/src/shims/posix/linux/sync.rs +++ b/src/shims/posix/linux/sync.rs @@ -78,7 +78,17 @@ pub fn futex<'tcx>( // Read an `i32` through the pointer, regardless of any wrapper types. // It's not uncommon for `addr` to be passed as another type than `*mut i32`, such as `*const AtomicI32`. // FIXME: this fails if `addr` is not a pointer type. - // FIXME: what form of atomic operation should the `futex` use to load the value? 
+ // The atomic ordering for futex(https://man7.org/linux/man-pages/man2/futex.2.html): + // "The load of the value of the futex word is an + // atomic memory access (i.e., using atomic machine instructions + // of the respective architecture). This load, the comparison + // with the expected value, and starting to sleep are performed + // atomically and totally ordered with respect to other futex + // operations on the same futex word." + // SeqCst is total order over all operations, so uses acquire, + // either are equal under the current implementation. + // FIXME: is Acquire correct or should some additional ordering constraints be observed? + // FIXME: use RMW or similar? let futex_val = this.read_scalar_at_offset_atomic( addr.into(), 0, this.machine.layouts.i32, AtomicReadOp::Acquire )?.to_i32()?; diff --git a/src/shims/posix/sync.rs b/src/shims/posix/sync.rs index d741ef346e..64308d0613 100644 --- a/src/shims/posix/sync.rs +++ b/src/shims/posix/sync.rs @@ -64,7 +64,7 @@ fn mutex_get_kind<'mir, 'tcx: 'mir>( let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; ecx.read_scalar_at_offset_atomic( mutex_op, offset, ecx.machine.layouts.i32, - AtomicReadOp::SeqCst + AtomicReadOp::Acquire ) } @@ -76,7 +76,7 @@ fn mutex_set_kind<'mir, 'tcx: 'mir>( let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; ecx.write_scalar_at_offset_atomic( mutex_op, offset, kind, ecx.machine.layouts.i32, - AtomicWriteOp::SeqCst + AtomicWriteOp::Release ) } @@ -85,7 +85,7 @@ fn mutex_get_id<'mir, 'tcx: 'mir>( mutex_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { ecx.read_scalar_at_offset_atomic( - mutex_op, 4, ecx.machine.layouts.u32, AtomicReadOp::SeqCst + mutex_op, 4, ecx.machine.layouts.u32, AtomicReadOp::Acquire ) } @@ -96,7 +96,7 @@ fn mutex_set_id<'mir, 'tcx: 'mir>( ) -> InterpResult<'tcx, ()> { ecx.write_scalar_at_offset_atomic( mutex_op, 4, id, ecx.machine.layouts.u32, - AtomicWriteOp::SeqCst + AtomicWriteOp::Release ) } @@ -129,7 +129,7 @@ fn rwlock_get_id<'mir, 'tcx: 'mir>( ) -> InterpResult<'tcx, ScalarMaybeUninit> { ecx.read_scalar_at_offset_atomic( rwlock_op, 4, ecx.machine.layouts.u32, - AtomicReadOp::SeqCst + AtomicReadOp::Acquire ) } @@ -140,7 +140,7 @@ fn rwlock_set_id<'mir, 'tcx: 'mir>( ) -> InterpResult<'tcx, ()> { ecx.write_scalar_at_offset_atomic( rwlock_op, 4, id, ecx.machine.layouts.u32, - AtomicWriteOp::SeqCst + AtomicWriteOp::Release ) } @@ -196,7 +196,7 @@ fn cond_get_id<'mir, 'tcx: 'mir>( ) -> InterpResult<'tcx, ScalarMaybeUninit> { ecx.read_scalar_at_offset_atomic( cond_op, 4, ecx.machine.layouts.u32, - AtomicReadOp::SeqCst + AtomicReadOp::Acquire ) } @@ -207,7 +207,7 @@ fn cond_set_id<'mir, 'tcx: 'mir>( ) -> InterpResult<'tcx, ()> { ecx.write_scalar_at_offset_atomic( cond_op, 4, id, ecx.machine.layouts.u32, - AtomicWriteOp::SeqCst + AtomicWriteOp::Release ) } diff --git a/src/shims/posix/thread.rs b/src/shims/posix/thread.rs index e823a7d88d..847d083bfa 100644 --- a/src/shims/posix/thread.rs +++ b/src/shims/posix/thread.rs @@ -15,14 +15,14 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let this = self.eval_context_mut(); this.tcx.sess.warn( - "thread support is experimental.", + "thread support is experimental, no weak memory effects are currently emulated.", ); // Create the new thread let new_thread_id = this.create_thread(); // Write the current thread-id, switch to the next thread later - // to treat this write operation as occuring on this thread index + // to treat this write operation as occuring 
on the current thread. let thread_info_place = this.deref_operand(thread)?; this.write_scalar( Scalar::from_uint(new_thread_id.to_u32(), thread_info_place.layout.size), @@ -30,15 +30,16 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx )?; // Read the function argument that will be sent to the new thread - // again perform the read before the thread starts executing. + // before the thread starts executing since reading after the + // context switch will incorrectly report a data-race. let fn_ptr = this.read_scalar(start_routine)?.check_init()?; let func_arg = this.read_immediate(arg)?; - // Also switch to new thread so that we can push the first stackframe. - // after this all accesses will be treated as occuring in the new thread + // Finally switch to new thread so that we can push the first stackframe. + // After this all accesses will be treated as occuring in the new thread. let old_thread_id = this.set_active_thread(new_thread_id); - // Perform the function pointer load in the new thread frame + // Perform the function pointer load in the new thread frame. let instance = this.memory.get_fn(fn_ptr)?.as_instance()?; // Note: the returned value is currently ignored (see the FIXME in @@ -54,7 +55,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx StackPopCleanup::None { cleanup: true }, )?; - // Restore the old active thread frame + // Restore the old active thread frame. this.set_active_thread(old_thread_id); Ok(0) diff --git a/src/sync.rs b/src/sync.rs index 3469afdcd2..828268c06c 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -62,7 +62,7 @@ struct Mutex { /// The queue of threads waiting for this mutex. queue: VecDeque, /// Data race handle - data_race: DataRaceLockHandle + data_race: VClock } declare_id!(RwLockId); @@ -80,9 +80,9 @@ struct RwLock { /// The queue of reader threads waiting for this lock. reader_queue: VecDeque, /// Data race handle for writers - data_race: DataRaceLockHandle, + data_race: VClock, /// Data race handle for readers - data_race_reader: DataRaceLockHandle, + data_race_reader: VClock, } declare_id!(CondvarId); @@ -100,14 +100,14 @@ struct CondvarWaiter { #[derive(Default, Debug)] struct Condvar { waiters: VecDeque, - data_race: DataRaceLockHandle, + data_race: VClock, } /// The futex state. #[derive(Default, Debug)] struct Futex { waiters: VecDeque, - data_race: DataRaceLockHandle, + data_race: VClock, } /// A thread waiting on a futex. @@ -213,7 +213,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx mutex.owner = Some(thread); } mutex.lock_count = mutex.lock_count.checked_add(1).unwrap(); - this.memory.extra.data_race.validate_lock_acquire(&mutex.data_race, thread); + if let Some(data_race) = &this.memory.extra.data_race { + data_race.validate_lock_acquire(&mutex.data_race, thread); + } } /// Try unlocking by decreasing the lock count and returning the old lock @@ -241,7 +243,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx mutex.owner = None; // The mutex is completely unlocked. Try transfering ownership // to another thread. 
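// Illustrative sketch (assumed semantics, not shown in this diff): conceptually,
// `validate_lock_release` presumably publishes the releasing thread's vector clock
// into the lock's `VClock`, and `validate_lock_acquire` joins the stored clock into
// the acquiring thread's clock, so everything the previous owner did happens-before
// everything the next owner does. With the `VClock` API from vector_clock.rs this
// would look roughly like:
//
//     lock_clock.clone_from(&releasing_thread_clock); // release: store the owner's clock
//     acquiring_thread_clock.join(&lock_clock);       // acquire: element-wise maximum
//
// where `lock_clock`, `releasing_thread_clock` and `acquiring_thread_clock` are
// hypothetical names for the clocks involved.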
- this.memory.extra.data_race.validate_lock_release(&mut mutex.data_race, current_owner); + if let Some(data_race) = &this.memory.extra.data_race { + data_race.validate_lock_release(&mut mutex.data_race, current_owner); + } this.mutex_dequeue_and_lock(id); } Some(old_lock_count) @@ -297,7 +301,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let rwlock = &mut this.machine.threads.sync.rwlocks[id]; let count = rwlock.readers.entry(reader).or_insert(0); *count = count.checked_add(1).expect("the reader counter overflowed"); - this.memory.extra.data_race.validate_lock_acquire(&rwlock.data_race, reader); + if let Some(data_race) = &this.memory.extra.data_race { + data_race.validate_lock_acquire(&rwlock.data_race, reader); + } } /// Try read-unlock the lock for `reader` and potentially give the lock to a new owner. @@ -319,7 +325,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx } Entry::Vacant(_) => return false, // we did not even own this lock } - this.memory.extra.data_race.validate_lock_release_shared(&mut rwlock.data_race_reader, reader); + if let Some(data_race) = &this.memory.extra.data_race { + data_race.validate_lock_release_shared(&mut rwlock.data_race_reader, reader); + } // The thread was a reader. If the lock is not held any more, give it to a writer. if this.rwlock_is_locked(id).not() { @@ -328,7 +336,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx // of the union of all reader data race handles, since the set of readers // happen-before the writers let rwlock = &mut this.machine.threads.sync.rwlocks[id]; - rwlock.data_race.set_values(&rwlock.data_race_reader); + rwlock.data_race.clone_from(&rwlock.data_race_reader); this.rwlock_dequeue_and_lock_writer(id); } true @@ -355,7 +363,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx trace!("rwlock_writer_lock: {:?} now held by {:?}", id, writer); let rwlock = &mut this.machine.threads.sync.rwlocks[id]; rwlock.writer = Some(writer); - this.memory.extra.data_race.validate_lock_acquire(&rwlock.data_race, writer); + if let Some(data_race) = &this.memory.extra.data_race { + data_race.validate_lock_acquire(&rwlock.data_race, writer); + } } #[inline] @@ -373,8 +383,10 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx // Release memory to both reader and writer vector clocks // since this writer happens-before both the union of readers once they are finished // and the next writer - this.memory.extra.data_race.validate_lock_release(&mut rwlock.data_race, current_writer); - this.memory.extra.data_race.validate_lock_release(&mut rwlock.data_race_reader, current_writer); + if let Some(data_race) = &this.memory.extra.data_race { + data_race.validate_lock_release(&mut rwlock.data_race, current_writer); + data_race.validate_lock_release(&mut rwlock.data_race_reader, current_writer); + } // The thread was a writer. // // We are prioritizing writers here against the readers. 
As a @@ -435,14 +447,18 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let this = self.eval_context_mut(); let current_thread = this.get_active_thread(); let condvar = &mut this.machine.threads.sync.condvars[id]; - let data_race = &mut this.memory.extra.data_race; + let data_race = &this.memory.extra.data_race; // Each condvar signal happens-before the end of the condvar wake - data_race.validate_lock_release(&mut condvar.data_race, current_thread); + if let Some(data_race) = data_race { + data_race.validate_lock_release(&mut condvar.data_race, current_thread); + } condvar.waiters .pop_front() .map(|waiter| { - data_race.validate_lock_acquire(&mut condvar.data_race, waiter.thread); + if let Some(data_race) = data_race { + data_race.validate_lock_acquire(&mut condvar.data_race, waiter.thread); + } (waiter.thread, waiter.mutex) }) } @@ -466,12 +482,16 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let this = self.eval_context_mut(); let current_thread = this.get_active_thread(); let futex = &mut this.machine.threads.sync.futexes.get_mut(&addr.erase_tag())?; - let data_race = &mut this.memory.extra.data_race; + let data_race = &this.memory.extra.data_race; // Each futex-wake happens-before the end of the futex wait - data_race.validate_lock_release(&mut futex.data_race, current_thread); + if let Some(data_race) = data_race { + data_race.validate_lock_release(&mut futex.data_race, current_thread); + } let res = futex.waiters.pop_front().map(|waiter| { - data_race.validate_lock_acquire(&futex.data_race, waiter.thread); + if let Some(data_race) = data_race { + data_race.validate_lock_acquire(&futex.data_race, waiter.thread); + } waiter.thread }); res diff --git a/src/thread.rs b/src/thread.rs index 40cfd04d79..5d78343041 100644 --- a/src/thread.rs +++ b/src/thread.rs @@ -3,6 +3,7 @@ use std::cell::RefCell; use std::collections::hash_map::Entry; use std::convert::TryFrom; +use std::rc::Rc; use std::num::TryFromIntError; use std::time::{Duration, Instant, SystemTime}; @@ -327,7 +328,7 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { } /// Mark that the active thread tries to join the thread with `joined_thread_id`. - fn join_thread(&mut self, joined_thread_id: ThreadId, data_race: &data_race::GlobalState) -> InterpResult<'tcx> { + fn join_thread(&mut self, joined_thread_id: ThreadId, data_race: &Option>) -> InterpResult<'tcx> { if self.threads[joined_thread_id].join_status != ThreadJoinStatus::Joinable { throw_ub_format!("trying to join a detached or already joined thread"); } @@ -351,9 +352,11 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { self.active_thread, joined_thread_id ); - }else{ + } else { // The thread has already terminated - mark join happens-before - data_race.thread_joined(self.active_thread, joined_thread_id); + if let Some(data_race) = data_race { + data_race.thread_joined(self.active_thread, joined_thread_id); + } } Ok(()) } @@ -428,7 +431,7 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { /// Wakes up threads joining on the active one and deallocates thread-local statics. /// The `AllocId` that can now be freed is returned. 
- fn thread_terminated(&mut self, data_race: &data_race::GlobalState) -> Vec { + fn thread_terminated(&mut self, data_race: &Option>) -> Vec { let mut free_tls_statics = Vec::new(); { let mut thread_local_statics = self.thread_local_alloc_ids.borrow_mut(); @@ -444,12 +447,16 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { }); } // Set the thread into a terminated state in the data-race detector - data_race.thread_terminated(); + if let Some(data_race) = data_race { + data_race.thread_terminated(); + } // Check if we need to unblock any threads. for (i, thread) in self.threads.iter_enumerated_mut() { if thread.state == ThreadState::BlockedOnJoin(self.active_thread) { // The thread has terminated, mark happens-before edge to joining thread - data_race.thread_joined(i, self.active_thread); + if let Some(data_race) = data_race { + data_race.thread_joined(i, self.active_thread); + } trace!("unblocking {:?} because {:?} terminated", i, self.active_thread); thread.state = ThreadState::Enabled; } @@ -463,7 +470,7 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { /// used in stateless model checkers such as Loom: run the active thread as /// long as we can and switch only when we have to (the active thread was /// blocked, terminated, or has explicitly asked to be preempted). - fn schedule(&mut self, data_race: &data_race::GlobalState) -> InterpResult<'tcx, SchedulingAction> { + fn schedule(&mut self, data_race: &Option>) -> InterpResult<'tcx, SchedulingAction> { // Check whether the thread has **just** terminated (`check_terminated` // checks whether the thread has popped all its stack and if yes, sets // the thread state to terminated). @@ -508,7 +515,9 @@ impl<'mir, 'tcx: 'mir> ThreadManager<'mir, 'tcx> { if thread.state == ThreadState::Enabled { if !self.yield_active_thread || id != self.active_thread { self.active_thread = id; - data_race.thread_set_active(self.active_thread); + if let Some(data_race) = data_race { + data_race.thread_set_active(self.active_thread); + } break; } } @@ -563,7 +572,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx fn create_thread(&mut self) -> ThreadId { let this = self.eval_context_mut(); let id = this.machine.threads.create_thread(); - this.memory.extra.data_race.thread_created(id); + if let Some(data_race) = &this.memory.extra.data_race { + data_race.thread_created(id); + } id } @@ -576,7 +587,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx #[inline] fn join_thread(&mut self, joined_thread_id: ThreadId) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - let data_race = &*this.memory.extra.data_race; + let data_race = &this.memory.extra.data_race; this.machine.threads.join_thread(joined_thread_id, data_race)?; Ok(()) } @@ -584,7 +595,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx #[inline] fn set_active_thread(&mut self, thread_id: ThreadId) -> ThreadId { let this = self.eval_context_mut(); - this.memory.extra.data_race.thread_set_active(thread_id); + if let Some(data_race) = &this.memory.extra.data_race { + data_race.thread_set_active(thread_id); + } this.machine.threads.set_active_thread_id(thread_id) } @@ -639,10 +652,12 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx #[inline] fn set_active_thread_name(&mut self, new_thread_name: Vec) { let this = self.eval_context_mut(); - if let Ok(string) = String::from_utf8(new_thread_name.clone()) { - this.memory.extra.data_race.thread_set_name( - 
this.machine.threads.active_thread, string - ); + if let Some(data_race) = &this.memory.extra.data_race { + if let Ok(string) = String::from_utf8(new_thread_name.clone()) { + data_race.thread_set_name( + this.machine.threads.active_thread, string + ); + } } this.machine.threads.set_thread_name(new_thread_name); } @@ -713,7 +728,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx #[inline] fn schedule(&mut self) -> InterpResult<'tcx, SchedulingAction> { let this = self.eval_context_mut(); - let data_race = &*this.memory.extra.data_race; + let data_race = &this.memory.extra.data_race; this.machine.threads.schedule(data_race) } @@ -724,7 +739,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx #[inline] fn thread_terminated(&mut self) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - let data_race = &*this.memory.extra.data_race; + let data_race = &this.memory.extra.data_race; for alloc_id in this.machine.threads.thread_terminated(data_race) { let ptr = this.memory.global_base_pointer(alloc_id.into())?; this.memory.deallocate(ptr, None, MiriMemoryKind::Tls.into())?; diff --git a/src/vector_clock.rs b/src/vector_clock.rs index 8d05eb1b99..110b278852 100644 --- a/src/vector_clock.rs +++ b/src/vector_clock.rs @@ -1,121 +1,132 @@ use std::{ fmt::{self, Debug}, cmp::Ordering, ops::Index, - num::TryFromIntError, convert::TryFrom, mem + convert::TryFrom, mem }; use smallvec::SmallVec; use rustc_index::vec::Idx; use rustc_data_structures::fx::FxHashMap; /// A vector clock index, this is associated with a thread id -/// but in some cases one vector index may be shared with -/// multiple thread ids. +/// but in some cases one vector index may be shared with +/// multiple thread ids if it is safe to do so. #[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)] pub struct VectorIdx(u32); -impl VectorIdx{ +impl VectorIdx { + + #[inline(always)] pub fn to_u32(self) -> u32 { self.0 } + pub const MAX_INDEX: VectorIdx = VectorIdx(u32::MAX); + } impl Idx for VectorIdx { + + #[inline] fn new(idx: usize) -> Self { VectorIdx(u32::try_from(idx).unwrap()) } + #[inline] fn index(self) -> usize { usize::try_from(self.0).unwrap() } -} -impl TryFrom for VectorIdx { - type Error = TryFromIntError; - fn try_from(id: u64) -> Result { - u32::try_from(id).map(|id_u32| Self(id_u32)) - } } impl From for VectorIdx { + + #[inline] fn from(id: u32) -> Self { Self(id) } -} +} -/// A sparse set of vector clocks, where each vector index -/// is associated with a vector clock. -/// This treats all vector clocks that have not been assigned -/// as equal to the all zero vector clocks -/// Is optimized for the common case where only 1 element is stored -/// in the set and the rest can be ignored, falling-back to -/// using an internal hash-map once more than 1 element is assigned -/// at any one time +/// A sparse mapping of vector index values to vector clocks, this +/// is optimized for the common case with only one element stored +/// inside the map. +/// This is used to store the set of currently active release +/// sequences at a given memory location, since RMW operations +/// allow for multiple release sequences to be active at once +/// and to be collapsed back to one active release sequence +/// once a non RMW atomic store operation occurs. +/// An all zero vector is considered to be equal to no +/// element stored internally since it will never be +/// stored and has no meaning as a release sequence +/// vector clock.
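// Illustrative usage sketch (added for exposition; it mirrors the `test_vclock_set`
// unit test further below, and `from_slice` is the helper those tests use to build
// a `VClock` from a slice of timestamps):
//
//     let mut map = VSmallClockMap::default();
//     let (v1, v2) = (from_slice(&[3, 0, 1]), from_slice(&[4, 2, 3]));
//     map.insert(VectorIdx(0), &v1);  // one entry: stays in the small representation
//     map.insert(VectorIdx(5), &v2);  // second entry: falls back to the internal hash-map
//     assert_eq!(map.get(VectorIdx(5)), Some(&v2));
//     map.retain_index(VectorIdx(5)); // keep only the release sequence for index 5
//     assert_eq!(map.get(VectorIdx(0)), None);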
#[derive(Clone)] -pub struct VSmallClockSet(VSmallClockSetInner); +pub struct VSmallClockMap(VSmallClockMapInner); #[derive(Clone)] -enum VSmallClockSetInner { +enum VSmallClockMapInner { + /// Zero or 1 vector elements, common - /// case for the sparse set. + /// case for the sparse set. /// The all zero vector clock is treated - /// as equal to the empty element + /// as equal to the empty element. Small(VectorIdx, VClock), - /// Hash-map of vector clocks + /// Hash-map of vector clocks. Large(FxHashMap) } -impl VSmallClockSet { +impl VSmallClockMap { /// Remove all clock vectors from the map, setting them - /// to the zero vector + /// to the zero vector. pub fn clear(&mut self) { match &mut self.0 { - VSmallClockSetInner::Small(_, clock) => { + VSmallClockMapInner::Small(_, clock) => { clock.set_zero_vector() } - VSmallClockSetInner::Large(hash_map) => { + VSmallClockMapInner::Large(hash_map) => { hash_map.clear(); } } } /// Remove all clock vectors except for the clock vector - /// stored at the given index, which is retained + /// stored at the given index, which is retained. pub fn retain_index(&mut self, index: VectorIdx) { match &mut self.0 { - VSmallClockSetInner::Small(small_idx, clock) => { + VSmallClockMapInner::Small(small_idx, clock) => { if index != *small_idx { + // The zero-vector is considered to equal - // the empty element + // the empty element. clock.set_zero_vector() } }, - VSmallClockSetInner::Large(hash_map) => { - hash_map.retain(|idx,_| { - *idx == index - }); + VSmallClockMapInner::Large(hash_map) => { + let value = hash_map.remove(&index).unwrap_or_default(); + self.0 = VSmallClockMapInner::Small(index, value); } } } /// Insert the vector clock into the associated vector - /// index + /// index. pub fn insert(&mut self, index: VectorIdx, clock: &VClock) { match &mut self.0 { - VSmallClockSetInner::Small(small_idx, small_clock) => { + VSmallClockMapInner::Small(small_idx, small_clock) => { if small_clock.is_zero_vector() { + *small_idx = index; small_clock.clone_from(clock); - }else if !clock.is_zero_vector() { + } else if !clock.is_zero_vector() { + + // Convert to using the hash-map representation. let mut hash_map = FxHashMap::default(); hash_map.insert(*small_idx, mem::take(small_clock)); hash_map.insert(index, clock.clone()); - self.0 = VSmallClockSetInner::Large(hash_map); + self.0 = VSmallClockMapInner::Large(hash_map); } }, - VSmallClockSetInner::Large(hash_map) => { + VSmallClockMapInner::Large(hash_map) => { if !clock.is_zero_vector() { hash_map.insert(index, clock.clone()); } @@ -127,41 +138,44 @@ impl VSmallClockSet { /// vector index. 
pub fn get(&self, index: VectorIdx) -> Option<&VClock> { match &self.0 { - VSmallClockSetInner::Small(small_idx, small_clock) => { + VSmallClockMapInner::Small(small_idx, small_clock) => { if *small_idx == index && !small_clock.is_zero_vector() { Some(small_clock) - }else{ + } else { None } }, - VSmallClockSetInner::Large(hash_map) => { + VSmallClockMapInner::Large(hash_map) => { hash_map.get(&index) } } } } -impl Default for VSmallClockSet { +impl Default for VSmallClockMap { + #[inline] fn default() -> Self { - VSmallClockSet( - VSmallClockSetInner::Small(VectorIdx::new(0), VClock::default()) + VSmallClockMap( + VSmallClockMapInner::Small(VectorIdx::new(0), VClock::default()) ) } + } -impl Debug for VSmallClockSet { +impl Debug for VSmallClockMap { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // Print the contents of the small vector clock set as the map - // of vector index to vector clock that they represent + // of vector index to vector clock that they represent. let mut map = f.debug_map(); match &self.0 { - VSmallClockSetInner::Small(small_idx, small_clock) => { + VSmallClockMapInner::Small(small_idx, small_clock) => { if !small_clock.is_zero_vector() { map.entry(&small_idx, &small_clock); } }, - VSmallClockSetInner::Large(hash_map) => { + VSmallClockMapInner::Large(hash_map) => { for (idx, elem) in hash_map.iter() { map.entry(idx, elem); } @@ -169,30 +183,35 @@ impl Debug for VSmallClockSet { } map.finish() } + } -impl PartialEq for VSmallClockSet { + + +impl PartialEq for VSmallClockMap { + fn eq(&self, other: &Self) -> bool { - use VSmallClockSetInner::*; + use VSmallClockMapInner::*; match (&self.0, &other.0) { (Small(i1, c1), Small(i2, c2)) => { if c1.is_zero_vector() { // Either they are both zero or they are non-equal c2.is_zero_vector() - }else{ + } else { // At least one is non-zero, so the full comparison is correct i1 == i2 && c1 == c2 } } - (VSmallClockSetInner::Small(idx, clock), VSmallClockSetInner::Large(hash_map)) | - (VSmallClockSetInner::Large(hash_map), VSmallClockSetInner::Small(idx, clock)) => { + (Small(idx, clock), Large(hash_map)) | + (Large(hash_map), Small(idx, clock)) => { + if hash_map.len() == 0 { // Equal to the empty hash-map clock.is_zero_vector() - }else if hash_map.len() == 1 { + } else if hash_map.len() == 1 { // Equal to the hash-map with one element let (hash_idx, hash_clock) = hash_map.iter().next().unwrap(); hash_idx == idx && hash_clock == clock - }else{ + } else { false } } @@ -201,32 +220,38 @@ impl PartialEq for VSmallClockSet { } } } + } -impl Eq for VSmallClockSet {} + +impl Eq for VSmallClockMap {} /// The size of the vector-clock to store inline -/// clock vectors larger than this will be stored on the heap +/// clock vectors larger than this will be stored on the heap const SMALL_VECTOR: usize = 4; /// The type of the time-stamps recorded in the data-race detector -/// set to a type of unsigned integer +/// set to a type of unsigned integer pub type VTimestamp = u32; -/// A vector clock for detecting data-races -/// invariants: -/// - the last element in a VClock must not be 0 -/// -- this means that derive(PartialEq & Eq) is correct -/// -- as there is no implicit zero tail that might be equal -/// -- also simplifies the implementation of PartialOrd +/// A vector clock for detecting data-races, this is conceptually +/// a map from a vector index (and thus a thread id) to a timestamp. 
+/// The compare operations require the invariant that the last +/// element in the internal timestamp slice must not be 0, hence +/// all zero vector clocks are always represented by the empty slice. +/// This allows the compare operations to short-circuit +/// the calculation and return the correct result faster; +/// it also means that there is only one unique valid length +/// for each set of vector clock values and hence the PartialEq +/// and Eq derivations are correct. #[derive(PartialEq, Eq, Default, Debug)] pub struct VClock(SmallVec<[VTimestamp; SMALL_VECTOR]>); impl VClock { /// Create a new vector-clock containing all zeros except - /// for a value at the given index + /// for a value at the given index pub fn new_with_index(index: VectorIdx, timestamp: VTimestamp) -> VClock { let len = index.index() + 1; let mut vec = smallvec::smallvec![0; len]; @@ -241,8 +266,8 @@ impl VClock { } /// Get a mutable slice to the internal vector with minimum `min_len` - /// elements, to preserve invariants this vector must modify - /// the `min_len`-1 nth element to a non-zero value + /// elements, to preserve invariants this vector must modify + /// the `min_len`-1 nth element to a non-zero value #[inline] fn get_mut_with_min_len(&mut self, min_len: usize) -> &mut [VTimestamp] { if self.0.len() < min_len { @@ -253,7 +278,7 @@ impl VClock { } /// Increment the vector clock at a known index - /// this will panic if the vector index overflows + /// this will panic if the vector index overflows #[inline] pub fn increment_index(&mut self, idx: VectorIdx) { let idx = idx.index(); @@ -263,8 +288,8 @@ impl VClock { } // Join the two vector-clocks together, this - // sets each vector-element to the maximum value - // of that element in either of the two source elements. + // sets each vector-element to the maximum value + // of that element in either of the two source elements. pub fn join(&mut self, other: &Self) { let rhs_slice = other.as_slice(); let lhs_slice = self.get_mut_with_min_len(rhs_slice.len()); @@ -291,30 +316,43 @@ impl VClock { pub fn is_zero_vector(&self) -> bool { self.0.is_empty() } + } impl Clone for VClock { + fn clone(&self) -> Self { VClock(self.0.clone()) } + + // Optimized clone-from, can be removed + // and replaced with a derive once a similar + // optimization is inserted into SmallVec's + // clone implementation. fn clone_from(&mut self, source: &Self) { let source_slice = source.as_slice(); self.0.clear(); self.0.extend_from_slice(source_slice); } + } impl PartialOrd for VClock { + fn partial_cmp(&self, other: &VClock) -> Option { // Load the values as slices let lhs_slice = self.as_slice(); let rhs_slice = other.as_slice(); - // Iterate through the combined vector slice - // keeping track of the order that is currently possible to satisfy. - // If an ordering relation is detected to be impossible, then bail and - // directly return None + // Iterate through the combined vector slice continuously updating + // the value of `order` to the current comparison of the vector from + // index 0 to the currently checked index. + // An Equal ordering can be converted into Less or Greater ordering + // on finding an element that is less than or greater than the other + // but if one Greater and one Less element-wise comparison is found + // then no ordering is possible and so directly return an ordering + // of None.
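// For illustration (worked examples added for exposition, writing each clock as its
// timestamp slice):
//   [1, 2, 3] vs [1, 2, 3]  => Some(Equal)
//   [1, 2]    vs [1, 2, 3]  => Some(Less)     (missing trailing entries act as implicit zeros)
//   [2, 2, 3] vs [1, 2, 3]  => Some(Greater)
//   [1, 3]    vs [2, 1]     => None           (1 < 2 but 3 > 1, so the clocks are incomparable)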
let mut iter = lhs_slice.iter().zip(rhs_slice.iter()); let mut order = match iter.next() { Some((lhs, rhs)) => lhs.cmp(rhs), @@ -332,23 +370,23 @@ impl PartialOrd for VClock { } } - //Now test if either left or right have trailing elements + // Now test if either left or right have trailing elements, // by the invariant the trailing elements have at least 1 // non zero value, so no additional calculation is required - // to determine the result of the PartialOrder + // to determine the result of the PartialOrder. let l_len = lhs_slice.len(); let r_len = rhs_slice.len(); match l_len.cmp(&r_len) { - // Equal has no additional elements: return current order + // Equal means no additional elements: return current order Ordering::Equal => Some(order), // Right has at least 1 element > than the implicit 0, - // so the only valid values are Ordering::Less or None + // so the only valid values are Ordering::Less or None. Ordering::Less => match order { Ordering::Less | Ordering::Equal => Some(Ordering::Less), Ordering::Greater => None } // Left has at least 1 element > than the implicit 0, - // so the only valid values are Ordering::Greater or None + // so the only valid values are Ordering::Greater or None. Ordering::Greater => match order { Ordering::Greater | Ordering::Equal => Some(Ordering::Greater), Ordering::Less => None @@ -362,28 +400,28 @@ impl PartialOrd for VClock { let rhs_slice = other.as_slice(); // If l_len > r_len then at least one element - // in l_len is > than r_len, therefore the result - // is either Some(Greater) or None, so return false - // early. + // in l_len is > than r_len, therefore the result + // is either Some(Greater) or None, so return false + // early. let l_len = lhs_slice.len(); let r_len = rhs_slice.len(); if l_len <= r_len { // If any elements on the left are greater than the right - // then the result is None or Some(Greater), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. Otherwise l <= r, finally - // the case where the values are potentially equal needs to be considered - // and false returned as well + // then the result is None or Some(Greater), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l <= r, finally + // the case where the values are potentially equal needs to be considered + // and false returned as well let mut equal = l_len == r_len; for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { if l > r { return false - }else if l < r { + } else if l < r { equal = false; } } !equal - }else{ + } else { false } } @@ -394,18 +432,18 @@ impl PartialOrd for VClock { let rhs_slice = other.as_slice(); // If l_len > r_len then at least one element - // in l_len is > than r_len, therefore the result - // is either Some(Greater) or None, so return false - // early. + // in l_len is > than r_len, therefore the result + // is either Some(Greater) or None, so return false + // early. let l_len = lhs_slice.len(); let r_len = rhs_slice.len(); if l_len <= r_len { // If any elements on the left are greater than the right - // then the result is None or Some(Greater), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. Otherwise l <= r + // then the result is None or Some(Greater), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. 
Otherwise l <= r !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l > r) - }else{ + } else { false } } @@ -416,28 +454,28 @@ impl PartialOrd for VClock { let rhs_slice = other.as_slice(); // If r_len > l_len then at least one element - // in r_len is > than l_len, therefore the result - // is either Some(Less) or None, so return false - // early. + // in r_len is > than l_len, therefore the result + // is either Some(Less) or None, so return false + // early. let l_len = lhs_slice.len(); let r_len = rhs_slice.len(); if l_len >= r_len { // If any elements on the left are less than the right - // then the result is None or Some(Less), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. Otherwise l >=, finally - // the case where the values are potentially equal needs to be considered - // and false returned as well + // then the result is None or Some(Less), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. Otherwise l >=, finally + // the case where the values are potentially equal needs to be considered + // and false returned as well let mut equal = l_len == r_len; for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { if l < r { return false - }else if l > r { + } else if l > r { equal = false; } } !equal - }else{ + } else { false } } @@ -448,30 +486,33 @@ impl PartialOrd for VClock { let rhs_slice = other.as_slice(); // If r_len > l_len then at least one element - // in r_len is > than l_len, therefore the result - // is either Some(Less) or None, so return false - // early. + // in r_len is > than l_len, therefore the result + // is either Some(Less) or None, so return false + // early. let l_len = lhs_slice.len(); let r_len = rhs_slice.len(); if l_len >= r_len { // If any elements on the left are less than the right - // then the result is None or Some(Less), both of which - // return false, the earlier test asserts that no elements in the - // extended tail violate this assumption. Otherwise l >= r + // then the result is None or Some(Less), both of which + // return false, the earlier test asserts that no elements in the + // extended tail violate this assumption. 
Otherwise l >= r !lhs_slice.iter().zip(rhs_slice.iter()).any(|(&l, &r)| l < r) - }else{ + } else { false } } + } impl Index for VClock { + type Output = VTimestamp; #[inline] fn index(&self, index: VectorIdx) -> &VTimestamp { self.as_slice().get(index.to_u32() as usize).unwrap_or(&0) } + } @@ -480,7 +521,8 @@ impl Index for VClock { /// test suite #[cfg(test)] mod tests { - use super::{VClock, VTimestamp, VectorIdx, VSmallClockSet}; + + use super::{VClock, VTimestamp, VectorIdx, VSmallClockMap}; use std::cmp::Ordering; #[test] @@ -536,7 +578,7 @@ mod tests { let alt_compare = r.partial_cmp(&l); assert_eq!(alt_compare, o.map(Ordering::reverse), "Invalid alt comparison\n l: {:?}\n r: {:?}",l,r); - //Test operatorsm with faster implementations + //Test operators with faster implementations assert_eq!( matches!(compare,Some(Ordering::Less)), l < r, "Invalid (<):\n l: {:?}\n r: {:?}",l,r @@ -573,30 +615,31 @@ mod tests { #[test] pub fn test_vclock_set() { - let mut set = VSmallClockSet::default(); + let mut map = VSmallClockMap::default(); let v1 = from_slice(&[3,0,1]); let v2 = from_slice(&[4,2,3]); let v3 = from_slice(&[4,8,3]); - set.insert(VectorIdx(0), &v1); - assert_eq!(set.get(VectorIdx(0)), Some(&v1)); - set.insert(VectorIdx(5), &v2); - assert_eq!(set.get(VectorIdx(0)), Some(&v1)); - assert_eq!(set.get(VectorIdx(5)), Some(&v2)); - set.insert(VectorIdx(53), &v3); - assert_eq!(set.get(VectorIdx(0)), Some(&v1)); - assert_eq!(set.get(VectorIdx(5)), Some(&v2)); - assert_eq!(set.get(VectorIdx(53)), Some(&v3)); - set.retain_index(VectorIdx(53)); - assert_eq!(set.get(VectorIdx(0)), None); - assert_eq!(set.get(VectorIdx(5)), None); - assert_eq!(set.get(VectorIdx(53)), Some(&v3)); - set.clear(); - assert_eq!(set.get(VectorIdx(0)), None); - assert_eq!(set.get(VectorIdx(5)), None); - assert_eq!(set.get(VectorIdx(53)), None); - set.insert(VectorIdx(53), &v3); - assert_eq!(set.get(VectorIdx(0)), None); - assert_eq!(set.get(VectorIdx(5)), None); - assert_eq!(set.get(VectorIdx(53)), Some(&v3)); - } + map.insert(VectorIdx(0), &v1); + assert_eq!(map.get(VectorIdx(0)), Some(&v1)); + map.insert(VectorIdx(5), &v2); + assert_eq!(map.get(VectorIdx(0)), Some(&v1)); + assert_eq!(map.get(VectorIdx(5)), Some(&v2)); + map.insert(VectorIdx(53), &v3); + assert_eq!(map.get(VectorIdx(0)), Some(&v1)); + assert_eq!(map.get(VectorIdx(5)), Some(&v2)); + assert_eq!(map.get(VectorIdx(53)), Some(&v3)); + map.retain_index(VectorIdx(53)); + assert_eq!(map.get(VectorIdx(0)), None); + assert_eq!(map.get(VectorIdx(5)), None); + assert_eq!(map.get(VectorIdx(53)), Some(&v3)); + map.clear(); + assert_eq!(map.get(VectorIdx(0)), None); + assert_eq!(map.get(VectorIdx(5)), None); + assert_eq!(map.get(VectorIdx(53)), None); + map.insert(VectorIdx(53), &v3); + assert_eq!(map.get(VectorIdx(0)), None); + assert_eq!(map.get(VectorIdx(5)), None); + assert_eq!(map.get(VectorIdx(53)), Some(&v3)); + } + } diff --git a/tests/run-pass/concurrency/data_race.stderr b/tests/run-pass/concurrency/data_race.stderr index b01247aea4..7ba8087a9b 100644 --- a/tests/run-pass/concurrency/data_race.stderr +++ b/tests/run-pass/concurrency/data_race.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental. +warning: thread support is experimental, no weak memory effects are currently emulated. 
diff --git a/tests/run-pass/concurrency/linux-futex.stderr b/tests/run-pass/concurrency/linux-futex.stderr index b01247aea4..7ba8087a9b 100644 --- a/tests/run-pass/concurrency/linux-futex.stderr +++ b/tests/run-pass/concurrency/linux-futex.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental. +warning: thread support is experimental, no weak memory effects are currently emulated. diff --git a/tests/run-pass/concurrency/simple.stderr b/tests/run-pass/concurrency/simple.stderr index f1550dd25a..24444fdc17 100644 --- a/tests/run-pass/concurrency/simple.stderr +++ b/tests/run-pass/concurrency/simple.stderr @@ -1,4 +1,4 @@ -warning: thread support is experimental. +warning: thread support is experimental, no weak memory effects are currently emulated. thread '' panicked at 'Hello!', $DIR/simple.rs:54:9 note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace diff --git a/tests/run-pass/concurrency/sync.stderr b/tests/run-pass/concurrency/sync.stderr index b01247aea4..7ba8087a9b 100644 --- a/tests/run-pass/concurrency/sync.stderr +++ b/tests/run-pass/concurrency/sync.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental. +warning: thread support is experimental, no weak memory effects are currently emulated. diff --git a/tests/run-pass/concurrency/thread_locals.stderr b/tests/run-pass/concurrency/thread_locals.stderr index b01247aea4..7ba8087a9b 100644 --- a/tests/run-pass/concurrency/thread_locals.stderr +++ b/tests/run-pass/concurrency/thread_locals.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental. +warning: thread support is experimental, no weak memory effects are currently emulated. diff --git a/tests/run-pass/concurrency/tls_lib_drop.stderr b/tests/run-pass/concurrency/tls_lib_drop.stderr index b01247aea4..7ba8087a9b 100644 --- a/tests/run-pass/concurrency/tls_lib_drop.stderr +++ b/tests/run-pass/concurrency/tls_lib_drop.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental. +warning: thread support is experimental, no weak memory effects are currently emulated. diff --git a/tests/run-pass/libc.stderr b/tests/run-pass/libc.stderr index b01247aea4..7ba8087a9b 100644 --- a/tests/run-pass/libc.stderr +++ b/tests/run-pass/libc.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental. +warning: thread support is experimental, no weak memory effects are currently emulated. diff --git a/tests/run-pass/panic/concurrent-panic.stderr b/tests/run-pass/panic/concurrent-panic.stderr index ca6031e57b..885385a8dd 100644 --- a/tests/run-pass/panic/concurrent-panic.stderr +++ b/tests/run-pass/panic/concurrent-panic.stderr @@ -1,4 +1,4 @@ -warning: thread support is experimental. +warning: thread support is experimental, no weak memory effects are currently emulated. Thread 1 starting, will block on mutex Thread 1 reported it has started From 4a1f7ac1f153c7b0df8d7603b4852cc24b22c039 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Sun, 15 Nov 2020 19:50:38 +0000 Subject: [PATCH 11/17] Convert extra benchmark program into cfg option. 
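The dangling-thread variant is folded into the existing `mse` benchmark behind a custom `cfg(increase_thread_usage)` flag, so the separate `mse_and_dangling_thread` crate can be deleted; the extra thread can presumably be enabled by passing that cfg to rustc, for example with `RUSTFLAGS="--cfg increase_thread_usage"`.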
--- bench-cargo-miri/mse/src/main.rs | 3 ++ .../mse_and_dangling_thread/Cargo.toml | 7 ----- .../mse_and_dangling_thread/src/main.rs | 30 ------------------- 3 files changed, 3 insertions(+), 37 deletions(-) delete mode 100644 bench-cargo-miri/mse_and_dangling_thread/Cargo.toml delete mode 100644 bench-cargo-miri/mse_and_dangling_thread/src/main.rs diff --git a/bench-cargo-miri/mse/src/main.rs b/bench-cargo-miri/mse/src/main.rs index b4ad157510..57e2860710 100644 --- a/bench-cargo-miri/mse/src/main.rs +++ b/bench-cargo-miri/mse/src/main.rs @@ -2,6 +2,9 @@ static EXPECTED: &[u8] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, static PCM: &[i16] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1, 0, 1, 0, 0, -2, 0, -2, 0, -2, 0, -2, -2, -2, -3, -3, -3, -3, -4, -2, -5, -2, -5, -2, -4, 0, -4, 0, -4, 0, -4, 1, -4, 1, -4, 2, -4, 2, -4, 2, -4, 2, -4, 2, -3, 1, -4, 0, -4, 0, -5, 0, -5, 0, -5, 0, -4, 2, -4, 3, -4, 4, -3, 5, -2, 5, -3, 6, -3, 6, -3, 5, -3, 5, -2, 4, -2, 3, -5, 0, -6, 0, -3, -2, -4, -4, -9, -5, -9, -4, -4, -2, -4, -2, -4, 0, -2, 1, 1, 1, 4, 2, 8, 2, 12, 1, 13, 0, 12, 0, 11, 0, 8, -2, 7, 0, 7, -3, 11, -8, 15, -9, 17, -6, 17, -5, 13, -3, 7, 0, 3, 0, -2, 0, -4, 0, -4, -2, -6, 0, -14, -2, -17, -4, -8, 0, -7, 5, -17, 7, -18, 10, -7, 18, -2, 25, -3, 27, 0, 31, 4, 34, 4, 34, 8, 36, 8, 37, 2, 36, 4, 34, 8, 28, 3, 15, 0, 11, 0, 12, -5, 8, -4, 10, 0, 23, -4, 31, -8, 30, -2, 30, 0, 26, -6, 22, -6, 20, -12, 15, -19, 10, -10, 13, -14, 6, -43, -13, -43, -16, -9, -12, -10, -29, -42, -40, -37, -28, -5, -21, 1, -24, -8, -20, 4, -18, 26, -24, 44, -26, 66, -30, 86, -37, 88, -41, 72, -46, 50, -31, 28, 23, 14, 64, 16, 51, 26, 32, 34, 39, 42, 48, 35, 58, 0, 72, -36, 69, -59, 58, -98, 54, -124, 36, -103, 12, 
-110, 5, -173, -19, -146, -59, -4, -42, 51, 1, -23, -6, -30, -6, 45, 46, 47, 70, 6, 55, 19, 60, 38, 62, 42, 47, 61, 46, 40, 42, -19, 22, -34, 6, -35, -50, -61, -141, -37, -171, 17, -163, 26, -180, 46, -154, 80, -63, 48, -4, 18, 20, 50, 47, 58, 53, 44, 61, 57, 85, 37, 80, 0, 86, -8, 106, -95, 49, -213, -8, -131, 47, 49, 63, 40, -39, -69, -74, -37, -20, 63, -12, 58, -14, -12, 25, -31, 41, 11, 45, 76, 47, 167, 5, 261, -37, 277, -83, 183, -172, 35, -122, -79, 138, -70, 266, 69, 124, 228, 0, 391, -29, 594, -84, 702, -78, 627, -8, 551, -13, 509, 13, 372, 120, 352, 125, 622, 127, 691, 223, 362, 126, 386, -33, 915, 198, 958, 457, 456, 298, 500, 233, 1027, 469, 1096, 426, 918, 160, 1067, 141, 1220, 189, 1245, 164, 1375, 297, 1378, 503, 1299, 702, 1550, 929, 1799, 855, 1752, 547, 1830, 602, 1928, 832, 1736, 796, 1735, 933, 1961, 1385, 1935, 1562, 2105, 1485, 2716, 1449, 2948, 1305, 2768, 1205, 2716, 1346, 2531, 1450, 2470, 1653, 3117, 2111, 3370, 2176, 2696, 1947, 2925, 2305, 3846, 2658, 2425, 2184, -877, 1981, -2261, 2623, -1645, 2908, -1876, 2732, -2704, 2953, -2484, 3116, -2120, 2954, -2442, 3216, -2466, 3499, -2192, 3234, -2392, 3361, -2497, 3869, -2078, 3772, -1858, 3915, -2066, 4438, -2285, 2934, -2294, -280, -2066, -1762, -1992, -1412, -2298, -1535, -2399, -1789, -2223, -1419, -2244, -1334, -2092, -1476, -1777, -1396, -2014, -1571, -2199, -1574, -1843, -1167, -1910, -1446, -2007, -1818]; fn main() { + #[cfg(increase_thread_usage)] + let thread = std::thread::spawn(|| 4); + for _ in 0..2 { mse(PCM.len(), PCM, EXPECTED); } diff --git a/bench-cargo-miri/mse_and_dangling_thread/Cargo.toml b/bench-cargo-miri/mse_and_dangling_thread/Cargo.toml deleted file mode 100644 index 7b4c2dc758..0000000000 --- a/bench-cargo-miri/mse_and_dangling_thread/Cargo.toml +++ /dev/null @@ -1,7 +0,0 @@ -[package] -name = "mse" -version = "0.1.0" -authors = ["Ralf Jung "] -edition = "2018" - -[dependencies] diff --git a/bench-cargo-miri/mse_and_dangling_thread/src/main.rs b/bench-cargo-miri/mse_and_dangling_thread/src/main.rs deleted file mode 100644 index 008e9c80ef..0000000000 --- a/bench-cargo-miri/mse_and_dangling_thread/src/main.rs +++ /dev/null @@ -1,30 +0,0 @@ -static EXPECTED: &[u8] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 1, 0, 255, 255, 1, 0, 255, 255, 1, 0, 255, 255, 1, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 254, 255, 0, 0, 254, 255, 0, 0, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 255, 255, 1, 0, 255, 255, 1, 0, 0, 0, 1, 0, 255, 255, 2, 0, 255, 255, 2, 0, 255, 255, 2, 0, 255, 255, 2, 0, 255, 255, 2, 0, 255, 255, 1, 0, 255, 255, 1, 0, 255, 255, 0, 0, 254, 255, 0, 0, 254, 255, 0, 0, 254, 255, 255, 255, 254, 255, 254, 255, 254, 255, 253, 255, 253, 255, 253, 255, 253, 255, 252, 255, 254, 255, 251, 255, 254, 255, 251, 255, 254, 255, 252, 255, 255, 255, 252, 255, 0, 0, 252, 255, 0, 0, 252, 255, 1, 0, 252, 255, 1, 0, 252, 255, 2, 0, 252, 255, 2, 0, 252, 255, 2, 0, 252, 255, 2, 0, 252, 255, 2, 0, 253, 255, 1, 0, 252, 255, 0, 0, 252, 255, 255, 255, 251, 255, 0, 0, 251, 255, 0, 0, 251, 255, 0, 0, 252, 255, 2, 0, 252, 255, 3, 0, 252, 255, 4, 0, 253, 255, 5, 0, 254, 255, 5, 0, 253, 255, 6, 0, 253, 255, 6, 0, 253, 255, 5, 0, 253, 255, 5, 0, 254, 255, 4, 0, 254, 255, 3, 0, 251, 255, 0, 0, 250, 255, 255, 255, 253, 255, 254, 255, 252, 255, 252, 255, 247, 255, 251, 255, 247, 255, 252, 255, 252, 255, 254, 255, 252, 255, 254, 255, 252, 255, 255, 255, 254, 255, 1, 0, 1, 0, 1, 0, 4, 0, 2, 0, 8, 0, 2, 0, 12, 0, 1, 0, 13, 0, 0, 0, 12, 0, 0, 0, 11, 0, 255, 255, 8, 0, 254, 255, 7, 0, 0, 
0, 7, 0, 253, 255, 11, 0, 248, 255, 15, 0, 247, 255, 17, 0, 250, 255, 17, 0, 251, 255, 13, 0, 253, 255, 7, 0, 255, 255, 3, 0, 255, 255, 254, 255, 255, 255, 252, 255, 255, 255, 252, 255, 254, 255, 250, 255, 255, 255, 242, 255, 254, 255, 239, 255, 252, 255, 248, 255, 255, 255, 249, 255, 5, 0, 239, 255, 7, 0, 238, 255, 10, 0, 249, 255, 18, 0, 254, 255, 25, 0, 253, 255, 27, 0, 0, 0, 31, 0, 4, 0, 34, 0, 4, 0, 34, 0, 8, 0, 36, 0, 8, 0, 37, 0, 2, 0, 36, 0, 4, 0, 34, 0, 8, 0, 28, 0, 3, 0, 15, 0, 255, 255, 11, 0, 0, 0, 12, 0, 251, 255, 8, 0, 252, 255, 10, 0, 0, 0, 23, 0, 252, 255, 31, 0, 248, 255, 30, 0, 254, 255, 30, 0, 255, 255, 26, 0, 250, 255, 22, 0, 250, 255, 20, 0, 244, 255, 15, 0, 237, 255, 10, 0, 246, 255, 13, 0, 242, 255, 6, 0, 213, 255, 243, 255, 213, 255, 240, 255, 247, 255, 244, 255, 246, 255, 227, 255, 214, 255, 216, 255, 219, 255, 228, 255, 251, 255, 235, 255, 1, 0, 232, 255, 248, 255, 236, 255, 4, 0, 238, 255, 26, 0, 232, 255, 44, 0, 230, 255, 66, 0, 226, 255, 86, 0, 219, 255, 88, 0, 215, 255, 72, 0, 210, 255, 50, 0, 225, 255, 28, 0, 23, 0, 14, 0, 64, 0, 16, 0, 51, 0, 26, 0, 32, 0, 34, 0, 39, 0, 42, 0, 48, 0, 35, 0, 58, 0, 255, 255, 72, 0, 220, 255, 69, 0, 197, 255, 58, 0, 158, 255, 54, 0, 132, 255, 36, 0, 153, 255, 12, 0, 146, 255, 5, 0, 83, 255, 237, 255, 110, 255, 197, 255, 252, 255, 214, 255, 51, 0, 1, 0, 233, 255, 250, 255, 226, 255, 250, 255, 45, 0, 46, 0, 47, 0, 70, 0, 6, 0, 55, 0, 19, 0, 60, 0, 38, 0, 62, 0, 42, 0, 47, 0, 61, 0, 46, 0, 40, 0, 42, 0, 237, 255, 22, 0, 222, 255, 6, 0, 221, 255, 206, 255, 195, 255, 115, 255, 219, 255, 85, 255, 17, 0, 93, 255, 26, 0, 76, 255, 46, 0, 102, 255, 80, 0, 193, 255, 48, 0, 252, 255, 18, 0, 20, 0, 50, 0, 47, 0, 58, 0, 53, 0, 44, 0, 61, 0, 57, 0, 85, 0, 37, 0, 80, 0, 0, 0, 86, 0, 248, 255, 106, 0, 161, 255, 49, 0, 43, 255, 248, 255, 125, 255, 47, 0, 49, 0, 63, 0, 40, 0, 217, 255, 187, 255, 182, 255, 219, 255, 236, 255, 63, 0, 244, 255, 58, 0, 242, 255, 244, 255, 25, 0, 225, 255, 41, 0, 11, 0, 45, 0, 76, 0, 47, 0, 167, 0, 5, 0, 5, 1, 219, 255, 21, 1, 173, 255, 183, 0, 84, 255, 35, 0, 134, 255, 177, 255, 138, 0, 186, 255, 10, 1, 69, 0, 124, 0, 228, 0, 0, 0, 135, 1, 227, 255, 82, 2, 172, 255, 190, 2, 178, 255, 115, 2, 248, 255, 39, 2, 243, 255, 253, 1, 13, 0, 116, 1, 120, 0, 96, 1, 125, 0, 110, 2, 127, 0, 179, 2, 223, 0, 106, 1, 126, 0, 130, 1, 223, 255, 147, 3, 198, 0, 190, 3, 201, 1, 200, 1, 42, 1, 244, 1, 233, 0, 3, 4, 213, 1, 72, 4, 170, 1, 150, 3, 160, 0, 43, 4, 141, 0, 196, 4, 189, 0, 221, 4, 164, 0, 95, 5, 41, 1, 98, 5, 247, 1, 19, 5, 190, 2, 14, 6, 161, 3, 7, 7, 87, 3, 216, 6, 35, 2, 38, 7, 90, 2, 136, 7, 64, 3, 200, 6, 28, 3, 199, 6, 165, 3, 169, 7, 105, 5, 143, 7, 26, 6, 57, 8, 205, 5, 156, 10, 169, 5, 132, 11, 25, 5, 208, 10, 181, 4, 156, 10, 66, 5, 227, 9, 170, 5, 166, 9, 117, 6, 45, 12, 63, 8, 42, 13, 128, 8, 136, 10, 155, 7, 109, 11, 1, 9, 6, 15, 98, 10, 121, 9, 136, 8, 147, 252, 189, 7, 43, 247, 63, 10, 147, 249, 92, 11, 172, 248, 172, 10, 112, 245, 137, 11, 76, 246, 44, 12, 184, 247, 138, 11, 118, 246, 144, 12, 94, 246, 171, 13, 112, 247, 162, 12, 168, 246, 33, 13, 63, 246, 29, 15, 226, 247, 188, 14, 190, 248, 75, 15, 238, 247, 86, 17, 19, 247, 118, 11, 10, 247, 232, 254, 238, 247, 30, 249, 56, 248, 124, 250, 6, 247, 1, 250, 161, 246, 3, 249, 81, 247, 117, 250, 60, 247, 202, 250, 212, 247, 60, 250, 15, 249, 140, 250, 34, 248, 221, 249, 105, 247, 218, 249, 205, 248, 113, 251, 138, 248, 90, 250, 41, 248, 230, 248]; -static PCM: &[i16] = &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, -2, 0, -2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1, 0, 1, 0, 0, -2, 0, -2, 0, -2, 0, -2, -2, -2, -3, -3, -3, -3, -4, -2, -5, -2, -5, -2, -4, 0, -4, 0, -4, 0, -4, 1, -4, 1, -4, 2, -4, 2, -4, 2, -4, 2, -4, 2, -3, 1, -4, 0, -4, 0, -5, 0, -5, 0, -5, 0, -4, 2, -4, 3, -4, 4, -3, 5, -2, 5, -3, 6, -3, 6, -3, 5, -3, 5, -2, 4, -2, 3, -5, 0, -6, 0, -3, -2, -4, -4, -9, -5, -9, -4, -4, -2, -4, -2, -4, 0, -2, 1, 1, 1, 4, 2, 8, 2, 12, 1, 13, 0, 12, 0, 11, 0, 8, -2, 7, 0, 7, -3, 11, -8, 15, -9, 17, -6, 17, -5, 13, -3, 7, 0, 3, 0, -2, 0, -4, 0, -4, -2, -6, 0, -14, -2, -17, -4, -8, 0, -7, 5, -17, 7, -18, 10, -7, 18, -2, 25, -3, 27, 0, 31, 4, 34, 4, 34, 8, 36, 8, 37, 2, 36, 4, 34, 8, 28, 3, 15, 0, 11, 0, 12, -5, 8, -4, 10, 0, 23, -4, 31, -8, 30, -2, 30, 0, 26, -6, 22, -6, 20, -12, 15, -19, 10, -10, 13, -14, 6, -43, -13, -43, -16, -9, -12, -10, -29, -42, -40, -37, -28, -5, -21, 1, -24, -8, -20, 4, -18, 26, -24, 44, -26, 66, -30, 86, -37, 88, -41, 72, -46, 50, -31, 28, 23, 14, 64, 16, 51, 26, 32, 34, 39, 42, 48, 35, 58, 0, 72, -36, 69, -59, 58, -98, 54, -124, 36, -103, 12, -110, 5, -173, -19, -146, -59, -4, -42, 51, 1, -23, -6, -30, -6, 45, 46, 47, 70, 6, 55, 19, 60, 38, 62, 42, 47, 61, 46, 40, 42, -19, 22, -34, 6, -35, -50, -61, -141, -37, -171, 17, -163, 26, -180, 46, -154, 80, -63, 48, -4, 18, 20, 50, 47, 58, 53, 44, 61, 57, 85, 37, 80, 0, 86, -8, 106, -95, 49, -213, -8, -131, 47, 49, 63, 40, -39, -69, -74, -37, -20, 63, -12, 58, -14, -12, 25, -31, 41, 11, 45, 76, 47, 167, 5, 261, -37, 277, -83, 183, -172, 35, -122, -79, 138, -70, 266, 69, 124, 228, 0, 391, -29, 594, -84, 702, -78, 627, -8, 551, -13, 509, 13, 372, 120, 352, 125, 622, 127, 691, 223, 362, 126, 386, -33, 915, 198, 958, 457, 456, 298, 500, 233, 1027, 469, 1096, 426, 918, 160, 1067, 141, 1220, 189, 1245, 164, 1375, 297, 1378, 503, 1299, 702, 1550, 
929, 1799, 855, 1752, 547, 1830, 602, 1928, 832, 1736, 796, 1735, 933, 1961, 1385, 1935, 1562, 2105, 1485, 2716, 1449, 2948, 1305, 2768, 1205, 2716, 1346, 2531, 1450, 2470, 1653, 3117, 2111, 3370, 2176, 2696, 1947, 2925, 2305, 3846, 2658, 2425, 2184, -877, 1981, -2261, 2623, -1645, 2908, -1876, 2732, -2704, 2953, -2484, 3116, -2120, 2954, -2442, 3216, -2466, 3499, -2192, 3234, -2392, 3361, -2497, 3869, -2078, 3772, -1858, 3915, -2066, 4438, -2285, 2934, -2294, -280, -2066, -1762, -1992, -1412, -2298, -1535, -2399, -1789, -2223, -1419, -2244, -1334, -2092, -1476, -1777, -1396, -2014, -1571, -2199, -1574, -1843, -1167, -1910, -1446, -2007, -1818]; - -fn main() { - let thread = std::thread::spawn(|| 4); - for _ in 0..2 { - mse(PCM.len(), PCM, EXPECTED); - } - assert_eq!(4, thread.join().unwrap()); -} - -fn read_i16(buffer: &[u8], index: usize) -> i16 { - const SIZE: usize = std::mem::size_of::(); - let mut bytes: [u8; SIZE] = [0u8; SIZE]; - bytes.copy_from_slice(&buffer[(index * SIZE)..(index * SIZE + SIZE)]); - unsafe { std::mem::transmute(bytes) } -} - -fn mse(samples: usize, frame_buf: &[i16], buf_ref: &[u8]) -> f64 { - let mut mse = 0.0; - let max_samples = std::cmp::min(buf_ref.len() / 2, samples as usize); - for i in 0..max_samples { - let ref_res = read_i16(buf_ref, i); - let info_res = frame_buf[i as usize]; - let diff = (ref_res - info_res).abs(); - mse += f64::from(diff.pow(2)); - } - mse / max_samples as f64 -} - From a3b7839bbdde0c5856720dc885250752aefd4207 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Sun, 15 Nov 2020 20:12:58 +0000 Subject: [PATCH 12/17] Add comment regarding seq-cst ordering & add test for disabling the data-race detector. --- src/data_race.rs | 11 ++++++++ .../concurrency/disable_data_race_detector.rs | 28 +++++++++++++++++++ .../disable_data_race_detector.stderr | 2 ++ 3 files changed, 41 insertions(+) create mode 100644 tests/run-pass/concurrency/disable_data_race_detector.rs create mode 100644 tests/run-pass/concurrency/disable_data_race_detector.stderr diff --git a/src/data_race.rs b/src/data_race.rs index 822ceab8fa..bad757bc70 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -27,6 +27,16 @@ //! from acquire/release operations. If weak memory orderings are explored then this //! may need to change or be updated accordingly. //! +//! Per the C++ spec for the memory model a sequentially consistent operation: +//! "A load operation with this memory order performs an acquire operation, +//! a store performs a release operation, and read-modify-write performs +//! both an acquire operation and a release operation, plus a single total +//! order exists in which all threads observe all modifications in the same +//! order (see Sequentially-consistent ordering below) " +//! So in the absence of weak memory effects a seq-cst load & a seq-cst store is identical +//! to a acquire load and a release store given the global sequentially consistent order +//! of the schedule. +//! //! FIXME: //! currently we have our own local copy of the currently active thread index and names, this is due //! in part to the inability to access the current location of threads.active_thread inside the AllocExtra @@ -196,6 +206,7 @@ struct MemoryCellClocks { /// The vector-clock of the timestamp of the last read operation /// performed by a thread since the last write operation occured. + /// It is reset to zero on each write operation. read: VClock, /// Atomic acquire & release sequence tracking clocks. 
diff --git a/tests/run-pass/concurrency/disable_data_race_detector.rs b/tests/run-pass/concurrency/disable_data_race_detector.rs new file mode 100644 index 0000000000..e47a2079c2 --- /dev/null +++ b/tests/run-pass/concurrency/disable_data_race_detector.rs @@ -0,0 +1,28 @@ +// ignore-windows: Concurrency on Windows is not supported yet. +// compile-flags: -Zmiri-disable-data-race-detector + +use std::thread::spawn; + +#[derive(Copy, Clone)] +struct EvilSend(pub T); + +unsafe impl Send for EvilSend {} +unsafe impl Sync for EvilSend {} + +pub fn main() { + let mut a = 0u32; + let b = &mut a as *mut u32; + let c = EvilSend(b); + unsafe { + let j1 = spawn(move || { + *c.0 = 32; + }); + + let j2 = spawn(move || { + *c.0 = 64; //~ ERROR Data race + }); + + j1.join().unwrap(); + j2.join().unwrap(); + } +} diff --git a/tests/run-pass/concurrency/disable_data_race_detector.stderr b/tests/run-pass/concurrency/disable_data_race_detector.stderr new file mode 100644 index 0000000000..7ba8087a9b --- /dev/null +++ b/tests/run-pass/concurrency/disable_data_race_detector.stderr @@ -0,0 +1,2 @@ +warning: thread support is experimental, no weak memory effects are currently emulated. + From 0b0264fc820d12d6c5e6f9f702bc33e8921bb110 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Sun, 15 Nov 2020 20:19:34 +0000 Subject: [PATCH 13/17] Run rustfmt on vector_clock.rs and data_race.rs --- src/data_race.rs | 431 ++++++++++++++++++++++++-------------------- src/vector_clock.rs | 233 +++++++++++++----------- 2 files changed, 356 insertions(+), 308 deletions(-) diff --git a/src/data_race.rs b/src/data_race.rs index bad757bc70..b9542f6e2d 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -11,7 +11,7 @@ //! a data race occurs between two memory accesses if they are on different threads, at least one operation //! is non-atomic, at least one operation is a write and neither access happens-before the other. Read the link //! for full definition. -//! +//! //! This re-uses vector indexes for threads that are known to be unable to report data-races, this is valid //! because it only re-uses vector indexes once all currently-active (not-terminated) threads have an internal //! vector clock that happens-after the join operation of the candidate thread. Threads that have not been joined @@ -43,21 +43,21 @@ //! read, write and deallocate functions and should be cleaned up in the future. use std::{ - fmt::Debug, rc::Rc, - cell::{Cell, RefCell, Ref, RefMut}, mem + cell::{Cell, Ref, RefCell, RefMut}, + fmt::Debug, + mem, + rc::Rc, }; +use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_index::vec::{Idx, IndexVec}; -use rustc_target::abi::Size; use rustc_middle::{mir, ty::layout::TyAndLayout}; -use rustc_data_structures::fx::{FxHashSet, FxHashMap}; +use rustc_target::abi::Size; use crate::{ - MiriEvalContext, MiriEvalContextExt, - ThreadId, Tag, RangeMap, - InterpResult, Pointer, ScalarMaybeUninit, - MPlaceTy, OpTy, MemPlaceMeta, ImmTy, Immediate, - VClock, VSmallClockMap, VectorIdx, VTimestamp + ImmTy, Immediate, InterpResult, MPlaceTy, MemPlaceMeta, MiriEvalContext, MiriEvalContextExt, + OpTy, Pointer, RangeMap, ScalarMaybeUninit, Tag, ThreadId, VClock, VSmallClockMap, VTimestamp, + VectorIdx, }; pub type AllocExtra = VClockAlloc; @@ -89,7 +89,6 @@ pub enum AtomicWriteOp { SeqCst, } - /// Valid atomic fence operations, subset of atomic::Ordering. 
#[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum AtomicFenceOp { @@ -99,14 +98,11 @@ pub enum AtomicFenceOp { SeqCst, } - - /// The current set of vector clocks describing the state /// of a thread, contains the happens-before clock and /// additional metadata to model atomic fence operations. #[derive(Clone, Default, Debug)] struct ThreadClockSet { - /// The increasing clock representing timestamps /// that happen-before this thread. clock: VClock, @@ -120,9 +116,7 @@ struct ThreadClockSet { fence_release: VClock, } - impl ThreadClockSet { - /// Apply the effects of a release fence to this /// set of thread vector clocks. #[inline] @@ -152,7 +146,6 @@ impl ThreadClockSet { } } - /// Error returned by finding a data race /// should be elaborated upon. #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] @@ -164,7 +157,6 @@ pub struct DataRace; /// exists on the memory cell. #[derive(Clone, PartialEq, Eq, Default, Debug)] struct AtomicMemoryCellClocks { - /// The clock-vector of the timestamp of the last atomic /// read operation performed by each thread. /// This detects potential data-races between atomic read @@ -179,7 +171,7 @@ struct AtomicMemoryCellClocks { /// Synchronization vector for acquire-release semantics /// contains the vector of timestamps that will - /// happen-before a thread if an acquire-load is + /// happen-before a thread if an acquire-load is /// performed on the data. sync_vector: VClock, @@ -195,7 +187,6 @@ struct AtomicMemoryCellClocks { /// for data-race detection. #[derive(Clone, PartialEq, Eq, Debug)] struct MemoryCellClocks { - /// The vector-clock timestamp of the last write /// corresponding to the writing threads timestamp. write: VTimestamp, @@ -215,7 +206,6 @@ struct MemoryCellClocks { atomic_ops: Option>, } - /// Create a default memory cell clocks instance /// for uninitialized memory. impl Default for MemoryCellClocks { @@ -224,20 +214,18 @@ impl Default for MemoryCellClocks { read: VClock::default(), write: 0, write_index: VectorIdx::MAX_INDEX, - atomic_ops: None + atomic_ops: None, } } } - impl MemoryCellClocks { - /// Load the internal atomic memory cells if they exist. #[inline] fn atomic(&self) -> Option<&AtomicMemoryCellClocks> { match &self.atomic_ops { Some(op) => Some(&*op), - None => None + None => None, } } @@ -251,7 +239,11 @@ impl MemoryCellClocks { /// Update memory cell data-race tracking for atomic /// load acquire semantics, is a no-op if this memory was /// not used previously as atomic memory. - fn load_acquire(&mut self, clocks: &mut ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + fn load_acquire( + &mut self, + clocks: &mut ThreadClockSet, + index: VectorIdx, + ) -> Result<(), DataRace> { self.atomic_read_detect(clocks, index)?; if let Some(atomic) = self.atomic() { clocks.clock.join(&atomic.sync_vector); @@ -262,7 +254,11 @@ impl MemoryCellClocks { /// Update memory cell data-race tracking for atomic /// load relaxed semantics, is a no-op if this memory was /// not used previously as atomic memory. - fn load_relaxed(&mut self, clocks: &mut ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + fn load_relaxed( + &mut self, + clocks: &mut ThreadClockSet, + index: VectorIdx, + ) -> Result<(), DataRace> { self.atomic_read_detect(clocks, index)?; if let Some(atomic) = self.atomic() { clocks.fence_acquire.join(&atomic.sync_vector); @@ -270,7 +266,6 @@ impl MemoryCellClocks { Ok(()) } - /// Update the memory cell data-race tracking for atomic /// store release semantics. 
fn store_release(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { @@ -313,10 +308,14 @@ impl MemoryCellClocks { atomic.sync_vector.join(&clocks.fence_release); Ok(()) } - + /// Detect data-races with an atomic read, caused by a non-atomic write that does /// not happen-before the atomic-read. - fn atomic_read_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + fn atomic_read_detect( + &mut self, + clocks: &ThreadClockSet, + index: VectorIdx, + ) -> Result<(), DataRace> { log::trace!("Atomic read with vectors: {:#?} :: {:#?}", self, clocks); if self.write <= clocks.clock[self.write_index] { let atomic = self.atomic_mut(); @@ -329,7 +328,11 @@ impl MemoryCellClocks { /// Detect data-races with an atomic write, either with a non-atomic read or with /// a non-atomic write. - fn atomic_write_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + fn atomic_write_detect( + &mut self, + clocks: &ThreadClockSet, + index: VectorIdx, + ) -> Result<(), DataRace> { log::trace!("Atomic write with vectors: {:#?} :: {:#?}", self, clocks); if self.write <= clocks.clock[self.write_index] && self.read <= clocks.clock { let atomic = self.atomic_mut(); @@ -342,7 +345,11 @@ impl MemoryCellClocks { /// Detect races for non-atomic read operations at the current memory cell /// returns true if a data-race is detected. - fn read_race_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + fn read_race_detect( + &mut self, + clocks: &ThreadClockSet, + index: VectorIdx, + ) -> Result<(), DataRace> { log::trace!("Unsynchronized read with vectors: {:#?} :: {:#?}", self, clocks); if self.write <= clocks.clock[self.write_index] { let race_free = if let Some(atomic) = self.atomic() { @@ -363,7 +370,11 @@ impl MemoryCellClocks { /// Detect races for non-atomic write operations at the current memory cell /// returns true if a data-race is detected. - fn write_race_detect(&mut self, clocks: &ThreadClockSet, index: VectorIdx) -> Result<(), DataRace> { + fn write_race_detect( + &mut self, + clocks: &ThreadClockSet, + index: VectorIdx, + ) -> Result<(), DataRace> { log::trace!("Unsynchronized write with vectors: {:#?} :: {:#?}", self, clocks); if self.write <= clocks.clock[self.write_index] && self.read <= clocks.clock { let race_free = if let Some(atomic) = self.atomic() { @@ -385,18 +396,16 @@ impl MemoryCellClocks { } } - /// Evaluation context extensions. impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { - /// Atomic variant of read_scalar_at_offset. fn read_scalar_at_offset_atomic( &self, op: OpTy<'tcx, Tag>, offset: u64, layout: TyAndLayout<'tcx>, - atomic: AtomicReadOp + atomic: AtomicReadOp, ) -> InterpResult<'tcx, ScalarMaybeUninit> { let this = self.eval_context_ref(); let op_place = this.deref_operand(op)?; @@ -415,7 +424,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { offset: u64, value: impl Into>, layout: TyAndLayout<'tcx>, - atomic: AtomicWriteOp + atomic: AtomicWriteOp, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let op_place = this.deref_operand(op)?; @@ -429,46 +438,45 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { /// Perform an atomic read operation at the memory location. 
fn read_scalar_atomic( - &self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp + &self, + place: MPlaceTy<'tcx, Tag>, + atomic: AtomicReadOp, ) -> InterpResult<'tcx, ScalarMaybeUninit> { let this = self.eval_context_ref(); - let scalar = this.allow_data_races_ref(move |this| { - this.read_scalar(place.into()) - })?; + let scalar = this.allow_data_races_ref(move |this| this.read_scalar(place.into()))?; self.validate_atomic_load(place, atomic)?; Ok(scalar) } /// Perform an atomic write operation at the memory location. fn write_scalar_atomic( - &mut self, val: ScalarMaybeUninit, dest: MPlaceTy<'tcx, Tag>, - atomic: AtomicWriteOp + &mut self, + val: ScalarMaybeUninit, + dest: MPlaceTy<'tcx, Tag>, + atomic: AtomicWriteOp, ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - this.allow_data_races_mut(move |this| { - this.write_scalar(val, dest.into()) - })?; + this.allow_data_races_mut(move |this| this.write_scalar(val, dest.into()))?; self.validate_atomic_store(dest, atomic) } /// Perform a atomic operation on a memory location. fn atomic_op_immediate( &mut self, - place: MPlaceTy<'tcx, Tag>, rhs: ImmTy<'tcx, Tag>, - op: mir::BinOp, neg: bool, atomic: AtomicRwOp + place: MPlaceTy<'tcx, Tag>, + rhs: ImmTy<'tcx, Tag>, + op: mir::BinOp, + neg: bool, + atomic: AtomicRwOp, ) -> InterpResult<'tcx, ImmTy<'tcx, Tag>> { let this = self.eval_context_mut(); - let old = this.allow_data_races_mut(|this| { - this.read_immediate(place. into()) - })?; + let old = this.allow_data_races_mut(|this| this.read_immediate(place.into()))?; // Atomics wrap around on overflow. let val = this.binary_op(op, old, rhs)?; let val = if neg { this.unary_op(mir::UnOp::Not, val)? } else { val }; - this.allow_data_races_mut(|this| { - this.write_immediate(*val, place.into()) - })?; + this.allow_data_races_mut(|this| this.write_immediate(*val, place.into()))?; this.validate_atomic_rmw(place, atomic)?; Ok(old) @@ -478,17 +486,14 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { /// scalar value, the old value is returned. fn atomic_exchange_scalar( &mut self, - place: MPlaceTy<'tcx, Tag>, new: ScalarMaybeUninit, - atomic: AtomicRwOp + place: MPlaceTy<'tcx, Tag>, + new: ScalarMaybeUninit, + atomic: AtomicRwOp, ) -> InterpResult<'tcx, ScalarMaybeUninit> { let this = self.eval_context_mut(); - let old = this.allow_data_races_mut(|this| { - this.read_scalar(place.into()) - })?; - this.allow_data_races_mut(|this| { - this.write_scalar(new, place.into()) - })?; + let old = this.allow_data_races_mut(|this| this.read_scalar(place.into()))?; + this.allow_data_races_mut(|this| this.write_scalar(new, place.into()))?; this.validate_atomic_rmw(place, atomic)?; Ok(old) } @@ -497,9 +502,12 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { /// on success an atomic RMW operation is performed and on failure /// only an atomic read occurs. fn atomic_compare_exchange_scalar( - &mut self, place: MPlaceTy<'tcx, Tag>, - expect_old: ImmTy<'tcx, Tag>, new: ScalarMaybeUninit, - success: AtomicRwOp, fail: AtomicReadOp + &mut self, + place: MPlaceTy<'tcx, Tag>, + expect_old: ImmTy<'tcx, Tag>, + new: ScalarMaybeUninit, + success: AtomicRwOp, + fail: AtomicReadOp, ) -> InterpResult<'tcx, Immediate> { let this = self.eval_context_mut(); @@ -507,9 +515,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // to read with the failure ordering and if successfull then try again with the success // read ordering and write in the success case. 
// Read as immediate for the sake of `binary_op()` - let old = this.allow_data_races_mut(|this| { - this.read_immediate(place.into()) - })?; + let old = this.allow_data_races_mut(|this| this.read_immediate(place.into()))?; // `binary_op` will bail if either of them is not a scalar. let eq = this.overflowing_binary_op(mir::BinOp::Eq, old, expect_old)?.0; @@ -519,9 +525,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // if successful, perform a full rw-atomic validation // otherwise treat this as an atomic load with the fail ordering. if eq.to_bool()? { - this.allow_data_races_mut(|this| { - this.write_scalar(new, place.into()) - })?; + this.allow_data_races_mut(|this| this.write_scalar(new, place.into()))?; this.validate_atomic_rmw(place, success)?; } else { this.validate_atomic_load(place, fail)?; @@ -530,68 +534,74 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // Return the old value. Ok(res) } - - + /// Update the data-race detector for an atomic read occuring at the /// associated memory-place and on the current thread. fn validate_atomic_load( - &self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicReadOp + &self, + place: MPlaceTy<'tcx, Tag>, + atomic: AtomicReadOp, ) -> InterpResult<'tcx> { let this = self.eval_context_ref(); this.validate_atomic_op( - place, atomic, "Atomic Load", + place, + atomic, + "Atomic Load", move |memory, clocks, index, atomic| { if atomic == AtomicReadOp::Relaxed { memory.load_relaxed(&mut *clocks, index) } else { memory.load_acquire(&mut *clocks, index) } - } + }, ) } /// Update the data-race detector for an atomic write occuring at the /// associated memory-place and on the current thread. fn validate_atomic_store( - &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicWriteOp + &mut self, + place: MPlaceTy<'tcx, Tag>, + atomic: AtomicWriteOp, ) -> InterpResult<'tcx> { let this = self.eval_context_ref(); this.validate_atomic_op( - place, atomic, "Atomic Store", + place, + atomic, + "Atomic Store", move |memory, clocks, index, atomic| { if atomic == AtomicWriteOp::Relaxed { memory.store_relaxed(clocks, index) } else { memory.store_release(clocks, index) } - } + }, ) } /// Update the data-race detector for an atomic read-modify-write occuring /// at the associated memory place and on the current thread. fn validate_atomic_rmw( - &mut self, place: MPlaceTy<'tcx, Tag>, atomic: AtomicRwOp + &mut self, + place: MPlaceTy<'tcx, Tag>, + atomic: AtomicRwOp, ) -> InterpResult<'tcx> { use AtomicRwOp::*; let acquire = matches!(atomic, Acquire | AcqRel | SeqCst); let release = matches!(atomic, Release | AcqRel | SeqCst); let this = self.eval_context_ref(); - this.validate_atomic_op( - place, atomic, "Atomic RMW", - move |memory, clocks, index, _| { - if acquire { - memory.load_acquire(clocks, index)?; - } else { - memory.load_relaxed(clocks, index)?; - } - if release { - memory.rmw_release(clocks, index) - } else { - memory.rmw_relaxed(clocks, index) - } + this.validate_atomic_op(place, atomic, "Atomic RMW", move |memory, clocks, index, _| { + if acquire { + memory.load_acquire(clocks, index)?; + } else { + memory.load_relaxed(clocks, index)?; } - ) + if release { + memory.rmw_release(clocks, index) + } else { + memory.rmw_relaxed(clocks, index) + } + }) } /// Update the data-race detector for an atomic fence on the current thread. @@ -620,12 +630,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { } } - - /// Vector clock metadata for a logical memory allocation. 
#[derive(Debug, Clone)] pub struct VClockAlloc { - /// Range of Vector clocks, this gives each byte a potentially /// unqiue set of vector clocks, but merges identical information /// together for improved efficiency. @@ -635,16 +642,12 @@ pub struct VClockAlloc { global: MemoryExtra, } - impl VClockAlloc { - /// Create a new data-race allocation detector. pub fn new_allocation(global: &MemoryExtra, len: Size) -> VClockAlloc { VClockAlloc { global: Rc::clone(global), - alloc_ranges: RefCell::new( - RangeMap::new(len, MemoryCellClocks::default()) - ) + alloc_ranges: RefCell::new(RangeMap::new(len, MemoryCellClocks::default())), } } @@ -653,27 +656,29 @@ impl VClockAlloc { fn find_gt_index(l: &VClock, r: &VClock) -> Option { let l_slice = l.as_slice(); let r_slice = r.as_slice(); - l_slice.iter().zip(r_slice.iter()) + l_slice + .iter() + .zip(r_slice.iter()) .enumerate() - .find_map(|(idx, (&l, &r))| { - if l > r { Some(idx) } else { None } - }).or_else(|| { + .find_map(|(idx, (&l, &r))| if l > r { Some(idx) } else { None }) + .or_else(|| { if l_slice.len() > r_slice.len() { - // By invariant, if l_slice is longer // then one element must be larger. // This just validates that this is true // and reports earlier elements first. let l_remainder_slice = &l_slice[r_slice.len()..]; - let idx = l_remainder_slice.iter().enumerate() - .find_map(|(idx, &r)| { - if r == 0 { None } else { Some(idx) } - }).expect("Invalid VClock Invariant"); + let idx = l_remainder_slice + .iter() + .enumerate() + .find_map(|(idx, &r)| if r == 0 { None } else { Some(idx) }) + .expect("Invalid VClock Invariant"); Some(idx) } else { None } - }).map(|idx| VectorIdx::new(idx)) + }) + .map(|idx| VectorIdx::new(idx)) } /// Report a data-race found in the program. @@ -684,39 +689,42 @@ impl VClockAlloc { #[cold] #[inline(never)] fn report_data_race<'tcx>( - global: &MemoryExtra, range: &MemoryCellClocks, - action: &str, is_atomic: bool, - pointer: Pointer, len: Size + global: &MemoryExtra, + range: &MemoryCellClocks, + action: &str, + is_atomic: bool, + pointer: Pointer, + len: Size, ) -> InterpResult<'tcx> { let (current_index, current_clocks) = global.current_thread_state(); let write_clock; - let ( - other_action, other_thread, other_clock - ) = if range.write > current_clocks.clock[range.write_index] { - + let (other_action, other_thread, other_clock) = if range.write + > current_clocks.clock[range.write_index] + { // Convert the write action into the vector clock it // represents for diagnostic purposes. 
write_clock = VClock::new_with_index(range.write_index, range.write); ("WRITE", range.write_index, &write_clock) - } else if let Some(idx) = Self::find_gt_index( - &range.read, ¤t_clocks.clock - ){ + } else if let Some(idx) = Self::find_gt_index(&range.read, ¤t_clocks.clock) { ("READ", idx, &range.read) } else if !is_atomic { if let Some(atomic) = range.atomic() { - if let Some(idx) = Self::find_gt_index( - &atomic.write_vector, ¤t_clocks.clock - ) { + if let Some(idx) = Self::find_gt_index(&atomic.write_vector, ¤t_clocks.clock) + { ("ATOMIC_STORE", idx, &atomic.write_vector) - } else if let Some(idx) = Self::find_gt_index( - &atomic.read_vector, ¤t_clocks.clock - ) { + } else if let Some(idx) = + Self::find_gt_index(&atomic.read_vector, ¤t_clocks.clock) + { ("ATOMIC_LOAD", idx, &atomic.read_vector) } else { - unreachable!("Failed to report data-race for non-atomic operation: no race found") + unreachable!( + "Failed to report data-race for non-atomic operation: no race found" + ) } } else { - unreachable!("Failed to report data-race for non-atomic operation: no atomic component") + unreachable!( + "Failed to report data-race for non-atomic operation: no atomic component" + ) } } else { unreachable!("Failed to report data-race for atomic operation") @@ -725,15 +733,19 @@ impl VClockAlloc { // Load elaborated thread information about the racing thread actions. let current_thread_info = global.print_thread_metadata(current_index); let other_thread_info = global.print_thread_metadata(other_thread); - + // Throw the data-race detection. throw_ub_format!( "Data race detected between {} on {} and {} on {}, memory({:?},offset={},size={})\ \n\t\t -current vector clock = {:?}\ \n\t\t -conflicting timestamp = {:?}", - action, current_thread_info, - other_action, other_thread_info, - pointer.alloc_id, pointer.offset.bytes(), len.bytes(), + action, + current_thread_info, + other_action, + other_thread_info, + pointer.alloc_id, + pointer.offset.bytes(), + len.bytes(), current_clocks.clock, other_clock ) @@ -748,12 +760,16 @@ impl VClockAlloc { if self.global.multi_threaded.get() { let (index, clocks) = self.global.current_thread_state(); let mut alloc_ranges = self.alloc_ranges.borrow_mut(); - for (_,range) in alloc_ranges.iter_mut(pointer.offset, len) { + for (_, range) in alloc_ranges.iter_mut(pointer.offset, len) { if let Err(DataRace) = range.read_race_detect(&*clocks, index) { - // Report data-race. 
return Self::report_data_race( - &self.global,range, "READ", false, pointer, len + &self.global, + range, + "READ", + false, + pointer, + len, ); } } @@ -763,17 +779,25 @@ impl VClockAlloc { } } - // Shared code for detecting data-races on unique access to a section of memory - fn unique_access<'tcx>(&mut self, pointer: Pointer, len: Size, action: &str) -> InterpResult<'tcx> { + fn unique_access<'tcx>( + &mut self, + pointer: Pointer, + len: Size, + action: &str, + ) -> InterpResult<'tcx> { if self.global.multi_threaded.get() { let (index, clocks) = self.global.current_thread_state(); - for (_,range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { + for (_, range) in self.alloc_ranges.get_mut().iter_mut(pointer.offset, len) { if let Err(DataRace) = range.write_race_detect(&*clocks, index) { - // Report data-race return Self::report_data_race( - &self.global, range, action, false, pointer, len + &self.global, + range, + action, + false, + pointer, + len, ); } } @@ -802,7 +826,6 @@ impl VClockAlloc { impl<'mir, 'tcx: 'mir> EvalContextPrivExt<'mir, 'tcx> for MiriEvalContext<'mir, 'tcx> {} trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { - // Temporarily allow data-races to occur, this should only be // used if either one of the appropiate `validate_atomic` functions // will be called to treat a memory access as atomic or if the memory @@ -827,7 +850,10 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { /// so should only be used for atomic operations or internal state that the program cannot /// access. #[inline] - fn allow_data_races_mut(&mut self, op: impl FnOnce(&mut MiriEvalContext<'mir, 'tcx>) -> R) -> R { + fn allow_data_races_mut( + &mut self, + op: impl FnOnce(&mut MiriEvalContext<'mir, 'tcx>) -> R, + ) -> R { let this = self.eval_context_mut(); let old = if let Some(data_race) = &this.memory.extra.data_race { data_race.multi_threaded.replace(false) @@ -848,34 +874,49 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { /// FIXME: is this valid, or should get_raw_mut be used for /// atomic-stores/atomic-rmw? fn validate_atomic_op( - &self, place: MPlaceTy<'tcx, Tag>, - atomic: A, description: &str, + &self, + place: MPlaceTy<'tcx, Tag>, + atomic: A, + description: &str, mut op: impl FnMut( - &mut MemoryCellClocks, &mut ThreadClockSet, VectorIdx, A - ) -> Result<(), DataRace> + &mut MemoryCellClocks, + &mut ThreadClockSet, + VectorIdx, + A, + ) -> Result<(), DataRace>, ) -> InterpResult<'tcx> { let this = self.eval_context_ref(); if let Some(data_race) = &this.memory.extra.data_race { if data_race.multi_threaded.get() { - // Load and log the atomic operation. let place_ptr = place.ptr.assert_ptr(); let size = place.layout.size; - let alloc_meta = &this.memory.get_raw(place_ptr.alloc_id)?.extra.data_race.as_ref().unwrap(); + let alloc_meta = + &this.memory.get_raw(place_ptr.alloc_id)?.extra.data_race.as_ref().unwrap(); log::trace!( "Atomic op({}) with ordering {:?} on memory({:?}, offset={}, size={})", - description, &atomic, place_ptr.alloc_id, place_ptr.offset.bytes(), size.bytes() + description, + &atomic, + place_ptr.alloc_id, + place_ptr.offset.bytes(), + size.bytes() ); // Perform the atomic operation. 
let data_race = &alloc_meta.global; data_race.maybe_perform_sync_operation(|index, mut clocks| { - for (_,range) in alloc_meta.alloc_ranges.borrow_mut().iter_mut(place_ptr.offset, size) { + for (_, range) in + alloc_meta.alloc_ranges.borrow_mut().iter_mut(place_ptr.offset, size) + { if let Err(DataRace) = op(range, &mut *clocks, index, atomic) { mem::drop(clocks); return VClockAlloc::report_data_race( - &alloc_meta.global, range, description, true, - place_ptr, size + &alloc_meta.global, + range, + description, + true, + place_ptr, + size, ); } } @@ -884,10 +925,13 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // Log changes to atomic memory. if log::log_enabled!(log::Level::Trace) { - for (_,range) in alloc_meta.alloc_ranges.borrow().iter(place_ptr.offset, size) { + for (_, range) in alloc_meta.alloc_ranges.borrow().iter(place_ptr.offset, size) + { log::trace!( "Updated atomic memory({:?}, offset={}, size={}) to {:#?}", - place.ptr.assert_ptr().alloc_id, place_ptr.offset.bytes(), size.bytes(), + place.ptr.assert_ptr().alloc_id, + place_ptr.offset.bytes(), + size.bytes(), range.atomic_ops ); } @@ -896,14 +940,11 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { } Ok(()) } - } - /// Extra metadata associated with a thread. #[derive(Debug, Clone, Default)] struct ThreadExtraState { - /// The current vector index in use by the /// thread currently, this is set to None /// after the vector index has been re-used @@ -915,7 +956,7 @@ struct ThreadExtraState { /// diagnostics when reporting detected data /// races. thread_name: Option>, - + /// Thread termination vector clock, this /// is set on thread termination and is used /// for joining on threads since the vector_index @@ -928,7 +969,6 @@ struct ThreadExtraState { /// with each of the threads. #[derive(Debug, Clone)] pub struct GlobalState { - /// Set to true once the first additional /// thread has launched, due to the dependency /// between before and after a thread launch. @@ -966,7 +1006,7 @@ pub struct GlobalState { /// if the number of active threads reduces to 1 and then /// a join operation occures with the remaining main thread /// then multi-threaded execution may be disabled. - active_thread_count: Cell, + active_thread_count: Cell, /// This contains threads that have terminated, but not yet joined /// and so cannot become re-use candidates until a join operation @@ -977,7 +1017,6 @@ pub struct GlobalState { } impl GlobalState { - /// Create a new global state, setup with just thread-id=0 /// advanced to timestamp = 1. pub fn new() -> Self { @@ -989,7 +1028,7 @@ impl GlobalState { current_index: Cell::new(VectorIdx::new(0)), active_thread_count: Cell::new(1), reuse_candidates: RefCell::new(FxHashSet::default()), - terminated_threads: RefCell::new(FxHashMap::default()) + terminated_threads: RefCell::new(FxHashMap::default()), }; // Setup the main-thread since it is not explicitly created: @@ -997,17 +1036,15 @@ impl GlobalState { // the main-thread a name of "main". 
let index = global_state.vector_clocks.borrow_mut().push(ThreadClockSet::default()); global_state.vector_info.borrow_mut().push(ThreadId::new(0)); - global_state.thread_info.borrow_mut().push( - ThreadExtraState { - vector_index: Some(index), - thread_name: Some("main".to_string().into_boxed_str()), - termination_vector_clock: None - } - ); + global_state.thread_info.borrow_mut().push(ThreadExtraState { + vector_index: Some(index), + thread_name: Some("main".to_string().into_boxed_str()), + termination_vector_clock: None, + }); global_state } - + // Try to find vector index values that can potentially be re-used // by a new thread instead of a new vector index being created. fn find_vector_index_reuse_candidate(&self) -> Option { @@ -1015,10 +1052,9 @@ impl GlobalState { let vector_clocks = self.vector_clocks.borrow(); let vector_info = self.vector_info.borrow(); let terminated_threads = self.terminated_threads.borrow(); - for &candidate in reuse.iter() { + for &candidate in reuse.iter() { let target_timestamp = vector_clocks[candidate].clock[candidate]; if vector_clocks.iter_enumerated().all(|(clock_idx, clock)| { - // The thread happens before the clock, and hence cannot report // a data-race with this the candidate index. let no_data_race = clock.clock[candidate] >= target_timestamp; @@ -1026,20 +1062,19 @@ impl GlobalState { // The vector represents a thread that has terminated and hence cannot // report a data-race with the candidate index. let thread_id = vector_info[clock_idx]; - let vector_terminated = reuse.contains(&clock_idx) - || terminated_threads.contains_key(&thread_id); + let vector_terminated = + reuse.contains(&clock_idx) || terminated_threads.contains_key(&thread_id); // The vector index cannot report a race with the candidate index // and hence allows the candidate index to be re-used. no_data_race || vector_terminated }) { - // All vector clocks for each vector index are equal to // the target timestamp, and the thread is known to have // terminated, therefore this vector clock index cannot // report any more data-races. assert!(reuse.remove(&candidate)); - return Some(candidate) + return Some(candidate); } } None @@ -1065,10 +1100,7 @@ impl GlobalState { // Assign a vector index for the thread, attempting to re-use an old // vector index that can no longer report any data-races if possible. - let created_index = if let Some( - reuse_index - ) = self.find_vector_index_reuse_candidate() { - + let created_index = if let Some(reuse_index) = self.find_vector_index_reuse_candidate() { // Now re-configure the re-use candidate, increment the clock // for the new sync use of the vector. let mut vector_clocks = self.vector_clocks.borrow_mut(); @@ -1086,7 +1118,6 @@ impl GlobalState { reuse_index } else { - // No vector re-use candidates available, instead create // a new vector index. let mut vector_info = self.vector_info.borrow_mut(); @@ -1125,13 +1156,16 @@ impl GlobalState { let thread_info = self.thread_info.borrow(); // Load the vector clock of the current thread. - let current_index = thread_info[current_thread].vector_index + let current_index = thread_info[current_thread] + .vector_index .expect("Performed thread join on thread with no assigned vector"); let current = &mut clocks_vec[current_index]; // Load the associated vector clock for the terminated thread. 
- let join_clock = thread_info[join_thread].termination_vector_clock - .as_ref().expect("Joined with thread but thread has not terminated"); + let join_clock = thread_info[join_thread] + .termination_vector_clock + .as_ref() + .expect("Joined with thread but thread has not terminated"); // Pre increment clocks before atomic operation. current.increment_clock(current_index); @@ -1147,13 +1181,12 @@ impl GlobalState { // then test for potentially disabling multi-threaded execution. let active_threads = self.active_thread_count.get(); if active_threads == 1 { - // May potentially be able to disable multi-threaded execution. let current_clock = &clocks_vec[current_index]; - if clocks_vec.iter_enumerated().all(|(idx, clocks)| { - clocks.clock[idx] <= current_clock.clock[idx] - }) { - + if clocks_vec + .iter_enumerated() + .all(|(idx, clocks)| clocks.clock[idx] <= current_clock.clock[idx]) + { // The all thread termations happen-before the current clock // therefore no data-races can be reported until a new thread // is created, so disable multi-threaded execution. @@ -1180,7 +1213,7 @@ impl GlobalState { #[inline] pub fn thread_terminated(&self) { let current_index = self.current_index(); - + // Increment the clock to a unique termination timestamp. let mut vector_clocks = self.vector_clocks.borrow_mut(); let current_clocks = &mut vector_clocks[current_index]; @@ -1201,7 +1234,7 @@ impl GlobalState { // occurs. let mut termination = self.terminated_threads.borrow_mut(); termination.insert(current_thread, current_index); - + // Reduce the number of active threads, now that a thread has // terminated. let mut active_threads = self.active_thread_count.get(); @@ -1215,7 +1248,8 @@ impl GlobalState { #[inline] pub fn thread_set_active(&self, thread: ThreadId) { let thread_info = self.thread_info.borrow(); - let vector_idx = thread_info[thread].vector_index + let vector_idx = thread_info[thread] + .vector_index .expect("Setting thread active with no assigned vector"); self.current_index.set(vector_idx); } @@ -1231,7 +1265,6 @@ impl GlobalState { thread_info[thread].thread_name = Some(name); } - /// Attempt to perform a synchronized operation, this /// will perform no operation if multi-threading is /// not currently enabled. @@ -1240,7 +1273,8 @@ impl GlobalState { /// detection between any happens-before edges the /// operation may create. fn maybe_perform_sync_operation<'tcx>( - &self, op: impl FnOnce(VectorIdx, RefMut<'_,ThreadClockSet>) -> InterpResult<'tcx>, + &self, + op: impl FnOnce(VectorIdx, RefMut<'_, ThreadClockSet>) -> InterpResult<'tcx>, ) -> InterpResult<'tcx> { if self.multi_threaded.get() { let (index, mut clocks) = self.current_thread_state_mut(); @@ -1251,7 +1285,6 @@ impl GlobalState { } Ok(()) } - /// Internal utility to identify a thread stored internally /// returns the id and the name for better diagnostics. @@ -1266,7 +1299,6 @@ impl GlobalState { } } - /// Acquire a lock, express that the previous call of /// `validate_lock_release` must happen before this. pub fn validate_lock_acquire(&self, lock: &VClock, thread: ThreadId) { @@ -1300,7 +1332,8 @@ impl GlobalState { /// used by the thread. 
#[inline] fn load_thread_state_mut(&self, thread: ThreadId) -> (VectorIdx, RefMut<'_, ThreadClockSet>) { - let index = self.thread_info.borrow()[thread].vector_index + let index = self.thread_info.borrow()[thread] + .vector_index .expect("Loading thread state for thread with no assigned vector"); let ref_vector = self.vector_clocks.borrow_mut(); let clocks = RefMut::map(ref_vector, |vec| &mut vec[index]); diff --git a/src/vector_clock.rs b/src/vector_clock.rs index 110b278852..ddee98bcf6 100644 --- a/src/vector_clock.rs +++ b/src/vector_clock.rs @@ -1,10 +1,13 @@ +use rustc_data_structures::fx::FxHashMap; +use rustc_index::vec::Idx; +use smallvec::SmallVec; use std::{ - fmt::{self, Debug}, cmp::Ordering, ops::Index, - convert::TryFrom, mem + cmp::Ordering, + convert::TryFrom, + fmt::{self, Debug}, + mem, + ops::Index, }; -use smallvec::SmallVec; -use rustc_index::vec::Idx; -use rustc_data_structures::fx::FxHashMap; /// A vector clock index, this is associated with a thread id /// but in some cases one vector index may be shared with @@ -13,18 +16,15 @@ use rustc_data_structures::fx::FxHashMap; pub struct VectorIdx(u32); impl VectorIdx { - #[inline(always)] pub fn to_u32(self) -> u32 { self.0 } pub const MAX_INDEX: VectorIdx = VectorIdx(u32::MAX); - } impl Idx for VectorIdx { - #[inline] fn new(idx: usize) -> Self { VectorIdx(u32::try_from(idx).unwrap()) @@ -34,16 +34,13 @@ impl Idx for VectorIdx { fn index(self) -> usize { usize::try_from(self.0).unwrap() } - } impl From for VectorIdx { - #[inline] fn from(id: u32) -> Self { Self(id) } - } /// A sparse mapping of vector index values to vector clocks, this @@ -52,7 +49,7 @@ impl From for VectorIdx { /// This is used to store the set of currently active release /// sequences at a given memory location, since RMW operations /// allow for multiple release sequences to be active at once -/// and to be collapsed back to one active release sequence +/// and to be collapsed back to one active release sequence /// once a non RMW atomic store operation occurs. /// An all zero vector is considered to be equal to no /// element stored internally since it will never be @@ -63,7 +60,6 @@ pub struct VSmallClockMap(VSmallClockMapInner); #[derive(Clone)] enum VSmallClockMapInner { - /// Zero or 1 vector elements, common /// case for the sparse set. /// The all zero vector clock is treated @@ -71,18 +67,15 @@ enum VSmallClockMapInner { Small(VectorIdx, VClock), /// Hash-map of vector clocks. - Large(FxHashMap) + Large(FxHashMap), } impl VSmallClockMap { - /// Remove all clock vectors from the map, setting them /// to the zero vector. pub fn clear(&mut self) { match &mut self.0 { - VSmallClockMapInner::Small(_, clock) => { - clock.set_zero_vector() - } + VSmallClockMapInner::Small(_, clock) => clock.set_zero_vector(), VSmallClockMapInner::Large(hash_map) => { hash_map.clear(); } @@ -95,12 +88,11 @@ impl VSmallClockMap { match &mut self.0 { VSmallClockMapInner::Small(small_idx, clock) => { if index != *small_idx { - // The zero-vector is considered to equal // the empty element. clock.set_zero_vector() } - }, + } VSmallClockMapInner::Large(hash_map) => { let value = hash_map.remove(&index).unwrap_or_default(); self.0 = VSmallClockMapInner::Small(index, value); @@ -114,23 +106,20 @@ impl VSmallClockMap { match &mut self.0 { VSmallClockMapInner::Small(small_idx, small_clock) => { if small_clock.is_zero_vector() { - *small_idx = index; small_clock.clone_from(clock); } else if !clock.is_zero_vector() { - // Convert to using the hash-map representation. 
let mut hash_map = FxHashMap::default(); hash_map.insert(*small_idx, mem::take(small_clock)); hash_map.insert(index, clock.clone()); self.0 = VSmallClockMapInner::Large(hash_map); } - }, - VSmallClockMapInner::Large(hash_map) => { + } + VSmallClockMapInner::Large(hash_map) => if !clock.is_zero_vector() { hash_map.insert(index, clock.clone()); - } - } + }, } } @@ -144,51 +133,39 @@ impl VSmallClockMap { } else { None } - }, - VSmallClockMapInner::Large(hash_map) => { - hash_map.get(&index) } + VSmallClockMapInner::Large(hash_map) => hash_map.get(&index), } } } impl Default for VSmallClockMap { - #[inline] fn default() -> Self { - VSmallClockMap( - VSmallClockMapInner::Small(VectorIdx::new(0), VClock::default()) - ) + VSmallClockMap(VSmallClockMapInner::Small(VectorIdx::new(0), VClock::default())) } - } impl Debug for VSmallClockMap { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // Print the contents of the small vector clock set as the map // of vector index to vector clock that they represent. let mut map = f.debug_map(); match &self.0 { - VSmallClockMapInner::Small(small_idx, small_clock) => { + VSmallClockMapInner::Small(small_idx, small_clock) => if !small_clock.is_zero_vector() { map.entry(&small_idx, &small_clock); - } - }, - VSmallClockMapInner::Large(hash_map) => { + }, + VSmallClockMapInner::Large(hash_map) => for (idx, elem) in hash_map.iter() { map.entry(idx, elem); - } - } + }, } map.finish() } - } - impl PartialEq for VSmallClockMap { - fn eq(&self, other: &Self) -> bool { use VSmallClockMapInner::*; match (&self.0, &other.0) { @@ -201,9 +178,7 @@ impl PartialEq for VSmallClockMap { i1 == i2 && c1 == c2 } } - (Small(idx, clock), Large(hash_map)) | - (Large(hash_map), Small(idx, clock)) => { - + (Small(idx, clock), Large(hash_map)) | (Large(hash_map), Small(idx, clock)) => { if hash_map.len() == 0 { // Equal to the empty hash-map clock.is_zero_vector() @@ -215,18 +190,13 @@ impl PartialEq for VSmallClockMap { false } } - (Large(map1), Large(map2)) => { - map1 == map2 - } + (Large(map1), Large(map2)) => map1 == map2, } } - } impl Eq for VSmallClockMap {} - - /// The size of the vector-clock to store inline /// clock vectors larger than this will be stored on the heap const SMALL_VECTOR: usize = 4; @@ -249,7 +219,6 @@ pub type VTimestamp = u32; pub struct VClock(SmallVec<[VTimestamp; SMALL_VECTOR]>); impl VClock { - /// Create a new vector-clock containing all zeros except /// for a value at the given index pub fn new_with_index(index: VectorIdx, timestamp: VTimestamp) -> VClock { @@ -316,11 +285,9 @@ impl VClock { pub fn is_zero_vector(&self) -> bool { self.0.is_empty() } - } impl Clone for VClock { - fn clone(&self) -> Self { VClock(self.0.clone()) } @@ -334,13 +301,10 @@ impl Clone for VClock { self.0.clear(); self.0.extend_from_slice(source_slice); } - } impl PartialOrd for VClock { - fn partial_cmp(&self, other: &VClock) -> Option { - // Load the values as slices let lhs_slice = self.as_slice(); let rhs_slice = other.as_slice(); @@ -356,17 +320,19 @@ impl PartialOrd for VClock { let mut iter = lhs_slice.iter().zip(rhs_slice.iter()); let mut order = match iter.next() { Some((lhs, rhs)) => lhs.cmp(rhs), - None => Ordering::Equal + None => Ordering::Equal, }; for (l, r) in iter { match order { Ordering::Equal => order = l.cmp(r), - Ordering::Less => if l > r { - return None - }, - Ordering::Greater => if l < r { - return None - } + Ordering::Less => + if l > r { + return None; + }, + Ordering::Greater => + if l < r { + return None; + }, } } @@ -383,14 +349,14 @@ 
impl PartialOrd for VClock { // so the only valid values are Ordering::Less or None. Ordering::Less => match order { Ordering::Less | Ordering::Equal => Some(Ordering::Less), - Ordering::Greater => None - } + Ordering::Greater => None, + }, // Left has at least 1 element > than the implicit 0, // so the only valid values are Ordering::Greater or None. Ordering::Greater => match order { Ordering::Greater | Ordering::Equal => Some(Ordering::Greater), - Ordering::Less => None - } + Ordering::Less => None, + }, } } @@ -415,13 +381,13 @@ impl PartialOrd for VClock { let mut equal = l_len == r_len; for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { if l > r { - return false + return false; } else if l < r { equal = false; } } !equal - } else { + } else { false } } @@ -469,7 +435,7 @@ impl PartialOrd for VClock { let mut equal = l_len == r_len; for (&l, &r) in lhs_slice.iter().zip(rhs_slice.iter()) { if l < r { - return false + return false; } else if l > r { equal = false; } @@ -501,28 +467,24 @@ impl PartialOrd for VClock { false } } - } impl Index for VClock { - type Output = VTimestamp; #[inline] fn index(&self, index: VectorIdx) -> &VTimestamp { - self.as_slice().get(index.to_u32() as usize).unwrap_or(&0) + self.as_slice().get(index.to_u32() as usize).unwrap_or(&0) } - } - /// Test vector clock ordering operations /// data-race detection is tested in the external /// test suite #[cfg(test)] mod tests { - use super::{VClock, VTimestamp, VectorIdx, VSmallClockMap}; + use super::{VClock, VSmallClockMap, VTimestamp, VectorIdx}; use std::cmp::Ordering; #[test] @@ -546,19 +508,43 @@ mod tests { assert_order(&[1], &[1], Some(Ordering::Equal)); assert_order(&[1], &[2], Some(Ordering::Less)); assert_order(&[2], &[1], Some(Ordering::Greater)); - assert_order(&[1], &[1,2], Some(Ordering::Less)); - assert_order(&[2], &[1,2], None); + assert_order(&[1], &[1, 2], Some(Ordering::Less)); + assert_order(&[2], &[1, 2], None); // Misc tests assert_order(&[400], &[0, 1], None); // Large test - assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Equal)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,10], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Greater)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,11], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], None); - assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,0,0], Some(Ordering::Less)); - assert_order(&[0,1,2,3,4,5,6,7,8,9,9 ], &[0,1,2,3,4,5,6,7,8,9,10,0,1,0], Some(Ordering::Less)); + assert_order( + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 0], + Some(Ordering::Equal), + ); + assert_order( + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 0], + Some(Ordering::Less), + ); + assert_order( + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11], + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 0], + Some(Ordering::Greater), + ); + assert_order( + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11], + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 0], + None, + ); + assert_order( + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9], + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 0, 0], + Some(Ordering::Less), + ); + assert_order( + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9], + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 0], + Some(Ordering::Less), + ); } fn from_slice(mut slice: &[VTimestamp]) -> VClock { @@ -574,51 +560,81 @@ mod tests { //Test partial_cmp let compare = l.partial_cmp(&r); - assert_eq!(compare, o, "Invalid comparison\n l: 
{:?}\n r: {:?}",l,r); + assert_eq!(compare, o, "Invalid comparison\n l: {:?}\n r: {:?}", l, r); let alt_compare = r.partial_cmp(&l); - assert_eq!(alt_compare, o.map(Ordering::reverse), "Invalid alt comparison\n l: {:?}\n r: {:?}",l,r); + assert_eq!( + alt_compare, + o.map(Ordering::reverse), + "Invalid alt comparison\n l: {:?}\n r: {:?}", + l, + r + ); //Test operators with faster implementations assert_eq!( - matches!(compare,Some(Ordering::Less)), l < r, - "Invalid (<):\n l: {:?}\n r: {:?}",l,r + matches!(compare, Some(Ordering::Less)), + l < r, + "Invalid (<):\n l: {:?}\n r: {:?}", + l, + r ); assert_eq!( - matches!(compare,Some(Ordering::Less) | Some(Ordering::Equal)), l <= r, - "Invalid (<=):\n l: {:?}\n r: {:?}",l,r + matches!(compare, Some(Ordering::Less) | Some(Ordering::Equal)), + l <= r, + "Invalid (<=):\n l: {:?}\n r: {:?}", + l, + r ); assert_eq!( - matches!(compare,Some(Ordering::Greater)), l > r, - "Invalid (>):\n l: {:?}\n r: {:?}",l,r + matches!(compare, Some(Ordering::Greater)), + l > r, + "Invalid (>):\n l: {:?}\n r: {:?}", + l, + r ); assert_eq!( - matches!(compare,Some(Ordering::Greater) | Some(Ordering::Equal)), l >= r, - "Invalid (>=):\n l: {:?}\n r: {:?}",l,r + matches!(compare, Some(Ordering::Greater) | Some(Ordering::Equal)), + l >= r, + "Invalid (>=):\n l: {:?}\n r: {:?}", + l, + r ); assert_eq!( - matches!(alt_compare,Some(Ordering::Less)), r < l, - "Invalid alt (<):\n l: {:?}\n r: {:?}",l,r + matches!(alt_compare, Some(Ordering::Less)), + r < l, + "Invalid alt (<):\n l: {:?}\n r: {:?}", + l, + r ); assert_eq!( - matches!(alt_compare,Some(Ordering::Less) | Some(Ordering::Equal)), r <= l, - "Invalid alt (<=):\n l: {:?}\n r: {:?}",l,r + matches!(alt_compare, Some(Ordering::Less) | Some(Ordering::Equal)), + r <= l, + "Invalid alt (<=):\n l: {:?}\n r: {:?}", + l, + r ); assert_eq!( - matches!(alt_compare,Some(Ordering::Greater)), r > l, - "Invalid alt (>):\n l: {:?}\n r: {:?}",l,r + matches!(alt_compare, Some(Ordering::Greater)), + r > l, + "Invalid alt (>):\n l: {:?}\n r: {:?}", + l, + r ); assert_eq!( - matches!(alt_compare,Some(Ordering::Greater) | Some(Ordering::Equal)), r >= l, - "Invalid alt (>=):\n l: {:?}\n r: {:?}",l,r + matches!(alt_compare, Some(Ordering::Greater) | Some(Ordering::Equal)), + r >= l, + "Invalid alt (>=):\n l: {:?}\n r: {:?}", + l, + r ); } #[test] pub fn test_vclock_set() { let mut map = VSmallClockMap::default(); - let v1 = from_slice(&[3,0,1]); - let v2 = from_slice(&[4,2,3]); - let v3 = from_slice(&[4,8,3]); + let v1 = from_slice(&[3, 0, 1]); + let v2 = from_slice(&[4, 2, 3]); + let v3 = from_slice(&[4, 8, 3]); map.insert(VectorIdx(0), &v1); assert_eq!(map.get(VectorIdx(0)), Some(&v1)); map.insert(VectorIdx(5), &v2); @@ -641,5 +657,4 @@ mod tests { assert_eq!(map.get(VectorIdx(5)), None); assert_eq!(map.get(VectorIdx(53)), Some(&v3)); } - } From 3268f56a97f2ac7fbcdf6f23e31445a29c529674 Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Sun, 22 Nov 2020 17:28:12 +0000 Subject: [PATCH 14/17] Fix review changes --- src/machine.rs | 2 +- src/shims/posix/linux/sync.rs | 8 ++--- src/sync.rs | 35 +++++++++++++++++-- src/vector_clock.rs | 2 +- .../data_race/relax_acquire_race.rs | 2 +- .../data_race/release_seq_race.rs | 2 +- tests/compile-fail/data_race/rmw_race.rs | 2 +- tests/run-pass/concurrency/data_race.rs | 6 ++-- 8 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/machine.rs b/src/machine.rs index 9612d9e191..02c6691556 100644 --- a/src/machine.rs +++ b/src/machine.rs @@ -150,7 +150,7 @@ impl MemoryExtra { }; let data_race = 
if config.data_race_detector { Some(Rc::new(data_race::GlobalState::new())) - }else{ + } else { None }; MemoryExtra { diff --git a/src/shims/posix/linux/sync.rs b/src/shims/posix/linux/sync.rs index 78244ab7b8..5243431194 100644 --- a/src/shims/posix/linux/sync.rs +++ b/src/shims/posix/linux/sync.rs @@ -85,12 +85,10 @@ pub fn futex<'tcx>( // with the expected value, and starting to sleep are performed // atomically and totally ordered with respect to other futex // operations on the same futex word." - // SeqCst is total order over all operations, so uses acquire, - // either are equal under the current implementation. - // FIXME: is Acquire correct or should some additional ordering constraints be observed? - // FIXME: use RMW or similar? + // SeqCst is total order over all operations. + // FIXME: check if this should be changed when weak memory orders are added. let futex_val = this.read_scalar_at_offset_atomic( - addr.into(), 0, this.machine.layouts.i32, AtomicReadOp::Acquire + addr.into(), 0, this.machine.layouts.i32, AtomicReadOp::SeqCst )?.to_i32()?; if val == futex_val { // The value still matches, so we block the trait make it wait for FUTEX_WAKE. diff --git a/src/sync.rs b/src/sync.rs index 828268c06c..4d488565fa 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -61,7 +61,11 @@ struct Mutex { lock_count: usize, /// The queue of threads waiting for this mutex. queue: VecDeque, - /// Data race handle + /// Data race handle, this tracks the happens-before + /// relationship between each mutex access. It is + /// released to during unlock and acquired from during + /// locking, and therefore stores the clock of the last + /// thread to release this mutex. data_race: VClock } @@ -79,9 +83,24 @@ struct RwLock { writer_queue: VecDeque, /// The queue of reader threads waiting for this lock. reader_queue: VecDeque, - /// Data race handle for writers + /// Data race handle for writers, tracks the happens-before + /// ordering between each write access to a rwlock and is updated + /// after a sequence of concurrent readers to track the happens- + /// before ordering between the set of previous readers and + /// the current writer. + /// Contains the clock of the last thread to release a writer + /// lock or the joined clock of the set of last threads to release + /// shared reader locks. data_race: VClock, - /// Data race handle for readers + /// Data race handle for readers, this is temporary storage + /// for the combined happens-before ordering for between all + /// concurrent readers and the next writer, and the value + /// is stored to the main data_race variable once all + /// readers are finished. + /// Has to be stored separately since reader lock acquires + /// must load the clock of the last write and must not + /// add happens-before orderings between shared reader + /// locks. data_race_reader: VClock, } @@ -100,6 +119,11 @@ struct CondvarWaiter { #[derive(Default, Debug)] struct Condvar { waiters: VecDeque, + /// Tracks the happens-before relationship + /// between a cond-var signal and a cond-var + /// wait during a non-suprious signal event. + /// Contains the clock of the last thread to + /// perform a futex-signal. data_race: VClock, } @@ -107,6 +131,11 @@ struct Condvar { #[derive(Default, Debug)] struct Futex { waiters: VecDeque, + /// Tracks the happens-before relationship + /// between a futex-wake and a futex-wait + /// during a non-spurious wake event. + /// Contains the clock of the last thread to + /// perform a futex-wake. 
data_race: VClock, } diff --git a/src/vector_clock.rs b/src/vector_clock.rs index ddee98bcf6..6840d7e6cb 100644 --- a/src/vector_clock.rs +++ b/src/vector_clock.rs @@ -11,7 +11,7 @@ use std::{ /// A vector clock index, this is associated with a thread id /// but in some cases one vector index may be shared with -/// multiple thread ids id it safe to do so. +/// multiple thread ids if it safe to do so. #[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)] pub struct VectorIdx(u32); diff --git a/tests/compile-fail/data_race/relax_acquire_race.rs b/tests/compile-fail/data_race/relax_acquire_race.rs index f7d44c30b6..4b736e5720 100644 --- a/tests/compile-fail/data_race/relax_acquire_race.rs +++ b/tests/compile-fail/data_race/relax_acquire_race.rs @@ -31,7 +31,7 @@ pub fn main() { let j3 = spawn(move || { if SYNC.load(Ordering::Acquire) == 2 { *c.0 //~ ERROR Data race - }else{ + } else { 0 } }); diff --git a/tests/compile-fail/data_race/release_seq_race.rs b/tests/compile-fail/data_race/release_seq_race.rs index dc852cdb4d..0278e98643 100644 --- a/tests/compile-fail/data_race/release_seq_race.rs +++ b/tests/compile-fail/data_race/release_seq_race.rs @@ -35,7 +35,7 @@ pub fn main() { sleep(Duration::from_millis(1000)); if SYNC.load(Ordering::Acquire) == 3 { *c.0 //~ ERROR Data race - }else{ + } else { 0 } }); diff --git a/tests/compile-fail/data_race/rmw_race.rs b/tests/compile-fail/data_race/rmw_race.rs index bebd01efa1..c533f595f1 100644 --- a/tests/compile-fail/data_race/rmw_race.rs +++ b/tests/compile-fail/data_race/rmw_race.rs @@ -32,7 +32,7 @@ pub fn main() { let j3 = spawn(move || { if SYNC.load(Ordering::Acquire) == 3 { *c.0 //~ ERROR Data race - }else{ + } else { 0 } }); diff --git a/tests/run-pass/concurrency/data_race.rs b/tests/run-pass/concurrency/data_race.rs index 75e56e8c8d..64e90024ed 100644 --- a/tests/run-pass/concurrency/data_race.rs +++ b/tests/run-pass/concurrency/data_race.rs @@ -28,7 +28,7 @@ fn test_fence_sync() { if SYNC.load(Ordering::Relaxed) == 1 { fence(Ordering::Acquire); unsafe { *evil_ptr.0 } - }else{ + } else { 0 } }); @@ -77,7 +77,7 @@ pub fn test_rmw_no_block() { let j3 = spawn(move || { if SYNC.load(Ordering::Acquire) == 2 { *c.0 - }else{ + } else { 0 } }); @@ -104,7 +104,7 @@ pub fn test_release_no_block() { let j2 = spawn(move || { if SYNC.load(Ordering::Acquire) == 3 { *c.0 - }else{ + } else { 0 } }); From 55fc552d9900e2f53ad4302da9387da32d7bcf8d Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Fri, 27 Nov 2020 19:26:06 +0000 Subject: [PATCH 15/17] Apply review changes, incrementing the clocks twice is an unnecessary hold-over from earlier versions so fixed. 
--- src/data_race.rs | 40 +++++++++++++------ src/shims/posix/sync.rs | 35 +++++++++++----- src/shims/posix/thread.rs | 2 +- ..._race.rs => atomic_read_na_write_race1.rs} | 0 ...e_alt.rs => atomic_read_na_write_race2.rs} | 0 ..._race.rs => atomic_write_na_read_race1.rs} | 0 ...e_alt.rs => atomic_write_na_read_race2.rs} | 0 ...race.rs => atomic_write_na_write_race1.rs} | 0 ..._alt.rs => atomic_write_na_write_race2.rs} | 0 .../data_race/dangling_thread_async_race.rs | 6 +-- .../data_race/dangling_thread_race.rs | 6 +-- .../data_race/enable_after_join_to_main.rs | 4 +- .../data_race/relax_acquire_race.rs | 7 ++++ .../data_race/release_seq_race.rs | 8 ++++ tests/compile-fail/data_race/rmw_race.rs | 7 ++++ tests/run-pass/concurrency/data_race.stderr | 2 +- .../concurrency/disable_data_race_detector.rs | 2 +- .../disable_data_race_detector.stderr | 2 +- tests/run-pass/concurrency/linux-futex.stderr | 2 +- tests/run-pass/concurrency/simple.stderr | 2 +- tests/run-pass/concurrency/sync.stderr | 2 +- .../run-pass/concurrency/thread_locals.stderr | 2 +- .../run-pass/concurrency/tls_lib_drop.stderr | 2 +- tests/run-pass/libc.stderr | 2 +- tests/run-pass/panic/concurrent-panic.stderr | 2 +- 25 files changed, 95 insertions(+), 40 deletions(-) rename tests/compile-fail/data_race/{atomic_read_write_race.rs => atomic_read_na_write_race1.rs} (100%) rename tests/compile-fail/data_race/{atomic_read_write_race_alt.rs => atomic_read_na_write_race2.rs} (100%) rename tests/compile-fail/data_race/{atomic_write_read_race.rs => atomic_write_na_read_race1.rs} (100%) rename tests/compile-fail/data_race/{atomic_write_read_race_alt.rs => atomic_write_na_read_race2.rs} (100%) rename tests/compile-fail/data_race/{atomic_write_write_race.rs => atomic_write_na_write_race1.rs} (100%) rename tests/compile-fail/data_race/{atomic_write_write_race_alt.rs => atomic_write_na_write_race2.rs} (100%) diff --git a/src/data_race.rs b/src/data_race.rs index b9542f6e2d..3f70631d13 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -37,6 +37,24 @@ //! to a acquire load and a release store given the global sequentially consistent order //! of the schedule. //! +//! The timestamps used in the data-race detector assign each sequence of non-atomic operations +//! followed by a single atomic or concurrent operation a single timestamp. +//! Write, Read, Write, ThreadJoin will be represented by a single timestamp value on a thread +//! This is because extra increment operations between the operations in the sequence are not +//! required for accurate reporting of data-race values. +//! +//! If the timestamp was not incremented after the atomic operation, then data-races would not be detected: +//! Example - this should report a data-race but does not: +//! t1: (x,0), atomic[release A], t1=(x+1, 0 ), write(var B), +//! t2: (0,y) , atomic[acquire A], t2=(x+1, y+1), ,write(var B) +//! +//! The timestamp is not incremented before an atomic operation, since the result is indistinguishable +//! from the value not being incremented. +//! t: (x, 0), atomic[release _], (x + 1, 0) || (0, y), atomic[acquire _], (x, _) +//! vs t: (x, 0), atomic[release _], (x + 1, 0) || (0, y), atomic[acquire _], (x+1, _) +//! Both result in the sequence on thread x up to and including the atomic release as happening +//! before the acquire. +//! //! FIXME: //! currently we have our own local copy of the currently active thread index and names, this is due //! 
in part to the inability to access the current location of threads.active_thread inside the AllocExtra @@ -499,7 +517,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { } /// Perform an atomic compare and exchange at a given memory location - /// on success an atomic RMW operation is performed and on failure + /// On success an atomic RMW operation is performed and on failure /// only an atomic read occurs. fn atomic_compare_exchange_scalar( &mut self, @@ -1136,9 +1154,6 @@ impl GlobalState { // Now load the two clocks and configure the initial state. let (current, created) = vector_clocks.pick2_mut(current_index, created_index); - // Advance the current thread before the synchronized operation. - current.increment_clock(current_index); - // Join the created with current, since the current threads // previous actions happen-before the created thread. created.join_with(current); @@ -1167,14 +1182,12 @@ impl GlobalState { .as_ref() .expect("Joined with thread but thread has not terminated"); - // Pre increment clocks before atomic operation. - current.increment_clock(current_index); // The join thread happens-before the current thread // so update the current vector clock. current.clock.join(join_clock); - // Post increment clocks after atomic operation. + // Increment clocks after atomic operation. current.increment_clock(current_index); // Check the number of active threads, if the value is 1 @@ -1277,8 +1290,7 @@ impl GlobalState { op: impl FnOnce(VectorIdx, RefMut<'_, ThreadClockSet>) -> InterpResult<'tcx>, ) -> InterpResult<'tcx> { if self.multi_threaded.get() { - let (index, mut clocks) = self.current_thread_state_mut(); - clocks.increment_clock(index); + let (index, clocks) = self.current_thread_state_mut(); op(index, clocks)?; let (_, mut clocks) = self.current_thread_state_mut(); clocks.increment_clock(index); @@ -1303,16 +1315,18 @@ impl GlobalState { /// `validate_lock_release` must happen before this. pub fn validate_lock_acquire(&self, lock: &VClock, thread: ThreadId) { let (index, mut clocks) = self.load_thread_state_mut(thread); - clocks.increment_clock(index); clocks.clock.join(&lock); clocks.increment_clock(index); } /// Release a lock handle, express that this happens-before /// any subsequent calls to `validate_lock_acquire`. + /// For normal locks this should be equivalent to `validate_lock_release_shared` + /// since an acquire operation should have occured before, however + /// for futex & cond-var operations this is not the case and this + /// operation must be used. pub fn validate_lock_release(&self, lock: &mut VClock, thread: ThreadId) { let (index, mut clocks) = self.load_thread_state_mut(thread); - clocks.increment_clock(index); lock.clone_from(&clocks.clock); clocks.increment_clock(index); } @@ -1321,9 +1335,11 @@ impl GlobalState { /// any subsequent calls to `validate_lock_acquire` as well /// as any previous calls to this function after any /// `validate_lock_release` calls. + /// For normal locks this should be equivalent to `validate_lock_release` + /// this function only exists for joining over the set of concurrent readers + /// in a read-write lock and should not be used for anything else. 
pub fn validate_lock_release_shared(&self, lock: &mut VClock, thread: ThreadId) { let (index, mut clocks) = self.load_thread_state_mut(thread); - clocks.increment_clock(index); lock.join(&clocks.clock); clocks.increment_clock(index); } diff --git a/src/shims/posix/sync.rs b/src/shims/posix/sync.rs index 64308d0613..efa4412991 100644 --- a/src/shims/posix/sync.rs +++ b/src/shims/posix/sync.rs @@ -62,9 +62,11 @@ fn mutex_get_kind<'mir, 'tcx: 'mir>( mutex_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; + //FIXME: this has been made atomic to fix data-race reporting inside the internal + // mutex implementation, it may not need to be atomic. ecx.read_scalar_at_offset_atomic( mutex_op, offset, ecx.machine.layouts.i32, - AtomicReadOp::Acquire + AtomicReadOp::Relaxed ) } @@ -74,9 +76,11 @@ fn mutex_set_kind<'mir, 'tcx: 'mir>( kind: impl Into>, ) -> InterpResult<'tcx, ()> { let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; + //FIXME: this has been made atomic to fix data-race reporting inside the internal + // mutex implementation, it may not need to be atomic. ecx.write_scalar_at_offset_atomic( - mutex_op, offset, kind, ecx.machine.layouts.i32, - AtomicWriteOp::Release + mutex_op, offset, kind, ecx.machine.layouts.i32, + AtomicWriteOp::Relaxed ) } @@ -84,8 +88,11 @@ fn mutex_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, mutex_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { + //FIXME: this has been made atomic to fix data-race reporting inside the internal + // mutex implementation, it may not need to be atomic. ecx.read_scalar_at_offset_atomic( - mutex_op, 4, ecx.machine.layouts.u32, AtomicReadOp::Acquire + mutex_op, 4, ecx.machine.layouts.u32, + AtomicReadOp::Relaxed ) } @@ -94,9 +101,11 @@ fn mutex_set_id<'mir, 'tcx: 'mir>( mutex_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { + //FIXME: this has been made atomic to fix data-race reporting inside the internal + // mutex implementation, it may not need to be atomic. ecx.write_scalar_at_offset_atomic( mutex_op, 4, id, ecx.machine.layouts.u32, - AtomicWriteOp::Release + AtomicWriteOp::Relaxed ) } @@ -126,10 +135,12 @@ fn mutex_get_or_create_id<'mir, 'tcx: 'mir>( fn rwlock_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, rwlock_op: OpTy<'tcx, Tag>, + //FIXME: this has been made atomic to fix data-race reporting inside the internal + // rw-lock implementation, it may not need to be atomic. ) -> InterpResult<'tcx, ScalarMaybeUninit> { ecx.read_scalar_at_offset_atomic( rwlock_op, 4, ecx.machine.layouts.u32, - AtomicReadOp::Acquire + AtomicReadOp::Relaxed ) } @@ -138,9 +149,11 @@ fn rwlock_set_id<'mir, 'tcx: 'mir>( rwlock_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { + //FIXME: this has been made atomic to fix data-race reporting inside the internal + // rw-lock implementation, it may not need to be atomic. ecx.write_scalar_at_offset_atomic( rwlock_op, 4, id, ecx.machine.layouts.u32, - AtomicWriteOp::Release + AtomicWriteOp::Relaxed ) } @@ -194,9 +207,11 @@ fn cond_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, cond_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { + //FIXME: this has been made atomic to fix data-race reporting inside the internal + // cond-var implementation, it may not need to be atomic. 
ecx.read_scalar_at_offset_atomic( cond_op, 4, ecx.machine.layouts.u32, - AtomicReadOp::Acquire + AtomicReadOp::Relaxed ) } @@ -205,9 +220,11 @@ fn cond_set_id<'mir, 'tcx: 'mir>( cond_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { + //FIXME: this has been made atomic to fix data-race reporting inside the internal + // cond-var implementation, it may not need to be atomic. ecx.write_scalar_at_offset_atomic( cond_op, 4, id, ecx.machine.layouts.u32, - AtomicWriteOp::Release + AtomicWriteOp::Relaxed ) } diff --git a/src/shims/posix/thread.rs b/src/shims/posix/thread.rs index 847d083bfa..0ea20cdff6 100644 --- a/src/shims/posix/thread.rs +++ b/src/shims/posix/thread.rs @@ -15,7 +15,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx let this = self.eval_context_mut(); this.tcx.sess.warn( - "thread support is experimental, no weak memory effects are currently emulated.", + "thread support is experimental and incomplete: weak memory effects are not emulated." ); // Create the new thread diff --git a/tests/compile-fail/data_race/atomic_read_write_race.rs b/tests/compile-fail/data_race/atomic_read_na_write_race1.rs similarity index 100% rename from tests/compile-fail/data_race/atomic_read_write_race.rs rename to tests/compile-fail/data_race/atomic_read_na_write_race1.rs diff --git a/tests/compile-fail/data_race/atomic_read_write_race_alt.rs b/tests/compile-fail/data_race/atomic_read_na_write_race2.rs similarity index 100% rename from tests/compile-fail/data_race/atomic_read_write_race_alt.rs rename to tests/compile-fail/data_race/atomic_read_na_write_race2.rs diff --git a/tests/compile-fail/data_race/atomic_write_read_race.rs b/tests/compile-fail/data_race/atomic_write_na_read_race1.rs similarity index 100% rename from tests/compile-fail/data_race/atomic_write_read_race.rs rename to tests/compile-fail/data_race/atomic_write_na_read_race1.rs diff --git a/tests/compile-fail/data_race/atomic_write_read_race_alt.rs b/tests/compile-fail/data_race/atomic_write_na_read_race2.rs similarity index 100% rename from tests/compile-fail/data_race/atomic_write_read_race_alt.rs rename to tests/compile-fail/data_race/atomic_write_na_read_race2.rs diff --git a/tests/compile-fail/data_race/atomic_write_write_race.rs b/tests/compile-fail/data_race/atomic_write_na_write_race1.rs similarity index 100% rename from tests/compile-fail/data_race/atomic_write_write_race.rs rename to tests/compile-fail/data_race/atomic_write_na_write_race1.rs diff --git a/tests/compile-fail/data_race/atomic_write_write_race_alt.rs b/tests/compile-fail/data_race/atomic_write_na_write_race2.rs similarity index 100% rename from tests/compile-fail/data_race/atomic_write_write_race_alt.rs rename to tests/compile-fail/data_race/atomic_write_na_write_race2.rs diff --git a/tests/compile-fail/data_race/dangling_thread_async_race.rs b/tests/compile-fail/data_race/dangling_thread_async_race.rs index 6af5706835..d8b5d82f83 100644 --- a/tests/compile-fail/data_race/dangling_thread_async_race.rs +++ b/tests/compile-fail/data_race/dangling_thread_async_race.rs @@ -29,9 +29,9 @@ fn main() { sleep(Duration::from_millis(100)); // Spawn and immediately join a thread - // to execute the join code-path - // and ensure that data-race detection - // remains enabled + // to execute the join code-path + // and ensure that data-race detection + // remains enabled nevertheless. 
spawn(|| ()).join().unwrap(); let join2 = unsafe { diff --git a/tests/compile-fail/data_race/dangling_thread_race.rs b/tests/compile-fail/data_race/dangling_thread_race.rs index c37f303bba..172b05bd4f 100644 --- a/tests/compile-fail/data_race/dangling_thread_race.rs +++ b/tests/compile-fail/data_race/dangling_thread_race.rs @@ -29,9 +29,9 @@ fn main() { sleep(Duration::from_millis(100)); // Spawn and immediately join a thread - // to execute the join code-path - // and ensure that data-race detection - // remains enabled + // to execute the join code-path + // and ensure that data-race detection + // remains enabled nevertheless. spawn(|| ()).join().unwrap(); diff --git a/tests/compile-fail/data_race/enable_after_join_to_main.rs b/tests/compile-fail/data_race/enable_after_join_to_main.rs index fba7ba4841..c294317771 100644 --- a/tests/compile-fail/data_race/enable_after_join_to_main.rs +++ b/tests/compile-fail/data_race/enable_after_join_to_main.rs @@ -9,7 +9,7 @@ unsafe impl Send for EvilSend {} unsafe impl Sync for EvilSend {} pub fn main() { - // Enable and the join with multiple threads + // Enable and then join with multiple threads. let t1 = spawn(|| ()); let t2 = spawn(|| ()); let t3 = spawn(|| ()); @@ -19,7 +19,7 @@ pub fn main() { t3.join().unwrap(); t4.join().unwrap(); - // Perform write-write data race detection + // Perform write-write data race detection. let mut a = 0u32; let b = &mut a as *mut u32; let c = EvilSend(b); diff --git a/tests/compile-fail/data_race/relax_acquire_race.rs b/tests/compile-fail/data_race/relax_acquire_race.rs index 4b736e5720..2ae0aacbcf 100644 --- a/tests/compile-fail/data_race/relax_acquire_race.rs +++ b/tests/compile-fail/data_race/relax_acquire_race.rs @@ -16,6 +16,13 @@ pub fn main() { let b = &mut a as *mut u32; let c = EvilSend(b); + // Note: this is scheduler-dependent + // the operations need to occur in + // order: + // 1. store release : 1 + // 2. load acquire : 1 + // 3. store relaxed : 2 + // 4. load acquire : 2 unsafe { let j1 = spawn(move || { *c.0 = 1; diff --git a/tests/compile-fail/data_race/release_seq_race.rs b/tests/compile-fail/data_race/release_seq_race.rs index 0278e98643..59263cb712 100644 --- a/tests/compile-fail/data_race/release_seq_race.rs +++ b/tests/compile-fail/data_race/release_seq_race.rs @@ -18,6 +18,14 @@ pub fn main() { let b = &mut a as *mut u32; let c = EvilSend(b); + // Note: this is scheduler-dependent + // the operations need to occur in + // order, the sleep operations currently + // force the desired ordering: + // 1. store release : 1 + // 2. store relaxed : 2 + // 3. store relaxed : 3 + // 4. load acquire : 3 unsafe { let j1 = spawn(move || { *c.0 = 1; diff --git a/tests/compile-fail/data_race/rmw_race.rs b/tests/compile-fail/data_race/rmw_race.rs index c533f595f1..e523f8b374 100644 --- a/tests/compile-fail/data_race/rmw_race.rs +++ b/tests/compile-fail/data_race/rmw_race.rs @@ -16,6 +16,13 @@ pub fn main() { let b = &mut a as *mut u32; let c = EvilSend(b); + // Note: this is scheduler-dependent + // the operations need to occur in + // order: + // 1. store release : 1 + // 2. RMW relaxed : 1 -> 2 + // 3. store relaxed : 3 + // 4. 
load acquire : 3 unsafe { let j1 = spawn(move || { *c.0 = 1; diff --git a/tests/run-pass/concurrency/data_race.stderr b/tests/run-pass/concurrency/data_race.stderr index 7ba8087a9b..03676519d4 100644 --- a/tests/run-pass/concurrency/data_race.stderr +++ b/tests/run-pass/concurrency/data_race.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental, no weak memory effects are currently emulated. +warning: thread support is experimental and incomplete: weak memory effects are not emulated. diff --git a/tests/run-pass/concurrency/disable_data_race_detector.rs b/tests/run-pass/concurrency/disable_data_race_detector.rs index e47a2079c2..8b2d180f11 100644 --- a/tests/run-pass/concurrency/disable_data_race_detector.rs +++ b/tests/run-pass/concurrency/disable_data_race_detector.rs @@ -19,7 +19,7 @@ pub fn main() { }); let j2 = spawn(move || { - *c.0 = 64; //~ ERROR Data race + *c.0 = 64; //~ ERROR Data race (but not detected as the detector is disabled) }); j1.join().unwrap(); diff --git a/tests/run-pass/concurrency/disable_data_race_detector.stderr b/tests/run-pass/concurrency/disable_data_race_detector.stderr index 7ba8087a9b..03676519d4 100644 --- a/tests/run-pass/concurrency/disable_data_race_detector.stderr +++ b/tests/run-pass/concurrency/disable_data_race_detector.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental, no weak memory effects are currently emulated. +warning: thread support is experimental and incomplete: weak memory effects are not emulated. diff --git a/tests/run-pass/concurrency/linux-futex.stderr b/tests/run-pass/concurrency/linux-futex.stderr index 7ba8087a9b..03676519d4 100644 --- a/tests/run-pass/concurrency/linux-futex.stderr +++ b/tests/run-pass/concurrency/linux-futex.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental, no weak memory effects are currently emulated. +warning: thread support is experimental and incomplete: weak memory effects are not emulated. diff --git a/tests/run-pass/concurrency/simple.stderr b/tests/run-pass/concurrency/simple.stderr index 24444fdc17..f46b1442d7 100644 --- a/tests/run-pass/concurrency/simple.stderr +++ b/tests/run-pass/concurrency/simple.stderr @@ -1,4 +1,4 @@ -warning: thread support is experimental, no weak memory effects are currently emulated. +warning: thread support is experimental and incomplete: weak memory effects are not emulated. thread '' panicked at 'Hello!', $DIR/simple.rs:54:9 note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace diff --git a/tests/run-pass/concurrency/sync.stderr b/tests/run-pass/concurrency/sync.stderr index 7ba8087a9b..03676519d4 100644 --- a/tests/run-pass/concurrency/sync.stderr +++ b/tests/run-pass/concurrency/sync.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental, no weak memory effects are currently emulated. +warning: thread support is experimental and incomplete: weak memory effects are not emulated. diff --git a/tests/run-pass/concurrency/thread_locals.stderr b/tests/run-pass/concurrency/thread_locals.stderr index 7ba8087a9b..03676519d4 100644 --- a/tests/run-pass/concurrency/thread_locals.stderr +++ b/tests/run-pass/concurrency/thread_locals.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental, no weak memory effects are currently emulated. +warning: thread support is experimental and incomplete: weak memory effects are not emulated. 
diff --git a/tests/run-pass/concurrency/tls_lib_drop.stderr b/tests/run-pass/concurrency/tls_lib_drop.stderr index 7ba8087a9b..03676519d4 100644 --- a/tests/run-pass/concurrency/tls_lib_drop.stderr +++ b/tests/run-pass/concurrency/tls_lib_drop.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental, no weak memory effects are currently emulated. +warning: thread support is experimental and incomplete: weak memory effects are not emulated. diff --git a/tests/run-pass/libc.stderr b/tests/run-pass/libc.stderr index 7ba8087a9b..03676519d4 100644 --- a/tests/run-pass/libc.stderr +++ b/tests/run-pass/libc.stderr @@ -1,2 +1,2 @@ -warning: thread support is experimental, no weak memory effects are currently emulated. +warning: thread support is experimental and incomplete: weak memory effects are not emulated. diff --git a/tests/run-pass/panic/concurrent-panic.stderr b/tests/run-pass/panic/concurrent-panic.stderr index 885385a8dd..1ee688c1d3 100644 --- a/tests/run-pass/panic/concurrent-panic.stderr +++ b/tests/run-pass/panic/concurrent-panic.stderr @@ -1,4 +1,4 @@ -warning: thread support is experimental, no weak memory effects are currently emulated. +warning: thread support is experimental and incomplete: weak memory effects are not emulated. Thread 1 starting, will block on mutex Thread 1 reported it has started From 6c5722933e5233b4b64134680baae1f48e1e47ea Mon Sep 17 00:00:00 2001 From: JCTyBlaidd Date: Sat, 28 Nov 2020 17:17:07 +0000 Subject: [PATCH 16/17] Fix typos - looked into the papers handling of timestamps, after looking into it again, it seems the paper only increments the timestamp after release operations, so changed to approximation of that implementation. --- src/data_race.rs | 56 +++++++++++++++++++---------------------- src/shims/posix/sync.rs | 16 ------------ 2 files changed, 26 insertions(+), 46 deletions(-) diff --git a/src/data_race.rs b/src/data_race.rs index 3f70631d13..49332721fc 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -39,21 +39,14 @@ //! //! The timestamps used in the data-race detector assign each sequence of non-atomic operations //! followed by a single atomic or concurrent operation a single timestamp. -//! Write, Read, Write, ThreadJoin will be represented by a single timestamp value on a thread +//! Write, Read, Write, ThreadJoin will be represented by a single timestamp value on a thread. //! This is because extra increment operations between the operations in the sequence are not //! required for accurate reporting of data-race values. //! -//! If the timestamp was not incremented after the atomic operation, then data-races would not be detected: -//! Example - this should report a data-race but does not: -//! t1: (x,0), atomic[release A], t1=(x+1, 0 ), write(var B), -//! t2: (0,y) , atomic[acquire A], t2=(x+1, y+1), ,write(var B) -//! -//! The timestamp is not incremented before an atomic operation, since the result is indistinguishable -//! from the value not being incremented. -//! t: (x, 0), atomic[release _], (x + 1, 0) || (0, y), atomic[acquire _], (x, _) -//! vs t: (x, 0), atomic[release _], (x + 1, 0) || (0, y), atomic[acquire _], (x+1, _) -//! Both result in the sequence on thread x up to and including the atomic release as happening -//! before the acquire. +//! As per the paper a threads timestamp is only incremented after a release operation is performed +//! so some atomic operations that only perform acquires do not increment the timestamp, due to shared +//! 
code some atomic operations may increment the timestamp when not necessary but this has no effect +//! on the data-race detection code. //! //! FIXME: //! currently we have our own local copy of the currently active thread index and names, this is due @@ -516,7 +509,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { Ok(old) } - /// Perform an atomic compare and exchange at a given memory location + /// Perform an atomic compare and exchange at a given memory location. /// On success an atomic RMW operation is performed and on failure /// only an atomic read occurs. fn atomic_compare_exchange_scalar( @@ -640,7 +633,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { // Either Release | AcqRel | SeqCst clocks.apply_release_fence(); } - Ok(()) + + // Increment timestamp if hase release semantics + Ok(atomic != AtomicFenceOp::Acquire) }) } else { Ok(()) @@ -651,9 +646,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { /// Vector clock metadata for a logical memory allocation. #[derive(Debug, Clone)] pub struct VClockAlloc { - /// Range of Vector clocks, this gives each byte a potentially - /// unqiue set of vector clocks, but merges identical information - /// together for improved efficiency. + /// Assigning each byte a MemoryCellClocks. alloc_ranges: RefCell>, // Pointer to global state. @@ -935,10 +928,12 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { true, place_ptr, size, - ); + ).map(|_| true); } } - Ok(()) + + // This conservatively assumes all operations have release semantics + Ok(true) })?; // Log changes to atomic memory. @@ -1159,6 +1154,7 @@ impl GlobalState { created.join_with(current); // Advance both threads after the synchronized operation. + // Both operations are considered to have release semantics. current.increment_clock(current_index); created.increment_clock(created_index); } @@ -1185,11 +1181,9 @@ impl GlobalState { // The join thread happens-before the current thread // so update the current vector clock. + // Is not a release operation so the clock is not incremented. current.clock.join(join_clock); - // Increment clocks after atomic operation. - current.increment_clock(current_index); - // Check the number of active threads, if the value is 1 // then test for potentially disabling multi-threaded execution. let active_threads = self.active_thread_count.get(); @@ -1287,13 +1281,14 @@ impl GlobalState { /// operation may create. fn maybe_perform_sync_operation<'tcx>( &self, - op: impl FnOnce(VectorIdx, RefMut<'_, ThreadClockSet>) -> InterpResult<'tcx>, + op: impl FnOnce(VectorIdx, RefMut<'_, ThreadClockSet>) -> InterpResult<'tcx, bool>, ) -> InterpResult<'tcx> { if self.multi_threaded.get() { let (index, clocks) = self.current_thread_state_mut(); - op(index, clocks)?; - let (_, mut clocks) = self.current_thread_state_mut(); - clocks.increment_clock(index); + if op(index, clocks)? { + let (_, mut clocks) = self.current_thread_state_mut(); + clocks.increment_clock(index); + } } Ok(()) } @@ -1313,10 +1308,11 @@ impl GlobalState { /// Acquire a lock, express that the previous call of /// `validate_lock_release` must happen before this. + /// As this is an acquire operation, the thread timestamp is not + /// incremented. 
pub fn validate_lock_acquire(&self, lock: &VClock, thread: ThreadId) { - let (index, mut clocks) = self.load_thread_state_mut(thread); + let (_, mut clocks) = self.load_thread_state_mut(thread); clocks.clock.join(&lock); - clocks.increment_clock(index); } /// Release a lock handle, express that this happens-before @@ -1335,8 +1331,8 @@ impl GlobalState { /// any subsequent calls to `validate_lock_acquire` as well /// as any previous calls to this function after any /// `validate_lock_release` calls. - /// For normal locks this should be equivalent to `validate_lock_release` - /// this function only exists for joining over the set of concurrent readers + /// For normal locks this should be equivalent to `validate_lock_release`. + /// This function only exists for joining over the set of concurrent readers /// in a read-write lock and should not be used for anything else. pub fn validate_lock_release_shared(&self, lock: &mut VClock, thread: ThreadId) { let (index, mut clocks) = self.load_thread_state_mut(thread); diff --git a/src/shims/posix/sync.rs b/src/shims/posix/sync.rs index efa4412991..868c72289a 100644 --- a/src/shims/posix/sync.rs +++ b/src/shims/posix/sync.rs @@ -62,8 +62,6 @@ fn mutex_get_kind<'mir, 'tcx: 'mir>( mutex_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; - //FIXME: this has been made atomic to fix data-race reporting inside the internal - // mutex implementation, it may not need to be atomic. ecx.read_scalar_at_offset_atomic( mutex_op, offset, ecx.machine.layouts.i32, AtomicReadOp::Relaxed @@ -76,8 +74,6 @@ fn mutex_set_kind<'mir, 'tcx: 'mir>( kind: impl Into>, ) -> InterpResult<'tcx, ()> { let offset = if ecx.pointer_size().bytes() == 8 { 16 } else { 12 }; - //FIXME: this has been made atomic to fix data-race reporting inside the internal - // mutex implementation, it may not need to be atomic. ecx.write_scalar_at_offset_atomic( mutex_op, offset, kind, ecx.machine.layouts.i32, AtomicWriteOp::Relaxed @@ -88,8 +84,6 @@ fn mutex_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, mutex_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - //FIXME: this has been made atomic to fix data-race reporting inside the internal - // mutex implementation, it may not need to be atomic. ecx.read_scalar_at_offset_atomic( mutex_op, 4, ecx.machine.layouts.u32, AtomicReadOp::Relaxed @@ -101,8 +95,6 @@ fn mutex_set_id<'mir, 'tcx: 'mir>( mutex_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - //FIXME: this has been made atomic to fix data-race reporting inside the internal - // mutex implementation, it may not need to be atomic. ecx.write_scalar_at_offset_atomic( mutex_op, 4, id, ecx.machine.layouts.u32, AtomicWriteOp::Relaxed @@ -135,8 +127,6 @@ fn mutex_get_or_create_id<'mir, 'tcx: 'mir>( fn rwlock_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, rwlock_op: OpTy<'tcx, Tag>, - //FIXME: this has been made atomic to fix data-race reporting inside the internal - // rw-lock implementation, it may not need to be atomic. ) -> InterpResult<'tcx, ScalarMaybeUninit> { ecx.read_scalar_at_offset_atomic( rwlock_op, 4, ecx.machine.layouts.u32, @@ -149,8 +139,6 @@ fn rwlock_set_id<'mir, 'tcx: 'mir>( rwlock_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - //FIXME: this has been made atomic to fix data-race reporting inside the internal - // rw-lock implementation, it may not need to be atomic. 
ecx.write_scalar_at_offset_atomic( rwlock_op, 4, id, ecx.machine.layouts.u32, AtomicWriteOp::Relaxed @@ -207,8 +195,6 @@ fn cond_get_id<'mir, 'tcx: 'mir>( ecx: &MiriEvalContext<'mir, 'tcx>, cond_op: OpTy<'tcx, Tag>, ) -> InterpResult<'tcx, ScalarMaybeUninit> { - //FIXME: this has been made atomic to fix data-race reporting inside the internal - // cond-var implementation, it may not need to be atomic. ecx.read_scalar_at_offset_atomic( cond_op, 4, ecx.machine.layouts.u32, AtomicReadOp::Relaxed @@ -220,8 +206,6 @@ fn cond_set_id<'mir, 'tcx: 'mir>( cond_op: OpTy<'tcx, Tag>, id: impl Into>, ) -> InterpResult<'tcx, ()> { - //FIXME: this has been made atomic to fix data-race reporting inside the internal - // cond-var implementation, it may not need to be atomic. ecx.write_scalar_at_offset_atomic( cond_op, 4, id, ecx.machine.layouts.u32, AtomicWriteOp::Relaxed From cbb695f782dceca959661e9c57d7aeb120cbc1d8 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 29 Nov 2020 19:43:44 +0100 Subject: [PATCH 17/17] fix some typos --- src/data_race.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data_race.rs b/src/data_race.rs index 49332721fc..aca735e6f2 100644 --- a/src/data_race.rs +++ b/src/data_race.rs @@ -44,7 +44,7 @@ //! required for accurate reporting of data-race values. //! //! As per the paper a threads timestamp is only incremented after a release operation is performed -//! so some atomic operations that only perform acquires do not increment the timestamp, due to shared +//! so some atomic operations that only perform acquires do not increment the timestamp. Due to shared //! code some atomic operations may increment the timestamp when not necessary but this has no effect //! on the data-race detection code. //! @@ -634,7 +634,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriEvalContextExt<'mir, 'tcx> { clocks.apply_release_fence(); } - // Increment timestamp if hase release semantics + // Increment timestamp in case of release semantics. Ok(atomic != AtomicFenceOp::Acquire) }) } else {
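
Editor's note, appended after the patch series and not part of any commit above: the doc comments added in PATCH 15/17 and 16/17 describe the vector-clock discipline the detector uses for locks — an acquire joins the lock's stored clock into the acquiring thread's clock without incrementing it, while a release publishes (or, for shared reader locks, joins) the releasing thread's clock into the lock and then increments that thread's own timestamp. The stand-alone sketch below is a simplified illustration of that discipline under the editor's own toy types and names (VClock, ThreadClock, lock_acquire, lock_release); none of these signatures are taken from the Miri sources.

// Editorial illustration only: a toy model of the acquire/release clock
// handling described in the comments above, not code from these patches.
use std::cmp::max;

/// Toy vector clock: one counter per thread index.
#[derive(Clone, Debug, Default)]
struct VClock(Vec<u64>);

impl VClock {
    /// Component-wise maximum, growing the vector as needed.
    fn join(&mut self, other: &VClock) {
        if other.0.len() > self.0.len() {
            self.0.resize(other.0.len(), 0);
        }
        for (l, &r) in self.0.iter_mut().zip(other.0.iter()) {
            *l = max(*l, r);
        }
    }

    /// Advance the timestamp of one thread.
    fn increment(&mut self, thread: usize) {
        if thread >= self.0.len() {
            self.0.resize(thread + 1, 0);
        }
        self.0[thread] += 1;
    }
}

/// Per-thread clock state, loosely mirroring what the detector tracks.
#[derive(Clone, Debug, Default)]
struct ThreadClock {
    clock: VClock,
}

/// Acquire: join the lock's clock into the thread's clock.
/// As described in PATCH 16/17, an acquire does not increment the timestamp.
fn lock_acquire(thread: &mut ThreadClock, lock: &VClock) {
    thread.clock.join(lock);
}

/// Release: publish the thread's clock into the lock, then increment the
/// releasing thread's own timestamp so that later local events are ordered
/// strictly after the release.
fn lock_release(thread: &mut ThreadClock, thread_index: usize, lock: &mut VClock) {
    *lock = thread.clock.clone();
    thread.clock.increment(thread_index);
}

fn main() {
    // Thread 0 does some work and releases a lock; thread 1 then acquires it.
    let mut t0 = ThreadClock::default();
    let mut t1 = ThreadClock::default();
    let mut lock = VClock::default();

    t0.clock.increment(0); // an event on thread 0 before the release
    lock_release(&mut t0, 0, &mut lock);
    lock_acquire(&mut t1, &lock);

    // Thread 1's clock now dominates thread 0's pre-release clock, so all of
    // thread 0's events before the release happen-before thread 1's later events.
    assert!(t1.clock.0[0] >= 1);
    println!("t0 = {:?}, t1 = {:?}, lock = {:?}", t0.clock, t1.clock, lock);
}

In this toy form the asymmetry that the patches settle on is easy to see: only the release side bumps the releasing thread's component, which is sufficient to separate pre-release from post-release events, while acquire-only operations (lock acquire, thread join) merely join clocks and need no increment.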