From 01345db784faa062d67a2dc9e0c658a1d549aa43 Mon Sep 17 00:00:00 2001 From: Joshua Liebow-Feeser Date: Sun, 3 Sep 2023 21:13:57 +0000 Subject: [PATCH] Add MaybeValid type `MaybeValid<T>` is a `T` which might not be valid. It is similar to `MaybeUninit<T>`, but it is slightly more strict: any byte in `T` which is guaranteed to be initialized is also guaranteed to be initialized in `MaybeValid<T>` (see the doc comment for a more precise definition). `MaybeValid<T>` is a building block of the `TryFromBytes` design outlined in #5. Makes progress on #5 --- src/lib.rs | 284 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index f05007cc85..780dee7dce 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1814,6 +1814,199 @@ safety_comment! { assert_unaligned!(mem::MaybeUninit<()>, MaybeUninit); } +/// A value which might or might not constitute a valid instance of `T`. +/// +/// `MaybeValid<T>` has the same layout (size and alignment) and field offsets +/// as `T`. Unlike `T`, it may contain any bit pattern, except that +/// uninitialized bytes may only appear in `MaybeValid<T>` at byte offsets where +/// they may appear in `T`. This is a dynamic property: if, at a particular byte +/// offset, a valid enum discriminant is set, the subsequent bytes may only have +/// uninitialized bytes as specified by the corresponding enum variant. +/// +/// Formally, given `m: MaybeValid<T>` and a byte offset `b` in the range `[0, +/// size_of_val(m))`: +/// - If, in all valid instances `t: T`, the byte at offset `b` in `t` is +/// initialized, then the byte at offset `b` within `m` is guaranteed to be +/// initialized. +/// - Let `c` be the contents of the byte range `[0, b)` in `m`. Let `TT` be the +/// subset of valid instances of `T` which contain `c` in the offset range +/// `[0, b)`. 
If, for all instances of `t: T` in `TT`, the byte at offset `b` +/// in `t` is initialized, then the byte at offset `b` in `m` is guaranteed to +/// be initialized. +/// +/// Pragmatically, this means that if `m` is guaranteed to contain an enum +/// type at a particular offset, and the enum discriminant stored in `m` +/// corresponds to a valid variant of that enum type, then it is guaranteed +/// that the appropriate bytes of `m` are initialized as defined by that +/// variant's bit validity (although note that the variant may contain another +/// enum type, in which case the same rules apply depending on the state of +/// its discriminant, and so on recursively). +/// +/// # Safety +/// +/// Unsafe code may assume that an instance of `MaybeValid<T>` satisfies the +/// constraints described above. Unsafe code may produce a `MaybeValid<T>` or +/// modify the bytes of an existing `MaybeValid<T>` so long as these constraints +/// are upheld. It is unsound to produce a `MaybeValid<T>` which fails to uphold +/// these constraints. +#[repr(transparent)] +pub struct MaybeValid { + inner: MaybeUninit, +} + +safety_comment! { + /// SAFETY: + /// - `AsBytes`: `MaybeValid<T>` requires that, if a byte in `T` is always + /// initialized, the equivalent byte in `MaybeValid<T>` must be + /// initialized. `T: AsBytes` implies that all bytes in `T` must always be + /// initialized, and so all bytes in `MaybeValid<T>` must always be + /// initialized, and so `MaybeValid<T>` satisfies `AsBytes`. + /// - `Unaligned`: `MaybeValid<T>` has the same alignment as `T`. + /// - `KnownLayout`: Since `MaybeValid<T>` is a `repr(transparent)` wrapper + /// around `MaybeUninit<T>`: + /// - They have the same prefix size, alignment, and trailing slice + /// element size + /// - It is valid to perform an `as` cast in either direction, and this + /// operation preserves referent size + /// + /// TODO(#5): Implement `FromZeroes` and `FromBytes` for `MaybeValid<T>` and + /// `MaybeValid<[T]>`. 
+ unsafe_impl!(T: ?Sized + KnownLayout + AsBytes => AsBytes for MaybeValid); + unsafe_impl!(T: ?Sized + KnownLayout + Unaligned => Unaligned for MaybeValid); + unsafe_impl_known_layout!(T: ?Sized + KnownLayout => #[repr(MaybeUninit)] MaybeValid); +} + +// impl Default for MaybeValid { +// fn default() -> MaybeValid { +// // SAFETY: All of the bytes of `inner` are initialized to 0, and so the +// // safety invariant on `MaybeValid` is upheld. +// MaybeValid { inner: MaybeUninit::zeroed() } +// } +// } + +impl MaybeValid { + /// Converts this `&MaybeValid` to a `&T`. + /// + /// # Safety + /// + /// `self` must contain a valid `T`. + pub unsafe fn assume_valid_ref(&self) -> &T { + // SAFETY: The caller has promised that `self` contains a valid `T`. + // Since `Self` is `repr(transparent)`, it has the same layout as + // `MaybeUninit`, which in turn is guaranteed to have the same layout + // as `T`. Thus, it is sound to treat `self.inner` as containing a valid + // `T`. + unsafe { self.inner.assume_init_ref() } + } + + /// Converts this `&mut MaybeValid` to a `&mut T`. + /// + /// # Safety + /// + /// `self` must contain a valid `T`. + pub unsafe fn assume_valid_mut(&mut self) -> &mut T { + // SAFETY: The caller has promised that `self` contains a valid `T`. + // Since `Self` is `repr(transparent)`, it has the same layout as + // `MaybeUninit`, which in turn is guaranteed to have the same layout + // as `T`. Thus, it is sound to treat `self.inner` as containing a valid + // `T`. + unsafe { self.inner.assume_init_mut() } + } + + /// Gets a view of this `&T` as a `&MaybeValid`. + /// + /// There is no mutable equivalent to this function, as producing a `&mut + /// MaybeValid` from a `&mut T` would allow safe code to write invalid + /// values which would be accessible through `&mut T`. 
+ pub fn from_ref(r: &T) -> &MaybeValid { + let m: *const MaybeUninit = MaybeUninit::from_ref(r); + #[allow(clippy::as_conversions)] + let ptr = m as *const MaybeValid; + // SAFETY: Since `Self` is `repr(transparent)`, it has the same layout + // as `MaybeUninit`, so the size and alignment here are valid. + // + // `MaybeValid`'s bit validity constraints are weaker than those of + // `T`, so this is guaranteed not to produce an invalid `MaybeValid`. + // If it were possible to write a different value for `MaybeValid` + // through the returned reference, it could result in an invalid value + // being exposed via the `&T`. Luckily, the only way for mutation to + // happen is if `T` contains an `UnsafeCell` and the caller uses it to + // perform interior mutation. Importantly, `T` containing an + // `UnsafeCell` does not permit interior mutation through + // `MaybeValid`, so it doesn't permit writing uninitialized or + // otherwise invalid values which would be visible through the original + // `&T`. + unsafe { &*ptr } + } +} + +impl>> MaybeValid { + /// Converts this `MaybeValid` to a `T`. + /// + /// # Safety + /// + /// `self` must contain a valid `T`. + pub const unsafe fn assume_valid(self) -> T { + // SAFETY: The caller has promised that `self` contains a valid `T`. + // Since `Self` is `repr(transparent)`, it has the same layout as + // `MaybeUninit`, which in turn is guaranteed to have the same layout + // as `T`. Thus, it is sound to treat `self.inner` as containing a valid + // `T`. + unsafe { self.inner.assume_init() } + } +} + +impl>> MaybeValid<[T]> { + /// Converts a `MaybeValid<[T]>` to a `[MaybeValid]`. + /// + /// `MaybeValid` has the same layout as `T`, so these layouts are + /// equivalent. 
+ pub const fn as_slice_of_maybe_valids(&self) -> &[MaybeValid] { + let inner: &[::MaybeUninit] = &self.inner.inner; + let inner_ptr: *const [::MaybeUninit] = inner; + // Note: this Clippy warning is only emitted on our MSRV (1.61), but not + // on later versions of Clippy. Thus, we consider it spurious. + #[allow(clippy::as_conversions)] + let ret_ptr = inner_ptr as *const [MaybeValid]; + // SAFETY: Since `inner` is a `&[MaybeUninit]`, and `MaybeValid` + // is a `repr(transparent)` struct around `MaybeUninit`, `inner` has + // the same layout as `&[MaybeValid]`. + unsafe { &*ret_ptr } + } +} + +impl MaybeValid<[T; N]> { + /// Converts a `MaybeValid<[T; N]>` to a `MaybeValid<[T]>`. + // TODO(#64): Make this `const` once our MSRV is >= 1.64.0 (when + // `slice_from_raw_parts` was stabilized as `const`). + pub fn as_slice(&self) -> &MaybeValid<[T]> { + let base: *const MaybeValid<[T; N]> = self; + let slice_of_t: *const [T] = ptr::slice_from_raw_parts(base.cast::(), N); + // Note: this Clippy warning is only emitted on our MSRV (1.61), but not + // on later versions of Clippy. Thus, we consider it spurious. + #[allow(clippy::as_conversions)] + let mv_of_slice = slice_of_t as *const MaybeValid<[T]>; + // SAFETY: `MaybeValid` is a `repr(transparent)` wrapper around + // `MaybeUninit`, which in turn has the same layout as `T`. Thus, the + // trailing slices of `[T]` and of `MaybeValid<[T]>` both have element + // type `T`. Since the number of elements is preserved during an `as` + // cast of slice/DST pointers, the resulting `*const MaybeValid<[T]>` + // has the same number of elements - and thus the same length - as the + // original `*const [T]`. + // + // Thanks to their layouts, `MaybeValid<[T; N]>` and `MaybeValid<[T]>` + // have the same alignment, so `mv_of_slice` is guaranteed to be + // aligned. 
+ unsafe { &*mv_of_slice } + } +} + +impl Debug for MaybeValid { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.pad(core::any::type_name::()) + } +} + /// A type with no alignment requirement. /// /// An `Unalign` wraps a `T`, removing any alignment requirement. `Unalign` @@ -3853,6 +4046,91 @@ mod tests { assert_eq!(unsafe { m.assume_init_ref() }, &Cell::new(2)); } + #[test] + fn test_maybe_valid() { + let m = MaybeValid::::default(); + // SAFETY: all bit patterns are valid `usize`s, and `m` is initialized. + let u = unsafe { m.assume_valid() }; + // This ensures that Miri can see whether `u` (and thus `m`) has been + // properly initialized. + assert_eq!(u, u); + + fn bytes_to_maybe_valid(bytes: &mut [u8]) -> &mut MaybeValid<[u8]> { + // SAFETY: `MaybeValid<[u8]>` has the same layout as `[u8]`, and + // `bytes` is initialized. + unsafe { + #[allow(clippy::as_conversions)] + let m = &mut *(bytes as *mut [u8] as *mut MaybeValid<[u8]>); + m + } + } + + let mut bytes = [0u8, 1, 2]; + let m = bytes_to_maybe_valid(&mut bytes[..]); + + // SAFETY: `m` was created from a valid `[u8]`. + let r = unsafe { m.assume_valid_ref() }; + assert_eq!(r.len(), 3); + assert_eq!(r, [0, 1, 2]); + + // SAFETY: `m` was created from a valid `[u8]`. + let r = unsafe { m.assume_valid_mut() }; + assert_eq!(r.len(), 3); + assert_eq!(r, [0, 1, 2]); + + r[0] = 1; + assert_eq!(bytes, [1, 1, 2]); + + let mut bytes = [0u8, 1, 2]; + let m = bytes_to_maybe_valid(&mut bytes[..]); + let slc = m.as_slice_of_maybe_valids(); + assert_eq!(slc.len(), 3); + for i in 0u8..3 { + // SAFETY: `m` was created from a valid `[u8]`. + let u = unsafe { slc[usize::from(i)].assume_valid_ref() }; + assert_eq!(u, &i); + } + } + + #[test] + fn test_maybe_valid_as_slice() { + let mut m = MaybeValid::<[u8; 3]>::default(); + // SAFETY: all bit patterns are valid `[u8; 3]`s, and `m` is + // initialized. 
+ unsafe { *m.assume_valid_mut() = [0, 1, 2] }; + + let slc = m.as_slice().as_slice_of_maybe_valids(); + assert_eq!(slc.len(), 3); + + for i in 0u8..3 { + // SAFETY: `m` was initialized as a valid `[u8; 3]`. + let u = unsafe { slc[usize::from(i)].assume_valid_ref() }; + assert_eq!(u, &i); + } + } + + #[test] + fn test_maybe_valid_from_ref() { + use core::cell::Cell; + + let u = 1usize; + let m = MaybeValid::from_ref(&u); + // SAFETY: `m` was constructed from a valid `&usize`. + assert_eq!(unsafe { m.assume_valid_ref() }, &1usize); + + // Test that interior mutability doesn't affect correctness or + // soundness. + + let c = Cell::new(1usize); + let m = MaybeValid::from_ref(&c); + // SAFETY: `m` was constructed from a valid `&Cell<usize>`. + assert_eq!(unsafe { m.assume_valid_ref() }, &Cell::new(1)); + + c.set(2); + // SAFETY: `m` was constructed from a valid `&Cell<usize>`, and the + // `Cell::set` call above stored another valid `usize`. + assert_eq!(unsafe { m.assume_valid_ref() }, &Cell::new(2)); + } + #[test] fn test_unalign() { // Test methods that don't depend on alignment. @@ -4771,6 +5049,12 @@ mod tests { assert_impls!(MaybeUninit: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); assert_impls!(MaybeUninit>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); + assert_impls!(MaybeValid: Unaligned, AsBytes, !FromZeroes, !FromBytes); + assert_impls!(MaybeValid>: Unaligned, AsBytes, !FromZeroes, !FromBytes); + assert_impls!(MaybeValid<[u8]>: Unaligned, AsBytes, !FromZeroes, !FromBytes); + assert_impls!(MaybeValid: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); + assert_impls!(MaybeValid>: !FromZeroes, !FromBytes, !AsBytes, !Unaligned); + assert_impls!(Wrapping: FromZeroes, FromBytes, AsBytes, Unaligned); assert_impls!(Wrapping: !FromZeroes, !FromBytes, !AsBytes, !Unaligned);