diff --git a/Cargo.toml b/Cargo.toml index 2fd90097fa1..a9c30320d00 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ __internal_use_only_features_that_work_on_stable = ["alloc", "simd"] [dependencies] zerocopy-derive = { version = "=0.7.0-alpha.5", path = "zerocopy-derive" } +project = { version = "0.1.0", path = "project" } [dependencies.byteorder] version = "1.3" diff --git a/project/Cargo.toml b/project/Cargo.toml new file mode 100644 index 00000000000..5d2835ffd48 --- /dev/null +++ b/project/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "project" +version = "0.1.0" +edition = "2021" + +[features] +cell = [] \ No newline at end of file diff --git a/project/src/lib.rs b/project/src/lib.rs new file mode 100644 index 00000000000..4c772103f4a --- /dev/null +++ b/project/src/lib.rs @@ -0,0 +1,510 @@ +//! Field projection inside of any container type. +//! +//! # How to use this crate (for container users) +//! +//! Just call `project!(&a.b.c)`. It's that easy! +//! +//! Okay, maybe you want a bit more detail... +//! +//! ## The quick version +//! +//! Here's a quick, very dense explanation. If you want a friendlier +//! explanation, skip to the next section. +//! +//! Given a container type, `C`, an inner type, `I`, and a field in `I`, `f: F`, +//! if `C<I>` implements the [`Projectable`] trait, then `project!` allows you +//! to project from a `&C<I>` to a `&C<F>` which points to the `f` field within +//! the original `C<I>`. For `c: &C<I>`, this is invoked as `project!(&c.f)`. +//! +//! ## The long version +//! +//! Let's say you're using a crate which provides a container type like the +//! following: +//! +//! ```rust +//! /// An unaligned `T`. +//! /// +//! /// `Unalign<T>` has the same layout as `T`, except that its alignment +//! /// is always 1 regardless of `T`'s alignment. +//! #[repr(C, packed)] +//! pub struct Unalign<T> { +//! # t: T, +//! # } +//! ``` +//! +//! You're using it with a type from your crate: +//! +//! ```rust +//! #[repr(C)] +//! 
struct UdpHeader { +//! src_port: u16, +//! dst_port: u16, +//! length: u16, +//! checksum: u16, +//! } +//! ``` +//! +//! Let's say you're reading UDP packets off the network. `UdpHeader` contains +//! `u16`s, and so (on some platforms) it has an alignment of 2. But you don't +//! have any control of where your packets are stored in memory, so you can't +//! construct a `&UdpHeader` from the bytes you've received off the network. +//! Instead, you construct an `&Unalign<UdpHeader>`. +//! +//! That's all well and good, but what if you want to hand out a reference to a +//! packet's source port? It's a `u16` that might not be validly-aligned, so you +//! can't just create a `&u16` like you would be able to do if you just had a +//! `&UdpHeader`. What you'd really like to do is take your +//! `&Unalign<UdpHeader>` and take a reference to the `src_port`... and get back +//! an `&Unalign<u16>`. That's what `project!` lets you do: +//! +//! ```rust +//! # use project::project; +//! # struct UdpHeader { +//! # src_port: u16, +//! # dst_port: u16, +//! # length: u16, +//! # checksum: u16, +//! # } +//! # #[repr(C, packed)] +//! # struct Unalign<T> { +//! # t: T, +//! # } +//! # unsafe impl<T, F> project::Projectable<F, Unalign<F>> for Unalign<T> { +//! # type Inner = T; +//! # } +//! # fn read_udp_header() -> &'static Unalign<UdpHeader> { todo!() } +//! # fn __main() { +//! let packet = read_udp_header(); +//! let src_port = project!(&packet.src_port); +//! # } +//! ``` +//! +//! # How to use this crate (for container authors) +//! +//! TODO + +#![no_std] + +/// A container which supports field projection of its contained type. +/// +/// # Example +/// +/// ```rust +/// # use project::Projectable; +/// #[repr(transparent)] +/// struct Wrapper<T>(T); +/// +/// unsafe impl<T, F> Projectable<F, Wrapper<F>> for Wrapper<T> { +/// type Inner = T; +/// } +/// ``` +/// +/// # Safety +/// +/// TODO: What must be true about a type which implements `Projectable`? E.g., +/// clearly `Box` can't implement `Projectable`, but some `#[repr(transparent)]` +/// wrappers can. 
+/// +/// TODO: Document that, for DSTs, the prefix and tail-slice-element sizes must +/// be the same (put another way, given a reference to the outer type with a +/// certain number of tail elements, a reference to the inner type with the same +/// number of tail elements will reference the same memory region). +pub unsafe trait Projectable { + /// The inner type. + type Inner: ?Sized; +} + +// TODO: +// - Support nested field projection? ie, `&Align, T>` -> +// `&Align, F>`. Doesn't just work naively (nesting calls to +// `project!`) because the outer `project!` thinks the inner type is +// `Wrapper`, where the user wants to name fields on `T`, not fields on +// `Wrapper`. +// - Expose `unsafe_impl_projectable` for implementors to use. + +macro_rules! unsafe_impl_projectable { + ($($c:ident)::* $(: ?$sized:ident)?) => { + unsafe impl Projectable> for $($c)::* { + type Inner = T; + } + }; +} + +unsafe_impl_projectable!(core::mem::MaybeUninit); +unsafe_impl_projectable!(core::num::Wrapping); + +// Cell currently has a soundness hole, so these are behind a feature. +// +// https://github.com/rust-lang/unsafe-code-guidelines/issues/451 +// https://github.com/rust-lang/rust/issues/80778 +#[cfg(feature = "cell")] +unsafe_impl_projectable!(core::cell::UnsafeCell); +#[cfg(feature = "cell")] +unsafe_impl_projectable!(core::cell::Cell); + +/// Performs field projection on `outer`, projecting into the field of type `F` +/// at the address provided by `inner_to_field`. +/// +/// # Safety +/// +/// `inner_to_field(p)` must return an address, `f`, with the following +/// property: If `p` points to a validly-initialized `P::Inner`, then `f` points +/// to a validly-initialized `F`. `inner_to_field` may NOT assume that `p` +/// *does* actually point to a validly-initialized `P::Inner`. `inner_to_field` +/// may also NOT assume that `p` is aligned. 
More specifically, `inner_to_field` +/// may only assume that it is sound to invoke `core::ptr::addr_of!(*p)`; it may +/// not assume anything that is not logically deducible from that assumption. +/// +/// TODO: Mention lifetimes. +/// TODO: Document safety for other arguments. +#[doc(hidden)] +#[inline(always)] +pub fn project( + _unsafe: crate::unsafe_token::UnsafeToken, + outer: &P, + outer_to_inner: OuterToInner, + inner_to_field: InnerToField, + field_to_wrapped_field: FieldToWrappedField, +) -> &W +where + P: Projectable + ?Sized, + // TODO: Leave a breadcrumb here to remove this bound once `Unalign` is + // removed and we can support unsized types. + P::Inner: Sized, + F: ?Sized, + W: ?Sized, + OuterToInner: Fn(*const P) -> *const Unalign, + InnerToField: Fn(*const Unalign) -> *const F, + FieldToWrappedField: Fn(*const F) -> *const W, +{ + let outer: *const P = outer; + let inner = outer_to_inner(outer); + let field = inner_to_field(inner); + let wrapped_field = field_to_wrapped_field(field); + unsafe { &*wrapped_field } +} + +/// Performs field projection on `outer`, projecting into the field of type `F` +/// at the address provided by `inner_to_field`. +/// +/// # Safety +/// +/// TODO: Mention in terms of `project`. +#[doc(hidden)] +#[inline(always)] +pub fn project_mut( + _unsafe: crate::unsafe_token::UnsafeToken, + outer: &mut P, + outer_to_inner: OuterToInner, + inner_to_field: InnerToField, + field_to_wrapped_field: FieldToWrappedField, +) -> &mut W +where + P: Projectable + ?Sized, + // TODO: Leave a breadcrumb here to remove this bound once `Unalign` is + // removed and we can support unsized types. 
+ P::Inner: Sized, + F: ?Sized, + W: ?Sized, + OuterToInner: Fn(*mut P) -> *mut Unalign, + InnerToField: Fn(*mut Unalign) -> *mut F, + FieldToWrappedField: Fn(*mut F) -> *mut W, +{ + let outer: *mut P = outer; + let inner = outer_to_inner(outer); + let field = inner_to_field(inner); + let wrapped_field = field_to_wrapped_field(field); + unsafe { &mut *wrapped_field } +} + +/// Performs field projection. +/// +/// Given a wrapper, `w: W<T>`, and a field in `T`, `f: F`, +/// `project!(&w.f)` returns a reference to a `W<F>` (this works for mutable +/// references too). +// TODO: Is there any way to teach Rust about when references are +// non-overlapping so you can borrow multiple fields mutably at a time? +#[macro_export] +macro_rules! project { + // Note that it's very important that the `mut` branches come first! If they + // came after the immutable branches, then a `mut` token could be matched by + // `$c:ident` in those branches. + (&mut $c:ident $($f:tt)*) => { + $crate::project!(&mut ($c) $($f)*) + }; + (&mut ($c:expr) $($f:tt)*) => { + $crate::project!( + @inner + BorrowMut, + borrow_mut, + project_mut, + *mut _, + inner, + &mut *inner, + addr_of_mut, + $c, + $($f)* + ) + }; + + (&$c:ident $($f:tt)*) => { + $crate::project!(&($c) $($f)*) + }; + (&($c:expr) $($f:tt)*) => { + $crate::project!( + @inner + Borrow, + borrow, + project, + *const _, + inner, + &*inner, + addr_of, + $c, + $($f)* + ) + }; + + ( + @inner + $borrow_trait:ident, + $borrow_method:ident, + $project_fn:ident, + $ptr_ty:ty, + $inner_name:ident, + $convert_inner_raw_to_ref:expr, + $addr_of:ident, + $c:expr, + $($f:tt)* + ) => {{ + // We generate an `UnsafeToken` so that `$project_fn` can itself be + // safe, and thus we don't need to wrap the entire call to `$project_fn` + // in `unsafe { ... }`. This, in turn, is done so that the + // meta-variables `$c` and `$($f)*` are not expanded inside of an + // `unsafe { ... 
}`, which would allow safe Rust code to smuggle in + // unsafe code via a call to `project!` without needing to write the + // `unsafe` keyword. + // + // TODO: + // - Safety comment + // - Slicing seems to be bounds-checked at runtime if need be, but is + // this guaranteed by the reference/stdlib docs? + // - What if the type has a `borrow` method? We unfortunately can't do + // `Borrow::borrow(&$c)` because that defeats the purpose of having + // `.borrow()` produce a `&T` regardless of whether `$c` is a `T` or a + // `&T`. + let token = unsafe { $crate::unsafe_token::UnsafeToken::new() }; + use ::core::borrow::$borrow_trait as _; + $crate::$project_fn( + token, + $c.$borrow_method(), + |outer| outer as $ptr_ty, + |inner| if false { + // This branch is never executed, but allows us to ensure that + // `$($f)*` doesn't contain any unsafe code that isn't wrapped + // in an `unsafe` block. If it does, then wrapping it in + // `unsafe` - as we do in the `else` branch - would allow users + // to write unsafe code without needing to write `unsafe`. + // + // The way we accomplish this is to generate a reference from + // `inner` (which is a raw pointer). That allows us to extract + // the unsafe operation of converting to a reference and wrap it + // in `unsafe { ... }` on its own, while leaving the `$($f)*` + // not wrapped in `unsafe { ... }`. Note that this is NOT sound + // to execute in the general case, but that's okay because we're + // in an `if false` branch. For example, if the wrapper type is + // `#[repr(packed)]`, then `inner_ref` may not be validly + // aligned, which is unsound. 
+ let $inner_name = inner; + let inner_ref = unsafe { $convert_inner_raw_to_ref }; + ::core::ptr::$addr_of!(inner_ref .0 $($f)*) + } else { + unsafe { ::core::ptr::$addr_of!((*inner) .0 $($f)* ) } + }, + |field| field as $ptr_ty, + ) + }}; +} + +// TODO(#196), TODO(https://github.com/rust-lang/reference/pull/1387), +// TODO(https://github.com/rust-lang/rust/pull/114330): Remove this once it is +// no longer UB to use `addr_of!` with an unaligned pointer, and once Miri knows +// that this isn't UB. Note that this struct is the only reason that `project!` +// doesn't support unsized types, so removing this will also address that +// limitation. +#[doc(hidden)] +#[repr(packed)] +pub struct Unalign(pub T); + +#[doc(hidden)] +pub mod unsafe_token { + /// A token used to prove that the `unsafe` keyword has been written + /// somewhere. + pub struct UnsafeToken(()); + + impl UnsafeToken { + /// Constructs a new `UnsafeToken`. + /// + /// # Safety + /// + /// The caller is responsible for ensuring that they uphold the safety + /// invariants of any APIs which consume this token. + pub unsafe fn new() -> UnsafeToken { + UnsafeToken(()) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[derive(Copy)] + #[repr(C, packed)] + struct Unalign(T); + unsafe_impl_projectable!(Unalign); + + impl Clone for Unalign { + fn clone(&self) -> Unalign { + *self + } + } + + #[derive(Eq, PartialEq, Debug)] + #[repr(transparent)] + struct Wrapper(T); + unsafe_impl_projectable!(Wrapper: ?Sized); + + #[derive(Copy, Clone, Debug, Eq, PartialEq)] + struct Foo { + a: u8, + b: u16, + c: T, + } + + macro_rules! test_project { + (($c:ident $($f:tt)*) => $expect:expr) => {{ + // Test with an immutable reference. + let f = project!(&$c $($f)*); + assert_eq!({ f.0 }, $expect); + // Test with a mutable reference. + let f = project!(&mut $c $($f)*); + assert_eq!({ f.0 }, $expect); + + // Run the same tests with `$c` in parentheses. 
+ test_project!((($c) $($f)*) => $expect); + }}; + ((($c:expr) $($f:tt)*) => $expect:expr) => {{ + // Test with an immutable reference. + let f = project!(&($c) $($f)*); + assert_eq!({ f.0 }, $expect); + // Test with a mutable reference. + let f = project!(&mut ($c) $($f)*); + assert_eq!({ f.0 }, $expect); + }}; + } + + #[test] + fn test_project() { + let mut u = Unalign(Foo:: { a: 1, b: 2, c: 3 }); + + test_project!((u.a) => 1); + test_project!((u.b) => 2); + test_project!((u.c) => 3); + } + + #[test] + fn test_project_complex() { + // Test projection using a complex expression rather than just the + // identifier of a local variable. + + let mut u = Unalign(Foo:: { a: 1, b: 2, c: 3 }); + + fn ident(t: T) -> T { + t + } + + let ua = project!(&(ident(&u)).a); + let ub = project!(&(ident(&u)).b); + assert_eq!({ ua.0 }, 1); + assert_eq!({ ub.0 }, 2); + + let uc = project!(&mut (ident(&mut u)).c); + assert_eq!({ uc.0 }, 3); + } + + #[test] + fn test_project_complex_access() { + let mut u = Unalign(Foo::> { a: 1, b: 2, c: Foo { a: 3, b: 4, c: 5 } }); + test_project!((u.c) => Foo { a: 3, b: 4, c: 5 }); + test_project!((u.c.a) => 3); + + let mut u = Unalign(Foo::<[u32; 3]> { a: 1, b: 2, c: [3, 4, 5] }); + test_project!((u.c) => [3, 4, 5]); + test_project!((u.c[0]) => 3); + + let mut u = Unalign([0u8, 1, 2]); + test_project!((u[0]) => 0); + test_project!((u[1]) => 1); + test_project!((u[2]) => 2); + + // Test that indexing works using variables rather than literals. + for i in 0u8..3 { + test_project!((u[usize::from(i)]) => i); + } + + let mut u = Unalign([[0u8, 1, 2], [3, 4, 5], [6, 7, 8]]); + test_project!((u[0][0]) => 0); + test_project!((u[1][1]) => 4); + test_project!((u[2][2]) => 8); + + // Test that indexing works using variables rather than literals. 
+ for (i, elem) in [(0usize, 0u8), (1, 4), (2, 8)] { + test_project!((u[i][i]) => elem); + } + } + + // TODO(#196), TODO(https://github.com/rust-lang/reference/pull/1387), + // TODO(https://github.com/rust-lang/rust/pull/114330): Uncomment this once + // unsized projection is supported. + // + // #[test] + // fn test_project_unsized() { + // let inner = [0u8, 1, 2]; + // let inner_ref: &[u8] = &inner[..]; + // let wrapper_ref: &Wrapper<([u8],)> = unsafe { &*(inner_ref as *const _ as *const _) }; + + // let first = project!(&wrapper_ref.0[1]); + // assert_eq!(first, &Wrapper(1u8)); + // let first_two = project!(&wrapper_ref.0[0..2]); + // assert_eq!(&first_two.0, &[0, 1]); + // } + + #[test] + #[should_panic(expected = "index out of bounds: the len is 3 but the index is 3")] + fn test_project_out_of_bounds() { + let u = Unalign([0u8, 1, 2]); + let u0 = project!(&u[0]); + assert_eq!({ u0.0 }, 0); + let _ = project!(&u[3]); + } + + #[test] + #[should_panic(expected = "index out of bounds: the len is 3 but the index is 3")] + fn test_project_out_of_bounds_variable() { + let u = Unalign([0u8, 1, 2]); + let u0 = project!(&u[0]); + assert_eq!({ u0.0 }, 0); + let i = 3; + let _ = project!(&u[i]); + } + + #[test] + #[should_panic(expected = "range end index 4 out of range for slice of length 3")] + fn test_project_out_of_bounds_range() { + let u = Wrapper([0u8, 1, 2]); + let u0 = project!(&u[0]); + assert_eq!({ u0.0 }, 0); + let _ = project!(&u[0..4]); + } +}