Skip to content

Commit

Permalink
add wrapping_offset_from which allows wrapping but still requires ptr…
Browse files Browse the repository at this point in the history
…s to be for the same allocation
  • Loading branch information
RalfJung committed Jun 20, 2023
1 parent b9d608c commit c49d0aa
Show file tree
Hide file tree
Showing 12 changed files with 378 additions and 62 deletions.
31 changes: 18 additions & 13 deletions compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
bx.icmp(IntPredicate::IntEQ, a, b)
}

sym::ptr_offset_from | sym::ptr_offset_from_unsigned => {
sym::ptr_offset_from | sym::ptr_offset_from_unsigned | sym::ptr_wrapping_offset_from => {
let ty = substs.type_at(0);
let pointee_size = bx.layout_of(ty).size;

Expand All @@ -447,18 +447,23 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
let a = bx.ptrtoint(a, bx.type_isize());
let b = bx.ptrtoint(b, bx.type_isize());
let pointee_size = bx.const_usize(pointee_size.bytes());
if name == sym::ptr_offset_from {
// This is the same sequence that Clang emits for pointer subtraction.
// It can be neither `nsw` nor `nuw` because the input is treated as
// unsigned but then the output is treated as signed, so neither works.
let d = bx.sub(a, b);
// this is where the signed magic happens (notice the `s` in `exactsdiv`)
bx.exactsdiv(d, pointee_size)
} else {
// The `_unsigned` version knows the relative ordering of the pointers,
// so can use `sub nuw` and `udiv exact` instead of dealing in signed.
let d = bx.unchecked_usub(a, b);
bx.exactudiv(d, pointee_size)
match name {
sym::ptr_offset_from | sym::ptr_wrapping_offset_from => {
// This is the same sequence that Clang emits for pointer subtraction.
// Even for `ptr_offset_from`, which cannot wrap, this can be neither `nsw`
// nor `nuw` because the input is treated as unsigned but then the output is
// treated as signed, so neither works.
let d = bx.sub(a, b);
// this is where the signed magic happens (notice the `s` in `exactsdiv`)
bx.exactsdiv(d, pointee_size)
}
sym::ptr_offset_from_unsigned => {
// The `_unsigned` version knows the relative ordering of the pointers,
// so can use `sub nuw` and `udiv exact` instead of dealing in signed.
let d = bx.unchecked_usub(a, b);
bx.exactudiv(d, pointee_size)
}
_ => bug!(),
}
}

Expand Down
89 changes: 49 additions & 40 deletions compiler/rustc_const_eval/src/interpret/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,9 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
let offset_ptr = ptr.wrapping_signed_offset(offset_bytes, self);
self.write_pointer(offset_ptr, dest)?;
}
sym::ptr_offset_from | sym::ptr_offset_from_unsigned => {
sym::ptr_offset_from
| sym::ptr_offset_from_unsigned
| sym::ptr_wrapping_offset_from => {
let a = self.read_pointer(&args[0])?;
let b = self.read_pointer(&args[1])?;

Expand All @@ -288,6 +290,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
(Err(_), _) | (_, Err(_)) => {
// We managed to find a valid allocation for one pointer, but not the other.
// That means they are definitely not pointing to the same allocation.
// FIXME: if at least one pointer was a wildcard pointer, we should not throw UB here
// but just use their absolute addresses instead.
throw_ub_custom!(
fluent::const_eval_different_allocations,
name = intrinsic_name,
Expand Down Expand Up @@ -315,57 +319,62 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
let b_offset = ImmTy::from_uint(b_offset, usize_layout);
self.overflowing_binary_op(BinOp::Sub, &a_offset, &b_offset)?
};
if overflowed {
// a < b
if intrinsic_name == sym::ptr_offset_from_unsigned {
throw_ub_custom!(
fluent::const_eval_unsigned_offset_from_overflow,
a_offset = a_offset,
b_offset = b_offset,
);
}
// The signed form of the intrinsic allows this. If we interpret the
// difference as isize, we'll get the proper signed difference. If that
// seems *positive*, they were more than isize::MAX apart.
let dist = val.to_target_isize(self)?;
if dist >= 0 {
throw_ub_custom!(
fluent::const_eval_offset_from_underflow,
name = intrinsic_name,
);
}
dist
} else {
// b >= a
let dist = val.to_target_isize(self)?;
// If converting to isize produced a *negative* result, we had an overflow
// because they were more than isize::MAX apart.
if dist < 0 {
throw_ub_custom!(
fluent::const_eval_offset_from_overflow,
name = intrinsic_name,
);
let dist = val.to_target_isize(self)?;
if intrinsic_name != sym::ptr_wrapping_offset_from {
// Overflow check.
if overflowed {
// a < b
if intrinsic_name == sym::ptr_offset_from_unsigned {
throw_ub_custom!(
fluent::const_eval_unsigned_offset_from_overflow,
a_offset = a_offset,
b_offset = b_offset,
);
}
// The signed form of the intrinsic allows this. If we interpret the
// difference as isize, we'll get the proper signed difference. If that
// seems *positive*, they were more than isize::MAX apart.
if dist >= 0 {
throw_ub_custom!(
fluent::const_eval_offset_from_underflow,
name = intrinsic_name,
);
}
} else {
// b >= a, no overflow during subtraction.
// If converting to isize produced a *negative* result, we had an overflow
// when converting to `isize` because they were more than isize::MAX apart.
if dist < 0 {
throw_ub_custom!(
fluent::const_eval_offset_from_overflow,
name = intrinsic_name,
);
}
}
dist
}
dist
};

// Check that the range between them is dereferenceable ("in-bounds or one past the
// end of the same allocation"). This is like the check in ptr_offset_inbounds.
let min_ptr = if dist >= 0 { b } else { a };
self.check_ptr_access_align(
min_ptr,
Size::from_bytes(dist.unsigned_abs()),
Align::ONE,
CheckInAllocMsg::OffsetFromTest,
)?;
if intrinsic_name != sym::ptr_wrapping_offset_from {
let min_ptr = if dist >= 0 { b } else { a };
self.check_ptr_access_align(
min_ptr,
Size::from_bytes(dist.unsigned_abs()),
Align::ONE,
CheckInAllocMsg::OffsetFromTest,
)?;
}

// Perform division by size to compute return value.
let ret_layout = if intrinsic_name == sym::ptr_offset_from_unsigned {
assert!(0 <= dist && dist <= self.target_isize_max());
usize_layout
} else {
assert!(self.target_isize_min() <= dist && dist <= self.target_isize_max());
if intrinsic_name != sym::ptr_wrapping_offset_from {
assert!(self.target_isize_min() <= dist && dist <= self.target_isize_max());
}
isize_layout
};
let pointee_layout = self.layout_of(substs.type_at(0))?;
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_hir_analysis/src/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
tcx.mk_unit(),
),

sym::ptr_offset_from => {
sym::ptr_offset_from | sym::ptr_wrapping_offset_from => {
(1, vec![tcx.mk_imm_ptr(param(0)), tcx.mk_imm_ptr(param(0))], tcx.types.isize)
}
sym::ptr_offset_from_unsigned => {
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,7 @@ symbols! {
ptr_offset_from,
ptr_offset_from_unsigned,
ptr_unique,
ptr_wrapping_offset_from,
pub_macro_rules,
pub_restricted,
public,
Expand Down
6 changes: 6 additions & 0 deletions library/core/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2337,6 +2337,12 @@ extern "rust-intrinsic" {
#[rustc_nounwind]
pub fn ptr_offset_from_unsigned<T>(ptr: *const T, base: *const T) -> usize;

/// See documentation of `<*const T>::wrapping_offset_from` for details.
#[rustc_const_unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
#[rustc_nounwind]
#[cfg(not(bootstrap))]
pub fn ptr_wrapping_offset_from<T>(ptr: *const T, base: *const T) -> isize;

/// See documentation of `<*const T>::guaranteed_eq` for details.
/// Returns `2` if the result is unknown.
/// Returns `1` if the pointers are guaranteed equal
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@
//
// Library features:
// tidy-alphabetical-start
#![cfg_attr(not(bootstrap), feature(ptr_wrapping_offset_from))]
#![feature(char_indices_offset)]
#![feature(const_align_of_val)]
#![feature(const_align_of_val_raw)]
Expand Down
114 changes: 112 additions & 2 deletions library/core/src/ptr/const_ptr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,9 @@ impl<T: ?Sized> *const T {
/// Calculates the distance between two pointers. The returned value is in
/// units of T: the distance in bytes divided by `mem::size_of::<T>()`.
///
/// This function is the inverse of [`offset`].
/// This function is the inverse of [`offset`]: it is valid to call if and only if
/// `self` could have been computed as `origin.offset(n)` for some `n`, and it will
/// then return that `n`.
///
/// [`offset`]: #method.offset
///
Expand Down Expand Up @@ -644,6 +646,12 @@ impl<T: ?Sized> *const T {
/// (Note that [`offset`] and [`add`] also have a similar limitation and hence cannot be used on
/// such large allocations either.)
///
/// The requirement for pointers to be derived from the same allocated object is primarily
/// needed for `const`-compatibility: at compile-time, pointers into *different* allocated
/// object do not have a known distance to each other. However, the requirement also exists at
/// runtime, and may be exploited by optimizations. You can use `(self as usize).sub(origin as
/// usize) / mem::size_of::<T>()` to avoid this requirement.
///
/// [`add`]: #method.add
/// [allocated object]: crate::ptr#allocated-object
///
Expand Down Expand Up @@ -701,7 +709,7 @@ impl<T: ?Sized> *const T {
/// units of **bytes**.
///
/// This is purely a convenience for casting to a `u8` pointer and
/// using [offset_from][pointer::offset_from] on it. See that method for
/// using [`offset_from`][pointer::offset_from] on it. See that method for
/// documentation and safety requirements.
///
/// For non-`Sized` pointees this operation considers only the data pointers,
Expand Down Expand Up @@ -799,6 +807,108 @@ impl<T: ?Sized> *const T {
unsafe { intrinsics::ptr_offset_from_unsigned(self, origin) }
}

/// Calculates the distance between two pointers using wrapping arithmetic. The returned value
/// is in units of T: the distance in bytes divided by `mem::size_of::<T>()`.
///
/// This function is the inverse of [`wrapping_offset`]: it is valid to call if and only if
/// `self` could have been computed as `origin.wrapping_offset(n)` for some `n`, and it will
/// then return that `n`.
///
/// [`wrapping_offset`]: #method.wrapping_offset
///
/// # Safety
///
/// If any of the following conditions are violated, the result is Undefined
/// Behavior:
///
/// * Both pointers must be *derived from* a pointer to the same [allocated object].
/// (See below for an example.)
///
/// * The distance between the pointers, in bytes, must be an exact multiple
/// of the size of `T`.
///
/// Unlike [`offset_from`][pointer::offset_from], this method does *not* require the pointers to
/// be in-bounds of the object they are derived from, nor does it impose any restrictions
/// regarding the maximum distance or wrapping around the address space.
///
/// The requirement for pointers to be derived from the same allocated object is primarily
/// needed for `const`-compatibility: at compile-time, pointers into *different* allocated
/// object do not have a known distance to each other. However, the requirement also exists at
/// runtime, and may be exploited by optimizations. You can use `(self as usize).sub(origin as
/// usize) / mem::size_of::<T>()` to avoid this requirement.
///
/// [allocated object]: crate::ptr#allocated-object
///
/// # Panics
///
/// This function panics if `T` is a Zero-Sized Type ("ZST").
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// #![feature(ptr_wrapping_offset_from)]
/// let a = [0; 2];
/// let ptr1: *const i32 = &a[1];
/// let ptr2: *const i32 = a.as_ptr().wrapping_offset(3); // out-of-bounds!
/// unsafe {
/// assert_eq!(ptr2.wrapping_offset_from(ptr1), 2);
/// assert_eq!(ptr1.wrapping_offset_from(ptr2), -2);
/// assert_eq!(ptr1.wrapping_offset(2), ptr2);
/// assert_eq!(ptr2.wrapping_offset(-2), ptr1);
/// }
/// ```
///
/// *Incorrect* usage:
///
/// ```rust,no_run
/// #![feature(ptr_wrapping_offset_from)]
/// let ptr1 = Box::into_raw(Box::new(0u8)) as *const u8;
/// let ptr2 = Box::into_raw(Box::new(1u8)) as *const u8;
/// let diff = (ptr2 as isize).wrapping_sub(ptr1 as isize);
/// // Make ptr2_other an "alias" of ptr2, but derived from ptr1.
/// let ptr2_other = (ptr1 as *const u8).wrapping_offset(diff);
/// assert_eq!(ptr2 as usize, ptr2_other as usize);
/// // Since ptr2_other and ptr2 are derived from pointers to different objects,
/// // computing their offset is undefined behavior, even though
/// // they point to the same address!
/// unsafe {
/// let zero = ptr2_other.wrapping_offset_from(ptr2); // Undefined Behavior
/// }
/// ```
#[unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
#[rustc_const_unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
#[cfg(not(bootstrap))]
pub const unsafe fn wrapping_offset_from(self, origin: *const T) -> isize
where
T: Sized,
{
// SAFETY: the caller must uphold the safety contract for `ptr_offset_from`.
unsafe { intrinsics::ptr_wrapping_offset_from(self, origin) }
}

/// Calculates the distance between two pointers using wrapping arithmetic. The returned value
/// is in units of **bytes**.
///
/// This is purely a convenience for casting to a `u8` pointer and using
/// [`wrapping_offset_from`][pointer::wrapping_offset_from] on it. See that method for
/// documentation and safety requirements.
///
/// For non-`Sized` pointees this operation considers only the data pointers,
/// ignoring the metadata.
#[inline(always)]
#[unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
#[rustc_const_unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
#[cfg(not(bootstrap))]
pub const unsafe fn wrapping_byte_offset_from<U: ?Sized>(self, origin: *const U) -> isize {
// SAFETY: the caller must uphold the safety contract for `wrapping_offset_from`.
unsafe { self.cast::<u8>().wrapping_offset_from(origin.cast::<u8>()) }
}

/// Returns whether two pointers are guaranteed to be equal.
///
/// At runtime this function behaves like `Some(self == other)`.
Expand Down
Loading

0 comments on commit c49d0aa

Please sign in to comment.