From 454bca514aab74a8c3c746908cac2d9c61300941 Mon Sep 17 00:00:00 2001
From: Scott McMurray
Date: Thu, 6 Apr 2023 13:53:10 -0700
Subject: [PATCH 1/2] Check `CastKind::Transmute` sizes in a better way

Fixes #110005
---
 compiler/rustc_codegen_ssa/src/mir/operand.rs | 25 +++++++
 compiler/rustc_codegen_ssa/src/mir/rvalue.rs  | 67 +++++++++--------
 tests/codegen/intrinsics/transmute.rs         | 74 ++++++++++++++++++-
 3 files changed, 134 insertions(+), 32 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/mir/operand.rs b/compiler/rustc_codegen_ssa/src/mir/operand.rs
index ddef4aaee3bab..b37797fef4ce3 100644
--- a/compiler/rustc_codegen_ssa/src/mir/operand.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/operand.rs
@@ -259,6 +259,31 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
 }
 
 impl<'a, 'tcx, V: CodegenObject> OperandValue<V> {
+    /// Returns an `OperandValue` that's generally UB to use in any way.
+    ///
+    /// Depending on the `layout`, returns an `Immediate` or `Pair` containing
+    /// poison value(s), or a `Ref` containing a poison pointer.
+    ///
+    /// Supports sized types only.
+    pub fn poison<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
+        bx: &mut Bx,
+        layout: TyAndLayout<'tcx>,
+    ) -> OperandValue<V> {
+        assert!(layout.is_sized());
+        if bx.cx().is_backend_immediate(layout) {
+            let ibty = bx.cx().immediate_backend_type(layout);
+            OperandValue::Immediate(bx.const_poison(ibty))
+        } else if bx.cx().is_backend_scalar_pair(layout) {
+            let ibty0 = bx.cx().scalar_pair_element_backend_type(layout, 0, true);
+            let ibty1 = bx.cx().scalar_pair_element_backend_type(layout, 1, true);
+            OperandValue::Pair(bx.const_poison(ibty0), bx.const_poison(ibty1))
+        } else {
+            let bty = bx.cx().backend_type(layout);
+            let ptr_bty = bx.cx().type_ptr_to(bty);
+            OperandValue::Ref(bx.const_poison(ptr_bty), None, layout.align.abi)
+        }
+    }
+
     pub fn store<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
         self,
         bx: &mut Bx,
diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index 400512fe4e9ad..6e4c0be12f083 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -158,17 +158,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         debug_assert!(src.layout.is_sized());
         debug_assert!(dst.layout.is_sized());
 
-        if src.layout.size != dst.layout.size
-            || src.layout.abi.is_uninhabited()
-            || dst.layout.abi.is_uninhabited()
-        {
-            // In all of these cases it's UB to run this transmute, but that's
-            // known statically so might as well trap for it, rather than just
-            // making it unreachable.
-            bx.abort();
-            return;
-        }
-
         if let Some(val) = self.codegen_transmute_operand(bx, src, dst.layout) {
             val.store(bx, dst);
             return;
@@ -202,8 +191,21 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         operand: OperandRef<'tcx, Bx::Value>,
         cast: TyAndLayout<'tcx>,
     ) -> Option<OperandValue<Bx::Value>> {
-        // Callers already checked that the layout sizes match
-        debug_assert_eq!(operand.layout.size, cast.size);
+        // Check for transmutes that are always UB.
+        if operand.layout.size != cast.size
+            || operand.layout.abi.is_uninhabited()
+            || cast.abi.is_uninhabited()
+        {
+            if !operand.layout.abi.is_uninhabited() {
+                // Since this is known statically and the input could have existed
+                // without already having hit UB, might as well trap for it.
+                bx.abort();
+            }
+
+            // Because this transmute is UB, return something easy to generate,
+            // since it's fine that later uses of the value are probably UB.
+            return Some(OperandValue::poison(bx, cast));
+        }
 
         let operand_kind = self.value_kind(operand.layout);
         let cast_kind = self.value_kind(cast);
@@ -221,11 +223,13 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                 let OperandValueKind::Immediate(in_scalar) = operand_kind else {
                     bug!("Found {operand_kind:?} for operand {operand:?}");
                 };
-                if let OperandValueKind::Immediate(out_scalar) = cast_kind {
+                if let OperandValueKind::Immediate(out_scalar) = cast_kind
+                    && in_scalar.size(self.cx) == out_scalar.size(self.cx)
+                {
                     let cast_bty = bx.backend_type(cast);
-                    Some(OperandValue::Immediate(Self::transmute_immediate(
-                        bx, imm, in_scalar, out_scalar, cast_bty,
-                    )))
+                    Some(OperandValue::Immediate(
+                        self.transmute_immediate(bx, imm, in_scalar, out_scalar, cast_bty),
+                    ))
                 } else {
                     None
                 }
@@ -234,12 +238,15 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                 let OperandValueKind::Pair(in_a, in_b) = operand_kind else {
                     bug!("Found {operand_kind:?} for operand {operand:?}");
                 };
-                if let OperandValueKind::Pair(out_a, out_b) = cast_kind {
+                if let OperandValueKind::Pair(out_a, out_b) = cast_kind
+                    && in_a.size(self.cx) == out_a.size(self.cx)
+                    && in_b.size(self.cx) == out_b.size(self.cx)
+                {
                     let out_a_ibty = bx.scalar_pair_element_backend_type(cast, 0, false);
                     let out_b_ibty = bx.scalar_pair_element_backend_type(cast, 1, false);
                     Some(OperandValue::Pair(
-                        Self::transmute_immediate(bx, imm_a, in_a, out_a, out_a_ibty),
-                        Self::transmute_immediate(bx, imm_b, in_b, out_b, out_b_ibty),
+                        self.transmute_immediate(bx, imm_a, in_a, out_a, out_a_ibty),
+                        self.transmute_immediate(bx, imm_b, in_b, out_b, out_b_ibty),
                     ))
                 } else {
                     None
@@ -254,12 +261,15 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
     /// `to_backend_ty` must be the *non*-immediate backend type (so it will be
     /// `i8`, not `i1`, for `bool`-like types.)
     fn transmute_immediate(
+        &self,
         bx: &mut Bx,
         mut imm: Bx::Value,
         from_scalar: abi::Scalar,
         to_scalar: abi::Scalar,
         to_backend_ty: Bx::Type,
     ) -> Bx::Value {
+        debug_assert_eq!(from_scalar.size(self.cx), to_scalar.size(self.cx));
+
         use abi::Primitive::*;
         imm = bx.from_immediate(imm);
         imm = match (from_scalar.primitive(), to_scalar.primitive()) {
@@ -831,14 +841,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                 let operand_ty = operand.ty(self.mir, self.cx.tcx());
                 let cast_layout = self.cx.layout_of(self.monomorphize(cast_ty));
                 let operand_layout = self.cx.layout_of(self.monomorphize(operand_ty));
-                if operand_layout.size != cast_layout.size
-                    || operand_layout.abi.is_uninhabited()
-                    || cast_layout.abi.is_uninhabited()
-                {
-                    // Send UB cases to the full form so the operand version can
-                    // `bitcast` without worrying about malformed IR.
-                    return false;
-                }
 
                 match (self.value_kind(operand_layout), self.value_kind(cast_layout)) {
                     // Can always load from a pointer as needed
                     (OperandValueKind::Ref, _) => true,
@@ -847,9 +849,12 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                     // Need to generate an `alloc` to get a pointer from an immediate
                    (OperandValueKind::Immediate(..) | OperandValueKind::Pair(..),
                        OperandValueKind::Ref) => false,
-                    // When we have scalar immediates, we can convert them as needed
-                    (OperandValueKind::Immediate(..), OperandValueKind::Immediate(..)) |
-                    (OperandValueKind::Pair(..), OperandValueKind::Pair(..)) => true,
+                    // When we have scalar immediates, we can only convert things
+                    // where the sizes match, to avoid endianness questions.
+                    (OperandValueKind::Immediate(a), OperandValueKind::Immediate(b)) =>
+                        a.size(self.cx) == b.size(self.cx),
+                    (OperandValueKind::Pair(a0, a1), OperandValueKind::Pair(b0, b1)) =>
+                        a0.size(self.cx) == b0.size(self.cx) && a1.size(self.cx) == b1.size(self.cx),
 
                     // Send mixings between scalars and pairs through the memory route
                     // FIXME: Maybe this could use insertvalue/extractvalue instead?
diff --git a/tests/codegen/intrinsics/transmute.rs b/tests/codegen/intrinsics/transmute.rs
index 7ad0e62213cb2..c2295ca9a0c3c 100644
--- a/tests/codegen/intrinsics/transmute.rs
+++ b/tests/codegen/intrinsics/transmute.rs
@@ -54,6 +54,32 @@ pub unsafe fn check_smaller_size(x: u32) -> u16 {
     }
 }
 
+// CHECK-LABEL: @check_smaller_array(
+#[no_mangle]
+#[custom_mir(dialect = "runtime", phase = "initial")]
+pub unsafe fn check_smaller_array(x: [u32; 7]) -> [u32; 3] {
+    // CHECK: call void @llvm.trap
+    mir!{
+        {
+            RET = CastTransmute(x);
+            Return()
+        }
+    }
+}
+
+// CHECK-LABEL: @check_bigger_array(
+#[no_mangle]
+#[custom_mir(dialect = "runtime", phase = "initial")]
+pub unsafe fn check_bigger_array(x: [u32; 3]) -> [u32; 7] {
+    // CHECK: call void @llvm.trap
+    mir!{
+        {
+            RET = CastTransmute(x);
+            Return()
+        }
+    }
+}
+
 // CHECK-LABEL: @check_to_uninhabited(
 #[no_mangle]
 #[custom_mir(dialect = "runtime", phase = "initial")]
@@ -71,7 +97,7 @@ pub unsafe fn check_to_uninhabited(x: u16) -> BigNever {
 #[no_mangle]
 #[custom_mir(dialect = "runtime", phase = "initial")]
 pub unsafe fn check_from_uninhabited(x: BigNever) -> u16 {
-    // CHECK: call void @llvm.trap
+    // CHECK: ret i16 poison
     mir!{
         {
             RET = CastTransmute(x);
@@ -301,3 +327,49 @@ pub unsafe fn check_pair_to_array(x: (i64, u64)) -> [u8; 16] {
     // CHECK: store i64 %x.1, ptr %{{.+}}, align 1
     transmute(x)
 }
+
+// CHECK-LABEL: @check_heterogeneous_integer_pair(
+#[no_mangle]
+pub unsafe fn check_heterogeneous_integer_pair(x: (i32, bool)) -> (bool, u32) {
+    // CHECK: store i32 %x.0
+    // CHECK: %[[WIDER:.+]] = zext i1 %x.1 to i8
+    // CHECK: store i8 %[[WIDER]]
+
+    // CHECK: %[[BYTE:.+]] = load i8
+    // CHECK: trunc i8 %[[BYTE:.+]] to i1
+    // CHECK: load i32
+    transmute(x)
+}
+
+// CHECK-LABEL: @check_heterogeneous_float_pair(
+#[no_mangle]
+pub unsafe fn check_heterogeneous_float_pair(x: (f64, f32)) -> (f32, f64) {
+    // CHECK: store double %x.0
+    // CHECK: store float %x.1
+    // CHECK: %[[A:.+]] = load float
+    // CHECK: %[[B:.+]] = load double
+    // CHECK: %[[P:.+]] = insertvalue { float, double } poison, float %[[A]], 0
+    // CHECK: insertvalue { float, double } %[[P]], double %[[B]], 1
+    transmute(x)
+}
+
+// CHECK-LABEL: @check_issue_110005(
+#[no_mangle]
+pub unsafe fn check_issue_110005(x: (usize, bool)) -> Option<Box<[u8]>> {
+    // CHECK: store i64 %x.0
+    // CHECK: %[[WIDER:.+]] = zext i1 %x.1 to i8
+    // CHECK: store i8 %[[WIDER]]
+    // CHECK: load ptr
+    // CHECK: load i64
+    transmute(x)
+}
+
+// CHECK-LABEL: @check_pair_to_dst_ref(
+#[no_mangle]
+pub unsafe fn check_pair_to_dst_ref<'a>(x: (usize, usize)) -> &'a [u8] {
+    // CHECK: %0 = inttoptr i64 %x.0 to ptr
+    // CHECK: %1 = insertvalue { ptr, i64 } poison, ptr %0, 0
+    // CHECK: %2 = insertvalue { ptr, i64 } %1, i64 %x.1, 1
+    // CHECK: ret { ptr, i64 } %2
+    transmute(x)
+}

From d757c4b904869967f1e665dc2bb9a2ca5122bc96 Mon Sep 17 00:00:00 2001
From: Scott McMurray
Date: Thu, 6 Apr 2023 16:24:32 -0700
Subject: [PATCH 2/2] Handle not all immediates having `abi::Scalar`s

---
 compiler/rustc_codegen_ssa/src/mir/rvalue.rs | 54 ++++++++++++++----
 tests/codegen/intrinsics/transmute-x64.rs    | 35 ++++++++++++
 tests/codegen/intrinsics/transmute.rs        | 58 +++++++++++++++++++-
 3 files changed, 134 insertions(+), 13 deletions(-)
 create mode 100644 tests/codegen/intrinsics/transmute-x64.rs

diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index 6e4c0be12f083..d88226f5db053 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -223,13 +223,21 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                 let OperandValueKind::Immediate(in_scalar) = operand_kind else {
                     bug!("Found {operand_kind:?} for operand {operand:?}");
                 };
-                if let OperandValueKind::Immediate(out_scalar) = cast_kind
-                    && in_scalar.size(self.cx) == out_scalar.size(self.cx)
-                {
-                    let cast_bty = bx.backend_type(cast);
-                    Some(OperandValue::Immediate(
-                        self.transmute_immediate(bx, imm, in_scalar, out_scalar, cast_bty),
-                    ))
+                if let OperandValueKind::Immediate(out_scalar) = cast_kind {
+                    match (in_scalar, out_scalar) {
+                        (ScalarOrZst::Zst, ScalarOrZst::Zst) => {
+                            Some(OperandRef::new_zst(bx, cast).val)
+                        }
+                        (ScalarOrZst::Scalar(in_scalar), ScalarOrZst::Scalar(out_scalar))
+                            if in_scalar.size(self.cx) == out_scalar.size(self.cx) =>
+                        {
+                            let cast_bty = bx.backend_type(cast);
+                            Some(OperandValue::Immediate(
+                                self.transmute_immediate(bx, imm, in_scalar, out_scalar, cast_bty),
+                            ))
+                        }
+                        _ => None,
+                    }
                 } else {
                     None
                 }
@@ -892,13 +900,18 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         if self.cx.is_backend_immediate(layout) {
             debug_assert!(!self.cx.is_backend_scalar_pair(layout));
             OperandValueKind::Immediate(match layout.abi {
-                abi::Abi::Scalar(s) => s,
-                abi::Abi::Vector { element, .. } => element,
-                x => bug!("Couldn't translate {x:?} as backend immediate"),
+                abi::Abi::Scalar(s) => ScalarOrZst::Scalar(s),
+                abi::Abi::Vector { element, .. } => ScalarOrZst::Scalar(element),
+                _ if layout.is_zst() => ScalarOrZst::Zst,
+                x => span_bug!(self.mir.span, "Couldn't translate {x:?} as backend immediate"),
             })
         } else if self.cx.is_backend_scalar_pair(layout) {
             let abi::Abi::ScalarPair(s1, s2) = layout.abi else {
-                bug!("Couldn't translate {:?} as backend scalar pair", layout.abi)
+                span_bug!(
+                    self.mir.span,
+                    "Couldn't translate {:?} as backend scalar pair",
+                    layout.abi,
+                );
             };
             OperandValueKind::Pair(s1, s2)
         } else {
@@ -907,9 +920,26 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
     }
 }
 
+/// The variants of this match [`OperandValue`], giving details about the
+/// backend values that will be held in that other type.
 #[derive(Debug, Copy, Clone)]
 enum OperandValueKind {
     Ref,
-    Immediate(abi::Scalar),
+    Immediate(ScalarOrZst),
     Pair(abi::Scalar, abi::Scalar),
 }
+
+#[derive(Debug, Copy, Clone)]
+enum ScalarOrZst {
+    Zst,
+    Scalar(abi::Scalar),
+}
+
+impl ScalarOrZst {
+    pub fn size(self, cx: &impl abi::HasDataLayout) -> abi::Size {
+        match self {
+            ScalarOrZst::Zst => abi::Size::ZERO,
+            ScalarOrZst::Scalar(s) => s.size(cx),
+        }
+    }
+}
diff --git a/tests/codegen/intrinsics/transmute-x64.rs b/tests/codegen/intrinsics/transmute-x64.rs
new file mode 100644
index 0000000000000..99d258c62040f
--- /dev/null
+++ b/tests/codegen/intrinsics/transmute-x64.rs
@@ -0,0 +1,35 @@
+// compile-flags: -O -C no-prepopulate-passes
+// only-x86_64 (it's using arch-specific types)
+// min-llvm-version: 15.0 # this test assumes `ptr`s
+
+#![crate_type = "lib"]
+
+use std::arch::x86_64::{__m128, __m128i, __m256i};
+use std::mem::transmute;
+
+// CHECK-LABEL: @check_sse_float_to_int(
+#[no_mangle]
+pub unsafe fn check_sse_float_to_int(x: __m128) -> __m128i {
+    // CHECK-NOT: alloca
+    // CHECK: %1 = load <4 x float>, ptr %x, align 16
+    // CHECK: store <4 x float> %1, ptr %0, align 16
+    transmute(x)
+}
+
+// CHECK-LABEL: @check_sse_pair_to_avx(
+#[no_mangle]
+pub unsafe fn check_sse_pair_to_avx(x: (__m128i, __m128i)) -> __m256i {
+    // CHECK-NOT: alloca
+    // CHECK: %1 = load <4 x i64>, ptr %x, align 16
+    // CHECK: store <4 x i64> %1, ptr %0, align 32
+    transmute(x)
+}
+
+// CHECK-LABEL: @check_sse_pair_from_avx(
+#[no_mangle]
+pub unsafe fn check_sse_pair_from_avx(x: __m256i) -> (__m128i, __m128i) {
+    // CHECK-NOT: alloca
+    // CHECK: %1 = load <4 x i64>, ptr %x, align 32
+    // CHECK: store <4 x i64> %1, ptr %0, align 16
+    transmute(x)
+}
diff --git a/tests/codegen/intrinsics/transmute.rs b/tests/codegen/intrinsics/transmute.rs
index c2295ca9a0c3c..57f901c671992 100644
--- a/tests/codegen/intrinsics/transmute.rs
+++ b/tests/codegen/intrinsics/transmute.rs
@@ -8,7 +8,7 @@
 #![feature(inline_const)]
 #![allow(unreachable_code)]
 
-use std::mem::transmute;
+use std::mem::{transmute, MaybeUninit};
 
 // Some of the cases here are statically rejected by `mem::transmute`, so
 // we need to generate custom MIR for those cases to get to codegen.
@@ -373,3 +373,59 @@ pub unsafe fn check_pair_to_dst_ref<'a>(x: (usize, usize)) -> &'a [u8] {
     // CHECK: ret { ptr, i64 } %2
     transmute(x)
 }
+
+// CHECK-LABEL: @check_issue_109992(
+#[no_mangle]
+#[custom_mir(dialect = "runtime", phase = "optimized")]
+pub unsafe fn check_issue_109992(x: ()) -> [(); 1] {
+    // This uses custom MIR to avoid MIR optimizations having removed ZST ops.
+
+    // CHECK: start
+    // CHECK-NEXT: ret void
+    mir!{
+        {
+            RET = CastTransmute(x);
+            Return()
+        }
+    }
+}
+
+// CHECK-LABEL: @check_maybe_uninit_pair(i16 %x.0, i64 %x.1)
+#[no_mangle]
+pub unsafe fn check_maybe_uninit_pair(
+    x: (MaybeUninit<u16>, MaybeUninit<u64>),
+) -> (MaybeUninit<u64>, MaybeUninit<u16>) {
+    // Thanks to `MaybeUninit` this is actually defined behaviour,
+    // unlike the examples above with pairs of primitives.
+
+    // CHECK: store i16 %x.0
+    // CHECK: store i64 %x.1
+    // CHECK: load i64
+    // CHECK-NOT: noundef
+    // CHECK: load i16
+    // CHECK-NOT: noundef
+    // CHECK: ret { i64, i16 }
+    transmute(x)
+}
+
+#[repr(align(8))]
+pub struct HighAlignScalar(u8);
+
+// CHECK-LABEL: @check_to_overalign(
+#[no_mangle]
+pub unsafe fn check_to_overalign(x: u64) -> HighAlignScalar {
+    // CHECK: %0 = alloca %HighAlignScalar, align 8
+    // CHECK: store i64 %x, ptr %0, align 8
+    // CHECK: %1 = load i64, ptr %0, align 8
+    // CHECK: ret i64 %1
+    transmute(x)
+}
+
+// CHECK-LABEL: @check_from_overalign(
+#[no_mangle]
+pub unsafe fn check_from_overalign(x: HighAlignScalar) -> u64 {
+    // CHECK: %x = alloca %HighAlignScalar, align 8
+    // CHECK: %[[VAL:.+]] = load i64, ptr %x, align 8
+    // CHECK: ret i64 %[[VAL]]
+    transmute(x)
+}
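Illustration (not part of either patch): a minimal sketch of the case the new per-component size checks are about, mirroring the `check_issue_110005` test above. It assumes a 64-bit target, and the function name is invented for the example. Both types are 16-byte scalar pairs, so `transmute` accepts the cast, but the pair components differ in size, which is exactly what the added `size(self.cx)` comparisons detect and route through memory instead of per-component `transmute_immediate` calls.

use std::mem::transmute;

// On x86_64, `(usize, bool)` is lowered as the scalar pair (i64, i8) while
// `Option<Box<[u8]>>` is (ptr, i64): the overall sizes match, but the
// component sizes do not, so codegen spills the value through an alloca.
pub unsafe fn pair_to_boxed_slice_option(x: (usize, bool)) -> Option<Box<[u8]>> {
    transmute(x)
}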