Skip to content

Commit

Permalink
Implement all llvm intrinsics necessary for the image crate
Browse files Browse the repository at this point in the history
Fixes #1379
  • Loading branch information
bjorn3 committed Oct 23, 2023
1 parent efd3081 commit a558968
Showing 1 changed file with 103 additions and 0 deletions.
103 changes: 103 additions & 0 deletions src/intrinsics/llvm_x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,109 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
fx.bcx.ins().sshr(a_lane, saturated_count)
});
}
"llvm.x86.sse2.psad.bw" => {
intrinsic_args!(fx, args => (a, b); intrinsic);

assert_eq!(a.layout(), b.layout());
let layout = a.layout();

let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert_eq!(lane_ty, fx.tcx.types.u8);
assert_eq!(ret_lane_ty, fx.tcx.types.u64);
assert_eq!(lane_count, ret_lane_count * 8);

let ret_lane_layout = fx.layout_of(fx.tcx.types.u64);
for out_lane_idx in 0..lane_count / 8 {
let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0);

for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 1 {
let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx);
let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx);

let lane_diff = fx.bcx.ins().isub(a_lane, b_lane);
let abs_lane_diff = fx.bcx.ins().iabs(lane_diff);
let abs_lane_diff = fx.bcx.ins().uextend(types::I64, abs_lane_diff);
lane_diff_acc = fx.bcx.ins().iadd(lane_diff_acc, abs_lane_diff);
}

let res_lane = CValue::by_val(lane_diff_acc, ret_lane_layout);

ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
}
}
"llvm.x86.ssse3.pmadd.ub.sw.128" => {
intrinsic_args!(fx, args => (a, b); intrinsic);

let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert_eq!(lane_ty, fx.tcx.types.u8);
assert_eq!(ret_lane_ty, fx.tcx.types.i16);
assert_eq!(lane_count, ret_lane_count * 2);

let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
for out_lane_idx in 0..lane_count / 2 {
let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
let a_lane0 = fx.bcx.ins().uextend(types::I16, a_lane0);
let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
let b_lane0 = fx.bcx.ins().sextend(types::I16, b_lane0);

let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
let a_lane1 = fx.bcx.ins().uextend(types::I16, a_lane1);
let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
let b_lane1 = fx.bcx.ins().sextend(types::I16, b_lane1);

let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);

let (val, has_overflow) = fx.bcx.ins().sadd_overflow(mul0, mul1);

let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, mul1, 0);

let min = fx.bcx.ins().iconst(types::I16, i64::from(i16::MIN as u16));
let max = fx.bcx.ins().iconst(types::I16, i64::from(i16::MAX as u16));

let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min);
let res_lane = fx.bcx.ins().select(has_overflow, sat_val, val);

let res_lane = CValue::by_val(res_lane, ret_lane_layout);

ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
}
}
"llvm.x86.sse2.pmadd.wd" => {
intrinsic_args!(fx, args => (a, b); intrinsic);

assert_eq!(a.layout(), b.layout());
let layout = a.layout();

let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
assert_eq!(lane_ty, fx.tcx.types.i16);
assert_eq!(ret_lane_ty, fx.tcx.types.i32);
assert_eq!(lane_count, ret_lane_count * 2);

let ret_lane_layout = fx.layout_of(fx.tcx.types.i32);
for out_lane_idx in 0..lane_count / 2 {
let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
let a_lane0 = fx.bcx.ins().uextend(types::I32, a_lane0);
let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0);

let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
let a_lane1 = fx.bcx.ins().uextend(types::I32, a_lane1);
let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1);

let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);

let res_lane = fx.bcx.ins().iadd(mul0, mul1);
let res_lane = CValue::by_val(res_lane, ret_lane_layout);

ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
}
}
_ => {
fx.tcx
.sess
Expand Down

0 comments on commit a558968

Please sign in to comment.