Skip to content

Commit

Permalink
Add another assembly test showing no shift instructions in masked load intrinsic
Browse files Browse the repository at this point in the history
  • Loading branch information
jhorstmann committed Mar 1, 2024
1 parent df7fcb1 commit 83a5dd4
Showing 1 changed file with 39 additions and 24 deletions.
63 changes: 39 additions & 24 deletions tests/assembly/simd-intrinsic-mask-load.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
// verify that simd masked load does not introduce additional bit shift operations
//@ revisions: x86-avx x86-avx512
//@ [x86-avx] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
//@ [x86-avx] compile-flags: -C target-feature=+avx
//@ [x86-avx] needs-llvm-components: x86
//@ [x86-avx512] compile-flags: --target=x86_64-unknown-linux-gnu -C llvm-args=-x86-asm-syntax=intel
//@ [x86-avx512] compile-flags: -C target-feature=+avx512f,+avx512vl,+avx512bw,+avx512dq
//@ [x86-avx512] needs-llvm-components: x86
//@ assembly-output: emit-asm
//@ compile-flags: --crate-type=lib -O

Expand All @@ -20,29 +21,43 @@ pub trait Sized {}
trait Copy {}

#[repr(simd)]
pub struct mask8x16([i8; 16]);
pub struct f32x8([f32; 8]);

#[repr(simd)]
pub struct m32x8([i32; 8]);

#[repr(simd)]
pub struct f64x4([f64; 4]);

#[repr(simd)]
pub struct m64x4([i64; 4]);

extern "rust-intrinsic" {
fn simd_reduce_all<T>(x: T) -> bool;
fn simd_reduce_any<T>(x: T) -> bool;
fn simd_masked_load<M, P, T>(mask: M, pointer: P, values: T) -> T;
}

// CHECK-LABEL: mask_reduce_all:
// CHECK-LABEL: load_f32x8
#[no_mangle]
pub unsafe fn mask_reduce_all(m: mask8x16) -> bool {
// x86: movdqa
// x86-NEXT: pmovmskb
// aarch64: cmge
// aarch64-NEXT: umaxv
simd_reduce_all(m)
pub unsafe fn load_f32x8(mask: m32x8, pointer: *const f32, output: *mut f32x8) {
// x86-avx-NOT: vpslld
// x86-avx: vmovaps ymm0
// x86-avx-NEXT: vmaskmovps
// x86-avx512-NOT: vpslld
// x86-avx512: vpcmpgtd k1
// x86-avx512-NEXT: vmovups ymm0 {k1} {z}
// x86-avx512-NEXT: vmovaps
*output = simd_masked_load(mask, pointer, f32x8([0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32, 0_f32]))

Check failure on line 49 in tests/assembly/simd-intrinsic-mask-load.rs

View workflow job for this annotation

GitHub Actions / PR - mingw-check-tidy

line longer than 100 chars
}

// CHECK-LABEL: mask_reduce_any:
// CHECK-LABEL: load_f64x4
#[no_mangle]
pub unsafe fn mask_reduce_any(m: mask8x16) -> bool {
// x86: movdqa
// x86-NEXT: pmovmskb
// aarch64: cmlt
// aarch64-NEXT: umaxv
simd_reduce_any(m)
pub unsafe fn load_f64x4(mask: m64x4, pointer: *const f64, output: *mut f64x4) {
// x86-avx-NOT: vpsllq
// x86-avx: vmovapd
// x86-avx-NEXT: vmaskmovpd ymm0
// x86-avx512-NOT: vpsllq
// x86-avx512: vpcmpgtq k1
// x86-avx512-NEXT: vmovupd ymm0 {k1} {z}
// x86-avx512-NEXT: vmovapd
*output = simd_masked_load(mask, pointer, f64x4([0_f64, 0_f64, 0_f64, 0_f64]))
}

0 comments on commit 83a5dd4

Please sign in to comment.