From 2dbe8d0b39a1b07e9cf4846d593908bed588ce9f Mon Sep 17 00:00:00 2001
From: gwenn
Date: Tue, 17 Oct 2017 00:14:09 +0200
Subject: [PATCH] Avx (#126)

* avx: _mm256_zextps128_ps256

* avx: _mm256_zextpd128_pd256

* avx: _mm256_set_m128

* avx: _mm256_set_m128d

* avx: _mm256_castpd_ps

* avx: _mm256_castps_pd

* avx: _mm256_castps_si256

* avx: _mm256_castsi256_ps

* avx: _mm256_zextsi128_si256

* avx: _mm256_set_m128i
---
 src/x86/avx.rs | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 178 insertions(+), 1 deletion(-)

diff --git a/src/x86/avx.rs b/src/x86/avx.rs
index 6757e00f1699f..781869cfa7fed 100644
--- a/src/x86/avx.rs
+++ b/src/x86/avx.rs
@@ -7,6 +7,7 @@ use stdsimd_test::assert_instr;
 use simd_llvm::{simd_cast, simd_shuffle2, simd_shuffle4, simd_shuffle8};
 use v128::{f32x4, f64x2, i32x4, i64x2};
 use v256::*;
+use x86::{__m128i, __m256i};
 
 /// Add packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`.
@@ -1827,6 +1828,34 @@ pub unsafe fn _mm256_set1_epi64x(a: i64) -> i64x4 {
     i64x4::new(a, a, a, a)
 }
 
+/// Casts vector of type __m256d to type __m256.
+#[inline(always)]
+#[target_feature = "+avx"]
+pub unsafe fn _mm256_castpd_ps(a: f64x4) -> f32x8 {
+    mem::transmute(a)
+}
+
+/// Casts vector of type __m256 to type __m256d.
+#[inline(always)]
+#[target_feature = "+avx"]
+pub unsafe fn _mm256_castps_pd(a: f32x8) -> f64x4 {
+    mem::transmute(a)
+}
+
+/// Casts vector of type __m256 to type __m256i.
+#[inline(always)]
+#[target_feature = "+avx"]
+pub unsafe fn _mm256_castps_si256(a: f32x8) -> i64x4 {
+    mem::transmute(a)
+}
+
+/// Casts vector of type __m256i to type __m256.
+#[inline(always)]
+#[target_feature = "+avx"]
+pub unsafe fn _mm256_castsi256_ps(a: i64x4) -> f32x8 {
+    mem::transmute(a)
+}
+
 /// Casts vector of type __m256d to type __m256i.
 /// This intrinsic is only used for compilation and does not generate any
 /// instructions, thus it has zero latency.
@@ -1899,6 +1928,37 @@ pub unsafe fn _mm256_castsi128_si256(a: i64x2) -> i64x4 {
     simd_shuffle4(a, a, [0, 1, 0, 0])
 }
 
+/// Constructs a 256-bit floating-point vector of [8 x float] from a
+/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
+/// the value of the source vector. The upper 128 bits are set to zero.
+#[inline(always)]
+#[target_feature = "+avx,+sse"]
+pub unsafe fn _mm256_zextps128_ps256(a: f32x4) -> f32x8 {
+    use x86::sse::_mm_setzero_ps;
+    simd_shuffle8(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Constructs a 256-bit integer vector from a 128-bit integer vector.
+/// The lower 128 bits contain the value of the source vector. The upper
+/// 128 bits are set to zero.
+#[inline(always)]
+#[target_feature = "+avx,+sse2"]
+pub unsafe fn _mm256_zextsi128_si256(a: i64x2) -> i64x4 {
+    use x86::sse2::_mm_setzero_si128;
+    simd_shuffle4(a, mem::transmute(_mm_setzero_si128()), [0, 1, 2, 3])
+}
+
+/// Constructs a 256-bit floating-point vector of [4 x double] from a
+/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
+/// contain the value of the source vector. The upper 128 bits are set
+/// to zero.
+#[inline(always)]
+#[target_feature = "+avx,+sse2"]
+pub unsafe fn _mm256_zextpd128_pd256(a: f64x2) -> f64x4 {
+    use x86::sse2::_mm_setzero_pd;
+    simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3])
+}
+
 /// Return vector of type `f32x8` with undefined elements.
 #[inline(always)]
 #[target_feature = "+avx"]
@@ -1920,6 +1980,34 @@ pub unsafe fn _mm256_undefined_si256() -> i64x4 {
     i64x4::splat(mem::uninitialized())
 }
 
+/// Sets packed __m256 returned vector with the supplied values.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vinsertf128))]
+pub unsafe fn _mm256_set_m128(hi: f32x4, lo: f32x4) -> f32x8 {
+    simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Sets packed __m256d returned vector with the supplied values.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vinsertf128))]
+pub unsafe fn _mm256_set_m128d(hi: f64x2, lo: f64x2) -> f64x4 {
+    let hi: f32x4 = mem::transmute(hi);
+    let lo: f32x4 = mem::transmute(lo);
+    mem::transmute(_mm256_set_m128(hi, lo))
+}
+
+/// Sets packed __m256i returned vector with the supplied values.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vinsertf128))]
+pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
+    let hi: f32x4 = mem::transmute(hi);
+    let lo: f32x4 = mem::transmute(lo);
+    mem::transmute(_mm256_set_m128(hi, lo))
+}
+
 /// LLVM intrinsics used in the above functions
 #[allow(improper_ctypes)]
 extern "C" {
@@ -2070,7 +2158,7 @@ mod tests {
     use stdsimd_test::simd_test;
     use test::black_box; // Used to inhibit constant-folding.
 
-    use v128::{f32x4, f64x2, i32x4, i64x2};
+    use v128::{f32x4, f64x2, i8x16, i32x4, i64x2};
     use v256::*;
     use x86::avx;
 
@@ -3390,6 +3478,38 @@ mod tests {
         assert_eq!(r, i64x4::splat(1));
     }
 
+    #[simd_test = "avx"]
+    unsafe fn _mm256_castpd_ps() {
+        let a = f64x4::new(1., 2., 3., 4.);
+        let r = avx::_mm256_castpd_ps(a);
+        let e = f32x8::new(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_castps_pd() {
+        let a = f32x8::new(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
+        let r = avx::_mm256_castps_pd(a);
+        let e = f64x4::new(1., 2., 3., 4.);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_castps_si256() {
+        let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
+        let r = avx::_mm256_castps_si256(a);
+        let e = i64x4::new(4611686019492741120, 4647714816524288000, 4665729215040061440, 4683743613553737728);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_castsi256_ps() {
+        let a = i64x4::new(4611686019492741120, 4647714816524288000, 4665729215040061440, 4683743613553737728);
+        let r = avx::_mm256_castsi256_ps(a);
+        let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
+        assert_eq!(r, e);
+    }
+
     #[simd_test = "avx"]
     unsafe fn _mm256_castpd_si256() {
         let a = f64x4::new(1., 2., 3., 4.);
@@ -3424,4 +3544,61 @@ mod tests {
         let r = avx::_mm256_castsi256_si128(a);
         assert_eq!(r, i64x2::new(1, 2));
     }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_zextps128_ps256() {
+        let a = f32x4::new(1., 2., 3., 4.);
+        let r = avx::_mm256_zextps128_ps256(a);
+        let e = f32x8::new(1., 2., 3., 4., 0., 0., 0., 0.);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_zextsi128_si256() {
+        let a = i64x2::new(1, 2);
+        let r = avx::_mm256_zextsi128_si256(a);
+        let e = i64x4::new(1, 2, 0, 0);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_zextpd128_pd256() {
+        let a = f64x2::new(1., 2.);
+        let r = avx::_mm256_zextpd128_pd256(a);
+        let e = f64x4::new(1., 2., 0., 0.);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_set_m128() {
+        let hi = f32x4::new(5., 6., 7., 8.);
+        let lo = f32x4::new(1., 2., 3., 4.);
+        let r = avx::_mm256_set_m128(hi, lo);
+        let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_set_m128d() {
+        let hi = f64x2::new(3., 4.);
+        let lo = f64x2::new(1., 2.);
+        let r = avx::_mm256_set_m128d(hi, lo);
+        let e = f64x4::new(1., 2., 3., 4.);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_set_m128i() {
+        let hi = i8x16::new(17, 18, 19, 20, 21, 22, 23, 24,
+                            25, 26, 27, 28, 29, 30, 31, 32);
+        let lo = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8,
+                            9, 10, 11, 12, 13, 14, 15, 16);
+        let r = avx::_mm256_set_m128i(hi, lo);
+        let e = i8x32::new(
+            1, 2, 3, 4, 5, 6, 7, 8,
+            9, 10, 11, 12, 13, 14, 15, 16,
+            17, 18, 19, 20, 21, 22, 23, 24,
+            25, 26, 27, 28, 29, 30, 31, 32);
+        assert_eq!(r, e);
+    }
 }
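
For reference, a minimal usage sketch of the new `_mm256_set_m128` and
`_mm256_zextps128_ps256` intrinsics (not part of the patch). It assumes the
crate-internal `v128`/`v256` vector types and the `x86::avx` module path used
by the tests above. Note the Intel argument order of `_mm256_set_m128(hi, lo)`:
`lo` fills bits 0..127 and `hi` fills bits 128..255, while the `zext` variant
fills the upper half with zeros instead of a second vector.

    use v128::f32x4;
    use v256::f32x8;
    use x86::avx;

    unsafe fn demo() {
        let lo = f32x4::new(1., 2., 3., 4.);
        let hi = f32x4::new(5., 6., 7., 8.);

        // `lo` lands in the low 128 bits, `hi` in the high 128 bits.
        let v = avx::_mm256_set_m128(hi, lo);
        assert_eq!(v, f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.));

        // Zero-extension keeps `lo` and clears lanes 4..8.
        let z = avx::_mm256_zextps128_ps256(lo);
        assert_eq!(z, f32x8::new(1., 2., 3., 4., 0., 0., 0., 0.));
    }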