Commit

* avx: _mm256_zextps128_ps256

* avx: _mm256_zextpd128_pd256

* avx: _mm256_set_m128

* avx: _mm256_set_m128d

* avx: _mm256_castpd_ps

* avx: _mm256_castps_pd

* avx: _mm256_castps_si256

* avx: _mm256_castsi256_ps

* avx: _mm256_zextsi128_si256

* avx: _mm256_set_m128i
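
The intrinsics listed above compose as in the minimal sketch below; the module
paths and portable vector types (`v128::f32x4`, `v256::f32x8`, `x86::avx`) are
assumed from this crate's test code, and callers must also ensure AVX is
available at runtime.

```rust
use v128::f32x4;
use v256::f32x8;
use x86::avx;

// Sketch only: combines the new `set`, `zext`, and `cast` intrinsics.
unsafe fn widen_and_combine() -> f32x8 {
    let lo = f32x4::new(1., 2., 3., 4.);
    let hi = f32x4::new(5., 6., 7., 8.);
    // `lo` becomes bits 0..127 of the result, `hi` bits 128..255.
    let combined = avx::_mm256_set_m128(hi, lo);
    // Zero-extend a 128-bit vector: the upper 128 bits of the result are zero.
    let _widened = avx::_mm256_zextps128_ps256(lo);
    // Pure bit reinterpretation; no instructions are generated.
    let _as_pd = avx::_mm256_castps_pd(combined);
    combined
}
```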
gwenn authored and alexcrichton committed Oct 16, 2017
1 parent 13d2384 commit 2dbe8d0
Showing 1 changed file with 178 additions and 1 deletion.
179 changes: 178 additions & 1 deletion src/x86/avx.rs
@@ -7,6 +7,7 @@ use stdsimd_test::assert_instr;
use simd_llvm::{simd_cast, simd_shuffle2, simd_shuffle4, simd_shuffle8};
use v128::{f32x4, f64x2, i32x4, i64x2};
use v256::*;
use x86::{__m128i, __m256i};

/// Add packed double-precision (64-bit) floating-point elements
/// in `a` and `b`.
@@ -1827,6 +1828,34 @@ pub unsafe fn _mm256_set1_epi64x(a: i64) -> i64x4 {
i64x4::new(a, a, a, a)
}

/// Casts vector of type __m256d to type __m256.
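/// This intrinsic is only used for compilation and does not generate any
/// instructions, thus it has zero latency.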
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_castpd_ps(a: f64x4) -> f32x8 {
mem::transmute(a)
}

/// Casts vector of type __m256 to type __m256d.
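/// This intrinsic is only used for compilation and does not generate any
/// instructions, thus it has zero latency.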
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_castps_pd(a: f32x8) -> f64x4 {
mem::transmute(a)
}

/// Casts vector of type __m256 to type __m256i.
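/// This intrinsic is only used for compilation and does not generate any
/// instructions, thus it has zero latency.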
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_castps_si256(a: f32x8) -> i64x4 {
mem::transmute(a)
}

/// Casts vector of type __m256i to type __m256.
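/// This intrinsic is only used for compilation and does not generate any
/// instructions, thus it has zero latency.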
#[inline(always)]
#[target_feature = "+avx"]
pub unsafe fn _mm256_castsi256_ps(a: i64x4) -> f32x8 {
mem::transmute(a)
}

/// Casts vector of type __m256d to type __m256i.
/// This intrinsic is only used for compilation and does not generate any
/// instructions, thus it has zero latency.
@@ -1899,6 +1928,37 @@ pub unsafe fn _mm256_castsi128_si256(a: i64x2) -> i64x4 {
simd_shuffle4(a, a, [0, 1, 0, 0])
}

/// Constructs a 256-bit floating-point vector of [8 x float] from a
/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
/// the value of the source vector. The upper 128 bits are set to zero.
#[inline(always)]
#[target_feature = "+avx,+sse"]
pub unsafe fn _mm256_zextps128_ps256(a: f32x4) -> f32x8 {
use x86::sse::_mm_setzero_ps;
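// Lanes 0-3 of the result come from `a`; lanes 4-7 come from the zero vector,
// which clears the upper 128 bits.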
simd_shuffle8(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
}

/// Constructs a 256-bit integer vector from a 128-bit integer vector.
/// The lower 128 bits contain the value of the source vector. The upper
/// 128 bits are set to zero.
#[inline(always)]
#[target_feature = "+avx,+sse2"]
pub unsafe fn _mm256_zextsi128_si256(a: i64x2) -> i64x4 {
use x86::sse2::_mm_setzero_si128;
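// `_mm_setzero_si128` yields a `__m128i`, so transmute it to `i64x2` to match
// the lane type of `a` before shuffling; lanes 2-3 of the result are zero.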
simd_shuffle4(a, mem::transmute(_mm_setzero_si128()), [0, 1, 2, 3])
}

/// Constructs a 256-bit floating-point vector of [4 x double] from a
/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
/// contain the value of the source vector. The upper 128 bits are set
/// to zero.
#[inline(always)]
#[target_feature = "+avx,+sse2"]
pub unsafe fn _mm256_zextpd128_pd256(a: f64x2) -> f64x4 {
use x86::sse2::_mm_setzero_pd;
simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3])
}

/// Return vector of type `f32x8` with undefined elements.
#[inline(always)]
#[target_feature = "+avx"]
@@ -1920,6 +1980,34 @@ pub unsafe fn _mm256_undefined_si256() -> i64x4 {
i64x4::splat(mem::uninitialized())
}

/// Set packed __m256 vector with the supplied values; `lo` provides the
/// lower 128 bits of the result and `hi` the upper 128 bits.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set_m128(hi: f32x4, lo: f32x4) -> f32x8 {
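// Shuffle indices 0-3 select lanes of `lo` and 4-7 select lanes of `hi`,
// so `lo` ends up in the lower 128 bits and `hi` in the upper 128 bits.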
simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
}

/// Set packed __m256d vector with the supplied values; `lo` provides the
/// lower 128 bits of the result and `hi` the upper 128 bits.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set_m128d(hi: f64x2, lo: f64x2) -> f64x4 {
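// Reinterpret the doubles as `f32x4` so `_mm256_set_m128` can be reused;
// the bit pattern is unchanged and the result is transmuted back to `f64x4`.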
let hi: f32x4 = mem::transmute(hi);
let lo: f32x4 = mem::transmute(lo);
mem::transmute(_mm256_set_m128(hi, lo))
}

/// Set packed __m256i vector with the supplied values; `lo` provides the
/// lower 128 bits of the result and `hi` the upper 128 bits.
#[inline(always)]
#[target_feature = "+avx"]
#[cfg_attr(test, assert_instr(vinsertf128))]
pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
let hi: f32x4 = mem::transmute(hi);
let lo: f32x4 = mem::transmute(lo);
mem::transmute(_mm256_set_m128(hi, lo))
}

/// LLVM intrinsics used in the above functions
#[allow(improper_ctypes)]
extern "C" {
@@ -2070,7 +2158,7 @@ mod tests {
use stdsimd_test::simd_test;
use test::black_box; // Used to inhibit constant-folding.

use v128::{f32x4, f64x2, i32x4, i64x2};
use v128::{f32x4, f64x2, i8x16, i32x4, i64x2};
use v256::*;
use x86::avx;

@@ -3390,6 +3478,38 @@ mod tests {
assert_eq!(r, i64x4::splat(1));
}

#[simd_test = "avx"]
unsafe fn _mm256_castpd_ps() {
let a = f64x4::new(1., 2., 3., 4.);
let r = avx::_mm256_castpd_ps(a);
let e = f32x8::new(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
assert_eq!(r, e);
}

#[simd_test = "avx"]
unsafe fn _mm256_castps_pd() {
let a = f32x8::new(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
let r = avx::_mm256_castps_pd(a);
let e = f64x4::new(1., 2., 3., 4.);
assert_eq!(r, e);
}

#[simd_test = "avx"]
unsafe fn _mm256_castps_si256() {
let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
let r = avx::_mm256_castps_si256(a);
let e = i64x4::new(
    4611686019492741120,
    4647714816524288000,
    4665729215040061440,
    4683743613553737728,
);
assert_eq!(r, e);
}

#[simd_test = "avx"]
unsafe fn _mm256_castsi256_ps() {
let a = i64x4::new(
    4611686019492741120,
    4647714816524288000,
    4665729215040061440,
    4683743613553737728,
);
let r = avx::_mm256_castsi256_ps(a);
let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
assert_eq!(r, e);
}

#[simd_test = "avx"]
unsafe fn _mm256_castpd_si256() {
let a = f64x4::new(1., 2., 3., 4.);
@@ -3424,4 +3544,61 @@ mod tests {
let r = avx::_mm256_castsi256_si128(a);
assert_eq!(r, i64x2::new(1, 2));
}

#[simd_test = "avx"]
unsafe fn _mm256_zextps128_ps256() {
let a = f32x4::new(1., 2., 3., 4.);
let r = avx::_mm256_zextps128_ps256(a);
let e = f32x8::new(1., 2., 3., 4., 0., 0., 0., 0.);
assert_eq!(r, e);
}

#[simd_test = "avx"]
unsafe fn _mm256_zextsi128_si256() {
let a = i64x2::new(1, 2);
let r = avx::_mm256_zextsi128_si256(a);
let e = i64x4::new(1, 2, 0, 0);
assert_eq!(r, e);
}

#[simd_test = "avx"]
unsafe fn _mm256_zextpd128_pd256() {
let a = f64x2::new(1., 2.);
let r = avx::_mm256_zextpd128_pd256(a);
let e = f64x4::new(1., 2., 0., 0.);
assert_eq!(r, e);
}

#[simd_test = "avx"]
unsafe fn _mm256_set_m128() {
let hi = f32x4::new(5., 6., 7., 8.);
let lo = f32x4::new(1., 2., 3., 4.);
let r = avx::_mm256_set_m128(hi, lo);
let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
assert_eq!(r, e);
}

#[simd_test = "avx"]
unsafe fn _mm256_set_m128d() {
let hi = f64x2::new(3., 4.);
let lo = f64x2::new(1., 2.);
let r = avx::_mm256_set_m128d(hi, lo);
let e = f64x4::new(1., 2., 3., 4.);
assert_eq!(r, e);
}

#[simd_test = "avx"]
unsafe fn _mm256_set_m128i() {
let hi = i8x16::new(17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
let lo = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16);
let r = avx::_mm256_set_m128i(hi, lo);
let e = i8x32::new(
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32);
assert_eq!(r, e);
}
}
