-
Notifications
You must be signed in to change notification settings - Fork 137
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libsleef] Add modified Payne Hanek argument reduction #197
Changes from 1 commit
4d1bd3e
e215fc1
0fa5047
adeac9b
763f08f
4ccf1df
14cad71
0faa1f2
65f2112
a268a8b
5983c98
0c7d11f
b827834
2fdfae8
99080e6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -290,6 +290,12 @@ static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm256_loadu_pd(pt | |
// Store the double vector v to ptr through _mm256_store_pd (the aligned-store intrinsic). | ||
static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm256_store_pd(ptr, v); } | ||
// Store the double vector v to ptr through _mm256_storeu_pd (no alignment requirement on ptr). | ||
static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm256_storeu_pd(ptr, v); } | ||
|
||
// Gather four doubles from ptr, one per 32-bit element index held in vi. | ||
// The indices are spilled to memory and the loads are done as scalars. | ||
static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { | ||
  int idx[4]; | ||
  vstoreu_v_p_vi(idx, vi); | ||
  const double lane0 = ptr[idx[0]]; | ||
  const double lane1 = ptr[idx[1]]; | ||
  const double lane2 = ptr[idx[2]]; | ||
  const double lane3 = ptr[idx[3]]; | ||
  // _mm256_set_pd takes its arguments from the highest lane down to lane 0. | ||
  return _mm256_set_pd(lane3, lane2, lane1, lane0); | ||
} | ||
|
||
#if defined(_MSC_VER) | ||
// This function is needed when debugging on MSVC. | ||
static INLINE double vcast_d_vd(vdouble v) { | ||
|
@@ -477,6 +483,13 @@ static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm256_loadu_ps(ptr) | |
// Store the float vector v to ptr through _mm256_store_ps (the aligned-store intrinsic). | ||
static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm256_store_ps(ptr, v); } | ||
// Store the float vector v to ptr through _mm256_storeu_ps (no alignment requirement on ptr). | ||
static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm256_storeu_ps(ptr, v); } | ||
|
||
static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { | ||
int a[8]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here, shouldn't it be |
||
vstoreu_v_p_vi2(a, vi2); | ||
return _mm256_set_ps(ptr[a[7]], ptr[a[6]], ptr[a[5]], ptr[a[4]], | ||
ptr[a[3]], ptr[a[2]], ptr[a[1]], ptr[a[0]]); | ||
} | ||
|
||
#ifdef _MSC_VER | ||
// This function is needed when debugging on MSVC. | ||
static INLINE float vcast_f_vf(vfloat v) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -74,6 +74,18 @@ static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { ptr[0] = v[0]; ptr[1] | |
|
||
// Write the vector v to ptr starting at element 2*offset via vstore_v_p_vd. | ||
// The step parameter is accepted but not used by this implementation. | ||
static INLINE void vscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vstore_v_p_vd((double *)(&ptr[2*offset]), v); } | ||
|
||
static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { | ||
int a[4]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shouldn't this also be |
||
vstoreu_v_p_vi(a, vi); | ||
return ((vdouble) { ptr[a[0]], ptr[a[1]] }); | ||
} | ||
|
||
static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { | ||
int a[4]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shouldn't this also be |
||
vstoreu_v_p_vi2(a, vi2); | ||
return ((vfloat) { ptr[a[0]], ptr[a[1]], ptr[a[2]], ptr[a[3]] }); | ||
} | ||
|
||
// Broadcast helpers: each builds a vector whose listed elements all hold the scalar argument. | ||
// The vint literal lists two elements; the vint2 and vfloat literals list four. | ||
static INLINE vint vcast_vi_i(int i) { return (vint) { i, i }; } | ||
static INLINE vint2 vcast_vi2_i(int i) { return (vint2) { i, i, i, i }; } | ||
static INLINE vfloat vcast_vf_f(float f) { return (vfloat) { f, f, f, f }; } | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -267,6 +267,12 @@ static INLINE vdouble vloadu_vd_p(const double *ptr) { return _mm_loadu_pd(ptr); | |
// Store the double vector v to ptr through _mm_store_pd (the aligned-store intrinsic). | ||
static INLINE void vstore_v_p_vd(double *ptr, vdouble v) { _mm_store_pd(ptr, v); } | ||
// Store the double vector v to ptr through _mm_storeu_pd (no alignment requirement on ptr). | ||
static INLINE void vstoreu_v_p_vd(double *ptr, vdouble v) { _mm_storeu_pd(ptr, v); } | ||
|
||
static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { | ||
int a[4]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
vstoreu_v_p_vi(a, vi); | ||
return _mm_set_pd(ptr[a[1]], ptr[a[0]]); | ||
} | ||
|
||
#if defined(_MSC_VER) | ||
// This function is needed when debugging on MSVC. | ||
static INLINE double vcast_d_vd(vdouble v) { | ||
|
@@ -373,6 +379,12 @@ static INLINE vfloat vloadu_vf_p(const float *ptr) { return _mm_loadu_ps(ptr); } | |
// Store the float vector v to ptr through _mm_store_ps (the aligned-store intrinsic). | ||
static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { _mm_store_ps(ptr, v); } | ||
// Store the float vector v to ptr through _mm_storeu_ps (no alignment requirement on ptr). | ||
static INLINE void vstoreu_v_p_vf(float *ptr, vfloat v) { _mm_storeu_ps(ptr, v); } | ||
|
||
static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi) { | ||
int a[4]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
vstoreu_v_p_vi2(a, vi); | ||
return _mm_set_ps(ptr[a[3]], ptr[a[2]], ptr[a[1]], ptr[a[0]]); | ||
} | ||
|
||
#ifdef _MSC_VER | ||
// This function is useful when debugging on MSVC. | ||
static INLINE float vcast_f_vf(vfloat v) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -607,6 +607,9 @@ static INLINE vint vandnot_vi_vo_vi(vopmask x, vint y) { | |
// Bitwise AND of two 32-bit integer vectors, x & y. | ||
// ptrue is defined outside this excerpt; presumably an all-true predicate - verify. | ||
static INLINE vint vand_vi_vi_vi(vint x, vint y) { | ||
return svand_s32_x(ptrue, x, y); | ||
} | ||
// Bitwise AND-NOT of two 32-bit integer vectors. | ||
// The operands are passed to svbic in swapped order (y first, then x): svbic clears in | ||
// its first data operand the bits set in its second, so the result is (~x) & y. | ||
// ptrue is defined outside this excerpt; presumably an all-true predicate - verify. | ||
static INLINE vint vandnot_vi_vi_vi(vint x, vint y) { | ||
return svbic_s32_x(ptrue, y, x); | ||
} | ||
// Bitwise exclusive OR of two 32-bit integer vectors, x ^ y. | ||
// ptrue is defined outside this excerpt; presumably an all-true predicate - verify. | ||
static INLINE vint vxor_vi_vi_vi(vint x, vint y) { | ||
return sveor_s32_x(ptrue, x, y); | ||
} | ||
|
@@ -657,6 +660,15 @@ static INLINE vint2 veq_vi2_vi2_vi2(vint2 x, vint2 y) { | |
return svsel_s32(svcmpeq_s32(ptrue, x, y), ALL_TRUE_MASK, ALL_FALSE_MASK); | ||
} | ||
|
||
// Gather | ||
|
||
// Gather doubles from ptr using the 32-bit element indices in vi, i.e. each | ||
// result lane is ptr[index]. | ||
// | ||
// The indices are widened to 64 bits by interleaving them with zeros via svzip1, | ||
// then used with the _index form of the gather, which scales every index by | ||
// sizeof(double). The _offset form interprets the same values as raw byte | ||
// offsets and would therefore read from the wrong addresses. | ||
// | ||
// NOTE(review): the zero interleave looks like zero-extension, so negative | ||
// indices are presumably not supported - confirm against callers. | ||
// NOTE(review): the first-faulting load (ldff1) is kept from the original; | ||
// the FFR is not inspected here - confirm a plain ld1 gather is not intended. | ||
static INLINE vdouble vgather_vd_p_vi(const double *ptr, vint vi) { | ||
  const svint64_t idx = svreinterpret_s64_s32(svzip1_s32(vi, svdup_n_s32(0))); | ||
  return svldff1_gather_s64index_f64(ptrue, ptr, idx); | ||
} | ||
|
||
// Gather floats from ptr using the 32-bit element indices in vi2, i.e. each | ||
// result lane is ptr[index]. | ||
// | ||
// The _index form of the gather scales every index by sizeof(float). The | ||
// _offset form interprets the same values as raw byte offsets and would | ||
// therefore read from the wrong addresses. | ||
// | ||
// NOTE(review): the first-faulting load (ldff1) is kept from the original; | ||
// the FFR is not inspected here - confirm a plain ld1 gather is not intended. | ||
static INLINE vfloat vgather_vf_p_vi2(const float *ptr, vint2 vi2) { | ||
  return svldff1_gather_s32index_f32(ptrue, ptr, vi2); | ||
} | ||
|
||
// Operations for DFT | ||
|
||
|
@@ -713,3 +725,28 @@ static INLINE void vstore_v_p_vf(float *ptr, vfloat v) { vstoreu_v_p_vf(ptr, v); | |
// The stream and "sscatter" variants below simply forward to the regular | ||
// store / scatter helpers with unchanged arguments. | ||
static INLINE void vstream_v_p_vf(float *ptr, vfloat v) { vstore_v_p_vf(ptr, v); } | ||
static INLINE void vsscatter2_v_p_i_i_vd(double *ptr, int offset, int step, vdouble v) { vscatter2_v_p_i_i_vd(ptr, offset, step, v); } | ||
static INLINE void vsscatter2_v_p_i_i_vf(float *ptr, int offset, int step, vfloat v) { vscatter2_v_p_i_i_vf(ptr, offset, step, v); } | ||
|
||
// These functions are for debugging | ||
static double vcast_d_vd(vdouble v) { | ||
double a[32]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
It requires the code to be built with the C11 standard. |
||
vstoreu_v_p_vd(a, v); | ||
return a[0]; | ||
} | ||
|
||
static float vcast_f_vf(vfloat v) { | ||
float a[64]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
vstoreu_v_p_vf(a, v); | ||
return a[0]; | ||
} | ||
|
||
static int vcast_i_vi(vint v) { | ||
int a[64]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
vstoreu_v_p_vi(a, v); | ||
return a[0]; | ||
} | ||
|
||
static int vcast_i_vi2(vint2 v) { | ||
int a[64]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
vstoreu_v_p_vi2(a, v); | ||
return a[0]; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Shouldn't this be `int a[VECLENSP]`?