Skip to content

Commit

Permalink
SIMD optimizations for equal, not equal operators and unary operator …
Browse files Browse the repository at this point in the history
…of vec4
  • Loading branch information
Groovounet committed May 25, 2016
1 parent 740e6d6 commit 0abd0f0
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 12 deletions.
44 changes: 32 additions & 12 deletions glm/detail/type_vec4.inl
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,24 @@ namespace detail
return tvec4<T, P>(a.x >> b.x, a.y >> b.y, a.z >> b.z, a.w >> b.w);
}
};

/// Generic (scalar) equality test between two tvec4 values.
///
/// Components are compared in x, y, z, w order with T's operator== and the
/// evaluation stops at the first component that does not compare equal.
/// The primary template is the fallback; specializations may replace it.
template <typename T, precision P>
struct compute_vec4_equal
{
	/// @return true only when all four components of a and b compare equal.
	static bool call(tvec4<T, P> const & a, tvec4<T, P> const & b)
	{
		if(!(a.x == b.x))
			return false;
		if(!(a.y == b.y))
			return false;
		if(!(a.z == b.z))
			return false;
		if(!(a.w == b.w))
			return false;
		return true;
	}
};

/// Generic (scalar) inequality test between two tvec4 values.
///
/// Components are compared in x, y, z, w order with T's operator!= and the
/// evaluation stops at the first component that differs.
/// The primary template is the fallback; specializations may replace it.
template <typename T, precision P>
struct compute_vec4_nequal
{
	/// @return true as soon as one component of a differs from the matching
	///         component of b, false when none of the four differ.
	static bool call(tvec4<T, P> const & a, tvec4<T, P> const & b)
	{
		if(a.x != b.x)
			return true;
		if(a.y != b.y)
			return true;
		if(a.z != b.z)
			return true;
		if(a.w != b.w)
			return true;
		return false;
	}
};
}//namespace detail

// -- Implicit basic constructors --
Expand Down Expand Up @@ -401,21 +419,23 @@ namespace detail
/// Pre-increment: adds one to each of the four components.
///
/// @return a reference to this vector after the increment.
template <typename T, precision P>
GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator++()
{
	// The leading `1 ||` makes the condition always true, so the scalar path
	// is the one that gets compiled and the #else branch (which routes the
	// increment through detail::compute_vec4_add) is currently never built.
#	if(1 || GLM_ARCH == GLM_ARCH_PURE)
		++this->x; ++this->y; ++this->z; ++this->w;
		return *this;
#	else
		return (*this = detail::compute_vec4_add<T, P>::call(*this, tvec4<T, P>(1)));
#	endif
}

/// Pre-decrement: subtracts one from each of the four components.
///
/// @return a reference to this vector after the decrement.
template <typename T, precision P>
GLM_FUNC_QUALIFIER tvec4<T, P> & tvec4<T, P>::operator--()
{
	// The leading `1 ||` makes the condition always true, so the scalar path
	// is the one that gets compiled and the #else branch (which routes the
	// decrement through detail::compute_vec4_sub) is currently never built.
#	if(1 || GLM_ARCH == GLM_ARCH_PURE)
		--this->x; --this->y; --this->z; --this->w;
		return *this;
#	else
		return (*this = detail::compute_vec4_sub<T, P>::call(*this, tvec4<T, P>(1)));
#	endif
}

template <typename T, precision P>
Expand Down Expand Up @@ -891,13 +911,13 @@ namespace detail
/// Equality of two tvec4 values.
///
/// The comparison itself lives in detail::compute_vec4_equal<T, P>::call so
/// that a specialization of that struct can replace the generic
/// component-by-component test without touching this operator.
///
/// @return the result of detail::compute_vec4_equal<T, P>::call(v1, v2).
template <typename T, precision P>
GLM_FUNC_QUALIFIER bool operator==(tvec4<T, P> const & v1, tvec4<T, P> const & v2)
{
	return detail::compute_vec4_equal<T, P>::call(v1, v2);
}

/// Inequality of two tvec4 values.
///
/// The comparison itself lives in detail::compute_vec4_nequal<T, P>::call so
/// that a specialization of that struct can replace the generic
/// component-by-component test without touching this operator.
///
/// @return the result of detail::compute_vec4_nequal<T, P>::call(v1, v2).
template <typename T, precision P>
GLM_FUNC_QUALIFIER bool operator!=(tvec4<T, P> const & v1, tvec4<T, P> const & v2)
{
	return detail::compute_vec4_nequal<T, P>::call(v1, v2);
}

template <precision P>
Expand Down
55 changes: 55 additions & 0 deletions glm/detail/type_vec4_simd.inl
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,61 @@ namespace detail
return Result;
}
};
/*
// SSE equality test for float vec4.
// _mm_cmpeq_ps compares the four lanes of a.data and b.data, and
// _mm_movemask_ps packs one bit per lane into the low four bits of an int.
// NOTE(review): assumes tvec4<float, P>::data is an __m128 - confirm.
template <precision P>
struct compute_vec4_equal<float, P>
{
	static bool call(tvec4<float, P> const & a, tvec4<float, P> const & b)
	{
		// One bit per lane; a set bit means that lane compared equal.
		int const equalMask = _mm_movemask_ps(_mm_cmpeq_ps(a.data, b.data));
		// Equal only when all four lane bits are set.
		return equalMask == 0x0F;
	}
};
// SSE2 equality test for int32 vec4.
// _mm_cmpeq_epi32 compares the four 32-bit lanes; the result is reinterpreted
// as packed floats only so that _mm_movemask_ps can gather one bit per lane.
// NOTE(review): assumes tvec4<int32, P>::data is an __m128i - confirm.
template <precision P>
struct compute_vec4_equal<int32, P>
{
	static bool call(tvec4<int32, P> const & a, tvec4<int32, P> const & b)
	{
		// One bit per lane; a set bit means that lane compared equal.
		int const equalMask = _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.data, b.data)));
		// Equal only when all four lane bits are set.
		return equalMask == 0x0F;
	}
};
// SSE2 equality test for uint32 vec4.
// _mm_cmpeq_epi32 compares the four 32-bit lanes; the result is reinterpreted
// as packed floats only so that _mm_movemask_ps can gather one bit per lane.
// NOTE(review): assumes tvec4<uint32, P>::data is an __m128i - confirm.
template <precision P>
struct compute_vec4_equal<uint32, P>
{
	static bool call(tvec4<uint32, P> const & a, tvec4<uint32, P> const & b)
	{
		// One bit per lane; a set bit means that lane compared equal.
		int const equalMask = _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.data, b.data)));
		// Equal only when all four lane bits are set.
		return equalMask == 0x0F;
	}
};
// SSE inequality test for float vec4.
// _mm_cmpeq_ps compares the four lanes of a.data and b.data, and
// _mm_movemask_ps packs one bit per lane into the low four bits of an int.
// NOTE(review): assumes tvec4<float, P>::data is an __m128 - confirm.
template <precision P>
struct compute_vec4_nequal<float, P>
{
	static bool call(tvec4<float, P> const & a, tvec4<float, P> const & b)
	{
		// The mask holds one "lane is equal" bit per component. The vectors
		// differ when at least one of those bits is clear, i.e. the mask is
		// not 0x0F. Testing `!= 0` instead would report "not equal" for
		// identical vectors and "equal" for vectors with no matching lane.
		return _mm_movemask_ps(_mm_cmpeq_ps(a.data, b.data)) != 0x0F;
	}
};
// SSE2 inequality test for int32 vec4.
// _mm_cmpeq_epi32 compares the four 32-bit lanes; the result is reinterpreted
// as packed floats only so that _mm_movemask_ps can gather one bit per lane.
// NOTE(review): assumes tvec4<int32, P>::data is an __m128i - confirm.
template <precision P>
struct compute_vec4_nequal<int32, P>
{
	static bool call(tvec4<int32, P> const & a, tvec4<int32, P> const & b)
	{
		// The mask holds one "lane is equal" bit per component. The vectors
		// differ when at least one of those bits is clear, i.e. the mask is
		// not 0x0F. Testing `!= 0` instead would report "not equal" for
		// identical vectors and "equal" for vectors with no matching lane.
		return _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.data, b.data))) != 0x0F;
	}
};
// SSE2 inequality test for uint32 vec4.
// _mm_cmpeq_epi32 compares the four 32-bit lanes; the result is reinterpreted
// as packed floats only so that _mm_movemask_ps can gather one bit per lane.
// NOTE(review): assumes tvec4<uint32, P>::data is an __m128i - confirm.
template <precision P>
struct compute_vec4_nequal<uint32, P>
{
	static bool call(tvec4<uint32, P> const & a, tvec4<uint32, P> const & b)
	{
		// The mask holds one "lane is equal" bit per component. The vectors
		// differ when at least one of those bits is clear, i.e. the mask is
		// not 0x0F. Testing `!= 0` instead would report "not equal" for
		// identical vectors and "equal" for vectors with no matching lane.
		return _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(a.data, b.data))) != 0x0F;
	}
};
*/
}//namespace detail

# if !GLM_HAS_DEFAULTED_FUNCTIONS
Expand Down

0 comments on commit 0abd0f0

Please sign in to comment.