Skip to content

Commit

Permalink
Do not use masks in AVX2 double compressstore
Browse files Browse the repository at this point in the history
  • Loading branch information
sterrettm2 committed Nov 21, 2023
1 parent c6f768a commit 58ab67d
Showing 1 changed file with 4 additions and 10 deletions.
14 changes: 4 additions & 10 deletions src/avx2-emu-funcs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,21 +224,18 @@ int avx2_double_compressstore32(void *left_addr,
 typename avx2_vector<T>::reg_t reg)
 {
 using vtype = avx2_vector<T>;
-const __m256i oxff = _mm256_set1_epi32(0xFFFFFFFF);

 T *leftStore = (T *)left_addr;
 T *rightStore = (T *)right_addr;

 int32_t shortMask = convert_avx2_mask_to_int(k);
 const __m256i &perm = _mm256_loadu_si256(
 (const __m256i *)avx2_compressstore_lut32_perm[shortMask].data());
-const __m256i &left = _mm256_loadu_si256(
-(const __m256i *)avx2_compressstore_lut32_left[shortMask].data());

 typename vtype::reg_t temp = vtype::permutevar(reg, perm);

-vtype::mask_storeu(leftStore, left, temp);
-vtype::mask_storeu(rightStore, _mm256_xor_si256(oxff, left), temp);
+vtype::storeu(leftStore, temp);
+vtype::storeu(rightStore, temp);

 return _mm_popcnt_u32(shortMask);
 }
Expand All @@ -250,22 +247,19 @@ int32_t avx2_double_compressstore64(void *left_addr,
 typename avx2_vector<T>::reg_t reg)
 {
 using vtype = avx2_vector<T>;
-const __m256i oxff = _mm256_set1_epi32(0xFFFFFFFF);

 T *leftStore = (T *)left_addr;
 T *rightStore = (T *)right_addr;

 int32_t shortMask = convert_avx2_mask_to_int_64bit(k);
 const __m256i &perm = _mm256_loadu_si256(
 (const __m256i *)avx2_compressstore_lut64_perm[shortMask].data());
-const __m256i &left = _mm256_loadu_si256(
-(const __m256i *)avx2_compressstore_lut64_left[shortMask].data());

 typename vtype::reg_t temp = vtype::cast_from(
 _mm256_permutevar8x32_epi32(vtype::cast_to(reg), perm));

-vtype::mask_storeu(leftStore, left, temp);
-vtype::mask_storeu(rightStore, _mm256_xor_si256(oxff, left), temp);
+vtype::storeu(leftStore, temp);
+vtype::storeu(rightStore, temp);

 return _mm_popcnt_u32(shortMask);
 }
Expand Down

0 comments on commit 58ab67d

Please sign in to comment.