Skip to content

Commit

Permalink
Fixes for uni_vaddps, uni_vsubps, uni_vpmulld, uni_vmulps, uni_vmaxps, uni_vminps and uni_vcmpps operations
Browse files Browse the repository at this point in the history
  • Loading branch information
lohika-denis-kotov committed Oct 13, 2022
1 parent ca8b340 commit a39dca1
Showing 1 changed file with 78 additions and 30 deletions.
108 changes: 78 additions & 30 deletions src/cpu/x64/jit_generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -713,16 +713,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
vdivps(x, op1, op2);
}

// Xmm overload of uni_vaddps as shown in a diff view without +/- markers:
// lines from both sides of the commit appear together. The first signature
// line and the two-line movups/addps fallback are presumably the removed
// version; the second signature and the three-way branch the added one.
void uni_vaddps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
void uni_vaddps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
const Xbyak::Operand &op2) {
// When the avx check passes, the three-operand instruction is emitted as-is.
if (is_valid_isa(avx))
vaddps(x, op1, op2);
else {
// Presumably the removed fallback: copy op1 into x, then add op2 into x.
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
addps(x, op2);
// Presumably the added fallback: pick the emission order by register aliasing.
if (x.getIdx() == op1.getIdx()) {
// x already holds op1, so no copy is emitted.
addps(x, op2);
} else if (x.isEqualIfNotInherited(op2)) {
// x is op2: copying op1 into x first would overwrite op2, so op1 is
// added into x instead (operands swapped).
addps(x, op1);
} else {
// x is distinct from both sources: copy op1, then add op2.
movups(x, op1);
addps(x, op2);
}
}
}
// Ymm overload of uni_vaddps: forwards straight to vaddps with no ISA check.
// The two signature lines are the two sides of the diff (op1 typed as
// Xbyak::Operand in one and Xbyak::Ymm in the other); presumably the first is
// the removed line.
void uni_vaddps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
void uni_vaddps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
const Xbyak::Operand &op2) {
vaddps(x, op1, op2);
}
Expand Down Expand Up @@ -831,16 +837,25 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
vsubss(x, Xbyak::Xmm(op1.getIdx()), Xbyak::Xmm(op2.getIdx()));
}

/// Emits packed single-precision subtraction, x = op1 - op2.
///
/// When the avx check passes, the three-operand vsubps is emitted directly.
/// Otherwise the two-operand subps (dst -= src) is used, and the emission
/// order depends on which source register, if any, x aliases.
///
/// @param x   Destination register.
/// @param op1 Minuend register.
/// @param op2 Subtrahend (register or memory operand).
void uni_vsubps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
        const Xbyak::Operand &op2) {
    if (is_valid_isa(avx)) {
        vsubps(x, op1, op2);
        return;
    }

    if (x.getIdx() == op1.getIdx()) {
        // x already holds the minuend; subtract in place.
        subps(x, op2);
    } else if (x.isEqualIfNotInherited(op2)) {
        // x is the subtrahend, so op1 cannot be copied into x first.
        // Subtraction is not commutative, so op1 is used as the working
        // register and its original value is restored afterwards.
        //
        // PUSH/POP have no XMM encoding, so op1 is saved with an explicit
        // 16-byte stack slot. lea (not sub/add) adjusts rsp so EFLAGS are
        // left untouched, and movups is used because rsp is not guaranteed
        // to be 16-byte aligned here.
        // NOTE(review): this writes just below the incoming rsp; confirm no
        // kernel keeps live data there (e.g. in a red zone).
        constexpr int xmm_bytes = 16;
        lea(rsp, ptr[rsp - xmm_bytes]);
        movups(ptr[rsp], op1);
        subps(op1, op2);
        movups(x, op1);
        movups(op1, ptr[rsp]);
        lea(rsp, ptr[rsp + xmm_bytes]);
    } else {
        // x is distinct from both sources: copy the minuend, then subtract.
        movups(x, op1);
        subps(x, op2);
    }
}
// Ymm overload of uni_vsubps: forwards straight to vsubps with no ISA check.
// The two signature lines are the two sides of the diff (op1 typed as
// Xbyak::Operand in one and Xbyak::Ymm in the other); presumably the first is
// the removed line.
void uni_vsubps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
void uni_vsubps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
const Xbyak::Operand &op2) {
vsubps(x, op1, op2);
}
Expand Down Expand Up @@ -874,30 +889,42 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
}
}

// Xmm overload of uni_vpmulld as shown in a diff view without +/- markers:
// both sides of the commit appear together. The (x1, x2, op) signature and
// the statements using those names are presumably the removed version; the
// (x, op1, op2) signature and the three-way branch the added one.
void uni_vpmulld(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
const Xbyak::Operand &op) {
void uni_vpmulld(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
const Xbyak::Operand &op2) {
// When the avx check passes, the three-operand instruction is emitted as-is.
if (is_valid_isa(avx)) {
vpmulld(x1, x2, op);
vpmulld(x, op1, op2);
} else {
// Presumably the removed fallback: copy x2 into x1 unless they are the same
// register index, then multiply by op in place.
if (x1.getIdx() != x2.getIdx()) movdqa(x1, x2);
pmulld(x1, op);
// Presumably the added fallback: pick the emission order by register aliasing.
if (x.getIdx() == op1.getIdx()) {
// x already holds op1, so no copy is emitted.
pmulld(x, op2);
} else if (x.isEqualIfNotInherited(op2)) {
// x is op2: copying op1 into x first would overwrite op2, so x is
// multiplied by op1 instead (operands swapped).
pmulld(x, op1);
} else {
// x is distinct from both sources: copy op1, then multiply by op2.
movdqa(x, op1);
pmulld(x, op2);
}
}
}
/// Ymm overload: emits the three-operand vpmulld directly, with no ISA check.
///
/// @param x1 Destination register.
/// @param x2 First source register.
/// @param op Second source operand.
void uni_vpmulld(
        const Xbyak::Ymm &x1,
        const Xbyak::Ymm &x2,
        const Xbyak::Operand &op) {
    this->vpmulld(x1, x2, op);
}

// Xmm overload of uni_vmulps as shown in a diff view without +/- markers:
// lines from both sides of the commit appear together. The first signature
// line and the two-line movups/mulps fallback are presumably the removed
// version; the second signature and the three-way branch the added one.
void uni_vmulps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
void uni_vmulps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
const Xbyak::Operand &op2) {
// When the avx check passes, the three-operand instruction is emitted as-is.
if (is_valid_isa(avx))
vmulps(x, op1, op2);
else {
// Presumably the removed fallback: copy op1 into x, then multiply by op2.
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
mulps(x, op2);
// Presumably the added fallback: pick the emission order by register aliasing.
if (x.getIdx() == op1.getIdx()) {
// x already holds op1, so no copy is emitted.
mulps(x, op2);
} else if (x.isEqualIfNotInherited(op2)) {
// x is op2: copying op1 into x first would overwrite op2, so x is
// multiplied by op1 instead (operands swapped).
mulps(x, op1);
} else {
// x is distinct from both sources: copy op1, then multiply by op2.
movups(x, op1);
mulps(x, op2);
}
}
}
// Ymm overload of uni_vmulps: forwards straight to vmulps with no ISA check.
// The two signature lines are the two sides of the diff (op1 typed as
// Xbyak::Operand in one and Xbyak::Ymm in the other); presumably the first is
// the removed line.
void uni_vmulps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
void uni_vmulps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
const Xbyak::Operand &op2) {
vmulps(x, op1, op2);
}
Expand Down Expand Up @@ -1299,16 +1326,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
vpsrld(x, op, imm);
}

// Xmm overload of uni_vmaxps as shown in a diff view without +/- markers:
// lines from both sides of the commit appear together. The first signature
// line and the two-line movups/maxps fallback are presumably the removed
// version; the second signature and the three-way branch the added one.
void uni_vmaxps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
void uni_vmaxps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
const Xbyak::Operand &op2) {
// When the avx check passes, the three-operand instruction is emitted as-is.
if (is_valid_isa(avx))
vmaxps(x, op1, op2);
else {
// Presumably the removed fallback: copy op1 into x, then take max with op2.
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
maxps(x, op2);
// Presumably the added fallback: pick the emission order by register aliasing.
if (x.getIdx() == op1.getIdx()) {
// x already holds op1, so no copy is emitted.
maxps(x, op2);
} else if (x.isEqualIfNotInherited(op2)) {
// x is op2: copying op1 into x first would overwrite op2, so the
// operands are swapped.
// NOTE(review): per the Intel SDM, MAXPS returns its second source when
// an input is NaN or both are zeros, so the swapped order may not match
// vmaxps(x, op1, op2) for those inputs -- confirm callers tolerate this.
maxps(x, op1);
} else {
// x is distinct from both sources: copy op1, then take max with op2.
movups(x, op1);
maxps(x, op2);
}
}
}
// Ymm overload of uni_vmaxps: forwards straight to vmaxps with no ISA check.
// The two signature lines are the two sides of the diff (op1 typed as
// Xbyak::Operand in one and Xbyak::Ymm in the other); presumably the first is
// the removed line.
void uni_vmaxps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
void uni_vmaxps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
const Xbyak::Operand &op2) {
vmaxps(x, op1, op2);
}
Expand All @@ -1323,17 +1356,23 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
}
}

// Xmm overload of uni_vminps as shown in a diff view without +/- markers:
// lines from both sides of the commit appear together. The first signature
// line and the two-line movups/minps fallback are presumably the removed
// version; the second signature and the three-way branch the added one.
void uni_vminps(const Xbyak::Xmm &x, const Xbyak::Operand &op1,
void uni_vminps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
const Xbyak::Operand &op2) {
// When the avx check passes, the three-operand instruction is emitted as-is.
if (is_valid_isa(avx))
vminps(x, op1, op2);
else {
// Presumably the removed fallback: copy op1 into x, then take min with op2.
if (!x.isEqualIfNotInherited(op1)) movups(x, op1);
minps(x, op2);
// Presumably the added fallback: pick the emission order by register aliasing.
if (x.getIdx() == op1.getIdx()) {
// x already holds op1, so no copy is emitted.
minps(x, op2);
} else if (x.isEqualIfNotInherited(op2)) {
// x is op2: copying op1 into x first would overwrite op2, so the
// operands are swapped.
// NOTE(review): per the Intel SDM, MINPS returns its second source when
// an input is NaN or both are zeros, so the swapped order may not match
// vminps(x, op1, op2) for those inputs -- confirm callers tolerate this.
minps(x, op1);
} else {
// x is distinct from both sources: copy op1, then take min with op2.
movups(x, op1);
minps(x, op2);
}
}
}

// Ymm overload of uni_vminps: forwards straight to vminps with no ISA check.
// The two signature lines are the two sides of the diff (op1 typed as
// Xbyak::Operand in one and Xbyak::Ymm in the other); presumably the first is
// the removed line.
void uni_vminps(const Xbyak::Ymm &x, const Xbyak::Operand &op1,
void uni_vminps(const Xbyak::Ymm &x, const Xbyak::Ymm &op1,
const Xbyak::Operand &op2) {
vminps(x, op1, op2);
}
Expand Down Expand Up @@ -1370,13 +1409,22 @@ class jit_generator : public Xbyak::CodeGenerator, public c_compatible {
vpmovzxbd(y, op);
}

void uni_vcmpps(const Xbyak::Xmm &x1, const Xbyak::Xmm &x2,
const Xbyak::Operand &op, int cmp_predicate) {
void uni_vcmpps(const Xbyak::Xmm &x, const Xbyak::Xmm &op1,
const Xbyak::Operand &op2, int cmp_predicate) {
if (is_valid_isa(avx))
vcmpps(x1, x2, op, cmp_predicate);
vcmpps(x, op1, op2, cmp_predicate);
else {
if (x1.getIdx() != x2.getIdx()) uni_vmovups(x1, x2);
cmpps(x1, op, cmp_predicate);
if (x.getIdx() == op1.getIdx()) {
cmpps(x, op2, cmp_predicate);
} else if (x.isEqualIfNotInherited(op2)) {
push(op1);
cmpps(op1, op2, cmp_predicate);
movups(x, op1);
pop(op1);
} else {
movups(x, op1);
cmpps(x, op2, cmp_predicate);
}
}
}
void uni_vcmpps(const Xbyak::Ymm &x1, const Xbyak::Ymm &x2,
Expand Down

0 comments on commit a39dca1

Please sign in to comment.