Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable AVX512 Additional 16 SIMD Registers #79544

Merged
merged 36 commits into from
Feb 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
9da5b6d
Change regMask_enum and regMaskTP to unsigned __int64_t on AMD64.
anthonycanino Oct 27, 2022
6c6c884
Add upper 16 SIMD registers to allocator.
anthonycanino Dec 14, 2022
bd6d2a5
Limit high SIMD reg to compatible intrinsics lsra build.
anthonycanino Jan 17, 2023
47ff9ca
Limit high SIMD reg to compatible intrinsics lsra build.
anthonycanino Nov 14, 2022
9cabef6
Limit high SIMD reg to compatible intrinsics and gentree nodes.
anthonycanino Dec 14, 2022
9f711ba
Fix for X86 throughput.
anthonycanino Dec 14, 2022
f29c146
Add upper simd stress test to the AVX512 testing pipeline.
anthonycanino Dec 14, 2022
b95d296
Formatting.
anthonycanino Dec 14, 2022
6a4dcd1
Fix wrong-sized attr for simd mov instruction.
anthonycanino Dec 16, 2022
d1f2fdb
Fix non-AMD64 LSRA stress mask.
anthonycanino Dec 16, 2022
c7807c7
Update src/coreclr/jit/compiler.h
anthonycanino Dec 19, 2022
0b6cb55
Update src/coreclr/jit/compiler.cpp
anthonycanino Dec 19, 2022
a45163c
Update src/coreclr/jit/gentree.cpp
anthonycanino Dec 19, 2022
730b4eb
Update src/coreclr/jit/hwintrinsic.h
anthonycanino Dec 19, 2022
34d6199
Update src/coreclr/jit/target.h
anthonycanino Dec 19, 2022
395f5e4
Update src/coreclr/jit/emitxarch.cpp
anthonycanino Dec 19, 2022
852e051
Remove unneeded vars
kunalspathak Dec 15, 2022
7238b49
Address PR comments.
anthonycanino Dec 19, 2022
2b56df7
Allow `emitinl.h` access to the `rbm` variables.
anthonycanino Dec 19, 2022
fa1a550
Replace RBM_LOWSIMD with `BuildEvexIncompatibleMask`.
anthonycanino Jan 9, 2023
564dc81
Move AVX512 dependent `targetamd.h` vars into compiler object.
anthonycanino Jan 12, 2023
5ea489d
Fixing some edge cases for `targetamd.h` variables.
anthonycanino Jan 17, 2023
e60aece
Fix a merge/rebase bug.
anthonycanino Jan 18, 2023
b28a231
Update src/coreclr/jit/compiler.h
anthonycanino Jan 26, 2023
37af7c3
Update src/coreclr/jit/lsra.cpp
anthonycanino Jan 26, 2023
e9be8f8
Update src/coreclr/jit/compiler.h
anthonycanino Jan 26, 2023
f680448
Fix nits.
anthonycanino Jan 26, 2023
7416501
Merge branch 'avx512-upper-regs' of https://github.com/anthonycanino/…
anthonycanino Jan 26, 2023
73f43b6
Trying VM changes.
anthonycanino Jan 31, 2023
c6f1a90
VM hack.
anthonycanino Feb 1, 2023
91cf3db
VM hack.
anthonycanino Feb 1, 2023
228c0c5
Revert "VM hack."
anthonycanino Feb 3, 2023
5490617
Adjust ACTUAL_REG_COUNT based on availability of AVX512.
anthonycanino Feb 6, 2023
3c7acdb
Use inline accessor functions instead of macros
BruceForstall Feb 8, 2023
e42db99
Merge pull request #6 from BruceForstall/anthonycanino_avx512-upper-r…
anthonycanino Feb 8, 2023
10b4703
Clarifying comments.
anthonycanino Feb 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions eng/pipelines/common/templates/runtimes/run-test-job.yml
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,7 @@ jobs:
${{ if in(parameters.testGroup, 'jitstress-isas-avx512') }}:
scenarios:
- jitstress_isas_avx512_forceevex
- jitstress_isas_avx512_forceevex_stresshighregs
${{ if in(parameters.testGroup, 'jitstressregs-x86') }}:
scenarios:
- jitstressregs1_x86_noavx
Expand Down
11 changes: 11 additions & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,17 @@ class CodeGen final : public CodeGenInterface
GenTree* addr, bool fold, bool* revPtr, GenTree** rv1Ptr, GenTree** rv2Ptr, unsigned* mulPtr, ssize_t* cnsPtr);

private:
#if defined(TARGET_AMD64)
// AMD64 only: yields the compiler object's "all float registers" mask
// (rbmAllFloat) through the compiler's public accessor of the same name.
// The value is established per compilation rather than being a fixed constant.
regMaskTP get_RBM_ALLFLOAT() const
{
    return compiler->get_RBM_ALLFLOAT();
}
// AMD64 only: yields the compiler object's float callee-trash register mask
// (rbmFltCalleeTrash) through the compiler's public accessor of the same name.
regMaskTP get_RBM_FLT_CALLEE_TRASH() const
{
    return compiler->get_RBM_FLT_CALLEE_TRASH();
}
#endif // TARGET_AMD64

#if defined(TARGET_XARCH)
// Bit masks used in negating a float or double number.
// This is to avoid creating more than one data constant for these bitmasks when a
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3524,7 +3524,7 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
// this probably needs to be changed.

// Load
genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmTmpReg, src, offset);
genCodeForLoadOffset(INS_movdqu, EA_16BYTE, xmmTmpReg, src, offset);
// Store
genStoreRegToStackArg(TYP_STRUCT, xmmTmpReg, offset);

Expand Down Expand Up @@ -8347,7 +8347,7 @@ void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset
{
ins = INS_movdqu;
// This should be changed!
attr = EA_8BYTE;
attr = EA_16BYTE;
size = 16;
}
else
Expand Down
49 changes: 49 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3325,6 +3325,24 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
opts.compJitSaveFpLrWithCalleeSavedRegisters = JitConfig.JitSaveFpLrWithCalleeSavedRegisters();
}
#endif // defined(DEBUG) && defined(TARGET_ARM64)

#if defined(TARGET_AMD64)
rbmAllFloat = RBM_ALLFLOAT_INIT;
rbmFltCalleeTrash = RBM_FLT_CALLEE_TRASH_INIT;
cntCalleeTrashFloat = CNT_CALLEE_TRASH_FLOAT_INIT;
availableRegCount = ACTUAL_REG_COUNT;

if (DoJitStressEvexEncoding())
{
rbmAllFloat |= RBM_HIGHFLOAT;
rbmFltCalleeTrash |= RBM_HIGHFLOAT;
cntCalleeTrashFloat += CNT_CALLEE_TRASH_HIGHFLOAT;
}
else
{
availableRegCount -= CNT_HIGHFLOAT;
}
#endif // TARGET_AMD64
}

#ifdef DEBUG
Expand Down Expand Up @@ -3528,6 +3546,37 @@ bool Compiler::compPromoteFewerStructs(unsigned lclNum)
return rejectThisPromo;
}

//------------------------------------------------------------------------
// dumpRegMask: print a register mask, substituting a short bracketed token
//    when the mask exactly equals one of a few well-known register sets.
//
// Arguments:
//    regs - The register mask to print
//
// Notes:
//    The well-known sets are tested in a fixed order (allInt, allInt without
//    the frame pointer, allFloat, allDouble) and the first exact match wins.
//    Any other mask is handed to dspRegMask.
//
void Compiler::dumpRegMask(regMaskTP regs) const
{
    if (regs == RBM_ALLINT)
    {
        printf("[allInt]");
        return;
    }

    if (regs == (RBM_ALLINT & ~RBM_FPBASE))
    {
        printf("[allIntButFP]");
        return;
    }

    if (regs == RBM_ALLFLOAT)
    {
        printf("[allFloat]");
        return;
    }

    if (regs == RBM_ALLDOUBLE)
    {
        printf("[allDouble]");
        return;
    }

    // Not a well-known set: fall back to the general mask printer.
    dspRegMask(regs);
}

#endif // DEBUG

void Compiler::compInitDebuggingInfo()
Expand Down
44 changes: 44 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -10361,6 +10361,8 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

bool compJitHaltMethod();

void dumpRegMask(regMaskTP regs) const;

#endif

/*
Expand Down Expand Up @@ -10635,6 +10637,48 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
GenTree* fgMorphMultiregStructArg(CallArg* arg);

bool killGCRefs(GenTree* tree);

#if defined(TARGET_AMD64)
private:
// The following are for initializing register allocator "constants" defined in targetamd64.h
// that now depend upon runtime ISA information, e.g., the presence of AVX512F/VL, which increases
// the number of SIMD (xmm, ymm, and zmm) registers from 16 to 32.
// As only 64-bit xarch has the capability to have the additional registers, we limit the changes
// to TARGET_AMD64 only.
//
// Users of these values need to define four accessor functions:
//
// regMaskTP get_RBM_ALLFLOAT();
// regMaskTP get_RBM_FLT_CALLEE_TRASH();
// unsigned get_CNT_CALLEE_TRASH_FLOAT();
// unsigned get_AVAILABLE_REG_COUNT();
//
// which return the values of these variables.
//
// This was done to avoid polluting all `targetXXX.h` macro definitions with a compiler parameter, where only
// TARGET_AMD64 requires one.
//
regMaskTP rbmAllFloat;
regMaskTP rbmFltCalleeTrash;
unsigned cntCalleeTrashFloat;
unsigned availableRegCount;

public:
// Accessor for rbmAllFloat, the per-compilation "all float registers" mask.
regMaskTP get_RBM_ALLFLOAT() const
{
    return this->rbmAllFloat;
}
// Accessor for rbmFltCalleeTrash, the per-compilation float callee-trash mask.
regMaskTP get_RBM_FLT_CALLEE_TRASH() const
{
    return this->rbmFltCalleeTrash;
}
unsigned get_CNT_CALLEE_TRASH_FLOAT() const
{
return cntCalleeTrashFloat;
}

#endif // TARGET_AMD64

}; // end of class Compiler

//---------------------------------------------------------------------------------------------------------------------
Expand Down
23 changes: 22 additions & 1 deletion src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,17 @@ void emitLocation::Print(LONG compMethodID) const
}
#endif // DEBUG

#if defined(TARGET_AMD64)
// AMD64 only: emitter-side view of the float callee-trash register mask,
// obtained from the owning compiler's public accessor of the same name.
inline regMaskTP emitter::get_RBM_FLT_CALLEE_TRASH() const
{
    return emitComp->get_RBM_FLT_CALLEE_TRASH();
}
// AMD64 only: emitter-side view of the compiler's availableRegCount value,
// which is set up per compilation rather than being a fixed constant.
inline unsigned emitter::get_AVAILABLE_REG_COUNT() const
{
    const unsigned regCount = emitComp->availableRegCount;
    return regCount;
}
#endif // TARGET_AMD64

/*****************************************************************************
*
* Return the name of an instruction format.
Expand Down Expand Up @@ -3204,11 +3215,19 @@ void emitter::emitDispRegSet(regMaskTP regs)

for (reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg))
{
if ((regs & genRegMask(reg)) == 0)
if (regs == RBM_NONE)
{
break;
}

regMaskTP curReg = genRegMask(reg);
if ((regs & curReg) == 0)
{
continue;
}

regs -= curReg;

if (sp)
{
printf(" ");
Expand Down Expand Up @@ -3378,6 +3397,7 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt,
#endif // TARGET_XARCH

/* Save the live GC registers in the unused register fields */
assert((gcrefRegs & RBM_CALLEE_TRASH) == 0);
emitEncodeCallGCregs(gcrefRegs, id);

return id;
Expand Down Expand Up @@ -3450,6 +3470,7 @@ emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt,
assert(!id->idIsLargeCns());

/* Save the live GC registers in the unused register fields */
assert((gcrefRegs & RBM_CALLEE_TRASH) == 0);
emitEncodeCallGCregs(gcrefRegs, id);

return id;
Expand Down
27 changes: 27 additions & 0 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -1121,6 +1121,28 @@ class emitter
idAddr()->_idReg4 = reg;
assert(reg == idAddr()->_idReg4);
}
bool idHasReg3() const
{
switch (idInsFmt())
{
case IF_RWR_RRD_RRD:
case IF_RWR_RRD_RRD_CNS:
case IF_RWR_RRD_RRD_RRD:
Copy link
Member

@tannergooding tannergooding Dec 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it expected that this one is here and in idHasReg4?

Could we maybe simplify it to default: return idHasReg4()?

return true;
default:
return false;
}
}
// Returns true exactly when this descriptor's instruction format is
// IF_RWR_RRD_RRD_RRD; every other format yields false.
bool idHasReg4() const
{
    return idInsFmt() == IF_RWR_RRD_RRD_RRD;
}
#endif // defined(TARGET_XARCH)
#ifdef TARGET_ARMARCH
insOpts idInsOpt() const
Expand Down Expand Up @@ -1951,6 +1973,11 @@ class emitter
CORINFO_FIELD_HANDLE emitBlkConst(const void* cnsAddr, unsigned cnsSize, unsigned cnsAlign, var_types elemType);

private:
#if defined(TARGET_AMD64)
regMaskTP get_RBM_FLT_CALLEE_TRASH() const;
unsigned get_AVAILABLE_REG_COUNT() const;
#endif // TARGET_AMD64

CORINFO_FIELD_HANDLE emitFltOrDblConst(double constValue, emitAttr attr);
CORINFO_FIELD_HANDLE emitSimd8Const(simd8_t constValue);
CORINFO_FIELD_HANDLE emitSimd16Const(simd16_t constValue);
Expand Down
3 changes: 0 additions & 3 deletions src/coreclr/jit/emitinl.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,11 +211,8 @@ inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id)
*
* Convert between a register mask and a smaller version for storage.
*/

/*static*/ inline void emitter::emitEncodeCallGCregs(regMaskTP regmask, instrDesc* id)
{
assert((regmask & RBM_CALLEE_TRASH) == 0);

unsigned encodeMask;

#ifdef TARGET_X86
Expand Down
Loading