Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable AVX512 Additional 16 SIMD Registers #79544

Merged
merged 36 commits into from
Feb 8, 2023
Merged
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
9da5b6d
Change regMask_enum and regMaskTP to unsigned __int64_t on AMD64.
anthonycanino Oct 27, 2022
6c6c884
Add upper 16 SIMD registers to allocator.
anthonycanino Dec 14, 2022
bd6d2a5
Limit high SIMD reg to compatible intrinsics lsra build.
anthonycanino Jan 17, 2023
47ff9ca
Limit high SIMD reg to compatible intrinsics lsra build.
anthonycanino Nov 14, 2022
9cabef6
Limit high SIMD reg to compatible intrinsics and gentree nodes.
anthonycanino Dec 14, 2022
9f711ba
Fix for X86 throughput.
anthonycanino Dec 14, 2022
f29c146
Add upper simd stress test to the AVX512 testing pipeline.
anthonycanino Dec 14, 2022
b95d296
Formatting.
anthonycanino Dec 14, 2022
6a4dcd1
Fix wrong-sized attr for simd mov instruction.
anthonycanino Dec 16, 2022
d1f2fdb
Fix non-AMD64 LSRA stress mask.
anthonycanino Dec 16, 2022
c7807c7
Update src/coreclr/jit/compiler.h
anthonycanino Dec 19, 2022
0b6cb55
Update src/coreclr/jit/compiler.cpp
anthonycanino Dec 19, 2022
a45163c
Update src/coreclr/jit/gentree.cpp
anthonycanino Dec 19, 2022
730b4eb
Update src/coreclr/jit/hwintrinsic.h
anthonycanino Dec 19, 2022
34d6199
Update src/coreclr/jit/target.h
anthonycanino Dec 19, 2022
395f5e4
Update src/coreclr/jit/emitxarch.cpp
anthonycanino Dec 19, 2022
852e051
Remove unneeded vars
kunalspathak Dec 15, 2022
7238b49
Address PR comments.
anthonycanino Dec 19, 2022
2b56df7
Allow `emitinl.h` access to the `rbm` variables.
anthonycanino Dec 19, 2022
fa1a550
Replace RBM_LOWSIMD with `BuildEvexIncompatibleMask`.
anthonycanino Jan 9, 2023
564dc81
Move AVX512 dependent `targetamd.h` vars into compiler object.
anthonycanino Jan 12, 2023
5ea489d
Fixing some edge cases for `targetamd.h` variables.
anthonycanino Jan 17, 2023
e60aece
Fix a merge/rebase bug.
anthonycanino Jan 18, 2023
b28a231
Update src/coreclr/jit/compiler.h
anthonycanino Jan 26, 2023
37af7c3
Update src/coreclr/jit/lsra.cpp
anthonycanino Jan 26, 2023
e9be8f8
Update src/coreclr/jit/compiler.h
anthonycanino Jan 26, 2023
f680448
Fix nits.
anthonycanino Jan 26, 2023
7416501
Merge branch 'avx512-upper-regs' of https://github.com/anthonycanino/…
anthonycanino Jan 26, 2023
73f43b6
Trying VM changes.
anthonycanino Jan 31, 2023
c6f1a90
VM hack.
anthonycanino Feb 1, 2023
91cf3db
VM hack.
anthonycanino Feb 1, 2023
228c0c5
Revert "VM hack."
anthonycanino Feb 3, 2023
5490617
Adjust ACTUAL_REG_COUNT based on availability of AVX512.
anthonycanino Feb 6, 2023
3c7acdb
Use inline accessor functions instead of macros
BruceForstall Feb 8, 2023
e42db99
Merge pull request #6 from BruceForstall/anthonycanino_avx512-upper-r…
anthonycanino Feb 8, 2023
10b4703
Clearifying comments.
anthonycanino Feb 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions eng/pipelines/common/templates/runtimes/run-test-job.yml
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,7 @@ jobs:
${{ if in(parameters.testGroup, 'jitstress-isas-avx512') }}:
scenarios:
- jitstress_isas_avx512_forceevex
- jitstress_isas_avx512_forceevex_stresshighregs
${{ if in(parameters.testGroup, 'jitstressregs-x86') }}:
scenarios:
- jitstressregs1_x86_noavx
Expand Down
19 changes: 18 additions & 1 deletion src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

#include "patchpointinfo.h"

// Please see the comment for these instance variables in `compiler.h`
#if defined(TARGET_AMD64)
#define RBM_ALLFLOAT_USE (this->rbmAllFloat)
#define RBM_FLT_CALLEE_TRASH_USE (this->rbmFltCalleeTrash)
#define CNT_CALLEE_TRASH_FLOAT_USE (this->cntCalleeTrashFloat)
#endif

/*****************************************************************************/

void CodeGenInterface::setFramePointerRequiredEH(bool value)
Expand Down Expand Up @@ -775,6 +782,16 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
}
}

// Up to this point in the file the three *_USE macros resolve through `this`; from here on they are
// re-pointed to go through a `compiler` pointer instead.
// NOTE(review): presumably the remaining code in this file has a `compiler` member in scope rather than
// being a Compiler member function -- confirm.
#undef RBM_ALLFLOAT_USE
#undef RBM_FLT_CALLEE_TRASH_USE
#undef CNT_CALLEE_TRASH_FLOAT_USE

#if defined(TARGET_AMD64)
#define RBM_ALLFLOAT_USE (compiler->rbmAllFloat)
#define RBM_FLT_CALLEE_TRASH_USE (compiler->rbmFltCalleeTrash)
#define CNT_CALLEE_TRASH_FLOAT_USE (compiler->cntCalleeTrashFloat)
#endif

// Need an explicit instantiation.
template void Compiler::compChangeLife<true>(VARSET_VALARG_TP newLife);

Expand Down Expand Up @@ -9411,4 +9428,4 @@ bool CodeGen::genCanOmitNormalizationForBswap16(GenTree* tree)
}

return (cast->gtCastType == TYP_USHORT) || (cast->gtCastType == TYP_SHORT);
}
}
9 changes: 8 additions & 1 deletion src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "emit.h"
#include "codegen.h"

// Please see the comment for these instance variables in `compiler.h`
#if defined(TARGET_AMD64)
#define RBM_ALLFLOAT_USE (compiler->rbmAllFloat)
#define RBM_FLT_CALLEE_TRASH_USE (compiler->rbmFltCalleeTrash)
#define CNT_CALLEE_TRASH_FLOAT_USE (compiler->cntCalleeTrashFloat)
#endif

//------------------------------------------------------------------------
// genInitializeRegisterState: Initialize the register state contained in 'regSet'.
//
Expand Down Expand Up @@ -2683,4 +2690,4 @@ void CodeGen::genCodeForSetcc(GenTreeCC* setcc)
inst_SETCC(setcc->gtCondition, setcc->TypeGet(), setcc->GetRegNum());
genProduceReg(setcc);
}
#endif // !TARGET_LOONGARCH64
#endif // !TARGET_LOONGARCH64
12 changes: 9 additions & 3 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "gcinfoencoder.h"
#include "patchpointinfo.h"

// Please see the comment for these instance variables in `compiler.h`
#if defined(TARGET_AMD64)
#define RBM_ALLFLOAT_USE (compiler->rbmAllFloat)
#define RBM_FLT_CALLEE_TRASH_USE (compiler->rbmFltCalleeTrash)
#define CNT_CALLEE_TRASH_FLOAT_USE (compiler->cntCalleeTrashFloat)
#endif

//---------------------------------------------------------------------
// genSetGSSecurityCookie: Set the "GS" security cookie in the prolog.
//
Expand Down Expand Up @@ -3524,7 +3530,7 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
// this probably needs to be changed.

// Load
genCodeForLoadOffset(INS_movdqu, EA_8BYTE, xmmTmpReg, src, offset);
genCodeForLoadOffset(INS_movdqu, EA_16BYTE, xmmTmpReg, src, offset);
// Store
genStoreRegToStackArg(TYP_STRUCT, xmmTmpReg, offset);

Expand Down Expand Up @@ -8347,7 +8353,7 @@ void CodeGen::genStoreRegToStackArg(var_types type, regNumber srcReg, int offset
{
ins = INS_movdqu;
// This should be changed!
attr = EA_8BYTE;
attr = EA_16BYTE;
size = 16;
}
else
Expand Down Expand Up @@ -11047,4 +11053,4 @@ bool CodeGenInterface::genCodeAddrNeedsReloc(size_t addr)
#endif // TARGET_X86
}

#endif // TARGET_XARCH
#endif // TARGET_XARCH
20 changes: 19 additions & 1 deletion src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3325,6 +3325,24 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
opts.compJitSaveFpLrWithCalleeSavedRegisters = JitConfig.JitSaveFpLrWithCalleeSavedRegisters();
}
#endif // defined(DEBUG) && defined(TARGET_ARM64)

#if defined(TARGET_AMD64)
rbmAllFloat = RBM_ALLFLOAT_INIT;
rbmFltCalleeTrash = RBM_FLT_CALLEE_TRASH_INIT;
cntCalleeTrashFloat = CNT_CALLEE_TRASH_FLOAT_INIT;
actualRegCount = ACTUAL_REG_COUNT;

if (DoJitStressEvexEncoding())
{
rbmAllFloat |= RBM_HIGHFLOAT;
rbmFltCalleeTrash |= RBM_HIGHFLOAT;
cntCalleeTrashFloat += CNT_CALLEE_TRASH_HIGHFLOAT;
}
else
{
actualRegCount -= CNT_HIGHFLOAT;
}
#endif // TARGET_AMD64
}

#ifdef DEBUG
Expand Down Expand Up @@ -10275,4 +10293,4 @@ void Compiler::EnregisterStats::Dump(FILE* fout) const
PRINT_STATS(m_stressLclFld, m_addrExposed);
PRINT_STATS(m_dispatchRetBuf, m_addrExposed);
}
#endif // TRACK_ENREG_STATS
#endif // TRACK_ENREG_STATS
19 changes: 19 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -10635,6 +10635,25 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
GenTree* fgMorphMultiregStructArg(CallArg* arg);

bool killGCRefs(GenTree* tree);

#if defined(TARGET_AMD64)
public:
// The following are for initializing register allocator "constants" defined in targetamd64.h
// that now depend upon runtime ISA information, e.g., the presence of AVX512F/VL, which increases
// the number of simd (xmm, ymm, and zmm) registers from 16 to 32.
// As only 64-bit xarch has the capability to have the additional registers, we limit the changes
// to TARGET_AMD64 only.
//
// Users of `targetamd64.h` need to define three macros, RBM_ALLFLOAT_USE, RBM_FLT_CALLEE_TRASH_USE,
// and CNT_CALLEE_TRASH_FLOAT_USE which should point to these three variables respectively.
// We did this to avoid polluting all `targetXXX.h` macro definitions with a compiler parameter, where only
// TARGET_AMD64 requires one.
regMaskTP rbmAllFloat;
regMaskTP rbmFltCalleeTrash;
unsigned cntCalleeTrashFloat;
unsigned actualRegCount;
#endif // TARGET_AMD64

}; // end of class Compiler

//---------------------------------------------------------------------------------------------------------------------
Expand Down
14 changes: 14 additions & 0 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,14 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "emit.h"
#include "codegen.h"

// Please see the comment for these instance variables in `compiler.h`
#if defined(TARGET_AMD64)
#define RBM_ALLFLOAT_USE (emitComp->rbmAllFloat)
#define RBM_FLT_CALLEE_TRASH_USE (emitComp->rbmFltCalleeTrash)
#define CNT_CALLEE_TRASH_FLOAT_USE (emitComp->cntCalleeTrashFloat)
#define ACTUAL_REG_COUNT (emitComp->actualRegCount)
#endif

/*****************************************************************************
*
* Represent an emitter location.
Expand Down Expand Up @@ -3378,6 +3386,7 @@ emitter::instrDesc* emitter::emitNewInstrCallInd(int argCnt,
#endif // TARGET_XARCH

/* Save the live GC registers in the unused register fields */
assert((gcrefRegs & RBM_CALLEE_TRASH) == 0);
emitEncodeCallGCregs(gcrefRegs, id);

return id;
Expand Down Expand Up @@ -3450,6 +3459,7 @@ emitter::instrDesc* emitter::emitNewInstrCallDir(int argCnt,
assert(!id->idIsLargeCns());

/* Save the live GC registers in the unused register fields */
assert((gcrefRegs & RBM_CALLEE_TRASH) == 0);
emitEncodeCallGCregs(gcrefRegs, id);

return id;
Expand Down Expand Up @@ -9945,3 +9955,7 @@ void emitter::emitEnableGC()
}
}
#endif // !defined(JIT32_GCENCODER)

// Remove the per-file macro redirections that were defined at the top of this file.
// NOTE(review): ACTUAL_REG_COUNT is defined alongside these three at the top of this file but is not
// undefined here -- confirm that is intentional.
#undef RBM_ALLFLOAT_USE
#undef RBM_FLT_CALLEE_TRASH_USE
#undef CNT_CALLEE_TRASH_FLOAT_USE
22 changes: 22 additions & 0 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -1121,6 +1121,28 @@ class emitter
idAddr()->_idReg4 = reg;
assert(reg == idAddr()->_idReg4);
}
bool idHasReg3() const
{
switch (idInsFmt())
{
case IF_RWR_RRD_RRD:
case IF_RWR_RRD_RRD_CNS:
case IF_RWR_RRD_RRD_RRD:
Copy link
Member

@tannergooding tannergooding Dec 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it expected that this one is here and in idHasReg4?

Could we maybe simplify it to default: return idHasReg4()?

return true;
default:
return false;
}
}
// Reports whether this descriptor's instruction format is IF_RWR_RRD_RRD_RRD,
// the only format for which this query answers true.
bool idHasReg4() const
{
return idInsFmt() == IF_RWR_RRD_RRD_RRD;
}
#endif // defined(TARGET_XARCH)
#ifdef TARGET_ARMARCH
insOpts idInsOpt() const
Expand Down
14 changes: 11 additions & 3 deletions src/coreclr/jit/emitinl.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@

#ifdef TARGET_XARCH

// Please see the comment for these instance variables in `compiler.h`
#if defined(TARGET_AMD64)
#define RBM_ALLFLOAT_USE (emitComp->rbmAllFloat)
#define RBM_FLT_CALLEE_TRASH_USE (emitComp->rbmFltCalleeTrash)
#define CNT_CALLEE_TRASH_FLOAT_USE (emitComp->cntCalleeTrashFloat)
#endif

/* static */
inline bool emitter::instrIs3opImul(instruction ins)
{
Expand Down Expand Up @@ -205,17 +212,18 @@ inline ssize_t emitter::emitGetInsAmdAny(instrDesc* id)
return id->idAddr()->iiaAddrMode.amDisp;
}

#undef RBM_ALLFLOAT_USE
#undef RBM_FLT_CALLEE_TRASH_USE
#undef CNT_CALLEE_TRASH_FLOAT_USE

#endif // TARGET_XARCH

/*****************************************************************************
*
* Convert between a register mask and a smaller version for storage.
*/

/*static*/ inline void emitter::emitEncodeCallGCregs(regMaskTP regmask, instrDesc* id)
{
assert((regmask & RBM_CALLEE_TRASH) == 0);

unsigned encodeMask;

#ifdef TARGET_X86
Expand Down
Loading