Skip to content

Commit

Permalink
Use internal blitter for internal memory transfers
Browse files Browse the repository at this point in the history
Signed-off-by: Kamil Kopryk <[email protected]>
Related-To: NEO-6621
  • Loading branch information
KamilKoprykIntel authored and Compute-Runtime-Automation committed May 11, 2022
1 parent c354b73 commit fb4b1cc
Show file tree
Hide file tree
Showing 14 changed files with 106 additions and 27 deletions.
14 changes: 9 additions & 5 deletions opencl/source/command_queue/command_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,15 +275,19 @@ CommandStreamReceiver &CommandQueue::selectCsrForBuiltinOperation(const CsrSelec

// Selects the blitter (BCS) engine for this queue and initializes it. The body runs only
// when bcsAllowed is set and bcsInitialized is still false, so repeated calls are no-ops.
//
// internalUsage is forwarded to EngineHelpers::getBcsEngineType and also takes part in the
// choice between EngineUsage::Internal and EngineUsage::Regular.
//
// NOTE(review): this listing appears to be a rendered diff with removed and added lines
// interleaved — bcsEngineType is declared twice and two `if` openers share one set of
// closing braces. Presumably the variants using hwInfo / bcsIndex / engineUsage are the
// added ones; confirm against the real file before treating this as compilable code.
void CommandQueue::initializeBcsEngine(bool internalUsage) {
if (bcsAllowed && !bcsInitialized) {
auto &hwInfo = device->getHardwareInfo();
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
auto &neoDevice = device->getNearestGenericSubDevice(0)->getDevice();
auto &selectorCopyEngine = neoDevice.getSelectorCopyEngine();
auto bcsEngineType = EngineHelpers::getBcsEngineType(device->getHardwareInfo(), device->getDeviceBitfield(), selectorCopyEngine, internalUsage);
bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)] = neoDevice.tryGetEngine(bcsEngineType, EngineUsage::Regular);
auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, device->getDeviceBitfield(), selectorCopyEngine, internalUsage);
auto bcsIndex = EngineHelpers::getBcsIndex(bcsEngineType);
// The internal engine is requested only when the queue is for internal usage AND the
// hardware helper prefers an internal BCS engine; every other case uses the regular one.
auto engineUsage = (internalUsage && hwHelper.preferInternalBcsEngine()) ? EngineUsage::Internal : EngineUsage::Regular;
bcsEngines[bcsIndex] = neoDevice.tryGetEngine(bcsEngineType, engineUsage);
// The engine type is recorded and the queue is marked initialized before the null check
// below, i.e. also when tryGetEngine did not return an engine.
bcsEngineTypes.push_back(bcsEngineType);
bcsInitialized = true;
if (bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]) {
bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]->osContext->ensureContextInitialized();
bcsEngines[EngineHelpers::getBcsIndex(bcsEngineType)]->commandStreamReceiver->initDirectSubmission();
// Only an engine that was actually found gets its OS context and direct submission set up.
if (bcsEngines[bcsIndex]) {
bcsEngines[bcsIndex]->osContext->ensureContextInitialized();
bcsEngines[bcsIndex]->commandStreamReceiver->initDirectSubmission();
}
}
}
Expand Down
26 changes: 26 additions & 0 deletions opencl/test/unit_test/command_queue/command_queue_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,32 @@ TEST(CommandQueue, givenDeviceWithSubDevicesSupportingBlitOperationsWhenQueueIsC
EXPECT_EQ(bcsEngine.commandStreamReceiver, cmdQ.getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS));
}

// Checks that a command queue created for internal usage exposes the BCS command stream
// receiver of the engine whose usage (Internal vs Regular) matches what
// HwHelper::preferInternalBcsEngine() reports, for both values of the
// PreferInternalBcsEngine debug flag.
TEST(CommandQueue, whenCommandQueueWithInternalUsageIsCreatedThenInternalBcsEngineIsUsed) {
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableBlitterForEnqueueOperations.set(1);
    // BCS initialization must not be deferred, so the engine is bound at queue creation.
    DebugManager.flags.DeferCmdQBcsInitialization.set(0);

    HardwareInfo hwInfo = *defaultHwInfo;
    hwInfo.capabilityTable.blitterOperationsSupported = true;
    REQUIRE_FULL_BLITTER_OR_SKIP(&hwInfo);

    auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
    const bool internalUsage = true;

    // BCS2 is expected when link copy engines are supported, the main BCS otherwise.
    auto expectedEngineType = aub_stream::EngineType::ENGINE_BCS;
    if (EngineHelpers::linkCopyEnginesSupported(hwInfo, device->getDeviceBitfield())) {
        expectedEngineType = aub_stream::EngineType::ENGINE_BCS2;
    }

    for (const auto flagValue : {0, 1}) {
        DebugManager.flags.PreferInternalBcsEngine.set(flagValue);

        auto expectedUsage = EngineUsage::Regular;
        if (hwHelper.preferInternalBcsEngine()) {
            expectedUsage = EngineUsage::Internal;
        }

        MockCommandQueue cmdQ(nullptr, device.get(), 0, internalUsage);
        auto &expectedEngine = device->getEngine(expectedEngineType, expectedUsage);

        EXPECT_NE(nullptr, cmdQ.getBcsCommandStreamReceiver(expectedEngineType));
        EXPECT_EQ(expectedEngine.commandStreamReceiver, cmdQ.getBcsCommandStreamReceiver(expectedEngineType));
    }
}

INSTANTIATE_TEST_CASE_P(uint32_t,
CommandQueueWithBlitOperationsTests,
::testing::Values(CL_COMMAND_WRITE_BUFFER,
Expand Down
16 changes: 16 additions & 0 deletions opencl/test/unit_test/helpers/hw_helper_tests_xehp_and_later.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,22 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenXeHPAndLaterPlatform
EXPECT_TRUE(hwHelper.timestampPacketWriteSupported());
}

// With no debug-flag override set by this test, the XeHP-and-later helper is expected to
// prefer the internal BCS engine.
HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenXeHPAndLaterPlatformWhenPreferInternalBcsEngineThenReturnTrue) {
    const auto &helper = HwHelperHw<FamilyType>::get();
    const bool prefersInternalEngine = helper.preferInternalBcsEngine();
    EXPECT_TRUE(prefersInternalEngine);
}

// The PreferInternalBcsEngine debug flag overrides the helper's answer: 1 forces true,
// 0 forces false. DebugManagerStateRestore puts the flag back when the test ends.
HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenXeHPAndLaterPlatformAndDebugFlagsWhenPreferInternalBcsEngineThenReturnsCorrectResult) {
    DebugManagerStateRestore restore;
    auto &helper = HwHelperHw<FamilyType>::get();
    auto &preferInternalFlag = DebugManager.flags.PreferInternalBcsEngine;

    preferInternalFlag.set(1);
    EXPECT_TRUE(helper.preferInternalBcsEngine());

    preferInternalFlag.set(0);
    EXPECT_FALSE(helper.preferInternalBcsEngine());
}

HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, givenAllFlagsSetWhenGetGpgpuEnginesThenReturnThreeRcsEnginesFourCcsEnginesAndOneBcsEngine) {
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.featureTable.flags.ftrCCSNode = true;
Expand Down
1 change: 1 addition & 0 deletions opencl/test/unit_test/test_files/igdrcl.config
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,7 @@ MakeEachAllocationResident = -1
AssignBCSAtEnqueue = -1
DeferCmdQGpgpuInitialization = -1
DeferCmdQBcsInitialization = -1
PreferInternalBcsEngine = -1
ReuseKernelBinaries = -1
EnableChipsetUniqueUUID = -1
ForceSimdMessageSizeInWalker = -1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ HWTEST2_F(HwHelperTestsXeHpcCore, givenRevisionEnumAndPlatformFamilyTypeThenProp
}

XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, whenGetGpgpuEnginesThenReturnTwoCccsEnginesAndFourCcsEnginesAndLinkCopyEngines) {
const size_t numEngines = 17;
const size_t numEngines = 18;

HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.featureTable.flags.ftrCCSNode = true;
Expand Down Expand Up @@ -241,6 +241,7 @@ XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, whenGetGpgpuEnginesThenReturnTwoCccsEn
{aub_stream::ENGINE_BCS, false, true},
{aub_stream::ENGINE_BCS1, false, true},
{aub_stream::ENGINE_BCS2, false, true},
{aub_stream::ENGINE_BCS2, false, true},
{aub_stream::ENGINE_BCS3, false, true},
{aub_stream::ENGINE_BCS4, false, true},
{aub_stream::ENGINE_BCS5, false, true},
Expand All @@ -257,7 +258,7 @@ XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, whenGetGpgpuEnginesThenReturnTwoCccsEn
}

XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, whenGetGpgpuEnginesThenReturnTwoCccsEnginesAndFourCcsEnginesAndEightLinkCopyEngines) {
const size_t numEngines = 17;
const size_t numEngines = 18;

HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.featureTable.flags.ftrCCSNode = true;
Expand Down Expand Up @@ -290,6 +291,7 @@ XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, whenGetGpgpuEnginesThenReturnTwoCccsEn
{aub_stream::ENGINE_BCS, false, true},
{aub_stream::ENGINE_BCS1, false, true},
{aub_stream::ENGINE_BCS2, false, true},
{aub_stream::ENGINE_BCS2, false, true},
{aub_stream::ENGINE_BCS3, false, true},
{aub_stream::ENGINE_BCS4, false, true},
{aub_stream::ENGINE_BCS5, false, true},
Expand All @@ -306,7 +308,7 @@ XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, whenGetGpgpuEnginesThenReturnTwoCccsEn
}

XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCccsAsDefaultEngineWhenGetEnginesCalledThenChangeDefaultEngine) {
const size_t numEngines = 17;
const size_t numEngines = 18;

HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.featureTable.flags.ftrCCSNode = true;
Expand Down Expand Up @@ -339,6 +341,7 @@ XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCccsAsDefaultEngineWhenGetEngines
{aub_stream::ENGINE_BCS, false, true},
{aub_stream::ENGINE_BCS1, false, true},
{aub_stream::ENGINE_BCS2, false, true},
{aub_stream::ENGINE_BCS2, false, true},
{aub_stream::ENGINE_BCS3, false, true},
{aub_stream::ENGINE_BCS4, false, true},
{aub_stream::ENGINE_BCS5, false, true},
Expand All @@ -355,7 +358,7 @@ XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCccsAsDefaultEngineWhenGetEngines
}

XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenOneCcsEnabledWhenGetEnginesCalledThenCreateOnlyOneCcs) {
const size_t numEngines = 14;
const size_t numEngines = 15;

HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.featureTable.flags.ftrCCSNode = true;
Expand Down Expand Up @@ -385,6 +388,7 @@ XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenOneCcsEnabledWhenGetEnginesCalled
{aub_stream::ENGINE_BCS, false, true},
{aub_stream::ENGINE_BCS1, false, true},
{aub_stream::ENGINE_BCS2, false, true},
{aub_stream::ENGINE_BCS2, false, true},
{aub_stream::ENGINE_BCS3, false, true},
{aub_stream::ENGINE_BCS4, false, true},
{aub_stream::ENGINE_BCS5, false, true},
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -47,8 +47,16 @@ PVCTEST_F(EngineNodeHelperPvcTests, givenPvcBaseDieA0AndTile1WhenGettingBcsEngin
auto deviceBitfield = 0b10;
auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine();

EXPECT_EQ(ENGINE_BCS2, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, true));
EXPECT_EQ(ENGINE_BCS4, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, true));
EXPECT_EQ(ENGINE_BCS2, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, true));
EXPECT_EQ(ENGINE_BCS4, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, true));
{
auto internalUsage = true;
EXPECT_EQ(ENGINE_BCS2, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, internalUsage));
EXPECT_EQ(ENGINE_BCS2, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, internalUsage));
}
{
auto internalUsage = false;
EXPECT_EQ(ENGINE_BCS, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, internalUsage));
EXPECT_EQ(ENGINE_BCS2, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, internalUsage));
EXPECT_EQ(ENGINE_BCS4, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, internalUsage));
EXPECT_EQ(ENGINE_BCS2, EngineHelpers::getBcsEngineType(*pHwInfo, deviceBitfield, selectorCopyEngine, internalUsage));
}
}
1 change: 1 addition & 0 deletions shared/source/debug_settings/debug_variables_base.inl
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, MakeEachAllocationResident, -1, "-1: default, 0:
DECLARE_DEBUG_VARIABLE(int32_t, AssignBCSAtEnqueue, -1, "-1: default, 0:disabled, 1: enabled.")
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQGpgpuInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
DECLARE_DEBUG_VARIABLE(int32_t, DeferCmdQBcsInitialization, -1, "-1: default, 0:disabled, 1: enabled.")
DECLARE_DEBUG_VARIABLE(int32_t, PreferInternalBcsEngine, -1, "-1: default, 0:disabled, 1: enabled. When enabled use internal bcs engine for internal transfers, when disabled use regular engine")
DECLARE_DEBUG_VARIABLE(int32_t, ReuseKernelBinaries, -1, "-1: default, 0:disabled, 1: enabled. If enabled, driver reuses kernel binaries.")

/*DIRECT SUBMISSION FLAGS*/
Expand Down
7 changes: 5 additions & 2 deletions shared/source/helpers/blit_commands_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ BlitOperationResult BlitHelper::blitMemoryToAllocationBanks(const Device &device
if (!hwInfo.capabilityTable.blitterOperationsSupported) {
return BlitOperationResult::Unsupported;
}
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);

UNRECOVERABLE_IF(memoryBanks.none());

Expand All @@ -189,10 +190,12 @@ BlitOperationResult BlitHelper::blitMemoryToAllocationBanks(const Device &device

UNRECOVERABLE_IF(!pRootDevice->getDeviceBitfield().test(tileId));
auto pDeviceForBlit = pRootDevice->getNearestGenericSubDevice(tileId);

auto &selectorCopyEngine = pDeviceForBlit->getSelectorCopyEngine();
auto deviceBitfield = pDeviceForBlit->getDeviceBitfield();
auto bcsEngine = pDeviceForBlit->tryGetEngine(EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, true), EngineUsage::Regular);
auto internalUsage = true;
auto bcsEngineType = EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, internalUsage);
auto bcsEngineUsage = hwHelper.preferInternalBcsEngine() ? EngineUsage::Internal : EngineUsage::Regular;
auto bcsEngine = pDeviceForBlit->tryGetEngine(bcsEngineType, bcsEngineUsage);
if (!bcsEngine) {
return BlitOperationResult::Unsupported;
}
Expand Down
2 changes: 1 addition & 1 deletion shared/source/helpers/engine_node_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ aub_stream::EngineType getBcsEngineType(const HardwareInfo &hwInfo, const Device
return DebugManager.flags.ForceBCSForInternalCopyEngine.get() == 0 ? aub_stream::EngineType::ENGINE_BCS
: static_cast<aub_stream::EngineType>(aub_stream::EngineType::ENGINE_BCS1 + DebugManager.flags.ForceBCSForInternalCopyEngine.get() - 1);
}
return selectLinkCopyEngine(hwInfo, deviceBitfield, selectorCopyEngine.selector);
return aub_stream::ENGINE_BCS2;
}

const bool isMainCopyEngineAlreadyUsed = selectorCopyEngine.isMainUsed.exchange(true);
Expand Down
4 changes: 2 additions & 2 deletions shared/source/helpers/hw_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ class HwHelper {
virtual void setSipKernelData(uint32_t *&sipKernelBinary, size_t &kernelBinarySize) const = 0;
virtual void adjustPreemptionSurfaceSize(size_t &csrSize) const = 0;
virtual size_t getSamplerStateSize() const = 0;

virtual bool preferInternalBcsEngine() const = 0;
virtual bool isScratchSpaceSurfaceStateAccessible() const = 0;
virtual uint64_t getRenderSurfaceStateBaseAddress(void *renderSurfaceState) const = 0;
virtual uint32_t getRenderSurfaceStatePitch(void *renderSurfaceState) const = 0;
Expand Down Expand Up @@ -391,7 +391,7 @@ class HwHelperHw : public HwHelper {
void adjustPreemptionSurfaceSize(size_t &csrSize) const override;

bool isScratchSpaceSurfaceStateAccessible() const override;

bool preferInternalBcsEngine() const override;
size_t getMax3dImageWidthOrHeight() const override;
uint64_t getMaxMemAllocSize() const override;
uint64_t getPatIndex(CacheRegion cacheRegion, CachePolicy cachePolicy) const override;
Expand Down
5 changes: 5 additions & 0 deletions shared/source/helpers/hw_helper_bdw_and_later.inl
Original file line number Diff line number Diff line change
Expand Up @@ -160,4 +160,9 @@ inline bool HwHelperHw<GfxFamily>::isLinuxCompletionFenceSupported() const {
return false;
}

// Reports whether internal transfers should use the internal BCS engine rather than the
// regular one. This implementation always answers false and does not consult any debug flag.
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::preferInternalBcsEngine() const {
return false;
}

} // namespace NEO
10 changes: 10 additions & 0 deletions shared/source/helpers/hw_helper_xehp_and_later.inl
Original file line number Diff line number Diff line change
Expand Up @@ -214,4 +214,14 @@ inline bool HwHelperHw<GfxFamily>::isLinuxCompletionFenceSupported() const {
return false;
}

// Reports whether internal transfers should use the internal BCS engine rather than the
// regular one. The answer is true unless the PreferInternalBcsEngine debug flag is set to
// something other than -1, in which case the flag value (converted to bool) is returned.
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::preferInternalBcsEngine() const {
    const auto debugOverride = DebugManager.flags.PreferInternalBcsEngine.get();
    if (debugOverride == -1) {
        // -1 means "no override": keep the platform default.
        return true;
    }
    return static_cast<bool>(debugOverride);
}

} // namespace NEO
5 changes: 4 additions & 1 deletion shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,16 @@ const EngineInstancesContainer HwHelperHw<Family>::getGpgpuEngineInstances(const
if (hwInfo.capabilityTable.blitterOperationsSupported) {
if (hwInfo.featureTable.ftrBcsInfo.test(0)) {
engines.push_back({aub_stream::EngineType::ENGINE_BCS, EngineUsage::Regular}); // Main copy engine
engines.push_back({aub_stream::EngineType::ENGINE_BCS, EngineUsage::Internal}); // internal usage
engines.push_back({aub_stream::EngineType::ENGINE_BCS, EngineUsage::Internal}); // Internal usage
}

for (uint32_t i = 1; i < hwInfo.featureTable.ftrBcsInfo.size(); i++) {
if (hwInfo.featureTable.ftrBcsInfo.test(i)) {
auto engineType = static_cast<aub_stream::EngineType>((i - 1) + aub_stream::ENGINE_BCS1); // Link copy engine
engines.push_back({engineType, EngineUsage::Regular});
if (i == 2) {
engines.push_back({engineType, EngineUsage::Internal}); // BCS2 for internal usage
}
}
}
}
Expand Down
12 changes: 5 additions & 7 deletions shared/test/common/helpers/engine_node_helper_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
Expand Down Expand Up @@ -122,14 +122,12 @@ TEST(EngineNodeHelperTest, givenLinkBcsEngineIsReleasedWhenGettingBcsEngineTypeT
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, false));
}

// Exercises EngineHelpers::getBcsEngineType with internal usage enabled on a configuration
// with ftrBcsInfo = 0b111 and a two-bit device bitfield.
//
// NOTE(review): this listing appears to be a rendered diff with removed and added lines
// interleaved — two TEST headers follow each other and there are two groups of
// expectations. Presumably the "...ThenAlwaysReturnLinkEngine" header and the alternating
// BCS2/BCS1 expectations are the removed version, and the "...ThenUseBcs2only" header with
// the isInternalUsage expectations is the added one; confirm against the real file.
TEST(EngineNodeHelperTest, givenLinkCopyEnginesAndInternalUsageEnabledWhenGettingBcsEngineThenAlwaysReturnLinkEngine) {
TEST(EngineNodeHelperTest, givenLinkCopyEnginesAndInternalUsageEnabledWhenGettingBcsEngineThenUseBcs2only) {
SelectorCopyEngine selectorCopyEngine{};
HardwareInfo hwInfo = *::defaultHwInfo;
DeviceBitfield deviceBitfield = 0b11;
hwInfo.featureTable.ftrBcsInfo = 0b111;

// Expectations that alternate between BCS2 and BCS1 on consecutive calls.
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, true));
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, true));
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, true));
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, true));
// Expectations that consecutive internal-usage calls both yield BCS2.
auto isInternalUsage = true;
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, isInternalUsage));
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, EngineHelpers::getBcsEngineType(hwInfo, deviceBitfield, selectorCopyEngine, isInternalUsage));
}

0 comments on commit fb4b1cc

Please sign in to comment.