diff --git a/cmake/XglCompileDefinitions.cmake b/cmake/XglCompileDefinitions.cmake
index 7a345008..3fac14e5 100644
--- a/cmake/XglCompileDefinitions.cmake
+++ b/cmake/XglCompileDefinitions.cmake
@@ -87,6 +87,18 @@ macro(xgl_set_compile_definitions)
         target_compile_definitions(xgl PRIVATE VKI_BUILD_PHOENIX2=1)
     endif()
 
+#if VKI_BUILD_GFX115
+    if(XGL_BUILD_GFX115)
+        target_compile_definitions(xgl PRIVATE VKI_BUILD_GFX115=1)
+    endif()
+#endif
+
+#if VKI_BUILD_STRIX1
+    if(XGL_BUILD_STRIX1)
+        target_compile_definitions(xgl PRIVATE VKI_BUILD_STRIX1=1)
+    endif()
+#endif
+
     if(XGL_BUILD_REMBRANDT)
         target_compile_definitions(xgl PRIVATE VKI_BUILD_REMBRANDT=1)
     endif()
diff --git a/cmake/XglOptions.cmake b/cmake/XglOptions.cmake
index 9fd74fbf..bac81e04 100644
--- a/cmake/XglOptions.cmake
+++ b/cmake/XglOptions.cmake
@@ -65,6 +65,14 @@ macro(xgl_options)
     option(XGL_BUILD_PHOENIX2 "Build vulkan for PHOENIX2" ON)
 
+#if VKI_BUILD_GFX115
+    option(XGL_BUILD_GFX115 "Build vulkan for GFX115" ON)
+#endif
+
+#if VKI_BUILD_STRIX1
+    option(XGL_BUILD_STRIX1 "Build vulkan for STRIX1" ON)
+#endif
+
     option(XGL_BUILD_TESTS "Build tests?" OFF)
 
     option(XGL_BUILD_TOOLS "Build tools?" OFF)
diff --git a/cmake/XglOverrides.cmake b/cmake/XglOverrides.cmake
index 70bddd86..b856086e 100644
--- a/cmake/XglOverrides.cmake
+++ b/cmake/XglOverrides.cmake
@@ -109,6 +109,14 @@ macro(xgl_overrides_pal)
     set(PAL_BUILD_PHOENIX2 ${XGL_BUILD_PHOENIX2} CACHE BOOL "${PROJECT_NAME} override." FORCE)
 
+#if VKI_BUILD_GFX115
+    set(PAL_BUILD_GFX115 ${XGL_BUILD_GFX115} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+#endif
+
+#if VKI_BUILD_STRIX1
+    set(PAL_BUILD_STRIX1 ${XGL_BUILD_STRIX1} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+#endif
+
     # Wayland
     set(PAL_BUILD_WAYLAND ${BUILD_WAYLAND_SUPPORT} CACHE BOOL "Build PAL with Wayland support" FORCE)
@@ -161,6 +169,14 @@ macro(xgl_overrides_vkgc)
     set(LLPC_BUILD_PHOENIX2 ${XGL_BUILD_PHOENIX2} CACHE BOOL "${PROJECT_NAME} override." FORCE)
 
+#if VKI_BUILD_GFX115
+    set(LLPC_BUILD_GFX115 ${XGL_BUILD_GFX115} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+#endif
+
+#if VKI_BUILD_STRIX1
+    set(LLPC_BUILD_STRIX1 ${XGL_BUILD_STRIX1} CACHE BOOL "${PROJECT_NAME} override." FORCE)
+#endif
+
     set(LLPC_ENABLE_WERROR ${ICD_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
 
 endmacro()
@@ -195,6 +211,12 @@ macro(xgl_overrides)
         set(LLVM_ENABLE_ASSERTIONS "${XGL_ENABLE_ASSERTIONS}" CACHE BOOL "LLVM_ENABLE_ASSERTIONS is overridden." FORCE)
     endif()
 
+    set(LLVM_INCLUDE_BENCHMARKS OFF CACHE BOOL "LLVM_INCLUDE_BENCHMARKS is overridden." FORCE)
+
+    set(LLVM_INCLUDE_DOCS OFF CACHE BOOL "LLVM_INCLUDE_DOCS is overridden." FORCE)
+
+    set(LLVM_INCLUDE_EXAMPLES OFF CACHE BOOL "LLVM_INCLUDE_EXAMPLES is overridden." FORCE)
+
     set(VAM_ENABLE_WERROR ${ICD_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
 
     set(ADDR_ENABLE_WERROR ${ICD_ANALYSIS_WARNINGS_AS_ERRORS} CACHE BOOL "${PROJECT_NAME} override." FORCE)
diff --git a/cmake/XglVersions.cmake b/cmake/XglVersions.cmake
index f1276f4c..7b95dbdd 100644
--- a/cmake/XglVersions.cmake
+++ b/cmake/XglVersions.cmake
@@ -28,7 +28,7 @@ include_guard()
 # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface
 # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions.
 # It must be updated on each PAL promotion after handling all of the interface changes described in palLib.h.
-set(ICD_PAL_CLIENT_MAJOR_VERSION "887")
+set(ICD_PAL_CLIENT_MAJOR_VERSION "888")
 
 # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1.
 # It describes the interface version of the gpuopen shared module (part of PAL) that the ICD supports.
@@ -37,7 +37,7 @@ set(ICD_GPUOPEN_CLIENT_MAJOR_VERSION "42")
 #if VKI_RAY_TRACING
 # This will become the value of GPURT_CLIENT_INTERFACE_MAJOR_VERSION if VKI_RAY_TRACING=1.
 # It describes the interface version of the GpuRT shared module that the ICD supports.
-set(ICD_GPURT_CLIENT_MAJOR_VERSION "46")
+set(ICD_GPURT_CLIENT_MAJOR_VERSION "47")
 #endif
 
 # This will become the value of LLPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_LLPC=1.
diff --git a/icd/Loader/LunarG/Lnx/amd-icd.json b/icd/Loader/LunarG/Lnx/amd-icd.json
index 2ae28859..ba102a99 100644
--- a/icd/Loader/LunarG/Lnx/amd-icd.json
+++ b/icd/Loader/LunarG/Lnx/amd-icd.json
@@ -2,13 +2,13 @@
     "file_format_version": "1.0.0",
     "ICD": {
         "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so",
-        "api_version": "1.3.292"
+        "api_version": "1.3.293"
     },
     "layer": {
         "name": "VK_LAYER_AMD_switchable_graphics_@ISABITS@",
         "type": "GLOBAL",
         "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so",
-        "api_version": "1.3.292",
+        "api_version": "1.3.293",
         "implementation_version": "1",
         "description": "AMD switchable graphics layer",
         "functions": {
diff --git a/icd/api/app_profile.cpp b/icd/api/app_profile.cpp
index a5984c29..27e608b3 100644
--- a/icd/api/app_profile.cpp
+++ b/icd/api/app_profile.cpp
@@ -600,7 +600,7 @@ constexpr AppProfilePatternEntry AppNameX4Foundations
     "x4"
 };
 
-constexpr AppProfilePatternEntry AppNameHaloInfiniteLauncher
+constexpr AppProfilePatternEntry AppNameHaloInfinite
 {
     PatternAppNameLower,
     "haloinfinite.exe"
@@ -780,6 +780,18 @@ constexpr AppProfilePatternEntry AppEngineGodot =
     "godot engine"
 };
 
+constexpr AppProfilePatternEntry AppNameArchean =
+{
+    PatternAppNameLower,
+    "archean"
+};
+
+constexpr AppProfilePatternEntry AppEngineXenon =
+{
+    PatternEngineNameLower,
+    "xenonengine"
+};
+
 // Section END of AppProfilePatternEntry for all games
 
 // This is a table of patterns. The first matching pattern in this table will be returned.
@@ -1414,12 +1426,21 @@ AppProfilePattern AppPatternTable[] = { AppProfile::DxvkHaloInfiniteLauncher, { - AppNameHaloInfiniteLauncher, + AppNameHaloInfinite, AppEngineDXVK, PatternEnd } }, + { + AppProfile::HaloInfinite, + { + AppNameHaloInfinite, + AppEngineVKD3D, + PatternEnd + } + }, + { AppProfile::Starfield, { @@ -1601,6 +1622,15 @@ AppProfilePattern AppPatternTable[] = AppEngineGodot, PatternEnd } + }, + + { + AppProfile::Archean, + { + AppNameArchean, + AppEngineXenon, + PatternEnd + } } }; diff --git a/icd/api/appopt/shader_profiles/llpc/generic/CSGO/profile.json b/icd/api/appopt/shader_profiles/llpc/generic/CSGO/profile.json index 2d415ea7..0d3db59d 100644 --- a/icd/api/appopt/shader_profiles/llpc/generic/CSGO/profile.json +++ b/icd/api/appopt/shader_profiles/llpc/generic/CSGO/profile.json @@ -11,6 +11,45 @@ "favorLatencyHiding": true } } + }, + { + "pattern": { + "cs": { + "codeHash": "0x24DFE73ADEC799EA 8D071534C2C16DD7" + } + }, + "action": { + "cs": { + "favorLatencyHiding": true + } + }, + "comment": "MSAA 8x" + }, + { + "pattern": { + "cs": { + "codeHash": "0xACDE520F0F6543C2 2D3BA91A14373265" + } + }, + "action": { + "cs": { + "favorLatencyHiding": true + } + }, + "comment": "MSAA 4x" + }, + { + "pattern": { + "cs": { + "codeHash": "0xABCCB3D9CB7B7526 D67F54AB28FF6F7D" + } + }, + "action": { + "cs": { + "favorLatencyHiding": true + } + }, + "comment": "MSAA 2x" } ] } \ No newline at end of file diff --git a/icd/api/devmode/devmode_ubertrace.h b/icd/api/devmode/devmode_ubertrace.h index 56cbaf60..5b2cdbc4 100644 --- a/icd/api/devmode/devmode_ubertrace.h +++ b/icd/api/devmode/devmode_ubertrace.h @@ -134,7 +134,7 @@ class DevModeUberTrace final : public IDevMode uint32 numMarkerStrings, const uint32* pMarkerStringOffsets, uint32 markerStringDataSize, - const char* pMarkerStringData); + const char* pMarkerStringData) override; private: DevModeUberTrace(Instance* pInstance); diff --git a/icd/api/graphics_pipeline_common.cpp b/icd/api/graphics_pipeline_common.cpp index 72b7dbdf..cfe52317 100644 --- a/icd/api/graphics_pipeline_common.cpp +++ b/icd/api/graphics_pipeline_common.cpp @@ -1346,6 +1346,7 @@ static void BuildViewportState( { if (pVp != nullptr) { + EXTRACT_VK_STRUCTURES_0( viewportDepthClipControl, PipelineViewportDepthClipControlCreateInfoEXT, @@ -2302,7 +2303,7 @@ void GraphicsPipelineCommon::BuildPipelineObjectCreateInfo( hasMesh = true; } - pInfo->dynamicStates = GetDynamicStateFlags(pIn->pDynamicState, &libInfo);; + pInfo->dynamicStates = GetDynamicStateFlags(pIn->pDynamicState, &libInfo); if (libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) { diff --git a/icd/api/include/app_profile.h b/icd/api/include/app_profile.h index c00a3159..86d1af44 100644 --- a/icd/api/include/app_profile.h +++ b/icd/api/include/app_profile.h @@ -122,8 +122,8 @@ enum class AppProfile : uint32_t DxvkGodOfWar, // DXVK God of War ELEX2, // ELEX II X4Foundations, // X4: Foundations by Egosoft - DxvkHaloInfiniteLauncher,// DXVK Halo Infinite Launcher (Don't Confuse it with VKD3D - // Halo Infinite Game) + DxvkHaloInfiniteLauncher,// DXVK Halo Infinite Launcher + HaloInfinite, // VKD3D Halo Infinite Starfield, // VKD3D Starfield DxvkTf2, // DXVK Team Fortress 2 MetalGearSolid5, // Metal Gear Solid5 : The Phantom Pain @@ -147,6 +147,7 @@ enum class AppProfile : uint32_t Vkd3dEngine, // vkd3d-proton for steam games DXVK, // DXVK WindowKill, // Windowkill by torcado + Archean, // Archean by batcholi }; struct ProfileSettings diff --git 
a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h index caa55424..49916033 100644 --- a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h +++ b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h @@ -69,7 +69,7 @@ extern "C" { #define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0 // Version of this file -#define VK_HEADER_VERSION 292 +#define VK_HEADER_VERSION 293 // Complete version of this file #define VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION(0, 1, 3, VK_HEADER_VERSION) @@ -1121,6 +1121,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_POOL_OVERALLOCATION_FEATURES_NV = 1000546000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAW_ACCESS_CHAINS_FEATURES_NV = 1000555000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR = 1000558000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMMAND_BUFFER_INHERITANCE_FEATURES_NV = 1000559000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_7_FEATURES_KHR = 1000562000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_7_PROPERTIES_KHR = 1000562001, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LAYERED_API_PROPERTIES_LIST_KHR = 1000562002, @@ -19309,6 +19310,18 @@ typedef struct VkPhysicalDeviceRawAccessChainsFeaturesNV { +// VK_NV_command_buffer_inheritance is a preprocessor guard. Do not pass it to API calls. +#define VK_NV_command_buffer_inheritance 1 +#define VK_NV_COMMAND_BUFFER_INHERITANCE_SPEC_VERSION 1 +#define VK_NV_COMMAND_BUFFER_INHERITANCE_EXTENSION_NAME "VK_NV_command_buffer_inheritance" +typedef struct VkPhysicalDeviceCommandBufferInheritanceFeaturesNV { + VkStructureType sType; + void* pNext; + VkBool32 commandBufferInheritance; +} VkPhysicalDeviceCommandBufferInheritanceFeaturesNV; + + + // VK_NV_shader_atomic_float16_vector is a preprocessor guard. Do not pass it to API calls. 
#define VK_NV_shader_atomic_float16_vector 1 #define VK_NV_SHADER_ATOMIC_FLOAT16_VECTOR_SPEC_VERSION 1 diff --git a/icd/api/include/pipeline_binary_cache.h b/icd/api/include/pipeline_binary_cache.h index a244f765..372476e4 100644 --- a/icd/api/include/pipeline_binary_cache.h +++ b/icd/api/include/pipeline_binary_cache.h @@ -167,6 +167,12 @@ class PipelineBinaryCache CacheAdapter* GetCacheAdapter() { return m_pCacheAdapter; } + // Override the driver's default location + static constexpr char EnvVarPath[] = "AMD_VK_PIPELINE_CACHE_PATH"; + + // Filename of an additional, read-only archive + static constexpr char EnvVarReadOnlyFileName[] = "AMD_VK_PIPELINE_CACHE_READ_ONLY_FILENAME"; + private: PAL_DISALLOW_DEFAULT_CTOR(PipelineBinaryCache); PAL_DISALLOW_COPY_AND_ASSIGN(PipelineBinaryCache); @@ -215,15 +221,9 @@ class PipelineBinaryCache Util::IArchiveFile* OpenWritableArchive(const char* path, const char* fileName, size_t bufferSize); Util::ICacheLayer* CreateFileLayer(Util::IArchiveFile* pFile); - // Override the driver's default location - static constexpr char EnvVarPath[] = "AMD_VK_PIPELINE_CACHE_PATH"; - // Override the driver's default name (Hash of application name) static constexpr char EnvVarFileName[] = "AMD_VK_PIPELINE_CACHE_FILENAME"; - // Filename of an additional, read-only archive - static constexpr char EnvVarReadOnlyFileName[] = "AMD_VK_PIPELINE_CACHE_READ_ONLY_FILENAME"; - static const uint32_t ArchiveType; // TypeId created by hashed string VK_SHADER_PIPELINE_CACHE static const uint32_t ElfType; // TypeId created by hashed string VK_PIPELINE_ELF diff --git a/icd/api/include/pipeline_compiler.h b/icd/api/include/pipeline_compiler.h index a69e97a1..dcd3acb1 100644 --- a/icd/api/include/pipeline_compiler.h +++ b/icd/api/include/pipeline_compiler.h @@ -424,8 +424,9 @@ class PipelineCompiler const VkVertexInputBindingDescription2EXT* pVertexBindingDescriptions, uint32_t vertexAttributeDescriptionCount, const VkVertexInputAttributeDescription2EXT* pVertexAttributeDescriptions, - void* pUberFetchShaderInternalData, - bool isOffsetMode); + bool dynamicStride, + bool isOffsetMode, + void* pUberFetchShaderInternalData) const; uint32_t BuildUberFetchShaderInternalData( const VkPipelineVertexInputStateCreateInfo* pVertexInput, diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h index 442c2c49..694098bf 100644 --- a/icd/api/include/vk_cmdbuffer.h +++ b/icd/api/include/vk_cmdbuffer.h @@ -1509,6 +1509,10 @@ class CmdBuffer return &m_debugPrintf; } + void InsertDebugMarker( + const char* pLabelName, + bool isBegin); + private: PAL_DISALLOW_COPY_AND_ASSIGN(CmdBuffer); @@ -1923,10 +1927,6 @@ class CmdBuffer void FreePatchCpsList(); #endif - void InsertDebugMarker( - const char* pLabelName, - bool isBegin); - void BindVertexBuffersUpdateBindingRange( uint32_t deviceIdx, Pal::BufferViewInfo* pBinding, diff --git a/icd/api/include/vk_device.h b/icd/api/include/vk_device.h index 1ebee441..9ce4d34f 100644 --- a/icd/api/include/vk_device.h +++ b/icd/api/include/vk_device.h @@ -162,13 +162,12 @@ class Device uint32 deviceAddressBindingReport : 1; // True if EXT_DEVICE_MEMORY_REPORT or EXT_DEVICE_ADDRESS_BINDING_REPORT is enabled. uint32 gpuMemoryEventHandler : 1; - uint32 assumeDynamicTopologyInLibs : 1; // True if EXT_PRIMITIVES_GENERATED_QUERY is enabled. 
uint32 primitivesGeneratedQuery : 1; uint32 reserved1 : 1; uint32 reserved2 : 1; + uint32 deviceGeneratedCommands : 1; uint32 robustVertexBufferExtend : 1; - uint32 reserved : 11; }; diff --git a/icd/api/include/vk_extensions.h b/icd/api/include/vk_extensions.h index fdbc0b35..0424401f 100644 --- a/icd/api/include/vk_extensions.h +++ b/icd/api/include/vk_extensions.h @@ -452,6 +452,7 @@ class DeviceExtensions final : public Extensions EXT_VERTEX_ATTRIBUTE_DIVISOR, EXT_VERTEX_INPUT_DYNAMIC_STATE, EXT_YCBCR_IMAGE_ARRAYS, + // AMD Extensions AMD_BUFFER_MARKER, AMD_DEVICE_COHERENT_MEMORY, diff --git a/icd/api/include/vk_indirect_commands_layout.h b/icd/api/include/vk_indirect_commands_layout.h index 92175b7e..dc80025c 100644 --- a/icd/api/include/vk_indirect_commands_layout.h +++ b/icd/api/include/vk_indirect_commands_layout.h @@ -53,24 +53,32 @@ struct IndirectParam; namespace vk { +enum IndirectCommandsLayoutType +{ + Standalone = 0, + GeneratedCommands +}; + enum IndirectCommandsActionType { Draw = 0, DrawIndexed, Dispatch, - MeshTask + DrawMeshTask }; struct IndirectCommandsInfo { IndirectCommandsActionType actionType; + IndirectCommandsLayoutType layoutType; + uint32_t strideInBytes; }; - // ===================================================================================================================== - // API implementation of Vulkan indirect commands layout - // - // Indirect commands layout objects describe the information of indirect commands, as well as how to interpret and - // process indirect buffers. +// ===================================================================================================================== +// API implementation of Vulkan NV indirect commands layout +// +// Indirect commands layout objects describe the information of indirect commands, as well as how to interpret and +// process indirect buffers. class IndirectCommandsLayoutNV final : public NonDispatchable { public: @@ -125,11 +133,9 @@ class IndirectCommandsLayoutNV final : public NonDispatchableGetPipelineBinaryCreateInfo(); - if (libInfo.pipelineInfo.rsState.perSampleShading || (libInfo.pipelineInfo.rsState.numSamples != 1)) - { - // pMultisampleState is not NULL. 
- pCreateInfo->pipelineInfo.rsState.perSampleShading = libInfo.pipelineInfo.rsState.perSampleShading; - pCreateInfo->pipelineInfo.rsState.dynamicSampleInfo = libInfo.pipelineInfo.rsState.dynamicSampleInfo; - pCreateInfo->pipelineInfo.rsState.numSamples = libInfo.pipelineInfo.rsState.numSamples; - pCreateInfo->pipelineInfo.rsState.samplePatternIdx = libInfo.pipelineInfo.rsState.samplePatternIdx; - pCreateInfo->pipelineInfo.rsState.pixelShaderSamples = libInfo.pipelineInfo.rsState.pixelShaderSamples; - } - else - { - pCreateInfo->pipelineInfo.rsState.numSamples = 1; - } + pCreateInfo->pipelineInfo.rsState.perSampleShading = libInfo.pipelineInfo.rsState.perSampleShading; + pCreateInfo->pipelineInfo.rsState.dynamicSampleInfo = libInfo.pipelineInfo.rsState.dynamicSampleInfo; + pCreateInfo->pipelineInfo.rsState.numSamples = libInfo.pipelineInfo.rsState.numSamples; + pCreateInfo->pipelineInfo.rsState.samplePatternIdx = libInfo.pipelineInfo.rsState.samplePatternIdx; + pCreateInfo->pipelineInfo.rsState.pixelShaderSamples = libInfo.pipelineInfo.rsState.pixelShaderSamples; pCreateInfo->pipelineInfo.dsState.depthTestEnable = libInfo.pipelineInfo.dsState.depthTestEnable; pCreateInfo->pipelineInfo.dsState.depthWriteEnable = libInfo.pipelineInfo.dsState.depthWriteEnable; @@ -1987,51 +1985,43 @@ static void BuildMultisampleState( GraphicsPipelineBinaryCreateInfo* pCreateInfo, const uint64_t dynamicStateFlags) { + if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::RasterizationSamples)) + { + pCreateInfo->pipelineInfo.rsState.dynamicSampleInfo = true; + } + if (pMs != nullptr) { - if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::RasterizationSamples)) - { - // This will be updated later - pCreateInfo->pipelineInfo.rsState.perSampleShading = true; - pCreateInfo->pipelineInfo.rsState.pixelShaderSamples = 1; - pCreateInfo->pipelineInfo.rsState.samplePatternIdx = 0; - pCreateInfo->pipelineInfo.rsState.numSamples = 1; - pCreateInfo->pipelineInfo.rsState.dynamicSampleInfo = true; - pCreateInfo->pipelineInfo.options.enableInterpModePatch = false; - } - else + if ((pMs->rasterizationSamples != 1) && (pCreateInfo->pipelineInfo.rsState.dynamicSampleInfo == false)) { - if (pMs->rasterizationSamples != 1) - { - uint32_t subpassCoverageSampleCount; - uint32_t subpassColorSampleCount; - GraphicsPipelineCommon::GetSubpassSampleCount( - pMs, pRenderPass, subpass, &subpassCoverageSampleCount, &subpassColorSampleCount, nullptr); - - if (pMs->sampleShadingEnable && (pMs->minSampleShading > 0.0f)) - { - pCreateInfo->pipelineInfo.rsState.perSampleShading = - ((subpassColorSampleCount * pMs->minSampleShading) > 1.0f); - pCreateInfo->pipelineInfo.rsState.pixelShaderSamples = - Pow2Pad(static_cast(ceil(subpassColorSampleCount * pMs->minSampleShading))); - } - else - { - pCreateInfo->pipelineInfo.rsState.perSampleShading = false; - pCreateInfo->pipelineInfo.rsState.pixelShaderSamples = 1; - } + uint32_t subpassCoverageSampleCount; + uint32_t subpassColorSampleCount; + GraphicsPipelineCommon::GetSubpassSampleCount( + pMs, pRenderPass, subpass, &subpassCoverageSampleCount, &subpassColorSampleCount, nullptr); - // NOTE: The sample pattern index here is actually the offset of sample position pair. This is - // different from the field of creation info of image view. For image view, the sample pattern - // index is really table index of the sample pattern. 
- pCreateInfo->pipelineInfo.rsState.samplePatternIdx = - Device::GetDefaultSamplePatternIndex(subpassCoverageSampleCount) * Pal::MaxMsaaRasterizerSamples; + if (pMs->sampleShadingEnable && (pMs->minSampleShading > 0.0f)) + { + pCreateInfo->pipelineInfo.rsState.perSampleShading = + ((subpassColorSampleCount * pMs->minSampleShading) > 1.0f); + pCreateInfo->pipelineInfo.rsState.pixelShaderSamples = + Pow2Pad(static_cast(ceil(subpassColorSampleCount * pMs->minSampleShading))); + } + else + { + pCreateInfo->pipelineInfo.rsState.perSampleShading = false; + pCreateInfo->pipelineInfo.rsState.pixelShaderSamples = 1; } - pCreateInfo->pipelineInfo.rsState.numSamples = pMs->rasterizationSamples; - pCreateInfo->pipelineInfo.options.enableInterpModePatch = false; + // NOTE: The sample pattern index here is actually the offset of sample position pair. This is + // different from the field of creation info of image view. For image view, the sample pattern + // index is really table index of the sample pattern. + pCreateInfo->pipelineInfo.rsState.samplePatternIdx = + Device::GetDefaultSamplePatternIndex(subpassCoverageSampleCount) * Pal::MaxMsaaRasterizerSamples; } + pCreateInfo->pipelineInfo.rsState.numSamples = pMs->rasterizationSamples; + pCreateInfo->pipelineInfo.options.enableInterpModePatch = false; + if (pCreateInfo->pipelineInfo.rsState.perSampleShading) { EXTRACT_VK_STRUCTURES_0( @@ -2611,13 +2601,14 @@ static void BuildPreRasterizationShaderState( { const RenderPass* pRenderPass = RenderPass::ObjectFromHandle(pIn->renderPass); bool isConservativeOverestimation = false; + bool assumeDynamicTopologyInLibs = false; bool vertexInputAbsent = libInfo.flags.isLibrary && (libInfo.pVertexInputInterfaceLib == nullptr) && ((libInfo.libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) == 0); bool unrestrictedPrimitiveTopology = - pDevice->GetEnabledFeatures().assumeDynamicTopologyInLibs || + assumeDynamicTopologyInLibs || (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::PrimitiveTopology) && pDevice->GetEnabledFeatures().dynamicPrimitiveTopologyUnrestricted) || vertexInputAbsent; @@ -3335,7 +3326,8 @@ void PipelineCompiler::ApplyPipelineOptions( { pOptions->extendedRobustness.nullDescriptor = true; } - if (pDevice->GetEnabledFeatures().primitivesGeneratedQuery) + if (pDevice->GetEnabledFeatures().primitivesGeneratedQuery + ) { pOptions->enablePrimGeneratedQuery = true; } @@ -5471,17 +5463,18 @@ uint32_t PipelineCompiler::BuildUberFetchShaderInternalData( const VkVertexInputBindingDescription2EXT* pVertexBindingDescriptions, uint32_t vertexAttributeDescriptionCount, const VkVertexInputAttributeDescription2EXT* pVertexAttributeDescriptions, - void* pUberFetchShaderInternalData, - bool isOffsetMode) + bool dynamicStride, + bool isOffsetMode, + void* pUberFetchShaderInternalData + ) const { - uint32_t dataSize = BuildUberFetchShaderInternalDataImp(vertexBindingDescriptionCount, pVertexBindingDescriptions, vertexAttributeDescriptionCount, pVertexAttributeDescriptions, vertexBindingDescriptionCount, pVertexBindingDescriptions, - false, + dynamicStride, isOffsetMode, pUberFetchShaderInternalData); @@ -5494,7 +5487,8 @@ uint32_t PipelineCompiler::BuildUberFetchShaderInternalData( const VkPipelineVertexInputStateCreateInfo* pVertexInput, bool dynamicStride, bool isOffsetMode, - void* pUberFetchShaderInternalData) const + void* pUberFetchShaderInternalData + ) const { const VkPipelineVertexInputDivisorStateCreateInfoEXT* pVertexDivisor = nullptr; const vk::VkStructHeader* 
pStructHeader = diff --git a/icd/api/raytrace/ray_tracing_device.cpp b/icd/api/raytrace/ray_tracing_device.cpp index fbb0bbb3..a01de905 100644 --- a/icd/api/raytrace/ray_tracing_device.cpp +++ b/icd/api/raytrace/ray_tracing_device.cpp @@ -510,7 +510,7 @@ VkResult RayTracingDevice::InitAccelStructTracker() "The size of the AccelStructTracker SRD mismatches between XGL and GPURT."); // Ensure the SRD size matches with the size reported by PAL - VK_ASSERT(sizeof(pTracker->srd) == + VK_ASSERT(sizeof(pTracker->srd) >= m_pDevice->VkPhysicalDevice(deviceIdx)->PalProperties().gfxipProperties.srdSizes.bufferView); pPalDevice->CreateUntypedBufferViewSrds(1, &viewInfo, &pTracker->srd); @@ -750,6 +750,7 @@ void RayTracingDevice::TraceDispatch( // ===================================================================================================================== void RayTracingDevice::TraceIndirectDispatch( uint32_t deviceIdx, + CmdBuffer* pCmdBuffer, GpuRt::RtPipelineType pipelineType, uint32_t originalThreadGroupSizeX, uint32_t originalThreadGroupSizeY, @@ -763,31 +764,33 @@ void RayTracingDevice::TraceIndirectDispatch( Pal::gpusize* pCounterMetadataVa, void* pConstants) { - if (m_pGpuRtDevice[deviceIdx]->RayHistoryTraceActive()) - { - GpuRt::RtDispatchInfo dispatchInfo = {}; - SetDispatchInfo(pipelineType, - 0, - 0, - 0, - shaderCount, - apiHash, - userMarkerContext, - pRaygenSbt, - pMissSbt, - pHitSbt, - &dispatchInfo); + GpuRt::RtDispatchInfo dispatchInfo = {}; - dispatchInfo.threadGroupSizeX = originalThreadGroupSizeX; - dispatchInfo.threadGroupSizeY = originalThreadGroupSizeY; - dispatchInfo.threadGroupSizeZ = originalThreadGroupSizeZ; + SetDispatchInfo(pipelineType, + 0, + 0, + 0, + shaderCount, + apiHash, + userMarkerContext, + pRaygenSbt, + pMissSbt, + pHitSbt, + &dispatchInfo); + + dispatchInfo.threadGroupSizeX = originalThreadGroupSizeX; + dispatchInfo.threadGroupSizeY = originalThreadGroupSizeY; + dispatchInfo.threadGroupSizeZ = originalThreadGroupSizeZ; + if (m_pGpuRtDevice[deviceIdx]->RayHistoryTraceActive()) + { m_pGpuRtDevice[deviceIdx]->TraceIndirectRtDispatch(pipelineType, dispatchInfo, 1, pCounterMetadataVa, pConstants); } + } // ===================================================================================================================== @@ -988,11 +991,16 @@ void RayTracingDevice::ClientInsertRGPMarker( Pal::ICmdBuffer* pPalCmdbuf = static_cast(cmdBuffer); vk::CmdBuffer* pCmdbuf = static_cast(pPalCmdbuf->GetClientData()); - if ((pCmdbuf != nullptr) && (pCmdbuf->GetSqttState() != nullptr)) + if (pCmdbuf != nullptr) { - pCmdbuf->GetSqttState()->WriteUserEventMarker( - isPush ? vk::RgpSqttMarkerUserEventPush : vk::RgpSqttMarkerUserEventPop, - pMarker); + if (pCmdbuf->GetSqttState() != nullptr) + { + pCmdbuf->GetSqttState()->WriteUserEventMarker( + isPush ? 
vk::RgpSqttMarkerUserEventPush : vk::RgpSqttMarkerUserEventPop, + pMarker); + } + + pCmdbuf->InsertDebugMarker(pMarker, isPush); } } diff --git a/icd/api/raytrace/ray_tracing_device.h b/icd/api/raytrace/ray_tracing_device.h index bbe7a575..246489eb 100644 --- a/icd/api/raytrace/ray_tracing_device.h +++ b/icd/api/raytrace/ray_tracing_device.h @@ -58,7 +58,7 @@ class RayTracingDevice Pal::IFence* pFence; }; - static const uint32_t BufferViewDwords = 4; + static const uint32_t BufferViewDwords = GpuRt::MaxBufferSrdSize; struct AccelStructTrackerResources { @@ -117,6 +117,7 @@ class RayTracingDevice void TraceIndirectDispatch( uint32_t deviceIdx, + CmdBuffer* pCmdBuffer, GpuRt::RtPipelineType pipelineType, uint32_t originalThreadGroupSizeX, uint32_t originalThreadGroupSizeY, diff --git a/icd/api/raytrace/vk_ray_tracing_pipeline.cpp b/icd/api/raytrace/vk_ray_tracing_pipeline.cpp index fccb927f..d2a65adb 100644 --- a/icd/api/raytrace/vk_ray_tracing_pipeline.cpp +++ b/icd/api/raytrace/vk_ray_tracing_pipeline.cpp @@ -939,6 +939,19 @@ VkResult RayTracingPipeline::CreateImpl( if (result == VK_SUCCESS) { + const auto pBinaries = pipelineBinaries[DefaultDeviceIndex].pPipelineBins; + + // If pPipelineBinaries[DefaultDeviceIndex] is sufficient for all devices, the other pipeline binaries + // won't be created. Otherwise, like if gl_DeviceIndex is used, they will be. + if (pBinaries[0].pCode != nullptr) + { + localPipelineInfo.pipeline.flags.clientInternal = false; + localPipelineInfo.pipeline.pipelineBinarySize = pBinaries[0].codeSize; + localPipelineInfo.pipeline.pPipelineBinary = pBinaries[0].pCode; + localPipelineInfo.pipeline.maxFunctionCallDepth = + pipelineBinaries[DefaultDeviceIndex].maxFunctionCallDepth; + } + // Get the pipeline and shader size from PAL and allocate memory. pipelineSize = m_pDevice->PalDevice(DefaultDeviceIndex)->GetComputePipelineSize(localPipelineInfo.pipeline, nullptr); @@ -1007,19 +1020,6 @@ VkResult RayTracingPipeline::CreateImpl( void* pDeviceShaderLibraryMem = Util::VoidPtrInc(pPalShaderLibraryMem, deviceIdx * funcCount * shaderLibrarySize); - VK_ASSERT(pipelineSize == - m_pDevice->PalDevice(deviceIdx)->GetComputePipelineSize(localPipelineInfo.pipeline, nullptr)); - - // If pPipelineBinaries[DefaultDeviceIndex] is sufficient for all devices, the other pipeline binaries - // won't be created. Otherwise, like if gl_DeviceIndex is used, they will be. 
- if (pBinaries[0].pCode != nullptr) - { - localPipelineInfo.pipeline.flags.clientInternal = false; - localPipelineInfo.pipeline.pipelineBinarySize = pBinaries[0].codeSize; - localPipelineInfo.pipeline.pPipelineBinary = pBinaries[0].pCode; - localPipelineInfo.pipeline.maxFunctionCallDepth = pipelineBinaries[deviceIdx].maxFunctionCallDepth; - } - // Copy indirect function info uint32_t funcIndex = 0; const auto pShaderProp = &pipelineBinaries[deviceIdx].shaderPropSet.shaderProps[0]; @@ -1093,6 +1093,7 @@ VkResult RayTracingPipeline::CreateImpl( uint32_t intersectionStackMax = 0; uint32_t callableStackMax = 0; uint32_t backendStackSizeMax = 0; + uint32_t traceRayStackSize = 0; if ((palResult == Util::Result::Success) && ((funcCount > 0) || hasLibraries)) { @@ -1145,17 +1146,6 @@ VkResult RayTracingPipeline::CreateImpl( if (pShaderStackSize[funcIdx] == ~0ULL) { UpdateLibStackSizes(funcIdx); - - if (pTraceRayUsage[funcIdx]) - { - const uint32_t traceRayFuncIdx = pShaderNameMap[traceRayShaderIndex]; - if ((pShaderStackSize[traceRayFuncIdx] == ~0ULL) && - (ppDeviceShaderLibraries[traceRayFuncIdx] != nullptr)) - { - UpdateLibStackSizes(traceRayFuncIdx); - } - pShaderStackSize[funcIdx] += pShaderStackSize[traceRayFuncIdx]; - } } VK_ASSERT(pShaderStackSize[funcIdx] != ~0ULL); stackSize = pShaderStackSize[funcIdx]; @@ -1164,6 +1154,24 @@ VkResult RayTracingPipeline::CreateImpl( return stackSize; }; + auto GetTraceRayUsage = [&](uint32_t shaderIdx) -> bool + { + if (shaderIdx != VK_SHADER_UNUSED_KHR) + { + const uint32_t funcIdx = pShaderNameMap[shaderIdx]; + if (funcIdx < funcCount) + { + return pTraceRayUsage[funcIdx]; + } + } + return false; + }; + + if (m_hasTraceRay) + { + traceRayStackSize = GetFuncStackSize(traceRayShaderIndex); + } + for (uint32_t groupIdx = 0; groupIdx < m_createInfo.GetGroupCount(); ++groupIdx) { const auto& groupInfo = m_createInfo.GetGroupList().At(groupIdx); @@ -1173,6 +1181,19 @@ VkResult RayTracingPipeline::CreateImpl( { case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR: pCurrentStackSizes->generalSize = GetFuncStackSize(groupInfo.generalShader); + + if (GetTraceRayUsage(groupInfo.generalShader)) + { + if (m_hasTraceRay) + { + pCurrentStackSizes->generalSize += traceRayStackSize; + } + else + { + pCurrentStackSizes->metadata.generalSizeNeedAddTraceRay = 1; + } + } + switch (m_createInfo.GetStageList().At(groupInfo.generalShader).stage) { case VK_SHADER_STAGE_RAYGEN_BIT_KHR: @@ -1196,25 +1217,57 @@ VkResult RayTracingPipeline::CreateImpl( } break; + case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: + pCurrentStackSizes->intersectionSize = GetFuncStackSize(groupInfo.intersectionShader); + + if (GetTraceRayUsage(groupInfo.intersectionShader)) + { + if (m_hasTraceRay) + { + pCurrentStackSizes->intersectionSize += traceRayStackSize; + } + else + { + pCurrentStackSizes->metadata.intersectionSizeNeedAddTraceRay = 1; + } + } + + intersectionStackMax = Util::Max( + intersectionStackMax, static_cast(pCurrentStackSizes->intersectionSize)); + // falls through + case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR: - pCurrentStackSizes->anyHitSize = GetFuncStackSize(groupInfo.anyHitShader); + pCurrentStackSizes->anyHitSize = GetFuncStackSize(groupInfo.anyHitShader); pCurrentStackSizes->closestHitSize = GetFuncStackSize(groupInfo.closestHitShader); - anyHitStackMax = Util::Max( - anyHitStackMax, static_cast(pCurrentStackSizes->anyHitSize)); - closestHitStackMax = Util::Max( - closestHitStackMax, static_cast(pCurrentStackSizes->closestHitSize)); - break; - case 
VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: - pCurrentStackSizes->anyHitSize = GetFuncStackSize(groupInfo.anyHitShader); - pCurrentStackSizes->closestHitSize = GetFuncStackSize(groupInfo.closestHitShader); - pCurrentStackSizes->intersectionSize = GetFuncStackSize(groupInfo.intersectionShader); + if (GetTraceRayUsage(groupInfo.anyHitShader)) + { + if (m_hasTraceRay) + { + pCurrentStackSizes->anyHitSize += traceRayStackSize; + } + else + { + pCurrentStackSizes->metadata.anyHitSizeNeedAddTraceRay = 1; + } + } + + if (GetTraceRayUsage(groupInfo.closestHitShader)) + { + if (m_hasTraceRay) + { + pCurrentStackSizes->closestHitSize += traceRayStackSize; + } + else + { + pCurrentStackSizes->metadata.closestHitSizeNeedAddTraceRay = 1; + } + } + anyHitStackMax = Util::Max( anyHitStackMax, static_cast(pCurrentStackSizes->anyHitSize)); closestHitStackMax = Util::Max( closestHitStackMax, static_cast(pCurrentStackSizes->closestHitSize)); - intersectionStackMax = Util::Max( - intersectionStackMax, static_cast(pCurrentStackSizes->intersectionSize)); break; default: @@ -1312,7 +1365,8 @@ VkResult RayTracingPipeline::CreateImpl( pPipelineLib->GetRayTracingShaderGroupStackSize( deviceIdx, libGroupIdx, - VK_SHADER_GROUP_SHADER_GENERAL_KHR); + VK_SHADER_GROUP_SHADER_GENERAL_KHR, + traceRayStackSize); if ((stages & VK_SHADER_STAGE_RAYGEN_BIT_KHR) != 0) { rayGenStackMax = Util::Max( @@ -1342,7 +1396,8 @@ VkResult RayTracingPipeline::CreateImpl( pPipelineLib->GetRayTracingShaderGroupStackSize( deviceIdx, libGroupIdx, - VK_SHADER_GROUP_SHADER_INTERSECTION_KHR); + VK_SHADER_GROUP_SHADER_INTERSECTION_KHR, + traceRayStackSize); intersectionStackMax = Util::Max( intersectionStackMax, static_cast(pStackSizes->intersectionSize)); @@ -1354,12 +1409,14 @@ VkResult RayTracingPipeline::CreateImpl( pPipelineLib->GetRayTracingShaderGroupStackSize( deviceIdx, libGroupIdx, - VK_SHADER_GROUP_SHADER_ANY_HIT_KHR); + VK_SHADER_GROUP_SHADER_ANY_HIT_KHR, + traceRayStackSize); pStackSizes->closestHitSize = pPipelineLib->GetRayTracingShaderGroupStackSize( deviceIdx, libGroupIdx, - VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR); + VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR, + traceRayStackSize); anyHitStackMax = Util::Max( anyHitStackMax, static_cast(pStackSizes->anyHitSize)); @@ -2043,7 +2100,8 @@ void RayTracingPipeline::GetRayTracingShaderGroupHandles( VkDeviceSize RayTracingPipeline::GetRayTracingShaderGroupStackSize( uint32_t deviceIndex, uint32_t group, - VkShaderGroupShaderKHR groupShader) const + VkShaderGroupShaderKHR groupShader, + VkDeviceSize traceRaySize) const { VkDeviceSize stackSize = 0; @@ -2053,15 +2111,31 @@ VkDeviceSize RayTracingPipeline::GetRayTracingShaderGroupStackSize( { case VK_SHADER_GROUP_SHADER_GENERAL_KHR: stackSize = m_pShaderGroupStackSizes[deviceIndex][group].generalSize; + if (m_pShaderGroupStackSizes[deviceIndex][group].metadata.generalSizeNeedAddTraceRay) + { + stackSize += traceRaySize; + } break; case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR: stackSize = m_pShaderGroupStackSizes[deviceIndex][group].closestHitSize; + if (m_pShaderGroupStackSizes[deviceIndex][group].metadata.closestHitSizeNeedAddTraceRay) + { + stackSize += traceRaySize; + } break; case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR: stackSize = m_pShaderGroupStackSizes[deviceIndex][group].anyHitSize; + if (m_pShaderGroupStackSizes[deviceIndex][group].metadata.anyHitSizeNeedAddTraceRay) + { + stackSize += traceRaySize; + } break; case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR: stackSize = 
m_pShaderGroupStackSizes[deviceIndex][group].intersectionSize; + if (m_pShaderGroupStackSizes[deviceIndex][group].metadata.intersectionSizeNeedAddTraceRay) + { + stackSize += traceRaySize; + } break; default: VK_NEVER_CALLED(); @@ -2398,7 +2472,7 @@ VKAPI_ATTR VkDeviceSize VKAPI_CALL vkGetRayTracingShaderGroupStackSizeKHR( { RayTracingPipeline* pPipeline = RayTracingPipeline::ObjectFromHandle(pipeline); - return pPipeline->GetRayTracingShaderGroupStackSize(DefaultDeviceIndex, group, groupShader); + return pPipeline->GetRayTracingShaderGroupStackSize(DefaultDeviceIndex, group, groupShader, 0); } }; // namespace entry diff --git a/icd/api/raytrace/vk_ray_tracing_pipeline.h b/icd/api/raytrace/vk_ray_tracing_pipeline.h index 8f01917a..394c7f68 100644 --- a/icd/api/raytrace/vk_ray_tracing_pipeline.h +++ b/icd/api/raytrace/vk_ray_tracing_pipeline.h @@ -67,6 +67,21 @@ struct ShaderGroupStackSizes VkDeviceSize closestHitSize; VkDeviceSize anyHitSize; VkDeviceSize intersectionSize; + union + { + struct + { + // Following bits indicate corresponding stack size needs to consider trace ray stack size, but it is not + // available when the calculation happens (e.g., when compiling pipeline library). + uint32_t generalSizeNeedAddTraceRay : 1; + uint32_t closestHitSizeNeedAddTraceRay : 1; + uint32_t anyHitSizeNeedAddTraceRay : 1; + uint32_t intersectionSizeNeedAddTraceRay : 1; + uint32_t reserved : 28; + }; + uint32_t u32All; + + }metadata; }; struct ShaderGroupInfo @@ -188,7 +203,8 @@ class RayTracingPipeline final : public Pipeline, public NonDispatchableGpuVirtAddr(deviceIdx) + static_cast(offset), - stride + {stride}, }; PalCmdBuffer(deviceIdx)->CmdDispatchMeshIndirectMulti( @@ -1611,14 +1611,15 @@ VkResult CmdBuffer::Begin( DefaultLineWidth, limits.pointSizeRange[0], limits.pointSizeRange[1] }; - - utils::IterateMask deviceGroup(GetDeviceMask()); - do { - const uint32_t deviceIdx = deviceGroup.Index(); - PalCmdBuffer(deviceIdx)->CmdSetPointLineRasterState(params); + utils::IterateMask deviceGroup(GetDeviceMask()); + do + { + const uint32_t deviceIdx = deviceGroup.Index(); + PalCmdBuffer(deviceIdx)->CmdSetPointLineRasterState(params); + } + while (deviceGroup.IterateNext()); } - while (deviceGroup.IterateNext()); const uint32_t supportedVrsRates = deviceProps.gfxipProperties.supportedVrsRates; @@ -1642,6 +1643,21 @@ VkResult CmdBuffer::Begin( PalCmdBuffer(deviceIdx)->CmdBindSampleRateImage(nullptr); } while (deviceGroupVrs.IterateNext()); } + + // Reset transform feedback-related state once, in case it'll be used without first binding a valid xfb. + // This is legal, but no primitives data will be generated until a valid xfb is bound in the pipeline. + if (m_pDevice->IsExtensionEnabled(DeviceExtensions::EXT_TRANSFORM_FEEDBACK)) + { + utils::IterateMask deviceGroup(GetDeviceMask()); + do + { + // Disable transform feedback by setting bound buffer's size and stride to 0. 
+ const uint32_t deviceIdx = deviceGroup.Index(); + const Pal::BindStreamOutTargetParams nullParams = {}; + PalCmdBuffer(deviceIdx)->CmdBindStreamOutTargets(nullParams); + } + while (deviceGroup.IterateNext()); + } } // Dirty all the dynamic states, the bit should be cleared with 0 when the corresponding state is @@ -3295,7 +3311,7 @@ void CmdBuffer::DrawIndirect( Pal::GpuVirtAddrAndStride gpuVirtAddrAndStride = { pBuffer->GpuVirtAddr(deviceIdx) + static_cast(offset), - stride + {stride}, }; if (useBufferCount) @@ -3343,7 +3359,7 @@ void CmdBuffer::DrawIndirect( #endif VK_ASSERT(stride <= indirectBufferSize); - Pal::GpuVirtAddrAndStride gpuVirtAddrAndStride = { indirectBufferVa, stride }; + Pal::GpuVirtAddrAndStride gpuVirtAddrAndStride = { indirectBufferVa, {stride} }; utils::IterateMask deviceGroup(m_curDeviceMask); do @@ -3434,7 +3450,7 @@ void CmdBuffer::DrawMeshTasksIndirect( VK_ASSERT(stride <= indirectBufferSize); - Pal::GpuVirtAddrAndStride gpuVirtAddrAndStride = { indirectBufferVa, stride }; + Pal::GpuVirtAddrAndStride gpuVirtAddrAndStride = { indirectBufferVa, {stride} }; utils::IterateMask deviceGroup(m_curDeviceMask); do @@ -3570,10 +3586,20 @@ void CmdBuffer::ExecuteIndirect( uint64_t barrierCmd = 0; if ((info.actionType == IndirectCommandsActionType::Draw) || - (info.actionType == IndirectCommandsActionType::DrawIndexed)) + (info.actionType == IndirectCommandsActionType::DrawIndexed) || + (info.actionType == IndirectCommandsActionType::DrawMeshTask)) { - const bool indexed = (info.actionType == IndirectCommandsActionType::DrawIndexed); - barrierCmd = (indexed ? DbgBarrierDrawIndexed : DbgBarrierDrawNonIndexed) | DbgBarrierDrawIndirect; + const bool isMeshTask = (info.actionType == IndirectCommandsActionType::DrawMeshTask); + const bool isIndexed = (info.actionType == IndirectCommandsActionType::DrawIndexed); + + if (isMeshTask) + { + barrierCmd = DbgBarrierDrawMeshTasksIndirect; + } + else + { + barrierCmd = (isIndexed ? DbgBarrierDrawIndexed : DbgBarrierDrawNonIndexed) | DbgBarrierDrawIndirect; + } DbgBarrierPreCmd(barrierCmd); @@ -3590,14 +3616,6 @@ void CmdBuffer::ExecuteIndirect( RebindPipeline(); } } - else if (info.actionType == IndirectCommandsActionType::MeshTask) - { - barrierCmd = DbgBarrierDrawMeshTasksIndirect; - - DbgBarrierPreCmd(barrierCmd); - - ValidateGraphicsStates(); - } else { VK_NEVER_CALLED(); @@ -3623,7 +3641,6 @@ void CmdBuffer::ExecuteIndirect( pArgumentBuffer->GpuVirtAddr(deviceIdx) + argumentOffset, maxCount, (pCountBuffer == nullptr) ? 
0 : pCountBuffer->GpuVirtAddr(deviceIdx) + countOffset); - } while (deviceGroup.IterateNext()); @@ -4932,6 +4949,7 @@ void CmdBuffer::BeginRendering( m_allGpuState.dynamicRenderingInstance.viewMask = pRenderingInfo->viewMask; m_allGpuState.dynamicRenderingInstance.colorAttachmentCount = pRenderingInfo->colorAttachmentCount; m_allGpuState.dynamicRenderingInstance.enableResolveTarget = false; + m_allGpuState.dirtyGraphics.colorWriteMask = 1; for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; ++i) { @@ -5108,7 +5126,22 @@ void CmdBuffer::ResolveImage( const uint32_t sliceCount = Util::Min(subresRangeSrc.numSlices, subresRangeDst.numSlices); - regions[idx].swizzledFormat = Pal::UndefinedSwizzledFormat; + VkFormat viewFormat = dynamicRenderingAttachments.pImageView->GetViewFormat(); + VkFormat resolveViewFormat = dynamicRenderingAttachments.pResolveImageView->GetViewFormat(); + + if ((viewFormat != dynamicRenderingAttachments.pImageView->GetImage()->GetFormat()) || + (resolveViewFormat != dynamicRenderingAttachments.pResolveImageView->GetImage()->GetFormat())) + { + // VUID-VkRenderingAttachmentInfo-imageView-06865: + // imageView and resolveImageView must have the same VkFormat + VK_ASSERT(viewFormat == resolveViewFormat); + regions[idx].swizzledFormat = VkToPalFormat(viewFormat, m_pDevice->GetRuntimeSettings()); + } + else + { + regions[idx].swizzledFormat = Pal::UndefinedSwizzledFormat; + } + regions[idx].extent.width = renderArea.extent.width; regions[idx].extent.height = renderArea.extent.height; regions[idx].extent.depth = 1; @@ -5246,7 +5279,7 @@ void CmdBuffer::ResetEvent( if (pEvent->IsUseToken()) { - const Pal::ReleaseToken token = { 0xFFFFFF, 0xFF }; + const Pal::ReleaseToken token = { {0xFFFFFF, 0xFF} }; pEvent->SetSyncToken(token); } else @@ -8789,7 +8822,19 @@ void CmdBuffer::RPResolveMsaa( regions[idx].extent.height = renderArea.extent.height; regions[idx].extent.depth = 1; regions[idx].numSlices = sliceCount; - regions[idx].swizzledFormat = Pal::UndefinedSwizzledFormat; + + if ((srcResolveFormat != srcAttachment.pImage->GetFormat()) || + (dstResolveFormat != dstAttachment.pImage->GetFormat())) + { + // VUID-VkSubpassDescription-pResolveAttachments-00850: + // each resolve attachment must have the same VkFormat as its corresponding color attachment + VK_ASSERT(srcResolveFormat == dstResolveFormat); + regions[idx].swizzledFormat = VkToPalFormat(srcResolveFormat, m_pDevice->GetRuntimeSettings()); + } + else + { + regions[idx].swizzledFormat = Pal::UndefinedSwizzledFormat; + } regions[idx].pQuadSamplePattern = pSampleLocations; } @@ -9888,6 +9933,9 @@ DynamicVertexInputInternalData* CmdBuffer::BuildUberFetchShaderInternalData( if (m_pUberFetchShaderTempBuffer != nullptr) { void* pUberFetchShaderInternalData = m_pUberFetchShaderTempBuffer; + + bool isDynamicStride = (m_pDevice->GetEnabledFeatures().deviceGeneratedCommands == true); + utils::IterateMask deviceGroup(m_curDeviceMask); do { @@ -9899,8 +9947,9 @@ DynamicVertexInputInternalData* CmdBuffer::BuildUberFetchShaderInternalData( pVertexBindingDescriptions, vertexAttributeDescriptionCount, pVertexAttributeDescriptions, - pUberFetchShaderInternalData, - m_flags.offsetMode); + isDynamicStride, + m_flags.offsetMode, + pUberFetchShaderInternalData); Pal::gpusize gpuAddress = {}; if (uberFetchShaderInternalDataSize > 0) @@ -10321,8 +10370,8 @@ void CmdBuffer::EndTransformFeedback( { PalCmdBuffer(deviceIdx)->CmdSaveBufferFilledSizes(counterBufferAddr); - // Disable transform feedback by set bound buffer's size and stride to 
0. - Pal::BindStreamOutTargetParams params = {}; + // Disable transform feedback by setting bound buffer's size and stride to 0. + const Pal::BindStreamOutTargetParams params = {}; PalCmdBuffer(deviceIdx)->CmdBindStreamOutTargets(params); m_pTransformFeedbackState->enabled = false; } @@ -10900,7 +10949,7 @@ void CmdBuffer::GetRayTracingDispatchArgs( memcpy(pConstants->descriptorTable.accelStructTrackerSrd, m_pDevice->RayTrace()->GetAccelStructTrackerSrd(deviceIdx), - sizeof(pConstants->descriptorTable.accelStructTrackerSrd)); + m_pDevice->GetProperties().descriptorSizes.bufferView); if (pPipeline->CheckIsCps()) { @@ -11200,6 +11249,7 @@ void CmdBuffer::TraceRaysIndirectPerDevice( initUserData.outputCounterMetaVa = 0uLL; m_pDevice->RayTrace()->TraceIndirectDispatch(deviceIdx, + this, GpuRt::RtPipelineType::RayTracing, 0, 0, @@ -11403,7 +11453,7 @@ void CmdBuffer::BindRayQueryConstants( { memcpy(constants.descriptorTable.accelStructTrackerSrd, VkDevice()->RayTrace()->GetAccelStructTrackerSrd(deviceIdx), - sizeof(constants.descriptorTable.accelStructTrackerSrd)); + VkDevice()->GetProperties().descriptorSizes.bufferView); } if (rtCountersEnabled) @@ -11449,6 +11499,7 @@ void CmdBuffer::BindRayQueryConstants( uint64 counterMetadataGpuVa = 0uLL; m_pDevice->RayTrace()->TraceIndirectDispatch(deviceIdx, + this, GpuRt::RtPipelineType::Compute, pOrigThreadgroupDims[0], pOrigThreadgroupDims[1], @@ -11837,16 +11888,15 @@ void CmdBuffer::ValidateGraphicsStates() const GraphicsPipeline* pGraphicsPipeline = m_allGpuState.pGraphicsPipeline; const bool isPointSizeUsed = (pGraphicsPipeline != nullptr) && pGraphicsPipeline->IsPointSizeUsed(); - Pal::ViewportParams viewport = PerGpuState(deviceIdx)->viewport; + Pal::ViewportParams viewportParams = PerGpuState(deviceIdx)->viewport; if (isPointSizeUsed) { // The default vaule is 1.0f which means the guardband is disabled. // Values more than 1.0f enable guardband. 
- viewport.horzDiscardRatio = 10.0f; - viewport.vertDiscardRatio = 10.0f; + viewportParams.horzDiscardRatio = 10.0f; + viewportParams.vertDiscardRatio = 10.0f; } - - PalCmdBuffer(deviceIdx)->CmdSetViewports(viewport); + PalCmdBuffer(deviceIdx)->CmdSetViewports(viewportParams); DbgBarrierPostCmd(DbgBarrierSetDynamicPipelineState); } diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index 94c113f7..943a80d2 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -109,6 +109,7 @@ #include "palAutoBuffer.h" #include "palBorderColorPalette.h" #include "palVectorImpl.h" +#include "palArchiveFile.h" #include @@ -728,7 +729,6 @@ VkResult Device::Create( if (pPhysicalDevice->GetRuntimeSettings().dynamicPrimitiveTopologyUnrestricted) { deviceFeatures.dynamicPrimitiveTopologyUnrestricted = true; - deviceFeatures.assumeDynamicTopologyInLibs = deviceFeatures.graphicsPipelineLibrary; } } diff --git a/icd/api/vk_dispatch.cpp b/icd/api/vk_dispatch.cpp index 83db5be1..1e00c8e2 100644 --- a/icd/api/vk_dispatch.cpp +++ b/icd/api/vk_dispatch.cpp @@ -847,6 +847,7 @@ void DispatchTable::Init() INIT_DISPATCH_ENTRY(vkCmdSetRenderingInputAttachmentIndicesKHR ); INIT_DISPATCH_ENTRY(vkCmdSetDepthBias2EXT ); + } // ===================================================================================================================== diff --git a/icd/api/vk_graphics_pipeline.cpp b/icd/api/vk_graphics_pipeline.cpp index 9e91a3f7..61a497ea 100644 --- a/icd/api/vk_graphics_pipeline.cpp +++ b/icd/api/vk_graphics_pipeline.cpp @@ -434,12 +434,9 @@ VkResult GraphicsPipeline::CreatePipelineObjects( const auto& info = pPalPipeline[deviceIdx]->GetInfo(); if ((info.ps.flags.perSampleShading == 1) || - (info.ps.flags.enablePops == 1) || - ((info.ps.flags.usesSampleMask == 1) && - (palProperties.gfxipProperties.flags.supportVrsWithDsExports == 0))) + (info.ps.flags.enablePops == 1)) { - // Override the shader rate to 1x1 if SampleId used in shader, or POPS is enabled, or - // supportVrsWithDsExports is not supported and SampleMask used in shader. + // Override the shader rate to 1x1 if SampleId used in shader, or POPS is enabled. Device::SetDefaultVrsRateParams(&pObjectCreateInfo->immedInfo.vrsRateParams); pObjectCreateInfo->flags.force1x1ShaderRate = true; @@ -462,6 +459,16 @@ VkResult GraphicsPipeline::CreatePipelineObjects( static_cast(Pal::VrsCombinerStage::PsIterSamples)] = Pal::VrsCombiner::Min; } } + else if ((info.ps.flags.usesSampleMask == 1) && + (palProperties.gfxipProperties.flags.supportVrsWithDsExports == 0)) + { + // Override the shader rate to 1x1 if supportVrsWithDsExports is not supported and SampleMask + // used in shader. + Device::SetDefaultVrsRateParams(&pObjectCreateInfo->immedInfo.vrsRateParams); + + pObjectCreateInfo->flags.force1x1ShaderRate = true; + pObjectCreateInfo->immedInfo.msaaCreateInfo.pixelShaderSamples = 1; + } } if (pObjectCreateInfo->flags.bindMsaaObject) diff --git a/icd/api/vk_image.cpp b/icd/api/vk_image.cpp index 8cb71cad..7eb83e83 100644 --- a/icd/api/vk_image.cpp +++ b/icd/api/vk_image.cpp @@ -300,7 +300,8 @@ void Image::ConvertImageCreateInfo( // regarding DCC. 
pPalCreateInfo->flags.perSubresInit = 1; - if (extStructs.pExternalMemoryImageCreateInfo != nullptr) + if ((extStructs.pExternalMemoryImageCreateInfo != nullptr) && + (extStructs.pExternalMemoryImageCreateInfo->handleTypes != 0)) { pPalCreateInfo->flags.invariant = 1; pPalCreateInfo->flags.optimalShareable = 1; diff --git a/icd/api/vk_indirect_commands_layout.cpp b/icd/api/vk_indirect_commands_layout.cpp index 80d85610..a05c1799 100644 --- a/icd/api/vk_indirect_commands_layout.cpp +++ b/icd/api/vk_indirect_commands_layout.cpp @@ -88,7 +88,7 @@ VkResult IndirectCommandsLayoutNV::Create( break; case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV: - info.actionType = IndirectCommandsActionType::MeshTask; + info.actionType = IndirectCommandsActionType::DrawMeshTask; break; default: diff --git a/icd/api/vk_memory.cpp b/icd/api/vk_memory.cpp index 21fa1db5..171c6504 100644 --- a/icd/api/vk_memory.cpp +++ b/icd/api/vk_memory.cpp @@ -413,7 +413,7 @@ VkResult Memory::Create( pPalGpuMem, Memory::IntValueFromHandle(*pMemoryHandle), VK_OBJECT_TYPE_DEVICE_MEMORY, - pAllocInfo->memoryTypeIndex, + memoryProperties.memoryTypes[pAllocInfo->memoryTypeIndex].heapIndex, false ); } diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index 13c38323..01d28850 100644 --- a/icd/api/vk_physical_device.cpp +++ b/icd/api/vk_physical_device.cpp @@ -31,6 +31,7 @@ #include "include/khronos/vulkan.h" #include "include/color_space_helper.h" +#include "include/pipeline_binary_cache.h" #include "include/vk_buffer_view.h" #include "include/vk_descriptor_buffer.h" #include "include/vk_dispatch.h" @@ -404,6 +405,7 @@ PhysicalDevice::PhysicalDevice( m_memoryTypeMaskForDescriptorBuffers(0), m_pSettingsLoader(pSettingsLoader), m_sampleLocationSampleCounts(0), + m_formatFeaturesTable{}, m_vrHighPrioritySubEngineIndex(UINT32_MAX), m_RtCuHighComputeSubEngineIndex(UINT32_MAX), m_tunnelComputeSubEngineIndex(UINT32_MAX), @@ -4433,6 +4435,7 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_MAINTENANCE5)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_MAINTENANCE6)); + availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_MAINTENANCE7)); availableExtensions.AddExtension(VK_DEVICE_EXTENSION(KHR_PUSH_DESCRIPTOR)); @@ -4573,12 +4576,6 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_IMAGE_DRM_FORMAT_MODIFIER)); #endif - if ((pPhysicalDevice == nullptr) || - VerifyAstcHdrFormatSupport(*pPhysicalDevice)) - { - availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_TEXTURE_COMPRESSION_ASTC_HDR)); - } - if (pInstance->GetAPIVersion() >= VK_MAKE_API_VERSION(0, 1, 1, 0)) { availableExtensions.AddExtension(VK_DEVICE_EXTENSION(EXT_SUBGROUP_SIZE_CONTROL)); @@ -5200,6 +5197,9 @@ void PhysicalDevice::GetPhysicalDeviceDotProduct16Properties( const VkBool32 int16DotSupport = (Is16BitInstructionsSupported() #if VKI_BUILD_GFX11 && (PalProperties().gfxLevel < Pal::GfxIpLevel::GfxIp11_0) +#endif +#if VKI_BUILD_GFX115 + && (PalProperties().gfxLevel < Pal::GfxIpLevel::GfxIp11_5) #endif ) ? 
VK_TRUE : VK_FALSE; @@ -6451,7 +6451,8 @@ size_t PhysicalDevice::GetFeatures2( pExtInfo->subgroupSizeControl = VK_TRUE; pExtInfo->computeFullSubgroups = VK_TRUE; pExtInfo->synchronization2 = VK_TRUE; - pExtInfo->textureCompressionASTC_HDR = VerifyAstcHdrFormatSupport(*this); + VK_ASSERT(VerifyAstcHdrFormatSupport(*this) == VK_FALSE); + pExtInfo->textureCompressionASTC_HDR = VK_FALSE; pExtInfo->shaderZeroInitializeWorkgroupMemory = VK_TRUE; pExtInfo->dynamicRendering = VK_TRUE; pExtInfo->shaderIntegerDotProduct = VK_TRUE; @@ -7133,7 +7134,8 @@ size_t PhysicalDevice::GetFeatures2( if (updateFeatures) { - pExtInfo->textureCompressionASTC_HDR = VerifyAstcHdrFormatSupport(*this); + VK_ASSERT(VerifyAstcHdrFormatSupport(*this) == VK_FALSE); + pExtInfo->textureCompressionASTC_HDR = VK_FALSE; } structSize = sizeof(*pExtInfo); @@ -8463,12 +8465,12 @@ void PhysicalDevice::GetDeviceProperties2( { auto* pProps = static_cast(pNext); pProps->maxIndirectCommandsStreamCount = 1; - pProps->maxIndirectCommandsStreamStride = UINT32_MAX; + pProps->maxIndirectCommandsStreamStride = MaxIndirectCommandsStride; pProps->maxIndirectCommandsTokenCount = MaxIndirectTokenCount; pProps->maxIndirectCommandsTokenOffset = MaxIndirectTokenOffset; - pProps->minIndirectCommandsBufferOffsetAlignment = 4; - pProps->minSequencesCountBufferOffsetAlignment = 4; - pProps->minSequencesIndexBufferOffsetAlignment = 4; + pProps->minIndirectCommandsBufferOffsetAlignment = MinIndirectAlignment; + pProps->minSequencesCountBufferOffsetAlignment = MinIndirectAlignment; + pProps->minSequencesIndexBufferOffsetAlignment = MinIndirectAlignment; pProps->maxGraphicsShaderGroupCount = 0; pProps->maxIndirectSequenceCount = UINT32_MAX >> 1; break; @@ -8511,6 +8513,15 @@ void PhysicalDevice::GetDeviceProperties2( } #endif +#if VKI_COPY_MEMORY_INDIRECT + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COPY_MEMORY_INDIRECT_PROPERTIES_KHR: + { + auto* pProps = reinterpret_cast(pNext); + pProps->supportedQueues = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; + break; + } +#endif + default: break; } diff --git a/icd/api/vk_pipeline.cpp b/icd/api/vk_pipeline.cpp index 6ff46e2c..6c250699 100644 --- a/icd/api/vk_pipeline.cpp +++ b/icd/api/vk_pipeline.cpp @@ -860,8 +860,8 @@ void Pipeline::ElfHashToCacheId( // Extensions and features whose enablement affects compiler inputs (and hence the binary) hasher.Update(pDevice->IsExtensionEnabled(DeviceExtensions::AMD_SHADER_INFO)); - hasher.Update(pDevice->IsExtensionEnabled(DeviceExtensions::EXT_PRIMITIVES_GENERATED_QUERY)); { + hasher.Update(pDevice->IsExtensionEnabled(DeviceExtensions::EXT_PRIMITIVES_GENERATED_QUERY)); hasher.Update(pDevice->IsExtensionEnabled(DeviceExtensions::EXT_TRANSFORM_FEEDBACK)); hasher.Update(pDevice->IsExtensionEnabled(DeviceExtensions::EXT_SCALAR_BLOCK_LAYOUT)); hasher.Update(pDevice->GetEnabledFeatures().scalarBlockLayout); diff --git a/icd/api/vk_pipeline_layout.cpp b/icd/api/vk_pipeline_layout.cpp index dc8c46a5..68f3de3a 100644 --- a/icd/api/vk_pipeline_layout.cpp +++ b/icd/api/vk_pipeline_layout.cpp @@ -239,7 +239,8 @@ bool PipelineLayout::HasRayTracing( { bool rtFound = false; - if (pDevice->RayTrace() != nullptr) + if ((pDevice->RayTrace() != nullptr) + ) { if (pIn->setLayoutCount == 0) { @@ -1174,12 +1175,13 @@ void PipelineLayout::BuildLlpcRayTracingDispatchArgumentsMapping( uint32_t* pStaNodeCount ) const { + const uint32_t srdSize = GpuRt::MaxBufferSrdSize; const Vkgc::ResourceMappingNode TraceRayLayout[] = { // TODO: Replace binding and set with enum once it is defined in vkgcDefs.h { 
Vkgc::ResourceMappingNodeType::DescriptorConstBufferCompact, 2, 0, {{93, 17, 0}} }, - { Vkgc::ResourceMappingNodeType::DescriptorConstBuffer, 4, 2, {{93, 0, 0}} }, - { Vkgc::ResourceMappingNodeType::DescriptorBuffer, 4, 6, {{93, 1, 0}} }, + { Vkgc::ResourceMappingNodeType::DescriptorConstBuffer, srdSize, 2, {{93, 0, 0}} }, + { Vkgc::ResourceMappingNodeType::DescriptorBuffer, srdSize, 2 + srdSize, {{93, 1, 0}} }, }; const uint32_t TraceRayLayoutNodeCount = static_cast(Util::ArrayLen(TraceRayLayout)); diff --git a/icd/api/vk_queue.cpp b/icd/api/vk_queue.cpp index 73def9fc..e14c05e7 100644 --- a/icd/api/vk_queue.cpp +++ b/icd/api/vk_queue.cpp @@ -1102,7 +1102,7 @@ VkResult Queue::Submit( VkResult result = VK_SUCCESS; - const bool isSynchronization2 = std::is_same::value; + constexpr bool IsSynchronization2 = std::is_same::value; #if VKI_RAY_TRACING FreeRetiredCpsStackMem(); @@ -1159,7 +1159,7 @@ VkResult Queue::Submit( switch (static_cast(pHeader->sType)) { case VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO: - VK_ASSERT(isSynchronization2 == false); + VK_ASSERT(IsSynchronization2 == false); pDeviceGroupInfo = static_cast(pNext); break; @@ -1168,7 +1168,7 @@ VkResult Queue::Submit( const VkTimelineSemaphoreSubmitInfo* pTimelineSemaphoreInfo = static_cast(pNext); - VK_ASSERT(isSynchronization2 == false); + VK_ASSERT(IsSynchronization2 == false); waitValueCount = pTimelineSemaphoreInfo->waitSemaphoreValueCount; pWaitSemaphoreValues = pTimelineSemaphoreInfo->pWaitSemaphoreValues; @@ -1177,7 +1177,7 @@ VkResult Queue::Submit( break; } case VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO: - VK_ASSERT(isSynchronization2 == false); + VK_ASSERT(IsSynchronization2 == false); pProtectedSubmitInfo = static_cast(pNext); protectedSubmit = pProtectedSubmitInfo->protectedSubmit; @@ -1202,7 +1202,7 @@ VkResult Queue::Submit( uint32_t cmdBufferCount = 0; uint32_t waitSemaphoreCount = 0; - if (isSynchronization2) + if (IsSynchronization2) { const VkSubmitInfo2KHR* pSubmitInfoKhr = reinterpret_cast(&pSubmits[submitIdx]); @@ -1304,8 +1304,8 @@ VkResult Queue::Submit( palSubmitInfo.gpuMemRefCount = 0; palSubmitInfo.pGpuMemoryRefs = nullptr; - const uint32_t deviceCount = ((pDeviceGroupInfo == nullptr) && (isSynchronization2 == false)) ? - 1 : m_pDevice->NumPalDevices(); + const uint32_t deviceCount = (IsSynchronization2 || (pDeviceGroupInfo != nullptr)) ? 
diff --git a/icd/api/vk_queue.cpp b/icd/api/vk_queue.cpp
index 73def9fc..e14c05e7 100644
--- a/icd/api/vk_queue.cpp
+++ b/icd/api/vk_queue.cpp
@@ -1102,7 +1102,7 @@ VkResult Queue::Submit(

     VkResult result = VK_SUCCESS;

-    const bool isSynchronization2 = std::is_same::value;
+    constexpr bool IsSynchronization2 = std::is_same::value;

 #if VKI_RAY_TRACING
     FreeRetiredCpsStackMem();
@@ -1159,7 +1159,7 @@ VkResult Queue::Submit(
             switch (static_cast(pHeader->sType))
             {
             case VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO:
-                VK_ASSERT(isSynchronization2 == false);
+                VK_ASSERT(IsSynchronization2 == false);
                 pDeviceGroupInfo = static_cast(pNext);
                 break;

@@ -1168,7 +1168,7 @@ VkResult Queue::Submit(
             {
                 const VkTimelineSemaphoreSubmitInfo* pTimelineSemaphoreInfo =
                     static_cast(pNext);
-                VK_ASSERT(isSynchronization2 == false);
+                VK_ASSERT(IsSynchronization2 == false);

                 waitValueCount       = pTimelineSemaphoreInfo->waitSemaphoreValueCount;
                 pWaitSemaphoreValues = pTimelineSemaphoreInfo->pWaitSemaphoreValues;
@@ -1177,7 +1177,7 @@ VkResult Queue::Submit(
                 break;
             }
             case VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO:
-                VK_ASSERT(isSynchronization2 == false);
+                VK_ASSERT(IsSynchronization2 == false);
                 pProtectedSubmitInfo = static_cast(pNext);
                 protectedSubmit      = pProtectedSubmitInfo->protectedSubmit;

@@ -1202,7 +1202,7 @@ VkResult Queue::Submit(
             uint32_t cmdBufferCount     = 0;
             uint32_t waitSemaphoreCount = 0;

-            if (isSynchronization2)
+            if (IsSynchronization2)
             {
                 const VkSubmitInfo2KHR* pSubmitInfoKhr =
                     reinterpret_cast(&pSubmits[submitIdx]);
@@ -1304,8 +1304,8 @@ VkResult Queue::Submit(
             palSubmitInfo.gpuMemRefCount = 0;
             palSubmitInfo.pGpuMemoryRefs = nullptr;

-            const uint32_t deviceCount = ((pDeviceGroupInfo == nullptr) && (isSynchronization2 == false)) ?
-                1 : m_pDevice->NumPalDevices();
+            const uint32_t deviceCount = (IsSynchronization2 || (pDeviceGroupInfo != nullptr)) ?
+                m_pDevice->NumPalDevices() : 1;
             for (uint32_t deviceIdx = 0; (deviceIdx < deviceCount) && (result == VK_SUCCESS); deviceIdx++)
             {
                 Pal::Result palResult = Pal::Result::Success;
@@ -1342,7 +1342,7 @@ VkResult Queue::Submit(

                 for (uint32_t i = 0; i < cmdBufferCount; ++i)
                 {
-                    if (isSynchronization2)
+                    if (IsSynchronization2)
                     {
                         const VkSubmitInfo2KHR* pSubmitInfoKhr =
                             reinterpret_cast(&pSubmits[submitIdx]);
@@ -1586,14 +1586,14 @@ VkResult Queue::Submit(
                 virtStackFrame.FreeArray(pCmdBufInfos);
             }

-            if (isSynchronization2 && (pCmdBuffers != nullptr))
+            if (IsSynchronization2 && (pCmdBuffers != nullptr))
             {
                 virtStackFrame.FreeArray(pCmdBuffers);
             }

             virtStackFrame.FreeArray(pPalCmdBuffers);

-            if (isSynchronization2)
+            if (IsSynchronization2)
             {
                 const VkSubmitInfo2KHR* pSubmitInfoKhr =
                     reinterpret_cast(&pSubmits[submitIdx]);
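Note: in the vk_queue.cpp hunks above, the synchronization2 flag becomes constexpr because it depends only on the template parameter of Queue::Submit, so it is known at compile time for each instantiation. The sketch below shows that pattern in isolation; SubmitInfoV1, SubmitInfoV2 and CountCommandBuffers are invented for the example, and it uses if constexpr so that each branch only has to compile for its own type (the driver itself keeps ordinary runtime ifs on the constexpr value).

    #include <cstdio>
    #include <type_traits>

    // Made-up stand-ins for VkSubmitInfo / VkSubmitInfo2KHR; only the compile-time
    // dispatch pattern is illustrated, not the real submit path.
    struct SubmitInfoV1 { int commandBufferCount; };
    struct SubmitInfoV2 { int commandBufferInfoCount; };

    template <typename SubmitInfoType>
    int CountCommandBuffers(const SubmitInfoType& info)
    {
        // Evaluated at compile time, so each instantiation keeps only one branch.
        constexpr bool IsV2 = std::is_same<SubmitInfoType, SubmitInfoV2>::value;

        if constexpr (IsV2)
        {
            return info.commandBufferInfoCount;
        }
        else
        {
            return info.commandBufferCount;
        }
    }

    int main()
    {
        SubmitInfoV1 v1{ 3 };
        SubmitInfoV2 v2{ 5 };
        std::printf("v1=%d v2=%d\n", CountCommandBuffers(v1), CountCommandBuffers(v2));
        return 0;
    }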
diff --git a/icd/res/ver.h b/icd/res/ver.h
index 36011de9..dd4a4df1 100644
--- a/icd/res/ver.h
+++ b/icd/res/ver.h
@@ -36,7 +36,7 @@
 #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0"

 // Bump up after each promotion to mainline
-#define VULKAN_ICD_BUILD_VERSION 316
+#define VULKAN_ICD_BUILD_VERSION 318

 // String version is needed with leading zeros and extra termination (unicode)
 #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION
@@ -45,7 +45,7 @@
 // These values specify the driver ID and driver info string
 #define VULKAN_DRIVER_ID VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR // "AMDOPEN"
 #define VULKAN_DRIVER_NAME_STR "AMD open-source driver"
-#define VULKAN_DRIVER_INFO_STR "2024.Q3.1"
+#define VULKAN_DRIVER_INFO_STR "2024.Q3.2"
 #define VULKAN_DRIVER_INFO_STR_LLPC "(LLPC)"

 // These values tell which version of the conformance test the driver is compliant against
diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp
index d493c70e..04cd3b66 100644
--- a/icd/settings/settings.cpp
+++ b/icd/settings/settings.cpp
@@ -495,6 +495,17 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
             m_settings.fsWaveSize = 64;
         }
 #endif
+        switch (pInfo->revision)
+        {
+#if VKI_BUILD_STRIX1
+        case Pal::AsicRevision::Strix1:
+            // Remove this when displayDcc corruption issue is fixed on Strix.
+            m_settings.disableDisplayDcc = DisplayableDcc::DisplayableDccDisabled;
+            break;
+#endif
+        default:
+            break;
+        }

         // Put command buffers in local for large/resizable BAR systems with > 7 GBs of local heap
         constexpr gpusize _1GB = 1024ull * 1024ull * 1024ull;
@@ -1304,7 +1315,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
         }

 #if VKI_BUILD_GFX11
-        if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+        if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp11_0)
         {
             // Gives ~0.5% gain at 4k
             m_settings.enableAceShaderPrefetch = false;
@@ -1320,7 +1331,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
         }

 #if VKI_BUILD_GFX11
-        if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+        if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp11_0)
         {
             // Gives ~2.22% gain at 1080p
             m_settings.enableAceShaderPrefetch = false;
@@ -1330,13 +1341,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(

     if (appProfile == AppProfile::RayTracingWeekends)
     {
-#if VKI_BUILD_GFX11
-        if ((pInfo->revision != Pal::AsicRevision::Navi31)
-#if VKI_BUILD_NAVI32
-            && (pInfo->revision != Pal::AsicRevision::Navi32)
-#endif
-           )
-#endif
+        if (pInfo->gfxipProperties.shaderCore.vgprsPerSimd == 1024)
         {
             {
                 m_settings.rtUnifiedVgprLimit = 64;
@@ -1508,7 +1513,7 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
         m_settings.forceMinImageCount = 3;

 #if VKI_BUILD_GFX11
-        if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+        if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp11_0)
         {
             // Gives ~0.9% gain at 1080p
             m_settings.enableAceShaderPrefetch = false;
@@ -1700,12 +1705,21 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
 #endif
     }

+    if (appProfile == AppProfile::HaloInfinite)
+    {
+        OverrideVkd3dCommonSettings(&m_settings);
+
+    }
+
     if (appProfile == AppProfile::Starfield)
     {
         OverrideVkd3dCommonSettings(&m_settings);

 #if VKI_BUILD_GFX11
         if ((pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0)
+#if VKI_BUILD_GFX115
+            || (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_5)
+#endif
            )
         {
             m_settings.fsWaveSize = 32;
@@ -1723,6 +1737,13 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings(
         m_settings.disableSingleMipAnisoOverride = false;
     }

+    if (appProfile == AppProfile::Archean)
+    {
+        if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3)
+        {
+        }
+    }
+
     return result;
 }

@@ -1862,6 +1883,7 @@ void VulkanSettingsLoader::ReadPublicSettings()
     {
         m_settings.vSyncControl = static_cast(vSyncControl);
     }
+
 }

 // =====================================================================================================================
diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json
index 86935719..acc08788 100644
--- a/icd/settings/settings_xgl.json
+++ b/icd/settings/settings_xgl.json
@@ -4638,7 +4638,7 @@
             },
             "ValidValues": {
                 "IsEnum": true,
-                "Is64Bit": true,
+                "EnumSize": 64,
                 "Values": [
                     {
                         "Name": "DbgBarrierDrawNonIndexed",
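Note: several settings.cpp hunks above relax per-app tunings from '== GfxIp11_0' to '>= GfxIp11_0', so the same override also applies to newer graphics IP levels (such as 11.5) without listing each one. A minimal sketch of why the ordered comparison behaves that way is below; the GfxIpLevel enum and WantAcePrefetchDisabled are invented for the example and only mimic the oldest-to-newest ordering of Pal::GfxIpLevel.

    #include <cstdio>

    // Made-up graphics IP levels; the real driver compares Pal::GfxIpLevel values,
    // whose enumerators are ordered from oldest to newest.
    enum class GfxIpLevel : int
    {
        GfxIp10_3 = 1,
        GfxIp11_0 = 2,
        GfxIp11_5 = 3,
    };

    // Applying a tuning with '>= GfxIp11_0' picks up newer levels automatically,
    // where '== GfxIp11_0' would silently skip them.
    static bool WantAcePrefetchDisabled(GfxIpLevel level)
    {
        return level >= GfxIpLevel::GfxIp11_0;
    }

    int main()
    {
        const GfxIpLevel levels[] = { GfxIpLevel::GfxIp10_3, GfxIpLevel::GfxIp11_0, GfxIpLevel::GfxIp11_5 };
        for (GfxIpLevel level : levels)
        {
            std::printf("level %d -> disable ACE prefetch: %s\n",
                        static_cast<int>(level),
                        WantAcePrefetchDisabled(level) ? "yes" : "no");
        }
        return 0;
    }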
"BuildTypes": [] @@ -2979,6 +2983,7 @@ def json_enum_reader_template(values, prefix=""): { "pattern", "action", + "comment", // BuildTypes key is added here only to maintain consistency. The value against this key (if any in JSON) \ is not read at runtime. "BuildTypes" @@ -3054,3 +3059,4 @@ def json_enum_reader_template(values, prefix=""): } } """ +