diff --git a/docs/FFX-CACAO-GUI.pdf b/docs/FFX-CACAO-GUI.pdf index e0f8f3c..41ca780 100644 Binary files a/docs/FFX-CACAO-GUI.pdf and b/docs/FFX-CACAO-GUI.pdf differ diff --git a/docs/FFX-CACAO-Technology.pdf b/docs/FFX-CACAO-Technology.pdf index 69c5a28..8592e59 100644 Binary files a/docs/FFX-CACAO-Technology.pdf and b/docs/FFX-CACAO-Technology.pdf differ diff --git a/doxygen.conf b/doxygen.conf index e70520d..e9932e9 100644 --- a/doxygen.conf +++ b/doxygen.conf @@ -1,7 +1,7 @@ DOXYFILE_ENCODING = UTF-8 PROJECT_NAME = "FidelityFX CACAO" OPTIMIZE_OUTPUT_FOR_C = YES -INPUT = ffx-cacao/inc/ffx_cacao.h +INPUT = ffx-cacao/inc/ffx_cacao.h ffx-cacao/inc/ffx_cacao_impl.h OUTPUT_DIRECTORY = docs ENABLE_PREPROCESSING = YES diff --git a/ffx-cacao/inc/ffx_cacao.h b/ffx-cacao/inc/ffx_cacao.h index e67c49d..f270b46 100644 --- a/ffx-cacao/inc/ffx_cacao.h +++ b/ffx-cacao/inc/ffx_cacao.h @@ -1,4 +1,4 @@ -// Modifications Copyright © 2020. Advanced Micro Devices, Inc. All Rights Reserved. +// Modifications Copyright © 2021. Advanced Micro Devices, Inc. All Rights Reserved. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Copyright (c) 2016, Intel Corporation @@ -22,81 +22,54 @@ #pragma once -// In future it is planned that FidelityFX CACAO will allow SSAO generation at native resolution. -// However, at the current time the performance/image quality trade off is poor, and further optimisation -// work is being carried out. If you wish to experiment with native resolution SSAO generation enable this -// flag. Integrating FidelityFX CACAO into games with native resolution enabled is currently not recommended. 
-// #define FFX_CACAO_ENABLE_NATIVE_RESOLUTION - -// #define FFX_CACAO_ENABLE_PROFILING -// #define FFX_CACAO_ENABLE_D3D12 -// #define FFX_CACAO_ENABLE_VULKAN - #include -#ifdef FFX_CACAO_ENABLE_D3D12 -#include -#endif -#ifdef FFX_CACAO_ENABLE_VULKAN -#include -#endif -typedef uint8_t FfxCacaoBool; -static const FfxCacaoBool FFX_CACAO_TRUE = 1; -static const FfxCacaoBool FFX_CACAO_FALSE = 0; - -/** - The return codes for the API functions. -*/ -typedef enum FfxCacaoStatus { - FFX_CACAO_STATUS_OK = 0, - FFX_CACAO_STATUS_INVALID_ARGUMENT = -1, - FFX_CACAO_STATUS_INVALID_POINTER = -2, - FFX_CACAO_STATUS_OUT_OF_MEMORY = -3, - FFX_CACAO_STATUS_FAILED = -4, -} FfxCacaoStatus; +typedef uint8_t FFX_CACAO_Bool; +static const FFX_CACAO_Bool FFX_CACAO_TRUE = 1; +static const FFX_CACAO_Bool FFX_CACAO_FALSE = 0; /** The quality levels that FidelityFX CACAO can generate SSAO at. This affects the number of samples taken for generating SSAO. */ -typedef enum FfxCacaoQuality { +typedef enum FFX_CACAO_Quality { FFX_CACAO_QUALITY_LOWEST = 0, FFX_CACAO_QUALITY_LOW = 1, FFX_CACAO_QUALITY_MEDIUM = 2, FFX_CACAO_QUALITY_HIGH = 3, FFX_CACAO_QUALITY_HIGHEST = 4, -} FfxCacaoQuality; +} FFX_CACAO_Quality; /** A structure representing a 4x4 matrix of floats. The matrix is stored in row major order in memory. */ -typedef struct FfxCacaoMatrix4x4 { +typedef struct FFX_CACAO_Matrix4x4 { float elements[4][4]; -} FfxCacaoMatrix4x4; +} FFX_CACAO_Matrix4x4; /** A structure for the settings used by FidelityFX CACAO. These settings may be updated with each draw call. */ -typedef struct FfxCacaoSettings { +typedef struct FFX_CACAO_Settings { float radius; ///< [0.0, ~ ] World (view) space size of the occlusion sphere. 
- float shadowMultiplier; ///< [0.0, 5.0] Effect strength linear multiplier - float shadowPower; ///< [0.5, 5.0] Effect strength pow modifier - float shadowClamp; ///< [0.0, 1.0] Effect max limit (applied after multiplier but before blur) - float horizonAngleThreshold; ///< [0.0, 0.2] Limits self-shadowing (makes the sampling area less of a hemisphere, more of a spherical cone, to avoid self-shadowing and various artifacts due to low tessellation and depth buffer imprecision, etc.) - float fadeOutFrom; ///< [0.0, ~ ] Distance to start start fading out the effect. + float shadowMultiplier; ///< [0.0, 5.0] Effect strength linear multiplier. + float shadowPower; ///< [0.5, 5.0] Effect strength pow modifier. + float shadowClamp; ///< [0.0, 1.0] Effect max limit (applied after multiplier but before blur). + float horizonAngleThreshold; ///< [0.0, 0.2] Limits self-shadowing (makes the sampling area less of a hemisphere, more of a spherical cone, to avoid self-shadowing and various artifacts due to low tessellation and depth buffer imprecision, etc.). + float fadeOutFrom; ///< [0.0, ~ ] Distance to start fading out the effect. float fadeOutTo; ///< [0.0, ~ ] Distance at which the effect is faded out. - FfxCacaoQuality qualityLevel; ///< Effect quality, affects number of taps etc - float adaptiveQualityLimit; ///< [0.0, 1.0] (only for Quality Level 3) - uint32_t blurPassCount; ///< [ 0, 8] Number of edge-sensitive smart blur passes to apply - float sharpness; ///< [0.0, 1.0] (How much to bleed over edges; 1: not at all, 0.5: half-half; 0.0: completely ignore edges) + FFX_CACAO_Quality qualityLevel; ///< Effect quality, affects number of taps etc. + float adaptiveQualityLimit; ///< [0.0, 1.0] (only for quality level FFX_CACAO_QUALITY_HIGHEST). + uint32_t blurPassCount; ///< [ 0, 8] Number of edge-sensitive smart blur passes to apply. + float sharpness; ///< [0.0, 1.0] (How much to bleed over edges; 1: not at all, 0.5: half-half; 0.0: completely ignore edges). 
float temporalSupersamplingAngleOffset; ///< [0.0, PI] Used to rotate sampling kernel; If using temporal AA / supersampling, suggested to rotate by ( (frame%3)/3.0*PI ) or similar. Kernel is already symmetrical, which is why we use PI and not 2*PI. float temporalSupersamplingRadiusOffset; ///< [0.0, 2.0] Used to scale sampling kernel; If using temporal AA / supersampling, suggested to scale by ( 1.0f + (((frame%3)-1.0)/3.0)*0.1 ) or similar. float detailShadowStrength; ///< [0.0, 5.0] Used for high-res detail AO using neighboring depth pixels: adds a lot of detail but also reduces temporal stability (adds aliasing). - FfxCacaoBool generateNormals; ///< This option should be set to FFX_CACAO_TRUE if FidelityFX-CACAO should reconstruct a normal buffer from the depth buffer. It is required to be FFX_CACAO_TRUE if no normal buffer is provided. + FFX_CACAO_Bool generateNormals; ///< This option should be set to FFX_CACAO_TRUE if FidelityFX-CACAO should reconstruct a normal buffer from the depth buffer. It is required to be FFX_CACAO_TRUE if no normal buffer is provided. float bilateralSigmaSquared; ///< [0.0, ~ ] Sigma squared value for use in bilateral upsampler giving Gaussian blur term. Should be greater than 0.0. float bilateralSimilarityDistanceSigma; ///< [0.0, ~ ] Sigma squared value for use in bilateral upsampler giving similarity weighting for neighbouring pixels. Should be greater than 0.0. -} FfxCacaoSettings; +} FFX_CACAO_Settings; -static const FfxCacaoSettings FFX_CACAO_DEFAULT_SETTINGS = { +static const FFX_CACAO_Settings FFX_CACAO_DEFAULT_SETTINGS = { /* radius */ 1.2f, /* shadowMultiplier */ 1.0f, /* shadowPower */ 1.50f, @@ -116,271 +89,162 @@ static const FfxCacaoSettings FFX_CACAO_DEFAULT_SETTINGS = { /* bilateralSimilarityDistanceSigma */ 0.01f, }; - -#ifdef FFX_CACAO_ENABLE_D3D12 /** - A struct containing all of the data used by FidelityFX-CACAO. - A context corresponds to an ID3D12Device. 
+ A C++ structure for the constant buffer used by FidelityFX CACAO. */ -typedef struct FfxCacaoD3D12Context FfxCacaoD3D12Context; +typedef struct FFX_CACAO_Constants { + float DepthUnpackConsts[2]; + float CameraTanHalfFOV[2]; -/** - The parameters for creating a context. -*/ -typedef struct FfxCacaoD3D12ScreenSizeInfo { - uint32_t width; ///< width of the input/output buffers - uint32_t height; ///< height of the input/output buffers - ID3D12Resource *depthBufferResource; ///< pointer to depth buffer ID3D12Resource - D3D12_SHADER_RESOURCE_VIEW_DESC depthBufferSrvDesc; ///< depth buffer D3D12_SHADER_RESOURCE_VIEW_DESC - ID3D12Resource *normalBufferResource; ///< optional pointer to normal buffer ID3D12Resource (leave as NULL if none is provided) - D3D12_SHADER_RESOURCE_VIEW_DESC normalBufferSrvDesc; ///< normal buffer D3D12_SHADER_RESOURCE_VIEW_DESC - ID3D12Resource *outputResource; ///< pointer to output buffer ID3D12Resource - D3D12_UNORDERED_ACCESS_VIEW_DESC outputUavDesc; ///< output buffer D3D12_UNORDERED_ACCESS_VIEW_DESC -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - FfxCacaoBool useDownsampledSsao; ///< Whether SSAO should be generated at native resolution or half resolution. It is recommended to enable this setting for improved performance. -#endif -} FfxCacaoD3D12ScreenSizeInfo; -#endif + float NDCToViewMul[2]; + float NDCToViewAdd[2]; -#ifdef FFX_CACAO_ENABLE_VULKAN -/** - A struct containing all of the data used by FidelityFX-CACAO. - A context corresponds to a VkDevice. -*/ -typedef struct FfxCacaoVkContext FfxCacaoVkContext; + float DepthBufferUVToViewMul[2]; + float DepthBufferUVToViewAdd[2]; -/** - Miscellaneous flags for used for Vulkan context creation by FidelityFX-CACAO - */ -typedef enum FfxCacaoVkCreateFlagsBits { - FFX_CACAO_VK_CREATE_USE_16_BIT = 0x00000001, ///< Flag controlling whether 16-bit optimisations are enabled in shaders. - FFX_CACAO_VK_CREATE_USE_DEBUG_MARKERS = 0x00000002, ///< Flag controlling whether debug markers should be used. 
- FFX_CACAO_VK_CREATE_NAME_OBJECTS = 0x00000004, ///< Flag controlling whether Vulkan objects should be named. -} FfxCacaoVkCreateFlagsBits; -typedef uint32_t FfxCacaoVkCreateFlags; + float EffectRadius; + float EffectShadowStrength; + float EffectShadowPow; + float EffectShadowClamp; -/** - The parameters for creating a context. -*/ -typedef struct FfxCacaoVkCreateInfo { - VkPhysicalDevice physicalDevice; ///< The VkPhysicalDevice corresponding to the VkDevice in use - VkDevice device; ///< The VkDevice to use FFX CACAO with - FfxCacaoVkCreateFlags flags; ///< Miscellaneous flags for context creation -} FfxCacaoVkCreateInfo; + float EffectFadeOutMul; + float EffectFadeOutAdd; + float EffectHorizonAngleThreshold; + float EffectSamplingRadiusNearLimitRec; -/** - The parameters necessary when changing the screen size of FidelityFX CACAO. -*/ -typedef struct FfxCacaoVkScreenSizeInfo { - uint32_t width; ///< width of the input/output buffers - uint32_t height; ///< height of the input/output buffers - VkImageView depthView; ///< An image view for the depth buffer, should be in layout VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL when used with FFX CACAO - VkImageView normalsView; ///< An optional image view for the normal buffer (may be VK_NULL_HANDLE). Should be in layout VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL when used with FFX CACAO - VkImage output; ///< An image for writing output from FFX CACAO, must have the same dimensions as the input - VkImageView outputView; ///< An image view corresponding to the output image. -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - FfxCacaoBool useDownsampledSsao; ///< Whether SSAO should be generated at native resolution or half resolution. It is recommended to enable this setting for improved performance. -#endif -} FfxCacaoVkScreenSizeInfo; -#endif - -#ifdef FFX_CACAO_ENABLE_PROFILING -/** - A timestamp. The label gives the name of the stage of the effect, and the ticks is the number of GPU ticks spent on that stage. 
-*/ -typedef struct FfxCacaoTimestamp { - const char *label; ///< name of timestamp stage - uint64_t ticks; ///< number of GPU ticks taken for stage -} FfxCacaoTimestamp; - -/** - An array of timestamps for detailed profiling information. The array timestamps contains numTimestamps entries. - Entry 0 of the timestamps array is guaranteed to be the total time taken by the effect. -*/ -typedef struct FfxCacaoDetailedTiming { - uint32_t numTimestamps; ///< number of timetstamps in the array timestamps - FfxCacaoTimestamp timestamps[32]; ///< array of timestamps for each FFX CACAO stage -} FfxCacaoDetailedTiming; -#endif - -#ifdef __cplusplus -extern "C" -{ -#endif + float DepthPrecisionOffsetMod; + float NegRecEffectRadius; + float LoadCounterAvgDiv; + float AdaptiveSampleCountLimit; -#ifdef FFX_CACAO_ENABLE_D3D12 - /** - Gets the size in bytes required by a context. This is to be used to allocate space for the context. - For example: + float InvSharpness; + int PassIndex; + float BilateralSigmaSquared; + float BilateralSimilarityDistanceSigma; - \code{.cpp} - size_t ffxCacaoD3D12ContextSize = ffxCacaoD3D12GetContextSize(); - FfxCacaoD3D12Context *context = (FfxCacaoD3D12Context*)malloc(ffxCacaoD3D12GetContextSize); + float PatternRotScaleMatrices[5][4]; - // ... + float NormalsUnpackMul; + float NormalsUnpackAdd; + float DetailAOStrength; + float Dummy0; - ffxCacaoD3D12DestroyContext(context); - free(context); - \endcode + float SSAOBufferDimensions[2]; + float SSAOBufferInverseDimensions[2]; - \return The size in bytes of an FfxCacaoD3D12Context. - */ - size_t ffxCacaoD3D12GetContextSize(); + float DepthBufferDimensions[2]; + float DepthBufferInverseDimensions[2]; - /** - Initialises an FfxCacaoD3D12Context. + int DepthBufferOffset[2]; + float PerPassFullResUVOffset[2]; - \param context A pointer to the context to initialise. - \param device A pointer to the D3D12 device. - \return The corresponding error code. 
- */ - FfxCacaoStatus ffxCacaoD3D12InitContext(FfxCacaoD3D12Context* context, ID3D12Device* device); + float InputOutputBufferDimensions[2]; + float InputOutputBufferInverseDimensions[2]; - /** - Destroys an FfxCacaoD3D12Context. + float ImportanceMapDimensions[2]; + float ImportanceMapInverseDimensions[2]; - \param context A pointer to the context to be destroyed. - \return The corresponding error code. + float DeinterleavedDepthBufferDimensions[2]; + float DeinterleavedDepthBufferInverseDimensions[2]; - \note This function does not destroy screen size dependent resources, and must be called after ffxCacaoD3D12DestroyScreenSizeDependentResources. - */ - FfxCacaoStatus ffxCacaoD3D12DestroyContext(FfxCacaoD3D12Context* context); + float DeinterleavedDepthBufferOffset[2]; + float DeinterleavedDepthBufferNormalisedOffset[2]; - /** - Initialises screen size dependent resources for the FfxCacaoD3D12Context. + FFX_CACAO_Matrix4x4 NormalsWorldToViewspaceMatrix; +} FFX_CACAO_Constants; - \param context A pointer to the FfxCacaoD3D12Context. - \param info A pointer to an FfxCacaoD3D12ScreenSizeInfo struct containing screen size info. - \return The corresponding error code. - */ - FfxCacaoStatus ffxCacaoD3D12InitScreenSizeDependentResources(FfxCacaoD3D12Context* context, const FfxCacaoD3D12ScreenSizeInfo* info); +/** + A structure containing sizes of each of the buffers used by FidelityFX CACAO. + */ +typedef struct FFX_CACAO_BufferSizeInfo { + uint32_t inputOutputBufferWidth; + uint32_t inputOutputBufferHeight; - /** - Destroys screen size dependent resources for the FfxCacaoD3D12Context. + uint32_t ssaoBufferWidth; + uint32_t ssaoBufferHeight; - \param context A pointer to the FfxCacaoD3D12Context. - \return The corresponding error code. 
- */ - FfxCacaoStatus ffxCacaoD3D12DestroyScreenSizeDependentResources(FfxCacaoD3D12Context* context); + uint32_t depthBufferXOffset; + uint32_t depthBufferYOffset; - /** - Update the settings of the FfxCacaoD3D12Context to those stored in the FfxCacaoSettings struct. + uint32_t depthBufferWidth; + uint32_t depthBufferHeight; - \param context A pointer to the FfxCacaoD3D12Context to update. - \param settings A pointer to the FfxCacaoSettings struct containing the new settings. - \return The corresponding error code. - */ - FfxCacaoStatus ffxCacaoD3D12UpdateSettings(FfxCacaoD3D12Context* context, const FfxCacaoSettings* settings); + uint32_t deinterleavedDepthBufferXOffset; + uint32_t deinterleavedDepthBufferYOffset; - /** - Append commands for drawing FFX CACAO to the provided ID3D12GraphicsCommandList. + uint32_t deinterleavedDepthBufferWidth; + uint32_t deinterleavedDepthBufferHeight; - \param context A pointer to the FfxCacaoD3D12Context. - \param commandList A pointer to the ID3D12GraphicsCommandList to append commands to. - \param proj A pointer to the projection matrix. - \param normalsToView An optional pointer to a matrix for transforming normals to in the normal buffer to viewspace. - \return The corresponding error code. - */ - FfxCacaoStatus ffxCacaoD3D12Draw(FfxCacaoD3D12Context* context, ID3D12GraphicsCommandList* commandList, const FfxCacaoMatrix4x4* proj, const FfxCacaoMatrix4x4* normalsToView); + uint32_t importanceMapWidth; + uint32_t importanceMapHeight; -#if FFX_CACAO_ENABLE_PROFILING - /** - Get detailed performance timings from the previous frame. + uint32_t downsampledSsaoBufferWidth; + uint32_t downsampledSsaoBufferHeight; +} FFX_CACAO_BufferSizeInfo; - \param context A pointer to the FfxCacaoD3D12Context. - \param timings A pointer to an FfxCacaoDetailedTiming struct to fill in with detailed timings. - \result The corresponding error code. 
- */ - FfxCacaoStatus ffxCacaoD3D12GetDetailedTimings(FfxCacaoD3D12Context* context, FfxCacaoDetailedTiming* timings); -#endif +#ifdef __cplusplus +extern "C" +{ #endif -#ifdef FFX_CACAO_ENABLE_VULKAN /** - Gets the size in bytes required by a Vulkan context. This is to be used to allocate space for the context. - For example: + Update buffer size info for resolution width x height. \code{.cpp} - size_t ffxCacaoVkContextSize = ffxCacaoVkGetContextSize(); - FfxCacaoVkContext *context = (FfxCacaoVkContext*)malloc(ffxCacaoVkGetContextSize); - - // ... - - ffxCacaoVkDestroyContext(context); - free(context); + FFX_CACAO_BufferSizeInfo bufferSizeInfo = {}; + FFX_CACAO_UpdateBufferSizeInfo(width, height, useDownsampledSsao, &bufferSizeInfo); \endcode - \return The size in bytes of an FfxCacaoVkContext. + \param width Screen width. + \param height Screen height. + \param useDownsampledSsao Whether FFX CACAO should use downsampling. */ - size_t ffxCacaoVkGetContextSize(); + void FFX_CACAO_UpdateBufferSizeInfo(uint32_t width, uint32_t height, FFX_CACAO_Bool useDownsampledSsao, FFX_CACAO_BufferSizeInfo* bsi); /** - Initialises an FfxCacaoVkContext. - - \param context A pointer to the context to initialise. - \param info A pointer to an FfxCacaoVkCreateInfo struct with parameters such as the vulkan device. - \return The corresponding error code. - */ - FfxCacaoStatus ffxCacaoVkInitContext(FfxCacaoVkContext* context, const FfxCacaoVkCreateInfo *info); + Update the contents of the FFX CACAO constant buffer (an FFX_CACAO_Constants struct). Note, this function does not update + per pass constants. - /** - Destroys an FfxCacaoVkContext. - - \param context A pointer to the context to be destroyed. - \return The corresponding error code. - - \note This function does not destroy screen size dependent resources, and must be called after ffxCacaoVkDestroyScreenSizeDependentResources. 
- */ - FfxCacaoStatus ffxCacaoVkDestroyContext(FfxCacaoVkContext* context); - - /** - Initialises screen size dependent resources for the FfxCacaoVkContext. - - \param context A pointer to the FfxCacaoVkContext. - \param info A pointer to an FfxCacaoVkScreenSizeInfo struct containing screen size info. - \return The corresponding error code. - */ - FfxCacaoStatus ffxCacaoVkInitScreenSizeDependentResources(FfxCacaoVkContext* context, const FfxCacaoVkScreenSizeInfo* info); + \code{.cpp} + FFX_CACAO_Matrix4x4 proj = ...; // projection matrix for the frame + FFX_CACAO_Matrix4x4 normalsToView = ...; // normals world space to view space matrix for the frame + FFX_CACAO_Settings settings = ...; // settings + FFX_CACAO_BufferSizeInfo bufferSizeInfo = ...; // buffer size info - /** - Destroys screen size dependent resources for the FfxCacaoVkContext. + FFX_CACAO_Constants constants = {}; + FFX_CACAO_UpdateConstants(&constants, &settings, &bufferSizeInfo, &proj, &normalsToView); + \endcode - \param context A pointer to the FfxCacaoVkContext. - \return The corresponding error code. + \param consts FFX_CACAO_Constants constant buffer. + \param settings FFX_CACAO_Settings settings. + \param bufferSizeInfo FFX_CACAO_BufferSizeInfo buffer size info. + \param proj Projection matrix for the frame. + \param normalsToView Normals world space to view space matrix for the frame. */ - FfxCacaoStatus ffxCacaoVkDestroyScreenSizeDependentResources(FfxCacaoVkContext* context); + void FFX_CACAO_UpdateConstants(FFX_CACAO_Constants* consts, const FFX_CACAO_Settings* settings, const FFX_CACAO_BufferSizeInfo* bufferSizeInfo, const FFX_CACAO_Matrix4x4* proj, const FFX_CACAO_Matrix4x4* normalsToView); /** - Update the settings of the FfxCacaoVkContext to those stored in the FfxCacaoSettings struct. - - \param context A pointer to the FfxCacaoVkContext to update. - \param settings A pointer to the FfxCacaoSettings struct containing the new settings. - \return The corresponding error code. 
- */ - FfxCacaoStatus ffxCacaoVkUpdateSettings(FfxCacaoVkContext* context, const FfxCacaoSettings* settings); + Update the contents of the FFX CACAO constant buffer (an FFX_CACAO_Constants struct) with per pass constants. + FFX CACAO runs 4 passes which use different constants. It is recommended to have four separate FFX_CACAO_Constants structs + each filled with constants for each of the 4 passes. - /** - Append commands for drawing FFX CACAO to the provided VkCommandBuffer. + \code{.cpp} + FFX_CACAO_Settings settings = ...; // settings + FFX_CACAO_BufferSizeInfo bufferSizeInfo = ...; // buffer size info - \param context A pointer to the FfxCacaoVkContext. - \param commandList The VkCommandBuffer to append commands to. - \param proj A pointer to the projection matrix. - \param normalsToView An optional pointer to a matrix for transforming normals to in the normal buffer to viewspace. - \return The corresponding error code. - */ - FfxCacaoStatus ffxCacaoVkDraw(FfxCacaoVkContext* context, VkCommandBuffer commandList, const FfxCacaoMatrix4x4* proj, const FfxCacaoMatrix4x4* normalsToView); + FFX_CACAO_Constants perPassConstants[4] = {}; -#ifdef FFX_CACAO_ENABLE_PROFILING - /** - Get detailed performance timings from the previous frame. + for (int i = 0; i < 4; ++i) { + FFX_CACAO_UpdatePerPassConstants(&perPassConstants[i], &settings, &bufferSizeInfo, i); + } + \endcode - \param context A pointer to the FfxCacaoVkContext. - \param timings A pointer to an FfxCacaoDetailedTiming struct to fill in with detailed timings. - \result The corresponding error code. + \param consts FFX_CACAO_Constants constants buffer. + \param settings FFX_CACAO_Settings settings. + \param bufferSizeInfo FFX_CACAO_BufferSizeInfo buffer size info. + \param pass pass number. 
*/ - FfxCacaoStatus ffxCacaoVkGetDetailedTimings(FfxCacaoVkContext* context, FfxCacaoDetailedTiming* timings); -#endif -#endif + void FFX_CACAO_UpdatePerPassConstants(FFX_CACAO_Constants* consts, const FFX_CACAO_Settings* settings, const FFX_CACAO_BufferSizeInfo* bufferSizeInfo, int pass); #ifdef __cplusplus } diff --git a/ffx-cacao/inc/ffx_cacao_impl.h b/ffx-cacao/inc/ffx_cacao_impl.h new file mode 100644 index 0000000..f5a723d --- /dev/null +++ b/ffx-cacao/inc/ffx_cacao_impl.h @@ -0,0 +1,312 @@ +// Modifications Copyright © 2021. Advanced Micro Devices, Inc. All Rights Reserved. + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/*! \file */ + +#pragma once + +#include "ffx_cacao.h" + +// #define FFX_CACAO_ENABLE_PROFILING +// #define FFX_CACAO_ENABLE_D3D12 +// #define FFX_CACAO_ENABLE_VULKAN + +#ifdef FFX_CACAO_ENABLE_D3D12 +#include +#endif +#ifdef FFX_CACAO_ENABLE_VULKAN +#include +#endif + +/** + The return codes for the API functions. +*/ +typedef enum FFX_CACAO_Status { + FFX_CACAO_STATUS_OK = 0, + FFX_CACAO_STATUS_INVALID_ARGUMENT = -1, + FFX_CACAO_STATUS_INVALID_POINTER = -2, + FFX_CACAO_STATUS_OUT_OF_MEMORY = -3, + FFX_CACAO_STATUS_FAILED = -4, +} FFX_CACAO_Status; + +#ifdef FFX_CACAO_ENABLE_D3D12 +/** + A struct containing all of the data used by FidelityFX-CACAO. + A context corresponds to an ID3D12Device. +*/ +typedef struct FFX_CACAO_D3D12Context FFX_CACAO_D3D12Context; + +/** + The parameters for creating a context. 
+*/ +typedef struct FFX_CACAO_D3D12ScreenSizeInfo { + uint32_t width; ///< width of the input/output buffers + uint32_t height; ///< height of the input/output buffers + ID3D12Resource *depthBufferResource; ///< pointer to depth buffer ID3D12Resource + D3D12_SHADER_RESOURCE_VIEW_DESC depthBufferSrvDesc; ///< depth buffer D3D12_SHADER_RESOURCE_VIEW_DESC + ID3D12Resource *normalBufferResource; ///< optional pointer to normal buffer ID3D12Resource (leave as NULL if none is provided) + D3D12_SHADER_RESOURCE_VIEW_DESC normalBufferSrvDesc; ///< normal buffer D3D12_SHADER_RESOURCE_VIEW_DESC + ID3D12Resource *outputResource; ///< pointer to output buffer ID3D12Resource + D3D12_UNORDERED_ACCESS_VIEW_DESC outputUavDesc; ///< output buffer D3D12_UNORDERED_ACCESS_VIEW_DESC + FFX_CACAO_Bool useDownsampledSsao; ///< Whether SSAO should be generated at native resolution or half resolution. It is recommended to enable this setting for improved performance. +} FFX_CACAO_D3D12ScreenSizeInfo; +#endif + +#ifdef FFX_CACAO_ENABLE_VULKAN +/** + A struct containing all of the data used by FidelityFX-CACAO. + A context corresponds to a VkDevice. +*/ +typedef struct FFX_CACAO_VkContext FFX_CACAO_VkContext; + +/** + Miscellaneous flags used for Vulkan context creation by FidelityFX-CACAO + */ +typedef enum FFX_CACAO_VkCreateFlagsBits { + FFX_CACAO_VK_CREATE_USE_16_BIT = 0x00000001, ///< Flag controlling whether 16-bit optimisations are enabled in shaders. + FFX_CACAO_VK_CREATE_USE_DEBUG_MARKERS = 0x00000002, ///< Flag controlling whether debug markers should be used. + FFX_CACAO_VK_CREATE_NAME_OBJECTS = 0x00000004, ///< Flag controlling whether Vulkan objects should be named. +} FFX_CACAO_VkCreateFlagsBits; +typedef uint32_t FFX_CACAO_VkCreateFlags; + +/** + The parameters for creating a context. 
+*/ +typedef struct FFX_CACAO_VkCreateInfo { + VkPhysicalDevice physicalDevice; ///< The VkPhysicalDevice corresponding to the VkDevice in use + VkDevice device; ///< The VkDevice to use FFX CACAO with + FFX_CACAO_VkCreateFlags flags; ///< Miscellaneous flags for context creation +} FFX_CACAO_VkCreateInfo; + +/** + The parameters necessary when changing the screen size of FidelityFX CACAO. +*/ +typedef struct FFX_CACAO_VkScreenSizeInfo { + uint32_t width; ///< width of the input/output buffers + uint32_t height; ///< height of the input/output buffers + VkImageView depthView; ///< An image view for the depth buffer, should be in layout VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL when used with FFX CACAO + VkImageView normalsView; ///< An optional image view for the normal buffer (may be VK_NULL_HANDLE). Should be in layout VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL when used with FFX CACAO + VkImage output; ///< An image for writing output from FFX CACAO, must have the same dimensions as the input + VkImageView outputView; ///< An image view corresponding to the output image. + FFX_CACAO_Bool useDownsampledSsao; ///< Whether SSAO should be generated at native resolution or half resolution. It is recommended to enable this setting for improved performance. +} FFX_CACAO_VkScreenSizeInfo; +#endif + +#ifdef FFX_CACAO_ENABLE_PROFILING +/** + A timestamp. The label gives the name of the stage of the effect, and the ticks is the number of GPU ticks spent on that stage. +*/ +typedef struct FFX_CACAO_Timestamp { + const char *label; ///< name of timestamp stage + uint64_t ticks; ///< number of GPU ticks taken for stage +} FFX_CACAO_Timestamp; + +/** + An array of timestamps for detailed profiling information. The array timestamps contains numTimestamps entries. + Entry 0 of the timestamps array is guaranteed to be the total time taken by the effect. 
+*/ +typedef struct FFX_CACAO_DetailedTiming { + uint32_t numTimestamps; ///< number of timestamps in the array timestamps + FFX_CACAO_Timestamp timestamps[32]; ///< array of timestamps for each FFX CACAO stage +} FFX_CACAO_DetailedTiming; +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif + +#ifdef FFX_CACAO_ENABLE_D3D12 + /** + Gets the size in bytes required by a context. This is to be used to allocate space for the context. + For example: + + \code{.cpp} + size_t FFX_CACAO_D3D12ContextSize = FFX_CACAO_D3D12GetContextSize(); + FFX_CACAO_D3D12Context *context = (FFX_CACAO_D3D12Context*)malloc(FFX_CACAO_D3D12ContextSize); + + // ... + + FFX_CACAO_D3D12DestroyContext(context); + free(context); + \endcode + + \return The size in bytes of an FFX_CACAO_D3D12Context. + */ + size_t FFX_CACAO_D3D12GetContextSize(); + + /** + Initialises an FFX_CACAO_D3D12Context. + + \param context A pointer to the context to initialise. + \param device A pointer to the D3D12 device. + \return The corresponding error code. + */ + FFX_CACAO_Status FFX_CACAO_D3D12InitContext(FFX_CACAO_D3D12Context* context, ID3D12Device* device); + + /** + Destroys an FFX_CACAO_D3D12Context. + + \param context A pointer to the context to be destroyed. + \return The corresponding error code. + + \note This function does not destroy screen size dependent resources, and must be called after FFX_CACAO_D3D12DestroyScreenSizeDependentResources. + */ + FFX_CACAO_Status FFX_CACAO_D3D12DestroyContext(FFX_CACAO_D3D12Context* context); + + /** + Initialises screen size dependent resources for the FFX_CACAO_D3D12Context. + + \param context A pointer to the FFX_CACAO_D3D12Context. + \param info A pointer to an FFX_CACAO_D3D12ScreenSizeInfo struct containing screen size info. + \return The corresponding error code. 
+ */ + FFX_CACAO_Status FFX_CACAO_D3D12InitScreenSizeDependentResources(FFX_CACAO_D3D12Context* context, const FFX_CACAO_D3D12ScreenSizeInfo* info); + + /** + Destroys screen size dependent resources for the FFX_CACAO_D3D12Context. + + \param context A pointer to the FFX_CACAO_D3D12Context. + \return The corresponding error code. + */ + FFX_CACAO_Status FFX_CACAO_D3D12DestroyScreenSizeDependentResources(FFX_CACAO_D3D12Context* context); + + /** + Update the settings of the FFX_CACAO_D3D12Context to those stored in the FFX_CACAO_Settings struct. + + \param context A pointer to the FFX_CACAO_D3D12Context to update. + \param settings A pointer to the FFX_CACAO_Settings struct containing the new settings. + \return The corresponding error code. + */ + FFX_CACAO_Status FFX_CACAO_D3D12UpdateSettings(FFX_CACAO_D3D12Context* context, const FFX_CACAO_Settings* settings); + + /** + Append commands for drawing FFX CACAO to the provided ID3D12GraphicsCommandList. + + \param context A pointer to the FFX_CACAO_D3D12Context. + \param commandList A pointer to the ID3D12GraphicsCommandList to append commands to. + \param proj A pointer to the projection matrix. + \param normalsToView An optional pointer to a matrix for transforming normals in the normal buffer to viewspace. + \return The corresponding error code. + */ + FFX_CACAO_Status FFX_CACAO_D3D12Draw(FFX_CACAO_D3D12Context* context, ID3D12GraphicsCommandList* commandList, const FFX_CACAO_Matrix4x4* proj, const FFX_CACAO_Matrix4x4* normalsToView); + +#if FFX_CACAO_ENABLE_PROFILING + /** + Get detailed performance timings from the previous frame. + + \param context A pointer to the FFX_CACAO_D3D12Context. + \param timings A pointer to an FFX_CACAO_DetailedTiming struct to fill in with detailed timings. + \result The corresponding error code. 
+ */ + FFX_CACAO_Status FFX_CACAO_D3D12GetDetailedTimings(FFX_CACAO_D3D12Context* context, FFX_CACAO_DetailedTiming* timings); +#endif +#endif + +#ifdef FFX_CACAO_ENABLE_VULKAN + /** + Gets the size in bytes required by a Vulkan context. This is to be used to allocate space for the context. + For example: + + \code{.cpp} + size_t FFX_CACAO_VkContextSize = FFX_CACAO_VkGetContextSize(); + FFX_CACAO_VkContext *context = (FFX_CACAO_VkContext*)malloc(FFX_CACAO_VkContextSize); + + // ... + + FFX_CACAO_VkDestroyContext(context); + free(context); + \endcode + + \return The size in bytes of an FFX_CACAO_VkContext. + */ + size_t FFX_CACAO_VkGetContextSize(); + + /** + Initialises an FFX_CACAO_VkContext. + + \param context A pointer to the context to initialise. + \param info A pointer to an FFX_CACAO_VkCreateInfo struct with parameters such as the Vulkan device. + \return The corresponding error code. + */ + FFX_CACAO_Status FFX_CACAO_VkInitContext(FFX_CACAO_VkContext* context, const FFX_CACAO_VkCreateInfo *info); + + /** + Destroys an FFX_CACAO_VkContext. + + \param context A pointer to the context to be destroyed. + \return The corresponding error code. + + \note This function does not destroy screen size dependent resources, and must be called after FFX_CACAO_VkDestroyScreenSizeDependentResources. + */ + FFX_CACAO_Status FFX_CACAO_VkDestroyContext(FFX_CACAO_VkContext* context); + + /** + Initialises screen size dependent resources for the FFX_CACAO_VkContext. + + \param context A pointer to the FFX_CACAO_VkContext. + \param info A pointer to an FFX_CACAO_VkScreenSizeInfo struct containing screen size info. + \return The corresponding error code. + */ + FFX_CACAO_Status FFX_CACAO_VkInitScreenSizeDependentResources(FFX_CACAO_VkContext* context, const FFX_CACAO_VkScreenSizeInfo* info); + + /** + Destroys screen size dependent resources for the FFX_CACAO_VkContext. + + \param context A pointer to the FFX_CACAO_VkContext. + \return The corresponding error code. 
+ */ + FFX_CACAO_Status FFX_CACAO_VkDestroyScreenSizeDependentResources(FFX_CACAO_VkContext* context); + + /** + Update the settings of the FFX_CACAO_VkContext to those stored in the FFX_CACAO_Settings struct. + + \param context A pointer to the FFX_CACAO_VkContext to update. + \param settings A pointer to the FFX_CACAO_Settings struct containing the new settings. + \return The corresponding error code. + */ + FFX_CACAO_Status FFX_CACAO_VkUpdateSettings(FFX_CACAO_VkContext* context, const FFX_CACAO_Settings* settings); + + /** + Append commands for drawing FFX CACAO to the provided VkCommandBuffer. + + \param context A pointer to the FFX_CACAO_VkContext. + \param commandList The VkCommandBuffer to append commands to. + \param proj A pointer to the projection matrix. + \param normalsToView An optional pointer to a matrix for transforming normals in the normal buffer to viewspace. + \return The corresponding error code. + */ + FFX_CACAO_Status FFX_CACAO_VkDraw(FFX_CACAO_VkContext* context, VkCommandBuffer commandList, const FFX_CACAO_Matrix4x4* proj, const FFX_CACAO_Matrix4x4* normalsToView); + +#ifdef FFX_CACAO_ENABLE_PROFILING + /** + Get detailed performance timings from the previous frame. + + \param context A pointer to the FFX_CACAO_VkContext. + \param timings A pointer to an FFX_CACAO_DetailedTiming struct to fill in with detailed timings. + \return The corresponding error code. + */ + FFX_CACAO_Status FFX_CACAO_VkGetDetailedTimings(FFX_CACAO_VkContext* context, FFX_CACAO_DetailedTiming* timings); +#endif +#endif + +#ifdef __cplusplus +} +#endif diff --git a/ffx-cacao/readme.md b/ffx-cacao/readme.md index cbc1e91..66a9151 100644 --- a/ffx-cacao/readme.md +++ b/ffx-cacao/readme.md @@ -4,7 +4,15 @@ The **FidelityFX CACAO** library implements screen space ambient occlusion for u # Project Integration -FidelityFX CACAO supports three compile time options. 
These are: +FidelityFX CACAO comes with two main header files, `ffx-cacao/inc/ffx_cacao.h` and `ffx-cacao/inc/ffx_cacao_impl.h`. The file `ffx-cacao/inc/ffx_cacao.h` contains reusable C++ functions and struct definitions for integration of FidelityFX CACAO into custom engines. The functions declared in this header file are defined in `ffx-cacao/src/ffx_cacao.cpp`. The header file `ffx-cacao/inc/ffx_cacao_impl.h` is for use in quick integration of FidelityFX CACAO into DX12 and Vulkan engines. The functions declared in this file are defined in `ffx-cacao/src/ffx_cacao_impl.cpp`, which serves as a reference implementation of FidelityFX CACAO. + +# Reusable Functions and Structs + +The reusable functions and structs provided in `ffx-cacao/inc/ffx_cacao.h` are documented via Doxygen comments in the header file itself. The functions and structs are used to initialise the constant buffers used by FidelityFX CACAO from a user-friendly settings struct `FFX_CACAO_Settings`. + +# Reference Implementation + +The reference implementation of FidelityFX CACAO supports three compile-time options. These are: ```C++ FFX_CACAO_ENABLE_D3D12 @@ -12,11 +20,11 @@ FFX_CACAO_ENABLE_VK FFX_CACAO_ENABLE_PROFILING ``` -For use with D3D12 or Vulkan, the symbols `FFX_CACAO_ENABLE_D3D12` or `FFX_CACAO_ENABLE_VK` must be defined. If you wish to get detailed timings from FFX CACAO the symbol `FFX_CACAO_ENABLE_PROFILING` must be defined. These symbols can either be defined in the header `ffx-cacao/inc/ffx_cacao.h` itself by uncommenting the respective definitions, or they can defined in compiler flags. The provided sample of FFX CACAO defines these symbols using compiler flags. +For use with D3D12 or Vulkan, the symbols `FFX_CACAO_ENABLE_D3D12` or `FFX_CACAO_ENABLE_VULKAN` must be defined. If you wish to get detailed timings from FFX CACAO the symbol `FFX_CACAO_ENABLE_PROFILING` must be defined. 
These symbols can either be defined in the header `ffx-cacao/inc/ffx_cacao_impl.h` itself by uncommenting the respective definitions, or they can be defined in compiler flags. The provided sample of FFX CACAO defines these symbols using compiler flags. # Context Initialisation and Shutdown -First the FFX CACAO header must be included. This can be found at `ffx-cacao/inc/ffx_cacao.h`. Then a context must be created. This is usually done only once per device. To create a context you must first query for the size of a context, allocate space for a context, then inintialise the context. +First the FFX CACAO header must be included. This can be found at `ffx-cacao/inc/ffx_cacao_impl.h`. Then a context must be created. This is usually done only once per device. To create a context you must first query for the size of a context, allocate space for a context, then initialise the context. For D3D12 the initialisation and shutdown processes are as follows: diff --git a/ffx-cacao/src/build_shaders_dxil.bat b/ffx-cacao/src/build_shaders_dxil.bat index 2e8e59e..48cbb93 100644 --- a/ffx-cacao/src/build_shaders_dxil.bat +++ b/ffx-cacao/src/build_shaders_dxil.bat @@ -6,47 +6,50 @@ set cauldron_dxc=..\..\sample\libs\cauldron\libs\DXC\bin\dxc.exe -T cs_6_2 if not exist "PrecompiledShadersDXIL" mkdir "PrecompiledShadersDXIL" -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareDownsampledDepths.h -Vn CSPrepareDownsampledDepthsDXIL -E CSPrepareDownsampledDepths ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOClearLoadCounter.h -Vn CSClearLoadCounterDXIL -E FFX_CACAO_ClearLoadCounter ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareNativeDepths.h -Vn CSPrepareNativeDepthsDXIL -E CSPrepareNativeDepths ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareDownsampledDepths.h -Vn CSPrepareDownsampledDepthsDXIL -E FFX_CACAO_PrepareDownsampledDepths ffx_cacao.hlsl -%cauldron_dxc% -Fh 
PrecompiledShadersDXIL/CACAOPrepareDownsampledDepthsAndMips.h -Vn CSPrepareDownsampledDepthsAndMipsDXIL -E CSPrepareDownsampledDepthsAndMips ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareNativeDepthsAndMips.h -Vn CSPrepareNativeDepthsAndMipsDXIL -E CSPrepareNativeDepthsAndMips ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareNativeDepths.h -Vn CSPrepareNativeDepthsDXIL -E FFX_CACAO_PrepareNativeDepths ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareDownsampledNormals.h -Vn CSPrepareDownsampledNormalsDXIL -E CSPrepareDownsampledNormals ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareNativeNormals.h -Vn CSPrepareNativeNormalsDXIL -E CSPrepareNativeNormals ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareDownsampledDepthsAndMips.h -Vn CSPrepareDownsampledDepthsAndMipsDXIL -E FFX_CACAO_PrepareDownsampledDepthsAndMips ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareNativeDepthsAndMips.h -Vn CSPrepareNativeDepthsAndMipsDXIL -E FFX_CACAO_PrepareNativeDepthsAndMips ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareDownsampledNormalsFromInputNormals.h -Vn CSPrepareDownsampledNormalsFromInputNormalsDXIL -E CSPrepareDownsampledNormalsFromInputNormals ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareNativeNormalsFromInputNormals.h -Vn CSPrepareNativeNormalsFromInputNormalsDXIL -E CSPrepareNativeNormalsFromInputNormals ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareDownsampledNormals.h -Vn CSPrepareDownsampledNormalsDXIL -E FFX_CACAO_PrepareDownsampledNormals ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareNativeNormals.h -Vn CSPrepareNativeNormalsDXIL -E FFX_CACAO_PrepareNativeNormals ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareDownsampledDepthsHalf.h -Vn CSPrepareDownsampledDepthsHalfDXIL -E CSPrepareDownsampledDepthsHalf ffx_cacao.hlsl 
-%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareNativeDepthsHalf.h -Vn CSPrepareNativeDepthsHalfDXIL -E CSPrepareNativeDepthsHalf ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareDownsampledNormalsFromInputNormals.h -Vn CSPrepareDownsampledNormalsFromInputNormalsDXIL -E FFX_CACAO_PrepareDownsampledNormalsFromInputNormals ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareNativeNormalsFromInputNormals.h -Vn CSPrepareNativeNormalsFromInputNormalsDXIL -E FFX_CACAO_PrepareNativeNormalsFromInputNormals ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareDownsampledDepthsHalf.h -Vn CSPrepareDownsampledDepthsHalfDXIL -E FFX_CACAO_PrepareDownsampledDepthsHalf ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPrepareNativeDepthsHalf.h -Vn CSPrepareNativeDepthsHalfDXIL -E FFX_CACAO_PrepareNativeDepthsHalf ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOGenerateQ0.h -Vn CSGenerateQ0DXIL -E CSGenerateQ0 ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOGenerateQ1.h -Vn CSGenerateQ1DXIL -E CSGenerateQ1 ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOGenerateQ2.h -Vn CSGenerateQ2DXIL -E CSGenerateQ2 ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOGenerateQ3.h -Vn CSGenerateQ3DXIL -E CSGenerateQ3 ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOGenerateQ3Base.h -Vn CSGenerateQ3BaseDXIL -E CSGenerateQ3Base ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOGenerateImportanceMap.h -Vn CSGenerateImportanceMapDXIL -E CSGenerateImportanceMap ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPostprocessImportanceMapA.h -Vn CSPostprocessImportanceMapADXIL -E CSPostprocessImportanceMapA ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPostprocessImportanceMapB.h -Vn CSPostprocessImportanceMapBDXIL -E CSPostprocessImportanceMapB ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOGenerateQ0.h 
-Vn CSGenerateQ0DXIL -E FFX_CACAO_GenerateQ0 ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOGenerateQ1.h -Vn CSGenerateQ1DXIL -E FFX_CACAO_GenerateQ1 ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOGenerateQ2.h -Vn CSGenerateQ2DXIL -E FFX_CACAO_GenerateQ2 ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOGenerateQ3.h -Vn CSGenerateQ3DXIL -E FFX_CACAO_GenerateQ3 ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOGenerateQ3Base.h -Vn CSGenerateQ3BaseDXIL -E FFX_CACAO_GenerateQ3Base ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur1.h -Vn CSEdgeSensitiveBlur1DXIL -E CSEdgeSensitiveBlur1 ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur2.h -Vn CSEdgeSensitiveBlur2DXIL -E CSEdgeSensitiveBlur2 ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur3.h -Vn CSEdgeSensitiveBlur3DXIL -E CSEdgeSensitiveBlur3 ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur4.h -Vn CSEdgeSensitiveBlur4DXIL -E CSEdgeSensitiveBlur4 ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur5.h -Vn CSEdgeSensitiveBlur5DXIL -E CSEdgeSensitiveBlur5 ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur6.h -Vn CSEdgeSensitiveBlur6DXIL -E CSEdgeSensitiveBlur6 ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur7.h -Vn CSEdgeSensitiveBlur7DXIL -E CSEdgeSensitiveBlur7 ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur8.h -Vn CSEdgeSensitiveBlur8DXIL -E CSEdgeSensitiveBlur8 ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOGenerateImportanceMap.h -Vn CSGenerateImportanceMapDXIL -E FFX_CACAO_GenerateImportanceMap ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOPostprocessImportanceMapA.h -Vn CSPostprocessImportanceMapADXIL -E FFX_CACAO_PostprocessImportanceMapA ffx_cacao.hlsl +%cauldron_dxc% -Fh 
PrecompiledShadersDXIL/CACAOPostprocessImportanceMapB.h -Vn CSPostprocessImportanceMapBDXIL -E FFX_CACAO_PostprocessImportanceMapB ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOApply.h -Vn CSApplyDXIL -E CSApply ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAONonSmartApply.h -Vn CSNonSmartApplyDXIL -E CSNonSmartApply ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAONonSmartHalfApply.h -Vn CSNonSmartHalfApplyDXIL -E CSNonSmartHalfApply ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur1.h -Vn CSEdgeSensitiveBlur1DXIL -E FFX_CACAO_EdgeSensitiveBlur1 ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur2.h -Vn CSEdgeSensitiveBlur2DXIL -E FFX_CACAO_EdgeSensitiveBlur2 ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur3.h -Vn CSEdgeSensitiveBlur3DXIL -E FFX_CACAO_EdgeSensitiveBlur3 ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur4.h -Vn CSEdgeSensitiveBlur4DXIL -E FFX_CACAO_EdgeSensitiveBlur4 ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur5.h -Vn CSEdgeSensitiveBlur5DXIL -E FFX_CACAO_EdgeSensitiveBlur5 ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur6.h -Vn CSEdgeSensitiveBlur6DXIL -E FFX_CACAO_EdgeSensitiveBlur6 ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur7.h -Vn CSEdgeSensitiveBlur7DXIL -E FFX_CACAO_EdgeSensitiveBlur7 ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur8.h -Vn CSEdgeSensitiveBlur8DXIL -E FFX_CACAO_EdgeSensitiveBlur8 ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOUpscaleBilateral5x5.h -Vn CSUpscaleBilateral5x5DXIL -E CSUpscaleBilateral5x5 ffx_cacao.hlsl -%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOUpscaleBilateral5x5Half.h -Vn CSUpscaleBilateral5x5HalfDXIL -E CSUpscaleBilateral5x5Half ffx_cacao.hlsl +%cauldron_dxc% -Fh 
PrecompiledShadersDXIL/CACAOApply.h -Vn CSApplyDXIL -E FFX_CACAO_Apply ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAONonSmartApply.h -Vn CSNonSmartApplyDXIL -E FFX_CACAO_NonSmartApply ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAONonSmartHalfApply.h -Vn CSNonSmartHalfApplyDXIL -E FFX_CACAO_NonSmartHalfApply ffx_cacao.hlsl + +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOUpscaleBilateral5x5NonSmart.h -Vn CSUpscaleBilateral5x5NonSmartDXIL -E FFX_CACAO_UpscaleBilateral5x5NonSmart ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOUpscaleBilateral5x5Smart.h -Vn CSUpscaleBilateral5x5SmartDXIL -E FFX_CACAO_UpscaleBilateral5x5Smart ffx_cacao.hlsl +%cauldron_dxc% -Fh PrecompiledShadersDXIL/CACAOUpscaleBilateral5x5Half.h -Vn CSUpscaleBilateral5x5HalfDXIL -E FFX_CACAO_UpscaleBilateral5x5Half ffx_cacao.hlsl popd diff --git a/ffx-cacao/src/build_shaders_spirv.bat b/ffx-cacao/src/build_shaders_spirv.bat index 6f821bd..b21690c 100644 --- a/ffx-cacao/src/build_shaders_spirv.bat +++ b/ffx-cacao/src/build_shaders_spirv.bat @@ -7,95 +7,97 @@ set cauldron_dxc_32=..\..\sample\libs\cauldron\libs\DXC\bin\dxc.exe -Wno-convers if not exist "PrecompiledShadersSPIRV" mkdir "PrecompiledShadersSPIRV" -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOClearLoadCounter_16.h -Vn CSClearLoadCounterSPIRV16 -E CSClearLoadCounter ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOClearLoadCounter_16.h -Vn CSClearLoadCounterSPIRV16 -E FFX_CACAO_ClearLoadCounter ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepths_16.h -Vn CSPrepareDownsampledDepthsSPIRV16 -E CSPrepareDownsampledDepths ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepths_16.h -Vn CSPrepareDownsampledDepthsSPIRV16 -E FFX_CACAO_PrepareDownsampledDepths ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeDepths_16.h -Vn CSPrepareNativeDepthsSPIRV16 -E 
CSPrepareNativeDepths ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeDepths_16.h -Vn CSPrepareNativeDepthsSPIRV16 -E FFX_CACAO_PrepareNativeDepths ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsAndMips_16.h -Vn CSPrepareDownsampledDepthsAndMipsSPIRV16 -E CSPrepareDownsampledDepthsAndMips ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsAndMips_16.h -Vn CSPrepareNativeDepthsAndMipsSPIRV16 -E CSPrepareNativeDepthsAndMips ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsAndMips_16.h -Vn CSPrepareDownsampledDepthsAndMipsSPIRV16 -E FFX_CACAO_PrepareDownsampledDepthsAndMips ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsAndMips_16.h -Vn CSPrepareNativeDepthsAndMipsSPIRV16 -E FFX_CACAO_PrepareNativeDepthsAndMips ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormals_16.h -Vn CSPrepareDownsampledNormalsSPIRV16 -E CSPrepareDownsampledNormals ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeNormals_16.h -Vn CSPrepareNativeNormalsSPIRV16 -E CSPrepareNativeNormals ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormals_16.h -Vn CSPrepareDownsampledNormalsSPIRV16 -E FFX_CACAO_PrepareDownsampledNormals ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeNormals_16.h -Vn CSPrepareNativeNormalsSPIRV16 -E FFX_CACAO_PrepareNativeNormals ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormalsFromInputNormals_16.h -Vn CSPrepareDownsampledNormalsFromInputNormalsSPIRV16 -E CSPrepareDownsampledNormalsFromInputNormals ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeNormalsFromInputNormals_16.h -Vn CSPrepareNativeNormalsFromInputNormalsSPIRV16 -E CSPrepareNativeNormalsFromInputNormals ffx_cacao.hlsl 
+%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormalsFromInputNormals_16.h -Vn CSPrepareDownsampledNormalsFromInputNormalsSPIRV16 -E FFX_CACAO_PrepareDownsampledNormalsFromInputNormals ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeNormalsFromInputNormals_16.h -Vn CSPrepareNativeNormalsFromInputNormalsSPIRV16 -E FFX_CACAO_PrepareNativeNormalsFromInputNormals ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsHalf_16.h -Vn CSPrepareDownsampledDepthsHalfSPIRV16 -E CSPrepareDownsampledDepthsHalf ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsHalf_16.h -Vn CSPrepareNativeDepthsHalfSPIRV16 -E CSPrepareNativeDepthsHalf ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsHalf_16.h -Vn CSPrepareDownsampledDepthsHalfSPIRV16 -E FFX_CACAO_PrepareDownsampledDepthsHalf ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsHalf_16.h -Vn CSPrepareNativeDepthsHalfSPIRV16 -E FFX_CACAO_PrepareNativeDepthsHalf ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ0_16.h -Vn CSGenerateQ0SPIRV16 -E CSGenerateQ0 ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ1_16.h -Vn CSGenerateQ1SPIRV16 -E CSGenerateQ1 ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ2_16.h -Vn CSGenerateQ2SPIRV16 -E CSGenerateQ2 ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ3_16.h -Vn CSGenerateQ3SPIRV16 -E CSGenerateQ3 ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ3Base_16.h -Vn CSGenerateQ3BaseSPIRV16 -E CSGenerateQ3Base ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ0_16.h -Vn CSGenerateQ0SPIRV16 -E FFX_CACAO_GenerateQ0 ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ1_16.h -Vn CSGenerateQ1SPIRV16 -E 
FFX_CACAO_GenerateQ1 ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ2_16.h -Vn CSGenerateQ2SPIRV16 -E FFX_CACAO_GenerateQ2 ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ3_16.h -Vn CSGenerateQ3SPIRV16 -E FFX_CACAO_GenerateQ3 ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ3Base_16.h -Vn CSGenerateQ3BaseSPIRV16 -E FFX_CACAO_GenerateQ3Base ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOGenerateImportanceMap_16.h -Vn CSGenerateImportanceMapSPIRV16 -E CSGenerateImportanceMap ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapA_16.h -Vn CSPostprocessImportanceMapASPIRV16 -E CSPostprocessImportanceMapA ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapB_16.h -Vn CSPostprocessImportanceMapBSPIRV16 -E CSPostprocessImportanceMapB ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOGenerateImportanceMap_16.h -Vn CSGenerateImportanceMapSPIRV16 -E FFX_CACAO_GenerateImportanceMap ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapA_16.h -Vn CSPostprocessImportanceMapASPIRV16 -E FFX_CACAO_PostprocessImportanceMapA ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapB_16.h -Vn CSPostprocessImportanceMapBSPIRV16 -E FFX_CACAO_PostprocessImportanceMapB ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur1_16.h -Vn CSEdgeSensitiveBlur1SPIRV16 -E CSEdgeSensitiveBlur1 ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur2_16.h -Vn CSEdgeSensitiveBlur2SPIRV16 -E CSEdgeSensitiveBlur2 ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur3_16.h -Vn CSEdgeSensitiveBlur3SPIRV16 -E CSEdgeSensitiveBlur3 ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur4_16.h -Vn CSEdgeSensitiveBlur4SPIRV16 
-E CSEdgeSensitiveBlur4 ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur5_16.h -Vn CSEdgeSensitiveBlur5SPIRV16 -E CSEdgeSensitiveBlur5 ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur6_16.h -Vn CSEdgeSensitiveBlur6SPIRV16 -E CSEdgeSensitiveBlur6 ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur7_16.h -Vn CSEdgeSensitiveBlur7SPIRV16 -E CSEdgeSensitiveBlur7 ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur8_16.h -Vn CSEdgeSensitiveBlur8SPIRV16 -E CSEdgeSensitiveBlur8 ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur1_16.h -Vn CSEdgeSensitiveBlur1SPIRV16 -E FFX_CACAO_EdgeSensitiveBlur1 ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur2_16.h -Vn CSEdgeSensitiveBlur2SPIRV16 -E FFX_CACAO_EdgeSensitiveBlur2 ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur3_16.h -Vn CSEdgeSensitiveBlur3SPIRV16 -E FFX_CACAO_EdgeSensitiveBlur3 ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur4_16.h -Vn CSEdgeSensitiveBlur4SPIRV16 -E FFX_CACAO_EdgeSensitiveBlur4 ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur5_16.h -Vn CSEdgeSensitiveBlur5SPIRV16 -E FFX_CACAO_EdgeSensitiveBlur5 ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur6_16.h -Vn CSEdgeSensitiveBlur6SPIRV16 -E FFX_CACAO_EdgeSensitiveBlur6 ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur7_16.h -Vn CSEdgeSensitiveBlur7SPIRV16 -E FFX_CACAO_EdgeSensitiveBlur7 ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur8_16.h -Vn CSEdgeSensitiveBlur8SPIRV16 -E FFX_CACAO_EdgeSensitiveBlur8 ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOApply_16.h -Vn CSApplySPIRV16 -E CSApply ffx_cacao.hlsl -%cauldron_dxc_16% -Fh 
PrecompiledShadersSPIRV/CACAONonSmartApply_16.h -Vn CSNonSmartApplySPIRV16 -E CSNonSmartApply ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAONonSmartHalfApply_16.h -Vn CSNonSmartHalfApplySPIRV16 -E CSNonSmartHalfApply ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOApply_16.h -Vn CSApplySPIRV16 -E FFX_CACAO_Apply ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAONonSmartApply_16.h -Vn CSNonSmartApplySPIRV16 -E FFX_CACAO_NonSmartApply ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAONonSmartHalfApply_16.h -Vn CSNonSmartHalfApplySPIRV16 -E FFX_CACAO_NonSmartHalfApply ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5_16.h -Vn CSUpscaleBilateral5x5SPIRV16 -E CSUpscaleBilateral5x5 ffx_cacao.hlsl -%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5Half_16.h -Vn CSUpscaleBilateral5x5HalfSPIRV16 -E CSUpscaleBilateral5x5Half ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5Smart_16.h -Vn CSUpscaleBilateral5x5SmartSPIRV16 -E FFX_CACAO_UpscaleBilateral5x5Smart ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5NonSmart_16.h -Vn CSUpscaleBilateral5x5NonSmartSPIRV16 -E FFX_CACAO_UpscaleBilateral5x5NonSmart ffx_cacao.hlsl +%cauldron_dxc_16% -Fh PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5Half_16.h -Vn CSUpscaleBilateral5x5HalfSPIRV16 -E FFX_CACAO_UpscaleBilateral5x5Half ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOClearLoadCounter_32.h -Vn CSClearLoadCounterSPIRV32 -E CSClearLoadCounter ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOClearLoadCounter_32.h -Vn CSClearLoadCounterSPIRV32 -E FFX_CACAO_ClearLoadCounter ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepths_32.h -Vn CSPrepareDownsampledDepthsSPIRV32 -E CSPrepareDownsampledDepths ffx_cacao.hlsl +%cauldron_dxc_32% -Fh 
PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepths_32.h -Vn CSPrepareDownsampledDepthsSPIRV32 -E FFX_CACAO_PrepareDownsampledDepths ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeDepths_32.h -Vn CSPrepareNativeDepthsSPIRV32 -E CSPrepareNativeDepths ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeDepths_32.h -Vn CSPrepareNativeDepthsSPIRV32 -E FFX_CACAO_PrepareNativeDepths ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsAndMips_32.h -Vn CSPrepareDownsampledDepthsAndMipsSPIRV32 -E CSPrepareDownsampledDepthsAndMips ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsAndMips_32.h -Vn CSPrepareNativeDepthsAndMipsSPIRV32 -E CSPrepareNativeDepthsAndMips ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsAndMips_32.h -Vn CSPrepareDownsampledDepthsAndMipsSPIRV32 -E FFX_CACAO_PrepareDownsampledDepthsAndMips ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsAndMips_32.h -Vn CSPrepareNativeDepthsAndMipsSPIRV32 -E FFX_CACAO_PrepareNativeDepthsAndMips ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormals_32.h -Vn CSPrepareDownsampledNormalsSPIRV32 -E CSPrepareDownsampledNormals ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeNormals_32.h -Vn CSPrepareNativeNormalsSPIRV32 -E CSPrepareNativeNormals ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormals_32.h -Vn CSPrepareDownsampledNormalsSPIRV32 -E FFX_CACAO_PrepareDownsampledNormals ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeNormals_32.h -Vn CSPrepareNativeNormalsSPIRV32 -E FFX_CACAO_PrepareNativeNormals ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormalsFromInputNormals_32.h -Vn CSPrepareDownsampledNormalsFromInputNormalsSPIRV32 
-E CSPrepareDownsampledNormalsFromInputNormals ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeNormalsFromInputNormals_32.h -Vn CSPrepareNativeNormalsFromInputNormalsSPIRV32 -E CSPrepareNativeNormalsFromInputNormals ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormalsFromInputNormals_32.h -Vn CSPrepareDownsampledNormalsFromInputNormalsSPIRV32 -E FFX_CACAO_PrepareDownsampledNormalsFromInputNormals ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeNormalsFromInputNormals_32.h -Vn CSPrepareNativeNormalsFromInputNormalsSPIRV32 -E FFX_CACAO_PrepareNativeNormalsFromInputNormals ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsHalf_32.h -Vn CSPrepareDownsampledDepthsHalfSPIRV32 -E CSPrepareDownsampledDepthsHalf ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsHalf_32.h -Vn CSPrepareNativeDepthsHalfSPIRV32 -E CSPrepareNativeDepthsHalf ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsHalf_32.h -Vn CSPrepareDownsampledDepthsHalfSPIRV32 -E FFX_CACAO_PrepareDownsampledDepthsHalf ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsHalf_32.h -Vn CSPrepareNativeDepthsHalfSPIRV32 -E FFX_CACAO_PrepareNativeDepthsHalf ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ0_32.h -Vn CSGenerateQ0SPIRV32 -E CSGenerateQ0 ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ1_32.h -Vn CSGenerateQ1SPIRV32 -E CSGenerateQ1 ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ2_32.h -Vn CSGenerateQ2SPIRV32 -E CSGenerateQ2 ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ3_32.h -Vn CSGenerateQ3SPIRV32 -E CSGenerateQ3 ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ3Base_32.h -Vn CSGenerateQ3BaseSPIRV32 -E 
CSGenerateQ3Base ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ0_32.h -Vn CSGenerateQ0SPIRV32 -E FFX_CACAO_GenerateQ0 ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ1_32.h -Vn CSGenerateQ1SPIRV32 -E FFX_CACAO_GenerateQ1 ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ2_32.h -Vn CSGenerateQ2SPIRV32 -E FFX_CACAO_GenerateQ2 ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ3_32.h -Vn CSGenerateQ3SPIRV32 -E FFX_CACAO_GenerateQ3 ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOGenerateQ3Base_32.h -Vn CSGenerateQ3BaseSPIRV32 -E FFX_CACAO_GenerateQ3Base ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOGenerateImportanceMap_32.h -Vn CSGenerateImportanceMapSPIRV32 -E CSGenerateImportanceMap ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapA_32.h -Vn CSPostprocessImportanceMapASPIRV32 -E CSPostprocessImportanceMapA ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapB_32.h -Vn CSPostprocessImportanceMapBSPIRV32 -E CSPostprocessImportanceMapB ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOGenerateImportanceMap_32.h -Vn CSGenerateImportanceMapSPIRV32 -E FFX_CACAO_GenerateImportanceMap ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapA_32.h -Vn CSPostprocessImportanceMapASPIRV32 -E FFX_CACAO_PostprocessImportanceMapA ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapB_32.h -Vn CSPostprocessImportanceMapBSPIRV32 -E FFX_CACAO_PostprocessImportanceMapB ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur1_32.h -Vn CSEdgeSensitiveBlur1SPIRV32 -E CSEdgeSensitiveBlur1 ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur2_32.h -Vn CSEdgeSensitiveBlur2SPIRV32 -E CSEdgeSensitiveBlur2 
ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur3_32.h -Vn CSEdgeSensitiveBlur3SPIRV32 -E CSEdgeSensitiveBlur3 ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur4_32.h -Vn CSEdgeSensitiveBlur4SPIRV32 -E CSEdgeSensitiveBlur4 ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur5_32.h -Vn CSEdgeSensitiveBlur5SPIRV32 -E CSEdgeSensitiveBlur5 ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur6_32.h -Vn CSEdgeSensitiveBlur6SPIRV32 -E CSEdgeSensitiveBlur6 ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur7_32.h -Vn CSEdgeSensitiveBlur7SPIRV32 -E CSEdgeSensitiveBlur7 ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur8_32.h -Vn CSEdgeSensitiveBlur8SPIRV32 -E CSEdgeSensitiveBlur8 ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur1_32.h -Vn CSEdgeSensitiveBlur1SPIRV32 -E FFX_CACAO_EdgeSensitiveBlur1 ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur2_32.h -Vn CSEdgeSensitiveBlur2SPIRV32 -E FFX_CACAO_EdgeSensitiveBlur2 ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur3_32.h -Vn CSEdgeSensitiveBlur3SPIRV32 -E FFX_CACAO_EdgeSensitiveBlur3 ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur4_32.h -Vn CSEdgeSensitiveBlur4SPIRV32 -E FFX_CACAO_EdgeSensitiveBlur4 ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur5_32.h -Vn CSEdgeSensitiveBlur5SPIRV32 -E FFX_CACAO_EdgeSensitiveBlur5 ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur6_32.h -Vn CSEdgeSensitiveBlur6SPIRV32 -E FFX_CACAO_EdgeSensitiveBlur6 ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur7_32.h -Vn CSEdgeSensitiveBlur7SPIRV32 -E FFX_CACAO_EdgeSensitiveBlur7 ffx_cacao.hlsl 
+%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur8_32.h -Vn CSEdgeSensitiveBlur8SPIRV32 -E FFX_CACAO_EdgeSensitiveBlur8 ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOApply_32.h -Vn CSApplySPIRV32 -E CSApply ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAONonSmartApply_32.h -Vn CSNonSmartApplySPIRV32 -E CSNonSmartApply ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAONonSmartHalfApply_32.h -Vn CSNonSmartHalfApplySPIRV32 -E CSNonSmartHalfApply ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOApply_32.h -Vn CSApplySPIRV32 -E FFX_CACAO_Apply ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAONonSmartApply_32.h -Vn CSNonSmartApplySPIRV32 -E FFX_CACAO_NonSmartApply ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAONonSmartHalfApply_32.h -Vn CSNonSmartHalfApplySPIRV32 -E FFX_CACAO_NonSmartHalfApply ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5_32.h -Vn CSUpscaleBilateral5x5SPIRV32 -E CSUpscaleBilateral5x5 ffx_cacao.hlsl -%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5Half_32.h -Vn CSUpscaleBilateral5x5HalfSPIRV32 -E CSUpscaleBilateral5x5Half ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5Smart_32.h -Vn CSUpscaleBilateral5x5SmartSPIRV32 -E FFX_CACAO_UpscaleBilateral5x5Smart ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5NonSmart_32.h -Vn CSUpscaleBilateral5x5NonSmartSPIRV32 -E FFX_CACAO_UpscaleBilateral5x5NonSmart ffx_cacao.hlsl +%cauldron_dxc_32% -Fh PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5Half_32.h -Vn CSUpscaleBilateral5x5HalfSPIRV32 -E FFX_CACAO_UpscaleBilateral5x5Half ffx_cacao.hlsl popd diff --git a/ffx-cacao/src/ffx_cacao.cpp b/ffx-cacao/src/ffx_cacao.cpp index 547bbf7..0c20374 100644 --- a/ffx-cacao/src/ffx_cacao.cpp +++ b/ffx-cacao/src/ffx_cacao.cpp @@ -1,4 +1,4 @@ -// Modifications 
Copyright © 2020. Advanced Micro Devices, Inc. All Rights Reserved. +// Modifications Copyright © 2021. Advanced Micro Devices, Inc. All Rights Reserved. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Copyright (c) 2016, Intel Corporation @@ -19,19 +19,14 @@ /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// #include "ffx_cacao.h" -#include "ffx_cacao_defines.h" #include #include // cos, sin #include // memcpy #include // snprintf -#ifdef FFX_CACAO_ENABLE_D3D12 -#include -#endif - // Define symbol to enable DirectX debug markers created using Cauldron -// #define FFX_CACAO_ENABLE_CAULDRON_DEBUG +#define FFX_CACAO_ENABLE_CAULDRON_DEBUG #define FFX_CACAO_ASSERT(exp) assert(exp) #define FFX_CACAO_ARRAY_SIZE(xs) (sizeof(xs)/sizeof(xs[0])) @@ -42,268 +37,73 @@ #define FFX_CACAO_CLAMP(value, lower, upper) FFX_CACAO_MIN(FFX_CACAO_MAX(value, lower), upper) #define FFX_CACAO_OFFSET_OF(T, member) (size_t)(&(((T*)0)->member)) -#ifdef FFX_CACAO_ENABLE_D3D12 -#include "PrecompiledShadersDXIL/CACAOPrepareDownsampledDepthsHalf.h" -#include "PrecompiledShadersDXIL/CACAOPrepareNativeDepthsHalf.h" - -#include "PrecompiledShadersDXIL/CACAOPrepareDownsampledDepthsAndMips.h" -#include "PrecompiledShadersDXIL/CACAOPrepareNativeDepthsAndMips.h" - -#include "PrecompiledShadersDXIL/CACAOPrepareDownsampledNormals.h" -#include "PrecompiledShadersDXIL/CACAOPrepareNativeNormals.h" - -#include "PrecompiledShadersDXIL/CACAOPrepareDownsampledNormalsFromInputNormals.h" -#include "PrecompiledShadersDXIL/CACAOPrepareNativeNormalsFromInputNormals.h" - -#include "PrecompiledShadersDXIL/CACAOPrepareDownsampledDepths.h" -#include "PrecompiledShadersDXIL/CACAOPrepareNativeDepths.h" - -#include "PrecompiledShadersDXIL/CACAOGenerateQ0.h" -#include "PrecompiledShadersDXIL/CACAOGenerateQ1.h" -#include "PrecompiledShadersDXIL/CACAOGenerateQ2.h" -#include 
"PrecompiledShadersDXIL/CACAOGenerateQ3.h" -#include "PrecompiledShadersDXIL/CACAOGenerateQ3Base.h" - -#include "PrecompiledShadersDXIL/CACAOGenerateImportanceMap.h" -#include "PrecompiledShadersDXIL/CACAOPostprocessImportanceMapA.h" -#include "PrecompiledShadersDXIL/CACAOPostprocessImportanceMapB.h" - -#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur1.h" -#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur2.h" -#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur3.h" -#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur4.h" -#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur5.h" -#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur6.h" -#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur7.h" -#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur8.h" - -#include "PrecompiledShadersDXIL/CACAOApply.h" -#include "PrecompiledShadersDXIL/CACAONonSmartApply.h" -#include "PrecompiledShadersDXIL/CACAONonSmartHalfApply.h" - -#include "PrecompiledShadersDXIL/CACAOUpscaleBilateral5x5.h" -#include "PrecompiledShadersDXIL/CACAOUpscaleBilateral5x5Half.h" -#endif - -#ifdef FFX_CACAO_ENABLE_VULKAN -// 16 bit versions -#include "PrecompiledShadersSPIRV/CACAOClearLoadCounter_16.h" - -#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsHalf_16.h" -#include "PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsHalf_16.h" - -#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsAndMips_16.h" -#include "PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsAndMips_16.h" - -#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormals_16.h" -#include "PrecompiledShadersSPIRV/CACAOPrepareNativeNormals_16.h" - -#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormalsFromInputNormals_16.h" -#include "PrecompiledShadersSPIRV/CACAOPrepareNativeNormalsFromInputNormals_16.h" - -#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepths_16.h" -#include "PrecompiledShadersSPIRV/CACAOPrepareNativeDepths_16.h" - -#include 
"PrecompiledShadersSPIRV/CACAOGenerateQ0_16.h" -#include "PrecompiledShadersSPIRV/CACAOGenerateQ1_16.h" -#include "PrecompiledShadersSPIRV/CACAOGenerateQ2_16.h" -#include "PrecompiledShadersSPIRV/CACAOGenerateQ3_16.h" -#include "PrecompiledShadersSPIRV/CACAOGenerateQ3Base_16.h" - -#include "PrecompiledShadersSPIRV/CACAOGenerateImportanceMap_16.h" -#include "PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapA_16.h" -#include "PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapB_16.h" - -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur1_16.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur2_16.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur3_16.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur4_16.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur5_16.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur6_16.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur7_16.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur8_16.h" - -#include "PrecompiledShadersSPIRV/CACAOApply_16.h" -#include "PrecompiledShadersSPIRV/CACAONonSmartApply_16.h" -#include "PrecompiledShadersSPIRV/CACAONonSmartHalfApply_16.h" - -#include "PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5_16.h" -#include "PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5Half_16.h" - -// 32 bit versions -#include "PrecompiledShadersSPIRV/CACAOClearLoadCounter_32.h" - -#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsHalf_32.h" -#include "PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsHalf_32.h" - -#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsAndMips_32.h" -#include "PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsAndMips_32.h" - -#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormals_32.h" -#include "PrecompiledShadersSPIRV/CACAOPrepareNativeNormals_32.h" - -#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormalsFromInputNormals_32.h" -#include 
"PrecompiledShadersSPIRV/CACAOPrepareNativeNormalsFromInputNormals_32.h" - -#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepths_32.h" -#include "PrecompiledShadersSPIRV/CACAOPrepareNativeDepths_32.h" - -#include "PrecompiledShadersSPIRV/CACAOGenerateQ0_32.h" -#include "PrecompiledShadersSPIRV/CACAOGenerateQ1_32.h" -#include "PrecompiledShadersSPIRV/CACAOGenerateQ2_32.h" -#include "PrecompiledShadersSPIRV/CACAOGenerateQ3_32.h" -#include "PrecompiledShadersSPIRV/CACAOGenerateQ3Base_32.h" - -#include "PrecompiledShadersSPIRV/CACAOGenerateImportanceMap_32.h" -#include "PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapA_32.h" -#include "PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapB_32.h" - -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur1_32.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur2_32.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur3_32.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur4_32.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur5_32.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur6_32.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur7_32.h" -#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur8_32.h" - -#include "PrecompiledShadersSPIRV/CACAOApply_32.h" -#include "PrecompiledShadersSPIRV/CACAONonSmartApply_32.h" -#include "PrecompiledShadersSPIRV/CACAONonSmartHalfApply_32.h" - -#include "PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5_32.h" -#include "PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5Half_32.h" -#endif - #define MATRIX_ROW_MAJOR_ORDER 1 -#define MAX_BLUR_PASSES 8 - -#ifdef FFX_CACAO_ENABLE_CAULDRON_DEBUG -#include - -#define USER_MARKER(name) CAULDRON_DX12::UserMarker __marker(commandList, name) -#else -#define USER_MARKER(name) -#endif - -typedef struct FfxCacaoConstants { - float DepthUnpackConsts[2]; - float CameraTanHalfFOV[2]; - - float NDCToViewMul[2]; - float NDCToViewAdd[2]; - - float DepthBufferUVToViewMul[2]; - float 
DepthBufferUVToViewAdd[2]; - - float EffectRadius; // world (viewspace) maximum size of the shadow - float EffectShadowStrength; // global strength of the effect (0 - 5) - float EffectShadowPow; - float EffectShadowClamp; - - float EffectFadeOutMul; // effect fade out from distance (ex. 25) - float EffectFadeOutAdd; // effect fade out to distance (ex. 100) - float EffectHorizonAngleThreshold; // limit errors on slopes and caused by insufficient geometry tessellation (0.05 to 0.5) - float EffectSamplingRadiusNearLimitRec; // if viewspace pixel closer than this, don't enlarge shadow sampling radius anymore (makes no sense to grow beyond some distance, not enough samples to cover everything, so just limit the shadow growth; could be SSAOSettingsFadeOutFrom * 0.1 or less) - - float DepthPrecisionOffsetMod; - float NegRecEffectRadius; // -1.0 / EffectRadius - float LoadCounterAvgDiv; // 1.0 / ( halfDepthMip[SSAO_DEPTH_MIP_LEVELS-1].sizeX * halfDepthMip[SSAO_DEPTH_MIP_LEVELS-1].sizeY ) - float AdaptiveSampleCountLimit; - - float InvSharpness; - int PassIndex; - float BilateralSigmaSquared; - float BilateralSimilarityDistanceSigma; - - float PatternRotScaleMatrices[5][4]; - - float NormalsUnpackMul; - float NormalsUnpackAdd; - float DetailAOStrength; - float Dummy0; - - float SSAOBufferDimensions[2]; - float SSAOBufferInverseDimensions[2]; - - float DepthBufferDimensions[2]; - float DepthBufferInverseDimensions[2]; - - int DepthBufferOffset[2]; - float PerPassFullResUVOffset[2]; - - float InputOutputBufferDimensions[2]; - float InputOutputBufferInverseDimensions[2]; - - float ImportanceMapDimensions[2]; - float ImportanceMapInverseDimensions[2]; - - float DeinterleavedDepthBufferDimensions[2]; - float DeinterleavedDepthBufferInverseDimensions[2]; - - float DeinterleavedDepthBufferOffset[2]; - float DeinterleavedDepthBufferNormalisedOffset[2]; - - FfxCacaoMatrix4x4 NormalsWorldToViewspaceMatrix; -} FfxCacaoConstants; - -typedef struct ScreenSizeInfo { - uint32_t width; - 
uint32_t height; - uint32_t halfWidth; - uint32_t halfHeight; - uint32_t quarterWidth; - uint32_t quarterHeight; - uint32_t eighthWidth; - uint32_t eighthHeight; - uint32_t depthBufferWidth; - uint32_t depthBufferHeight; - uint32_t depthBufferHalfWidth; - uint32_t depthBufferHalfHeight; - uint32_t depthBufferQuarterWidth; - uint32_t depthBufferQuarterHeight; - uint32_t depthBufferOffsetX; - uint32_t depthBufferOffsetY; - uint32_t depthBufferHalfOffsetX; - uint32_t depthBufferHalfOffsetY; -} ScreenSizeInfo; - -typedef struct BufferSizeInfo { - uint32_t inputOutputBufferWidth; - uint32_t inputOutputBufferHeight; - - uint32_t ssaoBufferWidth; - uint32_t ssaoBufferHeight; - - uint32_t depthBufferXOffset; - uint32_t depthBufferYOffset; - - uint32_t depthBufferWidth; - uint32_t depthBufferHeight; - - uint32_t deinterleavedDepthBufferXOffset; - uint32_t deinterleavedDepthBufferYOffset; - - uint32_t deinterleavedDepthBufferWidth; - uint32_t deinterleavedDepthBufferHeight; - - uint32_t importanceMapWidth; - uint32_t importanceMapHeight; -} BufferSizeInfo; - -static const FfxCacaoMatrix4x4 FFX_CACAO_IDENTITY_MATRIX = { +static const FFX_CACAO_Matrix4x4 FFX_CACAO_IDENTITY_MATRIX = { { { 1.0f, 0.0f, 0.0f, 0.0f }, { 0.0f, 1.0f, 0.0f, 0.0f }, { 0.0f, 0.0f, 1.0f, 0.0f }, { 0.0f, 0.0f, 0.0f, 1.0f } } }; -inline static uint32_t dispatchSize(uint32_t tileSize, uint32_t totalSize) +void FFX_CACAO_UpdateBufferSizeInfo(uint32_t width, uint32_t height, FFX_CACAO_Bool useDownsampledSsao, FFX_CACAO_BufferSizeInfo* bsi) { - return (totalSize + tileSize - 1) / tileSize; + uint32_t halfWidth = (width + 1) / 2; + uint32_t halfHeight = (height + 1) / 2; + uint32_t quarterWidth = (halfWidth + 1) / 2; + uint32_t quarterHeight = (halfHeight + 1) / 2; + uint32_t eighthWidth = (quarterWidth + 1) / 2; + uint32_t eighthHeight = (quarterHeight + 1) / 2; + + uint32_t depthBufferWidth = width; + uint32_t depthBufferHeight = height; + uint32_t depthBufferHalfWidth = halfWidth; + uint32_t 
depthBufferHalfHeight = halfHeight; + uint32_t depthBufferQuarterWidth = quarterWidth; + uint32_t depthBufferQuarterHeight = quarterHeight; + + uint32_t depthBufferXOffset = 0; + uint32_t depthBufferYOffset = 0; + uint32_t depthBufferHalfXOffset = 0; + uint32_t depthBufferHalfYOffset = 0; + uint32_t depthBufferQuarterXOffset = 0; + uint32_t depthBufferQuarterYOffset = 0; + + bsi->inputOutputBufferWidth = width; + bsi->inputOutputBufferHeight = height; + bsi->depthBufferXOffset = depthBufferXOffset; + bsi->depthBufferYOffset = depthBufferYOffset; + bsi->depthBufferWidth = depthBufferWidth; + bsi->depthBufferHeight = depthBufferHeight; + + if (useDownsampledSsao) + { + bsi->ssaoBufferWidth = quarterWidth; + bsi->ssaoBufferHeight = quarterHeight; + bsi->deinterleavedDepthBufferXOffset = depthBufferQuarterXOffset; + bsi->deinterleavedDepthBufferYOffset = depthBufferQuarterYOffset; + bsi->deinterleavedDepthBufferWidth = depthBufferQuarterWidth; + bsi->deinterleavedDepthBufferHeight = depthBufferQuarterHeight; + bsi->importanceMapWidth = eighthWidth; + bsi->importanceMapHeight = eighthHeight; + bsi->downsampledSsaoBufferWidth = halfWidth; + bsi->downsampledSsaoBufferHeight = halfHeight; + } + else + { + bsi->ssaoBufferWidth = halfWidth; + bsi->ssaoBufferHeight = halfHeight; + bsi->deinterleavedDepthBufferXOffset = depthBufferHalfXOffset; + bsi->deinterleavedDepthBufferYOffset = depthBufferHalfYOffset; + bsi->deinterleavedDepthBufferWidth = depthBufferHalfWidth; + bsi->deinterleavedDepthBufferHeight = depthBufferHalfHeight; + bsi->importanceMapWidth = quarterWidth; + bsi->importanceMapHeight = quarterHeight; + bsi->downsampledSsaoBufferWidth = 1; + bsi->downsampledSsaoBufferHeight = 1; + } } -static void updateConstants(FfxCacaoConstants* consts, FfxCacaoSettings* settings, BufferSizeInfo* bufferSizeInfo, const FfxCacaoMatrix4x4* proj, const FfxCacaoMatrix4x4* normalsToView) +void FFX_CACAO_UpdateConstants(FFX_CACAO_Constants* consts, const FFX_CACAO_Settings* settings, 
const FFX_CACAO_BufferSizeInfo* bufferSizeInfo, const FFX_CACAO_Matrix4x4* proj, const FFX_CACAO_Matrix4x4* normalsToView) { consts->BilateralSigmaSquared = settings->bilateralSigmaSquared; consts->BilateralSimilarityDistanceSigma = settings->bilateralSimilarityDistanceSigma; @@ -361,9 +161,6 @@ static void updateConstants(FfxCacaoConstants* consts, FfxCacaoSettings* setting // if the depth precision is switched to 32bit float, this can be set to something closer to 1 (0.9999 is fine) consts->DepthPrecisionOffsetMod = 0.9992f; - // consts->RadiusDistanceScalingFunctionPow = 1.0f - CLAMP( m_settings.RadiusDistanceScalingFunction, 0.0f, 1.0f ); - - // Special settings for lowest quality level - just nerf the effect a tiny bit if (settings->qualityLevel <= FFX_CACAO_QUALITY_LOW) { @@ -432,7 +229,7 @@ static void updateConstants(FfxCacaoConstants* consts, FfxCacaoSettings* setting } } -static void updatePerPassConstants(FfxCacaoConstants* consts, FfxCacaoSettings* settings, BufferSizeInfo* bufferSizeInfo, int pass) +void FFX_CACAO_UpdatePerPassConstants(FFX_CACAO_Constants* consts, const FFX_CACAO_Settings* settings, const FFX_CACAO_BufferSizeInfo* bufferSizeInfo, int pass) { consts->PerPassFullResUVOffset[0] = ((float)(pass % 2)) / (float)bufferSizeInfo->ssaoBufferWidth; consts->PerPassFullResUVOffset[1] = ((float)(pass / 2)) / (float)bufferSizeInfo->ssaoBufferHeight; @@ -452,13 +249,11 @@ static void updatePerPassConstants(FfxCacaoConstants* consts, FfxCacaoSettings* float ca, sa; float angle0 = ((float)a + (float)b / (float)subPassCount) * (3.1415926535897932384626433832795f) * 0.5f; - // angle0 += additionalAngleOffset; ca = FFX_CACAO_COS(angle0); sa = FFX_CACAO_SIN(angle0); float scale = 1.0f + (a - 1.5f + (b - (subPassCount - 1.0f) * 0.5f) / (float)subPassCount) * 0.07f; - // scale *= additionalRadiusScale; consts->PatternRotScaleMatrices[subPass][0] = scale * ca; consts->PatternRotScaleMatrices[subPass][1] = scale * -sa; @@ -466,3912 +261,3 @@ static void 
updatePerPassConstants(FfxCacaoConstants* consts, FfxCacaoSettings* consts->PatternRotScaleMatrices[subPass][3] = -scale * ca; } } - -#ifdef FFX_CACAO_ENABLE_PROFILING -// TIMESTAMP(name) -#define TIMESTAMPS \ - TIMESTAMP(BEGIN) \ - TIMESTAMP(PREPARE) \ - TIMESTAMP(BASE_SSAO_PASS) \ - TIMESTAMP(IMPORTANCE_MAP) \ - TIMESTAMP(GENERATE_SSAO) \ - TIMESTAMP(EDGE_SENSITIVE_BLUR) \ - TIMESTAMP(BILATERAL_UPSAMPLE) \ - TIMESTAMP(APPLY) - -typedef enum TimestampID { -#define TIMESTAMP(name) TIMESTAMP_##name, - TIMESTAMPS -#undef TIMESTAMP - NUM_TIMESTAMPS -} TimestampID; - -static const char *TIMESTAMP_NAMES[NUM_TIMESTAMPS] = { -#define TIMESTAMP(name) "FFX_CACAO_" #name, - TIMESTAMPS -#undef TIMESTAMP -}; - -#define NUM_TIMESTAMP_BUFFERS 5 -#endif - -// ================================================================================= -// DirectX 12 -// ================================================================================= - -#ifdef FFX_CACAO_ENABLE_D3D12 - -static inline FfxCacaoStatus hresultToFfxCacaoStatus(HRESULT hr) -{ - switch (hr) - { - case E_FAIL: return FFX_CACAO_STATUS_FAILED; - case E_INVALIDARG: return FFX_CACAO_STATUS_INVALID_ARGUMENT; - case E_OUTOFMEMORY: return FFX_CACAO_STATUS_OUT_OF_MEMORY; - case E_NOTIMPL: return FFX_CACAO_STATUS_INVALID_ARGUMENT; - case S_FALSE: return FFX_CACAO_STATUS_OK; - case S_OK: return FFX_CACAO_STATUS_OK; - default: return FFX_CACAO_STATUS_FAILED; - } -} - -static inline void SetName(ID3D12Object* obj, const char* name) -{ - if (name == NULL) - { - return; - } - - FFX_CACAO_ASSERT(obj != NULL); - wchar_t buffer[1024]; - swprintf(buffer, FFX_CACAO_ARRAY_SIZE(buffer), L"%S", name); - obj->SetName(buffer); -} - -static inline size_t AlignOffset(size_t uOffset, size_t uAlign) -{ - return ((uOffset + (uAlign - 1)) & ~(uAlign - 1)); -} - -static size_t GetPixelByteSize(DXGI_FORMAT fmt) -{ - switch (fmt) - { - case(DXGI_FORMAT_R10G10B10A2_TYPELESS): - case(DXGI_FORMAT_R10G10B10A2_UNORM): - 
case(DXGI_FORMAT_R10G10B10A2_UINT): - case(DXGI_FORMAT_R11G11B10_FLOAT): - case(DXGI_FORMAT_R8G8B8A8_TYPELESS): - case(DXGI_FORMAT_R8G8B8A8_UNORM): - case(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB): - case(DXGI_FORMAT_R8G8B8A8_UINT): - case(DXGI_FORMAT_R8G8B8A8_SNORM): - case(DXGI_FORMAT_R8G8B8A8_SINT): - case(DXGI_FORMAT_B8G8R8A8_UNORM): - case(DXGI_FORMAT_B8G8R8X8_UNORM): - case(DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM): - case(DXGI_FORMAT_B8G8R8A8_TYPELESS): - case(DXGI_FORMAT_B8G8R8A8_UNORM_SRGB): - case(DXGI_FORMAT_B8G8R8X8_TYPELESS): - case(DXGI_FORMAT_B8G8R8X8_UNORM_SRGB): - case(DXGI_FORMAT_R16G16_TYPELESS): - case(DXGI_FORMAT_R16G16_FLOAT): - case(DXGI_FORMAT_R16G16_UNORM): - case(DXGI_FORMAT_R16G16_UINT): - case(DXGI_FORMAT_R16G16_SNORM): - case(DXGI_FORMAT_R16G16_SINT): - case(DXGI_FORMAT_R32_TYPELESS): - case(DXGI_FORMAT_D32_FLOAT): - case(DXGI_FORMAT_R32_FLOAT): - case(DXGI_FORMAT_R32_UINT): - case(DXGI_FORMAT_R32_SINT): - return 4; - - case(DXGI_FORMAT_BC1_TYPELESS): - case(DXGI_FORMAT_BC1_UNORM): - case(DXGI_FORMAT_BC1_UNORM_SRGB): - case(DXGI_FORMAT_BC4_TYPELESS): - case(DXGI_FORMAT_BC4_UNORM): - case(DXGI_FORMAT_BC4_SNORM): - case(DXGI_FORMAT_R16G16B16A16_FLOAT): - case(DXGI_FORMAT_R16G16B16A16_TYPELESS): - return 8; - - case(DXGI_FORMAT_BC2_TYPELESS): - case(DXGI_FORMAT_BC2_UNORM): - case(DXGI_FORMAT_BC2_UNORM_SRGB): - case(DXGI_FORMAT_BC3_TYPELESS): - case(DXGI_FORMAT_BC3_UNORM): - case(DXGI_FORMAT_BC3_UNORM_SRGB): - case(DXGI_FORMAT_BC5_TYPELESS): - case(DXGI_FORMAT_BC5_UNORM): - case(DXGI_FORMAT_BC5_SNORM): - case(DXGI_FORMAT_BC6H_TYPELESS): - case(DXGI_FORMAT_BC6H_UF16): - case(DXGI_FORMAT_BC6H_SF16): - case(DXGI_FORMAT_BC7_TYPELESS): - case(DXGI_FORMAT_BC7_UNORM): - case(DXGI_FORMAT_BC7_UNORM_SRGB): - case(DXGI_FORMAT_R32G32B32A32_FLOAT): - case(DXGI_FORMAT_R32G32B32A32_TYPELESS): - return 16; - - default: - FFX_CACAO_ASSERT(0); - break; - } - return 0; -} - -// 
================================================================================================= -// GpuTimer implementation -// ================================================================================================= - -#ifdef FFX_CACAO_ENABLE_PROFILING -#define GPU_TIMER_MAX_VALUES_PER_FRAME (FFX_CACAO_ARRAY_SIZE(((FfxCacaoDetailedTiming*)0)->timestamps)) - -typedef struct D3D12Timestamp { - TimestampID timestampID; - uint64_t value; -} D3D12Timestamp; - -typedef struct GpuTimer { - ID3D12Resource *buffer; - ID3D12QueryHeap *queryHeap; - uint32_t currentFrame; - uint32_t collectFrame; - struct { - uint32_t len; - D3D12Timestamp timestamps[NUM_TIMESTAMPS]; - } timestampBuffers[NUM_TIMESTAMP_BUFFERS]; - -} GpuTimer; - -static FfxCacaoStatus gpuTimerInit(GpuTimer* gpuTimer, ID3D12Device* device) -{ - memset(gpuTimer, 0, sizeof(*gpuTimer)); - - D3D12_QUERY_HEAP_DESC queryHeapDesc = {}; - queryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; - queryHeapDesc.Count = GPU_TIMER_MAX_VALUES_PER_FRAME * NUM_TIMESTAMP_BUFFERS; - queryHeapDesc.NodeMask = 0; - HRESULT hr = device->CreateQueryHeap(&queryHeapDesc, IID_PPV_ARGS(&gpuTimer->queryHeap)); - if (FAILED(hr)) - { - return hresultToFfxCacaoStatus(hr); - } - - hr = device->CreateCommittedResource( - &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), - D3D12_HEAP_FLAG_NONE, - &CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint64_t) * NUM_TIMESTAMP_BUFFERS * GPU_TIMER_MAX_VALUES_PER_FRAME), - D3D12_RESOURCE_STATE_COPY_DEST, - nullptr, - IID_PPV_ARGS(&gpuTimer->buffer)); - if (FAILED(hr)) - { - FFX_CACAO_ASSERT(gpuTimer->queryHeap); - gpuTimer->queryHeap->Release(); - return hresultToFfxCacaoStatus(hr); - } - - SetName(gpuTimer->buffer, "CACAO::GPUTimer::buffer"); - - return FFX_CACAO_STATUS_OK; -} - -static void gpuTimerDestroy(GpuTimer* gpuTimer) -{ - FFX_CACAO_ASSERT(gpuTimer->buffer); - FFX_CACAO_ASSERT(gpuTimer->queryHeap); - gpuTimer->buffer->Release(); - gpuTimer->queryHeap->Release(); -} - -static void 
gpuTimerStartFrame(GpuTimer* gpuTimer) -{ - uint32_t frame = gpuTimer->currentFrame = (gpuTimer->currentFrame + 1) % NUM_TIMESTAMP_BUFFERS; - gpuTimer->timestampBuffers[frame].len = 0; - - uint32_t collectFrame = gpuTimer->collectFrame = (frame + 1) % NUM_TIMESTAMP_BUFFERS; - - uint32_t numMeasurements = gpuTimer->timestampBuffers[collectFrame].len; - if (!numMeasurements) - { - return; - } - - uint32_t start = GPU_TIMER_MAX_VALUES_PER_FRAME * collectFrame; - uint32_t end = GPU_TIMER_MAX_VALUES_PER_FRAME * (collectFrame + 1); - - D3D12_RANGE readRange; - readRange.Begin = start * sizeof(uint64_t); - readRange.End = end * sizeof(uint64_t); - uint64_t *timingsInTicks = NULL; - gpuTimer->buffer->Map(0, &readRange, (void**)&timingsInTicks); - - for (uint32_t i = 0; i < numMeasurements; ++i) - { - gpuTimer->timestampBuffers[collectFrame].timestamps[i].value = timingsInTicks[start + i]; - } - - D3D12_RANGE writtenRange = {}; - writtenRange.Begin = 0; - writtenRange.End = 0; - gpuTimer->buffer->Unmap(0, &writtenRange); -} - -static void gpuTimerGetTimestamp(GpuTimer* gpuTimer, ID3D12GraphicsCommandList* commandList, TimestampID timestampID) -{ - uint32_t frame = gpuTimer->currentFrame; - uint32_t curTimestamp = gpuTimer->timestampBuffers[frame].len++; - FFX_CACAO_ASSERT(curTimestamp < GPU_TIMER_MAX_VALUES_PER_FRAME); - gpuTimer->timestampBuffers[frame].timestamps[curTimestamp].timestampID = timestampID; - commandList->EndQuery(gpuTimer->queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, frame * GPU_TIMER_MAX_VALUES_PER_FRAME + curTimestamp); -} - -static void gpuTimerEndFrame(GpuTimer* gpuTimer, ID3D12GraphicsCommandList* commandList) -{ - uint32_t frame = gpuTimer->currentFrame; - uint32_t numTimestamps = gpuTimer->timestampBuffers[frame].len; - commandList->ResolveQueryData( - gpuTimer->queryHeap, - D3D12_QUERY_TYPE_TIMESTAMP, - frame * GPU_TIMER_MAX_VALUES_PER_FRAME, - numTimestamps, - gpuTimer->buffer, - frame * GPU_TIMER_MAX_VALUES_PER_FRAME * sizeof(uint64_t)); -} - -static 
void gpuTimerCollectTimings(GpuTimer* gpuTimer, FfxCacaoDetailedTiming* timings) -{ - uint32_t frame = gpuTimer->collectFrame; - uint32_t numTimestamps = timings->numTimestamps = gpuTimer->timestampBuffers[frame].len; - - uint64_t prevTimeTicks = gpuTimer->timestampBuffers[frame].timestamps[0].value; - for (uint32_t i = 1; i < numTimestamps; ++i) - { - uint64_t thisTimeTicks = gpuTimer->timestampBuffers[frame].timestamps[i].value; - FfxCacaoTimestamp *t = &timings->timestamps[i]; - t->label = TIMESTAMP_NAMES[gpuTimer->timestampBuffers[frame].timestamps[i].timestampID]; - t->ticks = thisTimeTicks - prevTimeTicks; - prevTimeTicks = thisTimeTicks; - } - - timings->timestamps[0].label = "FFX_CACAO_TOTAL"; - timings->timestamps[0].ticks = prevTimeTicks - gpuTimer->timestampBuffers[frame].timestamps[0].value; -} -#endif - -// ================================================================================================= -// CbvSrvUav implementation -// ================================================================================================= - -typedef struct CbvSrvUav { - uint32_t size; - uint32_t descriptorSize; - D3D12_CPU_DESCRIPTOR_HANDLE cpuDescriptor; - D3D12_GPU_DESCRIPTOR_HANDLE gpuDescriptor; - D3D12_CPU_DESCRIPTOR_HANDLE cpuVisibleCpuDescriptor; -} CbvSrvUav; - -static D3D12_CPU_DESCRIPTOR_HANDLE cbvSrvUavGetCpu(CbvSrvUav* cbvSrvUav, uint32_t i) -{ - D3D12_CPU_DESCRIPTOR_HANDLE cpuDescriptor = cbvSrvUav->cpuDescriptor; - cpuDescriptor.ptr += i * cbvSrvUav->descriptorSize; - return cpuDescriptor; -} - -static D3D12_CPU_DESCRIPTOR_HANDLE cbvSrvUavGetCpuVisibleCpu(CbvSrvUav* cbvSrvUav, uint32_t i) -{ - D3D12_CPU_DESCRIPTOR_HANDLE cpuDescriptor = cbvSrvUav->cpuVisibleCpuDescriptor; - cpuDescriptor.ptr += i * cbvSrvUav->descriptorSize; - return cpuDescriptor; -} - -static D3D12_GPU_DESCRIPTOR_HANDLE cbvSrvUavGetGpu(CbvSrvUav* cbvSrvUav, uint32_t i) -{ - D3D12_GPU_DESCRIPTOR_HANDLE gpuDescriptor = cbvSrvUav->gpuDescriptor; - gpuDescriptor.ptr += i * 
cbvSrvUav->descriptorSize; - return gpuDescriptor; -} - -// ================================================================================================= -// CbvSrvUavHeap implementation -// ================================================================================================= - -typedef struct CbvSrvUavHeap { - uint32_t index; - uint32_t descriptorCount; - uint32_t descriptorElementSize; - ID3D12DescriptorHeap *heap; - ID3D12DescriptorHeap *cpuVisibleHeap; -} ResourceViewHeap; - -static FfxCacaoStatus cbvSrvUavHeapInit(CbvSrvUavHeap* cbvSrvUavHeap, ID3D12Device* device, uint32_t descriptorCount) -{ - FFX_CACAO_ASSERT(cbvSrvUavHeap); - FFX_CACAO_ASSERT(device); - - cbvSrvUavHeap->descriptorCount = descriptorCount; - cbvSrvUavHeap->index = 0; - - cbvSrvUavHeap->descriptorElementSize = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); - - D3D12_DESCRIPTOR_HEAP_DESC descHeap; - descHeap.NumDescriptors = descriptorCount; - descHeap.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - descHeap.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; - descHeap.NodeMask = 0; - - HRESULT hr = device->CreateDescriptorHeap(&descHeap, IID_PPV_ARGS(&cbvSrvUavHeap->heap)); - if (FAILED(hr)) - { - return hresultToFfxCacaoStatus(hr); - } - - SetName(cbvSrvUavHeap->heap, "FfxCacaoCbvSrvUavHeap"); - - D3D12_DESCRIPTOR_HEAP_DESC cpuVisibleDescHeap; - cpuVisibleDescHeap.NumDescriptors = descriptorCount; - cpuVisibleDescHeap.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; - cpuVisibleDescHeap.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; - cpuVisibleDescHeap.NodeMask = 0; - - hr = device->CreateDescriptorHeap(&cpuVisibleDescHeap, IID_PPV_ARGS(&cbvSrvUavHeap->cpuVisibleHeap)); - if (FAILED(hr)) - { - FFX_CACAO_ASSERT(cbvSrvUavHeap->heap); - cbvSrvUavHeap->heap->Release(); - return hresultToFfxCacaoStatus(hr); - } - - SetName(cbvSrvUavHeap->cpuVisibleHeap, "FfxCacaoCbvSrvUavCpuVisibleHeap"); - return FFX_CACAO_STATUS_OK; -} - -static void 
cbvSrvUavHeapDestroy(CbvSrvUavHeap* cbvSrvUavHeap) -{ - FFX_CACAO_ASSERT(cbvSrvUavHeap); - FFX_CACAO_ASSERT(cbvSrvUavHeap->heap); - FFX_CACAO_ASSERT(cbvSrvUavHeap->cpuVisibleHeap); - cbvSrvUavHeap->heap->Release(); - cbvSrvUavHeap->cpuVisibleHeap->Release(); -} - -static void cbvSrvUavHeapAllocDescriptor(CbvSrvUavHeap* cbvSrvUavHeap, CbvSrvUav* cbvSrvUav, uint32_t size) -{ - FFX_CACAO_ASSERT(cbvSrvUavHeap); - FFX_CACAO_ASSERT(cbvSrvUav); - FFX_CACAO_ASSERT(cbvSrvUavHeap->index + size <= cbvSrvUavHeap->descriptorCount); - - D3D12_CPU_DESCRIPTOR_HANDLE cpuView = cbvSrvUavHeap->heap->GetCPUDescriptorHandleForHeapStart(); - cpuView.ptr += cbvSrvUavHeap->index * cbvSrvUavHeap->descriptorElementSize; - - D3D12_GPU_DESCRIPTOR_HANDLE gpuView = cbvSrvUavHeap->heap->GetGPUDescriptorHandleForHeapStart(); - gpuView.ptr += cbvSrvUavHeap->index * cbvSrvUavHeap->descriptorElementSize; - - D3D12_CPU_DESCRIPTOR_HANDLE cpuVisibleCpuView = cbvSrvUavHeap->cpuVisibleHeap->GetCPUDescriptorHandleForHeapStart(); - cpuVisibleCpuView.ptr += cbvSrvUavHeap->index * cbvSrvUavHeap->descriptorElementSize; - - cbvSrvUavHeap->index += size; - - cbvSrvUav->size = size; - cbvSrvUav->descriptorSize = cbvSrvUavHeap->descriptorElementSize; - cbvSrvUav->cpuDescriptor = cpuView; - cbvSrvUav->gpuDescriptor = gpuView; - cbvSrvUav->cpuVisibleCpuDescriptor = cpuVisibleCpuView; -} - -// ================================================================================================= -// ConstantBufferRing implementation -// ================================================================================================= - -typedef struct ConstantBufferRing { - size_t pageSize; - size_t totalSize; - size_t currentOffset; - uint32_t currentPage; - uint32_t numPages; - char *data; - ID3D12Resource *buffer; -} ConstantBufferRing; - -static FfxCacaoStatus constantBufferRingInit(ConstantBufferRing* constantBufferRing, ID3D12Device* device, uint32_t numPages, size_t pageSize) -{ - 
FFX_CACAO_ASSERT(constantBufferRing); - FFX_CACAO_ASSERT(device); - - pageSize = AlignOffset(pageSize, 256); - size_t totalSize = numPages * pageSize; - char *data = NULL; - ID3D12Resource *buffer = NULL; - - HRESULT hr = device->CreateCommittedResource( - &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), - D3D12_HEAP_FLAG_NONE, - &CD3DX12_RESOURCE_DESC::Buffer(totalSize), - D3D12_RESOURCE_STATE_GENERIC_READ, - nullptr, - IID_PPV_ARGS(&buffer)); - if (FAILED(hr)) - { - return hresultToFfxCacaoStatus(hr); - } - - SetName(buffer, "DynamicBufferRing::m_pBuffer"); - - buffer->Map(0, NULL, (void**)&data); - - constantBufferRing->pageSize = pageSize; - constantBufferRing->totalSize = totalSize; - constantBufferRing->currentOffset = 0; - constantBufferRing->currentPage = 0; - constantBufferRing->numPages = numPages; - constantBufferRing->data = data; - constantBufferRing->buffer = buffer; - - return FFX_CACAO_STATUS_OK; -} - -static void constantBufferRingDestroy(ConstantBufferRing* constantBufferRing) -{ - FFX_CACAO_ASSERT(constantBufferRing); - FFX_CACAO_ASSERT(constantBufferRing->buffer); - constantBufferRing->buffer->Release(); -} - -static void constantBufferRingStartFrame(ConstantBufferRing* constantBufferRing) -{ - FFX_CACAO_ASSERT(constantBufferRing); - constantBufferRing->currentPage = (constantBufferRing->currentPage + 1) % constantBufferRing->numPages; - constantBufferRing->currentOffset = 0; -} - -static void constantBufferRingAlloc(ConstantBufferRing* constantBufferRing, size_t size, void **data, D3D12_GPU_VIRTUAL_ADDRESS *bufferViewDesc) -{ - FFX_CACAO_ASSERT(constantBufferRing); - size = AlignOffset(size, 256); - FFX_CACAO_ASSERT(constantBufferRing->currentOffset + size <= constantBufferRing->pageSize); - - size_t memOffset = constantBufferRing->pageSize * constantBufferRing->currentPage + constantBufferRing->currentOffset; - *data = constantBufferRing->data + memOffset; - constantBufferRing->currentOffset += size; - - *bufferViewDesc = 
constantBufferRing->buffer->GetGPUVirtualAddress() + memOffset; -} - -// ================================================================================================= -// ComputeShader implementation -// ================================================================================================= - -typedef struct ComputeShader { - ID3D12RootSignature *rootSignature; - ID3D12PipelineState *pipelineState; -} ComputeShader; - -static FfxCacaoStatus computeShaderInit(ComputeShader* computeShader, ID3D12Device* device, const char* name, const void* bytecode, size_t bytecodeLength, uint32_t uavTableSize, uint32_t srvTableSize, D3D12_STATIC_SAMPLER_DESC* staticSamplers, uint32_t numStaticSamplers) -{ - FFX_CACAO_ASSERT(computeShader); - FFX_CACAO_ASSERT(device); - FFX_CACAO_ASSERT(name); - FFX_CACAO_ASSERT(bytecode); - FFX_CACAO_ASSERT(staticSamplers); - - D3D12_SHADER_BYTECODE shaderByteCode = {}; - shaderByteCode.pShaderBytecode = bytecode; - shaderByteCode.BytecodeLength = bytecodeLength; - - // Create root signature - { - CD3DX12_DESCRIPTOR_RANGE DescRange[4]; - CD3DX12_ROOT_PARAMETER RTSlot[4]; - - // we'll always have a constant buffer - int parameterCount = 0; - DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0); - RTSlot[parameterCount++].InitAsConstantBufferView(0, 0, D3D12_SHADER_VISIBILITY_ALL); - - // if we have a UAV table - if (uavTableSize > 0) - { - DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, uavTableSize, 0); - RTSlot[parameterCount].InitAsDescriptorTable(1, &DescRange[parameterCount], D3D12_SHADER_VISIBILITY_ALL); - ++parameterCount; - } - - // if we have a SRV table - if (srvTableSize > 0) - { - DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, srvTableSize, 0); - RTSlot[parameterCount].InitAsDescriptorTable(1, &DescRange[parameterCount], D3D12_SHADER_VISIBILITY_ALL); - ++parameterCount; - } - - // the root signature contains 3 slots to be used - CD3DX12_ROOT_SIGNATURE_DESC descRootSignature 
= CD3DX12_ROOT_SIGNATURE_DESC(); - descRootSignature.NumParameters = parameterCount; - descRootSignature.pParameters = RTSlot; - descRootSignature.NumStaticSamplers = numStaticSamplers; - descRootSignature.pStaticSamplers = staticSamplers; - - // deny uneccessary access to certain pipeline stages - descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; - - ID3DBlob *outBlob, *errorBlob = NULL; - - HRESULT hr = D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &outBlob, &errorBlob); - if (FAILED(hr)) - { - return hresultToFfxCacaoStatus(hr); - } - - if (errorBlob) - { - errorBlob->Release(); - if (outBlob) - { - outBlob->Release(); - } - return FFX_CACAO_STATUS_FAILED; - } - - hr = device->CreateRootSignature(0, outBlob->GetBufferPointer(), outBlob->GetBufferSize(), IID_PPV_ARGS(&computeShader->rootSignature)); - if (FAILED(hr)) - { - outBlob->Release(); - return hresultToFfxCacaoStatus(hr); - } - - char nameBuffer[1024] = "PostProcCS::m_pRootSignature::"; - strncat_s(nameBuffer, name, FFX_CACAO_ARRAY_SIZE(nameBuffer)); - SetName(computeShader->rootSignature, nameBuffer); - - outBlob->Release(); - } - - // Create pipeline state - { - D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; - descPso.CS = shaderByteCode; - descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; - descPso.pRootSignature = computeShader->rootSignature; - descPso.NodeMask = 0; - - HRESULT hr = device->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&computeShader->pipelineState)); - if (FAILED(hr)) - { - computeShader->rootSignature->Release(); - return hresultToFfxCacaoStatus(hr); - } - - char nameBuffer[1024] = "PostProcCS::m_pPipeline::"; - strncat_s(nameBuffer, name, FFX_CACAO_ARRAY_SIZE(nameBuffer)); - SetName(computeShader->rootSignature, nameBuffer); - } - - return FFX_CACAO_STATUS_OK; -} - -static void computeShaderDestroy(ComputeShader* computeShader) -{ - FFX_CACAO_ASSERT(computeShader); - FFX_CACAO_ASSERT(computeShader->rootSignature); - 
FFX_CACAO_ASSERT(computeShader->pipelineState); - computeShader->rootSignature->Release(); - computeShader->pipelineState->Release(); -} - -static void computeShaderDraw(ComputeShader* computeShader, ID3D12GraphicsCommandList* commandList, D3D12_GPU_VIRTUAL_ADDRESS constantBuffer, CbvSrvUav *uavTable, CbvSrvUav *srvTable, uint32_t width, uint32_t height, uint32_t depth) -{ - FFX_CACAO_ASSERT(computeShader); - FFX_CACAO_ASSERT(commandList); - FFX_CACAO_ASSERT(uavTable); - FFX_CACAO_ASSERT(srvTable); - FFX_CACAO_ASSERT(computeShader->pipelineState); - FFX_CACAO_ASSERT(computeShader->rootSignature); - - commandList->SetComputeRootSignature(computeShader->rootSignature); - - int params = 0; - commandList->SetComputeRootConstantBufferView(params++, constantBuffer); - if (uavTable) - { - commandList->SetComputeRootDescriptorTable(params++, uavTable->gpuDescriptor); - } - if (srvTable) - { - commandList->SetComputeRootDescriptorTable(params++, srvTable->gpuDescriptor); - } - - commandList->SetPipelineState(computeShader->pipelineState); - commandList->Dispatch(width, height, depth); -} - -// ================================================================================================= -// Texture implementation -// ================================================================================================= - -typedef struct Texture { - ID3D12Resource *resource; - DXGI_FORMAT format; - uint32_t width; - uint32_t height; - uint32_t arraySize; - uint32_t mipMapCount; -} Texture; - -static FfxCacaoStatus textureInit(Texture* texture, ID3D12Device* device, const char* name, const CD3DX12_RESOURCE_DESC* desc, D3D12_RESOURCE_STATES initialState, const D3D12_CLEAR_VALUE* clearValue) -{ - FFX_CACAO_ASSERT(texture); - FFX_CACAO_ASSERT(device); - FFX_CACAO_ASSERT(name); - FFX_CACAO_ASSERT(desc); - - HRESULT hr = device->CreateCommittedResource( - &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), - D3D12_HEAP_FLAG_NONE, - desc, - initialState, - clearValue, - 
IID_PPV_ARGS(&texture->resource)); - if (FAILED(hr)) - { - return hresultToFfxCacaoStatus(hr); - } - - texture->format = desc->Format; - texture->width = (uint32_t)desc->Width; - texture->height = desc->Height; - texture->arraySize = desc->DepthOrArraySize; - texture->mipMapCount = desc->MipLevels; - - SetName(texture->resource, name); - - return FFX_CACAO_STATUS_OK; -} - -static void textureDestroy(Texture* texture) -{ - FFX_CACAO_ASSERT(texture); - FFX_CACAO_ASSERT(texture->resource); - texture->resource->Release(); -} - -static void textureCreateSrvFromDesc(Texture* texture, uint32_t index, CbvSrvUav* srv, const D3D12_SHADER_RESOURCE_VIEW_DESC* srvDesc) -{ - FFX_CACAO_ASSERT(texture); - FFX_CACAO_ASSERT(srv); - FFX_CACAO_ASSERT(srvDesc); - - ID3D12Device* device; - texture->resource->GetDevice(__uuidof(*device), (void**)&device); - - device->CreateShaderResourceView(texture->resource, srvDesc, cbvSrvUavGetCpu(srv, index)); - device->CreateShaderResourceView(texture->resource, srvDesc, cbvSrvUavGetCpuVisibleCpu(srv, index)); - - device->Release(); -} - -static void textureCreateSrv(Texture* texture, uint32_t index, CbvSrvUav* srv, int mipLevel, int arraySize, int firstArraySlice) -{ - FFX_CACAO_ASSERT(texture); - FFX_CACAO_ASSERT(srv); - - D3D12_RESOURCE_DESC resourceDesc = texture->resource->GetDesc(); - - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - - srvDesc.Format = resourceDesc.Format == DXGI_FORMAT_D32_FLOAT ? DXGI_FORMAT_R32_FLOAT : resourceDesc.Format; - if (resourceDesc.SampleDesc.Count == 1) - { - if (resourceDesc.DepthOrArraySize == 1) - { - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; - srvDesc.Texture2D.MostDetailedMip = (mipLevel == -1) ? 0 : mipLevel; - srvDesc.Texture2D.MipLevels = (mipLevel == -1) ? 
texture->mipMapCount : 1; - FFX_CACAO_ASSERT(arraySize == -1); - FFX_CACAO_ASSERT(firstArraySlice == -1); - } - else - { - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; - srvDesc.Texture2DArray.MostDetailedMip = (mipLevel == -1) ? 0 : mipLevel; - srvDesc.Texture2DArray.MipLevels = (mipLevel == -1) ? texture->mipMapCount : 1; - srvDesc.Texture2DArray.FirstArraySlice = (firstArraySlice == -1) ? 0 : firstArraySlice; - srvDesc.Texture2DArray.ArraySize = (arraySize == -1) ? resourceDesc.DepthOrArraySize : arraySize; - } - } - else - { - if (resourceDesc.DepthOrArraySize == 1) - { - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMS; - FFX_CACAO_ASSERT(mipLevel == -1); - FFX_CACAO_ASSERT(arraySize == -1); - FFX_CACAO_ASSERT(firstArraySlice == -1); - } - else - { - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DMSARRAY; - srvDesc.Texture2DMSArray.FirstArraySlice = (firstArraySlice == -1) ? 0 : firstArraySlice; - srvDesc.Texture2DMSArray.ArraySize = (arraySize == -1) ? resourceDesc.DepthOrArraySize : arraySize; - FFX_CACAO_ASSERT(mipLevel == -1); - } - } - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - - textureCreateSrvFromDesc(texture, index, srv, &srvDesc); -} - -static void textureCreateUavFromDesc(Texture* texture, uint32_t index, CbvSrvUav* uav, const D3D12_UNORDERED_ACCESS_VIEW_DESC* uavDesc) -{ - FFX_CACAO_ASSERT(texture); - FFX_CACAO_ASSERT(uav); - FFX_CACAO_ASSERT(uavDesc); - - ID3D12Device* device; - texture->resource->GetDevice(__uuidof(*device), (void**)&device); - - device->CreateUnorderedAccessView(texture->resource, NULL, uavDesc, cbvSrvUavGetCpu(uav, index)); - device->CreateUnorderedAccessView(texture->resource, NULL, uavDesc, cbvSrvUavGetCpuVisibleCpu(uav, index)); - - device->Release(); -} - -static void textureCreateUav(Texture* texture, uint32_t index, CbvSrvUav* uav, int mipLevel, int arraySize, int firstArraySlice) -{ - FFX_CACAO_ASSERT(texture); - FFX_CACAO_ASSERT(uav); - - D3D12_RESOURCE_DESC 
resourceDesc = texture->resource->GetDesc(); - - D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - - uavDesc.Format = resourceDesc.Format; - if (arraySize == -1) - { - FFX_CACAO_ASSERT(firstArraySlice == -1); - uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; - uavDesc.Texture2D.MipSlice = (mipLevel == -1) ? 0 : mipLevel; - } - else - { - uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; - uavDesc.Texture2DArray.ArraySize = arraySize; - uavDesc.Texture2DArray.FirstArraySlice = firstArraySlice; - uavDesc.Texture2DArray.MipSlice = (mipLevel == -1) ? 0 : mipLevel; - } - - textureCreateUavFromDesc(texture, index, uav, &uavDesc); -} - -// ================================================================================================= -// CACAO implementation -// ================================================================================================= - -struct FfxCacaoD3D12Context { - FfxCacaoSettings settings; - FfxCacaoBool useDownsampledSsao; - - ID3D12Device *device; - CbvSrvUavHeap cbvSrvUavHeap; - -#ifdef FFX_CACAO_ENABLE_PROFILING - GpuTimer gpuTimer; -#endif - - ConstantBufferRing constantBufferRing; - BufferSizeInfo bufferSizeInfo; - ID3D12Resource *outputResource; - - // ========================================== - // Prepare shaders/resources - - ComputeShader prepareDownsampledDepthsAndMips; - ComputeShader prepareNativeDepthsAndMips; - - ComputeShader prepareDownsampledNormals; - ComputeShader prepareNativeNormals; - - ComputeShader prepareDownsampledNormalsFromInputNormals; - ComputeShader prepareNativeNormalsFromInputNormals; - - ComputeShader prepareDownsampledDepths; - ComputeShader prepareNativeDepths; - - ComputeShader prepareDownsampledDepthsHalf; - ComputeShader prepareNativeDepthsHalf; - - CbvSrvUav prepareDepthsNormalsAndMipsInputs; // <-- this is just the depth source - CbvSrvUav prepareDepthsAndMipsOutputs; - CbvSrvUav prepareDepthsOutputs; - CbvSrvUav prepareNormalsOutput; - CbvSrvUav prepareNormalsFromInputNormalsInput; 
- CbvSrvUav prepareNormalsFromInputNormalsOutput; - - // ========================================== - // Generate SSAO shaders/resources - - ComputeShader generateSSAO[5]; - - CbvSrvUav generateSSAOInputs[4]; - CbvSrvUav generateAdaptiveSSAOInputs[4]; - CbvSrvUav generateSSAOOutputsPing[4]; - CbvSrvUav generateSSAOOutputsPong[4]; - - // ========================================== - // Importance map generate/post process shaders/resources - - ComputeShader generateImportanceMap; - ComputeShader postprocessImportanceMapA; - ComputeShader postprocessImportanceMapB; - - CbvSrvUav generateImportanceMapInputs; - CbvSrvUav generateImportanceMapOutputs; - CbvSrvUav generateImportanceMapAInputs; - CbvSrvUav generateImportanceMapAOutputs; - CbvSrvUav generateImportanceMapBInputs; - CbvSrvUav generateImportanceMapBOutputs; - - // ========================================== - // De-interleave Blur shaders/resources - - ComputeShader edgeSensitiveBlur[8]; - - CbvSrvUav edgeSensitiveBlurInput[4]; - CbvSrvUav edgeSensitiveBlurOutput[4]; - - // ========================================== - // Apply shaders/resources - - ComputeShader smartApply; - ComputeShader nonSmartApply; - ComputeShader nonSmartHalfApply; - - CbvSrvUav createOutputInputsPing; - CbvSrvUav createOutputInputsPong; - CbvSrvUav createOutputOutputs; - - // ========================================== - // upscale shaders/resources - - ComputeShader upscaleBilateral5x5; - ComputeShader upscaleBilateral5x5Half; - - CbvSrvUav bilateralUpscaleInputsPing; - CbvSrvUav bilateralUpscaleInputsPong; - CbvSrvUav bilateralUpscaleOutputs; - - // ========================================== - // Intermediate buffers - - Texture deinterleavedDepths; - Texture deinterleavedNormals; - Texture ssaoBufferPing; - Texture ssaoBufferPong; - Texture importanceMap; - Texture importanceMapPong; - Texture loadCounter; - - CbvSrvUav loadCounterUav; // required for LoadCounter clear -}; - -static inline FfxCacaoD3D12Context* 
getAlignedD3D12ContextPointer(FfxCacaoD3D12Context* ptr) -{ - uintptr_t tmp = (uintptr_t)ptr; - tmp = (tmp + alignof(FfxCacaoD3D12Context) - 1) & (~(alignof(FfxCacaoD3D12Context) - 1)); - return (FfxCacaoD3D12Context*)tmp; -} -#endif - -#ifdef FFX_CACAO_ENABLE_VULKAN -// ================================================================================================= -// CACAO vulkan implementation -// ================================================================================================= - -// DESCRIPTOR_SET_LAYOUT(name, num_inputs, num_outputs) -#define DESCRIPTOR_SET_LAYOUTS \ - DESCRIPTOR_SET_LAYOUT(CLEAR_LOAD_COUNTER, 0, 1) \ - DESCRIPTOR_SET_LAYOUT(PREPARE_DEPTHS, 1, 1) \ - DESCRIPTOR_SET_LAYOUT(PREPARE_DEPTHS_MIPS, 1, 4) \ - DESCRIPTOR_SET_LAYOUT(PREPARE_NORMALS, 1, 1) \ - DESCRIPTOR_SET_LAYOUT(PREPARE_NORMALS_FROM_INPUT_NORMALS, 1, 1) \ - DESCRIPTOR_SET_LAYOUT(GENERATE, 7, 1) \ - DESCRIPTOR_SET_LAYOUT(GENERATE_ADAPTIVE, 7, 1) \ - DESCRIPTOR_SET_LAYOUT(GENERATE_IMPORTANCE_MAP, 1, 1) \ - DESCRIPTOR_SET_LAYOUT(POSTPROCESS_IMPORTANCE_MAP_A, 1, 1) \ - DESCRIPTOR_SET_LAYOUT(POSTPROCESS_IMPORTANCE_MAP_B, 1, 2) \ - DESCRIPTOR_SET_LAYOUT(EDGE_SENSITIVE_BLUR, 1, 1) \ - DESCRIPTOR_SET_LAYOUT(APPLY, 1, 1) \ - DESCRIPTOR_SET_LAYOUT(BILATERAL_UPSAMPLE, 4, 1) - -typedef enum DescriptorSetLayoutID { -#define DESCRIPTOR_SET_LAYOUT(name, _num_inputs, _num_outputs) DSL_##name, - DESCRIPTOR_SET_LAYOUTS -#undef DESCRIPTOR_SET_LAYOUT - NUM_DESCRIPTOR_SET_LAYOUTS -} DescriptorSetLayoutID; - -typedef struct DescriptorSetLayoutMetaData { - uint32_t numInputs; - uint32_t numOutputs; - const char *name; -} DescriptorSetLayoutMetaData; - -static const DescriptorSetLayoutMetaData DESCRIPTOR_SET_LAYOUT_META_DATA[NUM_DESCRIPTOR_SET_LAYOUTS] = { -#define DESCRIPTOR_SET_LAYOUT(name, num_inputs, num_outputs) { num_inputs, num_outputs, "FFX_CACAO_DSL_" #name }, - DESCRIPTOR_SET_LAYOUTS -#undef DESCRIPTOR_SET_LAYOUT -}; - -#define MAX_DESCRIPTOR_BINDINGS 32 -// define all the data 
for compute shaders -// COMPUTE_SHADER(enum_name, pascal_case_name, descriptor_set) -#define COMPUTE_SHADERS \ - COMPUTE_SHADER(CLEAR_LOAD_COUNTER, ClearLoadCounter, CLEAR_LOAD_COUNTER) \ - \ - COMPUTE_SHADER(PREPARE_DOWNSAMPLED_DEPTHS, PrepareDownsampledDepths, PREPARE_DEPTHS) \ - COMPUTE_SHADER(PREPARE_NATIVE_DEPTHS, PrepareNativeDepths, PREPARE_DEPTHS) \ - COMPUTE_SHADER(PREPARE_DOWNSAMPLED_DEPTHS_AND_MIPS, PrepareDownsampledDepthsAndMips, PREPARE_DEPTHS_MIPS) \ - COMPUTE_SHADER(PREPARE_NATIVE_DEPTHS_AND_MIPS, PrepareNativeDepthsAndMips, PREPARE_DEPTHS_MIPS) \ - COMPUTE_SHADER(PREPARE_DOWNSAMPLED_NORMALS, PrepareDownsampledNormals, PREPARE_NORMALS) \ - COMPUTE_SHADER(PREPARE_NATIVE_NORMALS, PrepareNativeNormals, PREPARE_NORMALS) \ - COMPUTE_SHADER(PREPARE_DOWNSAMPLED_NORMALS_FROM_INPUT_NORMALS, PrepareDownsampledNormalsFromInputNormals, PREPARE_NORMALS_FROM_INPUT_NORMALS) \ - COMPUTE_SHADER(PREPARE_NATIVE_NORMALS_FROM_INPUT_NORMALS, PrepareNativeNormalsFromInputNormals, PREPARE_NORMALS_FROM_INPUT_NORMALS) \ - COMPUTE_SHADER(PREPARE_DOWNSAMPLED_DEPTHS_HALF, PrepareDownsampledDepthsHalf, PREPARE_DEPTHS) \ - COMPUTE_SHADER(PREPARE_NATIVE_DEPTHS_HALF, PrepareNativeDepthsHalf, PREPARE_DEPTHS) \ - \ - COMPUTE_SHADER(GENERATE_Q0, GenerateQ0, GENERATE) \ - COMPUTE_SHADER(GENERATE_Q1, GenerateQ1, GENERATE) \ - COMPUTE_SHADER(GENERATE_Q2, GenerateQ2, GENERATE) \ - COMPUTE_SHADER(GENERATE_Q3, GenerateQ3, GENERATE_ADAPTIVE) \ - COMPUTE_SHADER(GENERATE_Q3_BASE, GenerateQ3Base, GENERATE) \ - \ - COMPUTE_SHADER(GENERATE_IMPORTANCE_MAP, GenerateImportanceMap, GENERATE_IMPORTANCE_MAP) \ - COMPUTE_SHADER(POSTPROCESS_IMPORTANCE_MAP_A, PostprocessImportanceMapA, POSTPROCESS_IMPORTANCE_MAP_A) \ - COMPUTE_SHADER(POSTPROCESS_IMPORTANCE_MAP_B, PostprocessImportanceMapB, POSTPROCESS_IMPORTANCE_MAP_B) \ - \ - COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_1, EdgeSensitiveBlur1, EDGE_SENSITIVE_BLUR) \ - COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_2, EdgeSensitiveBlur2, EDGE_SENSITIVE_BLUR) \ - 
COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_3, EdgeSensitiveBlur3, EDGE_SENSITIVE_BLUR) \ - COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_4, EdgeSensitiveBlur4, EDGE_SENSITIVE_BLUR) \ - COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_5, EdgeSensitiveBlur5, EDGE_SENSITIVE_BLUR) \ - COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_6, EdgeSensitiveBlur6, EDGE_SENSITIVE_BLUR) \ - COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_7, EdgeSensitiveBlur7, EDGE_SENSITIVE_BLUR) \ - COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_8, EdgeSensitiveBlur8, EDGE_SENSITIVE_BLUR) \ - \ - COMPUTE_SHADER(APPLY, Apply, APPLY) \ - COMPUTE_SHADER(NON_SMART_APPLY, NonSmartApply, APPLY) \ - COMPUTE_SHADER(NON_SMART_HALF_APPLY, NonSmartHalfApply, APPLY) \ - \ - COMPUTE_SHADER(UPSCALE_BILATERAL_5X5, UpscaleBilateral5x5, BILATERAL_UPSAMPLE) \ - COMPUTE_SHADER(UPSCALE_BILATERAL_5X5_HALF, UpscaleBilateral5x5Half, BILATERAL_UPSAMPLE) - -typedef enum ComputeShaderID { -#define COMPUTE_SHADER(name, _pascal_name, _descriptor_set) CS_##name, - COMPUTE_SHADERS -#undef COMPUTE_SHADER - NUM_COMPUTE_SHADERS -} ComputeShaderID; - -typedef struct ComputeShaderMetaData { - const uint32_t *shaderSpirv16; - size_t spirv16Len; - const uint32_t *shaderSpirv32; - size_t spirv32Len; - const char *name; - DescriptorSetLayoutID descriptorSetLayoutID; - const char *objectName; -} ComputeShaderMetaData; - -static const ComputeShaderMetaData COMPUTE_SHADER_META_DATA[NUM_COMPUTE_SHADERS] = { -#define COMPUTE_SHADER(name, pascal_name, descriptor_set_layout) { (uint32_t*)CS##pascal_name##SPIRV16, FFX_CACAO_ARRAY_SIZE(CS##pascal_name##SPIRV16), (uint32_t*)CS##pascal_name##SPIRV32, FFX_CACAO_ARRAY_SIZE(CS##pascal_name##SPIRV32), "CS"#pascal_name, DSL_##descriptor_set_layout, "FFX_CACAO_CS_"#name }, - COMPUTE_SHADERS -#undef COMPUTE_SHADER -}; - -#define TEXTURES \ - TEXTURE(DEINTERLEAVED_DEPTHS, deinterleavedDepthBufferWidth, deinterleavedDepthBufferHeight, VK_FORMAT_R16_SFLOAT, 4, 4) \ - TEXTURE(DEINTERLEAVED_NORMALS, ssaoBufferWidth, ssaoBufferHeight, VK_FORMAT_R8G8B8A8_SNORM, 4, 1) \ - 
TEXTURE(SSAO_BUFFER_PING, ssaoBufferWidth, ssaoBufferHeight, VK_FORMAT_R8G8_UNORM, 4, 1) \ - TEXTURE(SSAO_BUFFER_PONG, ssaoBufferWidth, ssaoBufferHeight, VK_FORMAT_R8G8_UNORM, 4, 1) \ - TEXTURE(IMPORTANCE_MAP, importanceMapWidth, importanceMapHeight, VK_FORMAT_R8_UNORM, 1, 1) \ - TEXTURE(IMPORTANCE_MAP_PONG, importanceMapWidth, importanceMapHeight, VK_FORMAT_R8_UNORM, 1, 1) - -typedef enum TextureID { -#define TEXTURE(name, _width, _height, _format, _array_size, _num_mips) TEXTURE_##name, - TEXTURES -#undef TEXTURE - NUM_TEXTURES -} TextureID; - -typedef struct TextureMetaData { - size_t widthOffset; - size_t heightOffset; - VkFormat format; - uint32_t arraySize; - uint32_t numMips; - const char *name; -} TextureMetaData; - -static const TextureMetaData TEXTURE_META_DATA[NUM_TEXTURES] = { -#define TEXTURE(name, width, height, format, array_size, num_mips) { FFX_CACAO_OFFSET_OF(BufferSizeInfo, width), FFX_CACAO_OFFSET_OF(BufferSizeInfo, height), format, array_size, num_mips, "FFX_CACAO_" #name }, - TEXTURES -#undef TEXTURE -}; - -// SHADER_RESOURCE_VIEW(name, texture, view_dimension, most_detailed_mip, mip_levels, first_array_slice, array_size) -#define SHADER_RESOURCE_VIEWS \ - SHADER_RESOURCE_VIEW(DEINTERLEAVED_DEPTHS, DEINTERLEAVED_DEPTHS, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 4, 0, 4) \ - SHADER_RESOURCE_VIEW(DEINTERLEAVED_DEPTHS_0, DEINTERLEAVED_DEPTHS, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 4, 0, 1) \ - SHADER_RESOURCE_VIEW(DEINTERLEAVED_DEPTHS_1, DEINTERLEAVED_DEPTHS, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 4, 1, 1) \ - SHADER_RESOURCE_VIEW(DEINTERLEAVED_DEPTHS_2, DEINTERLEAVED_DEPTHS, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 4, 2, 1) \ - SHADER_RESOURCE_VIEW(DEINTERLEAVED_DEPTHS_3, DEINTERLEAVED_DEPTHS, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 4, 3, 1) \ - SHADER_RESOURCE_VIEW(DEINTERLEAVED_NORMALS, DEINTERLEAVED_NORMALS, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 1, 0, 4) \ - SHADER_RESOURCE_VIEW(IMPORTANCE_MAP, IMPORTANCE_MAP, VK_IMAGE_VIEW_TYPE_2D, 0, 1, 0, 1) \ - 
SHADER_RESOURCE_VIEW(IMPORTANCE_MAP_PONG, IMPORTANCE_MAP_PONG, VK_IMAGE_VIEW_TYPE_2D, 0, 1, 0, 1) \ - SHADER_RESOURCE_VIEW(SSAO_BUFFER_PING, SSAO_BUFFER_PING, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 1, 0, 4) \ - SHADER_RESOURCE_VIEW(SSAO_BUFFER_PING_0, SSAO_BUFFER_PING, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 1, 0, 1) \ - SHADER_RESOURCE_VIEW(SSAO_BUFFER_PING_1, SSAO_BUFFER_PING, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 1, 1, 1) \ - SHADER_RESOURCE_VIEW(SSAO_BUFFER_PING_2, SSAO_BUFFER_PING, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 1, 2, 1) \ - SHADER_RESOURCE_VIEW(SSAO_BUFFER_PING_3, SSAO_BUFFER_PING, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 1, 3, 1) \ - SHADER_RESOURCE_VIEW(SSAO_BUFFER_PONG, SSAO_BUFFER_PONG, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 1, 0, 4) \ - SHADER_RESOURCE_VIEW(SSAO_BUFFER_PONG_0, SSAO_BUFFER_PONG, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 1, 0, 1) \ - SHADER_RESOURCE_VIEW(SSAO_BUFFER_PONG_1, SSAO_BUFFER_PONG, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 1, 1, 1) \ - SHADER_RESOURCE_VIEW(SSAO_BUFFER_PONG_2, SSAO_BUFFER_PONG, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 1, 2, 1) \ - SHADER_RESOURCE_VIEW(SSAO_BUFFER_PONG_3, SSAO_BUFFER_PONG, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 1, 3, 1) - -typedef enum ShaderResourceViewID { -#define SHADER_RESOURCE_VIEW(name, _texture, _view_dimension, _most_detailed_mip, _mip_levels, _first_array_slice, _array_size) SRV_##name, - SHADER_RESOURCE_VIEWS -#undef SHADER_RESOURCE_VIEW - NUM_SHADER_RESOURCE_VIEWS -} ShaderResourceViewID; - -typedef struct ShaderResourceViewMetaData { - TextureID texture; - VkImageViewType viewType; - uint32_t mostDetailedMip; - uint32_t mipLevels; - uint32_t firstArraySlice; - uint32_t arraySize; -} ShaderResourceViewMetaData; - -static const ShaderResourceViewMetaData SRV_META_DATA[NUM_SHADER_RESOURCE_VIEWS] = { -#define SHADER_RESOURCE_VIEW(_name, texture, view_dimension, most_detailed_mip, mip_levels, first_array_slice, array_size) { TEXTURE_##texture, view_dimension, most_detailed_mip, mip_levels, first_array_slice, array_size }, - SHADER_RESOURCE_VIEWS -#undef 
SHADER_RESOURCE_VIEW -}; - -// UNORDERED_ACCESS_VIEW(name, texture, view_dimension, mip_slice, first_array_slice, array_size) -#define UNORDERED_ACCESS_VIEWS \ - UNORDERED_ACCESS_VIEW(DEINTERLEAVED_DEPTHS_MIP_0, DEINTERLEAVED_DEPTHS, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 0, 4) \ - UNORDERED_ACCESS_VIEW(DEINTERLEAVED_DEPTHS_MIP_1, DEINTERLEAVED_DEPTHS, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 1, 0, 4) \ - UNORDERED_ACCESS_VIEW(DEINTERLEAVED_DEPTHS_MIP_2, DEINTERLEAVED_DEPTHS, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 2, 0, 4) \ - UNORDERED_ACCESS_VIEW(DEINTERLEAVED_DEPTHS_MIP_3, DEINTERLEAVED_DEPTHS, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 3, 0, 4) \ - UNORDERED_ACCESS_VIEW(DEINTERLEAVED_NORMALS, DEINTERLEAVED_NORMALS, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 0, 4) \ - UNORDERED_ACCESS_VIEW(IMPORTANCE_MAP, IMPORTANCE_MAP, VK_IMAGE_VIEW_TYPE_2D, 0, 0, 1) \ - UNORDERED_ACCESS_VIEW(IMPORTANCE_MAP_PONG, IMPORTANCE_MAP_PONG, VK_IMAGE_VIEW_TYPE_2D, 0, 0, 1) \ - UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PING, SSAO_BUFFER_PING, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 0, 4) \ - UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PING_0, SSAO_BUFFER_PING, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 0, 1) \ - UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PING_1, SSAO_BUFFER_PING, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 1, 1) \ - UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PING_2, SSAO_BUFFER_PING, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 2, 1) \ - UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PING_3, SSAO_BUFFER_PING, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 3, 1) \ - UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PONG, SSAO_BUFFER_PONG, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 0, 4) \ - UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PONG_0, SSAO_BUFFER_PONG, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 0, 1) \ - UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PONG_1, SSAO_BUFFER_PONG, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 1, 1) \ - UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PONG_2, SSAO_BUFFER_PONG, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 2, 1) \ - UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PONG_3, SSAO_BUFFER_PONG, VK_IMAGE_VIEW_TYPE_2D_ARRAY, 0, 3, 1) - -typedef enum UnorderedAccessViewID { -#define 
UNORDERED_ACCESS_VIEW(name, _texture, _view_dimension, _mip_slice, _first_array_slice, _array_size) UAV_##name, - UNORDERED_ACCESS_VIEWS -#undef UNORDERED_ACCESS_VIEW - NUM_UNORDERED_ACCESS_VIEWS -} UnorderedAccessViewID; - -typedef struct UnorderedAccessViewMetaData { - TextureID textureID; - VkImageViewType viewType; - uint32_t mostDetailedMip; - uint32_t firstArraySlice; - uint32_t arraySize; -} UnorderedAccessViewMetaData; - -static const UnorderedAccessViewMetaData UAV_META_DATA[NUM_UNORDERED_ACCESS_VIEWS] = { -#define UNORDERED_ACCESS_VIEW(_name, texture, view_dimension, mip_slice, first_array_slice, array_size) { TEXTURE_##texture, view_dimension, mip_slice, first_array_slice, array_size }, - UNORDERED_ACCESS_VIEWS -#undef UNORDERED_ACCESS_VIEW -}; - -// DESCRIPTOR_SET(name, layout_name, pass) -#define DESCRIPTOR_SETS \ - DESCRIPTOR_SET(CLEAR_LOAD_COUNTER, CLEAR_LOAD_COUNTER, 0) \ - DESCRIPTOR_SET(PREPARE_DEPTHS, PREPARE_DEPTHS, 0) \ - DESCRIPTOR_SET(PREPARE_DEPTHS_MIPS, PREPARE_DEPTHS_MIPS, 0) \ - DESCRIPTOR_SET(PREPARE_NORMALS, PREPARE_NORMALS, 0) \ - DESCRIPTOR_SET(PREPARE_NORMALS_FROM_INPUT_NORMALS, PREPARE_NORMALS_FROM_INPUT_NORMALS, 0) \ - DESCRIPTOR_SET(GENERATE_ADAPTIVE_BASE_0, GENERATE, 0) \ - DESCRIPTOR_SET(GENERATE_ADAPTIVE_BASE_1, GENERATE, 1) \ - DESCRIPTOR_SET(GENERATE_ADAPTIVE_BASE_2, GENERATE, 2) \ - DESCRIPTOR_SET(GENERATE_ADAPTIVE_BASE_3, GENERATE, 3) \ - DESCRIPTOR_SET(GENERATE_0, GENERATE, 0) \ - DESCRIPTOR_SET(GENERATE_1, GENERATE, 1) \ - DESCRIPTOR_SET(GENERATE_2, GENERATE, 2) \ - DESCRIPTOR_SET(GENERATE_3, GENERATE, 3) \ - DESCRIPTOR_SET(GENERATE_ADAPTIVE_0, GENERATE_ADAPTIVE, 0) \ - DESCRIPTOR_SET(GENERATE_ADAPTIVE_1, GENERATE_ADAPTIVE, 1) \ - DESCRIPTOR_SET(GENERATE_ADAPTIVE_2, GENERATE_ADAPTIVE, 2) \ - DESCRIPTOR_SET(GENERATE_ADAPTIVE_3, GENERATE_ADAPTIVE, 3) \ - DESCRIPTOR_SET(GENERATE_IMPORTANCE_MAP, GENERATE_IMPORTANCE_MAP, 0) \ - DESCRIPTOR_SET(POSTPROCESS_IMPORTANCE_MAP_A, POSTPROCESS_IMPORTANCE_MAP_A, 0) \ - 
DESCRIPTOR_SET(POSTPROCESS_IMPORTANCE_MAP_B, POSTPROCESS_IMPORTANCE_MAP_B, 0) \ - DESCRIPTOR_SET(EDGE_SENSITIVE_BLUR_0, EDGE_SENSITIVE_BLUR, 0) \ - DESCRIPTOR_SET(EDGE_SENSITIVE_BLUR_1, EDGE_SENSITIVE_BLUR, 1) \ - DESCRIPTOR_SET(EDGE_SENSITIVE_BLUR_2, EDGE_SENSITIVE_BLUR, 2) \ - DESCRIPTOR_SET(EDGE_SENSITIVE_BLUR_3, EDGE_SENSITIVE_BLUR, 3) \ - DESCRIPTOR_SET(APPLY_PING, APPLY, 0) \ - DESCRIPTOR_SET(APPLY_PONG, APPLY, 0) \ - DESCRIPTOR_SET(BILATERAL_UPSAMPLE_PING, BILATERAL_UPSAMPLE, 0) \ - DESCRIPTOR_SET(BILATERAL_UPSAMPLE_PONG, BILATERAL_UPSAMPLE, 0) - -typedef enum DescriptorSetID { -#define DESCRIPTOR_SET(name, _layout_name, _pass) DS_##name, - DESCRIPTOR_SETS -#undef DESCRIPTOR_SET - NUM_DESCRIPTOR_SETS -} DescriptorSetID; - -typedef struct DescriptorSetMetaData { - DescriptorSetLayoutID descriptorSetLayoutID; - uint32_t pass; - const char *name; -} DescriptorSetMetaData; - -static const DescriptorSetMetaData DESCRIPTOR_SET_META_DATA[NUM_DESCRIPTOR_SETS] = { -#define DESCRIPTOR_SET(name, layout_name, pass) { DSL_##layout_name, pass, "FFX_CACAO_DS_" #name }, - DESCRIPTOR_SETS -#undef DESCRIPTOR_SET -}; - -// INPUT_DESCRIPTOR(descriptor_set_name, srv_name, binding_num) -#define INPUT_DESCRIPTOR_BINDINGS \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_0, DEINTERLEAVED_DEPTHS_0, 0) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_0, DEINTERLEAVED_NORMALS, 6) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_1, DEINTERLEAVED_DEPTHS_1, 0) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_1, DEINTERLEAVED_NORMALS, 6) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_2, DEINTERLEAVED_DEPTHS_2, 0) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_2, DEINTERLEAVED_NORMALS, 6) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_3, DEINTERLEAVED_DEPTHS_3, 0) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_3, DEINTERLEAVED_NORMALS, 6) \ - \ - INPUT_DESCRIPTOR_BINDING(GENERATE_0, DEINTERLEAVED_DEPTHS_0, 0) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_0, 
DEINTERLEAVED_NORMALS, 6) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_1, DEINTERLEAVED_DEPTHS_1, 0) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_1, DEINTERLEAVED_NORMALS, 6) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_2, DEINTERLEAVED_DEPTHS_2, 0) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_2, DEINTERLEAVED_NORMALS, 6) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_3, DEINTERLEAVED_DEPTHS_3, 0) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_3, DEINTERLEAVED_NORMALS, 6) \ - \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_0, DEINTERLEAVED_DEPTHS_0, 0) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_0, IMPORTANCE_MAP, 3) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_0, SSAO_BUFFER_PONG_0, 4) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_0, DEINTERLEAVED_NORMALS, 6) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_1, DEINTERLEAVED_DEPTHS_1, 0) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_1, IMPORTANCE_MAP, 3) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_1, SSAO_BUFFER_PONG_1, 4) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_1, DEINTERLEAVED_NORMALS, 6) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_2, DEINTERLEAVED_DEPTHS_2, 0) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_2, IMPORTANCE_MAP, 3) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_2, SSAO_BUFFER_PONG_2, 4) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_2, DEINTERLEAVED_NORMALS, 6) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_3, DEINTERLEAVED_DEPTHS_3, 0) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_3, IMPORTANCE_MAP, 3) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_3, SSAO_BUFFER_PONG_3, 4) \ - INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_3, DEINTERLEAVED_NORMALS, 6) \ - \ - INPUT_DESCRIPTOR_BINDING(GENERATE_IMPORTANCE_MAP, SSAO_BUFFER_PONG, 0) \ - INPUT_DESCRIPTOR_BINDING(POSTPROCESS_IMPORTANCE_MAP_A, IMPORTANCE_MAP, 0) \ - INPUT_DESCRIPTOR_BINDING(POSTPROCESS_IMPORTANCE_MAP_B, IMPORTANCE_MAP_PONG, 0) \ - \ - INPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_0, SSAO_BUFFER_PING_0, 0) \ - INPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_1, 
SSAO_BUFFER_PING_1, 0) \ - INPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_2, SSAO_BUFFER_PING_2, 0) \ - INPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_3, SSAO_BUFFER_PING_3, 0) \ - \ - INPUT_DESCRIPTOR_BINDING(BILATERAL_UPSAMPLE_PING, SSAO_BUFFER_PING, 0) \ - INPUT_DESCRIPTOR_BINDING(BILATERAL_UPSAMPLE_PING, DEINTERLEAVED_DEPTHS, 3) \ - INPUT_DESCRIPTOR_BINDING(BILATERAL_UPSAMPLE_PONG, SSAO_BUFFER_PONG, 0) \ - INPUT_DESCRIPTOR_BINDING(BILATERAL_UPSAMPLE_PONG, DEINTERLEAVED_DEPTHS, 3) \ - \ - INPUT_DESCRIPTOR_BINDING(APPLY_PING, SSAO_BUFFER_PING, 0) \ - INPUT_DESCRIPTOR_BINDING(APPLY_PONG, SSAO_BUFFER_PONG, 0) - -// need this to define NUM_INPUT_DESCRIPTOR_BINDINGS -typedef enum InputDescriptorBindingID { -#define INPUT_DESCRIPTOR_BINDING(descriptor_set_name, srv_name, _binding_num) INPUT_DESCRIPTOR_BINDING_##descriptor_set_name##_##srv_name, - INPUT_DESCRIPTOR_BINDINGS -#undef INPUT_DESCRIPTOR_BINDING - NUM_INPUT_DESCRIPTOR_BINDINGS -} InputDescriptorBindingID; - -typedef struct InputDescriptorBindingMetaData { - DescriptorSetID descriptorID; - ShaderResourceViewID srvID; - uint32_t bindingNumber; -} InputDescriptorBindingMetaData; - -static const InputDescriptorBindingMetaData INPUT_DESCRIPTOR_BINDING_META_DATA[NUM_INPUT_DESCRIPTOR_BINDINGS] = { -#define INPUT_DESCRIPTOR_BINDING(descriptor_set_name, srv_name, binding_num) { DS_##descriptor_set_name, SRV_##srv_name, binding_num }, - INPUT_DESCRIPTOR_BINDINGS -#undef INPUT_DESCRIPTOR_BINDING -}; - -// OUTPUT_DESCRIPTOR(descriptor_set_name, uav_name, binding_num) -#define OUTPUT_DESCRIPTOR_BINDINGS \ - OUTPUT_DESCRIPTOR_BINDING(PREPARE_DEPTHS, DEINTERLEAVED_DEPTHS_MIP_0, 0) \ - OUTPUT_DESCRIPTOR_BINDING(PREPARE_DEPTHS_MIPS, DEINTERLEAVED_DEPTHS_MIP_0, 0) \ - OUTPUT_DESCRIPTOR_BINDING(PREPARE_DEPTHS_MIPS, DEINTERLEAVED_DEPTHS_MIP_1, 1) \ - OUTPUT_DESCRIPTOR_BINDING(PREPARE_DEPTHS_MIPS, DEINTERLEAVED_DEPTHS_MIP_2, 2) \ - OUTPUT_DESCRIPTOR_BINDING(PREPARE_DEPTHS_MIPS, DEINTERLEAVED_DEPTHS_MIP_3, 3) \ - 
OUTPUT_DESCRIPTOR_BINDING(PREPARE_NORMALS, DEINTERLEAVED_NORMALS, 0) \ - OUTPUT_DESCRIPTOR_BINDING(PREPARE_NORMALS_FROM_INPUT_NORMALS, DEINTERLEAVED_NORMALS, 0) \ - OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_0, SSAO_BUFFER_PONG_0, 0) \ - OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_1, SSAO_BUFFER_PONG_1, 0) \ - OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_2, SSAO_BUFFER_PONG_2, 0) \ - OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_3, SSAO_BUFFER_PONG_3, 0) \ - OUTPUT_DESCRIPTOR_BINDING(GENERATE_0, SSAO_BUFFER_PING_0, 0) \ - OUTPUT_DESCRIPTOR_BINDING(GENERATE_1, SSAO_BUFFER_PING_1, 0) \ - OUTPUT_DESCRIPTOR_BINDING(GENERATE_2, SSAO_BUFFER_PING_2, 0) \ - OUTPUT_DESCRIPTOR_BINDING(GENERATE_3, SSAO_BUFFER_PING_3, 0) \ - OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_0, SSAO_BUFFER_PING_0, 0) \ - OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_1, SSAO_BUFFER_PING_1, 0) \ - OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_2, SSAO_BUFFER_PING_2, 0) \ - OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_3, SSAO_BUFFER_PING_3, 0) \ - OUTPUT_DESCRIPTOR_BINDING(GENERATE_IMPORTANCE_MAP, IMPORTANCE_MAP, 0) \ - OUTPUT_DESCRIPTOR_BINDING(POSTPROCESS_IMPORTANCE_MAP_A, IMPORTANCE_MAP_PONG, 0) \ - OUTPUT_DESCRIPTOR_BINDING(POSTPROCESS_IMPORTANCE_MAP_B, IMPORTANCE_MAP, 0) \ - OUTPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_0, SSAO_BUFFER_PONG_0, 0) \ - OUTPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_1, SSAO_BUFFER_PONG_1, 0) \ - OUTPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_2, SSAO_BUFFER_PONG_2, 0) \ - OUTPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_3, SSAO_BUFFER_PONG_3, 0) - -typedef enum OutputDescriptorBindingID { -#define OUTPUT_DESCRIPTOR_BINDING(descriptor_set_name, uav_name, _binding_num) OUTPUT_DESCRIPTOR_BINDING_##descriptor_set_name##_##uav_name, - OUTPUT_DESCRIPTOR_BINDINGS -#undef OUTPUT_DESCRIPTOR_BINDING - NUM_OUTPUT_DESCRIPTOR_BINDINGS -} OutputDescriptorBindingID; - -typedef struct OutputDescriptorBindingMetaData { - DescriptorSetID descriptorID; - UnorderedAccessViewID 
uavID; - uint32_t bindingNumber; -} OutputDescriptorBindingMetaData; - -static const OutputDescriptorBindingMetaData OUTPUT_DESCRIPTOR_BINDING_META_DATA[NUM_OUTPUT_DESCRIPTOR_BINDINGS] = { -#define OUTPUT_DESCRIPTOR_BINDING(descriptor_set_name, uav_name, binding_num) { DS_##descriptor_set_name, UAV_##uav_name, binding_num }, - OUTPUT_DESCRIPTOR_BINDINGS -#undef OUTPUT_DESCRIPTOR_BINDING -}; - -#define NUM_BACK_BUFFERS 3 -#define NUM_SAMPLERS 5 -typedef struct FfxCacaoVkContext { - FfxCacaoSettings settings; - FfxCacaoBool useDownsampledSsao; - BufferSizeInfo bufferSizeInfo; - -#ifdef FFX_CACAO_ENABLE_PROFILING - VkQueryPool timestampQueryPool; - uint32_t collectBuffer; - struct { - TimestampID timestamps[NUM_TIMESTAMPS]; - uint64_t timings[NUM_TIMESTAMPS]; - uint32_t numTimestamps; - } timestampQueries[NUM_BACK_BUFFERS]; -#endif - - VkPhysicalDevice physicalDevice; - VkDevice device; - PFN_vkCmdDebugMarkerBeginEXT vkCmdDebugMarkerBegin; - PFN_vkCmdDebugMarkerEndEXT vkCmdDebugMarkerEnd; - PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectName; - - - VkDescriptorSetLayout descriptorSetLayouts[NUM_DESCRIPTOR_SET_LAYOUTS]; - VkPipelineLayout pipelineLayouts[NUM_DESCRIPTOR_SET_LAYOUTS]; - - VkShaderModule computeShaders[NUM_COMPUTE_SHADERS]; - VkPipeline computePipelines[NUM_COMPUTE_SHADERS]; - - VkDescriptorSet descriptorSets[NUM_BACK_BUFFERS][NUM_DESCRIPTOR_SETS]; - VkDescriptorPool descriptorPool; - - VkSampler samplers[NUM_SAMPLERS]; - - VkImage textures[NUM_TEXTURES]; - VkDeviceMemory textureMemory[NUM_TEXTURES]; - VkImageView shaderResourceViews[NUM_SHADER_RESOURCE_VIEWS]; - VkImageView unorderedAccessViews[NUM_UNORDERED_ACCESS_VIEWS]; - - VkImage loadCounter; - VkDeviceMemory loadCounterMemory; - VkImageView loadCounterView; - - VkImage output; - - uint32_t currentConstantBuffer; - VkBuffer constantBuffer[NUM_BACK_BUFFERS][4]; - VkDeviceMemory constantBufferMemory[NUM_BACK_BUFFERS][4]; -} FfxCacaoVkContext; - -static inline FfxCacaoVkContext* 
getAlignedVkContextPointer(FfxCacaoVkContext* ptr) -{ - uintptr_t tmp = (uintptr_t)ptr; - tmp = (tmp + alignof(FfxCacaoVkContext) - 1) & (~(alignof(FfxCacaoVkContext) - 1)); - return (FfxCacaoVkContext*)tmp; -} -#endif - -// ================================================================================= -// Interface -// ================================================================================= - -#ifdef __cplusplus -extern "C" -{ -#endif - -#ifdef FFX_CACAO_ENABLE_D3D12 -size_t ffxCacaoD3D12GetContextSize() -{ - return sizeof(FfxCacaoD3D12Context) + alignof(FfxCacaoD3D12Context) - 1; -} - -FfxCacaoStatus ffxCacaoD3D12InitContext(FfxCacaoD3D12Context* context, ID3D12Device* device) -{ - if (context == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - if (device == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - context = getAlignedD3D12ContextPointer(context); - -#define COMPUTE_SHADER_INIT(name, entryPoint, uavSize, srvSize) \ - errorStatus = computeShaderInit(&context->name, device, #entryPoint, entryPoint ## DXIL, sizeof(entryPoint ## DXIL), uavSize, srvSize, samplers, FFX_CACAO_ARRAY_SIZE(samplers)); \ - if (errorStatus) \ - { \ - goto error_create_ ## entryPoint; \ - } -#define ERROR_COMPUTE_SHADER_DESTROY(name, entryPoint) \ - computeShaderDestroy(&context->name); \ -error_create_ ## entryPoint: - - FfxCacaoStatus errorStatus = FFX_CACAO_STATUS_FAILED; - - context->device = device; - CbvSrvUavHeap *cbvSrvUavHeap = &context->cbvSrvUavHeap; - errorStatus = cbvSrvUavHeapInit(cbvSrvUavHeap, device, 256); - if (errorStatus) - { - goto error_create_cbv_srv_uav_heap; - } - errorStatus = constantBufferRingInit(&context->constantBufferRing, device, 5, 1024 * 5); - if (errorStatus) - { - goto error_create_constant_buffer_ring; - } -#ifdef FFX_CACAO_ENABLE_PROFILING - errorStatus = gpuTimerInit(&context->gpuTimer, device); - if (errorStatus) - { - goto error_create_gpu_timer; - } -#endif - - D3D12_STATIC_SAMPLER_DESC samplers[5] = { }; - - 
samplers[0].Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; - samplers[0].AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplers[0].AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplers[0].AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplers[0].ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; - samplers[0].BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; - samplers[0].MinLOD = 0.0f; - samplers[0].MaxLOD = D3D12_FLOAT32_MAX; - samplers[0].MipLODBias = 0; - samplers[0].MaxAnisotropy = 1; - samplers[0].ShaderRegister = 0; - samplers[0].RegisterSpace = 0; - samplers[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - - samplers[1].Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; - samplers[1].AddressU = D3D12_TEXTURE_ADDRESS_MODE_MIRROR; - samplers[1].AddressV = D3D12_TEXTURE_ADDRESS_MODE_MIRROR; - samplers[1].AddressW = D3D12_TEXTURE_ADDRESS_MODE_MIRROR; - samplers[1].ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; - samplers[1].BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; - samplers[1].MinLOD = 0.0f; - samplers[1].MaxLOD = D3D12_FLOAT32_MAX; - samplers[1].MipLODBias = 0; - samplers[1].MaxAnisotropy = 1; - samplers[1].ShaderRegister = 1; - samplers[1].RegisterSpace = 0; - samplers[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - - samplers[2].Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; - samplers[2].AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplers[2].AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplers[2].AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplers[2].ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; - samplers[2].BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; - samplers[2].MinLOD = 0.0f; - samplers[2].MaxLOD = D3D12_FLOAT32_MAX; - samplers[2].MipLODBias = 0; - samplers[2].MaxAnisotropy = 1; - samplers[2].ShaderRegister = 2; - samplers[2].RegisterSpace = 0; - samplers[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - - samplers[3].Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; - samplers[3].AddressU = 
D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplers[3].AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplers[3].AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; - samplers[3].ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; - samplers[3].BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; - samplers[3].MinLOD = 0.0f; - samplers[3].MaxLOD = D3D12_FLOAT32_MAX; - samplers[3].MipLODBias = 0; - samplers[3].MaxAnisotropy = 1; - samplers[3].ShaderRegister = 3; - samplers[3].RegisterSpace = 0; - samplers[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - - samplers[4].Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; - samplers[4].AddressU = D3D12_TEXTURE_ADDRESS_MODE_BORDER; - samplers[4].AddressV = D3D12_TEXTURE_ADDRESS_MODE_BORDER; - samplers[4].AddressW = D3D12_TEXTURE_ADDRESS_MODE_BORDER; - samplers[4].ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; - samplers[4].BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; - samplers[4].MinLOD = 0.0f; - samplers[4].MaxLOD = D3D12_FLOAT32_MAX; - samplers[4].MipLODBias = 0; - samplers[4].MaxAnisotropy = 1; - samplers[4].ShaderRegister = 4; - samplers[4].RegisterSpace = 0; - samplers[4].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; - - // ===================================== - // Prepare shaders/resources - - COMPUTE_SHADER_INIT(prepareDownsampledDepthsHalf, CSPrepareDownsampledDepthsHalf, 1, 1); - COMPUTE_SHADER_INIT(prepareNativeDepthsHalf, CSPrepareNativeDepthsHalf, 1, 1); - - COMPUTE_SHADER_INIT(prepareDownsampledDepthsAndMips, CSPrepareDownsampledDepthsAndMips, 4, 1); - COMPUTE_SHADER_INIT(prepareNativeDepthsAndMips, CSPrepareNativeDepthsAndMips, 4, 1); - - COMPUTE_SHADER_INIT(prepareDownsampledNormals, CSPrepareDownsampledNormals, 1, 1); - COMPUTE_SHADER_INIT(prepareNativeNormals, CSPrepareNativeNormals, 1, 1); - - COMPUTE_SHADER_INIT(prepareDownsampledNormalsFromInputNormals, CSPrepareDownsampledNormalsFromInputNormals, 1, 1); - COMPUTE_SHADER_INIT(prepareNativeNormalsFromInputNormals, CSPrepareNativeNormalsFromInputNormals, 
1, 1); - - COMPUTE_SHADER_INIT(prepareDownsampledDepths, CSPrepareDownsampledDepths, 1, 1); - COMPUTE_SHADER_INIT(prepareNativeDepths, CSPrepareNativeDepths, 1, 1); - - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->prepareDepthsAndMipsOutputs, 4); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->prepareDepthsOutputs, 1); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->prepareDepthsNormalsAndMipsInputs, 1); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->prepareNormalsOutput, 1); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->prepareNormalsFromInputNormalsInput, 1); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->prepareNormalsFromInputNormalsOutput, 1); - - // ===================================== - // Generate SSAO shaders/resources - - COMPUTE_SHADER_INIT(generateSSAO[0], CSGenerateQ0, 1, 7); - COMPUTE_SHADER_INIT(generateSSAO[1], CSGenerateQ1, 1, 7); - COMPUTE_SHADER_INIT(generateSSAO[2], CSGenerateQ2, 1, 7); - COMPUTE_SHADER_INIT(generateSSAO[3], CSGenerateQ3, 1, 7); - COMPUTE_SHADER_INIT(generateSSAO[4], CSGenerateQ3Base, 2, 7); - - for (int i = 0; i < 4; ++i) - { - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->generateSSAOInputs[i], 7); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->generateAdaptiveSSAOInputs[i], 7); - - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->generateSSAOOutputsPing[i], 1); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->generateSSAOOutputsPong[i], 1); - } - - // ===================================== - // Importance map shaders/resources - - COMPUTE_SHADER_INIT(generateImportanceMap, CSGenerateImportanceMap, 1, 1); - COMPUTE_SHADER_INIT(postprocessImportanceMapA, CSPostprocessImportanceMapA, 1, 1); - COMPUTE_SHADER_INIT(postprocessImportanceMapB, CSPostprocessImportanceMapB, 2, 1); - - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->generateImportanceMapInputs, 1); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->generateImportanceMapOutputs, 
1); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->generateImportanceMapAInputs, 1); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->generateImportanceMapAOutputs, 1); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->generateImportanceMapBInputs, 1); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->generateImportanceMapBOutputs, 2); - - // ===================================== - // De-interleave Blur shaders/resources - - COMPUTE_SHADER_INIT(edgeSensitiveBlur[0], CSEdgeSensitiveBlur1, 1, 1); - COMPUTE_SHADER_INIT(edgeSensitiveBlur[1], CSEdgeSensitiveBlur2, 1, 1); - COMPUTE_SHADER_INIT(edgeSensitiveBlur[2], CSEdgeSensitiveBlur3, 1, 1); - COMPUTE_SHADER_INIT(edgeSensitiveBlur[3], CSEdgeSensitiveBlur4, 1, 1); - COMPUTE_SHADER_INIT(edgeSensitiveBlur[4], CSEdgeSensitiveBlur5, 1, 1); - COMPUTE_SHADER_INIT(edgeSensitiveBlur[5], CSEdgeSensitiveBlur6, 1, 1); - COMPUTE_SHADER_INIT(edgeSensitiveBlur[6], CSEdgeSensitiveBlur7, 1, 1); - COMPUTE_SHADER_INIT(edgeSensitiveBlur[7], CSEdgeSensitiveBlur8, 1, 1); - - for (int i = 0; i < 4; ++i) - { - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->edgeSensitiveBlurOutput[i], 1); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->edgeSensitiveBlurInput[i], 1); - } - - // ===================================== - // Apply shaders/resources - - COMPUTE_SHADER_INIT(smartApply, CSApply, 1, 1); - COMPUTE_SHADER_INIT(nonSmartApply, CSNonSmartApply, 1, 1); - COMPUTE_SHADER_INIT(nonSmartHalfApply, CSNonSmartHalfApply, 1, 1); - - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->createOutputInputsPing, 1); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->createOutputInputsPong, 1); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->createOutputOutputs, 1); - - // ===================================== - // Upacale shaders/resources - - COMPUTE_SHADER_INIT(upscaleBilateral5x5, CSUpscaleBilateral5x5, 1, 4); - COMPUTE_SHADER_INIT(upscaleBilateral5x5Half, CSUpscaleBilateral5x5Half, 1, 4); - - 
cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->bilateralUpscaleInputsPing, 4); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->bilateralUpscaleInputsPong, 4); - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->bilateralUpscaleOutputs, 1); - - // ===================================== - // Misc - - errorStatus = textureInit(&context->loadCounter, device, "CACAO::m_loadCounter", &CD3DX12_RESOURCE_DESC::Tex1D(DXGI_FORMAT_R32_UINT, 1, 1, 1, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, NULL); - if (errorStatus) - { - goto error_create_load_counter_texture; - } - - // create uav for load counter - { - D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - uavDesc.Format = DXGI_FORMAT_R32_UINT; - uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; - uavDesc.Texture1D.MipSlice = 0; - - cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->loadCounterUav, 1); // required for clearing the load counter - textureCreateUavFromDesc(&context->loadCounter, 0, &context->loadCounterUav, &uavDesc); - } - - return FFX_CACAO_STATUS_OK; - -error_create_load_counter_texture: - - ERROR_COMPUTE_SHADER_DESTROY(upscaleBilateral5x5Half, CSUpscaleBilateral5x5Half); - ERROR_COMPUTE_SHADER_DESTROY(upscaleBilateral5x5, CSUpscaleBilateral5x5); - - ERROR_COMPUTE_SHADER_DESTROY(nonSmartHalfApply, CSNonSmartHalfApply); - ERROR_COMPUTE_SHADER_DESTROY(nonSmartApply, CSNonSmartApply); - ERROR_COMPUTE_SHADER_DESTROY(smartApply, CSApply); - - ERROR_COMPUTE_SHADER_DESTROY(edgeSensitiveBlur[7], CSEdgeSensitiveBlur8); - ERROR_COMPUTE_SHADER_DESTROY(edgeSensitiveBlur[6], CSEdgeSensitiveBlur7); - ERROR_COMPUTE_SHADER_DESTROY(edgeSensitiveBlur[5], CSEdgeSensitiveBlur6); - ERROR_COMPUTE_SHADER_DESTROY(edgeSensitiveBlur[4], CSEdgeSensitiveBlur5); - ERROR_COMPUTE_SHADER_DESTROY(edgeSensitiveBlur[3], CSEdgeSensitiveBlur4); - ERROR_COMPUTE_SHADER_DESTROY(edgeSensitiveBlur[2], CSEdgeSensitiveBlur3); - ERROR_COMPUTE_SHADER_DESTROY(edgeSensitiveBlur[1], 
CSEdgeSensitiveBlur2); - ERROR_COMPUTE_SHADER_DESTROY(edgeSensitiveBlur[0], CSEdgeSensitiveBlur1); - - ERROR_COMPUTE_SHADER_DESTROY(postprocessImportanceMapB, CSPostprocessImportanceMapB); - ERROR_COMPUTE_SHADER_DESTROY(postprocessImportanceMapA, CSPostprocessImportanceMapA); - ERROR_COMPUTE_SHADER_DESTROY(generateImportanceMap, CSGenerateImportanceMap); - - ERROR_COMPUTE_SHADER_DESTROY(generateSSAO[4], CSGenerateQ3Base); - ERROR_COMPUTE_SHADER_DESTROY(generateSSAO[3], CSGenerateQ3); - ERROR_COMPUTE_SHADER_DESTROY(generateSSAO[2], CSGenerateQ2); - ERROR_COMPUTE_SHADER_DESTROY(generateSSAO[1], CSGenerateQ1); - ERROR_COMPUTE_SHADER_DESTROY(generateSSAO[0], CSGenerateQ0); - - ERROR_COMPUTE_SHADER_DESTROY(prepareNativeDepths, CSPrepareNativeDepths); - ERROR_COMPUTE_SHADER_DESTROY(prepareDownsampledDepths, CSPrepareDownsampledDepths); - - ERROR_COMPUTE_SHADER_DESTROY(prepareNativeNormalsFromInputNormals, CSPrepareNativeNormalsFromInputNormals); - ERROR_COMPUTE_SHADER_DESTROY(prepareDownsampledNormalsFromInputNormals, CSPrepareDownsampledNormalsFromInputNormals); - - ERROR_COMPUTE_SHADER_DESTROY(prepareNativeNormals, CSPrepareNativeNormals); - ERROR_COMPUTE_SHADER_DESTROY(prepareDownsampledNormals, CSPrepareDownsampledNormals); - - ERROR_COMPUTE_SHADER_DESTROY(prepareNativeDepthsAndMips, CSPrepareNativeDepthsAndMips); - ERROR_COMPUTE_SHADER_DESTROY(prepareDownsampledDepthsAndMips, CSPrepareDownsampledDepthsAndMips); - - ERROR_COMPUTE_SHADER_DESTROY(prepareNativeDepthsHalf, CSPrepareNativeDepthsHalf); - ERROR_COMPUTE_SHADER_DESTROY(prepareDownsampledDepthsHalf, CSPrepareDownsampledDepthsHalf); - -#ifdef FFX_CACAO_ENABLE_PROFILING - gpuTimerDestroy(&context->gpuTimer); -error_create_gpu_timer: -#endif - constantBufferRingDestroy(&context->constantBufferRing); -error_create_constant_buffer_ring: - cbvSrvUavHeapDestroy(&context->cbvSrvUavHeap); -error_create_cbv_srv_uav_heap: - - return errorStatus; - -#undef COMPUTE_SHADER_INIT -#undef ERROR_COMPUTE_SHADER_DESTROY -} - 
-FfxCacaoStatus ffxCacaoD3D12DestroyContext(FfxCacaoD3D12Context* context) -{ - if (context == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - context = getAlignedD3D12ContextPointer(context); - - textureDestroy(&context->loadCounter); - - computeShaderDestroy(&context->upscaleBilateral5x5Half); - computeShaderDestroy(&context->upscaleBilateral5x5); - - computeShaderDestroy(&context->nonSmartHalfApply); - computeShaderDestroy(&context->nonSmartApply); - computeShaderDestroy(&context->smartApply); - - computeShaderDestroy(&context->edgeSensitiveBlur[7]); - computeShaderDestroy(&context->edgeSensitiveBlur[6]); - computeShaderDestroy(&context->edgeSensitiveBlur[5]); - computeShaderDestroy(&context->edgeSensitiveBlur[4]); - computeShaderDestroy(&context->edgeSensitiveBlur[3]); - computeShaderDestroy(&context->edgeSensitiveBlur[2]); - computeShaderDestroy(&context->edgeSensitiveBlur[1]); - computeShaderDestroy(&context->edgeSensitiveBlur[0]); - - computeShaderDestroy(&context->postprocessImportanceMapB); - computeShaderDestroy(&context->postprocessImportanceMapA); - computeShaderDestroy(&context->generateImportanceMap); - - computeShaderDestroy(&context->generateSSAO[4]); - computeShaderDestroy(&context->generateSSAO[3]); - computeShaderDestroy(&context->generateSSAO[2]); - computeShaderDestroy(&context->generateSSAO[1]); - computeShaderDestroy(&context->generateSSAO[0]); - - computeShaderDestroy(&context->prepareNativeDepths); - computeShaderDestroy(&context->prepareDownsampledDepths); - - computeShaderDestroy(&context->prepareNativeNormalsFromInputNormals); - computeShaderDestroy(&context->prepareDownsampledNormalsFromInputNormals); - - computeShaderDestroy(&context->prepareNativeNormals); - computeShaderDestroy(&context->prepareDownsampledNormals); - - computeShaderDestroy(&context->prepareNativeDepthsAndMips); - computeShaderDestroy(&context->prepareDownsampledDepthsAndMips); - - computeShaderDestroy(&context->prepareNativeDepthsHalf); - 
computeShaderDestroy(&context->prepareDownsampledDepthsHalf); - -#ifdef FFX_CACAO_ENABLE_PROFILING - gpuTimerDestroy(&context->gpuTimer); -#endif - constantBufferRingDestroy(&context->constantBufferRing); - cbvSrvUavHeapDestroy(&context->cbvSrvUavHeap); - - return FFX_CACAO_STATUS_OK; -} - -FfxCacaoStatus ffxCacaoD3D12InitScreenSizeDependentResources(FfxCacaoD3D12Context* context, const FfxCacaoD3D12ScreenSizeInfo* info) -{ - if (context == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - if (info == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - context = getAlignedD3D12ContextPointer(context); - -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - FfxCacaoBool useDownsampledSsao = info->useDownsampledSsao; -#else - FfxCacaoBool useDownsampledSsao = FFX_CACAO_TRUE; -#endif - context->useDownsampledSsao = useDownsampledSsao; - FfxCacaoStatus errorStatus; - -#define TEXTURE_INIT(name, label, format, width, height, arraySize, mipLevels) \ - errorStatus = textureInit(&context->name, device, "CACAO::" #name, &CD3DX12_RESOURCE_DESC::Tex2D(format, width, height, arraySize, mipLevels, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, NULL); \ - if (errorStatus) \ - { \ - goto error_create_texture_ ## label;\ - } -#define ERROR_TEXTURE_DESTROY(name, label) \ - textureDestroy(&context->name); \ -error_create_texture_ ## label: - - - ID3D12Device * device = context->device; - - uint32_t width = info->width; - uint32_t height = info->height; - uint32_t halfWidth = (width + 1) / 2; - uint32_t halfHeight = (height + 1) / 2; - uint32_t quarterWidth = (halfWidth + 1) / 2; - uint32_t quarterHeight = (halfHeight + 1) / 2; - uint32_t eighthWidth = (quarterWidth + 1) / 2; - uint32_t eighthHeight = (quarterHeight + 1) / 2; - -#if 1 - uint32_t depthBufferWidth = width; - uint32_t depthBufferHeight = height; - uint32_t depthBufferHalfWidth = halfWidth; - uint32_t depthBufferHalfHeight = halfHeight; - uint32_t depthBufferQuarterWidth 
= quarterWidth; - uint32_t depthBufferQuarterHeight = quarterHeight; - - uint32_t depthBufferXOffset = 0; - uint32_t depthBufferYOffset = 0; - uint32_t depthBufferHalfXOffset = 0; - uint32_t depthBufferHalfYOffset = 0; - uint32_t depthBufferQuarterXOffset = 0; - uint32_t depthBufferQuarterYOffset = 0; -#else - uint32_t depthBufferWidth = info->depthBufferWidth; - uint32_t depthBufferHeight = info->depthBufferHeight; - uint32_t depthBufferHalfWidth = (depthBufferWidth + 1) / 2; - uint32_t depthBufferHalfHeight = (depthBufferHeight + 1) / 2; - uint32_t depthBufferQuarterWidth = (depthBufferHalfWidth + 1) / 2; - uint32_t depthBufferQuarterHeight = (depthBufferHalfHeight + 1) / 2; - - uint32_t depthBufferXOffset = info->depthBufferXOffset; - uint32_t depthBufferYOffset = info->depthBufferYOffset; - uint32_t depthBufferHalfXOffset = (depthBufferXOffset + 1) / 2; // XXX - is this really right? - uint32_t depthBufferHalfYOffset = (depthBufferYOffset + 1) / 2; // XXX - is this really right? - uint32_t depthBufferQuarterXOffset = (depthBufferHalfXOffset + 1) / 2; // XXX - is this really right? - uint32_t depthBufferQuarterYOffset = (depthBufferHalfYOffset + 1) / 2; // XXX - is this really right? 
-#endif - - BufferSizeInfo bsi = {}; - bsi.inputOutputBufferWidth = width; - bsi.inputOutputBufferHeight = height; - bsi.depthBufferXOffset = depthBufferXOffset; - bsi.depthBufferYOffset = depthBufferYOffset; - bsi.depthBufferWidth = depthBufferWidth; - bsi.depthBufferHeight = depthBufferHeight; - - if (useDownsampledSsao) - { - bsi.ssaoBufferWidth = quarterWidth; - bsi.ssaoBufferHeight = quarterHeight; - bsi.deinterleavedDepthBufferXOffset = depthBufferQuarterXOffset; - bsi.deinterleavedDepthBufferYOffset = depthBufferQuarterYOffset; - bsi.deinterleavedDepthBufferWidth = depthBufferQuarterWidth; - bsi.deinterleavedDepthBufferHeight = depthBufferQuarterHeight; - bsi.importanceMapWidth = eighthWidth; - bsi.importanceMapHeight = eighthHeight; - } - else - { - bsi.ssaoBufferWidth = halfWidth; - bsi.ssaoBufferHeight = halfHeight; - bsi.deinterleavedDepthBufferXOffset = depthBufferHalfXOffset; - bsi.deinterleavedDepthBufferYOffset = depthBufferHalfYOffset; - bsi.deinterleavedDepthBufferWidth = depthBufferHalfWidth; - bsi.deinterleavedDepthBufferHeight = depthBufferHalfHeight; - bsi.importanceMapWidth = quarterWidth; - bsi.importanceMapHeight = quarterHeight; - } - - context->bufferSizeInfo = bsi; - - // ======================================= - // allocate intermediate textures - - TEXTURE_INIT(deinterleavedDepths, deinterleaved_depths, DXGI_FORMAT_R16_FLOAT, bsi.deinterleavedDepthBufferWidth, bsi.deinterleavedDepthBufferHeight, 4, 4); - TEXTURE_INIT(deinterleavedNormals, deinterleaved_normals, DXGI_FORMAT_R8G8B8A8_SNORM, bsi.ssaoBufferWidth, bsi.ssaoBufferHeight, 4, 1); - - TEXTURE_INIT(ssaoBufferPing, ssao_buffer_ping, DXGI_FORMAT_R8G8_UNORM, bsi.ssaoBufferWidth, bsi.ssaoBufferHeight, 4, 1); - TEXTURE_INIT(ssaoBufferPong, ssao_buffer_pong, DXGI_FORMAT_R8G8_UNORM, bsi.ssaoBufferWidth, bsi.ssaoBufferHeight, 4, 1); - - TEXTURE_INIT(importanceMap, importance_map, DXGI_FORMAT_R8_UNORM, bsi.importanceMapWidth, bsi.importanceMapHeight, 1, 1); - 
TEXTURE_INIT(importanceMapPong, importance_map_pong, DXGI_FORMAT_R8_UNORM, bsi.importanceMapWidth, bsi.importanceMapHeight, 1, 1); - - // ======================================= - // Init Prepare SRVs/UAVs - - for (int i = 0; i < 4; ++i) - { - textureCreateUav(&context->deinterleavedDepths, i, &context->prepareDepthsAndMipsOutputs, i, 4, 0); - } - textureCreateUav(&context->deinterleavedDepths, 0, &context->prepareDepthsOutputs, 0, 4, 0); - textureCreateUav(&context->deinterleavedNormals, 0, &context->prepareNormalsOutput, 0, 4, 0); - - device->CreateShaderResourceView(info->depthBufferResource, &info->depthBufferSrvDesc, context->prepareDepthsNormalsAndMipsInputs.cpuDescriptor); - - textureCreateUav(&context->deinterleavedNormals, 0, &context->prepareNormalsFromInputNormalsOutput, 0, 4, 0); - device->CreateShaderResourceView(info->normalBufferResource, &info->normalBufferSrvDesc, context->prepareNormalsFromInputNormalsInput.cpuDescriptor); - - // ======================================= - // Init Generate SSAO SRVs/UAVs - - for (int i = 0; i < 4; ++i) - { - D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; - srvDesc.Format = DXGI_FORMAT_R32_UINT; - srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; - srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - srvDesc.Texture1D.MostDetailedMip = 0; - srvDesc.Texture1D.MipLevels = 1; - - D3D12_SHADER_RESOURCE_VIEW_DESC zeroTextureSRVDesc = {}; - zeroTextureSRVDesc.Format = DXGI_FORMAT_R32_FLOAT; - zeroTextureSRVDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; - zeroTextureSRVDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; - zeroTextureSRVDesc.Texture1D.MostDetailedMip = 0; - zeroTextureSRVDesc.Texture1D.MipLevels = 1; - - D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - uavDesc.Format = DXGI_FORMAT_R32_UINT; - uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; - uavDesc.Texture1D.MipSlice = 0; - - textureCreateSrv(&context->deinterleavedDepths, 0, 
&context->generateSSAOInputs[i], -1, 1, i); - textureCreateSrv(&context->deinterleavedNormals, 6, &context->generateSSAOInputs[i], 0, 4, 0); - - - textureCreateSrv(&context->deinterleavedDepths, 0, &context->generateAdaptiveSSAOInputs[i], -1, 1, i); - textureCreateSrvFromDesc(&context->loadCounter, 2, &context->generateAdaptiveSSAOInputs[i], &srvDesc); - textureCreateSrv(&context->importanceMap, 3, &context->generateAdaptiveSSAOInputs[i], -1, -1, -1); - textureCreateSrv(&context->ssaoBufferPong, 4, &context->generateAdaptiveSSAOInputs[i], -1, -1, -1); - textureCreateSrv(&context->deinterleavedNormals, 6, &context->generateAdaptiveSSAOInputs[i], 0, 4, 0); - - textureCreateUav(&context->ssaoBufferPing, 0, &context->generateSSAOOutputsPing[i], 0, 1, i); - - textureCreateUav(&context->ssaoBufferPong, 0, &context->generateSSAOOutputsPong[i], 0, 1, i); - - } - - // ======================================= - // Init Generate/Postprocess Importance map SRVs/UAVs - - { - D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; - uavDesc.Format = DXGI_FORMAT_R32_UINT; - uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; - uavDesc.Texture1D.MipSlice = 0; - - textureCreateSrv(&context->ssaoBufferPong, 0, &context->generateImportanceMapInputs, -1, -1, -1); - textureCreateUav(&context->importanceMap, 0, &context->generateImportanceMapOutputs, -1, -1, -1); - - textureCreateSrv(&context->importanceMap, 0, &context->generateImportanceMapAInputs, -1, -1, -1); - textureCreateUav(&context->importanceMapPong, 0, &context->generateImportanceMapAOutputs, -1, -1, -1); - - textureCreateSrv(&context->importanceMapPong, 0, &context->generateImportanceMapBInputs, -1, -1, -1); - textureCreateUav(&context->importanceMap, 0, &context->generateImportanceMapBOutputs, -1, -1, -1); - textureCreateUavFromDesc(&context->loadCounter, 1, &context->generateImportanceMapBOutputs, &uavDesc); - } - - // ======================================= - // Init De-interleave Blur SRVs/UAVs - - for (int i = 0; i < 4; ++i) - 
{ - textureCreateSrv(&context->ssaoBufferPing, 0, &context->edgeSensitiveBlurInput[i], 0, 1, i); - textureCreateUav(&context->ssaoBufferPong, 0, &context->edgeSensitiveBlurOutput[i], 0, 1, i); - } - - // ======================================= - // Init apply SRVs/UAVs - - textureCreateSrv(&context->ssaoBufferPing, 0, &context->createOutputInputsPing, 0, 4, 0); - textureCreateSrv(&context->ssaoBufferPong, 0, &context->createOutputInputsPong, 0, 4, 0); - - context->device->CreateUnorderedAccessView(info->outputResource, NULL, &info->outputUavDesc, context->createOutputOutputs.cpuDescriptor); - context->device->CreateUnorderedAccessView(info->outputResource, NULL, &info->outputUavDesc, context->createOutputOutputs.cpuVisibleCpuDescriptor); - - // ======================================= - // Init upscale SRVs/UAVs - - textureCreateSrv(&context->ssaoBufferPing, 0, &context->bilateralUpscaleInputsPing, -1, -1, -1); - context->device->CreateShaderResourceView(info->depthBufferResource, &info->depthBufferSrvDesc, cbvSrvUavGetCpu(&context->bilateralUpscaleInputsPing, 1)); - textureCreateSrv(&context->deinterleavedDepths, 3, &context->bilateralUpscaleInputsPing, 0, -1, -1); - - textureCreateSrv(&context->ssaoBufferPong, 0, &context->bilateralUpscaleInputsPong, -1, -1, -1); - context->device->CreateShaderResourceView(info->depthBufferResource, &info->depthBufferSrvDesc, cbvSrvUavGetCpu(&context->bilateralUpscaleInputsPong, 1)); - textureCreateSrv(&context->deinterleavedDepths, 3, &context->bilateralUpscaleInputsPong, 0, -1, -1); - - context->device->CreateUnorderedAccessView(info->outputResource, NULL, &info->outputUavDesc, context->bilateralUpscaleOutputs.cpuDescriptor); - - // ======================================= - // Init debug SRVs/UAVs - - context->outputResource = info->outputResource; - - return FFX_CACAO_STATUS_OK; - - ERROR_TEXTURE_DESTROY(importanceMapPong, importance_map_pong); - ERROR_TEXTURE_DESTROY(importanceMap, importance_map); - - 
ERROR_TEXTURE_DESTROY(ssaoBufferPong, ssao_buffer_pong); - ERROR_TEXTURE_DESTROY(ssaoBufferPing, ssao_buffer_ping); - - ERROR_TEXTURE_DESTROY(deinterleavedNormals, deinterleaved_normals); - ERROR_TEXTURE_DESTROY(deinterleavedDepths, deinterleaved_depths); - - return errorStatus; - -#undef TEXTURE_INIT -#undef ERROR_TEXTURE_DESTROY -} - -FfxCacaoStatus ffxCacaoD3D12DestroyScreenSizeDependentResources(FfxCacaoD3D12Context* context) -{ - if (context == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - context = getAlignedD3D12ContextPointer(context); - - textureDestroy(&context->importanceMapPong); - textureDestroy(&context->importanceMap); - - textureDestroy(&context->ssaoBufferPong); - textureDestroy(&context->ssaoBufferPing); - - textureDestroy(&context->deinterleavedNormals); - textureDestroy(&context->deinterleavedDepths); - - return FFX_CACAO_STATUS_OK; -} - -FfxCacaoStatus ffxCacaoD3D12UpdateSettings(FfxCacaoD3D12Context* context, const FfxCacaoSettings* settings) -{ - if (context == NULL || settings == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - context = getAlignedD3D12ContextPointer(context); - - memcpy(&context->settings, settings, sizeof(*settings)); - - return FFX_CACAO_STATUS_OK; -} - -FfxCacaoStatus ffxCacaoD3D12Draw(FfxCacaoD3D12Context* context, ID3D12GraphicsCommandList* commandList, const FfxCacaoMatrix4x4* proj, const FfxCacaoMatrix4x4* normalsToView) -{ - if (context == NULL || commandList == NULL || proj == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - context = getAlignedD3D12ContextPointer(context); - - -#ifdef FFX_CACAO_ENABLE_PROFILING -#define GET_TIMESTAMP(name) gpuTimerGetTimestamp(&context->gpuTimer, commandList, TIMESTAMP_##name) -#else -#define GET_TIMESTAMP(name) -#endif - BufferSizeInfo *bsi = &context->bufferSizeInfo; - - - USER_MARKER("FidelityFX CACAO"); - - constantBufferRingStartFrame(&context->constantBufferRing); - -#ifdef FFX_CACAO_ENABLE_PROFILING - gpuTimerStartFrame(&context->gpuTimer); 
-#endif - - GET_TIMESTAMP(BEGIN); - - // set the descriptor heaps - { - ID3D12DescriptorHeap *descriptorHeaps[] = { context->cbvSrvUavHeap.heap }; - commandList->SetDescriptorHeaps(FFX_CACAO_ARRAY_SIZE(descriptorHeaps), descriptorHeaps); - } - - // clear load counter - { - UINT clearValue[] = { 0, 0, 0, 0 }; - commandList->ClearUnorderedAccessViewUint(context->loadCounterUav.gpuDescriptor, context->loadCounterUav.cpuVisibleCpuDescriptor, context->loadCounter.resource, clearValue, 0, NULL); - } - - // move this to initialisation - D3D12_GPU_VIRTUAL_ADDRESS cbCACAOHandle; - FfxCacaoConstants *pCACAOConsts; - D3D12_GPU_VIRTUAL_ADDRESS cbCACAOPerPassHandle[4]; - FfxCacaoConstants *pPerPassConsts[4]; - - // upload constant buffers - { - constantBufferRingAlloc(&context->constantBufferRing, sizeof(*pCACAOConsts), (void**)&pCACAOConsts, &cbCACAOHandle); - updateConstants(pCACAOConsts, &context->settings, bsi, proj, normalsToView); - - for (int i = 0; i < 4; ++i) - { - constantBufferRingAlloc(&context->constantBufferRing, sizeof(*pPerPassConsts[0]), (void**)&pPerPassConsts[i], &cbCACAOPerPassHandle[i]); - updateConstants(pPerPassConsts[i], &context->settings, bsi, proj, normalsToView); - updatePerPassConstants(pPerPassConsts[i], &context->settings, &context->bufferSizeInfo, i); - } - } - - // prepare depths, normals and mips - { - USER_MARKER("Prepare downsampled depths, normals and mips"); - - - switch (context->settings.qualityLevel) - { - case FFX_CACAO_QUALITY_LOWEST: { - uint32_t dispatchWidth = dispatchSize(PREPARE_DEPTHS_HALF_WIDTH, bsi->deinterleavedDepthBufferWidth); - uint32_t dispatchHeight = dispatchSize(PREPARE_DEPTHS_HALF_HEIGHT, bsi->deinterleavedDepthBufferHeight); - ComputeShader *prepareDepthsHalf = context->useDownsampledSsao ? 
&context->prepareDownsampledDepthsHalf : &context->prepareNativeDepthsHalf; - computeShaderDraw(prepareDepthsHalf, commandList, cbCACAOHandle, &context->prepareDepthsOutputs, &context->prepareDepthsNormalsAndMipsInputs, dispatchWidth, dispatchHeight, 1); - break; - } - case FFX_CACAO_QUALITY_LOW: { - uint32_t dispatchWidth = dispatchSize(PREPARE_DEPTHS_WIDTH, bsi->deinterleavedDepthBufferWidth); - uint32_t dispatchHeight = dispatchSize(PREPARE_DEPTHS_HEIGHT, bsi->deinterleavedDepthBufferHeight); - ComputeShader *prepareDepths = context->useDownsampledSsao ? &context->prepareDownsampledDepths : &context->prepareNativeDepths; - computeShaderDraw(prepareDepths, commandList, cbCACAOHandle, &context->prepareDepthsOutputs, &context->prepareDepthsNormalsAndMipsInputs, dispatchWidth, dispatchHeight, 1); - break; - } - default: { - uint32_t dispatchWidth = dispatchSize(PREPARE_DEPTHS_AND_MIPS_WIDTH, bsi->deinterleavedDepthBufferWidth); - uint32_t dispatchHeight = dispatchSize(PREPARE_DEPTHS_AND_MIPS_HEIGHT, bsi->deinterleavedDepthBufferHeight); - ComputeShader *prepareDepthsAndMips = context->useDownsampledSsao ? &context->prepareDownsampledDepthsAndMips : &context->prepareNativeDepthsAndMips; - computeShaderDraw(prepareDepthsAndMips, commandList, cbCACAOHandle, &context->prepareDepthsAndMipsOutputs, &context->prepareDepthsNormalsAndMipsInputs, dispatchWidth, dispatchHeight, 1); - break; - } - } - - if (context->settings.generateNormals) - { - uint32_t dispatchWidth = dispatchSize(PREPARE_NORMALS_WIDTH, bsi->ssaoBufferWidth); - uint32_t dispatchHeight = dispatchSize(PREPARE_NORMALS_HEIGHT, bsi->ssaoBufferHeight); - ComputeShader *prepareNormals = context->useDownsampledSsao ? 
&context->prepareDownsampledNormals : &context->prepareNativeNormals; - computeShaderDraw(prepareNormals, commandList, cbCACAOHandle, &context->prepareNormalsOutput, &context->prepareDepthsNormalsAndMipsInputs, dispatchWidth, dispatchHeight, 1); - } - else - { - uint32_t dispatchWidth = dispatchSize(PREPARE_NORMALS_FROM_INPUT_NORMALS_WIDTH, bsi->ssaoBufferWidth); - uint32_t dispatchHeight = dispatchSize(PREPARE_NORMALS_FROM_INPUT_NORMALS_HEIGHT, bsi->ssaoBufferHeight); - ComputeShader *prepareNormalsFromInputNormals = context->useDownsampledSsao ? &context->prepareDownsampledNormalsFromInputNormals : &context->prepareNativeNormalsFromInputNormals; - computeShaderDraw(prepareNormalsFromInputNormals, commandList, cbCACAOHandle, &context->prepareNormalsFromInputNormalsOutput, &context->prepareNormalsFromInputNormalsInput, dispatchWidth, dispatchHeight, 1); - } - - GET_TIMESTAMP(PREPARE); - } - - // deinterleaved depths and normals are now read only resources, also used in the next stage - { - D3D12_RESOURCE_BARRIER resourceBarriers[] = { - CD3DX12_RESOURCE_BARRIER::Transition(context->deinterleavedDepths.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), - CD3DX12_RESOURCE_BARRIER::Transition(context->deinterleavedNormals.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), - }; - commandList->ResourceBarrier(FFX_CACAO_ARRAY_SIZE(resourceBarriers), resourceBarriers); - } - - // base pass for highest quality setting - if (context->settings.qualityLevel == FFX_CACAO_QUALITY_HIGHEST) - { - USER_MARKER("Generate High Quality Base Pass"); - - // SSAO - { - USER_MARKER("SSAO"); - - for (int pass = 0; pass < 4; ++pass) - { - CbvSrvUav *inputs = &context->generateSSAOInputs[pass]; - uint32_t dispatchWidth = dispatchSize(GENERATE_WIDTH, bsi->ssaoBufferWidth); - uint32_t dispatchHeight = dispatchSize(GENERATE_WIDTH, bsi->ssaoBufferHeight); - computeShaderDraw(&context->generateSSAO[4], 
commandList, cbCACAOPerPassHandle[pass], &context->generateSSAOOutputsPong[pass], inputs, dispatchWidth, dispatchHeight, 1); - } - GET_TIMESTAMP(BASE_SSAO_PASS); - } - - // results written by base pass are now a reaad only resource, used in next stage - commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(context->ssaoBufferPong.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE)); - - // generate importance map - { - USER_MARKER("Importance Map"); - - CD3DX12_RESOURCE_BARRIER barriers[2]; - UINT barrierCount; - - uint32_t dispatchWidth = dispatchSize(IMPORTANCE_MAP_WIDTH, bsi->importanceMapWidth); - uint32_t dispatchHeight = dispatchSize(IMPORTANCE_MAP_HEIGHT, bsi->importanceMapHeight); - - computeShaderDraw(&context->generateImportanceMap, commandList, cbCACAOHandle, &context->generateImportanceMapOutputs, &context->generateImportanceMapInputs, dispatchWidth, dispatchHeight, 1); - - barrierCount = 0; - barriers[barrierCount++] = CD3DX12_RESOURCE_BARRIER::Transition(context->importanceMap.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); - commandList->ResourceBarrier(barrierCount, barriers); - - computeShaderDraw(&context->postprocessImportanceMapA, commandList, cbCACAOHandle, &context->generateImportanceMapAOutputs, &context->generateImportanceMapAInputs, dispatchWidth, dispatchHeight, 1); - - barrierCount = 0; - barriers[barrierCount++] = CD3DX12_RESOURCE_BARRIER::Transition(context->importanceMap.resource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - barriers[barrierCount++] = CD3DX12_RESOURCE_BARRIER::Transition(context->importanceMapPong.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); - commandList->ResourceBarrier(barrierCount, barriers); - - computeShaderDraw(&context->postprocessImportanceMapB, commandList, cbCACAOHandle, 
&context->generateImportanceMapBOutputs, &context->generateImportanceMapBInputs, dispatchWidth, dispatchHeight, 1); - - barrierCount = 0; - barriers[barrierCount++] = CD3DX12_RESOURCE_BARRIER::Transition(context->importanceMap.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); - barriers[barrierCount++] = CD3DX12_RESOURCE_BARRIER::Transition(context->loadCounter.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); - commandList->ResourceBarrier(barrierCount, barriers); - - GET_TIMESTAMP(IMPORTANCE_MAP); - } - } - - int blurPassCount = context->settings.blurPassCount; - blurPassCount = FFX_CACAO_CLAMP(blurPassCount, 0, MAX_BLUR_PASSES); - - // main ssao generation - { - USER_MARKER("Generate SSAO"); - - ComputeShader *generate = &context->generateSSAO[FFX_CACAO_MAX(0, context->settings.qualityLevel - 1)]; - for (int pass = 0; pass < 4; ++pass) - { - if (context->settings.qualityLevel == FFX_CACAO_QUALITY_LOWEST && (pass == 1 || pass == 2)) - { - continue; - } - - CbvSrvUav *input = context->settings.qualityLevel == FFX_CACAO_QUALITY_HIGHEST ? &context->generateAdaptiveSSAOInputs[pass] : &context->generateSSAOInputs[pass]; - CbvSrvUav *output = &context->generateSSAOOutputsPing[pass]; // blurPassCount == 0 ? 
&context->generateSSAOOutputsPing[pass] : &context->generateSSAOOutputsPong[pass]; - - uint32_t dispatchWidth = dispatchSize(GENERATE_WIDTH, bsi->ssaoBufferWidth); - uint32_t dispatchHeight = dispatchSize(GENERATE_WIDTH, bsi->ssaoBufferHeight); - computeShaderDraw(generate, commandList, cbCACAOPerPassHandle[pass], output, input, dispatchWidth, dispatchHeight, 1); - } - - GET_TIMESTAMP(GENERATE_SSAO); - } - - // de-interleaved blur - if (blurPassCount) - { - // only need to transition pong to writable if we didn't already use it in the base pass - CD3DX12_RESOURCE_BARRIER barriers[] = { - CD3DX12_RESOURCE_BARRIER::Transition(context->ssaoBufferPing.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), - CD3DX12_RESOURCE_BARRIER::Transition(context->ssaoBufferPong.resource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), - }; - commandList->ResourceBarrier(context->settings.qualityLevel == FFX_CACAO_QUALITY_HIGHEST ? 
2 : 1, barriers); - - USER_MARKER("Deinterleaved blur"); - - for (int pass = 0; pass < 4; ++pass) - { - if (context->settings.qualityLevel == FFX_CACAO_QUALITY_LOWEST && (pass == 1 || pass == 2)) - { - continue; - } - - uint32_t w = 4 * BLUR_WIDTH - 2 * blurPassCount; - uint32_t h = 3 * BLUR_HEIGHT - 2 * blurPassCount; - uint32_t blurPassIndex = blurPassCount - 1; - uint32_t dispatchWidth = dispatchSize(w, bsi->ssaoBufferWidth); - uint32_t dispatchHeight = dispatchSize(h, bsi->ssaoBufferHeight); - computeShaderDraw(&context->edgeSensitiveBlur[blurPassIndex], commandList, cbCACAOPerPassHandle[pass], &context->edgeSensitiveBlurOutput[pass], &context->edgeSensitiveBlurInput[pass], dispatchWidth, dispatchHeight, 1); - } - - GET_TIMESTAMP(EDGE_SENSITIVE_BLUR); - - commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(context->ssaoBufferPong.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE)); - } - else - { - commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(context->ssaoBufferPing.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE)); - } - - - if (context->useDownsampledSsao) - { - USER_MARKER("Upscale"); - - CbvSrvUav *inputs = blurPassCount ? &context->bilateralUpscaleInputsPong : &context->bilateralUpscaleInputsPing; - ComputeShader *upscaler = context->settings.qualityLevel == FFX_CACAO_QUALITY_LOWEST ? &context->upscaleBilateral5x5Half : &context->upscaleBilateral5x5; - uint32_t dispatchWidth = dispatchSize(2 * BILATERAL_UPSCALE_WIDTH, bsi->inputOutputBufferWidth); - uint32_t dispatchHeight = dispatchSize(2 * BILATERAL_UPSCALE_HEIGHT, bsi->inputOutputBufferHeight); - computeShaderDraw(upscaler, commandList, cbCACAOHandle, &context->bilateralUpscaleOutputs, inputs, dispatchWidth, dispatchHeight, 1); - - GET_TIMESTAMP(BILATERAL_UPSAMPLE); - } - else - { - USER_MARKER("Create Output"); - CbvSrvUav *inputs = blurPassCount ? 
&context->createOutputInputsPong : &context->createOutputInputsPing; - uint32_t dispatchWidth = dispatchSize(APPLY_WIDTH, bsi->inputOutputBufferWidth); - uint32_t dispatchHeight = dispatchSize(APPLY_HEIGHT, bsi->inputOutputBufferHeight); - switch (context->settings.qualityLevel) - { - case FFX_CACAO_QUALITY_LOWEST: - computeShaderDraw(&context->nonSmartHalfApply, commandList, cbCACAOHandle, &context->createOutputOutputs, inputs, dispatchWidth, dispatchHeight, 1); - break; - case FFX_CACAO_QUALITY_LOW: - computeShaderDraw(&context->nonSmartApply, commandList, cbCACAOHandle, &context->createOutputOutputs, inputs, dispatchWidth, dispatchHeight, 1); - break; - default: - computeShaderDraw(&context->smartApply, commandList, cbCACAOHandle, &context->createOutputOutputs, inputs, dispatchWidth, dispatchHeight, 1); - break; - } - GET_TIMESTAMP(APPLY); - } - - // end frame resource barrier - { - uint32_t numBarriers = 0; - D3D12_RESOURCE_BARRIER resourceBarriers[10] = {}; - resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->deinterleavedDepths.resource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->deinterleavedNormals.resource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->outputResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_GENERIC_READ); - resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->ssaoBufferPing.resource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - if (context->settings.qualityLevel == FFX_CACAO_QUALITY_HIGHEST || blurPassCount) - { - resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->ssaoBufferPong.resource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, 
D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - } - if (context->settings.qualityLevel == FFX_CACAO_QUALITY_HIGHEST) - { - resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->importanceMap.resource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->importanceMapPong.resource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->loadCounter.resource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); - } - commandList->ResourceBarrier(numBarriers, resourceBarriers); - } - -#ifdef FFX_CACAO_ENABLE_PROFILING - gpuTimerEndFrame(&context->gpuTimer, commandList); -#endif - - return FFX_CACAO_STATUS_OK; - -#undef GET_TIMESTAMP -} - -#ifdef FFX_CACAO_ENABLE_PROFILING -FfxCacaoStatus ffxCacaoD3D12GetDetailedTimings(FfxCacaoD3D12Context* context, FfxCacaoDetailedTiming* timings) -{ - if (context == NULL || timings == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - context = getAlignedD3D12ContextPointer(context); - - gpuTimerCollectTimings(&context->gpuTimer, timings); - - return FFX_CACAO_STATUS_OK; -} -#endif -#endif - -#ifdef FFX_CACAO_ENABLE_VULKAN -inline static void setObjectName(VkDevice device, FfxCacaoVkContext* context, VkObjectType type, uint64_t handle, const char* name) -{ - if (!context->vkSetDebugUtilsObjectName) - { - return; - } - - VkDebugUtilsObjectNameInfoEXT info = {}; - info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; - info.pNext = NULL; - info.objectType = type; - info.objectHandle = handle; - info.pObjectName = name; - - VkResult result = context->vkSetDebugUtilsObjectName(device, &info); - FFX_CACAO_ASSERT(result == VK_SUCCESS); -} - -inline static uint32_t getBestMemoryHeapIndex(VkPhysicalDevice physicalDevice, VkMemoryRequirements 
memoryRequirements, VkMemoryPropertyFlags desiredProperties) -{ - VkPhysicalDeviceMemoryProperties memoryProperties; - vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memoryProperties); - - uint32_t chosenMemoryTypeIndex = VK_MAX_MEMORY_TYPES; - for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; ++i) - { - uint32_t typeBit = 1 << i; - // can we allocate to memory of this type - if (memoryRequirements.memoryTypeBits & typeBit) - { - VkMemoryType currentMemoryType = memoryProperties.memoryTypes[i]; - // do we want to allocate to memory of this type - if ((currentMemoryType.propertyFlags & desiredProperties) == desiredProperties) - { - chosenMemoryTypeIndex = i; - break; - } - } - } - return chosenMemoryTypeIndex; -} - -size_t ffxCacaoVkGetContextSize() -{ - return sizeof(FfxCacaoVkContext) + alignof(FfxCacaoVkContext) - 1; -} - -FfxCacaoStatus ffxCacaoVkInitContext(FfxCacaoVkContext* context, const FfxCacaoVkCreateInfo* info) -{ - if (context == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - if (info == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - context = getAlignedVkContextPointer(context); - memset(context, 0, sizeof(*context)); - - VkDevice device = info->device; - VkPhysicalDevice physicalDevice = info->physicalDevice; - VkResult result; - FfxCacaoBool use16Bit = info->flags & FFX_CACAO_VK_CREATE_USE_16_BIT ? 
FFX_CACAO_TRUE : FFX_CACAO_FALSE; - FfxCacaoStatus errorStatus = FFX_CACAO_STATUS_FAILED; - - context->device = device; - context->physicalDevice = physicalDevice; - - if (info->flags & FFX_CACAO_VK_CREATE_USE_DEBUG_MARKERS) - { - context->vkCmdDebugMarkerBegin = (PFN_vkCmdDebugMarkerBeginEXT)vkGetDeviceProcAddr(device, "vkCmdDebugMarkerBeginEXT"); - context->vkCmdDebugMarkerEnd = (PFN_vkCmdDebugMarkerEndEXT)vkGetDeviceProcAddr(device, "vkCmdDebugMarkerEndEXT"); - } - if (info->flags & FFX_CACAO_VK_CREATE_USE_DEBUG_MARKERS) - { - context->vkSetDebugUtilsObjectName = (PFN_vkSetDebugUtilsObjectNameEXT)vkGetDeviceProcAddr(device, "vkSetDebugUtilsObjectNameEXT"); - } - - uint32_t numSamplersInited = 0; - uint32_t numDescriptorSetLayoutsInited = 0; - uint32_t numPipelineLayoutsInited = 0; - uint32_t numShaderModulesInited = 0; - uint32_t numPipelinesInited = 0; - uint32_t numConstantBackBuffersInited = 0; - - VkSampler samplers[NUM_SAMPLERS]; - { - VkSamplerCreateInfo samplerCreateInfo = {}; - samplerCreateInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; - samplerCreateInfo.pNext = NULL; - samplerCreateInfo.flags = 0; - samplerCreateInfo.magFilter = VK_FILTER_LINEAR; - samplerCreateInfo.minFilter = VK_FILTER_LINEAR; - samplerCreateInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - samplerCreateInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - samplerCreateInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - samplerCreateInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; - samplerCreateInfo.mipLodBias = 0.0f; - samplerCreateInfo.anisotropyEnable = VK_FALSE; - samplerCreateInfo.compareEnable = VK_FALSE; - samplerCreateInfo.minLod = -1000.0f; - samplerCreateInfo.maxLod = 1000.0f; - samplerCreateInfo.unnormalizedCoordinates = VK_FALSE; - - result = vkCreateSampler(device, &samplerCreateInfo, NULL, &samplers[numSamplersInited]); - if (result != VK_SUCCESS) - { - goto error_init_samplers; - } - setObjectName(device, context, VK_OBJECT_TYPE_SAMPLER, 
(uint64_t)samplers[numSamplersInited], "FFX_CACAO_POINT_CLAMP_SAMPLER"); - ++numSamplersInited; - - samplerCreateInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; - samplerCreateInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; - samplerCreateInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; - - result = vkCreateSampler(device, &samplerCreateInfo, NULL, &samplers[numSamplersInited]); - if (result != VK_SUCCESS) - { - goto error_init_samplers; - } - setObjectName(device, context, VK_OBJECT_TYPE_SAMPLER, (uint64_t)samplers[numSamplersInited], "FFX_CACAO_POINT_MIRROR_SAMPLER"); - ++numSamplersInited; - - samplerCreateInfo.magFilter = VK_FILTER_LINEAR; - samplerCreateInfo.minFilter = VK_FILTER_LINEAR; - samplerCreateInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; - samplerCreateInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - samplerCreateInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - samplerCreateInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; - - result = vkCreateSampler(device, &samplerCreateInfo, NULL, &samplers[numSamplersInited]); - if (result != VK_SUCCESS) - { - goto error_init_samplers; - } - setObjectName(device, context, VK_OBJECT_TYPE_SAMPLER, (uint64_t)samplers[numSamplersInited], "FFX_CACAO_LINEAR_CLAMP_SAMPLER"); - ++numSamplersInited; - - samplerCreateInfo.magFilter = VK_FILTER_NEAREST; - samplerCreateInfo.minFilter = VK_FILTER_NEAREST; - samplerCreateInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - - result = vkCreateSampler(device, &samplerCreateInfo, NULL, &samplers[numSamplersInited]); - if (result != VK_SUCCESS) - { - goto error_init_samplers; - } - setObjectName(device, context, VK_OBJECT_TYPE_SAMPLER, (uint64_t)samplers[numSamplersInited], "FFX_CACAO_VIEWSPACE_DEPTH_TAP_SAMPLER"); - ++numSamplersInited; - - samplerCreateInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - samplerCreateInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - 
samplerCreateInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER; - samplerCreateInfo.borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; - - result = vkCreateSampler(device, &samplerCreateInfo, NULL, &samplers[numSamplersInited]); - if (result != VK_SUCCESS) - { - goto error_init_samplers; - } - setObjectName(device, context, VK_OBJECT_TYPE_SAMPLER, (uint64_t)samplers[numSamplersInited], "FFX_CACAO_ZERO_TEXTURE_SAMPLER"); - ++numSamplersInited; - - for (uint32_t i = 0; i < FFX_CACAO_ARRAY_SIZE(samplers); ++i) - { - context->samplers[i] = samplers[i]; - } - } - - // create descriptor set layouts - for ( ; numDescriptorSetLayoutsInited < NUM_DESCRIPTOR_SET_LAYOUTS; ++numDescriptorSetLayoutsInited) - { - VkDescriptorSetLayout descriptorSetLayout; - DescriptorSetLayoutMetaData dslMetaData = DESCRIPTOR_SET_LAYOUT_META_DATA[numDescriptorSetLayoutsInited]; - - VkDescriptorSetLayoutBinding bindings[MAX_DESCRIPTOR_BINDINGS] = {}; - uint32_t numBindings = 0; - for (uint32_t samplerBinding = 0; samplerBinding < FFX_CACAO_ARRAY_SIZE(samplers); ++samplerBinding) - { - VkDescriptorSetLayoutBinding binding = {}; - binding.binding = samplerBinding; - binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - binding.descriptorCount = 1; - binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - binding.pImmutableSamplers = &samplers[samplerBinding]; - bindings[numBindings++] = binding; - } - - // constant buffer binding - { - VkDescriptorSetLayoutBinding binding = {}; - binding.binding = 10; - binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - binding.descriptorCount = 1; - binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - binding.pImmutableSamplers = NULL; - bindings[numBindings++] = binding; - } - - for (uint32_t inputBinding = 0; inputBinding < dslMetaData.numInputs; ++inputBinding) - { - VkDescriptorSetLayoutBinding binding = {}; - binding.binding = 20 + inputBinding; - binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - binding.descriptorCount = 1; - 
binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - binding.pImmutableSamplers = NULL; - bindings[numBindings++] = binding; - } - - for (uint32_t outputBinding = 0; outputBinding < dslMetaData.numOutputs; ++outputBinding) - { - VkDescriptorSetLayoutBinding binding = {}; - binding.binding = 30 + outputBinding; // g_PrepareDepthsOut register(u0) - binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - binding.descriptorCount = 1; - binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; - binding.pImmutableSamplers = NULL; - bindings[numBindings++] = binding; - } - - VkDescriptorSetLayoutCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - info.pNext = NULL; - info.flags = 0; - info.bindingCount = numBindings; - info.pBindings = bindings; - - result = vkCreateDescriptorSetLayout(device, &info, NULL, &descriptorSetLayout); - if (result != VK_SUCCESS) - { - goto error_init_descriptor_set_layouts; - } - setObjectName(device, context, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, (uint64_t)descriptorSetLayout, dslMetaData.name); - - context->descriptorSetLayouts[numDescriptorSetLayoutsInited] = descriptorSetLayout; - } - - // create pipeline layouts - for ( ; numPipelineLayoutsInited < NUM_DESCRIPTOR_SET_LAYOUTS; ++numPipelineLayoutsInited) - { - VkPipelineLayout pipelineLayout; - - DescriptorSetLayoutMetaData dslMetaData = DESCRIPTOR_SET_LAYOUT_META_DATA[numPipelineLayoutsInited]; - - VkPipelineLayoutCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - info.pNext = NULL; - info.flags = 0; - info.setLayoutCount = 1; - info.pSetLayouts = &context->descriptorSetLayouts[numPipelineLayoutsInited]; - info.pushConstantRangeCount = 0; - info.pPushConstantRanges = NULL; - - result = vkCreatePipelineLayout(device, &info, NULL, &pipelineLayout); - if (result != VK_SUCCESS) - { - goto error_init_pipeline_layouts; - } - setObjectName(device, context, VK_OBJECT_TYPE_PIPELINE_LAYOUT, (uint64_t)pipelineLayout, 
dslMetaData.name); - - context->pipelineLayouts[numPipelineLayoutsInited] = pipelineLayout; - } - - for ( ; numShaderModulesInited < NUM_COMPUTE_SHADERS; ++numShaderModulesInited) - { - VkShaderModule shaderModule; - ComputeShaderMetaData csMetaData = COMPUTE_SHADER_META_DATA[numShaderModulesInited]; - - VkShaderModuleCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - info.pNext = 0; - info.flags = 0; - if (use16Bit) - { - info.codeSize = csMetaData.spirv16Len; - info.pCode = csMetaData.shaderSpirv16; - } - else - { - info.codeSize = csMetaData.spirv32Len; - info.pCode = csMetaData.shaderSpirv32; - } - - result = vkCreateShaderModule(device, &info, NULL, &shaderModule); - if (result != VK_SUCCESS) - { - goto error_init_shader_modules; - } - setObjectName(device, context, VK_OBJECT_TYPE_SHADER_MODULE, (uint64_t)shaderModule, csMetaData.objectName); - - context->computeShaders[numShaderModulesInited] = shaderModule; - } - - for ( ; numPipelinesInited < NUM_COMPUTE_SHADERS; ++numPipelinesInited) - { - VkPipeline pipeline; - ComputeShaderMetaData csMetaData = COMPUTE_SHADER_META_DATA[numPipelinesInited]; - - VkPipelineShaderStageCreateInfo stageInfo = {}; - stageInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - stageInfo.pNext = NULL; - stageInfo.flags = 0; - stageInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT; - stageInfo.module = context->computeShaders[numPipelinesInited]; - stageInfo.pName = csMetaData.name; - stageInfo.pSpecializationInfo = NULL; - - VkComputePipelineCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - info.pNext = NULL; - info.flags = 0; - info.stage = stageInfo; - info.layout = context->pipelineLayouts[csMetaData.descriptorSetLayoutID]; - info.basePipelineHandle = VK_NULL_HANDLE; - info.basePipelineIndex = 0; - - result = vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &info, NULL, &pipeline); - if (result != VK_SUCCESS) - { - goto error_init_pipelines; - } - 
setObjectName(device, context, VK_OBJECT_TYPE_PIPELINE, (uint64_t)pipeline, csMetaData.objectName); - - context->computePipelines[numPipelinesInited] = pipeline; - } - - // create descriptor pool - { - VkDescriptorPool descriptorPool; - - VkDescriptorPoolSize poolSizes[4] = {}; - poolSizes[0].type = VK_DESCRIPTOR_TYPE_SAMPLER; - poolSizes[0].descriptorCount = NUM_BACK_BUFFERS * NUM_DESCRIPTOR_SETS * 5; - poolSizes[1].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - poolSizes[1].descriptorCount = NUM_BACK_BUFFERS * NUM_DESCRIPTOR_SETS * 7; - poolSizes[2].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - poolSizes[2].descriptorCount = NUM_BACK_BUFFERS * NUM_DESCRIPTOR_SETS * 4; - poolSizes[3].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - poolSizes[3].descriptorCount = NUM_BACK_BUFFERS * NUM_DESCRIPTOR_SETS * 1; - - VkDescriptorPoolCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; - info.pNext = NULL; - info.flags = 0; - info.maxSets = NUM_BACK_BUFFERS * NUM_DESCRIPTOR_SETS; - info.poolSizeCount = FFX_CACAO_ARRAY_SIZE(poolSizes); - info.pPoolSizes = poolSizes; - - result = vkCreateDescriptorPool(device, &info, NULL, &descriptorPool); - if (result != VK_SUCCESS) - { - goto error_init_descriptor_pool; - } - setObjectName(device, context, VK_OBJECT_TYPE_DESCRIPTOR_POOL, (uint64_t)descriptorPool, "FFX_CACAO_DESCRIPTOR_POOL"); - - context->descriptorPool = descriptorPool; - } - - // allocate descriptor sets - { - VkDescriptorSetLayout descriptorSetLayouts[NUM_DESCRIPTOR_SETS]; - for (uint32_t i = 0; i < NUM_DESCRIPTOR_SETS; ++i) { - descriptorSetLayouts[i] = context->descriptorSetLayouts[DESCRIPTOR_SET_META_DATA[i].descriptorSetLayoutID]; - } - - for (uint32_t i = 0; i < NUM_BACK_BUFFERS; ++i) { - VkDescriptorSetAllocateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; - info.pNext = NULL; - info.descriptorPool = context->descriptorPool; - info.descriptorSetCount = FFX_CACAO_ARRAY_SIZE(descriptorSetLayouts); // 
FFX_CACAO_ARRAY_SIZE(context->descriptorSetLayouts); - info.pSetLayouts = descriptorSetLayouts; // context->descriptorSetLayouts; - - result = vkAllocateDescriptorSets(device, &info, context->descriptorSets[i]); - if (result != VK_SUCCESS) - { - goto error_allocate_descriptor_sets; - } - } - - char name[1024]; - for (uint32_t j = 0; j < NUM_BACK_BUFFERS; ++j) { - for (uint32_t i = 0; i < NUM_DESCRIPTOR_SETS; ++i) { - DescriptorSetMetaData dsMetaData = DESCRIPTOR_SET_META_DATA[i]; - snprintf(name, FFX_CACAO_ARRAY_SIZE(name), "%s_%u", dsMetaData.name, j); - setObjectName(device, context, VK_OBJECT_TYPE_DESCRIPTOR_SET, (uint64_t)context->descriptorSets[j][i], name); - } - } - } - - // assign memory to constant buffers - for ( ; numConstantBackBuffersInited < NUM_BACK_BUFFERS; ++numConstantBackBuffersInited) - { - for (uint32_t j = 0; j < 4; ++j) - { - VkBuffer buffer = context->constantBuffer[numConstantBackBuffersInited][j]; - - VkBufferCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; - info.pNext = NULL; - info.flags = 0; - info.size = sizeof(FfxCacaoConstants); - info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; - info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - info.queueFamilyIndexCount = 0; - info.pQueueFamilyIndices = NULL; - - result = vkCreateBuffer(device, &info, NULL, &buffer); - if (result != VK_SUCCESS) - { - goto error_init_constant_buffers; - } - char name[1024]; - snprintf(name, FFX_CACAO_ARRAY_SIZE(name), "FFX_CACAO_CONSTANT_BUFFER_PASS_%u_BACK_BUFFER_%u", j, numConstantBackBuffersInited); - setObjectName(device, context, VK_OBJECT_TYPE_BUFFER, (uint64_t)buffer, name); - - VkMemoryRequirements memoryRequirements; - vkGetBufferMemoryRequirements(device, buffer, &memoryRequirements); - - uint32_t chosenMemoryTypeIndex = getBestMemoryHeapIndex(physicalDevice, memoryRequirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); - if (chosenMemoryTypeIndex == 
VK_MAX_MEMORY_TYPES) - { - vkDestroyBuffer(device, buffer, NULL); - goto error_init_constant_buffers; - } - - VkMemoryAllocateInfo allocationInfo = {}; - allocationInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - allocationInfo.pNext = NULL; - allocationInfo.allocationSize = memoryRequirements.size; - allocationInfo.memoryTypeIndex = chosenMemoryTypeIndex; - - VkDeviceMemory memory; - result = vkAllocateMemory(device, &allocationInfo, NULL, &memory); - if (result != VK_SUCCESS) - { - vkDestroyBuffer(device, buffer, NULL); - goto error_init_constant_buffers; - } - - result = vkBindBufferMemory(device, buffer, memory, 0); - if (result != VK_SUCCESS) - { - vkDestroyBuffer(device, buffer, NULL); - goto error_init_constant_buffers; - } - - context->constantBufferMemory[numConstantBackBuffersInited][j] = memory; - context->constantBuffer[numConstantBackBuffersInited][j] = buffer; - } - } - - // create load counter VkImage - { - VkImage image = VK_NULL_HANDLE; - - VkImageCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - info.pNext = NULL; - info.flags = 0; - info.imageType = VK_IMAGE_TYPE_1D; - info.format = VK_FORMAT_R32_UINT; - info.extent.width = 1; - info.extent.height = 1; - info.extent.depth = 1; - info.mipLevels = 1; - info.arrayLayers = 1; - info.samples = VK_SAMPLE_COUNT_1_BIT; - info.tiling = VK_IMAGE_TILING_OPTIMAL; - info.usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; - info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - info.queueFamilyIndexCount = 0; - info.pQueueFamilyIndices = NULL; - info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - result = vkCreateImage(device, &info, NULL, &image); - if (result != VK_SUCCESS) - { - goto error_init_load_counter_image; - } - - setObjectName(device, context, VK_OBJECT_TYPE_IMAGE, (uint64_t)image, "FFX_CACAO_LOAD_COUNTER"); - - VkMemoryRequirements memoryRequirements; - vkGetImageMemoryRequirements(device, image, &memoryRequirements); - - 
uint32_t chosenMemoryTypeIndex = getBestMemoryHeapIndex(physicalDevice, memoryRequirements, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - if (chosenMemoryTypeIndex == VK_MAX_MEMORY_TYPES) - { - vkDestroyImage(device, image, NULL); - goto error_init_load_counter_image; - } - - VkMemoryAllocateInfo allocationInfo = {}; - allocationInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - allocationInfo.pNext = NULL; - allocationInfo.allocationSize = memoryRequirements.size; - allocationInfo.memoryTypeIndex = chosenMemoryTypeIndex; - - VkDeviceMemory memory; - result = vkAllocateMemory(device, &allocationInfo, NULL, &memory); - if (result != VK_SUCCESS) - { - vkDestroyImage(device, image, NULL); - goto error_init_load_counter_image; - } - - result = vkBindImageMemory(device, image, memory, 0); - if (result != VK_SUCCESS) - { - vkDestroyImage(device, image, NULL); - goto error_init_load_counter_image; - } - - context->loadCounter = image; - context->loadCounterMemory = memory; - } - - // create load counter view - { - VkImageView imageView; - - VkImageViewCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - info.pNext = NULL; - info.flags = 0; - info.image = context->loadCounter; - info.viewType = VK_IMAGE_VIEW_TYPE_1D; - info.format = VK_FORMAT_R32_UINT; - info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; - info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; - info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; - info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; - info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - info.subresourceRange.baseMipLevel = 0; - info.subresourceRange.levelCount = 1; - info.subresourceRange.baseArrayLayer = 0; - info.subresourceRange.layerCount = 1; - - result = vkCreateImageView(device, &info, NULL, &imageView); - if (result != VK_SUCCESS) - { - goto error_init_load_counter_view; - } - - context->loadCounterView = imageView; - } - -#ifdef FFX_CACAO_ENABLE_PROFILING - // create timestamp query pool - { - 
VkQueryPool queryPool = VK_NULL_HANDLE; - - VkQueryPoolCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; - info.pNext = NULL; - info.flags = 0; - info.queryType = VK_QUERY_TYPE_TIMESTAMP; - info.queryCount = NUM_TIMESTAMPS * NUM_BACK_BUFFERS; - - result = vkCreateQueryPool(device, &info, NULL, &queryPool); - if (result != VK_SUCCESS) - { - goto error_init_query_pool; - } - - context->timestampQueryPool = queryPool; - } -#endif - - return FFX_CACAO_STATUS_OK; - -#ifdef FFX_CACAO_ENABLE_PROFILING - vkDestroyQueryPool(device, context->timestampQueryPool, NULL); -error_init_query_pool: -#endif - - vkDestroyImageView(device, context->loadCounterView, NULL); -error_init_load_counter_view: - vkDestroyImage(device, context->loadCounter, NULL); - vkFreeMemory(device, context->loadCounterMemory, NULL); -error_init_load_counter_image: - -error_init_constant_buffers: - for (uint32_t i = 0; i < numConstantBackBuffersInited; ++i) - { - for (uint32_t j = 0; j < 4; ++j) - { - vkDestroyBuffer(device, context->constantBuffer[i][j], NULL); - vkFreeMemory(device, context->constantBufferMemory[i][j], NULL); - } - } - -error_allocate_descriptor_sets: - vkDestroyDescriptorPool(device, context->descriptorPool, NULL); -error_init_descriptor_pool: - -error_init_pipelines: - for (uint32_t i = 0; i < numPipelinesInited; ++i) - { - vkDestroyPipeline(device, context->computePipelines[i], NULL); - } - -error_init_shader_modules: - for (uint32_t i = 0; i < numShaderModulesInited; ++i) - { - vkDestroyShaderModule(device, context->computeShaders[i], NULL); - } - -error_init_pipeline_layouts: - for (uint32_t i = 0; i < numPipelineLayoutsInited; ++i) - { - vkDestroyPipelineLayout(device, context->pipelineLayouts[i], NULL); - } - -error_init_descriptor_set_layouts: - for (uint32_t i = 0; i < numDescriptorSetLayoutsInited; ++i) - { - vkDestroyDescriptorSetLayout(device, context->descriptorSetLayouts[i], NULL); - } - - -error_init_samplers: - for (uint32_t i = 0; i < 
numSamplersInited; ++i) - { - vkDestroySampler(device, context->samplers[i], NULL); - } - - return errorStatus; -} - -FfxCacaoStatus ffxCacaoVkDestroyContext(FfxCacaoVkContext* context) -{ - if (context == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - context = getAlignedVkContextPointer(context); - - VkDevice device = context->device; - -#ifdef FFX_CACAO_ENABLE_PROFILING - vkDestroyQueryPool(device, context->timestampQueryPool, NULL); -#endif - - vkDestroyImageView(device, context->loadCounterView, NULL); - vkDestroyImage(device, context->loadCounter, NULL); - vkFreeMemory(device, context->loadCounterMemory, NULL); - - for (uint32_t i = 0; i < NUM_BACK_BUFFERS; ++i) - { - for (uint32_t j = 0; j < 4; ++j) - { - vkDestroyBuffer(device, context->constantBuffer[i][j], NULL); - vkFreeMemory(device, context->constantBufferMemory[i][j], NULL); - } - } - - vkDestroyDescriptorPool(device, context->descriptorPool, NULL); - - for (uint32_t i = 0; i < NUM_COMPUTE_SHADERS; ++i) - { - vkDestroyPipeline(device, context->computePipelines[i], NULL); - } - - for (uint32_t i = 0; i < NUM_COMPUTE_SHADERS; ++i) - { - vkDestroyShaderModule(device, context->computeShaders[i], NULL); - } - - for (uint32_t i = 0; i < NUM_DESCRIPTOR_SET_LAYOUTS; ++i) - { - vkDestroyPipelineLayout(device, context->pipelineLayouts[i], NULL); - } - - for(uint32_t i = 0; i < NUM_DESCRIPTOR_SET_LAYOUTS; ++i) - { - vkDestroyDescriptorSetLayout(device, context->descriptorSetLayouts[i], NULL); - } - - - for (uint32_t i = 0; i < FFX_CACAO_ARRAY_SIZE(context->samplers); ++i) - { - vkDestroySampler(device, context->samplers[i], NULL); - } - - return FFX_CACAO_STATUS_OK; -} - -FfxCacaoStatus ffxCacaoVkInitScreenSizeDependentResources(FfxCacaoVkContext* context, const FfxCacaoVkScreenSizeInfo* info) -{ - if (context == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - if (info == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - context = getAlignedVkContextPointer(context); - -#ifdef 
FFX_CACAO_ENABLE_NATIVE_RESOLUTION - FfxCacaoBool useDownsampledSsao = info->useDownsampledSsao; -#else - FfxCacaoBool useDownsampledSsao = FFX_CACAO_TRUE; -#endif - context->useDownsampledSsao = useDownsampledSsao; - context->output = info->output; - - VkDevice device = context->device; - VkPhysicalDevice physicalDevice = context->physicalDevice; - VkPhysicalDeviceMemoryProperties memoryProperties; - vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memoryProperties); - VkResult result; - - uint32_t width = info->width; - uint32_t height = info->height; - uint32_t halfWidth = (width + 1) / 2; - uint32_t halfHeight = (height + 1) / 2; - uint32_t quarterWidth = (halfWidth + 1) / 2; - uint32_t quarterHeight = (halfHeight + 1) / 2; - uint32_t eighthWidth = (quarterWidth + 1) / 2; - uint32_t eighthHeight = (quarterHeight + 1) / 2; - - uint32_t depthBufferWidth = width; - uint32_t depthBufferHeight = height; - uint32_t depthBufferHalfWidth = halfWidth; - uint32_t depthBufferHalfHeight = halfHeight; - uint32_t depthBufferQuarterWidth = quarterWidth; - uint32_t depthBufferQuarterHeight = quarterHeight; - - uint32_t depthBufferXOffset = 0; - uint32_t depthBufferYOffset = 0; - uint32_t depthBufferHalfXOffset = 0; - uint32_t depthBufferHalfYOffset = 0; - uint32_t depthBufferQuarterXOffset = 0; - uint32_t depthBufferQuarterYOffset = 0; - - BufferSizeInfo bsi = {}; - bsi.inputOutputBufferWidth = width; - bsi.inputOutputBufferHeight = height; - bsi.depthBufferXOffset = depthBufferXOffset; - bsi.depthBufferYOffset = depthBufferYOffset; - bsi.depthBufferWidth = depthBufferWidth; - bsi.depthBufferHeight = depthBufferHeight; - - if (useDownsampledSsao) - { - bsi.ssaoBufferWidth = quarterWidth; - bsi.ssaoBufferHeight = quarterHeight; - bsi.deinterleavedDepthBufferXOffset = depthBufferQuarterXOffset; - bsi.deinterleavedDepthBufferYOffset = depthBufferQuarterYOffset; - bsi.deinterleavedDepthBufferWidth = depthBufferQuarterWidth; - bsi.deinterleavedDepthBufferHeight = 
depthBufferQuarterHeight; - bsi.importanceMapWidth = eighthWidth; - bsi.importanceMapHeight = eighthHeight; - } - else - { - bsi.ssaoBufferWidth = halfWidth; - bsi.ssaoBufferHeight = halfHeight; - bsi.deinterleavedDepthBufferXOffset = depthBufferHalfXOffset; - bsi.deinterleavedDepthBufferYOffset = depthBufferHalfYOffset; - bsi.deinterleavedDepthBufferWidth = depthBufferHalfWidth; - bsi.deinterleavedDepthBufferHeight = depthBufferHalfHeight; - bsi.importanceMapWidth = quarterWidth; - bsi.importanceMapHeight = quarterHeight; - } - - context->bufferSizeInfo = bsi; - - FfxCacaoStatus errorStatus = FFX_CACAO_STATUS_FAILED; - uint32_t numTextureImagesInited = 0; - uint32_t numTextureMemoriesInited = 0; - uint32_t numSrvsInited = 0; - uint32_t numUavsInited = 0; - - // create images for textures - for ( ; numTextureImagesInited < NUM_TEXTURES; ++numTextureImagesInited) - { - TextureMetaData metaData = TEXTURE_META_DATA[numTextureImagesInited]; - VkImage image = VK_NULL_HANDLE; - - VkImageCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; - info.pNext = NULL; - info.flags = 0; - info.imageType = VK_IMAGE_TYPE_2D; - info.format = metaData.format; - info.extent.width = *(uint32_t*)((uint8_t*)&bsi + metaData.widthOffset); - info.extent.height = *(uint32_t*)((uint8_t*)&bsi + metaData.heightOffset); - info.extent.depth = 1; - info.mipLevels = metaData.numMips; - info.arrayLayers = metaData.arraySize; - info.samples = VK_SAMPLE_COUNT_1_BIT; - info.tiling = VK_IMAGE_TILING_OPTIMAL; - info.usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; - info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; - info.queueFamilyIndexCount = 0; - info.pQueueFamilyIndices = NULL; - info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; - - result = vkCreateImage(device, &info, NULL, &image); - if (result != VK_SUCCESS) - { - goto error_init_texture_images; - } - - setObjectName(device, context, VK_OBJECT_TYPE_IMAGE, (uint64_t)image, metaData.name); - - 
context->textures[numTextureImagesInited] = image; - } - - // allocate memory for textures - for ( ; numTextureMemoriesInited < NUM_TEXTURES; ++numTextureMemoriesInited) - { - VkImage image = context->textures[numTextureMemoriesInited]; - - VkMemoryRequirements memoryRequirements; - vkGetImageMemoryRequirements(device, image, &memoryRequirements); - - uint32_t chosenMemoryTypeIndex = getBestMemoryHeapIndex(physicalDevice, memoryRequirements, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - if (chosenMemoryTypeIndex == VK_MAX_MEMORY_TYPES) - { - goto error_init_texture_memories; - } - - VkMemoryAllocateInfo allocationInfo = {}; - allocationInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; - allocationInfo.pNext = NULL; - allocationInfo.allocationSize = memoryRequirements.size; - allocationInfo.memoryTypeIndex = chosenMemoryTypeIndex; - - VkDeviceMemory memory; - result = vkAllocateMemory(device, &allocationInfo, NULL, &memory); - if (result != VK_SUCCESS) - { - goto error_init_texture_memories; - } - - result = vkBindImageMemory(device, image, memory, 0); - if (result != VK_SUCCESS) - { - vkFreeMemory(device, memory, NULL); - goto error_init_texture_memories; - } - - context->textureMemory[numTextureMemoriesInited] = memory; - } - - // create srv image views - for ( ; numSrvsInited < NUM_SHADER_RESOURCE_VIEWS; ++numSrvsInited) - { - VkImageView imageView; - ShaderResourceViewMetaData srvMetaData = SRV_META_DATA[numSrvsInited]; - - VkImageViewCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - info.pNext = NULL; - info.flags = 0; - info.image = context->textures[srvMetaData.texture]; - info.viewType = srvMetaData.viewType; - info.format = TEXTURE_META_DATA[srvMetaData.texture].format; - info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; - info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; - info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; - info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; - info.subresourceRange.aspectMask = 
VK_IMAGE_ASPECT_COLOR_BIT; - info.subresourceRange.baseMipLevel = srvMetaData.mostDetailedMip; - info.subresourceRange.levelCount = srvMetaData.mipLevels; - info.subresourceRange.baseArrayLayer = srvMetaData.firstArraySlice; - info.subresourceRange.layerCount = srvMetaData.arraySize; - - result = vkCreateImageView(device, &info, NULL, &imageView); - if (result != VK_SUCCESS) - { - goto error_init_srvs; - } - - context->shaderResourceViews[numSrvsInited] = imageView; - } - - // create uav image views - for ( ; numUavsInited < NUM_UNORDERED_ACCESS_VIEWS; ++numUavsInited) - { - VkImageView imageView; - UnorderedAccessViewMetaData uavMetaData = UAV_META_DATA[numUavsInited]; - - VkImageViewCreateInfo info = {}; - info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; - info.pNext = NULL; - info.flags = 0; - info.image = context->textures[uavMetaData.textureID]; - info.viewType = uavMetaData.viewType; - info.format = TEXTURE_META_DATA[uavMetaData.textureID].format; - info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; - info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; - info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; - info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; - info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - info.subresourceRange.baseMipLevel = uavMetaData.mostDetailedMip; - info.subresourceRange.levelCount = 1; - info.subresourceRange.baseArrayLayer = uavMetaData.firstArraySlice; - info.subresourceRange.layerCount = uavMetaData.arraySize; - - result = vkCreateImageView(device, &info, NULL, &imageView); - if (result != VK_SUCCESS) - { - goto error_init_uavs; - } - - context->unorderedAccessViews[numUavsInited] = imageView; - } - - // update descriptor sets from table - for (uint32_t i = 0; i < NUM_BACK_BUFFERS; ++i) { - VkDescriptorImageInfo imageInfos[NUM_INPUT_DESCRIPTOR_BINDINGS + NUM_OUTPUT_DESCRIPTOR_BINDINGS] = {}; - VkDescriptorImageInfo *curImageInfo = imageInfos; - VkWriteDescriptorSet writes[NUM_INPUT_DESCRIPTOR_BINDINGS + 
NUM_OUTPUT_DESCRIPTOR_BINDINGS] = {}; - VkWriteDescriptorSet *curWrite = writes; - - // write input descriptor bindings - for (uint32_t j = 0; j < NUM_INPUT_DESCRIPTOR_BINDINGS; ++j) - { - InputDescriptorBindingMetaData bindingMetaData = INPUT_DESCRIPTOR_BINDING_META_DATA[j]; - - curImageInfo->sampler = VK_NULL_HANDLE; - curImageInfo->imageView = context->shaderResourceViews[bindingMetaData.srvID]; - curImageInfo->imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - - curWrite->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - curWrite->pNext = NULL; - curWrite->dstSet = context->descriptorSets[i][bindingMetaData.descriptorID]; - curWrite->dstBinding = 20 + bindingMetaData.bindingNumber; - curWrite->descriptorCount = 1; - curWrite->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - curWrite->pImageInfo = curImageInfo; - - ++curWrite; ++curImageInfo; - } - - // write output descriptor bindings - for (uint32_t j = 0; j < NUM_OUTPUT_DESCRIPTOR_BINDINGS; ++j) - { - OutputDescriptorBindingMetaData bindingMetaData = OUTPUT_DESCRIPTOR_BINDING_META_DATA[j]; - - curImageInfo->sampler = VK_NULL_HANDLE; - curImageInfo->imageView = context->unorderedAccessViews[bindingMetaData.uavID]; - curImageInfo->imageLayout = VK_IMAGE_LAYOUT_GENERAL; - - curWrite->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - curWrite->pNext = VK_NULL_HANDLE; - curWrite->dstSet = context->descriptorSets[i][bindingMetaData.descriptorID]; - curWrite->dstBinding = 30 + bindingMetaData.bindingNumber; - curWrite->descriptorCount = 1; - curWrite->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - curWrite->pImageInfo = curImageInfo; - - ++curWrite; ++curImageInfo; - } - - vkUpdateDescriptorSets(device, FFX_CACAO_ARRAY_SIZE(writes), writes, 0, NULL); - } - - // update descriptor sets with inputs - for (uint32_t i = 0; i < NUM_BACK_BUFFERS; ++i) { -#define MAX_NUM_MISC_INPUT_DESCRIPTORS 32 - - VkDescriptorImageInfo imageInfos[MAX_NUM_MISC_INPUT_DESCRIPTORS] = {}; - VkWriteDescriptorSet 
writes[MAX_NUM_MISC_INPUT_DESCRIPTORS] = {}; - - for (uint32_t i = 0; i < FFX_CACAO_ARRAY_SIZE(writes); ++i) - { - VkDescriptorImageInfo *imageInfo = imageInfos + i; - VkWriteDescriptorSet *write = writes + i; - - imageInfo->sampler = VK_NULL_HANDLE; - - write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - write->pNext = NULL; - write->descriptorCount = 1; - write->pImageInfo = imageInfo; - } - - uint32_t cur = 0; - - // register(t0) -> 20 - // register(u0) -> 30 - imageInfos[cur].imageView = info->depthView; - imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - writes[cur].dstSet = context->descriptorSets[i][DS_PREPARE_DEPTHS]; - writes[cur].dstBinding = 20; - writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - ++cur; - - imageInfos[cur].imageView = info->depthView; - imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - writes[cur].dstSet = context->descriptorSets[i][DS_PREPARE_DEPTHS_MIPS]; - writes[cur].dstBinding = 20; - writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - ++cur; - - imageInfos[cur].imageView = info->depthView; - imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - writes[cur].dstSet = context->descriptorSets[i][DS_PREPARE_NORMALS]; - writes[cur].dstBinding = 20; - writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - ++cur; - - imageInfos[cur].imageView = info->depthView; - imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - writes[cur].dstSet = context->descriptorSets[i][DS_BILATERAL_UPSAMPLE_PING]; - writes[cur].dstBinding = 21; - writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - ++cur; - - imageInfos[cur].imageView = info->depthView; - imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - writes[cur].dstSet = context->descriptorSets[i][DS_BILATERAL_UPSAMPLE_PONG]; - writes[cur].dstBinding = 21; - writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - ++cur; - - 
imageInfos[cur].imageView = info->outputView; - imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - writes[cur].dstSet = context->descriptorSets[i][DS_BILATERAL_UPSAMPLE_PING]; - writes[cur].dstBinding = 30; - writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - ++cur; - - imageInfos[cur].imageView = info->outputView; - imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - writes[cur].dstSet = context->descriptorSets[i][DS_BILATERAL_UPSAMPLE_PONG]; - writes[cur].dstBinding = 30; - writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - ++cur; - - imageInfos[cur].imageView = info->outputView; - imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - writes[cur].dstSet = context->descriptorSets[i][DS_APPLY_PING]; - writes[cur].dstBinding = 30; - writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - ++cur; - - imageInfos[cur].imageView = info->outputView; - imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - writes[cur].dstSet = context->descriptorSets[i][DS_APPLY_PONG]; - writes[cur].dstBinding = 30; - writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - ++cur; - - imageInfos[cur].imageView = context->loadCounterView; - imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - writes[cur].dstSet = context->descriptorSets[i][DS_POSTPROCESS_IMPORTANCE_MAP_B]; - writes[cur].dstBinding = 31; - writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - ++cur; - - imageInfos[cur].imageView = context->loadCounterView; - imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - writes[cur].dstSet = context->descriptorSets[i][DS_CLEAR_LOAD_COUNTER]; - writes[cur].dstBinding = 30; - writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - ++cur; - - for (uint32_t pass = 0; pass < 4; ++pass) - { - imageInfos[cur].imageView = context->loadCounterView; - imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; - writes[cur].dstSet = context->descriptorSets[i][(DescriptorSetID)(DS_GENERATE_ADAPTIVE_0 + pass)]; - 
writes[cur].dstBinding = 22; - writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - ++cur; - } - - if (info->normalsView) { - imageInfos[cur].imageView = info->normalsView; - imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - writes[cur].dstSet = context->descriptorSets[i][DS_PREPARE_NORMALS_FROM_INPUT_NORMALS]; - writes[cur].dstBinding = 20; - writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - ++cur; - } - - FFX_CACAO_ASSERT(cur <= MAX_NUM_MISC_INPUT_DESCRIPTORS); - vkUpdateDescriptorSets(device, cur, writes, 0, NULL); - } - - // update descriptor sets with constant buffers - for (uint32_t i = 0; i < NUM_BACK_BUFFERS; ++i) { - VkDescriptorBufferInfo bufferInfos[NUM_DESCRIPTOR_SETS] = {}; - VkDescriptorBufferInfo *curBufferInfo = bufferInfos; - VkWriteDescriptorSet writes[NUM_DESCRIPTOR_SETS] = {}; - VkWriteDescriptorSet *curWrite = writes; - - for (uint32_t j = 0; j < NUM_DESCRIPTOR_SETS; ++j) - { - DescriptorSetMetaData dsMetaData = DESCRIPTOR_SET_META_DATA[j]; - - curBufferInfo->buffer = context->constantBuffer[i][dsMetaData.pass]; - curBufferInfo->offset = 0; - curBufferInfo->range = VK_WHOLE_SIZE; - - curWrite->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - curWrite->pNext = NULL; - curWrite->dstSet = context->descriptorSets[i][j]; - curWrite->dstBinding = 10; - curWrite->dstArrayElement = 0; - curWrite->descriptorCount = 1; - curWrite->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - curWrite->pBufferInfo = curBufferInfo; - - ++curWrite; - ++curBufferInfo; - } - - vkUpdateDescriptorSets(device, FFX_CACAO_ARRAY_SIZE(writes), writes, 0, NULL); - } - - return FFX_CACAO_STATUS_OK; - -error_init_uavs: - for (uint32_t i = 0; i < numUavsInited; ++i) - { - vkDestroyImageView(device, context->unorderedAccessViews[i], NULL); - } - -error_init_srvs: - for (uint32_t i = 0; i < numSrvsInited; ++i) - { - vkDestroyImageView(device, context->shaderResourceViews[i], NULL); - } - -error_init_texture_memories: - for 
(uint32_t i = 0; i < numTextureMemoriesInited; ++i) - { - vkFreeMemory(device, context->textureMemory[i], NULL); - } - -error_init_texture_images: - for (uint32_t i = 0; i < numTextureImagesInited; ++i) - { - vkDestroyImage(device, context->textures[i], NULL); - } - - return errorStatus; -} - -FfxCacaoStatus ffxCacaoVkDestroyScreenSizeDependentResources(FfxCacaoVkContext* context) -{ - if (context == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - context = getAlignedVkContextPointer(context); - - VkDevice device = context->device; - - for (uint32_t i = 0; i < NUM_UNORDERED_ACCESS_VIEWS; ++i) - { - vkDestroyImageView(device, context->unorderedAccessViews[i], NULL); - } - - for (uint32_t i = 0; i < NUM_SHADER_RESOURCE_VIEWS; ++i) - { - vkDestroyImageView(device, context->shaderResourceViews[i], NULL); - } - - for (uint32_t i = 0; i < NUM_TEXTURES; ++i) - { - vkFreeMemory(device, context->textureMemory[i], NULL); - } - - for (uint32_t i = 0; i < NUM_TEXTURES; ++i) - { - vkDestroyImage(device, context->textures[i], NULL); - } - - return FFX_CACAO_STATUS_OK; -} - -FfxCacaoStatus ffxCacaoVkUpdateSettings(FfxCacaoVkContext* context, const FfxCacaoSettings* settings) -{ - if (context == NULL || settings == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - context = getAlignedVkContextPointer(context); - - memcpy(&context->settings, settings, sizeof(*settings)); - - return FFX_CACAO_STATUS_OK; -} - -static inline void computeDispatch(FfxCacaoVkContext* context, VkCommandBuffer cb, DescriptorSetID ds, ComputeShaderID cs, uint32_t width, uint32_t height) -{ - DescriptorSetLayoutID dsl = DESCRIPTOR_SET_META_DATA[ds].descriptorSetLayoutID; - vkCmdBindDescriptorSets(cb, VK_PIPELINE_BIND_POINT_COMPUTE, context->pipelineLayouts[dsl], 0, 1, &context->descriptorSets[context->currentConstantBuffer][ds], 0, NULL); - vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, context->computePipelines[cs]); - vkCmdDispatch(cb, width, height, 1); -} - -typedef struct 
BarrierList -{ - uint32_t len; - VkImageMemoryBarrier barriers[32]; -} BarrierList; - -static inline void pushBarrier(BarrierList* barrierList, VkImage image, VkImageLayout oldLayout, VkImageLayout newLayout, VkAccessFlags srcAccessFlags, VkAccessFlags dstAccessFlags) -{ - FFX_CACAO_ASSERT(barrierList->len < FFX_CACAO_ARRAY_SIZE(barrierList->barriers)); - VkImageMemoryBarrier barrier = {}; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.pNext = NULL; - barrier.srcAccessMask = srcAccessFlags; - barrier.dstAccessMask = dstAccessFlags; - barrier.oldLayout = oldLayout; - barrier.newLayout = newLayout; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barrier.subresourceRange.baseMipLevel = 0; - barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; - barrier.subresourceRange.baseArrayLayer = 0; - barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; - barrier.image = image; - barrierList->barriers[barrierList->len++] = barrier; -} - -static inline void beginDebugMarker(FfxCacaoVkContext* context, VkCommandBuffer cb, const char* name) -{ - if (context->vkCmdDebugMarkerBegin) - { - VkDebugMarkerMarkerInfoEXT info = {}; - info.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT; - info.pNext = NULL; - info.pMarkerName = name; - info.color[0] = 1.0f; - info.color[1] = 0.0f; - info.color[2] = 0.0f; - info.color[3] = 1.0f; - - context->vkCmdDebugMarkerBegin(cb, &info); - } -} - -static inline void endDebugMarker(FfxCacaoVkContext* context, VkCommandBuffer cb) -{ - if (context->vkCmdDebugMarkerEnd) - { - context->vkCmdDebugMarkerEnd(cb); - } -} - -FfxCacaoStatus ffxCacaoVkDraw(FfxCacaoVkContext* context, VkCommandBuffer cb, const FfxCacaoMatrix4x4* proj, const FfxCacaoMatrix4x4* normalsToView) -{ - if (context == NULL || cb == VK_NULL_HANDLE || proj == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; 
- } - context = getAlignedVkContextPointer(context); - - FfxCacaoSettings *settings = &context->settings; - BufferSizeInfo *bsi = &context->bufferSizeInfo; - VkDevice device = context->device; - VkDescriptorSet *ds = context->descriptorSets[context->currentConstantBuffer]; - VkImage *tex = context->textures; - VkResult result; - BarrierList barrierList; - - uint32_t curBuffer = context->currentConstantBuffer; - curBuffer = (curBuffer + 1) % NUM_BACK_BUFFERS; - context->currentConstantBuffer = curBuffer; -#ifdef FFX_CACAO_ENABLE_PROFILING - { - uint32_t collectBuffer = context->collectBuffer = (curBuffer + 1) % NUM_BACK_BUFFERS; - if (uint32_t numQueries = context->timestampQueries[collectBuffer].numTimestamps) - { - uint32_t offset = collectBuffer * NUM_TIMESTAMPS; - vkGetQueryPoolResults(device, context->timestampQueryPool, offset, numQueries, numQueries * sizeof(uint64_t), context->timestampQueries[collectBuffer].timings, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT); - } - } -#endif - - beginDebugMarker(context, cb, "FidelityFX CACAO"); - - // update constant buffer - - for (uint32_t i = 0; i < 4; ++i) - { - VkDeviceMemory memory = context->constantBufferMemory[curBuffer][i]; - void *data = NULL; - result = vkMapMemory(device, memory, 0, VK_WHOLE_SIZE, 0, &data); - FFX_CACAO_ASSERT(result == VK_SUCCESS); - updateConstants((FfxCacaoConstants*)data, settings, bsi, proj, normalsToView); - updatePerPassConstants((FfxCacaoConstants*)data, settings, bsi, i); - vkUnmapMemory(device, memory); - } - -#ifdef FFX_CACAO_ENABLE_PROFILING - uint32_t queryPoolOffset = curBuffer * NUM_TIMESTAMPS; - uint32_t numTimestamps = 0; - vkCmdResetQueryPool(cb, context->timestampQueryPool, queryPoolOffset, NUM_TIMESTAMPS); -#define GET_TIMESTAMP(name) \ - context->timestampQueries[curBuffer].timestamps[numTimestamps] = TIMESTAMP_##name; \ - vkCmdWriteTimestamp(cb, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, context->timestampQueryPool, queryPoolOffset + numTimestamps++); -#else -#define 
GET_TIMESTAMP(name) -#endif - - GET_TIMESTAMP(BEGIN) - - barrierList.len = 0; - pushBarrier(&barrierList, tex[TEXTURE_DEINTERLEAVED_DEPTHS], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); - pushBarrier(&barrierList, tex[TEXTURE_DEINTERLEAVED_NORMALS], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); - pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PING], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); - pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PONG], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); - pushBarrier(&barrierList, tex[TEXTURE_IMPORTANCE_MAP], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); - pushBarrier(&barrierList, tex[TEXTURE_IMPORTANCE_MAP_PONG], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); - pushBarrier(&barrierList, context->loadCounter, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); - vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); - - // prepare depths, normals and mips - { - beginDebugMarker(context, cb, "Prepare downsampled depths, normals and mips"); - - // clear load counter - computeDispatch(context, cb, DS_CLEAR_LOAD_COUNTER, CS_CLEAR_LOAD_COUNTER, 1, 1); - - switch (context->settings.qualityLevel) - { - case FFX_CACAO_QUALITY_LOWEST: { - uint32_t dispatchWidth = dispatchSize(PREPARE_DEPTHS_HALF_WIDTH, bsi->deinterleavedDepthBufferWidth); - uint32_t dispatchHeight = dispatchSize(PREPARE_DEPTHS_HALF_HEIGHT, bsi->deinterleavedDepthBufferHeight); - ComputeShaderID csPrepareDepthsHalf = context->useDownsampledSsao ? 
CS_PREPARE_DOWNSAMPLED_DEPTHS_HALF : CS_PREPARE_NATIVE_DEPTHS_HALF; - computeDispatch(context, cb, DS_PREPARE_DEPTHS, csPrepareDepthsHalf, dispatchWidth, dispatchHeight); - break; - } - case FFX_CACAO_QUALITY_LOW: { - uint32_t dispatchWidth = dispatchSize(PREPARE_DEPTHS_WIDTH, bsi->deinterleavedDepthBufferWidth); - uint32_t dispatchHeight = dispatchSize(PREPARE_DEPTHS_HEIGHT, bsi->deinterleavedDepthBufferHeight); - ComputeShaderID csPrepareDepths = context->useDownsampledSsao ? CS_PREPARE_DOWNSAMPLED_DEPTHS : CS_PREPARE_NATIVE_DEPTHS; - computeDispatch(context, cb, DS_PREPARE_DEPTHS, csPrepareDepths, dispatchWidth, dispatchHeight); - break; - } - default: { - uint32_t dispatchWidth = dispatchSize(PREPARE_DEPTHS_AND_MIPS_WIDTH, bsi->deinterleavedDepthBufferWidth); - uint32_t dispatchHeight = dispatchSize(PREPARE_DEPTHS_AND_MIPS_HEIGHT, bsi->deinterleavedDepthBufferHeight); - ComputeShaderID csPrepareDepthsAndMips = context->useDownsampledSsao ? CS_PREPARE_DOWNSAMPLED_DEPTHS_AND_MIPS : CS_PREPARE_NATIVE_DEPTHS_AND_MIPS; - computeDispatch(context, cb, DS_PREPARE_DEPTHS_MIPS, csPrepareDepthsAndMips, dispatchWidth, dispatchHeight); - break; - } - } - - if (context->settings.generateNormals) - { - uint32_t dispatchWidth = dispatchSize(PREPARE_NORMALS_WIDTH, bsi->ssaoBufferWidth); - uint32_t dispatchHeight = dispatchSize(PREPARE_NORMALS_HEIGHT, bsi->ssaoBufferHeight); - ComputeShaderID csPrepareNormals = context->useDownsampledSsao ? CS_PREPARE_DOWNSAMPLED_NORMALS : CS_PREPARE_NATIVE_NORMALS; - computeDispatch(context, cb, DS_PREPARE_NORMALS, csPrepareNormals, dispatchWidth, dispatchHeight); - } - else - { - uint32_t dispatchWidth = dispatchSize(PREPARE_NORMALS_FROM_INPUT_NORMALS_WIDTH, bsi->ssaoBufferWidth); - uint32_t dispatchHeight = dispatchSize(PREPARE_NORMALS_FROM_INPUT_NORMALS_HEIGHT, bsi->ssaoBufferHeight); - ComputeShaderID csPrepareNormalsFromInputNormals = context->useDownsampledSsao ? 
CS_PREPARE_DOWNSAMPLED_NORMALS_FROM_INPUT_NORMALS : CS_PREPARE_NATIVE_NORMALS_FROM_INPUT_NORMALS; - computeDispatch(context, cb, DS_PREPARE_NORMALS_FROM_INPUT_NORMALS, csPrepareNormalsFromInputNormals, dispatchWidth, dispatchHeight); - } - - endDebugMarker(context, cb); - GET_TIMESTAMP(PREPARE) - } - - barrierList.len = 0; - pushBarrier(&barrierList, tex[TEXTURE_DEINTERLEAVED_DEPTHS], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - pushBarrier(&barrierList, tex[TEXTURE_DEINTERLEAVED_NORMALS], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - pushBarrier(&barrierList, context->loadCounter, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT); - vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); - - // base pass for highest quality setting - if (context->settings.qualityLevel == FFX_CACAO_QUALITY_HIGHEST) - { - beginDebugMarker(context, cb, "Generate High Quality Base Pass"); - - // SSAO - { - beginDebugMarker(context, cb, "Base SSAO"); - - uint32_t dispatchWidth = dispatchSize(GENERATE_WIDTH, bsi->ssaoBufferWidth); - uint32_t dispatchHeight = dispatchSize(GENERATE_WIDTH, bsi->ssaoBufferHeight); - - for (int pass = 0; pass < 4; ++pass) - { - computeDispatch(context, cb, (DescriptorSetID)(DS_GENERATE_ADAPTIVE_BASE_0 + pass), CS_GENERATE_Q3_BASE, dispatchWidth, dispatchHeight); - } - - endDebugMarker(context, cb); - } - - GET_TIMESTAMP(BASE_SSAO_PASS) - - barrierList.len = 0; - pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PONG], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); - - // generate importance map - { - beginDebugMarker(context, cb, "Importance Map"); - - uint32_t dispatchWidth = dispatchSize(IMPORTANCE_MAP_WIDTH, bsi->importanceMapWidth); - uint32_t dispatchHeight = dispatchSize(IMPORTANCE_MAP_HEIGHT, bsi->importanceMapHeight); - - computeDispatch(context, cb, DS_GENERATE_IMPORTANCE_MAP, CS_GENERATE_IMPORTANCE_MAP, dispatchWidth, dispatchHeight); - - barrierList.len = 0; - pushBarrier(&barrierList, tex[TEXTURE_IMPORTANCE_MAP], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); - - computeDispatch(context, cb, DS_POSTPROCESS_IMPORTANCE_MAP_A, CS_POSTPROCESS_IMPORTANCE_MAP_A, dispatchWidth, dispatchHeight); - - barrierList.len = 0; - pushBarrier(&barrierList, tex[TEXTURE_IMPORTANCE_MAP], VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_SHADER_WRITE_BIT); - pushBarrier(&barrierList, tex[TEXTURE_IMPORTANCE_MAP_PONG], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); - - computeDispatch(context, cb, DS_POSTPROCESS_IMPORTANCE_MAP_B, CS_POSTPROCESS_IMPORTANCE_MAP_B, dispatchWidth, dispatchHeight); - - endDebugMarker(context, cb); - } - - endDebugMarker(context, cb); - GET_TIMESTAMP(IMPORTANCE_MAP) - - barrierList.len = 0; - pushBarrier(&barrierList, tex[TEXTURE_IMPORTANCE_MAP], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - 
pushBarrier(&barrierList, context->loadCounter, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_SHADER_READ_BIT); - vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); - } - - // main ssao generation - { - beginDebugMarker(context, cb, "Generate SSAO"); - - uint32_t dispatchWidth = dispatchSize(GENERATE_WIDTH, bsi->ssaoBufferWidth); - uint32_t dispatchHeight = dispatchSize(GENERATE_WIDTH, bsi->ssaoBufferHeight); - - ComputeShaderID generateCS = (ComputeShaderID)(CS_GENERATE_Q0 + FFX_CACAO_MAX(0, context->settings.qualityLevel - 1)); - for (int pass = 0; pass < 4; ++pass) - { - if (context->settings.qualityLevel == FFX_CACAO_QUALITY_LOWEST && (pass == 1 || pass == 2)) - { - continue; - } - - DescriptorSetID descriptorSetID = context->settings.qualityLevel == FFX_CACAO_QUALITY_HIGHEST ? DS_GENERATE_ADAPTIVE_0 : DS_GENERATE_0; - descriptorSetID = (DescriptorSetID)(descriptorSetID + pass); - - computeDispatch(context, cb, descriptorSetID, generateCS, dispatchWidth, dispatchHeight); - } - - endDebugMarker(context, cb); - GET_TIMESTAMP(GENERATE_SSAO) - } - - uint32_t blurPassCount = context->settings.blurPassCount; - blurPassCount = FFX_CACAO_CLAMP(blurPassCount, 0, MAX_BLUR_PASSES); - - // de-interleaved blur - if (blurPassCount) - { - barrierList.len = 0; - pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PING], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PONG], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); - vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); - - beginDebugMarker(context, cb, "Deinterleaved Blur"); - - 
uint32_t w = 4 * BLUR_WIDTH - 2 * blurPassCount; - uint32_t h = 3 * BLUR_HEIGHT - 2 * blurPassCount; - uint32_t dispatchWidth = dispatchSize(w, bsi->ssaoBufferWidth); - uint32_t dispatchHeight = dispatchSize(h, bsi->ssaoBufferHeight); - - for (int pass = 0; pass < 4; ++pass) - { - if (context->settings.qualityLevel == FFX_CACAO_QUALITY_LOWEST && (pass == 1 || pass == 2)) - { - continue; - } - - ComputeShaderID blurShaderID = (ComputeShaderID)(CS_EDGE_SENSITIVE_BLUR_1 + blurPassCount - 1); - DescriptorSetID descriptorSetID = (DescriptorSetID)(DS_EDGE_SENSITIVE_BLUR_0 + pass); - computeDispatch(context, cb, descriptorSetID, blurShaderID, dispatchWidth, dispatchHeight); - } - - endDebugMarker(context, cb); - GET_TIMESTAMP(EDGE_SENSITIVE_BLUR) - - barrierList.len = 0; - pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PONG], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - pushBarrier(&barrierList, context->output, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); - vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); - } - else - { - barrierList.len = 0; - pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PING], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - pushBarrier(&barrierList, context->output, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); - vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); - } - - - if (context->useDownsampledSsao) - { - beginDebugMarker(context, cb, "Bilateral Upsample"); - - uint32_t dispatchWidth = dispatchSize(2 * BILATERAL_UPSCALE_WIDTH, bsi->inputOutputBufferWidth); - uint32_t dispatchHeight = 
dispatchSize(2 * BILATERAL_UPSCALE_HEIGHT, bsi->inputOutputBufferHeight); - - DescriptorSetID descriptorSetID = blurPassCount ? DS_BILATERAL_UPSAMPLE_PONG : DS_BILATERAL_UPSAMPLE_PING; - ComputeShaderID upscaler = context->settings.qualityLevel == FFX_CACAO_QUALITY_LOWEST ? CS_UPSCALE_BILATERAL_5X5_HALF : CS_UPSCALE_BILATERAL_5X5; - - computeDispatch(context, cb, descriptorSetID, upscaler, dispatchWidth, dispatchHeight); - - endDebugMarker(context, cb); - GET_TIMESTAMP(BILATERAL_UPSAMPLE) - } - else - { - beginDebugMarker(context, cb, "Reinterleave"); - - uint32_t dispatchWidth = dispatchSize(APPLY_WIDTH, bsi->inputOutputBufferWidth); - uint32_t dispatchHeight = dispatchSize(APPLY_HEIGHT, bsi->inputOutputBufferHeight); - - DescriptorSetID descriptorSetID = blurPassCount ? DS_APPLY_PONG : DS_APPLY_PING; - - switch (context->settings.qualityLevel) - { - case FFX_CACAO_QUALITY_LOWEST: - computeDispatch(context, cb, descriptorSetID, CS_NON_SMART_HALF_APPLY, dispatchWidth, dispatchHeight); - break; - case FFX_CACAO_QUALITY_LOW: - computeDispatch(context, cb, descriptorSetID, CS_NON_SMART_APPLY, dispatchWidth, dispatchHeight); - break; - default: - computeDispatch(context, cb, descriptorSetID, CS_APPLY, dispatchWidth, dispatchHeight); - break; - } - - endDebugMarker(context, cb); - GET_TIMESTAMP(APPLY) - } - - endDebugMarker(context, cb); - - barrierList.len = 0; - pushBarrier(&barrierList, context->output, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); - vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); - -#ifdef FFX_CACAO_ENABLE_PROFILING - context->timestampQueries[curBuffer].numTimestamps = numTimestamps; -#endif - - return FFX_CACAO_STATUS_OK; -} - -#ifdef FFX_CACAO_ENABLE_PROFILING -FfxCacaoStatus ffxCacaoVkGetDetailedTimings(FfxCacaoVkContext* context, FfxCacaoDetailedTiming* 
timings) -{ - if (context == NULL || timings == NULL) - { - return FFX_CACAO_STATUS_INVALID_POINTER; - } - context = getAlignedVkContextPointer(context); - - uint32_t bufferIndex = context->collectBuffer; - uint32_t numTimestamps = context->timestampQueries[bufferIndex].numTimestamps; - uint64_t prevTime = context->timestampQueries[bufferIndex].timings[0]; - for (uint32_t i = 1; i < numTimestamps; ++i) - { - TimestampID timestampID = context->timestampQueries[bufferIndex].timestamps[i]; - timings->timestamps[i].label = TIMESTAMP_NAMES[timestampID]; - uint64_t time = context->timestampQueries[bufferIndex].timings[i]; - timings->timestamps[i].ticks = time - prevTime; - prevTime = time; - } - timings->timestamps[0].label = "FFX_CACAO_TOTAL"; - timings->timestamps[0].ticks = prevTime - context->timestampQueries[bufferIndex].timings[0]; - timings->numTimestamps = numTimestamps; - - return FFX_CACAO_STATUS_OK; -} -#endif -#endif - -#ifdef __cplusplus -} -#endif diff --git a/ffx-cacao/src/ffx_cacao.hlsl b/ffx-cacao/src/ffx_cacao.hlsl index 027ad68..58dd95f 100644 --- a/ffx-cacao/src/ffx_cacao.hlsl +++ b/ffx-cacao/src/ffx_cacao.hlsl @@ -1,17 +1,17 @@ -// Modifications Copyright 2020. Advanced Micro Devices, Inc. All Rights Reserved. +// Modifications Copyright 2021. Advanced Micro Devices, Inc. All Rights Reserved. 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Copyright (c) 2016, Intel Corporation -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated -// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to // permit persons to whom the Software is furnished to do so, subject to the following conditions: -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of // the Software. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO -// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // File changes (yyyy-mm-dd) @@ -20,76 +20,9 @@ #include "ffx_cacao_defines.h" +#include "ffx_cacao_bindings.hlsl" -#define SSAO_ENABLE_NORMAL_WORLD_TO_VIEW_CONVERSION 1 - -#define INTELSSAO_MAIN_DISK_SAMPLE_COUNT (32) - -struct CACAOConstants -{ - float2 DepthUnpackConsts; - float2 CameraTanHalfFOV; - - float2 NDCToViewMul; - float2 NDCToViewAdd; - - float2 DepthBufferUVToViewMul; - float2 DepthBufferUVToViewAdd; - - float EffectRadius; // world (viewspace) maximum size of the shadow - float EffectShadowStrength; // global strength of the effect (0 - 5) - float EffectShadowPow; - float EffectShadowClamp; - - float EffectFadeOutMul; // effect fade out from distance (ex. 25) - float EffectFadeOutAdd; // effect fade out to distance (ex. 100) - float EffectHorizonAngleThreshold; // limit errors on slopes and caused by insufficient geometry tessellation (0.05 to 0.5) - float EffectSamplingRadiusNearLimitRec; // if viewspace pixel closer than this, don't enlarge shadow sampling radius anymore (makes no sense to grow beyond some distance, not enough samples to cover everything, so just limit the shadow growth; could be SSAOSettingsFadeOutFrom * 0.1 or less) - - float DepthPrecisionOffsetMod; - float NegRecEffectRadius; // -1.0 / EffectRadius - float LoadCounterAvgDiv; // 1.0 / ( halfDepthMip[SSAO_DEPTH_MIP_LEVELS-1].sizeX * halfDepthMip[SSAO_DEPTH_MIP_LEVELS-1].sizeY ) - float AdaptiveSampleCountLimit; - - float InvSharpness; - int PassIndex; - float BilateralSigmaSquared; - float BilateralSimilarityDistanceSigma; - - float4 PatternRotScaleMatrices[5]; - - float NormalsUnpackMul; - float NormalsUnpackAdd; - float DetailAOStrength; - float Dummy0; - - float2 SSAOBufferDimensions; - float2 SSAOBufferInverseDimensions; - - float2 DepthBufferDimensions; - float2 DepthBufferInverseDimensions; - - int2 DepthBufferOffset; - float2 PerPassFullResUVOffset; - - 
float2 OutputBufferDimensions; - float2 OutputBufferInverseDimensions; - - float2 ImportanceMapDimensions; - float2 ImportanceMapInverseDimensions; - - float2 DeinterleavedDepthBufferDimensions; - float2 DeinterleavedDepthBufferInverseDimensions; - - float2 DeinterleavedDepthBufferOffset; - float2 DeinterleavedDepthBufferNormalisedOffset; - -#if SSAO_ENABLE_NORMAL_WORLD_TO_VIEW_CONVERSION - float4x4 NormalsWorldToViewspaceMatrix; -#endif -}; - -static const float4 g_samplePatternMain[INTELSSAO_MAIN_DISK_SAMPLE_COUNT] = +static const float4 g_FFX_CACAO_samplePatternMain[] = { 0.78488064, 0.56661671, 1.500000, -0.126083, 0.26022232, -0.29575172, 1.500000, -1.064030, 0.10459357, 0.08372527, 1.110000, -2.730563, -0.68286800, 0.04963045, 1.090000, -0.498827, -0.13570161, -0.64190155, 1.250000, -0.532765, -0.26193795, -0.08205118, 0.670000, -1.783245, -0.61177456, 0.66664219, 0.710000, -0.044234, 0.43675563, 0.25119025, 0.610000, -1.167283, @@ -101,15 +34,13 @@ static const float4 g_samplePatternMain[INTELSSAO_MAIN_DISK_SAMPLE_COUNT] = -0.15064627, -0.14949332, 0.600000, -1.896062, 0.53180975, -0.35210401, 0.600000, -0.758838, 0.41487166, 0.81442589, 0.600000, -0.505648, -0.24106961, -0.32721516, 0.600000, -1.665244 }; -#define SSAO_MAX_TAPS (32) -#define SSAO_MAX_REF_TAPS (512) -#define SSAO_ADAPTIVE_TAP_BASE_COUNT (5) -#define SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT (SSAO_MAX_TAPS - SSAO_ADAPTIVE_TAP_BASE_COUNT) -#define SSAO_DEPTH_MIP_LEVELS (4) +#define FFX_CACAO_MAX_TAPS (32) +#define FFX_CACAO_ADAPTIVE_TAP_BASE_COUNT (5) +#define FFX_CACAO_ADAPTIVE_TAP_FLEXIBLE_COUNT (FFX_CACAO_MAX_TAPS - FFX_CACAO_ADAPTIVE_TAP_BASE_COUNT) -// these values can be changed (up to SSAO_MAX_TAPS) with no changes required elsewhere; values for 4th and 5th preset are ignored but array needed to avoid compilation errors +// these values can be changed (up to FFX_CACAO_MAX_TAPS) with no changes required elsewhere; values for 4th and 5th preset are ignored but array needed to avoid compilation 
errors // the actual number of texture samples is two times this value (each "tap" has two symmetrical depth texture samples) -static const uint g_numTaps[5] = { 3, 5, 12, 0, 0 }; +static const uint g_FFX_CACAO_numTaps[5] = { 3, 5, 12, 0, 0 }; ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -118,59 +49,35 @@ static const uint g_numTaps[5] = { 3, 5, 12, 0, 0 }; // Each has its own cost. To disable just set to 5 or above. // // (experimental) tilts the disk (although only half of the samples!) towards surface normal; this helps with effect uniformity between objects but reduces effect distance and has other side-effects -#define SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET (99) // to disable simply set to 99 or similar -#define SSAO_TILT_SAMPLES_AMOUNT (0.4) +#define FFX_CACAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET (99) // to disable simply set to 99 or similar +#define FFX_CACAO_TILT_SAMPLES_AMOUNT (0.4) // -#define SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET (1) // to disable simply set to 99 or similar -#define SSAO_HALOING_REDUCTION_AMOUNT (0.6) // values from 0.0 - 1.0, 1.0 means max weighting (will cause artifacts, 0.8 is more reasonable) +#define FFX_CACAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET (1) // to disable simply set to 99 or similar +#define FFX_CACAO_HALOING_REDUCTION_AMOUNT (0.6) // values from 0.0 - 1.0, 1.0 means max weighting (will cause artifacts, 0.8 is more reasonable) // -#define SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (2) //2 // to disable simply set to 99 or similar -#define SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD (0.5) // use 0-0.1 for super-sharp normal-based edges +#define FFX_CACAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (2) //2 // to disable simply set to 99 or similar +#define FFX_CACAO_NORMAL_BASED_EDGES_DOT_THRESHOLD (0.5) // use 0-0.1 for super-sharp normal-based edges // -#define 
SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET (1) //1 // whether to use DetailAOStrength; to disable simply set to 99 or similar +#define FFX_CACAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET (1) //1 // whether to use DetailAOStrength; to disable simply set to 99 or similar // -#define SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) // !!warning!! the MIP generation on the C++ side will be enabled on quality preset 2 regardless of this value, so if changing here, change the C++ side too -#define SSAO_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically +#define FFX_CACAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET (2) // !!warning!! the MIP generation on the C++ side will be enabled on quality preset 2 regardless of this value, so if changing here, change the C++ side too +#define FFX_CACAO_DEPTH_MIPS_GLOBAL_OFFSET (-4.3) // best noise/quality/performance tradeoff, found empirically // -// !!warning!! the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for +// !!warning!! 
the edge handling is hard-coded to 'disabled' on quality level 0, and enabled above, on the C++ side; while toggling it here will work for // testing purposes, it will not yield performance gains (or correct results) -#define SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1) +#define FFX_CACAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET (1) // -#define SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (99) // 99 means disabled; only helpful if artifacts at the edges caused by lack of out of screen depth data are not acceptable with the depth sampler in either clamp or mirror modes +#define FFX_CACAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET (99) // 99 means disabled; only helpful if artifacts at the edges caused by lack of out of screen depth data are not acceptable with the depth sampler in either clamp or mirror modes ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - -SamplerState g_PointClampSampler : register(s0); // corresponds to SSAO_SAMPLERS_SLOT0 -SamplerState g_PointMirrorSampler : register(s1); // corresponds to SSAO_SAMPLERS_SLOT2 -SamplerState g_LinearClampSampler : register(s2); // corresponds to SSAO_SAMPLERS_SLOT1 -SamplerState g_ViewspaceDepthTapSampler : register(s3); // corresponds to SSAO_SAMPLERS_SLOT3 -SamplerState g_ZeroTextureSampler : register(s4); - -cbuffer SSAOConstantsBuffer : register(b0) // corresponds to SSAO_CONSTANTS_BUFFERSLOT -{ - CACAOConstants g_CACAOConsts; -} - - -RWTexture1D g_ClearLoadCounterInput : register(u0); -[numthreads(1, 1, 1)] -void CSClearLoadCounter() -{ - g_ClearLoadCounterInput[0] = 0; -} - // packing/unpacking for edges; 2 bits per edge mean 4 gradient values (0, 0.33, 0.66, 1) for smoother transitions! 
-float PackEdges(float4 edgesLRTB) +float FFX_CACAO_PackEdges(float4 edgesLRTB) { - // int4 edgesLRTBi = int4( saturate( edgesLRTB ) * 3.0 + 0.5 ); - // return ( (edgesLRTBi.x << 6) + (edgesLRTBi.y << 4) + (edgesLRTBi.z << 2) + (edgesLRTBi.w << 0) ) / 255.0; - - // optimized, should be same as above edgesLRTB = round(saturate(edgesLRTB) * 3.05); return dot(edgesLRTB, float4(64.0 / 255.0, 16.0 / 255.0, 4.0 / 255.0, 1.0 / 255.0)); } -float4 UnpackEdges(float _packedVal) +float4 FFX_CACAO_UnpackEdges(float _packedVal) { uint packedVal = (uint)(_packedVal * 255.5); float4 edgesLRTB; @@ -179,70 +86,54 @@ float4 UnpackEdges(float _packedVal) edgesLRTB.z = float((packedVal >> 2) & 0x03) / 3.0; edgesLRTB.w = float((packedVal >> 0) & 0x03) / 3.0; - return saturate(edgesLRTB + g_CACAOConsts.InvSharpness); + return saturate(edgesLRTB + g_FFX_CACAO_Consts.InvSharpness); } -float ScreenSpaceToViewSpaceDepth(float screenDepth) +float FFX_CACAO_ScreenSpaceToViewSpaceDepth(float screenDepth) { - float depthLinearizeMul = g_CACAOConsts.DepthUnpackConsts.x; - float depthLinearizeAdd = g_CACAOConsts.DepthUnpackConsts.y; - - // Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar" - - // Set your depthLinearizeMul and depthLinearizeAdd to: - // depthLinearizeMul = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear ); - // depthLinearizeAdd = cameraClipFar / ( cameraClipFar - cameraClipNear ); + float depthLinearizeMul = g_FFX_CACAO_Consts.DepthUnpackConsts.x; + float depthLinearizeAdd = g_FFX_CACAO_Consts.DepthUnpackConsts.y; return depthLinearizeMul / (depthLinearizeAdd - screenDepth); } -float4 ScreenSpaceToViewSpaceDepth(float4 screenDepth) +float4 FFX_CACAO_ScreenSpaceToViewSpaceDepth(float4 screenDepth) { - float depthLinearizeMul = g_CACAOConsts.DepthUnpackConsts.x; - float depthLinearizeAdd = g_CACAOConsts.DepthUnpackConsts.y; - - // Optimised version of "-cameraClipNear / (cameraClipFar - 
projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar" - - // Set your depthLinearizeMul and depthLinearizeAdd to: - // depthLinearizeMul = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear ); - // depthLinearizeAdd = cameraClipFar / ( cameraClipFar - cameraClipNear ); + float depthLinearizeMul = g_FFX_CACAO_Consts.DepthUnpackConsts.x; + float depthLinearizeAdd = g_FFX_CACAO_Consts.DepthUnpackConsts.y; return depthLinearizeMul / (depthLinearizeAdd - screenDepth); } -float4 CalculateEdges(const float centerZ, const float leftZ, const float rightZ, const float topZ, const float bottomZ) +float4 FFX_CACAO_CalculateEdges(const float centerZ, const float leftZ, const float rightZ, const float topZ, const float bottomZ) { // slope-sensitive depth-based edge detection float4 edgesLRTB = float4(leftZ, rightZ, topZ, bottomZ) - centerZ; float4 edgesLRTBSlopeAdjusted = edgesLRTB + edgesLRTB.yxwz; edgesLRTB = min(abs(edgesLRTB), abs(edgesLRTBSlopeAdjusted)); return saturate((1.3 - edgesLRTB / (centerZ * 0.040))); - - // cheaper version but has artifacts - // edgesLRTB = abs( float4( leftZ, rightZ, topZ, bottomZ ) - centerZ; ); - // return saturate( ( 1.3 - edgesLRTB / (pixZ * 0.06 + 0.1) ) ); } -float3 NDCToViewspace(float2 pos, float viewspaceDepth) +float3 FFX_CACAO_NDCToViewSpace(float2 pos, float viewspaceDepth) { float3 ret; - ret.xy = (g_CACAOConsts.NDCToViewMul * pos.xy + g_CACAOConsts.NDCToViewAdd) * viewspaceDepth; + ret.xy = (g_FFX_CACAO_Consts.NDCToViewMul * pos.xy + g_FFX_CACAO_Consts.NDCToViewAdd) * viewspaceDepth; ret.z = viewspaceDepth; return ret; } -float3 DepthBufferUVToViewspace(float2 pos, float viewspaceDepth) +float3 FFX_CACAO_DepthBufferUVToViewSpace(float2 pos, float viewspaceDepth) { float3 ret; - ret.xy = (g_CACAOConsts.DepthBufferUVToViewMul * pos.xy + g_CACAOConsts.DepthBufferUVToViewAdd) * viewspaceDepth; + ret.xy = (g_FFX_CACAO_Consts.DepthBufferUVToViewMul * pos.xy + g_FFX_CACAO_Consts.DepthBufferUVToViewAdd) * 
viewspaceDepth; ret.z = viewspaceDepth; return ret; } -float3 CalculateNormal(const float4 edgesLRTB, float3 pixCenterPos, float3 pixLPos, float3 pixRPos, float3 pixTPos, float3 pixBPos) +float3 FFX_CACAO_CalculateNormal(const float4 edgesLRTB, float3 pixCenterPos, float3 pixLPos, float3 pixRPos, float3 pixTPos, float3 pixBPos) { // Get this pixel's viewspace normal float4 acceptedNormals = float4(edgesLRTB.x*edgesLRTB.z, edgesLRTB.z*edgesLRTB.y, edgesLRTB.y*edgesLRTB.w, edgesLRTB.w*edgesLRTB.x); @@ -262,85 +153,45 @@ float3 CalculateNormal(const float4 edgesLRTB, float3 pixCenterPos, float3 pixLP return pixelNormal; } +// ============================================================================= +// Clear Load Counter -// ================================================================================ -// Blur stuff - -Texture2DArray g_BlurInput : register(t0); -RWTexture2DArray g_BlurOutput : register(u0); - -void AddSample(float ssaoValue, float edgeValue, inout float sum, inout float sumWeight) +[numthreads(1, 1, 1)] +void FFX_CACAO_ClearLoadCounter() { - float weight = edgeValue; - - sum += (weight * ssaoValue); - sumWeight += weight; + FFX_CACAO_ClearLoadCounter_SetLoadCounter(0); } -float2 SampleBlurredWide(float2 inPos, float2 coord) -{ - float3 fullCoord = float3(coord, 0.0f); - float2 vC = g_BlurInput.SampleLevel(g_PointMirrorSampler, fullCoord, 0.0, int2(0, 0)).xy; - float2 vL = g_BlurInput.SampleLevel(g_PointMirrorSampler, fullCoord, 0.0, int2(-2, 0)).xy; - float2 vT = g_BlurInput.SampleLevel(g_PointMirrorSampler, fullCoord, 0.0, int2(0, -2)).xy; - float2 vR = g_BlurInput.SampleLevel(g_PointMirrorSampler, fullCoord, 0.0, int2(2, 0)).xy; - float2 vB = g_BlurInput.SampleLevel(g_PointMirrorSampler, fullCoord, 0.0, int2(0, 2)).xy; - - float packedEdges = vC.y; - float4 edgesLRTB = UnpackEdges(packedEdges); - edgesLRTB.x *= UnpackEdges(vL.y).y; - edgesLRTB.z *= UnpackEdges(vT.y).w; - edgesLRTB.y *= UnpackEdges(vR.y).x; - edgesLRTB.w *= 
UnpackEdges(vB.y).z; - - float ssaoValue = vC.x; - float ssaoValueL = vL.x; - float ssaoValueT = vT.x; - float ssaoValueR = vR.x; - float ssaoValueB = vB.x; - - float sumWeight = 0.8f; - float sum = ssaoValue * sumWeight; - - AddSample(ssaoValueL, edgesLRTB.x, sum, sumWeight); - AddSample(ssaoValueR, edgesLRTB.y, sum, sumWeight); - AddSample(ssaoValueT, edgesLRTB.z, sum, sumWeight); - AddSample(ssaoValueB, edgesLRTB.w, sum, sumWeight); - - float ssaoAvg = sum / sumWeight; - - ssaoValue = ssaoAvg; //min( ssaoValue, ssaoAvg ) * 0.2 + ssaoAvg * 0.8; - - return float2(ssaoValue, packedEdges); -} +// ============================================================================= +// Edge Sensitive Blur -uint PackFloat16(min16float2 v) +uint FFX_CACAO_PackFloat16(min16float2 v) { uint2 p = f32tof16(float2(v)); return p.x | (p.y << 16); } -min16float2 UnpackFloat16(uint a) +min16float2 FFX_CACAO_UnpackFloat16(uint a) { float2 tmp = f16tof32(uint2(a & 0xFFFF, a >> 16)); return min16float2(tmp); } // all in one, SIMD in yo SIMD dawg, shader -#define TILE_WIDTH 4 -#define TILE_HEIGHT 3 -#define HALF_TILE_WIDTH (TILE_WIDTH / 2) -#define QUARTER_TILE_WIDTH (TILE_WIDTH / 4) +#define FFX_CACAO_TILE_WIDTH 4 +#define FFX_CACAO_TILE_HEIGHT 3 +#define FFX_CACAO_HALF_TILE_WIDTH (FFX_CACAO_TILE_WIDTH / 2) +#define FFX_CACAO_QUARTER_TILE_WIDTH (FFX_CACAO_TILE_WIDTH / 4) -#define ARRAY_WIDTH (HALF_TILE_WIDTH * BLUR_WIDTH + 2) -#define ARRAY_HEIGHT (TILE_HEIGHT * BLUR_HEIGHT + 2) +#define FFX_CACAO_ARRAY_WIDTH (FFX_CACAO_HALF_TILE_WIDTH * FFX_CACAO_BLUR_WIDTH + 2) +#define FFX_CACAO_ARRAY_HEIGHT (FFX_CACAO_TILE_HEIGHT * FFX_CACAO_BLUR_HEIGHT + 2) -#define ITERS 4 +#define FFX_CACAO_ITERS 4 -groupshared uint s_BlurF16Front_4[ARRAY_WIDTH][ARRAY_HEIGHT]; -groupshared uint s_BlurF16Back_4[ARRAY_WIDTH][ARRAY_HEIGHT]; +groupshared uint s_FFX_CACAO_BlurF16Front_4[FFX_CACAO_ARRAY_WIDTH][FFX_CACAO_ARRAY_HEIGHT]; +groupshared uint 
s_FFX_CACAO_BlurF16Back_4[FFX_CACAO_ARRAY_WIDTH][FFX_CACAO_ARRAY_HEIGHT]; -struct Edges_4 +struct FFX_CACAO_Edges_4 { min16float4 left; min16float4 right; @@ -348,23 +199,23 @@ struct Edges_4 min16float4 bottom; }; -Edges_4 UnpackEdgesFloat16_4(min16float4 _packedVal) +FFX_CACAO_Edges_4 FFX_CACAO_UnpackEdgesFloat16_4(min16float4 _packedVal) { uint4 packedVal = (uint4)(_packedVal * 255.5); - Edges_4 result; - result.left = min16float4(saturate(min16float4((packedVal >> 6) & 0x03) / 3.0 + g_CACAOConsts.InvSharpness)); - result.right = min16float4(saturate(min16float4((packedVal >> 4) & 0x03) / 3.0 + g_CACAOConsts.InvSharpness)); - result.top = min16float4(saturate(min16float4((packedVal >> 2) & 0x03) / 3.0 + g_CACAOConsts.InvSharpness)); - result.bottom = min16float4(saturate(min16float4((packedVal >> 0) & 0x03) / 3.0 + g_CACAOConsts.InvSharpness)); + FFX_CACAO_Edges_4 result; + result.left = min16float4(saturate(min16float4((packedVal >> 6) & 0x03) / 3.0 + g_FFX_CACAO_Consts.InvSharpness)); + result.right = min16float4(saturate(min16float4((packedVal >> 4) & 0x03) / 3.0 + g_FFX_CACAO_Consts.InvSharpness)); + result.top = min16float4(saturate(min16float4((packedVal >> 2) & 0x03) / 3.0 + g_FFX_CACAO_Consts.InvSharpness)); + result.bottom = min16float4(saturate(min16float4((packedVal >> 0) & 0x03) / 3.0 + g_FFX_CACAO_Consts.InvSharpness)); return result; } -min16float4 CalcBlurredSampleF16_4(min16float4 packedEdges, min16float4 centre, min16float4 left, min16float4 right, min16float4 top, min16float4 bottom) +min16float4 FFX_CACAO_CalcBlurredSampleF16_4(min16float4 packedEdges, min16float4 centre, min16float4 left, min16float4 right, min16float4 top, min16float4 bottom) { min16float4 sum = centre * min16float(0.5f); min16float4 weight = min16float4(0.5f, 0.5f, 0.5f, 0.5f); - Edges_4 edges = UnpackEdgesFloat16_4(packedEdges); + FFX_CACAO_Edges_4 edges = FFX_CACAO_UnpackEdgesFloat16_4(packedEdges); sum += left * edges.left; weight += edges.left; @@ -378,36 +229,35 @@ 
min16float4 CalcBlurredSampleF16_4(min16float4 packedEdges, min16float4 centre, return sum / weight; } -void LDSEdgeSensitiveBlur(const uint blurPasses, const uint2 tid, const uint2 gid) +void FFX_CACAO_LDSEdgeSensitiveBlur(const uint blurPasses, const uint2 tid, const uint2 gid) { - int2 imageCoord = gid * (int2(TILE_WIDTH * BLUR_WIDTH, TILE_HEIGHT * BLUR_HEIGHT) - (2*blurPasses)) + int2(TILE_WIDTH, TILE_HEIGHT) * tid - blurPasses; - int2 bufferCoord = int2(HALF_TILE_WIDTH, TILE_HEIGHT) * tid + 1; + int2 imageCoord = gid * (int2(FFX_CACAO_TILE_WIDTH * FFX_CACAO_BLUR_WIDTH, FFX_CACAO_TILE_HEIGHT * FFX_CACAO_BLUR_HEIGHT) - (2*blurPasses)) + int2(FFX_CACAO_TILE_WIDTH, FFX_CACAO_TILE_HEIGHT) * tid - blurPasses; + int2 bufferCoord = int2(FFX_CACAO_HALF_TILE_WIDTH, FFX_CACAO_TILE_HEIGHT) * tid + 1; - // todo -- replace this with gathers. - min16float4 packedEdges[QUARTER_TILE_WIDTH][TILE_HEIGHT]; + min16float4 packedEdges[FFX_CACAO_QUARTER_TILE_WIDTH][FFX_CACAO_TILE_HEIGHT]; { - float2 input[TILE_WIDTH][TILE_HEIGHT]; + float2 input[FFX_CACAO_TILE_WIDTH][FFX_CACAO_TILE_HEIGHT]; int y; [unroll] - for (y = 0; y < TILE_HEIGHT; ++y) + for (y = 0; y < FFX_CACAO_TILE_HEIGHT; ++y) { [unroll] - for (int x = 0; x < TILE_WIDTH; ++x) + for (int x = 0; x < FFX_CACAO_TILE_WIDTH; ++x) { - input[x][y] = g_BlurInput.SampleLevel(g_PointMirrorSampler, float3((imageCoord + int2(x, y) + 0.5f) * g_CACAOConsts.SSAOBufferInverseDimensions, 0.0f), 0).xy; + float2 sampleUV = (float2(imageCoord + int2(x, y)) + 0.5f) * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + input[x][y] = FFX_CACAO_EdgeSensitiveBlur_SampleInput(sampleUV); } } [unroll] - for (y = 0; y < TILE_HEIGHT; ++y) + for (y = 0; y < FFX_CACAO_TILE_HEIGHT; ++y) { [unroll] - for (int x = 0; x < QUARTER_TILE_WIDTH; ++x) + for (int x = 0; x < FFX_CACAO_QUARTER_TILE_WIDTH; ++x) { min16float2 ssaoVals = min16float2(input[4 * x + 0][y].x, input[4 * x + 1][y].x); - s_BlurF16Front_4[bufferCoord.x + 2*x + 0][bufferCoord.y + y] = 
PackFloat16(ssaoVals); + s_FFX_CACAO_BlurF16Front_4[bufferCoord.x + 2*x + 0][bufferCoord.y + y] = FFX_CACAO_PackFloat16(ssaoVals); ssaoVals = min16float2(input[4 * x + 2][y].x, input[4 * x + 3][y].x); - s_BlurF16Front_4[bufferCoord.x + 2*x + 1][bufferCoord.y + y] = PackFloat16(ssaoVals); - // min16float2 ssaoVals = min16float2(1, 1); + s_FFX_CACAO_BlurF16Front_4[bufferCoord.x + 2*x + 1][bufferCoord.y + y] = FFX_CACAO_PackFloat16(ssaoVals); packedEdges[x][y] = min16float4(input[4 * x + 0][y].y, input[4 * x + 1][y].y, input[4 * x + 2][y].y, input[4 * x + 3][y].y); } } @@ -419,24 +269,24 @@ void LDSEdgeSensitiveBlur(const uint blurPasses, const uint2 tid, const uint2 gi for (uint i = 0; i < (blurPasses + 1) / 2; ++i) { [unroll] - for (int y = 0; y < TILE_HEIGHT; ++y) + for (int y = 0; y < FFX_CACAO_TILE_HEIGHT; ++y) { [unroll] - for (int x = 0; x < QUARTER_TILE_WIDTH; ++x) + for (int x = 0; x < FFX_CACAO_QUARTER_TILE_WIDTH; ++x) { int2 c = bufferCoord + int2(2*x, y); - min16float4 centre = min16float4(UnpackFloat16(s_BlurF16Front_4[c.x + 0][c.y + 0]), UnpackFloat16(s_BlurF16Front_4[c.x + 1][c.y + 0])); - min16float4 top = min16float4(UnpackFloat16(s_BlurF16Front_4[c.x + 0][c.y - 1]), UnpackFloat16(s_BlurF16Front_4[c.x + 1][c.y - 1])); - min16float4 bottom = min16float4(UnpackFloat16(s_BlurF16Front_4[c.x + 0][c.y + 1]), UnpackFloat16(s_BlurF16Front_4[c.x + 1][c.y + 1])); + min16float4 centre = min16float4(FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Front_4[c.x + 0][c.y + 0]), FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Front_4[c.x + 1][c.y + 0])); + min16float4 top = min16float4(FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Front_4[c.x + 0][c.y - 1]), FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Front_4[c.x + 1][c.y - 1])); + min16float4 bottom = min16float4(FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Front_4[c.x + 0][c.y + 1]), FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Front_4[c.x + 1][c.y + 1])); - min16float2 tmp = UnpackFloat16(s_BlurF16Front_4[c.x - 1][c.y + 0]); + 
min16float2 tmp = FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Front_4[c.x - 1][c.y + 0]); min16float4 left = min16float4(tmp.y, centre.xyz); - tmp = UnpackFloat16(s_BlurF16Front_4[c.x + 2][c.y + 0]); + tmp = FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Front_4[c.x + 2][c.y + 0]); min16float4 right = min16float4(centre.yzw, tmp.x); - min16float4 tmp_4 = CalcBlurredSampleF16_4(packedEdges[x][y], centre, left, right, top, bottom); - s_BlurF16Back_4[c.x + 0][c.y] = PackFloat16(tmp_4.xy); - s_BlurF16Back_4[c.x + 1][c.y] = PackFloat16(tmp_4.zw); + min16float4 tmp_4 = FFX_CACAO_CalcBlurredSampleF16_4(packedEdges[x][y], centre, left, right, top, bottom); + s_FFX_CACAO_BlurF16Back_4[c.x + 0][c.y] = FFX_CACAO_PackFloat16(tmp_4.xy); + s_FFX_CACAO_BlurF16Back_4[c.x + 1][c.y] = FFX_CACAO_PackFloat16(tmp_4.zw); } } GroupMemoryBarrierWithGroupSync(); @@ -444,24 +294,24 @@ void LDSEdgeSensitiveBlur(const uint blurPasses, const uint2 tid, const uint2 gi if (2 * i + 1 < blurPasses) { [unroll] - for (int y = 0; y < TILE_HEIGHT; ++y) + for (int y = 0; y < FFX_CACAO_TILE_HEIGHT; ++y) { [unroll] - for (int x = 0; x < QUARTER_TILE_WIDTH; ++x) + for (int x = 0; x < FFX_CACAO_QUARTER_TILE_WIDTH; ++x) { int2 c = bufferCoord + int2(2 * x, y); - min16float4 centre = min16float4(UnpackFloat16(s_BlurF16Back_4[c.x + 0][c.y + 0]), UnpackFloat16(s_BlurF16Back_4[c.x + 1][c.y + 0])); - min16float4 top = min16float4(UnpackFloat16(s_BlurF16Back_4[c.x + 0][c.y - 1]), UnpackFloat16(s_BlurF16Back_4[c.x + 1][c.y - 1])); - min16float4 bottom = min16float4(UnpackFloat16(s_BlurF16Back_4[c.x + 0][c.y + 1]), UnpackFloat16(s_BlurF16Back_4[c.x + 1][c.y + 1])); + min16float4 centre = min16float4(FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Back_4[c.x + 0][c.y + 0]), FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Back_4[c.x + 1][c.y + 0])); + min16float4 top = min16float4(FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Back_4[c.x + 0][c.y - 1]), FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Back_4[c.x + 1][c.y - 1])); + 
min16float4 bottom = min16float4(FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Back_4[c.x + 0][c.y + 1]), FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Back_4[c.x + 1][c.y + 1])); - min16float2 tmp = UnpackFloat16(s_BlurF16Back_4[c.x - 1][c.y + 0]); + min16float2 tmp = FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Back_4[c.x - 1][c.y + 0]); min16float4 left = min16float4(tmp.y, centre.xyz); - tmp = UnpackFloat16(s_BlurF16Back_4[c.x + 2][c.y + 0]); + tmp = FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Back_4[c.x + 2][c.y + 0]); min16float4 right = min16float4(centre.yzw, tmp.x); - min16float4 tmp_4 = CalcBlurredSampleF16_4(packedEdges[x][y], centre, left, right, top, bottom); - s_BlurF16Front_4[c.x + 0][c.y] = PackFloat16(tmp_4.xy); - s_BlurF16Front_4[c.x + 1][c.y] = PackFloat16(tmp_4.zw); + min16float4 tmp_4 = FFX_CACAO_CalcBlurredSampleF16_4(packedEdges[x][y], centre, left, right, top, bottom); + s_FFX_CACAO_BlurF16Front_4[c.x + 0][c.y] = FFX_CACAO_PackFloat16(tmp_4.xy); + s_FFX_CACAO_BlurF16Front_4[c.x + 1][c.y] = FFX_CACAO_PackFloat16(tmp_4.zw); } } GroupMemoryBarrierWithGroupSync(); @@ -469,131 +319,121 @@ void LDSEdgeSensitiveBlur(const uint blurPasses, const uint2 tid, const uint2 gi } [unroll] - for (uint y = 0; y < TILE_HEIGHT; ++y) + for (uint y = 0; y < FFX_CACAO_TILE_HEIGHT; ++y) { - uint outputY = TILE_HEIGHT * tid.y + y; - if (blurPasses <= outputY && outputY < TILE_HEIGHT * BLUR_HEIGHT - blurPasses) + uint outputY = FFX_CACAO_TILE_HEIGHT * tid.y + y; + if (blurPasses <= outputY && outputY < FFX_CACAO_TILE_HEIGHT * FFX_CACAO_BLUR_HEIGHT - blurPasses) { [unroll] - for (int x = 0; x < QUARTER_TILE_WIDTH; ++x) + for (int x = 0; x < FFX_CACAO_QUARTER_TILE_WIDTH; ++x) { - uint outputX = TILE_WIDTH * tid.x + 4 * x; + uint outputX = FFX_CACAO_TILE_WIDTH * tid.x + 4 * x; min16float4 ssaoVal; if (blurPasses % 2 == 0) { - ssaoVal = min16float4(UnpackFloat16(s_BlurF16Front_4[bufferCoord.x + x][bufferCoord.y + y]), UnpackFloat16(s_BlurF16Front_4[bufferCoord.x + x + 
1][bufferCoord.y + y])); + ssaoVal = min16float4(FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Front_4[bufferCoord.x + x][bufferCoord.y + y]), FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Front_4[bufferCoord.x + x + 1][bufferCoord.y + y])); } else { - ssaoVal = min16float4(UnpackFloat16(s_BlurF16Back_4[bufferCoord.x + x][bufferCoord.y + y]), UnpackFloat16(s_BlurF16Back_4[bufferCoord.x + x + 1][bufferCoord.y + y])); + ssaoVal = min16float4(FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Back_4[bufferCoord.x + x][bufferCoord.y + y]), FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BlurF16Back_4[bufferCoord.x + x + 1][bufferCoord.y + y])); } - if (blurPasses <= outputX && outputX < TILE_WIDTH * BLUR_WIDTH - blurPasses) + if (blurPasses <= outputX && outputX < FFX_CACAO_TILE_WIDTH * FFX_CACAO_BLUR_WIDTH - blurPasses) { - g_BlurOutput[int3(imageCoord + int2(4 * x, y), 0)] = float2(ssaoVal.x, packedEdges[x][y].x); + FFX_CACAO_EdgeSensitiveBlur_StoreOutput(imageCoord + int2(4 * x, y), float2(ssaoVal.x, packedEdges[x][y].x)); } outputX += 1; - if (blurPasses <= outputX && outputX < TILE_WIDTH * BLUR_WIDTH - blurPasses) + if (blurPasses <= outputX && outputX < FFX_CACAO_TILE_WIDTH * FFX_CACAO_BLUR_WIDTH - blurPasses) { - g_BlurOutput[int3(imageCoord + int2(4 * x + 1, y), 0)] = float2(ssaoVal.y, packedEdges[x][y].y); + FFX_CACAO_EdgeSensitiveBlur_StoreOutput(imageCoord + int2(4 * x + 1, y), float2(ssaoVal.y, packedEdges[x][y].y)); } outputX += 1; - if (blurPasses <= outputX && outputX < TILE_WIDTH * BLUR_WIDTH - blurPasses) + if (blurPasses <= outputX && outputX < FFX_CACAO_TILE_WIDTH * FFX_CACAO_BLUR_WIDTH - blurPasses) { - g_BlurOutput[int3(imageCoord + int2(4 * x + 2, y), 0)] = float2(ssaoVal.z, packedEdges[x][y].z); + FFX_CACAO_EdgeSensitiveBlur_StoreOutput(imageCoord + int2(4 * x + 2, y), float2(ssaoVal.z, packedEdges[x][y].z)); } outputX += 1; - if (blurPasses <= outputX && outputX < TILE_WIDTH * BLUR_WIDTH - blurPasses) + if (blurPasses <= outputX && outputX < FFX_CACAO_TILE_WIDTH * 
FFX_CACAO_BLUR_WIDTH - blurPasses) { - g_BlurOutput[int3(imageCoord + int2(4 * x + 3, y), 0)] = float2(ssaoVal.w, packedEdges[x][y].w); + FFX_CACAO_EdgeSensitiveBlur_StoreOutput(imageCoord + int2(4 * x + 3, y), float2(ssaoVal.w, packedEdges[x][y].w)); } } } } } -[numthreads(BLUR_WIDTH, BLUR_HEIGHT, 1)] -void CSEdgeSensitiveBlur1(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) +[numthreads(FFX_CACAO_BLUR_WIDTH, FFX_CACAO_BLUR_HEIGHT, 1)] +void FFX_CACAO_EdgeSensitiveBlur1(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) { - LDSEdgeSensitiveBlur(1, tid, gid); + FFX_CACAO_LDSEdgeSensitiveBlur(1, tid, gid); } -[numthreads(BLUR_WIDTH, BLUR_HEIGHT, 1)] -void CSEdgeSensitiveBlur2(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) +[numthreads(FFX_CACAO_BLUR_WIDTH, FFX_CACAO_BLUR_HEIGHT, 1)] +void FFX_CACAO_EdgeSensitiveBlur2(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) { - LDSEdgeSensitiveBlur(2, tid, gid); + FFX_CACAO_LDSEdgeSensitiveBlur(2, tid, gid); } -[numthreads(BLUR_WIDTH, BLUR_HEIGHT, 1)] -void CSEdgeSensitiveBlur3(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) +[numthreads(FFX_CACAO_BLUR_WIDTH, FFX_CACAO_BLUR_HEIGHT, 1)] +void FFX_CACAO_EdgeSensitiveBlur3(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) { - LDSEdgeSensitiveBlur(3, tid, gid); + FFX_CACAO_LDSEdgeSensitiveBlur(3, tid, gid); } -[numthreads(BLUR_WIDTH, BLUR_HEIGHT, 1)] -void CSEdgeSensitiveBlur4(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) +[numthreads(FFX_CACAO_BLUR_WIDTH, FFX_CACAO_BLUR_HEIGHT, 1)] +void FFX_CACAO_EdgeSensitiveBlur4(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) { - LDSEdgeSensitiveBlur(4, tid, gid); + FFX_CACAO_LDSEdgeSensitiveBlur(4, tid, gid); } -[numthreads(BLUR_WIDTH, BLUR_HEIGHT, 1)] -void CSEdgeSensitiveBlur5(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) +[numthreads(FFX_CACAO_BLUR_WIDTH, FFX_CACAO_BLUR_HEIGHT, 1)] +void FFX_CACAO_EdgeSensitiveBlur5(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) { - 
LDSEdgeSensitiveBlur(5, tid, gid); + FFX_CACAO_LDSEdgeSensitiveBlur(5, tid, gid); } -[numthreads(BLUR_WIDTH, BLUR_HEIGHT, 1)] -void CSEdgeSensitiveBlur6(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) +[numthreads(FFX_CACAO_BLUR_WIDTH, FFX_CACAO_BLUR_HEIGHT, 1)] +void FFX_CACAO_EdgeSensitiveBlur6(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) { - LDSEdgeSensitiveBlur(6, tid, gid); + FFX_CACAO_LDSEdgeSensitiveBlur(6, tid, gid); } -[numthreads(BLUR_WIDTH, BLUR_HEIGHT, 1)] -void CSEdgeSensitiveBlur7(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) +[numthreads(FFX_CACAO_BLUR_WIDTH, FFX_CACAO_BLUR_HEIGHT, 1)] +void FFX_CACAO_EdgeSensitiveBlur7(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) { - LDSEdgeSensitiveBlur(7, tid, gid); + FFX_CACAO_LDSEdgeSensitiveBlur(7, tid, gid); } -[numthreads(BLUR_WIDTH, BLUR_HEIGHT, 1)] -void CSEdgeSensitiveBlur8(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) +[numthreads(FFX_CACAO_BLUR_WIDTH, FFX_CACAO_BLUR_HEIGHT, 1)] +void FFX_CACAO_EdgeSensitiveBlur8(uint2 tid : SV_GroupThreadID, uint2 gid : SV_GroupID) { - LDSEdgeSensitiveBlur(8, tid, gid); + FFX_CACAO_LDSEdgeSensitiveBlur(8, tid, gid); } -#undef TILE_WIDTH -#undef TILE_HEIGHT -#undef ARRAY_WIDTH -#undef ARRAY_HEIGHT -#undef ITERS - - +#undef FFX_CACAO_TILE_WIDTH +#undef FFX_CACAO_TILE_HEIGHT +#undef FFX_CACAO_HALF_TILE_WIDTH +#undef FFX_CACAO_QUARTER_TILE_WIDTH +#undef FFX_CACAO_ARRAY_WIDTH +#undef FFX_CACAO_ARRAY_HEIGHT +#undef FFX_CACAO_ITERS // ======================================================================================================= -// SSAO stuff - -Texture2DArray g_ViewspaceDepthSource : register(t0); -Texture1D g_LoadCounter : register(t2); -Texture2D g_ImportanceMap : register(t3); -Texture2DArray g_FinalSSAO : register(t4); -Texture1D g_ZeroTexture : register(t5); -Texture2DArray g_deinterlacedNormals : register(t6); - -RWTexture2DArray g_SSAOOutput : register(u0); - +// SSAO Generation // calculate effect radius and fit our 
screen sampling pattern inside it -void CalculateRadiusParameters(const float pixCenterLength, const float2 pixelDirRBViewspaceSizeAtCenterZ, out float pixLookupRadiusMod, out float effectRadius, out float falloffCalcMulSq) +void FFX_CACAO_CalculateRadiusParameters(const float pixCenterLength, const float2 pixelDirRBViewspaceSizeAtCenterZ, out float pixLookupRadiusMod, out float effectRadius, out float falloffCalcMulSq) { - effectRadius = g_CACAOConsts.EffectRadius; + effectRadius = g_FFX_CACAO_Consts.EffectRadius; // leaving this out for performance reasons: use something similar if radius needs to scale based on distance - //effectRadius *= pow( pixCenterLength, g_CACAOConsts.RadiusDistanceScalingFunctionPow); + //effectRadius *= pow( pixCenterLength, g_FFX_CACAO_Consts.RadiusDistanceScalingFunctionPow); // when too close, on-screen sampling disk will grow beyond screen size; limit this to avoid closeup temporal artifacts - const float tooCloseLimitMod = saturate(pixCenterLength * g_CACAOConsts.EffectSamplingRadiusNearLimitRec) * 0.8 + 0.2; + const float tooCloseLimitMod = saturate(pixCenterLength * g_FFX_CACAO_Consts.EffectSamplingRadiusNearLimitRec) * 0.8 + 0.2; effectRadius *= tooCloseLimitMod; @@ -604,65 +444,49 @@ void CalculateRadiusParameters(const float pixCenterLength, const float2 pixelDi falloffCalcMulSq = -1.0f / (effectRadius*effectRadius); } - -float3 DecodeNormal(float3 encodedNormal) -{ - float3 normal = encodedNormal * g_CACAOConsts.NormalsUnpackMul.xxx + g_CACAOConsts.NormalsUnpackAdd.xxx; - -#if SSAO_ENABLE_NORMAL_WORLD_TO_VIEW_CONVERSION - normal = mul(normal, (float3x3)g_CACAOConsts.NormalsWorldToViewspaceMatrix).xyz; -#endif - - // normal = normalize( normal ); // normalize adds around 2.5% cost on High settings but makes little (PSNR 66.7) visual difference when normals are as in the sample (stored in R8G8B8A8_UNORM, - // // decoded in the shader), however it will likely be required if using different encoding/decoding or the inputs are not 
normalized, etc. - - return normal; -} - // all vectors in viewspace -float CalculatePixelObscurance(float3 pixelNormal, float3 hitDelta, float falloffCalcMulSq) +float FFX_CACAO_CalculatePixelObscurance(float3 pixelNormal, float3 hitDelta, float falloffCalcMulSq) { float lengthSq = dot(hitDelta, hitDelta); float NdotD = dot(pixelNormal, hitDelta) / sqrt(lengthSq); float falloffMult = max(0.0, lengthSq * falloffCalcMulSq + 1.0); - return max(0, NdotD - g_CACAOConsts.EffectHorizonAngleThreshold) * falloffMult; + return max(0, NdotD - g_FFX_CACAO_Consts.EffectHorizonAngleThreshold) * falloffMult; } -void SSAOTapInner(const int qualityLevel, inout float obscuranceSum, inout float weightSum, const float2 samplingUV, const float mipLevel, const float3 pixCenterPos, const float3 negViewspaceDir, float3 pixelNormal, const float falloffCalcMulSq, const float weightMod, const int dbgTapIndex) +void FFX_CACAO_SSAOTapInner(const int qualityLevel, inout float obscuranceSum, inout float weightSum, const float2 samplingUV, const float mipLevel, const float3 pixCenterPos, const float3 negViewspaceDir, float3 pixelNormal, const float falloffCalcMulSq, const float weightMod, const int dbgTapIndex) { // get depth at sample - float viewspaceSampleZ = g_ViewspaceDepthSource.SampleLevel(g_ViewspaceDepthTapSampler, float3(samplingUV.xy, 0.0f), mipLevel).x; // * g_CACAOConsts.MaxViewspaceDepth; + float viewspaceSampleZ = FFX_CACAO_SSAOGeneration_SampleViewspaceDepthMip(samplingUV, mipLevel); // convert to viewspace - // float3 hitPos = NDCToViewspace(samplingUV.xy, viewspaceSampleZ).xyz; - float3 hitPos = DepthBufferUVToViewspace(samplingUV.xy, viewspaceSampleZ).xyz; + float3 hitPos = FFX_CACAO_DepthBufferUVToViewSpace(samplingUV.xy, viewspaceSampleZ).xyz; float3 hitDelta = hitPos - pixCenterPos; - float obscurance = CalculatePixelObscurance(pixelNormal, hitDelta, falloffCalcMulSq); + float obscurance = FFX_CACAO_CalculatePixelObscurance(pixelNormal, hitDelta, falloffCalcMulSq); float 
weight = 1.0; - if (qualityLevel >= SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET) + if (qualityLevel >= FFX_CACAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET) { //float reduct = max( 0, dot( hitDelta, negViewspaceDir ) ); float reduct = max(0, -hitDelta.z); // cheaper, less correct version - reduct = saturate(reduct * g_CACAOConsts.NegRecEffectRadius + 2.0); // saturate( 2.0 - reduct / g_CACAOConsts.EffectRadius ); - weight = SSAO_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - SSAO_HALOING_REDUCTION_AMOUNT); + reduct = saturate(reduct * g_FFX_CACAO_Consts.NegRecEffectRadius + 2.0); // saturate( 2.0 - reduct / g_FFX_CACAO_Consts.EffectRadius ); + weight = FFX_CACAO_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - FFX_CACAO_HALOING_REDUCTION_AMOUNT); } weight *= weightMod; obscuranceSum += obscurance * weight; weightSum += weight; } -void SSAOTap(const int qualityLevel, inout float obscuranceSum, inout float weightSum, const int tapIndex, const float2x2 rotScale, const float3 pixCenterPos, const float3 negViewspaceDir, float3 pixelNormal, const float2 normalizedScreenPos, const float2 depthBufferUV, const float mipOffset, const float falloffCalcMulSq, float weightMod, float2 normXY, float normXYLength) +void FFX_CACAO_SSAOTap(const int qualityLevel, inout float obscuranceSum, inout float weightSum, const int tapIndex, const float2x2 rotScale, const float3 pixCenterPos, const float3 negViewspaceDir, float3 pixelNormal, const float2 normalizedScreenPos, const float2 depthBufferUV, const float mipOffset, const float falloffCalcMulSq, float weightMod, float2 normXY, float normXYLength) { float2 sampleOffset; float samplePow2Len; // patterns { - float4 newSample = g_samplePatternMain[tapIndex]; + float4 newSample = g_FFX_CACAO_samplePatternMain[tapIndex]; sampleOffset = mul(rotScale, newSample.xy); samplePow2Len = newSample.w; // precalculated, same as: samplePow2Len = log2( length( newSample.xy ) ); weightMod *= newSample.z; @@ -671,20 +495,20 @@ void SSAOTap(const int 
qualityLevel, inout float obscuranceSum, inout float weig // snap to pixel center (more correct obscurance math, avoids artifacts) sampleOffset = round(sampleOffset); - // calculate MIP based on the sample distance from the centre, similar to as described + // calculate MIP based on the sample distance from the centre, similar to as described // in http://graphics.cs.williams.edu/papers/SAOHPG12/. - float mipLevel = (qualityLevel < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (samplePow2Len + mipOffset); + float mipLevel = (qualityLevel < FFX_CACAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (samplePow2Len + mipOffset); - float2 samplingUV = sampleOffset * g_CACAOConsts.DeinterleavedDepthBufferInverseDimensions + depthBufferUV; + float2 samplingUV = sampleOffset * g_FFX_CACAO_Consts.DeinterleavedDepthBufferInverseDimensions + depthBufferUV; - SSAOTapInner(qualityLevel, obscuranceSum, weightSum, samplingUV, mipLevel, pixCenterPos, negViewspaceDir, pixelNormal, falloffCalcMulSq, weightMod, tapIndex * 2); + FFX_CACAO_SSAOTapInner(qualityLevel, obscuranceSum, weightSum, samplingUV, mipLevel, pixCenterPos, negViewspaceDir, pixelNormal, falloffCalcMulSq, weightMod, tapIndex * 2); // for the second tap, just use the mirrored offset float2 sampleOffsetMirroredUV = -sampleOffset; // tilt the second set of samples so that the disk is effectively rotated by the normal // effective at removing one set of artifacts, but too expensive for lower quality settings - if (qualityLevel >= SSAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET) + if (qualityLevel >= FFX_CACAO_TILT_SAMPLES_ENABLE_AT_QUALITY_PRESET) { float dotNorm = dot(sampleOffsetMirroredUV, normXY); sampleOffsetMirroredUV -= dotNorm * normXYLength * normXY; @@ -692,62 +516,33 @@ void SSAOTap(const int qualityLevel, inout float obscuranceSum, inout float weig } // snap to pixel center (more correct obscurance math, avoids artifacts) - float2 samplingMirroredUV = sampleOffsetMirroredUV * 
g_CACAOConsts.DeinterleavedDepthBufferInverseDimensions + depthBufferUV; + float2 samplingMirroredUV = sampleOffsetMirroredUV * g_FFX_CACAO_Consts.DeinterleavedDepthBufferInverseDimensions + depthBufferUV; - SSAOTapInner(qualityLevel, obscuranceSum, weightSum, samplingMirroredUV, mipLevel, pixCenterPos, negViewspaceDir, pixelNormal, falloffCalcMulSq, weightMod, tapIndex * 2 + 1); + FFX_CACAO_SSAOTapInner(qualityLevel, obscuranceSum, weightSum, samplingMirroredUV, mipLevel, pixCenterPos, negViewspaceDir, pixelNormal, falloffCalcMulSq, weightMod, tapIndex * 2 + 1); } -struct SSAOHits +struct FFX_CACAO_SSAOHits { float3 hits[2]; float weightMod; }; -SSAOHits SSAOGetHits(const int qualityLevel, const float2 depthBufferUV, const int tapIndex, const float mipOffset, const float2x2 rotScale, const float4 newSample) -{ - SSAOHits result; - - float2 sampleOffset; - float samplePow2Len; - - // patterns - { - // float4 newSample = g_samplePatternMain[tapIndex]; - sampleOffset = mul(rotScale, newSample.xy); - samplePow2Len = newSample.w; // precalculated, same as: samplePow2Len = log2( length( newSample.xy ) ); - result.weightMod = newSample.z; - } - - // snap to pixel center (more correct obscurance math, avoids artifacts) - sampleOffset = round(sampleOffset) * g_CACAOConsts.DeinterleavedDepthBufferInverseDimensions; - - float mipLevel = (qualityLevel < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? 
(0) : (samplePow2Len + mipOffset); - - float2 sampleUV = depthBufferUV + sampleOffset; - result.hits[0] = float3(sampleUV, g_ViewspaceDepthSource.SampleLevel(g_ViewspaceDepthTapSampler, float3(sampleUV, 0.0f), mipLevel).x); - - sampleUV = depthBufferUV - sampleOffset; - result.hits[1] = float3(sampleUV, g_ViewspaceDepthSource.SampleLevel(g_ViewspaceDepthTapSampler, float3(sampleUV, 0.0f), mipLevel).x); - - return result; -} - -struct SSAOSampleData +struct FFX_CACAO_SSAOSampleData { float2 uvOffset; float mipLevel; float weightMod; }; -SSAOSampleData SSAOGetSampleData(const int qualityLevel, const float2x2 rotScale, const float4 newSample, const float mipOffset) +FFX_CACAO_SSAOSampleData FFX_CACAO_SSAOGetSampleData(const int qualityLevel, const float2x2 rotScale, const float4 newSample, const float mipOffset) { float2 sampleOffset = mul(rotScale, newSample.xy); - sampleOffset = round(sampleOffset) * g_CACAOConsts.DeinterleavedDepthBufferInverseDimensions; + sampleOffset = round(sampleOffset) * g_FFX_CACAO_Consts.DeinterleavedDepthBufferInverseDimensions; float samplePow2Len = newSample.w; - float mipLevel = (qualityLevel < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (samplePow2Len + mipOffset); + float mipLevel = (qualityLevel < FFX_CACAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? 
(0) : (samplePow2Len + mipOffset); - SSAOSampleData result; + FFX_CACAO_SSAOSampleData result; result.uvOffset = sampleOffset; result.mipLevel = mipLevel; @@ -756,64 +551,55 @@ SSAOSampleData SSAOGetSampleData(const int qualityLevel, const float2x2 rotScale return result; } -SSAOHits SSAOGetHits2(SSAOSampleData data, const float2 depthBufferUV) +FFX_CACAO_SSAOHits FFX_CACAO_SSAOGetHits2(FFX_CACAO_SSAOSampleData data, const float2 depthBufferUV) { - SSAOHits result; + FFX_CACAO_SSAOHits result; result.weightMod = data.weightMod; float2 sampleUV = depthBufferUV + data.uvOffset; - result.hits[0] = float3(sampleUV, g_ViewspaceDepthSource.SampleLevel(g_ViewspaceDepthTapSampler, float3(sampleUV, 0.0f), data.mipLevel).x); + result.hits[0] = float3(sampleUV, FFX_CACAO_SSAOGeneration_SampleViewspaceDepthMip(sampleUV, data.mipLevel)); sampleUV = depthBufferUV - data.uvOffset; - result.hits[1] = float3(sampleUV, g_ViewspaceDepthSource.SampleLevel(g_ViewspaceDepthTapSampler, float3(sampleUV, 0.0f), data.mipLevel).x); + result.hits[1] = float3(sampleUV, FFX_CACAO_SSAOGeneration_SampleViewspaceDepthMip(sampleUV, data.mipLevel)); return result; } -void SSAOAddHits(const int qualityLevel, const float3 pixCenterPos, const float3 pixelNormal, const float falloffCalcMulSq, inout float weightSum, inout float obscuranceSum, SSAOHits hits) +void FFX_CACAO_SSAOAddHits(const int qualityLevel, const float3 pixCenterPos, const float3 pixelNormal, const float falloffCalcMulSq, inout float weightSum, inout float obscuranceSum, FFX_CACAO_SSAOHits hits) { float weight = hits.weightMod; [unroll] for (int hitIndex = 0; hitIndex < 2; ++hitIndex) { float3 hit = hits.hits[hitIndex]; - float3 hitPos = DepthBufferUVToViewspace(hit.xy, hit.z); + float3 hitPos = FFX_CACAO_DepthBufferUVToViewSpace(hit.xy, hit.z); float3 hitDelta = hitPos - pixCenterPos; - float obscurance = CalculatePixelObscurance(pixelNormal, hitDelta, falloffCalcMulSq); + float obscurance = 
FFX_CACAO_CalculatePixelObscurance(pixelNormal, hitDelta, falloffCalcMulSq); - if (qualityLevel >= SSAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET) + if (qualityLevel >= FFX_CACAO_HALOING_REDUCTION_ENABLE_AT_QUALITY_PRESET) { //float reduct = max( 0, dot( hitDelta, negViewspaceDir ) ); float reduct = max(0, -hitDelta.z); // cheaper, less correct version - reduct = saturate(reduct * g_CACAOConsts.NegRecEffectRadius + 2.0); // saturate( 2.0 - reduct / g_CACAOConsts.EffectRadius ); - weight = SSAO_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - SSAO_HALOING_REDUCTION_AMOUNT); + reduct = saturate(reduct * g_FFX_CACAO_Consts.NegRecEffectRadius + 2.0); // saturate( 2.0 - reduct / g_FFX_CACAO_Consts.EffectRadius ); + weight = FFX_CACAO_HALOING_REDUCTION_AMOUNT * reduct + (1.0 - FFX_CACAO_HALOING_REDUCTION_AMOUNT); } obscuranceSum += obscurance * weight; weightSum += weight; } } -void SSAOTap2(const int qualityLevel, inout float obscuranceSum, inout float weightSum, const int tapIndex, const float2x2 rotScale, const float3 pixCenterPos, const float3 negViewspaceDir, float3 pixelNormal, const float2 normalizedScreenPos, const float mipOffset, const float falloffCalcMulSq, float weightMod, float2 normXY, float normXYLength) -{ - float4 newSample = g_samplePatternMain[tapIndex]; - SSAOSampleData data = SSAOGetSampleData(qualityLevel, rotScale, newSample, mipOffset); - SSAOHits hits = SSAOGetHits2(data, normalizedScreenPos); - SSAOAddHits(qualityLevel, pixCenterPos, pixelNormal, falloffCalcMulSq, weightSum, obscuranceSum, hits); -} - - -void GenerateSSAOShadowsInternal(out float outShadowTerm, out float4 outEdges, out float outWeight, const float2 SVPos/*, const float2 normalizedScreenPos*/, uniform int qualityLevel, bool adaptiveBase) +void FFX_CACAO_GenerateSSAOShadowsInternal(out float outShadowTerm, out float4 outEdges, out float outWeight, const float2 SVPos/*, const float2 normalizedScreenPos*/, uniform int qualityLevel, bool adaptiveBase) { float2 SVPosRounded = 
trunc(SVPos); uint2 SVPosui = uint2(SVPosRounded); //same as uint2( SVPos ) - const int numberOfTaps = (adaptiveBase) ? (SSAO_ADAPTIVE_TAP_BASE_COUNT) : (g_numTaps[qualityLevel]); + const int numberOfTaps = (adaptiveBase) ? (FFX_CACAO_ADAPTIVE_TAP_BASE_COUNT) : (g_FFX_CACAO_numTaps[qualityLevel]); float pixZ, pixLZ, pixTZ, pixRZ, pixBZ; - float2 depthBufferUV = (SVPos + 0.5f) * g_CACAOConsts.DeinterleavedDepthBufferInverseDimensions + g_CACAOConsts.DeinterleavedDepthBufferNormalisedOffset; - float4 valuesUL = g_ViewspaceDepthSource.GatherRed(g_PointMirrorSampler, float3(depthBufferUV, 0.0f), int2(-1, -1)); - float4 valuesBR = g_ViewspaceDepthSource.GatherRed(g_PointMirrorSampler, float3(depthBufferUV, 0.0f)); + float2 depthBufferUV = (SVPos + 0.5f) * g_FFX_CACAO_Consts.DeinterleavedDepthBufferInverseDimensions + g_FFX_CACAO_Consts.DeinterleavedDepthBufferNormalisedOffset; + float4 valuesUL = FFX_CACAO_SSAOGeneration_GatherViewspaceDepthOffset(depthBufferUV, int2(-1, -1)); + float4 valuesBR = FFX_CACAO_SSAOGeneration_GatherViewspaceDepthOffset(depthBufferUV, int2(0, 0)); // get this pixel's viewspace depth - pixZ = valuesUL.y; //float pixZ = g_ViewspaceDepthSource.SampleLevel( g_PointMirrorSampler, float3(normalizedScreenPos, 0.0f), 0.0 ).x; // * g_CACAOConsts.MaxViewspaceDepth; + pixZ = valuesUL.y; // get left right top bottom neighbouring pixels for edge detection (gets compiled out on qualityLevel == 0) pixLZ = valuesUL.x; @@ -821,31 +607,30 @@ void GenerateSSAOShadowsInternal(out float outShadowTerm, out float4 outEdges, o pixRZ = valuesBR.z; pixBZ = valuesBR.x; - // float2 normalizedScreenPos = SVPosRounded * g_CACAOConsts.Viewport2xPixelSize + g_CACAOConsts.Viewport2xPixelSize_x_025; - float2 normalizedScreenPos = (SVPosRounded + 0.5f) * g_CACAOConsts.SSAOBufferInverseDimensions; - float3 pixCenterPos = NDCToViewspace(normalizedScreenPos, pixZ); // g + // float2 normalizedScreenPos = SVPosRounded * g_FFX_CACAO_Consts.Viewport2xPixelSize + 
g_FFX_CACAO_Consts.Viewport2xPixelSize_x_025; + float2 normalizedScreenPos = (SVPosRounded + 0.5f) * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + float3 pixCenterPos = FFX_CACAO_NDCToViewSpace(normalizedScreenPos, pixZ); // g // Load this pixel's viewspace normal - // uint2 fullResCoord = 2 * (SVPosui * 2 + g_CACAOConsts.PerPassFullResCoordOffset.xy); - int3 normalCoord = int3(SVPosui, g_CACAOConsts.PassIndex); - float3 pixelNormal = g_deinterlacedNormals[normalCoord].xyz; + // uint2 fullResCoord = 2 * (SVPosui * 2 + g_FFX_CACAO_Consts.PerPassFullResCoordOffset.xy); + float3 pixelNormal = FFX_CACAO_SSAOGeneration_GetNormalPass(SVPosui, g_FFX_CACAO_Consts.PassIndex); - // optimized approximation of: float2 pixelDirRBViewspaceSizeAtCenterZ = NDCToViewspace( normalizedScreenPos.xy + g_CACAOConsts._ViewportPixelSize.xy, pixCenterPos.z ).xy - pixCenterPos.xy; - // const float2 pixelDirRBViewspaceSizeAtCenterZ = pixCenterPos.z * g_CACAOConsts.NDCToViewMul * g_CACAOConsts.Viewport2xPixelSize; - const float2 pixelDirRBViewspaceSizeAtCenterZ = pixCenterPos.z * g_CACAOConsts.NDCToViewMul * g_CACAOConsts.SSAOBufferInverseDimensions; + // optimized approximation of: float2 pixelDirRBViewspaceSizeAtCenterZ = FFX_CACAO_NDCToViewSpace( normalizedScreenPos.xy + g_FFX_CACAO_Consts._ViewportPixelSize.xy, pixCenterPos.z ).xy - pixCenterPos.xy; + // const float2 pixelDirRBViewspaceSizeAtCenterZ = pixCenterPos.z * g_FFX_CACAO_Consts.NDCToViewMul * g_FFX_CACAO_Consts.Viewport2xPixelSize; + const float2 pixelDirRBViewspaceSizeAtCenterZ = pixCenterPos.z * g_FFX_CACAO_Consts.NDCToViewMul * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; float pixLookupRadiusMod; float falloffCalcMulSq; // calculate effect radius and fit our screen sampling pattern inside it float effectViewspaceRadius; - CalculateRadiusParameters(length(pixCenterPos), pixelDirRBViewspaceSizeAtCenterZ, pixLookupRadiusMod, effectViewspaceRadius, falloffCalcMulSq); + 
FFX_CACAO_CalculateRadiusParameters(length(pixCenterPos), pixelDirRBViewspaceSizeAtCenterZ, pixLookupRadiusMod, effectViewspaceRadius, falloffCalcMulSq); // calculate samples rotation/scaling float2x2 rotScale; { // reduce effect radius near the screen edges slightly; ideally, one would render a larger depth buffer (5% on each side) instead - if (!adaptiveBase && (qualityLevel >= SSAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET)) + if (!adaptiveBase && (qualityLevel >= FFX_CACAO_REDUCE_RADIUS_NEAR_SCREEN_BORDER_ENABLE_AT_QUALITY_PRESET)) { float nearScreenBorder = min(min(depthBufferUV.x, 1.0 - depthBufferUV.x), min(depthBufferUV.y, 1.0 - depthBufferUV.y)); nearScreenBorder = saturate(10.0 * nearScreenBorder + 0.6); @@ -854,7 +639,7 @@ void GenerateSSAOShadowsInternal(out float outShadowTerm, out float4 outEdges, o // load & update pseudo-random rotation matrix uint pseudoRandomIndex = uint(SVPosRounded.y * 2 + SVPosRounded.x) % 5; - float4 rs = g_CACAOConsts.PatternRotScaleMatrices[pseudoRandomIndex]; + float4 rs = g_FFX_CACAO_Consts.PatternRotScaleMatrices[pseudoRandomIndex]; rotScale = float2x2(rs.x * pixLookupRadiusMod, rs.y * pixLookupRadiusMod, rs.z * pixLookupRadiusMod, rs.w * pixLookupRadiusMod); } @@ -866,15 +651,15 @@ void GenerateSSAOShadowsInternal(out float outShadowTerm, out float4 outEdges, o float4 edgesLRTB = float4(1.0, 1.0, 1.0, 1.0); // Move center pixel slightly towards camera to avoid imprecision artifacts due to using of 16bit depth buffer; a lot smaller offsets needed when using 32bit floats - pixCenterPos *= g_CACAOConsts.DepthPrecisionOffsetMod; + pixCenterPos *= g_FFX_CACAO_Consts.DepthPrecisionOffsetMod; - if (!adaptiveBase && (qualityLevel >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) + if (!adaptiveBase && (qualityLevel >= FFX_CACAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { - edgesLRTB = CalculateEdges(pixZ, pixLZ, pixRZ, pixTZ, pixBZ); + edgesLRTB = FFX_CACAO_CalculateEdges(pixZ, pixLZ, pixRZ, pixTZ, pixBZ); 
} // adds a more high definition sharp effect, which gets blurred out (reuses left/right/top/bottom samples that we used for edge detection) - if (!adaptiveBase && (qualityLevel >= SSAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET)) + if (!adaptiveBase && (qualityLevel >= FFX_CACAO_DETAIL_AO_ENABLE_AT_QUALITY_PRESET)) { // disable in case of quality level 4 (reference) if (qualityLevel != 4) @@ -882,38 +667,37 @@ void GenerateSSAOShadowsInternal(out float outShadowTerm, out float4 outEdges, o //approximate neighbouring pixels positions (actually just deltas or "positions - pixCenterPos" ) float3 viewspaceDirZNormalized = float3(pixCenterPos.xy / pixCenterPos.zz, 1.0); - // very close approximation of: float3 pixLPos = NDCToViewspace( normalizedScreenPos + float2( -g_CACAOConsts.HalfViewportPixelSize.x, 0.0 ), pixLZ ).xyz - pixCenterPos.xyz; + // very close approximation of: float3 pixLPos = FFX_CACAO_NDCToViewSpace( normalizedScreenPos + float2( -g_FFX_CACAO_Consts.HalfViewportPixelSize.x, 0.0 ), pixLZ ).xyz - pixCenterPos.xyz; float3 pixLDelta = float3(-pixelDirRBViewspaceSizeAtCenterZ.x, 0.0, 0.0) + viewspaceDirZNormalized * (pixLZ - pixCenterPos.z); - // very close approximation of: float3 pixRPos = NDCToViewspace( normalizedScreenPos + float2( +g_CACAOConsts.HalfViewportPixelSize.x, 0.0 ), pixRZ ).xyz - pixCenterPos.xyz; + // very close approximation of: float3 pixRPos = FFX_CACAO_NDCToViewSpace( normalizedScreenPos + float2( +g_FFX_CACAO_Consts.HalfViewportPixelSize.x, 0.0 ), pixRZ ).xyz - pixCenterPos.xyz; float3 pixRDelta = float3(+pixelDirRBViewspaceSizeAtCenterZ.x, 0.0, 0.0) + viewspaceDirZNormalized * (pixRZ - pixCenterPos.z); - // very close approximation of: float3 pixTPos = NDCToViewspace( normalizedScreenPos + float2( 0.0, -g_CACAOConsts.HalfViewportPixelSize.y ), pixTZ ).xyz - pixCenterPos.xyz; + // very close approximation of: float3 pixTPos = FFX_CACAO_NDCToViewSpace( normalizedScreenPos + float2( 0.0, -g_FFX_CACAO_Consts.HalfViewportPixelSize.y ), pixTZ 
).xyz - pixCenterPos.xyz; float3 pixTDelta = float3(0.0, -pixelDirRBViewspaceSizeAtCenterZ.y, 0.0) + viewspaceDirZNormalized * (pixTZ - pixCenterPos.z); - // very close approximation of: float3 pixBPos = NDCToViewspace( normalizedScreenPos + float2( 0.0, +g_CACAOConsts.HalfViewportPixelSize.y ), pixBZ ).xyz - pixCenterPos.xyz; + // very close approximation of: float3 pixBPos = FFX_CACAO_NDCToViewSpace( normalizedScreenPos + float2( 0.0, +g_FFX_CACAO_Consts.HalfViewportPixelSize.y ), pixBZ ).xyz - pixCenterPos.xyz; float3 pixBDelta = float3(0.0, +pixelDirRBViewspaceSizeAtCenterZ.y, 0.0) + viewspaceDirZNormalized * (pixBZ - pixCenterPos.z); const float rangeReductionConst = 4.0f; // this is to avoid various artifacts const float modifiedFalloffCalcMulSq = rangeReductionConst * falloffCalcMulSq; float4 additionalObscurance; - additionalObscurance.x = CalculatePixelObscurance(pixelNormal, pixLDelta, modifiedFalloffCalcMulSq); - additionalObscurance.y = CalculatePixelObscurance(pixelNormal, pixRDelta, modifiedFalloffCalcMulSq); - additionalObscurance.z = CalculatePixelObscurance(pixelNormal, pixTDelta, modifiedFalloffCalcMulSq); - additionalObscurance.w = CalculatePixelObscurance(pixelNormal, pixBDelta, modifiedFalloffCalcMulSq); + additionalObscurance.x = FFX_CACAO_CalculatePixelObscurance(pixelNormal, pixLDelta, modifiedFalloffCalcMulSq); + additionalObscurance.y = FFX_CACAO_CalculatePixelObscurance(pixelNormal, pixRDelta, modifiedFalloffCalcMulSq); + additionalObscurance.z = FFX_CACAO_CalculatePixelObscurance(pixelNormal, pixTDelta, modifiedFalloffCalcMulSq); + additionalObscurance.w = FFX_CACAO_CalculatePixelObscurance(pixelNormal, pixBDelta, modifiedFalloffCalcMulSq); - obscuranceSum += g_CACAOConsts.DetailAOStrength * dot(additionalObscurance, edgesLRTB); + obscuranceSum += g_FFX_CACAO_Consts.DetailAOStrength * dot(additionalObscurance, edgesLRTB); } } // Sharp normals also create edges - but this adds to the cost as well - if (!adaptiveBase && (qualityLevel >= 
SSAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) + if (!adaptiveBase && (qualityLevel >= FFX_CACAO_NORMAL_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { - float3 neighbourNormalL = g_deinterlacedNormals[normalCoord + int3(-1, +0, 0)].xyz; - float3 neighbourNormalR = g_deinterlacedNormals[normalCoord + int3(+1, +0, 0)].xyz; - float3 neighbourNormalT = g_deinterlacedNormals[normalCoord + int3(+0, -1, 0)].xyz; - float3 neighbourNormalB = g_deinterlacedNormals[normalCoord + int3(+0, +1, 0)].xyz; - + float3 neighbourNormalL = FFX_CACAO_SSAOGeneration_GetNormalPass(SVPosui + int2(-1, +0), g_FFX_CACAO_Consts.PassIndex); + float3 neighbourNormalR = FFX_CACAO_SSAOGeneration_GetNormalPass(SVPosui + int2(+1, +0), g_FFX_CACAO_Consts.PassIndex); + float3 neighbourNormalT = FFX_CACAO_SSAOGeneration_GetNormalPass(SVPosui + int2(+0, -1), g_FFX_CACAO_Consts.PassIndex); + float3 neighbourNormalB = FFX_CACAO_SSAOGeneration_GetNormalPass(SVPosui + int2(+0, +1), g_FFX_CACAO_Consts.PassIndex); - const float dotThreshold = SSAO_NORMAL_BASED_EDGES_DOT_THRESHOLD; + const float dotThreshold = FFX_CACAO_NORMAL_BASED_EDGES_DOT_THRESHOLD; float4 normalEdgesLRTB; normalEdgesLRTB.x = saturate((dot(pixelNormal, neighbourNormalL) + dotThreshold)); @@ -921,8 +705,8 @@ void GenerateSSAOShadowsInternal(out float outShadowTerm, out float4 outEdges, o normalEdgesLRTB.z = saturate((dot(pixelNormal, neighbourNormalT) + dotThreshold)); normalEdgesLRTB.w = saturate((dot(pixelNormal, neighbourNormalB) + dotThreshold)); - //#define SSAO_SMOOTHEN_NORMALS // fixes some aliasing artifacts but kills a lot of high detail and adds to the cost - not worth it probably but feel free to play with it -#ifdef SSAO_SMOOTHEN_NORMALS + //#define FFX_CACAO_SMOOTHEN_NORMALS // fixes some aliasing artifacts but kills a lot of high detail and adds to the cost - not worth it probably but feel free to play with it +#ifdef FFX_CACAO_SMOOTHEN_NORMALS //neighbourNormalL = LoadNormal( fullResCoord, int2( -1, 0 ) ); //neighbourNormalR = 
LoadNormal( fullResCoord, int2( 1, 0 ) ); //neighbourNormalT = LoadNormal( fullResCoord, int2( 0, -1 ) ); @@ -936,113 +720,75 @@ void GenerateSSAOShadowsInternal(out float outShadowTerm, out float4 outEdges, o - const float globalMipOffset = SSAO_DEPTH_MIPS_GLOBAL_OFFSET; - float mipOffset = (qualityLevel < SSAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (log2(pixLookupRadiusMod) + globalMipOffset); + const float globalMipOffset = FFX_CACAO_DEPTH_MIPS_GLOBAL_OFFSET; + float mipOffset = (qualityLevel < FFX_CACAO_DEPTH_MIPS_ENABLE_AT_QUALITY_PRESET) ? (0) : (log2(pixLookupRadiusMod) + globalMipOffset); // Used to tilt the second set of samples so that the disk is effectively rotated by the normal // effective at removing one set of artifacts, but too expensive for lower quality settings float2 normXY = float2(pixelNormal.x, pixelNormal.y); float normXYLength = length(normXY); normXY /= float2(normXYLength, -normXYLength); - normXYLength *= SSAO_TILT_SAMPLES_AMOUNT; + normXYLength *= FFX_CACAO_TILT_SAMPLES_AMOUNT; const float3 negViewspaceDir = -normalize(pixCenterPos); // standard, non-adaptive approach if ((qualityLevel != 3) || adaptiveBase) { - //SSAOHits prevHits = SSAOGetHits(qualityLevel, normalizedScreenPos, 0, mipOffset); - -#if 0 - float4 newSample = g_samplePatternMain[0]; - // float zero = g_ZeroTexture.SampleLevel(g_PointClampSampler, float2(0.5f, 0.5f), 0); - SSAOSampleData data = SSAOGetSampleData(qualityLevel, rotScale, newSample, mipOffset); - SSAOHits hits = SSAOGetHits2(data, depthBufferUV); - newSample = g_samplePatternMain[1]; - // newSample.x += zero; - data = SSAOGetSampleData(qualityLevel, rotScale, newSample, mipOffset); - - [unroll] - for (int i = 0; i < numberOfTaps - 1; ++i) - { - // zero = g_ZeroTexture.SampleLevel(g_PointClampSampler, float2(0.5f + zero, 0.5f), 0); - SSAOHits nextHits = SSAOGetHits2(data, depthBufferUV); - // hits.hits[0].x += zero; - newSample = g_samplePatternMain[i + 2]; - - SSAOAddHits(qualityLevel, pixCenterPos, 
pixelNormal, falloffCalcMulSq, weightSum, obscuranceSum, hits); - SSAOSampleData nextData = SSAOGetSampleData(qualityLevel, rotScale, newSample, mipOffset); - hits = nextHits; - data = nextData; - } - - // last loop iteration - { - SSAOAddHits(qualityLevel, pixCenterPos, pixelNormal, falloffCalcMulSq, weightSum, obscuranceSum, hits); - } -#else - [unroll] for (int i = 0; i < numberOfTaps; i++) { - SSAOTap(qualityLevel, obscuranceSum, weightSum, i, rotScale, pixCenterPos, negViewspaceDir, pixelNormal, normalizedScreenPos, depthBufferUV, mipOffset, falloffCalcMulSq, 1.0, normXY, normXYLength); - // SSAOHits hits = SSAOGetHits(qualityLevel, normalizedScreenPos, i, mipOffset, rotScale); - // SSAOAddHits(qualityLevel, pixCenterPos, pixelNormal, 1.0f, falloffCalcMulSq, weightSum, obscuranceSum, hits); + FFX_CACAO_SSAOTap(qualityLevel, obscuranceSum, weightSum, i, rotScale, pixCenterPos, negViewspaceDir, pixelNormal, normalizedScreenPos, depthBufferUV, mipOffset, falloffCalcMulSq, 1.0, normXY, normXYLength); } - -#endif } else // if( qualityLevel == 3 ) adaptive approach { // add new ones if needed - float2 fullResUV = normalizedScreenPos + g_CACAOConsts.PerPassFullResUVOffset.xy; - float importance = g_ImportanceMap.SampleLevel(g_LinearClampSampler, fullResUV, 0.0).x; + float2 fullResUV = normalizedScreenPos + g_FFX_CACAO_Consts.PerPassFullResUVOffset.xy; + float importance = FFX_CACAO_SSAOGeneration_SampleImportance(fullResUV); - // this is to normalize SSAO_DETAIL_AO_AMOUNT across all pixel regardless of importance - obscuranceSum *= (SSAO_ADAPTIVE_TAP_BASE_COUNT / (float)SSAO_MAX_TAPS) + (importance * SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT / (float)SSAO_MAX_TAPS); + // this is to normalize FFX_CACAO_DETAIL_AO_AMOUNT across all pixel regardless of importance + obscuranceSum *= (FFX_CACAO_ADAPTIVE_TAP_BASE_COUNT / (float)FFX_CACAO_MAX_TAPS) + (importance * FFX_CACAO_ADAPTIVE_TAP_FLEXIBLE_COUNT / (float)FFX_CACAO_MAX_TAPS); // load existing base values - float2 baseValues = 
g_FinalSSAO.Load(int4(SVPosui, g_CACAOConsts.PassIndex, 0)).xy; - weightSum += baseValues.y * (float)(SSAO_ADAPTIVE_TAP_BASE_COUNT * 4.0); + float2 baseValues = FFX_CACAO_SSAOGeneration_LoadBasePassSSAOPass(SVPosui, g_FFX_CACAO_Consts.PassIndex); + weightSum += baseValues.y * (float)(FFX_CACAO_ADAPTIVE_TAP_BASE_COUNT * 4.0); obscuranceSum += (baseValues.x) * weightSum; // increase importance around edges float edgeCount = dot(1.0 - edgesLRTB, float4(1.0, 1.0, 1.0, 1.0)); - float avgTotalImportance = (float)g_LoadCounter[0] * g_CACAOConsts.LoadCounterAvgDiv; + float avgTotalImportance = (float)FFX_CACAO_SSAOGeneration_GetLoadCounter() * g_FFX_CACAO_Consts.LoadCounterAvgDiv; - float importanceLimiter = saturate(g_CACAOConsts.AdaptiveSampleCountLimit / avgTotalImportance); + float importanceLimiter = saturate(g_FFX_CACAO_Consts.AdaptiveSampleCountLimit / avgTotalImportance); importance *= importanceLimiter; - float additionalSampleCountFlt = SSAO_ADAPTIVE_TAP_FLEXIBLE_COUNT * importance; + float additionalSampleCountFlt = FFX_CACAO_ADAPTIVE_TAP_FLEXIBLE_COUNT * importance; additionalSampleCountFlt += 1.5; uint additionalSamples = uint(additionalSampleCountFlt); - uint additionalSamplesTo = min(SSAO_MAX_TAPS, additionalSamples + SSAO_ADAPTIVE_TAP_BASE_COUNT); + uint additionalSamplesTo = min(FFX_CACAO_MAX_TAPS, additionalSamples + FFX_CACAO_ADAPTIVE_TAP_BASE_COUNT); // sample loop { - float4 newSample = g_samplePatternMain[SSAO_ADAPTIVE_TAP_BASE_COUNT]; - SSAOSampleData data = SSAOGetSampleData(qualityLevel, rotScale, newSample, mipOffset); - SSAOHits hits = SSAOGetHits2(data, depthBufferUV); - newSample = g_samplePatternMain[SSAO_ADAPTIVE_TAP_BASE_COUNT + 1]; + float4 newSample = g_FFX_CACAO_samplePatternMain[FFX_CACAO_ADAPTIVE_TAP_BASE_COUNT]; + FFX_CACAO_SSAOSampleData data = FFX_CACAO_SSAOGetSampleData(qualityLevel, rotScale, newSample, mipOffset); + FFX_CACAO_SSAOHits hits = FFX_CACAO_SSAOGetHits2(data, depthBufferUV); + newSample = 
g_FFX_CACAO_samplePatternMain[FFX_CACAO_ADAPTIVE_TAP_BASE_COUNT + 1]; - for (uint i = SSAO_ADAPTIVE_TAP_BASE_COUNT; i < additionalSamplesTo - 1; i++) + for (uint i = FFX_CACAO_ADAPTIVE_TAP_BASE_COUNT; i < additionalSamplesTo - 1; i++) { - data = SSAOGetSampleData(qualityLevel, rotScale, newSample, mipOffset); - newSample = g_samplePatternMain[i + 2]; - SSAOHits nextHits = SSAOGetHits2(data, depthBufferUV); + data = FFX_CACAO_SSAOGetSampleData(qualityLevel, rotScale, newSample, mipOffset); + newSample = g_FFX_CACAO_samplePatternMain[i + 2]; + FFX_CACAO_SSAOHits nextHits = FFX_CACAO_SSAOGetHits2(data, depthBufferUV); - // float zero = g_ZeroTexture.SampleLevel(g_ZeroTextureSampler, (float)i, 0.0f); - // hits.weightMod += zero; - - SSAOAddHits(qualityLevel, pixCenterPos, pixelNormal, falloffCalcMulSq, weightSum, obscuranceSum, hits); + FFX_CACAO_SSAOAddHits(qualityLevel, pixCenterPos, pixelNormal, falloffCalcMulSq, weightSum, obscuranceSum, hits); hits = nextHits; } // last loop iteration { - SSAOAddHits(qualityLevel, pixCenterPos, pixelNormal, falloffCalcMulSq, weightSum, obscuranceSum, hits); + FFX_CACAO_SSAOAddHits(qualityLevel, pixCenterPos, pixelNormal, falloffCalcMulSq, weightSum, obscuranceSum, hits); } } } @@ -1062,10 +808,10 @@ void GenerateSSAOShadowsInternal(out float outShadowTerm, out float4 outEdges, o float obscurance = obscuranceSum / weightSum; // calculate fadeout (1 close, gradient, 0 far) - float fadeOut = saturate(pixCenterPos.z * g_CACAOConsts.EffectFadeOutMul + g_CACAOConsts.EffectFadeOutAdd); + float fadeOut = saturate(pixCenterPos.z * g_FFX_CACAO_Consts.EffectFadeOutMul + g_FFX_CACAO_Consts.EffectFadeOutAdd); // Reduce the SSAO shadowing if we're on the edge to remove artifacts on edges (we don't care for the lower quality one) - if (!adaptiveBase && (qualityLevel >= SSAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) + if (!adaptiveBase && (qualityLevel >= FFX_CACAO_DEPTH_BASED_EDGES_ENABLE_AT_QUALITY_PRESET)) { // float edgeCount = dot( 
1.0-edgesLRTB, float4( 1.0, 1.0, 1.0, 1.0 ) ); @@ -1082,21 +828,21 @@ void GenerateSSAOShadowsInternal(out float outShadowTerm, out float4 outEdges, o // fadeOut *= saturate( dot( edgesLRTB, float4( 0.9, 0.9, 0.9, 0.9 ) ) - 2.6 ); // strength - obscurance = g_CACAOConsts.EffectShadowStrength * obscurance; + obscurance = g_FFX_CACAO_Consts.EffectShadowStrength * obscurance; // clamp - obscurance = min(obscurance, g_CACAOConsts.EffectShadowClamp); + obscurance = min(obscurance, g_FFX_CACAO_Consts.EffectShadowClamp); // fadeout obscurance *= fadeOut; - // conceptually switch to occlusion with the meaning being visibility (grows with visibility, occlusion == 1 implies full visibility), + // conceptually switch to occlusion with the meaning being visibility (grows with visibility, occlusion == 1 implies full visibility), // to be in line with what is more commonly used. float occlusion = 1.0 - obscurance; // modify the gradient // note: this cannot be moved to a later pass because of loss of precision after storing in the render target - occlusion = pow(saturate(occlusion), g_CACAOConsts.EffectShadowPow); + occlusion = pow(saturate(occlusion), g_FFX_CACAO_Consts.EffectShadowPow); // outputs! 
outShadowTerm = occlusion; // Our final 'occlusion' term (0 means fully occluded, 1 means fully lit) @@ -1104,89 +850,86 @@ void GenerateSSAOShadowsInternal(out float outShadowTerm, out float4 outEdges, o outWeight = weightSum; } -[numthreads(GENERATE_WIDTH, GENERATE_HEIGHT, 1)] -void CSGenerateQ0(uint2 coord : SV_DispatchThreadID) +[numthreads(FFX_CACAO_GENERATE_SPARSE_WIDTH, FFX_CACAO_GENERATE_SPARSE_HEIGHT, 1)] +void FFX_CACAO_GenerateQ0(uint3 tid : SV_DispatchThreadID) { + uint xOffset = (tid.y * 3 + tid.z) % 5; + uint2 coord = uint2(5 * tid.x + xOffset, tid.y); float2 inPos = (float2)coord; float outShadowTerm; float outWeight; float4 outEdges; - GenerateSSAOShadowsInternal(outShadowTerm, outEdges, outWeight, inPos.xy, 0, false); + FFX_CACAO_GenerateSSAOShadowsInternal(outShadowTerm, outEdges, outWeight, inPos.xy, 0, false); float2 out0; out0.x = outShadowTerm; - out0.y = PackEdges(float4(1, 1, 1, 1)); // no edges in low quality - g_SSAOOutput[uint3(coord, 0)] = out0; + out0.y = FFX_CACAO_PackEdges(float4(1, 1, 1, 1)); // no edges in low quality + FFX_CACAO_SSAOGeneration_StoreOutput(coord, out0); } -[numthreads(GENERATE_WIDTH, GENERATE_HEIGHT, 1)] -void CSGenerateQ1(uint2 coord : SV_DispatchThreadID) +[numthreads(FFX_CACAO_GENERATE_SPARSE_WIDTH, FFX_CACAO_GENERATE_SPARSE_HEIGHT, 1)] +void FFX_CACAO_GenerateQ1(uint3 tid : SV_DispatchThreadID) { + uint xOffset = (tid.y * 3 + tid.z) % 5; + uint2 coord = uint2(5 * tid.x + xOffset, tid.y); float2 inPos = (float2)coord; float outShadowTerm; float outWeight; float4 outEdges; - GenerateSSAOShadowsInternal(outShadowTerm, outEdges, outWeight, inPos.xy, 1, false); + FFX_CACAO_GenerateSSAOShadowsInternal(outShadowTerm, outEdges, outWeight, inPos.xy, 1, false); float2 out0; out0.x = outShadowTerm; - out0.y = PackEdges(outEdges); - g_SSAOOutput[uint3(coord, 0)] = out0; + out0.y = FFX_CACAO_PackEdges(outEdges); + FFX_CACAO_SSAOGeneration_StoreOutput(coord, out0); } -[numthreads(GENERATE_WIDTH, GENERATE_HEIGHT, 1)] -void 
CSGenerateQ2(uint2 coord : SV_DispatchThreadID) +[numthreads(FFX_CACAO_GENERATE_WIDTH, FFX_CACAO_GENERATE_HEIGHT, 1)] +void FFX_CACAO_GenerateQ2(uint2 coord : SV_DispatchThreadID) { float2 inPos = (float2)coord; float outShadowTerm; float outWeight; float4 outEdges; - GenerateSSAOShadowsInternal(outShadowTerm, outEdges, outWeight, inPos.xy, 2, false); + FFX_CACAO_GenerateSSAOShadowsInternal(outShadowTerm, outEdges, outWeight, inPos.xy, 2, false); float2 out0; out0.x = outShadowTerm; - out0.y = PackEdges(outEdges); - g_SSAOOutput[uint3(coord, 0)] = out0; + out0.y = FFX_CACAO_PackEdges(outEdges); + FFX_CACAO_SSAOGeneration_StoreOutput(coord, out0); } -[numthreads(GENERATE_WIDTH, GENERATE_HEIGHT, 1)] -void CSGenerateQ3Base(uint2 coord : SV_DispatchThreadID) +[numthreads(FFX_CACAO_GENERATE_WIDTH, FFX_CACAO_GENERATE_HEIGHT, 1)] +void FFX_CACAO_GenerateQ3Base(uint2 coord : SV_DispatchThreadID) { float2 inPos = (float2)coord; float outShadowTerm; float outWeight; float4 outEdges; - GenerateSSAOShadowsInternal(outShadowTerm, outEdges, outWeight, inPos.xy, 3, true); + FFX_CACAO_GenerateSSAOShadowsInternal(outShadowTerm, outEdges, outWeight, inPos.xy, 3, true); float2 out0; out0.x = outShadowTerm; - out0.y = outWeight / ((float)SSAO_ADAPTIVE_TAP_BASE_COUNT * 4.0); //0.0; //frac(outWeight / 6.0);// / (float)(SSAO_MAX_TAPS * 4.0); - g_SSAOOutput[uint3(coord, 0)] = out0; + out0.y = outWeight / ((float)FFX_CACAO_ADAPTIVE_TAP_BASE_COUNT * 4.0); //0.0; //frac(outWeight / 6.0);// / (float)(FFX_CACAO_MAX_TAPS * 4.0); + FFX_CACAO_SSAOGeneration_StoreOutput(coord, out0); } -[numthreads(GENERATE_WIDTH, GENERATE_HEIGHT, 1)] -void CSGenerateQ3(uint2 coord : SV_DispatchThreadID) +[numthreads(FFX_CACAO_GENERATE_WIDTH, FFX_CACAO_GENERATE_HEIGHT, 1)] +void FFX_CACAO_GenerateQ3(uint2 coord : SV_DispatchThreadID) { float2 inPos = (float2)coord; float outShadowTerm; float outWeight; float4 outEdges; - GenerateSSAOShadowsInternal(outShadowTerm, outEdges, outWeight, inPos.xy, 3, false); + 
FFX_CACAO_GenerateSSAOShadowsInternal(outShadowTerm, outEdges, outWeight, inPos.xy, 3, false); float2 out0; out0.x = outShadowTerm; - out0.y = PackEdges(outEdges); - g_SSAOOutput[uint3(coord, 0)] = out0; + out0.y = FFX_CACAO_PackEdges(outEdges); + FFX_CACAO_SSAOGeneration_StoreOutput(coord, out0); } - - - // ======================================================= // Apply -Texture2DArray g_ApplyFinalSSAO : register(t0); -RWTexture2D g_ApplyOutput : register(u0); - - -[numthreads(APPLY_WIDTH, APPLY_HEIGHT, 1)] -void CSApply(uint2 coord : SV_DispatchThreadID) +[numthreads(FFX_CACAO_APPLY_WIDTH, FFX_CACAO_APPLY_HEIGHT, 1)] +void FFX_CACAO_Apply(uint2 coord : SV_DispatchThreadID) { float ao; float2 inPos = coord; @@ -1201,12 +944,12 @@ void CSApply(uint2 coord : SV_DispatchThreadID) int iv = mx + (1 - my) * 2; // neighbouring, vertical int id = (1 - mx) + (1 - my) * 2; // diagonal - float2 centerVal = g_ApplyFinalSSAO.Load(int4(pixPosHalf, ic, 0)).xy; + float2 centerVal = FFX_CACAO_Apply_LoadSSAOPass(pixPosHalf, ic); ao = centerVal.x; #if 1 // change to 0 if you want to disable last pass high-res blur (for debugging purposes, etc.) 
- float4 edgesLRTB = UnpackEdges(centerVal.y); + float4 edgesLRTB = FFX_CACAO_UnpackEdges(centerVal.y); // return 1.0 - float4( edgesLRTB.x, edgesLRTB.y * 0.5 + edgesLRTB.w * 0.5, edgesLRTB.z, 0.0 ); // debug show edges @@ -1219,12 +962,12 @@ void CSApply(uint2 coord : SV_DispatchThreadID) float fmye = (edgesLRTB.w - edgesLRTB.z); // calculate final sampling offsets and sample using bilinear filter - float2 uvH = (inPos.xy + float2(fmx + fmxe - 0.5, 0.5 - fmy)) * 0.5 * g_CACAOConsts.SSAOBufferInverseDimensions; - float aoH = g_ApplyFinalSSAO.SampleLevel(g_LinearClampSampler, float3(uvH, ih), 0).x; - float2 uvV = (inPos.xy + float2(0.5 - fmx, fmy - 0.5 + fmye)) * 0.5 * g_CACAOConsts.SSAOBufferInverseDimensions; - float aoV = g_ApplyFinalSSAO.SampleLevel(g_LinearClampSampler, float3(uvV, iv), 0).x; - float2 uvD = (inPos.xy + float2(fmx - 0.5 + fmxe, fmy - 0.5 + fmye)) * 0.5 * g_CACAOConsts.SSAOBufferInverseDimensions; - float aoD = g_ApplyFinalSSAO.SampleLevel(g_LinearClampSampler, float3(uvD, id), 0).x; + float2 uvH = (inPos.xy + float2(fmx + fmxe - 0.5, 0.5 - fmy)) * 0.5 * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + float aoH = FFX_CACAO_Apply_SampleSSAOUVPass(uvH, ih); + float2 uvV = (inPos.xy + float2(0.5 - fmx, fmy - 0.5 + fmye)) * 0.5 * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + float aoV = FFX_CACAO_Apply_SampleSSAOUVPass(uvV, iv); + float2 uvD = (inPos.xy + float2(fmx - 0.5 + fmxe, fmy - 0.5 + fmye)) * 0.5 * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + float aoD = FFX_CACAO_Apply_SampleSSAOUVPass(uvD, id); // reduce weight for samples near edge - if the edge is on both sides, weight goes to 0 float4 blendWeights; @@ -1238,109 +981,63 @@ void CSApply(uint2 coord : SV_DispatchThreadID) ao = dot(float4(ao, aoH, aoV, aoD), blendWeights) / blendWeightsSum; #endif - g_ApplyOutput[coord] = ao.x; + FFX_CACAO_Apply_StoreOutput(coord, ao.x); } // edge-ignorant blur & apply (for the lowest quality level 0) -[numthreads(APPLY_WIDTH, APPLY_HEIGHT, 1)] 
-void CSNonSmartApply(uint2 tid : SV_DispatchThreadID) -{ - float2 inUV = float2(tid) * g_CACAOConsts.OutputBufferInverseDimensions; - float a = g_ApplyFinalSSAO.SampleLevel(g_LinearClampSampler, float3(inUV.xy, 0), 0.0).x; - float b = g_ApplyFinalSSAO.SampleLevel(g_LinearClampSampler, float3(inUV.xy, 1), 0.0).x; - float c = g_ApplyFinalSSAO.SampleLevel(g_LinearClampSampler, float3(inUV.xy, 2), 0.0).x; - float d = g_ApplyFinalSSAO.SampleLevel(g_LinearClampSampler, float3(inUV.xy, 3), 0.0).x; - float avg = (a + b + c + d) * 0.25; - g_ApplyOutput[tid] = avg; -} - -// edge-ignorant blur & apply, skipping half pixels in checkerboard pattern (for the Lowest quality level 0 and Settings::SkipHalfPixelsOnLowQualityLevel == true ) -[numthreads(APPLY_WIDTH, APPLY_HEIGHT, 1)] -void CSNonSmartHalfApply(uint2 tid : SV_DispatchThreadID) -{ - float2 inUV = float2(tid) * g_CACAOConsts.OutputBufferInverseDimensions; - float a = g_ApplyFinalSSAO.SampleLevel(g_LinearClampSampler, float3(inUV.xy, 0), 0.0).x; - float d = g_ApplyFinalSSAO.SampleLevel(g_LinearClampSampler, float3(inUV.xy, 3), 0.0).x; - float avg = (a + d) * 0.5; - g_ApplyOutput[tid] = avg; -} - - -// ============================================================================= -// Prepare - -Texture2D g_DepthIn : register(t0); - -groupshared uint s_PrepareMem[10][18]; - - -min16float ScreenSpaceToViewSpaceDepth(min16float screenDepth) +[numthreads(FFX_CACAO_APPLY_WIDTH, FFX_CACAO_APPLY_HEIGHT, 1)] +void FFX_CACAO_NonSmartApply(uint2 tid : SV_DispatchThreadID) { - min16float depthLinearizeMul = min16float(g_CACAOConsts.DepthUnpackConsts.x); - min16float depthLinearizeAdd = min16float(g_CACAOConsts.DepthUnpackConsts.y); + float2 inUV = float2(tid) * g_FFX_CACAO_Consts.OutputBufferInverseDimensions; + float a = FFX_CACAO_Apply_SampleSSAOUVPass(inUV.xy, 0); + float b = FFX_CACAO_Apply_SampleSSAOUVPass(inUV.xy, 1); + float c = FFX_CACAO_Apply_SampleSSAOUVPass(inUV.xy, 2); + float d = FFX_CACAO_Apply_SampleSSAOUVPass(inUV.xy, 
3); + float avg = (a + b + c + d) * 0.25f; - // Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar" - - // Set your depthLinearizeMul and depthLinearizeAdd to: - // depthLinearizeMul = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear ); - // depthLinearizeAdd = cameraClipFar / ( cameraClipFar - cameraClipNear ); - - return depthLinearizeMul / (depthLinearizeAdd - screenDepth); + FFX_CACAO_Apply_StoreOutput(tid, avg); } -min16float4 ScreenSpaceToViewSpaceDepth4x(min16float4 screenDepths) +// edge-ignorant blur & apply, skipping half pixels in checkerboard pattern (for the Lowest quality level 0 and Settings::SkipHalfPixelsOnLowQualityLevel == true ) +[numthreads(FFX_CACAO_APPLY_WIDTH, FFX_CACAO_APPLY_HEIGHT, 1)] +void FFX_CACAO_NonSmartHalfApply(uint2 tid : SV_DispatchThreadID) { - min16float depthLinearizeMul = min16float(g_CACAOConsts.DepthUnpackConsts.x); - min16float depthLinearizeAdd = min16float(g_CACAOConsts.DepthUnpackConsts.y); - - // Optimised version of "-cameraClipNear / (cameraClipFar - projDepth * (cameraClipFar - cameraClipNear)) * cameraClipFar" - - // Set your depthLinearizeMul and depthLinearizeAdd to: - // depthLinearizeMul = ( cameraClipFar * cameraClipNear) / ( cameraClipFar - cameraClipNear ); - // depthLinearizeAdd = cameraClipFar / ( cameraClipFar - cameraClipNear ); + float2 inUV = float2(tid) * g_FFX_CACAO_Consts.OutputBufferInverseDimensions; + float a = FFX_CACAO_Apply_SampleSSAOUVPass(inUV.xy, 0); + float d = FFX_CACAO_Apply_SampleSSAOUVPass(inUV.xy, 3); + float avg = (a + d) * 0.5f; - return depthLinearizeMul / (depthLinearizeAdd - screenDepths); + FFX_CACAO_Apply_StoreOutput(tid, avg); } -RWTexture2DArray g_PrepareDepthsAndMips_OutMip0 : register(u0); -RWTexture2DArray g_PrepareDepthsAndMips_OutMip1 : register(u1); -RWTexture2DArray g_PrepareDepthsAndMips_OutMip2 : register(u2); -RWTexture2DArray g_PrepareDepthsAndMips_OutMip3 : register(u3); +// 
============================================================================= +// Prepare -groupshared float s_PrepareDepthsAndMipsBuffer[4][8][8]; +groupshared float s_FFX_CACAO_PrepareDepthsAndMipsBuffer[4][8][8]; -float MipSmartAverage(float4 depths) +float FFX_CACAO_MipSmartAverage(float4 depths) { float closest = min(min(depths.x, depths.y), min(depths.z, depths.w)); - float falloffCalcMulSq = -1.0f / g_CACAOConsts.EffectRadius * g_CACAOConsts.EffectRadius; + float falloffCalcMulSq = -1.0f / g_FFX_CACAO_Consts.EffectRadius * g_FFX_CACAO_Consts.EffectRadius; float4 dists = depths - closest.xxxx; float4 weights = saturate(dists * dists * falloffCalcMulSq + 1.0); return dot(weights, depths) / dot(weights, float4(1.0, 1.0, 1.0, 1.0)); } -min16float MipSmartAverage_16(min16float4 depths) -{ - min16float closest = min(min(depths.x, depths.y), min(depths.z, depths.w)); - min16float falloffCalcMulSq = min16float(-1.0f / g_CACAOConsts.EffectRadius * g_CACAOConsts.EffectRadius); - min16float4 dists = depths - closest.xxxx; - min16float4 weights = saturate(dists * dists * falloffCalcMulSq + 1.0); - return dot(weights, depths) / dot(weights, min16float4(1.0, 1.0, 1.0, 1.0)); -} - -void PrepareDepthsAndMips(float4 samples, uint2 outputCoord, uint2 gtid) +void FFX_CACAO_PrepareDepthsAndMips(float4 samples, uint2 outputCoord, uint2 gtid) { - samples = ScreenSpaceToViewSpaceDepth(samples); + samples = FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples); - s_PrepareDepthsAndMipsBuffer[0][gtid.x][gtid.y] = samples.w; - s_PrepareDepthsAndMipsBuffer[1][gtid.x][gtid.y] = samples.z; - s_PrepareDepthsAndMipsBuffer[2][gtid.x][gtid.y] = samples.x; - s_PrepareDepthsAndMipsBuffer[3][gtid.x][gtid.y] = samples.y; + s_FFX_CACAO_PrepareDepthsAndMipsBuffer[0][gtid.x][gtid.y] = samples.w; + s_FFX_CACAO_PrepareDepthsAndMipsBuffer[1][gtid.x][gtid.y] = samples.z; + s_FFX_CACAO_PrepareDepthsAndMipsBuffer[2][gtid.x][gtid.y] = samples.x; + s_FFX_CACAO_PrepareDepthsAndMipsBuffer[3][gtid.x][gtid.y] = 
samples.y; - g_PrepareDepthsAndMips_OutMip0[int3(outputCoord.x, outputCoord.y, 0)] = samples.w; - g_PrepareDepthsAndMips_OutMip0[int3(outputCoord.x, outputCoord.y, 1)] = samples.z; - g_PrepareDepthsAndMips_OutMip0[int3(outputCoord.x, outputCoord.y, 2)] = samples.x; - g_PrepareDepthsAndMips_OutMip0[int3(outputCoord.x, outputCoord.y, 3)] = samples.y; + FFX_CACAO_Prepare_StoreDepthMip0(outputCoord, 0, samples.w); + FFX_CACAO_Prepare_StoreDepthMip0(outputCoord, 1, samples.z); + FFX_CACAO_Prepare_StoreDepthMip0(outputCoord, 2, samples.x); + FFX_CACAO_Prepare_StoreDepthMip0(outputCoord, 3, samples.y); uint depthArrayIndex = 2 * (gtid.y % 2) + (gtid.x % 2); uint2 depthArrayOffset = int2(gtid.x % 2, gtid.y % 2); @@ -1351,14 +1048,14 @@ void PrepareDepthsAndMips(float4 samples, uint2 outputCoord, uint2 gtid) // if (stillAlive) <-- all threads alive here { - float sample_00 = s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 0][bufferCoord.y + 0]; - float sample_01 = s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 0][bufferCoord.y + 1]; - float sample_10 = s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 1][bufferCoord.y + 0]; - float sample_11 = s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 1][bufferCoord.y + 1]; - - float avg = MipSmartAverage(float4(sample_00, sample_01, sample_10, sample_11)); - g_PrepareDepthsAndMips_OutMip1[int3(outputCoord.x, outputCoord.y, depthArrayIndex)] = avg; - s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x][bufferCoord.y] = avg; + float sample_00 = s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 0][bufferCoord.y + 0]; + float sample_01 = s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 0][bufferCoord.y + 1]; + float sample_10 = s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 1][bufferCoord.y + 0]; + float sample_11 = s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 1][bufferCoord.y + 
1]; + + float avg = FFX_CACAO_MipSmartAverage(float4(sample_00, sample_01, sample_10, sample_11)); + FFX_CACAO_Prepare_StoreDepthMip1(outputCoord, depthArrayIndex, avg); + s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x][bufferCoord.y] = avg; } bool stillAlive = gtid.x % 4 == depthArrayOffset.x && gtid.y % 4 == depthArrayOffset.y; @@ -1368,132 +1065,121 @@ void PrepareDepthsAndMips(float4 samples, uint2 outputCoord, uint2 gtid) if (stillAlive) { - float sample_00 = s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 0][bufferCoord.y + 0]; - float sample_01 = s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 0][bufferCoord.y + 2]; - float sample_10 = s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 2][bufferCoord.y + 0]; - float sample_11 = s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 2][bufferCoord.y + 2]; - - float avg = MipSmartAverage(float4(sample_00, sample_01, sample_10, sample_11)); - g_PrepareDepthsAndMips_OutMip2[int3(outputCoord.x, outputCoord.y, depthArrayIndex)] = avg; - s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x][bufferCoord.y] = avg; + float sample_00 = s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 0][bufferCoord.y + 0]; + float sample_01 = s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 0][bufferCoord.y + 2]; + float sample_10 = s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 2][bufferCoord.y + 0]; + float sample_11 = s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 2][bufferCoord.y + 2]; + + float avg = FFX_CACAO_MipSmartAverage(float4(sample_00, sample_01, sample_10, sample_11)); + FFX_CACAO_Prepare_StoreDepthMip2(outputCoord, depthArrayIndex, avg); + s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x][bufferCoord.y] = avg; } - stillAlive = gtid.x % 8 == depthArrayOffset.x && depthArrayOffset.y % 8 == depthArrayOffset.y; + stillAlive = gtid.x % 
8 == depthArrayOffset.x && gtid.y % 8 == depthArrayOffset.y; outputCoord /= 2; GroupMemoryBarrierWithGroupSync(); if (stillAlive) { - float sample_00 = s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 0][bufferCoord.y + 0]; - float sample_01 = s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 0][bufferCoord.y + 4]; - float sample_10 = s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 4][bufferCoord.y + 0]; - float sample_11 = s_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 4][bufferCoord.y + 4]; + float sample_00 = s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 0][bufferCoord.y + 0]; + float sample_01 = s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 0][bufferCoord.y + 4]; + float sample_10 = s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 4][bufferCoord.y + 0]; + float sample_11 = s_FFX_CACAO_PrepareDepthsAndMipsBuffer[depthArrayIndex][bufferCoord.x + 4][bufferCoord.y + 4]; - float avg = MipSmartAverage(float4(sample_00, sample_01, sample_10, sample_11)); - g_PrepareDepthsAndMips_OutMip3[int3(outputCoord.x, outputCoord.y, depthArrayIndex)] = avg; + float avg = FFX_CACAO_MipSmartAverage(float4(sample_00, sample_01, sample_10, sample_11)); + FFX_CACAO_Prepare_StoreDepthMip3(outputCoord, depthArrayIndex, avg); } } -[numthreads(PREPARE_DEPTHS_AND_MIPS_WIDTH, PREPARE_DEPTHS_AND_MIPS_HEIGHT, 1)] -void CSPrepareDownsampledDepthsAndMips(uint2 tid : SV_DispatchThreadID, uint2 gtid : SV_GroupThreadID) +[numthreads(FFX_CACAO_PREPARE_DEPTHS_AND_MIPS_WIDTH, FFX_CACAO_PREPARE_DEPTHS_AND_MIPS_HEIGHT, 1)] +void FFX_CACAO_PrepareDownsampledDepthsAndMips(uint2 tid : SV_DispatchThreadID, uint2 gtid : SV_GroupThreadID) { int2 depthBufferCoord = 4 * tid.xy; int2 outputCoord = tid; - float2 uv = (float2(depthBufferCoord)+0.5f) * g_CACAOConsts.DepthBufferInverseDimensions; + float2 uv = (float2(depthBufferCoord)+0.5f) * 
g_FFX_CACAO_Consts.DepthBufferInverseDimensions; float4 samples; -#if 1 - samples.x = g_DepthIn.SampleLevel(g_PointClampSampler, uv, 0, int2(0, 2)); - samples.y = g_DepthIn.SampleLevel(g_PointClampSampler, uv, 0, int2(2, 2)); - samples.z = g_DepthIn.SampleLevel(g_PointClampSampler, uv, 0, int2(2, 0)); - samples.w = g_DepthIn.SampleLevel(g_PointClampSampler, uv, 0, int2(0, 0)); -#else - samples.x = g_DepthIn[depthBufferCoord + uint2(0, 2)]; - samples.y = g_DepthIn[depthBufferCoord + uint2(2, 2)]; - samples.z = g_DepthIn[depthBufferCoord + uint2(2, 0)]; - samples.w = g_DepthIn[depthBufferCoord + uint2(0, 0)]; -#endif - PrepareDepthsAndMips(samples, outputCoord, gtid); + samples.x = FFX_CACAO_Prepare_SampleDepthOffset(uv, int2(0, 2)); + samples.y = FFX_CACAO_Prepare_SampleDepthOffset(uv, int2(2, 2)); + samples.z = FFX_CACAO_Prepare_SampleDepthOffset(uv, int2(2, 0)); + samples.w = FFX_CACAO_Prepare_SampleDepthOffset(uv, int2(0, 0)); + + FFX_CACAO_PrepareDepthsAndMips(samples, outputCoord, gtid); } -[numthreads(PREPARE_DEPTHS_AND_MIPS_WIDTH, PREPARE_DEPTHS_AND_MIPS_HEIGHT, 1)] -void CSPrepareNativeDepthsAndMips(uint2 tid : SV_DispatchThreadID, uint2 gtid : SV_GroupThreadID) +[numthreads(FFX_CACAO_PREPARE_DEPTHS_AND_MIPS_WIDTH, FFX_CACAO_PREPARE_DEPTHS_AND_MIPS_HEIGHT, 1)] +void FFX_CACAO_PrepareNativeDepthsAndMips(uint2 tid : SV_DispatchThreadID, uint2 gtid : SV_GroupThreadID) { int2 depthBufferCoord = 2 * tid.xy; int2 outputCoord = tid; - float2 uv = (float2(depthBufferCoord)+0.5f) * g_CACAOConsts.DepthBufferInverseDimensions; - float4 samples = g_DepthIn.GatherRed(g_PointClampSampler, uv); + float2 uv = (float2(depthBufferCoord)+0.5f) * g_FFX_CACAO_Consts.DepthBufferInverseDimensions; + float4 samples = FFX_CACAO_Prepare_GatherDepth(uv); - PrepareDepthsAndMips(samples, outputCoord, gtid); + FFX_CACAO_PrepareDepthsAndMips(samples, outputCoord, gtid); } -RWTexture2DArray g_PrepareDepthsOut : register(u0); - -void PrepareDepths(float4 samples, uint2 tid) +void 
FFX_CACAO_PrepareDepths(float4 samples, uint2 tid) { - samples = ScreenSpaceToViewSpaceDepth(samples); - g_PrepareDepthsOut[int3(tid.x, tid.y, 0)] = samples.w; - g_PrepareDepthsOut[int3(tid.x, tid.y, 1)] = samples.z; - g_PrepareDepthsOut[int3(tid.x, tid.y, 2)] = samples.x; - g_PrepareDepthsOut[int3(tid.x, tid.y, 3)] = samples.y; + samples = FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples); + FFX_CACAO_Prepare_StoreDepth(tid, 0, samples.w); + FFX_CACAO_Prepare_StoreDepth(tid, 1, samples.z); + FFX_CACAO_Prepare_StoreDepth(tid, 2, samples.x); + FFX_CACAO_Prepare_StoreDepth(tid, 3, samples.y); } -[numthreads(PREPARE_DEPTHS_WIDTH, PREPARE_DEPTHS_HEIGHT, 1)] -void CSPrepareDownsampledDepths(uint2 tid : SV_DispatchThreadID) +[numthreads(FFX_CACAO_PREPARE_DEPTHS_WIDTH, FFX_CACAO_PREPARE_DEPTHS_HEIGHT, 1)] +void FFX_CACAO_PrepareDownsampledDepths(uint2 tid : SV_DispatchThreadID) { int2 depthBufferCoord = 4 * tid.xy; - float2 uv = (float2(depthBufferCoord)+0.5f) * g_CACAOConsts.DepthBufferInverseDimensions; + float2 uv = (float2(depthBufferCoord)+0.5f) * g_FFX_CACAO_Consts.DepthBufferInverseDimensions; float4 samples; - samples.x = g_DepthIn.SampleLevel(g_PointClampSampler, uv, 0, int2(0, 2)); - samples.y = g_DepthIn.SampleLevel(g_PointClampSampler, uv, 0, int2(2, 2)); - samples.z = g_DepthIn.SampleLevel(g_PointClampSampler, uv, 0, int2(2, 0)); - samples.w = g_DepthIn.SampleLevel(g_PointClampSampler, uv, 0, int2(0, 0)); - - PrepareDepths(samples, tid); + + samples.x = FFX_CACAO_Prepare_SampleDepthOffset(uv, int2(0, 2)); + samples.y = FFX_CACAO_Prepare_SampleDepthOffset(uv, int2(2, 2)); + samples.z = FFX_CACAO_Prepare_SampleDepthOffset(uv, int2(2, 0)); + samples.w = FFX_CACAO_Prepare_SampleDepthOffset(uv, int2(0, 0)); + + FFX_CACAO_PrepareDepths(samples, tid); } -[numthreads(PREPARE_DEPTHS_WIDTH, PREPARE_DEPTHS_HEIGHT, 1)] -void CSPrepareNativeDepths(uint2 tid : SV_DispatchThreadID) +[numthreads(FFX_CACAO_PREPARE_DEPTHS_WIDTH, FFX_CACAO_PREPARE_DEPTHS_HEIGHT, 1)] +void 
FFX_CACAO_PrepareNativeDepths(uint2 tid : SV_DispatchThreadID) { int2 depthBufferCoord = 2 * tid.xy; - float2 uv = (float2(depthBufferCoord)+0.5f) * g_CACAOConsts.DepthBufferInverseDimensions; - float4 samples = g_DepthIn.GatherRed(g_PointClampSampler, uv); + float2 uv = (float2(depthBufferCoord)+0.5f) * g_FFX_CACAO_Consts.DepthBufferInverseDimensions; + float4 samples = FFX_CACAO_Prepare_GatherDepth(uv); - PrepareDepths(samples, tid); + FFX_CACAO_PrepareDepths(samples, tid); } -[numthreads(PREPARE_DEPTHS_HALF_WIDTH, PREPARE_DEPTHS_HALF_HEIGHT, 1)] -void CSPrepareDownsampledDepthsHalf(uint2 tid : SV_DispatchThreadID) +[numthreads(FFX_CACAO_PREPARE_DEPTHS_HALF_WIDTH, FFX_CACAO_PREPARE_DEPTHS_HALF_HEIGHT, 1)] +void FFX_CACAO_PrepareDownsampledDepthsHalf(uint2 tid : SV_DispatchThreadID) { - float sample_00 = g_DepthIn.Load(int3(4 * tid.x + 0, 4 * tid.y + 0, 0)); - float sample_11 = g_DepthIn.Load(int3(4 * tid.x + 2, 4 * tid.y + 2, 0)); - sample_00 = ScreenSpaceToViewSpaceDepth(sample_00); - sample_11 = ScreenSpaceToViewSpaceDepth(sample_11); - g_PrepareDepthsOut[int3(tid.x, tid.y, 0)] = sample_00; - g_PrepareDepthsOut[int3(tid.x, tid.y, 3)] = sample_11; + float sample_00 = FFX_CACAO_Prepare_LoadDepth(int2(4 * tid.x + 0, 4 * tid.y + 0)); + float sample_11 = FFX_CACAO_Prepare_LoadDepth(int2(4 * tid.x + 2, 4 * tid.y + 2)); + sample_00 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(sample_00); + sample_11 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(sample_11); + FFX_CACAO_Prepare_StoreDepth(tid, 0, sample_00); + FFX_CACAO_Prepare_StoreDepth(tid, 3, sample_11); } -[numthreads(PREPARE_DEPTHS_HALF_WIDTH, PREPARE_DEPTHS_HALF_HEIGHT, 1)] -void CSPrepareNativeDepthsHalf(uint2 tid : SV_DispatchThreadID) +[numthreads(FFX_CACAO_PREPARE_DEPTHS_HALF_WIDTH, FFX_CACAO_PREPARE_DEPTHS_HALF_HEIGHT, 1)] +void FFX_CACAO_PrepareNativeDepthsHalf(uint2 tid : SV_DispatchThreadID) { - float sample_00 = g_DepthIn.Load(int3(2 * tid.x + 0, 2 * tid.y + 0, 0)); - float sample_11 = g_DepthIn.Load(int3(2 * tid.x 
+ 1, 2 * tid.y + 1, 0)); - sample_00 = ScreenSpaceToViewSpaceDepth(sample_00); - sample_11 = ScreenSpaceToViewSpaceDepth(sample_11); - g_PrepareDepthsOut[int3(tid.x, tid.y, 0)] = sample_00; - g_PrepareDepthsOut[int3(tid.x, tid.y, 3)] = sample_11; + float sample_00 = FFX_CACAO_Prepare_LoadDepth(int2(2 * tid.x + 0, 2 * tid.y + 0)); + float sample_11 = FFX_CACAO_Prepare_LoadDepth(int2(2 * tid.x + 1, 2 * tid.y + 1)); + sample_00 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(sample_00); + sample_11 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(sample_11); + FFX_CACAO_Prepare_StoreDepth(tid, 0, sample_00); + FFX_CACAO_Prepare_StoreDepth(tid, 3, sample_11); } -groupshared float s_PrepareDepthsNormalsAndMipsBuffer[18][18]; - -RWTexture2DArray g_PrepareNormals_NormalOut : register(u0); - -struct PrepareNormalsInputDepths +struct FFX_CACAO_PrepareNormalsInputDepths { float depth_10; float depth_20; @@ -1512,148 +1198,136 @@ struct PrepareNormalsInputDepths float depth_23; }; -void PrepareNormals(PrepareNormalsInputDepths depths, float2 uv, float2 pixelSize, int2 normalCoord) +void FFX_CACAO_PrepareNormals(FFX_CACAO_PrepareNormalsInputDepths depths, float2 uv, float2 pixelSize, int2 normalCoord) { - float3 p_10 = NDCToViewspace(uv + float2(+0.0f, -1.0f) * pixelSize, depths.depth_10); - float3 p_20 = NDCToViewspace(uv + float2(+1.0f, -1.0f) * pixelSize, depths.depth_20); + float3 p_10 = FFX_CACAO_NDCToViewSpace(uv + float2(+0.0f, -1.0f) * pixelSize, depths.depth_10); + float3 p_20 = FFX_CACAO_NDCToViewSpace(uv + float2(+1.0f, -1.0f) * pixelSize, depths.depth_20); - float3 p_01 = NDCToViewspace(uv + float2(-1.0f, +0.0f) * pixelSize, depths.depth_01); - float3 p_11 = NDCToViewspace(uv + float2(+0.0f, +0.0f) * pixelSize, depths.depth_11); - float3 p_21 = NDCToViewspace(uv + float2(+1.0f, +0.0f) * pixelSize, depths.depth_21); - float3 p_31 = NDCToViewspace(uv + float2(+2.0f, +0.0f) * pixelSize, depths.depth_31); + float3 p_01 = FFX_CACAO_NDCToViewSpace(uv + float2(-1.0f, +0.0f) * pixelSize, 
depths.depth_01); + float3 p_11 = FFX_CACAO_NDCToViewSpace(uv + float2(+0.0f, +0.0f) * pixelSize, depths.depth_11); + float3 p_21 = FFX_CACAO_NDCToViewSpace(uv + float2(+1.0f, +0.0f) * pixelSize, depths.depth_21); + float3 p_31 = FFX_CACAO_NDCToViewSpace(uv + float2(+2.0f, +0.0f) * pixelSize, depths.depth_31); - float3 p_02 = NDCToViewspace(uv + float2(-1.0f, +1.0f) * pixelSize, depths.depth_02); - float3 p_12 = NDCToViewspace(uv + float2(+0.0f, +1.0f) * pixelSize, depths.depth_12); - float3 p_22 = NDCToViewspace(uv + float2(+1.0f, +1.0f) * pixelSize, depths.depth_22); - float3 p_32 = NDCToViewspace(uv + float2(+2.0f, +1.0f) * pixelSize, depths.depth_32); + float3 p_02 = FFX_CACAO_NDCToViewSpace(uv + float2(-1.0f, +1.0f) * pixelSize, depths.depth_02); + float3 p_12 = FFX_CACAO_NDCToViewSpace(uv + float2(+0.0f, +1.0f) * pixelSize, depths.depth_12); + float3 p_22 = FFX_CACAO_NDCToViewSpace(uv + float2(+1.0f, +1.0f) * pixelSize, depths.depth_22); + float3 p_32 = FFX_CACAO_NDCToViewSpace(uv + float2(+2.0f, +1.0f) * pixelSize, depths.depth_32); - float3 p_13 = NDCToViewspace(uv + float2(+0.0f, +2.0f) * pixelSize, depths.depth_13); - float3 p_23 = NDCToViewspace(uv + float2(+1.0f, +2.0f) * pixelSize, depths.depth_23); + float3 p_13 = FFX_CACAO_NDCToViewSpace(uv + float2(+0.0f, +2.0f) * pixelSize, depths.depth_13); + float3 p_23 = FFX_CACAO_NDCToViewSpace(uv + float2(+1.0f, +2.0f) * pixelSize, depths.depth_23); - float4 edges_11 = CalculateEdges(p_11.z, p_01.z, p_21.z, p_10.z, p_12.z); - float4 edges_21 = CalculateEdges(p_21.z, p_11.z, p_31.z, p_20.z, p_22.z); - float4 edges_12 = CalculateEdges(p_12.z, p_02.z, p_22.z, p_11.z, p_13.z); - float4 edges_22 = CalculateEdges(p_22.z, p_12.z, p_32.z, p_21.z, p_23.z); + float4 edges_11 = FFX_CACAO_CalculateEdges(p_11.z, p_01.z, p_21.z, p_10.z, p_12.z); + float4 edges_21 = FFX_CACAO_CalculateEdges(p_21.z, p_11.z, p_31.z, p_20.z, p_22.z); + float4 edges_12 = FFX_CACAO_CalculateEdges(p_12.z, p_02.z, p_22.z, p_11.z, p_13.z); + float4 
edges_22 = FFX_CACAO_CalculateEdges(p_22.z, p_12.z, p_32.z, p_21.z, p_23.z); - float3 norm_11 = CalculateNormal(edges_11, p_11, p_01, p_21, p_10, p_12); - float3 norm_21 = CalculateNormal(edges_21, p_21, p_11, p_31, p_20, p_22); - float3 norm_12 = CalculateNormal(edges_12, p_12, p_02, p_22, p_11, p_13); - float3 norm_22 = CalculateNormal(edges_22, p_22, p_12, p_32, p_21, p_23); + float3 norm_11 = FFX_CACAO_CalculateNormal(edges_11, p_11, p_01, p_21, p_10, p_12); + float3 norm_21 = FFX_CACAO_CalculateNormal(edges_21, p_21, p_11, p_31, p_20, p_22); + float3 norm_12 = FFX_CACAO_CalculateNormal(edges_12, p_12, p_02, p_22, p_11, p_13); + float3 norm_22 = FFX_CACAO_CalculateNormal(edges_22, p_22, p_12, p_32, p_21, p_23); - g_PrepareNormals_NormalOut[int3(normalCoord, 0)] = float4(norm_11, 1.0f); - g_PrepareNormals_NormalOut[int3(normalCoord, 1)] = float4(norm_21, 1.0f); - g_PrepareNormals_NormalOut[int3(normalCoord, 2)] = float4(norm_12, 1.0f); - g_PrepareNormals_NormalOut[int3(normalCoord, 3)] = float4(norm_22, 1.0f); + FFX_CACAO_Prepare_StoreNormal(normalCoord, 0, norm_11); + FFX_CACAO_Prepare_StoreNormal(normalCoord, 1, norm_21); + FFX_CACAO_Prepare_StoreNormal(normalCoord, 2, norm_12); + FFX_CACAO_Prepare_StoreNormal(normalCoord, 3, norm_22); } -[numthreads(PREPARE_NORMALS_WIDTH, PREPARE_NORMALS_HEIGHT, 1)] -void CSPrepareDownsampledNormals(int2 tid : SV_DispatchThreadID) +[numthreads(FFX_CACAO_PREPARE_NORMALS_WIDTH, FFX_CACAO_PREPARE_NORMALS_HEIGHT, 1)] +void FFX_CACAO_PrepareDownsampledNormals(int2 tid : SV_DispatchThreadID) { - int2 depthCoord = 4 * tid + g_CACAOConsts.DepthBufferOffset; + int2 depthCoord = 4 * tid + g_FFX_CACAO_Consts.DepthBufferOffset; - PrepareNormalsInputDepths depths; + FFX_CACAO_PrepareNormalsInputDepths depths; - depths.depth_10 = ScreenSpaceToViewSpaceDepth(g_DepthIn.Load(int3(depthCoord, 0), int2(+0, -2))); - depths.depth_20 = ScreenSpaceToViewSpaceDepth(g_DepthIn.Load(int3(depthCoord, 0), int2(+2, -2))); + depths.depth_10 = 
FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_Prepare_LoadDepthOffset(depthCoord, int2(+0, -2))); + depths.depth_20 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_Prepare_LoadDepthOffset(depthCoord, int2(+2, -2))); - depths.depth_01 = ScreenSpaceToViewSpaceDepth(g_DepthIn.Load(int3(depthCoord, 0), int2(-2, +0))); - depths.depth_11 = ScreenSpaceToViewSpaceDepth(g_DepthIn.Load(int3(depthCoord, 0), int2(+0, +0))); - depths.depth_21 = ScreenSpaceToViewSpaceDepth(g_DepthIn.Load(int3(depthCoord, 0), int2(+2, +0))); - depths.depth_31 = ScreenSpaceToViewSpaceDepth(g_DepthIn.Load(int3(depthCoord, 0), int2(+4, +0))); + depths.depth_01 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_Prepare_LoadDepthOffset(depthCoord, int2(-2, +0))); + depths.depth_11 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_Prepare_LoadDepthOffset(depthCoord, int2(+0, +0))); + depths.depth_21 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_Prepare_LoadDepthOffset(depthCoord, int2(+2, +0))); + depths.depth_31 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_Prepare_LoadDepthOffset(depthCoord, int2(+4, +0))); - depths.depth_02 = ScreenSpaceToViewSpaceDepth(g_DepthIn.Load(int3(depthCoord, 0), int2(-2, +2))); - depths.depth_12 = ScreenSpaceToViewSpaceDepth(g_DepthIn.Load(int3(depthCoord, 0), int2(+0, +2))); - depths.depth_22 = ScreenSpaceToViewSpaceDepth(g_DepthIn.Load(int3(depthCoord, 0), int2(+2, +2))); - depths.depth_32 = ScreenSpaceToViewSpaceDepth(g_DepthIn.Load(int3(depthCoord, 0), int2(+4, +2))); + depths.depth_02 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_Prepare_LoadDepthOffset(depthCoord, int2(-2, +2))); + depths.depth_12 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_Prepare_LoadDepthOffset(depthCoord, int2(+0, +2))); + depths.depth_22 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_Prepare_LoadDepthOffset(depthCoord, int2(+2, +2))); + depths.depth_32 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_Prepare_LoadDepthOffset(depthCoord, int2(+4, +2))); - depths.depth_13 = 
ScreenSpaceToViewSpaceDepth(g_DepthIn.Load(int3(depthCoord, 0), int2(+0, +4))); - depths.depth_23 = ScreenSpaceToViewSpaceDepth(g_DepthIn.Load(int3(depthCoord, 0), int2(+2, +4))); + depths.depth_13 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_Prepare_LoadDepthOffset(depthCoord, int2(+0, +4))); + depths.depth_23 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_Prepare_LoadDepthOffset(depthCoord, int2(+2, +4))); - float2 pixelSize = 2.0f * g_CACAOConsts.OutputBufferInverseDimensions; // 2.0f * g_CACAOConsts.DepthBufferInverseDimensions; - float2 uv = (float2(4 * tid) + 0.5f) * g_CACAOConsts.OutputBufferInverseDimensions; // * g_CACAOConsts.SSAOBufferInverseDimensions; + float2 pixelSize = 2.0f * g_FFX_CACAO_Consts.OutputBufferInverseDimensions; // 2.0f * g_FFX_CACAO_Consts.DepthBufferInverseDimensions; + float2 uv = (float2(4 * tid) + 0.5f) * g_FFX_CACAO_Consts.OutputBufferInverseDimensions; // * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; - PrepareNormals(depths, uv, pixelSize, tid); + FFX_CACAO_PrepareNormals(depths, uv, pixelSize, tid); } -[numthreads(PREPARE_NORMALS_WIDTH, PREPARE_NORMALS_HEIGHT, 1)] -void CSPrepareNativeNormals(int2 tid : SV_DispatchThreadID) +[numthreads(FFX_CACAO_PREPARE_NORMALS_WIDTH, FFX_CACAO_PREPARE_NORMALS_HEIGHT, 1)] +void FFX_CACAO_PrepareNativeNormals(int2 tid : SV_DispatchThreadID) { - int2 depthCoord = 2 * tid + g_CACAOConsts.DepthBufferOffset; - float2 depthBufferUV = (float2(depthCoord)-0.5f) * g_CACAOConsts.DepthBufferInverseDimensions; - float4 samples_00 = g_DepthIn.GatherRed(g_PointClampSampler, depthBufferUV, int2(0, 0)); - float4 samples_10 = g_DepthIn.GatherRed(g_PointClampSampler, depthBufferUV, int2(2, 0)); - float4 samples_01 = g_DepthIn.GatherRed(g_PointClampSampler, depthBufferUV, int2(0, 2)); - float4 samples_11 = g_DepthIn.GatherRed(g_PointClampSampler, depthBufferUV, int2(2, 2)); + int2 depthCoord = 2 * tid + g_FFX_CACAO_Consts.DepthBufferOffset; + float2 depthBufferUV = (float2(depthCoord)-0.5f) * 
g_FFX_CACAO_Consts.DepthBufferInverseDimensions; + float4 samples_00 = FFX_CACAO_Prepare_GatherDepthOffset(depthBufferUV, int2(0, 0)); + float4 samples_10 = FFX_CACAO_Prepare_GatherDepthOffset(depthBufferUV, int2(2, 0)); + float4 samples_01 = FFX_CACAO_Prepare_GatherDepthOffset(depthBufferUV, int2(0, 2)); + float4 samples_11 = FFX_CACAO_Prepare_GatherDepthOffset(depthBufferUV, int2(2, 2)); - PrepareNormalsInputDepths depths; + FFX_CACAO_PrepareNormalsInputDepths depths; - depths.depth_10 = ScreenSpaceToViewSpaceDepth(samples_00.z); - depths.depth_20 = ScreenSpaceToViewSpaceDepth(samples_10.w); + depths.depth_10 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples_00.z); + depths.depth_20 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples_10.w); - depths.depth_01 = ScreenSpaceToViewSpaceDepth(samples_00.x); - depths.depth_11 = ScreenSpaceToViewSpaceDepth(samples_00.y); - depths.depth_21 = ScreenSpaceToViewSpaceDepth(samples_10.x); - depths.depth_31 = ScreenSpaceToViewSpaceDepth(samples_10.y); + depths.depth_01 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples_00.x); + depths.depth_11 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples_00.y); + depths.depth_21 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples_10.x); + depths.depth_31 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples_10.y); - depths.depth_02 = ScreenSpaceToViewSpaceDepth(samples_01.w); - depths.depth_12 = ScreenSpaceToViewSpaceDepth(samples_01.z); - depths.depth_22 = ScreenSpaceToViewSpaceDepth(samples_11.w); - depths.depth_32 = ScreenSpaceToViewSpaceDepth(samples_11.z); + depths.depth_02 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples_01.w); + depths.depth_12 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples_01.z); + depths.depth_22 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples_11.w); + depths.depth_32 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples_11.z); - depths.depth_13 = ScreenSpaceToViewSpaceDepth(samples_01.y); - depths.depth_23 = ScreenSpaceToViewSpaceDepth(samples_11.x); + depths.depth_13 = 
FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples_01.y); + depths.depth_23 = FFX_CACAO_ScreenSpaceToViewSpaceDepth(samples_11.x); // use unused samples to make sure compiler doesn't overlap memory and put a sync // between loads float epsilon = (samples_00.w + samples_10.z + samples_01.x + samples_11.y) * 1e-20f; - float2 pixelSize = g_CACAOConsts.OutputBufferInverseDimensions; - float2 uv = (float2(2 * tid) + 0.5f + epsilon) * g_CACAOConsts.OutputBufferInverseDimensions; - - PrepareNormals(depths, uv, pixelSize, tid); -} - -Texture2D g_PrepareNormalsFromNormalsInput : register(t0); -RWTexture2DArray g_PrepareNormalsFromNormalsOutput : register(u0); + float2 pixelSize = g_FFX_CACAO_Consts.OutputBufferInverseDimensions; + float2 uv = (float2(2 * tid) + 0.5f + epsilon) * g_FFX_CACAO_Consts.OutputBufferInverseDimensions; -float3 PrepareNormalsFromInputNormalsLoadNormal(int2 pos) -{ - float3 encodedNormal = g_PrepareNormalsFromNormalsInput.SampleLevel(g_PointClampSampler, (float2(pos)+0.5f) * g_CACAOConsts.OutputBufferInverseDimensions, 0).xyz; - return DecodeNormal(encodedNormal); + FFX_CACAO_PrepareNormals(depths, uv, pixelSize, tid); } [numthreads(PREPARE_NORMALS_FROM_INPUT_NORMALS_WIDTH, PREPARE_NORMALS_FROM_INPUT_NORMALS_HEIGHT, 1)] -void CSPrepareDownsampledNormalsFromInputNormals(int2 tid : SV_DispatchThreadID) +void FFX_CACAO_PrepareDownsampledNormalsFromInputNormals(int2 tid : SV_DispatchThreadID) { int2 baseCoord = 4 * tid; - g_PrepareNormalsFromNormalsOutput[uint3(tid, 0)] = float4(PrepareNormalsFromInputNormalsLoadNormal(baseCoord + int2(0, 0)), 1.0f); - g_PrepareNormalsFromNormalsOutput[uint3(tid, 1)] = float4(PrepareNormalsFromInputNormalsLoadNormal(baseCoord + int2(2, 0)), 1.0f); - g_PrepareNormalsFromNormalsOutput[uint3(tid, 2)] = float4(PrepareNormalsFromInputNormalsLoadNormal(baseCoord + int2(0, 2)), 1.0f); - g_PrepareNormalsFromNormalsOutput[uint3(tid, 3)] = float4(PrepareNormalsFromInputNormalsLoadNormal(baseCoord + int2(2, 2)), 1.0f); + 
FFX_CACAO_Prepare_StoreNormal(tid, 0, FFX_CACAO_Prepare_LoadNormal(baseCoord + int2(0, 0))); + FFX_CACAO_Prepare_StoreNormal(tid, 1, FFX_CACAO_Prepare_LoadNormal(baseCoord + int2(2, 0))); + FFX_CACAO_Prepare_StoreNormal(tid, 2, FFX_CACAO_Prepare_LoadNormal(baseCoord + int2(0, 2))); + FFX_CACAO_Prepare_StoreNormal(tid, 3, FFX_CACAO_Prepare_LoadNormal(baseCoord + int2(2, 2))); } [numthreads(PREPARE_NORMALS_FROM_INPUT_NORMALS_WIDTH, PREPARE_NORMALS_FROM_INPUT_NORMALS_HEIGHT, 1)] -void CSPrepareNativeNormalsFromInputNormals(int2 tid : SV_DispatchThreadID) +void FFX_CACAO_PrepareNativeNormalsFromInputNormals(int2 tid : SV_DispatchThreadID) { int2 baseCoord = 2 * tid; - g_PrepareNormalsFromNormalsOutput[uint3(tid, 0)] = float4(PrepareNormalsFromInputNormalsLoadNormal(baseCoord + int2(0, 0)), 1.0f); - g_PrepareNormalsFromNormalsOutput[uint3(tid, 1)] = float4(PrepareNormalsFromInputNormalsLoadNormal(baseCoord + int2(1, 0)), 1.0f); - g_PrepareNormalsFromNormalsOutput[uint3(tid, 2)] = float4(PrepareNormalsFromInputNormalsLoadNormal(baseCoord + int2(0, 1)), 1.0f); - g_PrepareNormalsFromNormalsOutput[uint3(tid, 3)] = float4(PrepareNormalsFromInputNormalsLoadNormal(baseCoord + int2(1, 1)), 1.0f); + FFX_CACAO_Prepare_StoreNormal(tid, 0, FFX_CACAO_Prepare_LoadNormal(baseCoord + int2(0, 0))); + FFX_CACAO_Prepare_StoreNormal(tid, 1, FFX_CACAO_Prepare_LoadNormal(baseCoord + int2(1, 0))); + FFX_CACAO_Prepare_StoreNormal(tid, 2, FFX_CACAO_Prepare_LoadNormal(baseCoord + int2(0, 1))); + FFX_CACAO_Prepare_StoreNormal(tid, 3, FFX_CACAO_Prepare_LoadNormal(baseCoord + int2(1, 1))); } -// ====================================================================================== -// importance map stuff - -Texture2DArray g_ImportanceFinalSSAO : register(t0); -RWTexture2D g_ImportanceOut : register(u0); +// ============================================================================= +// Importance Map [numthreads(IMPORTANCE_MAP_WIDTH, IMPORTANCE_MAP_HEIGHT, 1)] -void CSGenerateImportanceMap(uint2 
tid : SV_DispatchThreadID) +void FFX_CACAO_GenerateImportanceMap(uint2 tid : SV_DispatchThreadID) { uint2 basePos = tid * 2; - float2 baseUV = (float2(basePos)+float2(0.5f, 0.5f)) * g_CACAOConsts.SSAOBufferInverseDimensions; + float2 baseUV = (float2(basePos)+float2(0.5f, 0.5f)) * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; float avg = 0.0; float minV = 1.0; @@ -1661,14 +1335,14 @@ void CSGenerateImportanceMap(uint2 tid : SV_DispatchThreadID) [unroll] for (int i = 0; i < 4; i++) { - float4 vals = g_ImportanceFinalSSAO.GatherRed(g_PointClampSampler, float3(baseUV, i)); + float4 vals = FFX_CACAO_Importance_GatherSSAO(baseUV, i); // apply the same modifications that would have been applied in the main shader - vals = g_CACAOConsts.EffectShadowStrength * vals; + vals = g_FFX_CACAO_Consts.EffectShadowStrength * vals; vals = 1 - vals; - vals = pow(saturate(vals), g_CACAOConsts.EffectShadowPow); + vals = pow(saturate(vals), g_FFX_CACAO_Consts.EffectShadowPow); avg += dot(float4(vals.x, vals.y, vals.z, vals.w), float4(1.0 / 16.0, 1.0 / 16.0, 1.0 / 16.0, 1.0 / 16.0)); @@ -1678,129 +1352,215 @@ void CSGenerateImportanceMap(uint2 tid : SV_DispatchThreadID) float minMaxDiff = maxV - minV; - g_ImportanceOut[tid] = pow(saturate(minMaxDiff * 2.0), 0.8); + FFX_CACAO_Importance_StoreImportance(tid, pow(saturate(minMaxDiff * 2.0), 0.8)); } -Texture2D g_ImportanceAIn : register(t0); -RWTexture2D g_ImportanceAOut : register(u0); - -static const float cSmoothenImportance = 1.0; +static const float c_FFX_CACAO_SmoothenImportance = 1.0f; [numthreads(IMPORTANCE_MAP_A_WIDTH, IMPORTANCE_MAP_A_HEIGHT, 1)] -void CSPostprocessImportanceMapA(uint2 tid : SV_DispatchThreadID) +void FFX_CACAO_PostprocessImportanceMapA(uint2 tid : SV_DispatchThreadID) { - float2 uv = (float2(tid)+0.5f) * g_CACAOConsts.ImportanceMapInverseDimensions; + float2 uv = (float2(tid)+0.5f) * g_FFX_CACAO_Consts.ImportanceMapInverseDimensions; - float centre = g_ImportanceAIn.SampleLevel(g_LinearClampSampler, uv, 0.0).x; 
+ float centre = FFX_CACAO_Importance_SampleImportanceA(uv); //return centre; - float2 halfPixel = 0.5f * g_CACAOConsts.ImportanceMapInverseDimensions; + float2 halfPixel = 0.5f * g_FFX_CACAO_Consts.ImportanceMapInverseDimensions; float4 vals; - vals.x = g_ImportanceAIn.SampleLevel(g_LinearClampSampler, uv + float2(-halfPixel.x * 3, -halfPixel.y), 0.0).x; - vals.y = g_ImportanceAIn.SampleLevel(g_LinearClampSampler, uv + float2(+halfPixel.x, -halfPixel.y * 3), 0.0).x; - vals.z = g_ImportanceAIn.SampleLevel(g_LinearClampSampler, uv + float2(+halfPixel.x * 3, +halfPixel.y), 0.0).x; - vals.w = g_ImportanceAIn.SampleLevel(g_LinearClampSampler, uv + float2(-halfPixel.x, +halfPixel.y * 3), 0.0).x; + vals.x = FFX_CACAO_Importance_SampleImportanceA(uv + float2(-halfPixel.x * 3, -halfPixel.y)); + vals.y = FFX_CACAO_Importance_SampleImportanceA(uv + float2(+halfPixel.x, -halfPixel.y * 3)); + vals.z = FFX_CACAO_Importance_SampleImportanceA(uv + float2(+halfPixel.x * 3, +halfPixel.y)); + vals.w = FFX_CACAO_Importance_SampleImportanceA(uv + float2(-halfPixel.x, +halfPixel.y * 3)); float avgVal = dot(vals, float4(0.25, 0.25, 0.25, 0.25)); vals.xy = max(vals.xy, vals.zw); float maxVal = max(centre, max(vals.x, vals.y)); - g_ImportanceAOut[tid] = lerp(maxVal, avgVal, cSmoothenImportance); + FFX_CACAO_Importance_StoreImportanceA(tid, lerp(maxVal, avgVal, c_FFX_CACAO_SmoothenImportance)); } -Texture2D g_ImportanceBIn : register(t0); -RWTexture2D g_ImportanceBOut : register(u0); -RWTexture1D g_ImportanceBLoadCounter : register(u1); - [numthreads(IMPORTANCE_MAP_B_WIDTH, IMPORTANCE_MAP_B_HEIGHT, 1)] -void CSPostprocessImportanceMapB(uint2 tid : SV_DispatchThreadID) +void FFX_CACAO_PostprocessImportanceMapB(uint2 tid : SV_DispatchThreadID) { - float2 uv = (float2(tid)+0.5f) * g_CACAOConsts.ImportanceMapInverseDimensions; + float2 uv = (float2(tid)+0.5f) * g_FFX_CACAO_Consts.ImportanceMapInverseDimensions; - float centre = g_ImportanceBIn.SampleLevel(g_LinearClampSampler, uv, 0.0).x; + 
float centre = FFX_CACAO_Importance_SampleImportanceB(uv); //return centre; - float2 halfPixel = 0.5f * g_CACAOConsts.ImportanceMapInverseDimensions; + float2 halfPixel = 0.5f * g_FFX_CACAO_Consts.ImportanceMapInverseDimensions; float4 vals; - vals.x = g_ImportanceBIn.SampleLevel(g_LinearClampSampler, uv + float2(-halfPixel.x, -halfPixel.y * 3), 0.0).x; - vals.y = g_ImportanceBIn.SampleLevel(g_LinearClampSampler, uv + float2(+halfPixel.x * 3, -halfPixel.y), 0.0).x; - vals.z = g_ImportanceBIn.SampleLevel(g_LinearClampSampler, uv + float2(+halfPixel.x, +halfPixel.y * 3), 0.0).x; - vals.w = g_ImportanceBIn.SampleLevel(g_LinearClampSampler, uv + float2(-halfPixel.x * 3, +halfPixel.y), 0.0).x; + vals.x = FFX_CACAO_Importance_SampleImportanceB(uv + float2(-halfPixel.x, -halfPixel.y * 3)); + vals.y = FFX_CACAO_Importance_SampleImportanceB(uv + float2(+halfPixel.x * 3, -halfPixel.y)); + vals.z = FFX_CACAO_Importance_SampleImportanceB(uv + float2(+halfPixel.x, +halfPixel.y * 3)); + vals.w = FFX_CACAO_Importance_SampleImportanceB(uv + float2(-halfPixel.x * 3, +halfPixel.y)); float avgVal = dot(vals, float4(0.25, 0.25, 0.25, 0.25)); vals.xy = max(vals.xy, vals.zw); float maxVal = max(centre, max(vals.x, vals.y)); - float retVal = lerp(maxVal, avgVal, cSmoothenImportance); - g_ImportanceBOut[tid] = retVal; + float retVal = lerp(maxVal, avgVal, c_FFX_CACAO_SmoothenImportance); + FFX_CACAO_Importance_StoreImportanceB(tid, retVal); - // sum the average; to avoid overflowing we assume max AO resolution is not bigger than 16384x16384; so quarter res (used here) will be 4096x4096, which leaves us with 8 bits per pixel + // sum the average; to avoid overflowing we assume max AO resolution is not bigger than 16384x16384; so quarter res (used here) will be 4096x4096, which leaves us with 8 bits per pixel uint sum = (uint)(saturate(retVal) * 255.0 + 0.5); // save every 9th to avoid InterlockedAdd congestion - since we're blurring, this is good enough; compensated by multiplying 
LoadCounterAvgDiv by 9 if (((tid.x % 3) + (tid.y % 3)) == 0) { - InterlockedAdd(g_ImportanceBLoadCounter[0], sum); + FFX_CACAO_Importance_LoadCounterInterlockedAdd(sum); } } -// ============================================================================================ -// bilateral upscale - -RWTexture2D g_BilateralUpscaleOutput : register(u0); - -Texture2DArray g_BilateralUpscaleInput : register(t0); - -Texture2D g_BilateralUpscaleDepth : register(t1); -Texture2DArray g_BilateralUpscaleDownscaledDepth : register(t3); - +// ============================================================================= +// Bilateral Upscale -uint DoublePackFloat16(float v) +uint FFX_CACAO_DoublePackFloat16(float v) { uint2 p = f32tof16(float2(v, v)); return p.x | (p.y << 16); } -#define BILATERAL_UPSCALE_BUFFER_WIDTH (BILATERAL_UPSCALE_WIDTH + 4) -#define BILATERAL_UPSCALE_BUFFER_HEIGHT (BILATERAL_UPSCALE_HEIGHT + 4 + 4) +#define FFX_CACAO_BILATERAL_UPSCALE_BUFFER_WIDTH (FFX_CACAO_BILATERAL_UPSCALE_WIDTH + 4) +#define FFX_CACAO_BILATERAL_UPSCALE_BUFFER_HEIGHT (FFX_CACAO_BILATERAL_UPSCALE_HEIGHT + 4 + 4) -struct BilateralBufferVal +struct FFX_CACAO_BilateralBufferVal { - // float depth; - // float ssaoVal; uint packedDepths; uint packedSsaoVals; }; -groupshared BilateralBufferVal s_BilateralUpscaleBuffer[BILATERAL_UPSCALE_BUFFER_WIDTH][BILATERAL_UPSCALE_BUFFER_HEIGHT]; +groupshared FFX_CACAO_BilateralBufferVal s_FFX_CACAO_BilateralUpscaleBuffer[FFX_CACAO_BILATERAL_UPSCALE_BUFFER_WIDTH][FFX_CACAO_BILATERAL_UPSCALE_BUFFER_HEIGHT]; -void BilateralUpscaleNxN(int2 tid, uint2 gtid, uint2 gid, const int width, const int height) +void FFX_CACAO_BilateralUpscaleNxN(int2 tid, uint2 gtid, uint2 gid, const int width, const int height, const bool useEdges) { // fill in group shared buffer { - uint threadNum = (gtid.y * BILATERAL_UPSCALE_WIDTH + gtid.x) * 3; - uint2 bufferCoord = uint2(threadNum % BILATERAL_UPSCALE_BUFFER_WIDTH, threadNum / BILATERAL_UPSCALE_BUFFER_WIDTH); - uint2 imageCoord = 
(gid * uint2(BILATERAL_UPSCALE_WIDTH, BILATERAL_UPSCALE_HEIGHT)) + bufferCoord - 2; + uint threadNum = (gtid.y * FFX_CACAO_BILATERAL_UPSCALE_WIDTH + gtid.x) * 3; + uint2 bufferCoord = uint2(threadNum % FFX_CACAO_BILATERAL_UPSCALE_BUFFER_WIDTH, threadNum / FFX_CACAO_BILATERAL_UPSCALE_BUFFER_WIDTH); + uint2 imageCoord = (gid * uint2(FFX_CACAO_BILATERAL_UPSCALE_WIDTH, FFX_CACAO_BILATERAL_UPSCALE_HEIGHT)) + bufferCoord - 2; - for (int i = 0; i < 3; ++i) + if (useEdges) { - // uint2 depthBufferCoord = imageCoord + 2 * g_CACAOConsts.DeinterleavedDepthBufferOffset; - // uint3 depthArrayBufferCoord = uint3(depthBufferCoord / 2, 2 * (depthBufferCoord.y % 2) + depthBufferCoord.x % 2); - uint3 ssaoArrayBufferCoord = uint3(imageCoord / 2, 2 * (imageCoord.y % 2) + imageCoord.x % 2); - uint3 depthArrayBufferCoord = ssaoArrayBufferCoord + uint3(g_CACAOConsts.DeinterleavedDepthBufferOffset, 0); - ++imageCoord.x; + float2 inputs[3]; + for (int j = 0; j < 3; ++j) + { + int2 p = int2(imageCoord.x + j, imageCoord.y); + int2 pos = p / 2; + int index = (p.x % 2) + 2 * (p.y % 2); + inputs[j] = FFX_CACAO_BilateralUpscale_LoadSSAO(pos, index); + } + + for (int i = 0; i < 3; ++i) + { + int mx = (imageCoord.x % 2); + int my = (imageCoord.y % 2); + + int ic = mx + my * 2; // center index + int ih = (1 - mx) + my * 2; // neighbouring, horizontal + int iv = mx + (1 - my) * 2; // neighbouring, vertical + int id = (1 - mx) + (1 - my) * 2; // diagonal + + float2 centerVal = inputs[i]; + + float ao = centerVal.x; + + float4 edgesLRTB = FFX_CACAO_UnpackEdges(centerVal.y); + + // convert index shifts to sampling offsets + float fmx = (float)mx; + float fmy = (float)my; + + // in case of an edge, push sampling offsets away from the edge (towards pixel center) + float fmxe = (edgesLRTB.y - edgesLRTB.x); + float fmye = (edgesLRTB.w - edgesLRTB.z); + + // calculate final sampling offsets and sample using bilinear filter + float2 p = imageCoord; + float2 uvH = (p + float2(fmx + fmxe - 0.5, 0.5 - fmy)) * 
0.5 * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + float aoH = FFX_CACAO_BilateralUpscale_SampleSSAOLinear(uvH, ih); + float2 uvV = (p + float2(0.5 - fmx, fmy - 0.5 + fmye)) * 0.5 * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + float aoV = FFX_CACAO_BilateralUpscale_SampleSSAOLinear(uvV, iv); + float2 uvD = (p + float2(fmx - 0.5 + fmxe, fmy - 0.5 + fmye)) * 0.5 * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + float aoD = FFX_CACAO_BilateralUpscale_SampleSSAOLinear(uvD, id); + + // reduce weight for samples near edge - if the edge is on both sides, weight goes to 0 + float4 blendWeights; + blendWeights.x = 1.0; + blendWeights.y = (edgesLRTB.x + edgesLRTB.y) * 0.5; + blendWeights.z = (edgesLRTB.z + edgesLRTB.w) * 0.5; + blendWeights.w = (blendWeights.y + blendWeights.z) * 0.5; + + // calculate weighted average + float blendWeightsSum = dot(blendWeights, float4(1.0, 1.0, 1.0, 1.0)); + ao = dot(float4(ao, aoH, aoV, aoD), blendWeights) / blendWeightsSum; + + ++imageCoord.x; - BilateralBufferVal bufferVal; + FFX_CACAO_BilateralBufferVal bufferVal; - float depth = g_BilateralUpscaleDownscaledDepth[depthArrayBufferCoord]; - float ssaoVal = g_BilateralUpscaleInput.SampleLevel(g_PointClampSampler, float3((float2(ssaoArrayBufferCoord.xy) + 0.5f) * g_CACAOConsts.SSAOBufferInverseDimensions, ssaoArrayBufferCoord.z), 0).x; + uint2 depthArrayBufferCoord = (imageCoord / 2) + g_FFX_CACAO_Consts.DeinterleavedDepthBufferOffset; + uint depthArrayBufferIndex = ic; + float depth = FFX_CACAO_BilateralUpscale_LoadDownscaledDepth(depthArrayBufferCoord, depthArrayBufferIndex); - bufferVal.packedDepths = DoublePackFloat16(depth); - bufferVal.packedSsaoVals = DoublePackFloat16(ssaoVal); + bufferVal.packedDepths = FFX_CACAO_DoublePackFloat16(depth); + bufferVal.packedSsaoVals = FFX_CACAO_DoublePackFloat16(ao); - s_BilateralUpscaleBuffer[bufferCoord.x + i][bufferCoord.y] = bufferVal; + s_FFX_CACAO_BilateralUpscaleBuffer[bufferCoord.x + i][bufferCoord.y] = bufferVal; + } + } + else + 
{ + for (int i = 0; i < 3; ++i) + { + float2 sampleLoc0 = (float2(imageCoord / 2) + 0.5f) * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + float2 sampleLoc1 = sampleLoc0; + float2 sampleLoc2 = sampleLoc0; + float2 sampleLoc3 = sampleLoc0; + switch ((imageCoord.y % 2) * 2 + (imageCoord.x % 2)) { + case 0: + sampleLoc1.x -= 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions.x; + sampleLoc2.y -= 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions.y; + sampleLoc3 -= 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + break; + case 1: + sampleLoc0.x += 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions.x; + sampleLoc2 += float2(0.5f, -0.5f) * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + sampleLoc3.y -= 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions.y; + break; + case 2: + sampleLoc0.y += 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions.y; + sampleLoc1 += float2(-0.5f, 0.5f) * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + sampleLoc3.x -= 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions.x; + break; + case 3: + sampleLoc0 += 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + sampleLoc1.y += 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions.y; + sampleLoc2.x += 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions.x; + break; + } + + float ssaoVal0 = FFX_CACAO_BilateralUpscale_SampleSSAOPoint(sampleLoc0, 0); + float ssaoVal1 = FFX_CACAO_BilateralUpscale_SampleSSAOPoint(sampleLoc1, 1); + float ssaoVal2 = FFX_CACAO_BilateralUpscale_SampleSSAOPoint(sampleLoc2, 2); + float ssaoVal3 = FFX_CACAO_BilateralUpscale_SampleSSAOPoint(sampleLoc3, 3); + + uint3 ssaoArrayBufferCoord = uint3(imageCoord / 2, 2 * (imageCoord.y % 2) + imageCoord.x % 2); + uint2 depthArrayBufferCoord = ssaoArrayBufferCoord.xy + g_FFX_CACAO_Consts.DeinterleavedDepthBufferOffset; + uint depthArrayBufferIndex = ssaoArrayBufferCoord.z; + ++imageCoord.x; + + FFX_CACAO_BilateralBufferVal bufferVal; + + float depth = 
FFX_CACAO_BilateralUpscale_LoadDownscaledDepth(depthArrayBufferCoord, depthArrayBufferIndex); + float ssaoVal = (ssaoVal0 + ssaoVal1 + ssaoVal2 + ssaoVal3) * 0.25f; + + bufferVal.packedDepths = FFX_CACAO_DoublePackFloat16(depth); + bufferVal.packedSsaoVals = FFX_CACAO_DoublePackFloat16(ssaoVal); + + s_FFX_CACAO_BilateralUpscaleBuffer[bufferCoord.x + i][bufferCoord.y] = bufferVal; + } } } @@ -1810,27 +1570,27 @@ void BilateralUpscaleNxN(int2 tid, uint2 gtid, uint2 gid, const int width, const // load depths { int2 fullBufferCoord = 2 * tid; - int2 fullDepthBufferCoord = fullBufferCoord + g_CACAOConsts.DepthBufferOffset; + int2 fullDepthBufferCoord = fullBufferCoord + g_FFX_CACAO_Consts.DepthBufferOffset; - depths[0] = ScreenSpaceToViewSpaceDepth(g_BilateralUpscaleDepth[fullDepthBufferCoord + int2(0, 0)]); - depths[1] = ScreenSpaceToViewSpaceDepth(g_BilateralUpscaleDepth[fullDepthBufferCoord + int2(1, 0)]); - depths[2] = ScreenSpaceToViewSpaceDepth(g_BilateralUpscaleDepth[fullDepthBufferCoord + int2(0, 1)]); - depths[3] = ScreenSpaceToViewSpaceDepth(g_BilateralUpscaleDepth[fullDepthBufferCoord + int2(1, 1)]); + depths[0] = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_BilateralUpscale_LoadDepth(fullDepthBufferCoord, int2(0, 0))); + depths[1] = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_BilateralUpscale_LoadDepth(fullDepthBufferCoord, int2(1, 0))); + depths[2] = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_BilateralUpscale_LoadDepth(fullDepthBufferCoord, int2(0, 1))); + depths[3] = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_BilateralUpscale_LoadDepth(fullDepthBufferCoord, int2(1, 1))); } min16float4 packedDepths = min16float4(depths[0], depths[1], depths[2], depths[3]); - float totals[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; - float totalWeights[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; - float2 pps[] = { float2(0.0f, 0.0f), float2(0.5f, 0.0f), float2(0.0f, 0.5f), float2(0.5f, 0.5f) }; - - min16float4 packedTotals = min16float4(0.0f, 0.0f, 0.0f, 0.0f); - min16float4 
packedTotalWeights = min16float4(0.0f, 0.0f, 0.0f, 0.0f); - int2 baseBufferCoord = gtid + int2(width, height); - float distanceSigma = g_CACAOConsts.BilateralSimilarityDistanceSigma; + min16float epsilonWeight = 1e-3f; + min16float2 nearestSsaoVals = FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BilateralUpscaleBuffer[baseBufferCoord.x][baseBufferCoord.y].packedSsaoVals); + min16float4 packedTotals = epsilonWeight * min16float4(1.0f, 1.0f, 1.0f, 1.0f); + packedTotals.xy *= nearestSsaoVals; + packedTotals.zw *= nearestSsaoVals; + min16float4 packedTotalWeights = epsilonWeight * min16float4(1.0f, 1.0f, 1.0f, 1.0f); + + float distanceSigma = g_FFX_CACAO_Consts.BilateralSimilarityDistanceSigma; min16float2 packedDistSigma = min16float2(1.0f / distanceSigma, 1.0f / distanceSigma); - float sigma = g_CACAOConsts.BilateralSigmaSquared; + float sigma = g_FFX_CACAO_Consts.BilateralSigmaSquared; min16float2 packedSigma = min16float2(1.0f / sigma, 1.0f / sigma); for (int x = -width; x <= width; ++x) @@ -1839,11 +1599,11 @@ void BilateralUpscaleNxN(int2 tid, uint2 gtid, uint2 gid, const int width, const { int2 bufferCoord = baseBufferCoord + int2(x, y); - BilateralBufferVal bufferVal = s_BilateralUpscaleBuffer[bufferCoord.x][bufferCoord.y]; + FFX_CACAO_BilateralBufferVal bufferVal = s_FFX_CACAO_BilateralUpscaleBuffer[bufferCoord.x][bufferCoord.y]; - min16float2 u = min16float2(x, x) + min16float2(0.0f, 0.5f); - min16float2 v1 = min16float2(y, y) + min16float2(0.0f, 0.0f); - min16float2 v2 = min16float2(y, y) + min16float2(0.5f, 0.5f); + min16float2 u = min16float2(x, x) - min16float2(0.0f, 0.5f); + min16float2 v1 = min16float2(y, y) - min16float2(0.0f, 0.0f); + min16float2 v2 = min16float2(y, y) - min16float2(0.5f, 0.5f); u = u * u; v1 = v1 * v1; v2 = v2 * v2; @@ -1854,7 +1614,7 @@ void BilateralUpscaleNxN(int2 tid, uint2 gtid, uint2 gid, const int width, const min16float2 wx1 = exp(-dist1 * packedSigma); min16float2 wx2 = exp(-dist2 * packedSigma); - min16float2 bufferPackedDepths = 
UnpackFloat16(bufferVal.packedDepths); + min16float2 bufferPackedDepths = FFX_CACAO_UnpackFloat16(bufferVal.packedDepths); #if 0 min16float2 diff1 = abs(packedDepths.xy - bufferPackedDepths); @@ -1872,7 +1632,7 @@ void BilateralUpscaleNxN(int2 tid, uint2 gtid, uint2 gid, const int width, const min16float2 weight1 = wx1 * wy1; min16float2 weight2 = wx2 * wy2; - min16float2 packedSsaoVals = UnpackFloat16(bufferVal.packedSsaoVals); + min16float2 packedSsaoVals = FFX_CACAO_UnpackFloat16(bufferVal.packedSsaoVals); packedTotals.xy += packedSsaoVals * weight1; packedTotals.zw += packedSsaoVals * weight2; packedTotalWeights.xy += weight1; @@ -1882,54 +1642,78 @@ void BilateralUpscaleNxN(int2 tid, uint2 gtid, uint2 gid, const int width, const uint2 outputCoord = 2 * tid; min16float4 outputValues = packedTotals / packedTotalWeights; - g_BilateralUpscaleOutput[outputCoord + int2(0, 0)] = outputValues.x; // totals[0] / totalWeights[0]; - g_BilateralUpscaleOutput[outputCoord + int2(1, 0)] = outputValues.y; // totals[1] / totalWeights[1]; - g_BilateralUpscaleOutput[outputCoord + int2(0, 1)] = outputValues.z; // totals[2] / totalWeights[2]; - g_BilateralUpscaleOutput[outputCoord + int2(1, 1)] = outputValues.w; // totals[3] / totalWeights[3]; + FFX_CACAO_BilateralUpscale_StoreOutput(outputCoord, int2(0, 0), outputValues.x); // totals[0] / totalWeights[0]; + FFX_CACAO_BilateralUpscale_StoreOutput(outputCoord, int2(1, 0), outputValues.y); // totals[1] / totalWeights[1]; + FFX_CACAO_BilateralUpscale_StoreOutput(outputCoord, int2(0, 1), outputValues.z); // totals[2] / totalWeights[2]; + FFX_CACAO_BilateralUpscale_StoreOutput(outputCoord, int2(1, 1), outputValues.w); // totals[3] / totalWeights[3]; +} + +[numthreads(FFX_CACAO_BILATERAL_UPSCALE_WIDTH, FFX_CACAO_BILATERAL_UPSCALE_HEIGHT, 1)] +void FFX_CACAO_UpscaleBilateral5x5Smart(int2 tid : SV_DispatchThreadID, uint2 gtid : SV_GroupThreadID, uint2 gid : SV_GroupID) +{ + FFX_CACAO_BilateralUpscaleNxN(tid, gtid, gid, 2, 2, true); } 
-[numthreads(BILATERAL_UPSCALE_WIDTH, BILATERAL_UPSCALE_HEIGHT, 1)] -void CSUpscaleBilateral5x5(int2 tid : SV_DispatchThreadID, uint2 gtid : SV_GroupThreadID, uint2 gid : SV_GroupID) +[numthreads(FFX_CACAO_BILATERAL_UPSCALE_WIDTH, FFX_CACAO_BILATERAL_UPSCALE_HEIGHT, 1)] +void FFX_CACAO_UpscaleBilateral5x5NonSmart(int2 tid : SV_DispatchThreadID, uint2 gtid : SV_GroupThreadID, uint2 gid : SV_GroupID) { - BilateralUpscaleNxN(tid, gtid, gid, 2, 2); + FFX_CACAO_BilateralUpscaleNxN(tid, gtid, gid, 2, 2, false); } -[numthreads(BILATERAL_UPSCALE_WIDTH, BILATERAL_UPSCALE_HEIGHT, 1)] -void CSUpscaleBilateral7x7(int2 tid : SV_DispatchThreadID, uint2 gtid : SV_GroupThreadID, uint2 gid : SV_GroupID) +[numthreads(FFX_CACAO_BILATERAL_UPSCALE_WIDTH, FFX_CACAO_BILATERAL_UPSCALE_HEIGHT, 1)] +void FFX_CACAO_UpscaleBilateral7x7(int2 tid : SV_DispatchThreadID, uint2 gtid : SV_GroupThreadID, uint2 gid : SV_GroupID) { - BilateralUpscaleNxN(tid, gtid, gid, 3, 3); + FFX_CACAO_BilateralUpscaleNxN(tid, gtid, gid, 3, 3, true); } -[numthreads(BILATERAL_UPSCALE_WIDTH, BILATERAL_UPSCALE_HEIGHT, 1)] -void CSUpscaleBilateral5x5Half(int2 tid : SV_DispatchThreadID, uint2 gtid : SV_GroupThreadID, uint2 gid : SV_GroupID) +[numthreads(FFX_CACAO_BILATERAL_UPSCALE_WIDTH, FFX_CACAO_BILATERAL_UPSCALE_HEIGHT, 1)] +void FFX_CACAO_UpscaleBilateral5x5Half(int2 tid : SV_DispatchThreadID, uint2 gtid : SV_GroupThreadID, uint2 gid : SV_GroupID) { const int width = 2, height = 2; // fill in group shared buffer { - uint threadNum = (gtid.y * BILATERAL_UPSCALE_WIDTH + gtid.x) * 3; - uint2 bufferCoord = uint2(threadNum % BILATERAL_UPSCALE_BUFFER_WIDTH, threadNum / BILATERAL_UPSCALE_BUFFER_WIDTH); - uint2 imageCoord = (gid * uint2(BILATERAL_UPSCALE_WIDTH, BILATERAL_UPSCALE_HEIGHT)) + bufferCoord - 2; + uint threadNum = (gtid.y * FFX_CACAO_BILATERAL_UPSCALE_WIDTH + gtid.x) * 3; + uint2 bufferCoord = uint2(threadNum % FFX_CACAO_BILATERAL_UPSCALE_BUFFER_WIDTH, threadNum / FFX_CACAO_BILATERAL_UPSCALE_BUFFER_WIDTH); + uint2 
imageCoord = (gid * uint2(FFX_CACAO_BILATERAL_UPSCALE_WIDTH, FFX_CACAO_BILATERAL_UPSCALE_HEIGHT)) + bufferCoord - 2; for (int i = 0; i < 3; ++i) { - // uint2 depthBufferCoord = imageCoord + g_CACAOConsts.DeinterleavedDepthBufferOffset; - // uint3 depthArrayBufferCoord = uint3(depthBufferCoord / 2, 2 * (depthBufferCoord.y % 2) + depthBufferCoord.x % 2); - uint idx = (imageCoord.y % 2) * 3; - uint3 ssaoArrayBufferCoord = uint3(imageCoord / 2, idx); - uint3 depthArrayBufferCoord = ssaoArrayBufferCoord + uint3(g_CACAOConsts.DeinterleavedDepthBufferOffset, 0); + float2 sampleLoc0 = (float2(imageCoord / 2) + 0.5f) * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + float2 sampleLoc1 = sampleLoc0; + switch ((imageCoord.y % 2) * 2 + (imageCoord.x % 2)) { + case 0: + sampleLoc1 -= 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + break; + case 1: + sampleLoc0.x += 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions.x; + sampleLoc1.y -= 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions.y; + break; + case 2: + sampleLoc0.y += 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions.y; + sampleLoc1.x -= 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions.x; + break; + case 3: + sampleLoc0 += 0.5f * g_FFX_CACAO_Consts.SSAOBufferInverseDimensions; + break; + } + + float ssaoVal0 = FFX_CACAO_BilateralUpscale_SampleSSAOPoint(sampleLoc0, 0); + float ssaoVal1 = FFX_CACAO_BilateralUpscale_SampleSSAOPoint(sampleLoc1, 3); + + uint2 depthArrayBufferCoord = (imageCoord / 2) + g_FFX_CACAO_Consts.DeinterleavedDepthBufferOffset; + uint depthArrayBufferIndex = (imageCoord.y % 2) * 3; ++imageCoord.x; - BilateralBufferVal bufferVal; + FFX_CACAO_BilateralBufferVal bufferVal; - float depth = g_BilateralUpscaleDownscaledDepth[depthArrayBufferCoord]; - // float ssaoVal = g_BilateralUpscaleInput.SampleLevel(g_PointClampSampler, float3((float2(ssaoArrayBufferCoord.xy) + 0.5f) * g_CACAOConsts.HalfViewportPixelSize, ssaoArrayBufferCoord.z), 0); - float ssaoVal = 
g_BilateralUpscaleInput.SampleLevel(g_PointClampSampler, float3((float2(ssaoArrayBufferCoord.xy) + 0.5f) * g_CACAOConsts.SSAOBufferInverseDimensions, ssaoArrayBufferCoord.z), 0).x; + float depth = FFX_CACAO_BilateralUpscale_LoadDownscaledDepth(depthArrayBufferCoord, depthArrayBufferIndex); + float ssaoVal = (ssaoVal0 + ssaoVal1) * 0.5f; - bufferVal.packedDepths = DoublePackFloat16(depth); - bufferVal.packedSsaoVals = DoublePackFloat16(ssaoVal); + bufferVal.packedDepths = FFX_CACAO_DoublePackFloat16(depth); + bufferVal.packedSsaoVals = FFX_CACAO_DoublePackFloat16(ssaoVal); - s_BilateralUpscaleBuffer[bufferCoord.x + i][bufferCoord.y] = bufferVal; + s_FFX_CACAO_BilateralUpscaleBuffer[bufferCoord.x + i][bufferCoord.y] = bufferVal; } } @@ -1939,27 +1723,27 @@ void CSUpscaleBilateral5x5Half(int2 tid : SV_DispatchThreadID, uint2 gtid : SV_G // load depths { int2 fullBufferCoord = 2 * tid; - int2 fullDepthBufferCoord = fullBufferCoord + g_CACAOConsts.DepthBufferOffset; + int2 fullDepthBufferCoord = fullBufferCoord + g_FFX_CACAO_Consts.DepthBufferOffset; - depths[0] = ScreenSpaceToViewSpaceDepth(g_BilateralUpscaleDepth[fullDepthBufferCoord + int2(0, 0)]); - depths[1] = ScreenSpaceToViewSpaceDepth(g_BilateralUpscaleDepth[fullDepthBufferCoord + int2(1, 0)]); - depths[2] = ScreenSpaceToViewSpaceDepth(g_BilateralUpscaleDepth[fullDepthBufferCoord + int2(0, 1)]); - depths[3] = ScreenSpaceToViewSpaceDepth(g_BilateralUpscaleDepth[fullDepthBufferCoord + int2(1, 1)]); + depths[0] = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_BilateralUpscale_LoadDepth(fullDepthBufferCoord, int2(0, 0))); + depths[1] = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_BilateralUpscale_LoadDepth(fullDepthBufferCoord, int2(1, 0))); + depths[2] = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_BilateralUpscale_LoadDepth(fullDepthBufferCoord, int2(0, 1))); + depths[3] = FFX_CACAO_ScreenSpaceToViewSpaceDepth(FFX_CACAO_BilateralUpscale_LoadDepth(fullDepthBufferCoord, int2(1, 1))); } min16float4 
packedDepths = min16float4(depths[0], depths[1], depths[2], depths[3]); - float totals[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; - float totalWeights[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; - float2 pps[] = { float2(0.0f, 0.0f), float2(0.5f, 0.0f), float2(0.0f, 0.5f), float2(0.5f, 0.5f) }; - - min16float4 packedTotals = min16float4(0.0f, 0.0f, 0.0f, 0.0f); - min16float4 packedTotalWeights = min16float4(0.0f, 0.0f, 0.0f, 0.0f); - int2 baseBufferCoord = gtid + int2(width, height); - float distanceSigma = g_CACAOConsts.BilateralSimilarityDistanceSigma; + min16float epsilonWeight = 1e-3f; + min16float2 nearestSsaoVals = FFX_CACAO_UnpackFloat16(s_FFX_CACAO_BilateralUpscaleBuffer[baseBufferCoord.x][baseBufferCoord.y].packedSsaoVals); + min16float4 packedTotals = epsilonWeight * min16float4(1.0f, 1.0f, 1.0f, 1.0f); + packedTotals.xy *= nearestSsaoVals; + packedTotals.zw *= nearestSsaoVals; + min16float4 packedTotalWeights = epsilonWeight * min16float4(1.0f, 1.0f, 1.0f, 1.0f); + + float distanceSigma = g_FFX_CACAO_Consts.BilateralSimilarityDistanceSigma; min16float2 packedDistSigma = min16float2(1.0f / distanceSigma, 1.0f / distanceSigma); - float sigma = g_CACAOConsts.BilateralSigmaSquared; + float sigma = g_FFX_CACAO_Consts.BilateralSigmaSquared; min16float2 packedSigma = min16float2(1.0f / sigma, 1.0f / sigma); for (int x = -width; x <= width; ++x) @@ -1968,11 +1752,11 @@ void CSUpscaleBilateral5x5Half(int2 tid : SV_DispatchThreadID, uint2 gtid : SV_G { int2 bufferCoord = baseBufferCoord + int2(x, y); - BilateralBufferVal bufferVal = s_BilateralUpscaleBuffer[bufferCoord.x][bufferCoord.y]; + FFX_CACAO_BilateralBufferVal bufferVal = s_FFX_CACAO_BilateralUpscaleBuffer[bufferCoord.x][bufferCoord.y]; - min16float2 u = min16float2(x, x) + min16float2(0.0f, 0.5f); - min16float2 v1 = min16float2(y, y) + min16float2(0.0f, 0.0f); - min16float2 v2 = min16float2(y, y) + min16float2(0.5f, 0.5f); + min16float2 u = min16float2(x, x) - min16float2(0.0f, 0.5f); + min16float2 v1 = min16float2(y, y) - 
min16float2(0.0f, 0.0f); + min16float2 v2 = min16float2(y, y) - min16float2(0.5f, 0.5f); u = u * u; v1 = v1 * v1; v2 = v2 * v2; @@ -1983,7 +1767,7 @@ void CSUpscaleBilateral5x5Half(int2 tid : SV_DispatchThreadID, uint2 gtid : SV_G min16float2 wx1 = exp(-dist1 * packedSigma); min16float2 wx2 = exp(-dist2 * packedSigma); - min16float2 bufferPackedDepths = UnpackFloat16(bufferVal.packedDepths); + min16float2 bufferPackedDepths = FFX_CACAO_UnpackFloat16(bufferVal.packedDepths); #if 0 min16float2 diff1 = abs(packedDepths.xy - bufferPackedDepths); @@ -2001,7 +1785,7 @@ void CSUpscaleBilateral5x5Half(int2 tid : SV_DispatchThreadID, uint2 gtid : SV_G min16float2 weight1 = wx1 * wy1; min16float2 weight2 = wx2 * wy2; - min16float2 packedSsaoVals = UnpackFloat16(bufferVal.packedSsaoVals); + min16float2 packedSsaoVals = FFX_CACAO_UnpackFloat16(bufferVal.packedSsaoVals); packedTotals.xy += packedSsaoVals * weight1; packedTotals.zw += packedSsaoVals * weight2; packedTotalWeights.xy += weight1; @@ -2011,12 +1795,12 @@ void CSUpscaleBilateral5x5Half(int2 tid : SV_DispatchThreadID, uint2 gtid : SV_G uint2 outputCoord = 2 * tid; min16float4 outputValues = packedTotals / packedTotalWeights; - g_BilateralUpscaleOutput[outputCoord + int2(0, 0)] = outputValues.x; // totals[0] / totalWeights[0]; - g_BilateralUpscaleOutput[outputCoord + int2(1, 0)] = outputValues.y; // totals[1] / totalWeights[1]; - g_BilateralUpscaleOutput[outputCoord + int2(0, 1)] = outputValues.z; // totals[2] / totalWeights[2]; - g_BilateralUpscaleOutput[outputCoord + int2(1, 1)] = outputValues.w; // totals[3] / totalWeights[3]; + FFX_CACAO_BilateralUpscale_StoreOutput(outputCoord, int2(0, 0), outputValues.x); // totals[0] / totalWeights[0]; + FFX_CACAO_BilateralUpscale_StoreOutput(outputCoord, int2(1, 0), outputValues.y); // totals[1] / totalWeights[1]; + FFX_CACAO_BilateralUpscale_StoreOutput(outputCoord, int2(0, 1), outputValues.z); // totals[2] / totalWeights[2]; + 
FFX_CACAO_BilateralUpscale_StoreOutput(outputCoord, int2(1, 1), outputValues.w); // totals[3] / totalWeights[3]; } -#undef BILATERAL_UPSCALE_BUFFER_WIDTH -#undef BILATERAL_UPSCALE_BUFFER_HEIGHT +#undef FFX_CACAO_BILATERAL_UPSCALE_BUFFER_WIDTH +#undef FFX_CACAO_BILATERAL_UPSCALE_BUFFER_HEIGHT diff --git a/ffx-cacao/src/ffx_cacao_bindings.hlsl b/ffx-cacao/src/ffx_cacao_bindings.hlsl new file mode 100644 index 0000000..2af3b4c --- /dev/null +++ b/ffx-cacao/src/ffx_cacao_bindings.hlsl @@ -0,0 +1,367 @@ +// Modifications Copyright © 2021. Advanced Micro Devices, Inc. All Rights Reserved. + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#ifndef FFX_CACAO_BINDINGS_HLSL +#define FFX_CACAO_BINDINGS_HLSL + +// ============================================================================= +// Constants + +struct FFX_CACAO_Constants +{ + float2 DepthUnpackConsts; + float2 CameraTanHalfFOV; + + float2 NDCToViewMul; + float2 NDCToViewAdd; + + float2 DepthBufferUVToViewMul; + float2 DepthBufferUVToViewAdd; + + float EffectRadius; // world (viewspace) maximum size of the shadow + float EffectShadowStrength; // global strength of the effect (0 - 5) + float EffectShadowPow; + float EffectShadowClamp; + + float EffectFadeOutMul; // effect fade out from distance (ex. 25) + float EffectFadeOutAdd; // effect fade out to distance (ex. 
100) + float EffectHorizonAngleThreshold; // limit errors on slopes and caused by insufficient geometry tessellation (0.05 to 0.5) + float EffectSamplingRadiusNearLimitRec; // if viewspace pixel closer than this, don't enlarge shadow sampling radius anymore (makes no sense to grow beyond some distance, not enough samples to cover everything, so just limit the shadow growth; could be SSAOSettingsFadeOutFrom * 0.1 or less) + + float DepthPrecisionOffsetMod; + float NegRecEffectRadius; // -1.0 / EffectRadius + float LoadCounterAvgDiv; // 1.0 / ( halfDepthMip[SSAO_DEPTH_MIP_LEVELS-1].sizeX * halfDepthMip[SSAO_DEPTH_MIP_LEVELS-1].sizeY ) + float AdaptiveSampleCountLimit; + + float InvSharpness; + int PassIndex; + float BilateralSigmaSquared; + float BilateralSimilarityDistanceSigma; + + float4 PatternRotScaleMatrices[5]; + + float NormalsUnpackMul; + float NormalsUnpackAdd; + float DetailAOStrength; + float Dummy0; + + float2 SSAOBufferDimensions; + float2 SSAOBufferInverseDimensions; + + float2 DepthBufferDimensions; + float2 DepthBufferInverseDimensions; + + int2 DepthBufferOffset; + float2 PerPassFullResUVOffset; + + float2 OutputBufferDimensions; + float2 OutputBufferInverseDimensions; + + float2 ImportanceMapDimensions; + float2 ImportanceMapInverseDimensions; + + float2 DeinterleavedDepthBufferDimensions; + float2 DeinterleavedDepthBufferInverseDimensions; + + float2 DeinterleavedDepthBufferOffset; + float2 DeinterleavedDepthBufferNormalisedOffset; + + float4x4 NormalsWorldToViewspaceMatrix; +}; + +cbuffer SSAOConstantsBuffer : register(b0) +{ + FFX_CACAO_Constants g_FFX_CACAO_Consts; +} + +// ============================================================================= +// Samplers + +SamplerState g_PointClampSampler : register(s0); +SamplerState g_PointMirrorSampler : register(s1); +SamplerState g_LinearClampSampler : register(s2); +SamplerState g_ViewspaceDepthTapSampler : register(s3); +SamplerState g_RealPointClampSampler : register(s4); + +// 
============================================================================= +// Clear Load Counter + +RWTexture1D g_ClearLoadCounter_LoadCounter : register(u0); + +void FFX_CACAO_ClearLoadCounter_SetLoadCounter(uint val) +{ + g_ClearLoadCounter_LoadCounter[0] = val; +} + +// ============================================================================= +// Edge Sensitive Blur + +Texture2DArray g_EdgeSensitiveBlur_Input : register(t0); +RWTexture2DArray g_EdgeSensitiveBlur_Output : register(u0); + +float2 FFX_CACAO_EdgeSensitiveBlur_SampleInputOffset(float2 uv, int2 offset) +{ + return g_EdgeSensitiveBlur_Input.SampleLevel(g_PointMirrorSampler, float3(uv, 0.0f), 0.0f, offset); +} + +float2 FFX_CACAO_EdgeSensitiveBlur_SampleInput(float2 uv) +{ + return g_EdgeSensitiveBlur_Input.SampleLevel(g_PointMirrorSampler, float3(uv, 0.0f), 0.0f); +} + +void FFX_CACAO_EdgeSensitiveBlur_StoreOutput(int2 coord, float2 value) +{ + g_EdgeSensitiveBlur_Output[int3(coord, 0)] = value; +} + +// ============================================================================= +// SSAO Generation + +Texture2DArray g_ViewspaceDepthSource : register(t0); +Texture2DArray g_DeinterleavedNormals : register(t1); +Texture1D g_LoadCounter : register(t2); +Texture2D g_ImportanceMap : register(t3); +Texture2DArray g_FinalSSAO : register(t4); + +RWTexture2DArray g_SSAOOutput : register(u0); + +float FFX_CACAO_SSAOGeneration_SampleViewspaceDepthMip(float2 uv, float mip) +{ + return g_ViewspaceDepthSource.SampleLevel(g_ViewspaceDepthTapSampler, float3(uv, 0.0f), mip); +} + +float4 FFX_CACAO_SSAOGeneration_GatherViewspaceDepthOffset(float2 uv, int2 offset) +{ + return g_ViewspaceDepthSource.GatherRed(g_PointMirrorSampler, float3(uv, 0.0f), offset); +} + +uint FFX_CACAO_SSAOGeneration_GetLoadCounter() +{ + return g_LoadCounter[0]; +} + +float FFX_CACAO_SSAOGeneration_SampleImportance(float2 uv) +{ + return g_ImportanceMap.SampleLevel(g_LinearClampSampler, uv, 0.0f); +} + +float2 
FFX_CACAO_SSAOGeneration_LoadBasePassSSAOPass(int2 coord, int pass) +{ + return g_FinalSSAO.Load(int4(coord, pass, 0)).xy; +} + +float3 FFX_CACAO_SSAOGeneration_GetNormalPass(int2 coord, int pass) +{ + return g_DeinterleavedNormals[int3(coord, pass)].xyz; +} + +void FFX_CACAO_SSAOGeneration_StoreOutput(int2 coord, float2 val) +{ + g_SSAOOutput[int3(coord, 0)] = val; +} + +// ============================================================================ +// Apply + +Texture2DArray g_ApplyFinalSSAO : register(t0); +RWTexture2D g_ApplyOutput : register(u0); + +float FFX_CACAO_Apply_SampleSSAOUVPass(float2 uv, int pass) +{ + return g_ApplyFinalSSAO.SampleLevel(g_LinearClampSampler, float3(uv, pass), 0.0f).x; +} + +float2 FFX_CACAO_Apply_LoadSSAOPass(int2 coord, int pass) +{ + return g_ApplyFinalSSAO.Load(int4(coord, pass, 0)); +} + +void FFX_CACAO_Apply_StoreOutput(int2 coord, float val) +{ + g_ApplyOutput[coord] = val; +} + +// ============================================================================= +// Prepare + +Texture2D g_DepthIn : register(t0); +Texture2D g_PrepareNormalsFromNormalsInput : register(t0); + +RWTexture2DArray g_PrepareDepthsAndMips_OutMip0 : register(u0); +RWTexture2DArray g_PrepareDepthsAndMips_OutMip1 : register(u1); +RWTexture2DArray g_PrepareDepthsAndMips_OutMip2 : register(u2); +RWTexture2DArray g_PrepareDepthsAndMips_OutMip3 : register(u3); + +RWTexture2DArray g_PrepareDepthsOut : register(u0); + +RWTexture2DArray g_PrepareNormals_NormalOut : register(u0); + +float FFX_CACAO_Prepare_SampleDepthOffset(float2 uv, int2 offset) +{ + return g_DepthIn.SampleLevel(g_PointClampSampler, uv, 0.0f, offset); +} + +float4 FFX_CACAO_Prepare_GatherDepth(float2 uv) +{ + return g_DepthIn.GatherRed(g_PointClampSampler, uv); +} + +float FFX_CACAO_Prepare_LoadDepth(int2 coord) +{ + return g_DepthIn.Load(int3(coord, 0)); +} + +float FFX_CACAO_Prepare_LoadDepthOffset(int2 coord, int2 offset) +{ + return g_DepthIn.Load(int3(coord, 0), offset); +} + +float4 
FFX_CACAO_Prepare_GatherDepthOffset(float2 uv, int2 offset) +{ + return g_DepthIn.GatherRed(g_PointClampSampler, uv, offset); +} + +float3 FFX_CACAO_Prepare_LoadNormal(int2 coord) +{ + float3 normal = g_PrepareNormalsFromNormalsInput.Load(int3(coord, 0)).xyz; + normal = normal * g_FFX_CACAO_Consts.NormalsUnpackMul.xxx + g_FFX_CACAO_Consts.NormalsUnpackAdd.xxx; + normal = mul(normal, (float3x3)g_FFX_CACAO_Consts.NormalsWorldToViewspaceMatrix).xyz; + // normal = normalize(normal); + return normal; +} + +void FFX_CACAO_Prepare_StoreDepthMip0(int2 coord, int index, float val) +{ + g_PrepareDepthsAndMips_OutMip0[int3(coord, index)] = val; +} + +void FFX_CACAO_Prepare_StoreDepthMip1(int2 coord, int index, float val) +{ + g_PrepareDepthsAndMips_OutMip1[int3(coord, index)] = val; +} + +void FFX_CACAO_Prepare_StoreDepthMip2(int2 coord, int index, float val) +{ + g_PrepareDepthsAndMips_OutMip2[int3(coord, index)] = val; +} + +void FFX_CACAO_Prepare_StoreDepthMip3(int2 coord, int index, float val) +{ + g_PrepareDepthsAndMips_OutMip3[int3(coord, index)] = val; +} + +void FFX_CACAO_Prepare_StoreDepth(int2 coord, int index, float val) +{ + g_PrepareDepthsOut[int3(coord, index)] = val; +} + +void FFX_CACAO_Prepare_StoreNormal(int2 coord, int index, float3 normal) +{ + g_PrepareNormals_NormalOut[int3(coord, index)] = float4(normal, 1.0f); +} + +// ============================================================================= +// Importance Map + +Texture2DArray g_ImportanceFinalSSAO : register(t0); +RWTexture2D g_ImportanceOut : register(u0); + +Texture2D g_ImportanceAIn : register(t0); +RWTexture2D g_ImportanceAOut : register(u0); + +Texture2D g_ImportanceBIn : register(t0); +RWTexture2D g_ImportanceBOut : register(u0); +RWTexture1D g_ImportanceBLoadCounter : register(u1); + +float4 FFX_CACAO_Importance_GatherSSAO(float2 uv, int index) +{ + return g_ImportanceFinalSSAO.GatherRed(g_PointClampSampler, float3(uv, index)); +} + +void FFX_CACAO_Importance_StoreImportance(int2 coord, 
float val) +{ + g_ImportanceOut[coord] = val; +} + +float FFX_CACAO_Importance_SampleImportanceA(float2 uv) +{ + return g_ImportanceAIn.SampleLevel(g_LinearClampSampler, uv, 0.0f); +} + +void FFX_CACAO_Importance_StoreImportanceA(int2 coord, float val) +{ + g_ImportanceAOut[coord] = val; +} + +float FFX_CACAO_Importance_SampleImportanceB(float2 uv) +{ + return g_ImportanceBIn.SampleLevel(g_LinearClampSampler, uv, 0.0f); +} + +void FFX_CACAO_Importance_StoreImportanceB(int2 coord, float val) +{ + g_ImportanceBOut[coord] = val; +} + +void FFX_CACAO_Importance_LoadCounterInterlockedAdd(uint val) +{ + InterlockedAdd(g_ImportanceBLoadCounter[0], val); +} + +// ============================================================================= +// Bilateral Upscale + +RWTexture2D g_BilateralUpscaleOutput : register(u0); + +Texture2DArray g_BilateralUpscaleInput : register(t0); +Texture2D g_BilateralUpscaleDepth : register(t1); +Texture2DArray g_BilateralUpscaleDownscaledDepth : register(t2); + +void FFX_CACAO_BilateralUpscale_StoreOutput(int2 coord, int2 offset, float val) +{ + g_BilateralUpscaleOutput[coord + offset] = val; +} + +float FFX_CACAO_BilateralUpscale_SampleSSAOLinear(float2 uv, int index) +{ + return g_BilateralUpscaleInput.SampleLevel(g_LinearClampSampler, float3(uv, index), 0).x; +} + +float FFX_CACAO_BilateralUpscale_SampleSSAOPoint(float2 uv, int index) +{ + return g_BilateralUpscaleInput.SampleLevel(g_PointClampSampler, float3(uv, index), 0).x; +} + +float2 FFX_CACAO_BilateralUpscale_LoadSSAO(int2 coord, int index) +{ + return g_BilateralUpscaleInput.Load(int4(coord, index, 0)); +} + +float FFX_CACAO_BilateralUpscale_LoadDepth(int2 coord, int2 offset) +{ + return g_BilateralUpscaleDepth.Load(int3(coord, 0), offset); +} + +float FFX_CACAO_BilateralUpscale_LoadDownscaledDepth(int2 coord, int index) +{ + return g_BilateralUpscaleDownscaledDepth.Load(int4(coord, index, 0)); +} + +#endif diff --git a/ffx-cacao/src/ffx_cacao_defines.h 
b/ffx-cacao/src/ffx_cacao_defines.h index 9b17fcf..ff825e0 100644 --- a/ffx-cacao/src/ffx_cacao_defines.h +++ b/ffx-cacao/src/ffx_cacao_defines.h @@ -1,4 +1,4 @@ -// Modifications Copyright © 2020. Advanced Micro Devices, Inc. All Rights Reserved. +// Modifications Copyright © 2021. Advanced Micro Devices, Inc. All Rights Reserved. /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Copyright (c) 2016, Intel Corporation @@ -20,41 +20,38 @@ // Defines for constants common to both CACAO.cpp and CACAO.hlsl -// ==================================================================== -// Prepare shader dimensions +#ifndef FFX_CACAO_DEFINES_H +#define FFX_CACAO_DEFINES_H -#define PREPARE_DEPTHS_AND_MIPS_WIDTH 8 -#define PREPARE_DEPTHS_AND_MIPS_HEIGHT 8 +// ============================================================================ +// Prepare -#define PREPARE_DEPTHS_WIDTH 8 -#define PREPARE_DEPTHS_HEIGHT 8 +#define FFX_CACAO_PREPARE_DEPTHS_AND_MIPS_WIDTH 8 +#define FFX_CACAO_PREPARE_DEPTHS_AND_MIPS_HEIGHT 8 -#define PREPARE_DEPTHS_HALF_WIDTH 8 -#define PREPARE_DEPTHS_HALF_HEIGHT 8 +#define FFX_CACAO_PREPARE_DEPTHS_WIDTH 8 +#define FFX_CACAO_PREPARE_DEPTHS_HEIGHT 8 -#define PREPARE_DEPTHS_NORMALS_AND_MIPS_WIDTH 8 -#define PREPARE_DEPTHS_NORMALS_AND_MIPS_HEIGHT 8 +#define FFX_CACAO_PREPARE_DEPTHS_HALF_WIDTH 8 +#define FFX_CACAO_PREPARE_DEPTHS_HALF_HEIGHT 8 -#define PREPARE_DEPTHS_AND_NORMALS_WIDTH 8 -#define PREPARE_DEPTHS_AND_NORMALS_HEIGHT 8 - -#define PREPARE_DEPTHS_AND_NORMALS_HALF_WIDTH 8 -#define PREPARE_DEPTHS_AND_NORMALS_HALF_HEIGHT 8 - -#define PREPARE_NORMALS_WIDTH 8 -#define PREPARE_NORMALS_HEIGHT 8 +#define FFX_CACAO_PREPARE_NORMALS_WIDTH 8 +#define FFX_CACAO_PREPARE_NORMALS_HEIGHT 8 #define PREPARE_NORMALS_FROM_INPUT_NORMALS_WIDTH 8 #define PREPARE_NORMALS_FROM_INPUT_NORMALS_HEIGHT 8 -// ==================================================================== -// Generate SSAO shader dimensions 
+// ============================================================================ +// SSAO Generation + +#define FFX_CACAO_GENERATE_SPARSE_WIDTH 4 +#define FFX_CACAO_GENERATE_SPARSE_HEIGHT 16 -#define GENERATE_WIDTH 8 -#define GENERATE_HEIGHT 8 +#define FFX_CACAO_GENERATE_WIDTH 8 +#define FFX_CACAO_GENERATE_HEIGHT 8 -// ==================================================================== -// Importance map shader dimensions +// ============================================================================ +// Importance Map #define IMPORTANCE_MAP_WIDTH 8 #define IMPORTANCE_MAP_HEIGHT 8 @@ -65,29 +62,22 @@ #define IMPORTANCE_MAP_B_WIDTH 8 #define IMPORTANCE_MAP_B_HEIGHT 8 -// ==================================================================== -// Blur shader dimensions - -#define BLUR_WIDTH 16 -#define BLUR_HEIGHT 16 - -// ==================================================================== -// Apply shader dimensions +// ============================================================================ +// Edge Sensitive Blur -#define APPLY_WIDTH 8 -#define APPLY_HEIGHT 8 +#define FFX_CACAO_BLUR_WIDTH 16 +#define FFX_CACAO_BLUR_HEIGHT 16 -// ==================================================================== -// Reinterleave shader dimensions +// ============================================================================ +// Apply -#define REINTERLEAVE_WIDTH 16 -#define REINTERLEAVE_HEIGHT 8 +#define FFX_CACAO_APPLY_WIDTH 8 +#define FFX_CACAO_APPLY_HEIGHT 8 -// ==================================================================== -// Upscale +// ============================================================================ +// Bilateral Upscale -#define UPSCALE_WIDTH 8 -#define UPSCALE_HEIGHT 8 +#define FFX_CACAO_BILATERAL_UPSCALE_WIDTH 8 +#define FFX_CACAO_BILATERAL_UPSCALE_HEIGHT 8 -#define BILATERAL_UPSCALE_WIDTH 8 -#define BILATERAL_UPSCALE_HEIGHT 8 \ No newline at end of file +#endif diff --git a/ffx-cacao/src/ffx_cacao_impl.cpp b/ffx-cacao/src/ffx_cacao_impl.cpp new 
file mode 100644 index 0000000..18aef4f --- /dev/null +++ b/ffx-cacao/src/ffx_cacao_impl.cpp @@ -0,0 +1,3834 @@ +// Modifications Copyright © 2021. Advanced Micro Devices, Inc. All Rights Reserved. + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2016, Intel Corporation +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of +// the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO +// THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2016-09-07: filip.strugar@intel.com: first commit +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + +#include "ffx_cacao_impl.h" +#include "ffx_cacao_defines.h" + +#include +#include // cos, sin +#include // memcpy +#include // snprintf + +#ifdef FFX_CACAO_ENABLE_D3D12 +#include +#endif + +// Define symbol to enable DirectX debug markers created using Cauldron +#define FFX_CACAO_ENABLE_CAULDRON_DEBUG + +#define FFX_CACAO_ASSERT(exp) assert(exp) +#define FFX_CACAO_ARRAY_SIZE(xs) (sizeof(xs)/sizeof(xs[0])) +#define FFX_CACAO_COS(x) cosf(x) +#define FFX_CACAO_SIN(x) sinf(x) +#define FFX_CACAO_MIN(x, y) (((x) < (y)) ? (x) : (y)) +#define FFX_CACAO_MAX(x, y) (((x) > (y)) ? (x) : (y)) +#define FFX_CACAO_CLAMP(value, lower, upper) FFX_CACAO_MIN(FFX_CACAO_MAX(value, lower), upper) +#define FFX_CACAO_OFFSET_OF(T, member) (size_t)(&(((T*)0)->member)) + +#ifdef FFX_CACAO_ENABLE_D3D12 +#include "PrecompiledShadersDXIL/CACAOClearLoadCounter.h" + +#include "PrecompiledShadersDXIL/CACAOPrepareDownsampledDepthsHalf.h" +#include "PrecompiledShadersDXIL/CACAOPrepareNativeDepthsHalf.h" + +#include "PrecompiledShadersDXIL/CACAOPrepareDownsampledDepthsAndMips.h" +#include "PrecompiledShadersDXIL/CACAOPrepareNativeDepthsAndMips.h" + +#include "PrecompiledShadersDXIL/CACAOPrepareDownsampledNormals.h" +#include "PrecompiledShadersDXIL/CACAOPrepareNativeNormals.h" + +#include "PrecompiledShadersDXIL/CACAOPrepareDownsampledNormalsFromInputNormals.h" +#include "PrecompiledShadersDXIL/CACAOPrepareNativeNormalsFromInputNormals.h" + +#include "PrecompiledShadersDXIL/CACAOPrepareDownsampledDepths.h" +#include "PrecompiledShadersDXIL/CACAOPrepareNativeDepths.h" + +#include "PrecompiledShadersDXIL/CACAOGenerateQ0.h" +#include "PrecompiledShadersDXIL/CACAOGenerateQ1.h" 
+#include "PrecompiledShadersDXIL/CACAOGenerateQ2.h" +#include "PrecompiledShadersDXIL/CACAOGenerateQ3.h" +#include "PrecompiledShadersDXIL/CACAOGenerateQ3Base.h" + +#include "PrecompiledShadersDXIL/CACAOGenerateImportanceMap.h" +#include "PrecompiledShadersDXIL/CACAOPostprocessImportanceMapA.h" +#include "PrecompiledShadersDXIL/CACAOPostprocessImportanceMapB.h" + +#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur1.h" +#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur2.h" +#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur3.h" +#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur4.h" +#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur5.h" +#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur6.h" +#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur7.h" +#include "PrecompiledShadersDXIL/CACAOEdgeSensitiveBlur8.h" + +#include "PrecompiledShadersDXIL/CACAOApply.h" +#include "PrecompiledShadersDXIL/CACAONonSmartApply.h" +#include "PrecompiledShadersDXIL/CACAONonSmartHalfApply.h" + +#include "PrecompiledShadersDXIL/CACAOUpscaleBilateral5x5Smart.h" +#include "PrecompiledShadersDXIL/CACAOUpscaleBilateral5x5NonSmart.h" +#include "PrecompiledShadersDXIL/CACAOUpscaleBilateral5x5Half.h" +#endif + +#ifdef FFX_CACAO_ENABLE_VULKAN +// 16 bit versions +#include "PrecompiledShadersSPIRV/CACAOClearLoadCounter_16.h" + +#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsHalf_16.h" +#include "PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsHalf_16.h" + +#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsAndMips_16.h" +#include "PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsAndMips_16.h" + +#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormals_16.h" +#include "PrecompiledShadersSPIRV/CACAOPrepareNativeNormals_16.h" + +#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormalsFromInputNormals_16.h" +#include "PrecompiledShadersSPIRV/CACAOPrepareNativeNormalsFromInputNormals_16.h" + +#include 
"PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepths_16.h" +#include "PrecompiledShadersSPIRV/CACAOPrepareNativeDepths_16.h" + +#include "PrecompiledShadersSPIRV/CACAOGenerateQ0_16.h" +#include "PrecompiledShadersSPIRV/CACAOGenerateQ1_16.h" +#include "PrecompiledShadersSPIRV/CACAOGenerateQ2_16.h" +#include "PrecompiledShadersSPIRV/CACAOGenerateQ3_16.h" +#include "PrecompiledShadersSPIRV/CACAOGenerateQ3Base_16.h" + +#include "PrecompiledShadersSPIRV/CACAOGenerateImportanceMap_16.h" +#include "PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapA_16.h" +#include "PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapB_16.h" + +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur1_16.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur2_16.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur3_16.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur4_16.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur5_16.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur6_16.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur7_16.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur8_16.h" + +#include "PrecompiledShadersSPIRV/CACAOApply_16.h" +#include "PrecompiledShadersSPIRV/CACAONonSmartApply_16.h" +#include "PrecompiledShadersSPIRV/CACAONonSmartHalfApply_16.h" + +#include "PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5Smart_16.h" +#include "PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5NonSmart_16.h" +#include "PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5Half_16.h" + +// 32 bit versions +#include "PrecompiledShadersSPIRV/CACAOClearLoadCounter_32.h" + +#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsHalf_32.h" +#include "PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsHalf_32.h" + +#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepthsAndMips_32.h" +#include "PrecompiledShadersSPIRV/CACAOPrepareNativeDepthsAndMips_32.h" + +#include 
"PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormals_32.h" +#include "PrecompiledShadersSPIRV/CACAOPrepareNativeNormals_32.h" + +#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledNormalsFromInputNormals_32.h" +#include "PrecompiledShadersSPIRV/CACAOPrepareNativeNormalsFromInputNormals_32.h" + +#include "PrecompiledShadersSPIRV/CACAOPrepareDownsampledDepths_32.h" +#include "PrecompiledShadersSPIRV/CACAOPrepareNativeDepths_32.h" + +#include "PrecompiledShadersSPIRV/CACAOGenerateQ0_32.h" +#include "PrecompiledShadersSPIRV/CACAOGenerateQ1_32.h" +#include "PrecompiledShadersSPIRV/CACAOGenerateQ2_32.h" +#include "PrecompiledShadersSPIRV/CACAOGenerateQ3_32.h" +#include "PrecompiledShadersSPIRV/CACAOGenerateQ3Base_32.h" + +#include "PrecompiledShadersSPIRV/CACAOGenerateImportanceMap_32.h" +#include "PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapA_32.h" +#include "PrecompiledShadersSPIRV/CACAOPostprocessImportanceMapB_32.h" + +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur1_32.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur2_32.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur3_32.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur4_32.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur5_32.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur6_32.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur7_32.h" +#include "PrecompiledShadersSPIRV/CACAOEdgeSensitiveBlur8_32.h" + +#include "PrecompiledShadersSPIRV/CACAOApply_32.h" +#include "PrecompiledShadersSPIRV/CACAONonSmartApply_32.h" +#include "PrecompiledShadersSPIRV/CACAONonSmartHalfApply_32.h" + +#include "PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5Smart_32.h" +#include "PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5NonSmart_32.h" +#include "PrecompiledShadersSPIRV/CACAOUpscaleBilateral5x5Half_32.h" +#endif + +#define MAX_BLUR_PASSES 8 + +#if defined(FFX_CACAO_ENABLE_CAULDRON_DEBUG) && defined(FFX_CACAO_ENABLE_D3D12) +#include + 
+#define USER_MARKER(name) CAULDRON_DX12::UserMarker __marker(commandList, name) +#else +#define USER_MARKER(name) +#endif + +inline static uint32_t dispatchSize(uint32_t tileSize, uint32_t totalSize) +{ + return (totalSize + tileSize - 1) / tileSize; +} + +#ifdef FFX_CACAO_ENABLE_PROFILING +// TIMESTAMP(name) +#define TIMESTAMPS \ + TIMESTAMP(BEGIN) \ + TIMESTAMP(PREPARE) \ + TIMESTAMP(BASE_SSAO_PASS) \ + TIMESTAMP(IMPORTANCE_MAP) \ + TIMESTAMP(GENERATE_SSAO) \ + TIMESTAMP(EDGE_SENSITIVE_BLUR) \ + TIMESTAMP(BILATERAL_UPSAMPLE) \ + TIMESTAMP(APPLY) + +typedef enum TimestampID { +#define TIMESTAMP(name) TIMESTAMP_##name, + TIMESTAMPS +#undef TIMESTAMP + NUM_TIMESTAMPS +} TimestampID; + +static const char *TIMESTAMP_NAMES[NUM_TIMESTAMPS] = { +#define TIMESTAMP(name) "FFX_CACAO_" #name, + TIMESTAMPS +#undef TIMESTAMP +}; + +#define NUM_TIMESTAMP_BUFFERS 5 +#endif + +// TEXTURE_FORMAT(name, vulkan_format, d3d12_format) +#define TEXTURE_FORMATS \ + TEXTURE_FORMAT(R16_SFLOAT, VK_FORMAT_R16_SFLOAT, DXGI_FORMAT_R16_FLOAT) \ + TEXTURE_FORMAT(R16G16B16A16_SFLOAT, VK_FORMAT_R16G16B16A16_SFLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT) \ + TEXTURE_FORMAT(R8G8B8A8_SNORM, VK_FORMAT_R8G8B8A8_SNORM, DXGI_FORMAT_R8G8B8A8_SNORM) \ + TEXTURE_FORMAT(R8G8_UNORM, VK_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UNORM) \ + TEXTURE_FORMAT(R8_UNORM, VK_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UNORM) + +typedef enum TextureFormatID { +#define TEXTURE_FORMAT(name, _vulkan_format, _d3d12_format) TEXTURE_FORMAT_##name, + TEXTURE_FORMATS +#undef TEXTURE_FORMAT +} TextureFormatID; + +#ifdef FFX_CACAO_ENABLE_VULKAN +static const VkFormat TEXTURE_FORMAT_LOOKUP_VK[] = { +#define TEXTURE_FORMAT(_name, vulkan_format, _d3d12_format) vulkan_format, + TEXTURE_FORMATS +#undef TEXTURE_FORMAT +}; +#endif +#ifdef FFX_CACAO_ENABLE_D3D12 +static const DXGI_FORMAT TEXTURE_FORMAT_LOOKUP_D3D12[] = { +#define TEXTURE_FORMAT(_name, _vulkan_format, d3d12_format) d3d12_format, + TEXTURE_FORMATS +#undef TEXTURE_FORMAT +}; +#endif + +// 
TEXTURE(name, width, height, texture_format, array_size, num_mips) +#define TEXTURES \ + TEXTURE(DEINTERLEAVED_DEPTHS, deinterleavedDepthBufferWidth, deinterleavedDepthBufferHeight, TEXTURE_FORMAT_R16_SFLOAT, 4, 4) \ + TEXTURE(DEINTERLEAVED_NORMALS, ssaoBufferWidth, ssaoBufferHeight, TEXTURE_FORMAT_R8G8B8A8_SNORM, 4, 1) \ + TEXTURE(SSAO_BUFFER_PING, ssaoBufferWidth, ssaoBufferHeight, TEXTURE_FORMAT_R8G8_UNORM, 4, 1) \ + TEXTURE(SSAO_BUFFER_PONG, ssaoBufferWidth, ssaoBufferHeight, TEXTURE_FORMAT_R8G8_UNORM, 4, 1) \ + TEXTURE(IMPORTANCE_MAP, importanceMapWidth, importanceMapHeight, TEXTURE_FORMAT_R8_UNORM, 1, 1) \ + TEXTURE(IMPORTANCE_MAP_PONG, importanceMapWidth, importanceMapHeight, TEXTURE_FORMAT_R8_UNORM, 1, 1) \ + TEXTURE(DOWNSAMPLED_SSAO_BUFFER, downsampledSsaoBufferWidth, downsampledSsaoBufferHeight, TEXTURE_FORMAT_R8_UNORM, 1, 1) + +typedef enum TextureID { +#define TEXTURE(name, _width, _height, _format, _array_size, _num_mips) TEXTURE_##name, + TEXTURES +#undef TEXTURE + NUM_TEXTURES +} TextureID; + +typedef struct TextureMetaData { + size_t widthOffset; + size_t heightOffset; + TextureFormatID format; + uint32_t arraySize; + uint32_t numMips; + const char *name; +} TextureMetaData; + +static const TextureMetaData TEXTURE_META_DATA[NUM_TEXTURES] = { +#define TEXTURE(name, width, height, format, array_size, num_mips) { FFX_CACAO_OFFSET_OF(FFX_CACAO_BufferSizeInfo, width), FFX_CACAO_OFFSET_OF(FFX_CACAO_BufferSizeInfo, height), format, array_size, num_mips, "FFX_CACAO_" #name }, + TEXTURES +#undef TEXTURE +}; + +// DESCRIPTOR_SET_LAYOUT(name, num_inputs, num_outputs) +#define DESCRIPTOR_SET_LAYOUTS \ + DESCRIPTOR_SET_LAYOUT(CLEAR_LOAD_COUNTER, 0, 1) \ + DESCRIPTOR_SET_LAYOUT(PREPARE_DEPTHS, 1, 1) \ + DESCRIPTOR_SET_LAYOUT(PREPARE_DEPTHS_MIPS, 1, 4) \ + DESCRIPTOR_SET_LAYOUT(PREPARE_POINTS, 1, 1) \ + DESCRIPTOR_SET_LAYOUT(PREPARE_POINTS_MIPS, 1, 4) \ + DESCRIPTOR_SET_LAYOUT(PREPARE_NORMALS, 1, 1) \ + DESCRIPTOR_SET_LAYOUT(PREPARE_NORMALS_FROM_INPUT_NORMALS, 1, 
1) \ + DESCRIPTOR_SET_LAYOUT(GENERATE, 2, 1) \ + DESCRIPTOR_SET_LAYOUT(GENERATE_ADAPTIVE, 5, 1) \ + DESCRIPTOR_SET_LAYOUT(GENERATE_IMPORTANCE_MAP, 1, 1) \ + DESCRIPTOR_SET_LAYOUT(POSTPROCESS_IMPORTANCE_MAP_A, 1, 1) \ + DESCRIPTOR_SET_LAYOUT(POSTPROCESS_IMPORTANCE_MAP_B, 1, 2) \ + DESCRIPTOR_SET_LAYOUT(EDGE_SENSITIVE_BLUR, 1, 1) \ + DESCRIPTOR_SET_LAYOUT(APPLY, 1, 1) \ + DESCRIPTOR_SET_LAYOUT(BILATERAL_UPSAMPLE, 4, 1) + +typedef enum DescriptorSetLayoutID { +#define DESCRIPTOR_SET_LAYOUT(name, _num_inputs, _num_outputs) DSL_##name, + DESCRIPTOR_SET_LAYOUTS +#undef DESCRIPTOR_SET_LAYOUT + NUM_DESCRIPTOR_SET_LAYOUTS +} DescriptorSetLayoutID; + +typedef struct DescriptorSetLayoutMetaData { + uint32_t numInputs; + uint32_t numOutputs; + const char *name; +} DescriptorSetLayoutMetaData; + +static const DescriptorSetLayoutMetaData DESCRIPTOR_SET_LAYOUT_META_DATA[NUM_DESCRIPTOR_SET_LAYOUTS] = { +#define DESCRIPTOR_SET_LAYOUT(name, num_inputs, num_outputs) { num_inputs, num_outputs, "FFX_CACAO_DSL_" #name }, + DESCRIPTOR_SET_LAYOUTS +#undef DESCRIPTOR_SET_LAYOUT +}; + +// DESCRIPTOR_SET(name, layout_name, pass) +#define DESCRIPTOR_SETS \ + DESCRIPTOR_SET(CLEAR_LOAD_COUNTER, CLEAR_LOAD_COUNTER, 0) \ + DESCRIPTOR_SET(PREPARE_DEPTHS, PREPARE_DEPTHS, 0) \ + DESCRIPTOR_SET(PREPARE_DEPTHS_MIPS, PREPARE_DEPTHS_MIPS, 0) \ + DESCRIPTOR_SET(PREPARE_POINTS, PREPARE_POINTS, 0) \ + DESCRIPTOR_SET(PREPARE_POINTS_MIPS, PREPARE_POINTS_MIPS, 0) \ + DESCRIPTOR_SET(PREPARE_NORMALS, PREPARE_NORMALS, 0) \ + DESCRIPTOR_SET(PREPARE_NORMALS_FROM_INPUT_NORMALS, PREPARE_NORMALS_FROM_INPUT_NORMALS, 0) \ + DESCRIPTOR_SET(GENERATE_ADAPTIVE_BASE_0, GENERATE, 0) \ + DESCRIPTOR_SET(GENERATE_ADAPTIVE_BASE_1, GENERATE, 1) \ + DESCRIPTOR_SET(GENERATE_ADAPTIVE_BASE_2, GENERATE, 2) \ + DESCRIPTOR_SET(GENERATE_ADAPTIVE_BASE_3, GENERATE, 3) \ + DESCRIPTOR_SET(GENERATE_0, GENERATE, 0) \ + DESCRIPTOR_SET(GENERATE_1, GENERATE, 1) \ + DESCRIPTOR_SET(GENERATE_2, GENERATE, 2) \ + DESCRIPTOR_SET(GENERATE_3, GENERATE, 
3) \ + DESCRIPTOR_SET(GENERATE_ADAPTIVE_0, GENERATE_ADAPTIVE, 0) \ + DESCRIPTOR_SET(GENERATE_ADAPTIVE_1, GENERATE_ADAPTIVE, 1) \ + DESCRIPTOR_SET(GENERATE_ADAPTIVE_2, GENERATE_ADAPTIVE, 2) \ + DESCRIPTOR_SET(GENERATE_ADAPTIVE_3, GENERATE_ADAPTIVE, 3) \ + DESCRIPTOR_SET(GENERATE_IMPORTANCE_MAP, GENERATE_IMPORTANCE_MAP, 0) \ + DESCRIPTOR_SET(POSTPROCESS_IMPORTANCE_MAP_A, POSTPROCESS_IMPORTANCE_MAP_A, 0) \ + DESCRIPTOR_SET(POSTPROCESS_IMPORTANCE_MAP_B, POSTPROCESS_IMPORTANCE_MAP_B, 0) \ + DESCRIPTOR_SET(EDGE_SENSITIVE_BLUR_0, EDGE_SENSITIVE_BLUR, 0) \ + DESCRIPTOR_SET(EDGE_SENSITIVE_BLUR_1, EDGE_SENSITIVE_BLUR, 1) \ + DESCRIPTOR_SET(EDGE_SENSITIVE_BLUR_2, EDGE_SENSITIVE_BLUR, 2) \ + DESCRIPTOR_SET(EDGE_SENSITIVE_BLUR_3, EDGE_SENSITIVE_BLUR, 3) \ + DESCRIPTOR_SET(APPLY_PING, APPLY, 0) \ + DESCRIPTOR_SET(APPLY_PONG, APPLY, 0) \ + DESCRIPTOR_SET(BILATERAL_UPSAMPLE_PING, BILATERAL_UPSAMPLE, 0) \ + DESCRIPTOR_SET(BILATERAL_UPSAMPLE_PONG, BILATERAL_UPSAMPLE, 0) + +typedef enum DescriptorSetID { +#define DESCRIPTOR_SET(name, _layout_name, _pass) DS_##name, + DESCRIPTOR_SETS +#undef DESCRIPTOR_SET + NUM_DESCRIPTOR_SETS +} DescriptorSetID; + +typedef struct DescriptorSetMetaData { + DescriptorSetLayoutID descriptorSetLayoutID; + uint32_t pass; + const char *name; +} DescriptorSetMetaData; + +static const DescriptorSetMetaData DESCRIPTOR_SET_META_DATA[NUM_DESCRIPTOR_SETS] = { +#define DESCRIPTOR_SET(name, layout_name, pass) { DSL_##layout_name, pass, "FFX_CACAO_DS_" #name }, + DESCRIPTOR_SETS +#undef DESCRIPTOR_SET +}; + +// VIEW_TYPE(name, vulkan_view_type, d3d12_view_type_srv, d3d12_view_type_uav) +#define VIEW_TYPES \ + VIEW_TYPE(2D, VK_IMAGE_VIEW_TYPE_2D, D3D12_SRV_DIMENSION_TEXTURE2D, D3D12_UAV_DIMENSION_TEXTURE2D) \ + VIEW_TYPE(2D_ARRAY, VK_IMAGE_VIEW_TYPE_2D_ARRAY, D3D12_SRV_DIMENSION_TEXTURE2DARRAY, D3D12_UAV_DIMENSION_TEXTURE2DARRAY) + +typedef enum ViewTypeID { +#define VIEW_TYPE(name, _vulkan_view_type, _d3d12_view_type_srv, _d3d12_view_type_uav) VIEW_TYPE_##name, + VIEW_TYPES +#undef 
VIEW_TYPE +} ViewTypeID; + +#ifdef FFX_CACAO_ENABLE_VULKAN +static const VkImageViewType VIEW_TYPE_LOOKUP_VK[] = { +#define VIEW_TYPE(_name, vulkan_view_type, _d3d12_view_type_srv, _d3d12_view_type_uav) vulkan_view_type, + VIEW_TYPES +#undef VIEW_TYPE +}; +#endif + +#ifdef FFX_CACAO_ENABLE_D3D12 +static const D3D12_SRV_DIMENSION VIEW_TYPE_LOOKUP_D3D12_SRV[] = { +#define VIEW_TYPE(_name, _vulkan_view_type, d3d12_view_type_srv, _d3d12_view_type_uav) d3d12_view_type_srv, + VIEW_TYPES +#undef VIEW_TYPE +}; + +static const D3D12_UAV_DIMENSION VIEW_TYPE_LOOKUP_D3D12_UAV[] = { +#define VIEW_TYPE(_name, _vulkan_view_type, _d3d12_view_type_srv, d3d12_view_type_uav) d3d12_view_type_uav, + VIEW_TYPES +#undef VIEW_TYPE +}; +#endif + +// SHADER_RESOURCE_VIEW(name, texture, view_dimension, most_detailed_mip, mip_levels, first_array_slice, array_size) +#define SHADER_RESOURCE_VIEWS \ + SHADER_RESOURCE_VIEW(DEINTERLEAVED_DEPTHS, DEINTERLEAVED_DEPTHS, VIEW_TYPE_2D_ARRAY, 0, 4, 0, 4) \ + SHADER_RESOURCE_VIEW(DEINTERLEAVED_DEPTHS_0, DEINTERLEAVED_DEPTHS, VIEW_TYPE_2D_ARRAY, 0, 4, 0, 1) \ + SHADER_RESOURCE_VIEW(DEINTERLEAVED_DEPTHS_1, DEINTERLEAVED_DEPTHS, VIEW_TYPE_2D_ARRAY, 0, 4, 1, 1) \ + SHADER_RESOURCE_VIEW(DEINTERLEAVED_DEPTHS_2, DEINTERLEAVED_DEPTHS, VIEW_TYPE_2D_ARRAY, 0, 4, 2, 1) \ + SHADER_RESOURCE_VIEW(DEINTERLEAVED_DEPTHS_3, DEINTERLEAVED_DEPTHS, VIEW_TYPE_2D_ARRAY, 0, 4, 3, 1) \ + SHADER_RESOURCE_VIEW(DEINTERLEAVED_NORMALS, DEINTERLEAVED_NORMALS, VIEW_TYPE_2D_ARRAY, 0, 1, 0, 4) \ + SHADER_RESOURCE_VIEW(IMPORTANCE_MAP, IMPORTANCE_MAP, VIEW_TYPE_2D, 0, 1, 0, 1) \ + SHADER_RESOURCE_VIEW(IMPORTANCE_MAP_PONG, IMPORTANCE_MAP_PONG, VIEW_TYPE_2D, 0, 1, 0, 1) \ + SHADER_RESOURCE_VIEW(SSAO_BUFFER_PING, SSAO_BUFFER_PING, VIEW_TYPE_2D_ARRAY, 0, 1, 0, 4) \ + SHADER_RESOURCE_VIEW(SSAO_BUFFER_PING_0, SSAO_BUFFER_PING, VIEW_TYPE_2D_ARRAY, 0, 1, 0, 1) \ + SHADER_RESOURCE_VIEW(SSAO_BUFFER_PING_1, SSAO_BUFFER_PING, VIEW_TYPE_2D_ARRAY, 0, 1, 1, 1) \ + SHADER_RESOURCE_VIEW(SSAO_BUFFER_PING_2, 
SSAO_BUFFER_PING, VIEW_TYPE_2D_ARRAY, 0, 1, 2, 1) \ + SHADER_RESOURCE_VIEW(SSAO_BUFFER_PING_3, SSAO_BUFFER_PING, VIEW_TYPE_2D_ARRAY, 0, 1, 3, 1) \ + SHADER_RESOURCE_VIEW(SSAO_BUFFER_PONG, SSAO_BUFFER_PONG, VIEW_TYPE_2D_ARRAY, 0, 1, 0, 4) \ + SHADER_RESOURCE_VIEW(SSAO_BUFFER_PONG_0, SSAO_BUFFER_PONG, VIEW_TYPE_2D_ARRAY, 0, 1, 0, 1) \ + SHADER_RESOURCE_VIEW(SSAO_BUFFER_PONG_1, SSAO_BUFFER_PONG, VIEW_TYPE_2D_ARRAY, 0, 1, 1, 1) \ + SHADER_RESOURCE_VIEW(SSAO_BUFFER_PONG_2, SSAO_BUFFER_PONG, VIEW_TYPE_2D_ARRAY, 0, 1, 2, 1) \ + SHADER_RESOURCE_VIEW(SSAO_BUFFER_PONG_3, SSAO_BUFFER_PONG, VIEW_TYPE_2D_ARRAY, 0, 1, 3, 1) + +typedef enum ShaderResourceViewID { +#define SHADER_RESOURCE_VIEW(name, _texture, _view_dimension, _most_detailed_mip, _mip_levels, _first_array_slice, _array_size) SRV_##name, + SHADER_RESOURCE_VIEWS +#undef SHADER_RESOURCE_VIEW + NUM_SHADER_RESOURCE_VIEWS +} ShaderResourceViewID; + +typedef struct ShaderResourceViewMetaData { + TextureID texture; + ViewTypeID viewType; + uint32_t mostDetailedMip; + uint32_t mipLevels; + uint32_t firstArraySlice; + uint32_t arraySize; +} ShaderResourceViewMetaData; + +static const ShaderResourceViewMetaData SRV_META_DATA[NUM_SHADER_RESOURCE_VIEWS] = { +#define SHADER_RESOURCE_VIEW(_name, texture, view_dimension, most_detailed_mip, mip_levels, first_array_slice, array_size) { TEXTURE_##texture, view_dimension, most_detailed_mip, mip_levels, first_array_slice, array_size }, + SHADER_RESOURCE_VIEWS +#undef SHADER_RESOURCE_VIEW +}; + +// UNORDERED_ACCESS_VIEW(name, texture, view_dimension, mip_slice, first_array_slice, array_size) +#define UNORDERED_ACCESS_VIEWS \ + UNORDERED_ACCESS_VIEW(DEINTERLEAVED_DEPTHS_MIP_0, DEINTERLEAVED_DEPTHS, VIEW_TYPE_2D_ARRAY, 0, 0, 4) \ + UNORDERED_ACCESS_VIEW(DEINTERLEAVED_DEPTHS_MIP_1, DEINTERLEAVED_DEPTHS, VIEW_TYPE_2D_ARRAY, 1, 0, 4) \ + UNORDERED_ACCESS_VIEW(DEINTERLEAVED_DEPTHS_MIP_2, DEINTERLEAVED_DEPTHS, VIEW_TYPE_2D_ARRAY, 2, 0, 4) \ + UNORDERED_ACCESS_VIEW(DEINTERLEAVED_DEPTHS_MIP_3, 
DEINTERLEAVED_DEPTHS, VIEW_TYPE_2D_ARRAY, 3, 0, 4) \ + UNORDERED_ACCESS_VIEW(DEINTERLEAVED_NORMALS, DEINTERLEAVED_NORMALS, VIEW_TYPE_2D_ARRAY, 0, 0, 4) \ + UNORDERED_ACCESS_VIEW(IMPORTANCE_MAP, IMPORTANCE_MAP, VIEW_TYPE_2D, 0, 0, 1) \ + UNORDERED_ACCESS_VIEW(IMPORTANCE_MAP_PONG, IMPORTANCE_MAP_PONG, VIEW_TYPE_2D, 0, 0, 1) \ + UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PING, SSAO_BUFFER_PING, VIEW_TYPE_2D_ARRAY, 0, 0, 4) \ + UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PING_0, SSAO_BUFFER_PING, VIEW_TYPE_2D_ARRAY, 0, 0, 1) \ + UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PING_1, SSAO_BUFFER_PING, VIEW_TYPE_2D_ARRAY, 0, 1, 1) \ + UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PING_2, SSAO_BUFFER_PING, VIEW_TYPE_2D_ARRAY, 0, 2, 1) \ + UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PING_3, SSAO_BUFFER_PING, VIEW_TYPE_2D_ARRAY, 0, 3, 1) \ + UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PONG, SSAO_BUFFER_PONG, VIEW_TYPE_2D_ARRAY, 0, 0, 4) \ + UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PONG_0, SSAO_BUFFER_PONG, VIEW_TYPE_2D_ARRAY, 0, 0, 1) \ + UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PONG_1, SSAO_BUFFER_PONG, VIEW_TYPE_2D_ARRAY, 0, 1, 1) \ + UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PONG_2, SSAO_BUFFER_PONG, VIEW_TYPE_2D_ARRAY, 0, 2, 1) \ + UNORDERED_ACCESS_VIEW(SSAO_BUFFER_PONG_3, SSAO_BUFFER_PONG, VIEW_TYPE_2D_ARRAY, 0, 3, 1) + +typedef enum UnorderedAccessViewID { +#define UNORDERED_ACCESS_VIEW(name, _texture, _view_dimension, _mip_slice, _first_array_slice, _array_size) UAV_##name, + UNORDERED_ACCESS_VIEWS +#undef UNORDERED_ACCESS_VIEW + NUM_UNORDERED_ACCESS_VIEWS +} UnorderedAccessViewID; + +typedef struct UnorderedAccessViewMetaData { + TextureID textureID; + ViewTypeID viewType; + uint32_t mostDetailedMip; + uint32_t firstArraySlice; + uint32_t arraySize; +} UnorderedAccessViewMetaData; + +static const UnorderedAccessViewMetaData UAV_META_DATA[NUM_UNORDERED_ACCESS_VIEWS] = { +#define UNORDERED_ACCESS_VIEW(_name, texture, view_dimension, mip_slice, first_array_slice, array_size) { TEXTURE_##texture, view_dimension, mip_slice, first_array_slice, array_size 
}, + UNORDERED_ACCESS_VIEWS +#undef UNORDERED_ACCESS_VIEW +}; + +// INPUT_DESCRIPTOR(descriptor_set_name, srv_name, binding_num) +#define INPUT_DESCRIPTOR_BINDINGS \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_0, DEINTERLEAVED_DEPTHS_0, 0) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_0, DEINTERLEAVED_NORMALS, 1) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_1, DEINTERLEAVED_DEPTHS_1, 0) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_1, DEINTERLEAVED_NORMALS, 1) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_2, DEINTERLEAVED_DEPTHS_2, 0) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_2, DEINTERLEAVED_NORMALS, 1) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_3, DEINTERLEAVED_DEPTHS_3, 0) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_3, DEINTERLEAVED_NORMALS, 1) \ + \ + INPUT_DESCRIPTOR_BINDING(GENERATE_0, DEINTERLEAVED_DEPTHS_0, 0) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_0, DEINTERLEAVED_NORMALS, 1) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_1, DEINTERLEAVED_DEPTHS_1, 0) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_1, DEINTERLEAVED_NORMALS, 1) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_2, DEINTERLEAVED_DEPTHS_2, 0) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_2, DEINTERLEAVED_NORMALS, 1) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_3, DEINTERLEAVED_DEPTHS_3, 0) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_3, DEINTERLEAVED_NORMALS, 1) \ + \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_0, DEINTERLEAVED_DEPTHS_0, 0) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_0, DEINTERLEAVED_NORMALS, 1) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_0, IMPORTANCE_MAP, 3) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_0, SSAO_BUFFER_PONG_0, 4) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_1, DEINTERLEAVED_DEPTHS_1, 0) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_1, DEINTERLEAVED_NORMALS, 1) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_1, IMPORTANCE_MAP, 3) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_1, SSAO_BUFFER_PONG_1, 4) \ + 
INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_2, DEINTERLEAVED_DEPTHS_2, 0) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_2, DEINTERLEAVED_NORMALS, 1) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_2, IMPORTANCE_MAP, 3) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_2, SSAO_BUFFER_PONG_2, 4) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_3, DEINTERLEAVED_DEPTHS_3, 0) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_3, DEINTERLEAVED_NORMALS, 1) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_3, IMPORTANCE_MAP, 3) \ + INPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_3, SSAO_BUFFER_PONG_3, 4) \ + \ + INPUT_DESCRIPTOR_BINDING(GENERATE_IMPORTANCE_MAP, SSAO_BUFFER_PONG, 0) \ + INPUT_DESCRIPTOR_BINDING(POSTPROCESS_IMPORTANCE_MAP_A, IMPORTANCE_MAP, 0) \ + INPUT_DESCRIPTOR_BINDING(POSTPROCESS_IMPORTANCE_MAP_B, IMPORTANCE_MAP_PONG, 0) \ + \ + INPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_0, SSAO_BUFFER_PING_0, 0) \ + INPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_1, SSAO_BUFFER_PING_1, 0) \ + INPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_2, SSAO_BUFFER_PING_2, 0) \ + INPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_3, SSAO_BUFFER_PING_3, 0) \ + \ + INPUT_DESCRIPTOR_BINDING(BILATERAL_UPSAMPLE_PING, SSAO_BUFFER_PING, 0) \ + INPUT_DESCRIPTOR_BINDING(BILATERAL_UPSAMPLE_PING, DEINTERLEAVED_DEPTHS, 2) \ + INPUT_DESCRIPTOR_BINDING(BILATERAL_UPSAMPLE_PONG, SSAO_BUFFER_PONG, 0) \ + INPUT_DESCRIPTOR_BINDING(BILATERAL_UPSAMPLE_PONG, DEINTERLEAVED_DEPTHS, 2) \ + \ + INPUT_DESCRIPTOR_BINDING(APPLY_PING, SSAO_BUFFER_PING, 0) \ + INPUT_DESCRIPTOR_BINDING(APPLY_PONG, SSAO_BUFFER_PONG, 0) + +// need this to define NUM_INPUT_DESCRIPTOR_BINDINGS +typedef enum InputDescriptorBindingID { +#define INPUT_DESCRIPTOR_BINDING(descriptor_set_name, srv_name, _binding_num) INPUT_DESCRIPTOR_BINDING_##descriptor_set_name##_##srv_name, + INPUT_DESCRIPTOR_BINDINGS +#undef INPUT_DESCRIPTOR_BINDING + NUM_INPUT_DESCRIPTOR_BINDINGS +} InputDescriptorBindingID; + +typedef struct InputDescriptorBindingMetaData { + DescriptorSetID 
descriptorID; + ShaderResourceViewID srvID; + uint32_t bindingNumber; +} InputDescriptorBindingMetaData; + +static const InputDescriptorBindingMetaData INPUT_DESCRIPTOR_BINDING_META_DATA[NUM_INPUT_DESCRIPTOR_BINDINGS] = { +#define INPUT_DESCRIPTOR_BINDING(descriptor_set_name, srv_name, binding_num) { DS_##descriptor_set_name, SRV_##srv_name, binding_num }, + INPUT_DESCRIPTOR_BINDINGS +#undef INPUT_DESCRIPTOR_BINDING +}; + +// OUTPUT_DESCRIPTOR(descriptor_set_name, uav_name, binding_num) +#define OUTPUT_DESCRIPTOR_BINDINGS \ + OUTPUT_DESCRIPTOR_BINDING(PREPARE_DEPTHS, DEINTERLEAVED_DEPTHS_MIP_0, 0) \ + OUTPUT_DESCRIPTOR_BINDING(PREPARE_DEPTHS_MIPS, DEINTERLEAVED_DEPTHS_MIP_0, 0) \ + OUTPUT_DESCRIPTOR_BINDING(PREPARE_DEPTHS_MIPS, DEINTERLEAVED_DEPTHS_MIP_1, 1) \ + OUTPUT_DESCRIPTOR_BINDING(PREPARE_DEPTHS_MIPS, DEINTERLEAVED_DEPTHS_MIP_2, 2) \ + OUTPUT_DESCRIPTOR_BINDING(PREPARE_DEPTHS_MIPS, DEINTERLEAVED_DEPTHS_MIP_3, 3) \ + OUTPUT_DESCRIPTOR_BINDING(PREPARE_NORMALS, DEINTERLEAVED_NORMALS, 0) \ + OUTPUT_DESCRIPTOR_BINDING(PREPARE_NORMALS_FROM_INPUT_NORMALS, DEINTERLEAVED_NORMALS, 0) \ + OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_0, SSAO_BUFFER_PONG_0, 0) \ + OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_1, SSAO_BUFFER_PONG_1, 0) \ + OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_2, SSAO_BUFFER_PONG_2, 0) \ + OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_BASE_3, SSAO_BUFFER_PONG_3, 0) \ + OUTPUT_DESCRIPTOR_BINDING(GENERATE_0, SSAO_BUFFER_PING_0, 0) \ + OUTPUT_DESCRIPTOR_BINDING(GENERATE_1, SSAO_BUFFER_PING_1, 0) \ + OUTPUT_DESCRIPTOR_BINDING(GENERATE_2, SSAO_BUFFER_PING_2, 0) \ + OUTPUT_DESCRIPTOR_BINDING(GENERATE_3, SSAO_BUFFER_PING_3, 0) \ + OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_0, SSAO_BUFFER_PING_0, 0) \ + OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_1, SSAO_BUFFER_PING_1, 0) \ + OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_2, SSAO_BUFFER_PING_2, 0) \ + OUTPUT_DESCRIPTOR_BINDING(GENERATE_ADAPTIVE_3, SSAO_BUFFER_PING_3, 0) \ + 
OUTPUT_DESCRIPTOR_BINDING(GENERATE_IMPORTANCE_MAP, IMPORTANCE_MAP, 0) \ + OUTPUT_DESCRIPTOR_BINDING(POSTPROCESS_IMPORTANCE_MAP_A, IMPORTANCE_MAP_PONG, 0) \ + OUTPUT_DESCRIPTOR_BINDING(POSTPROCESS_IMPORTANCE_MAP_B, IMPORTANCE_MAP, 0) \ + OUTPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_0, SSAO_BUFFER_PONG_0, 0) \ + OUTPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_1, SSAO_BUFFER_PONG_1, 0) \ + OUTPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_2, SSAO_BUFFER_PONG_2, 0) \ + OUTPUT_DESCRIPTOR_BINDING(EDGE_SENSITIVE_BLUR_3, SSAO_BUFFER_PONG_3, 0) + +typedef enum OutputDescriptorBindingID { +#define OUTPUT_DESCRIPTOR_BINDING(descriptor_set_name, uav_name, _binding_num) OUTPUT_DESCRIPTOR_BINDING_##descriptor_set_name##_##uav_name, + OUTPUT_DESCRIPTOR_BINDINGS +#undef OUTPUT_DESCRIPTOR_BINDING + NUM_OUTPUT_DESCRIPTOR_BINDINGS +} OutputDescriptorBindingID; + +typedef struct OutputDescriptorBindingMetaData { + DescriptorSetID descriptorID; + UnorderedAccessViewID uavID; + uint32_t bindingNumber; +} OutputDescriptorBindingMetaData; + +static const OutputDescriptorBindingMetaData OUTPUT_DESCRIPTOR_BINDING_META_DATA[NUM_OUTPUT_DESCRIPTOR_BINDINGS] = { +#define OUTPUT_DESCRIPTOR_BINDING(descriptor_set_name, uav_name, binding_num) { DS_##descriptor_set_name, UAV_##uav_name, binding_num }, + OUTPUT_DESCRIPTOR_BINDINGS +#undef OUTPUT_DESCRIPTOR_BINDING +}; + +// define all the data for compute shaders +// COMPUTE_SHADER(enum_name, pascal_case_name, descriptor_set) +#define COMPUTE_SHADERS \ + COMPUTE_SHADER(CLEAR_LOAD_COUNTER, ClearLoadCounter, CLEAR_LOAD_COUNTER) \ + \ + COMPUTE_SHADER(PREPARE_DOWNSAMPLED_DEPTHS, PrepareDownsampledDepths, PREPARE_DEPTHS) \ + COMPUTE_SHADER(PREPARE_NATIVE_DEPTHS, PrepareNativeDepths, PREPARE_DEPTHS) \ + COMPUTE_SHADER(PREPARE_DOWNSAMPLED_DEPTHS_AND_MIPS, PrepareDownsampledDepthsAndMips, PREPARE_DEPTHS_MIPS) \ + COMPUTE_SHADER(PREPARE_NATIVE_DEPTHS_AND_MIPS, PrepareNativeDepthsAndMips, PREPARE_DEPTHS_MIPS) \ + COMPUTE_SHADER(PREPARE_DOWNSAMPLED_NORMALS, 
PrepareDownsampledNormals, PREPARE_NORMALS) \ + COMPUTE_SHADER(PREPARE_NATIVE_NORMALS, PrepareNativeNormals, PREPARE_NORMALS) \ + COMPUTE_SHADER(PREPARE_DOWNSAMPLED_NORMALS_FROM_INPUT_NORMALS, PrepareDownsampledNormalsFromInputNormals, PREPARE_NORMALS_FROM_INPUT_NORMALS) \ + COMPUTE_SHADER(PREPARE_NATIVE_NORMALS_FROM_INPUT_NORMALS, PrepareNativeNormalsFromInputNormals, PREPARE_NORMALS_FROM_INPUT_NORMALS) \ + COMPUTE_SHADER(PREPARE_DOWNSAMPLED_DEPTHS_HALF, PrepareDownsampledDepthsHalf, PREPARE_DEPTHS) \ + COMPUTE_SHADER(PREPARE_NATIVE_DEPTHS_HALF, PrepareNativeDepthsHalf, PREPARE_DEPTHS) \ + \ + COMPUTE_SHADER(GENERATE_Q0, GenerateQ0, GENERATE) \ + COMPUTE_SHADER(GENERATE_Q1, GenerateQ1, GENERATE) \ + COMPUTE_SHADER(GENERATE_Q2, GenerateQ2, GENERATE) \ + COMPUTE_SHADER(GENERATE_Q3, GenerateQ3, GENERATE_ADAPTIVE) \ + COMPUTE_SHADER(GENERATE_Q3_BASE, GenerateQ3Base, GENERATE) \ + \ + COMPUTE_SHADER(GENERATE_IMPORTANCE_MAP, GenerateImportanceMap, GENERATE_IMPORTANCE_MAP) \ + COMPUTE_SHADER(POSTPROCESS_IMPORTANCE_MAP_A, PostprocessImportanceMapA, POSTPROCESS_IMPORTANCE_MAP_A) \ + COMPUTE_SHADER(POSTPROCESS_IMPORTANCE_MAP_B, PostprocessImportanceMapB, POSTPROCESS_IMPORTANCE_MAP_B) \ + \ + COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_1, EdgeSensitiveBlur1, EDGE_SENSITIVE_BLUR) \ + COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_2, EdgeSensitiveBlur2, EDGE_SENSITIVE_BLUR) \ + COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_3, EdgeSensitiveBlur3, EDGE_SENSITIVE_BLUR) \ + COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_4, EdgeSensitiveBlur4, EDGE_SENSITIVE_BLUR) \ + COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_5, EdgeSensitiveBlur5, EDGE_SENSITIVE_BLUR) \ + COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_6, EdgeSensitiveBlur6, EDGE_SENSITIVE_BLUR) \ + COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_7, EdgeSensitiveBlur7, EDGE_SENSITIVE_BLUR) \ + COMPUTE_SHADER(EDGE_SENSITIVE_BLUR_8, EdgeSensitiveBlur8, EDGE_SENSITIVE_BLUR) \ + \ + COMPUTE_SHADER(APPLY, Apply, APPLY) \ + COMPUTE_SHADER(NON_SMART_APPLY, NonSmartApply, APPLY) \ + 
COMPUTE_SHADER(NON_SMART_HALF_APPLY, NonSmartHalfApply, APPLY) \ + \ + COMPUTE_SHADER(UPSCALE_BILATERAL_5X5_SMART, UpscaleBilateral5x5Smart, BILATERAL_UPSAMPLE) \ + COMPUTE_SHADER(UPSCALE_BILATERAL_5X5_NON_SMART, UpscaleBilateral5x5NonSmart, BILATERAL_UPSAMPLE) \ + COMPUTE_SHADER(UPSCALE_BILATERAL_5X5_HALF, UpscaleBilateral5x5Half, BILATERAL_UPSAMPLE) + +typedef enum ComputeShaderID { +#define COMPUTE_SHADER(name, _pascal_name, _descriptor_set) CS_##name, + COMPUTE_SHADERS +#undef COMPUTE_SHADER + NUM_COMPUTE_SHADERS +} ComputeShaderID; + +typedef struct ComputeShaderMetaData { + const char *name; + DescriptorSetLayoutID descriptorSetLayoutID; + const char *objectName; + const char *rootSignatureName; +} ComputeShaderMetaData; + +typedef struct ComputeShaderSPIRV { + const uint32_t *spirv; + size_t len; +} ComputeShaderSPIRV; + +typedef struct ComputeShaderDXIL { + const void *dxil; + size_t len; +} ComputeShaderDXIL; + +#ifdef FFX_CACAO_ENABLE_VULKAN +static const ComputeShaderSPIRV COMPUTE_SHADER_SPIRV_32[] = { +#define COMPUTE_SHADER(name, pascal_name, descriptor_set_layout) { (uint32_t*)CS##pascal_name##SPIRV32, FFX_CACAO_ARRAY_SIZE(CS##pascal_name##SPIRV32) }, + COMPUTE_SHADERS +#undef COMPUTE_SHADER +}; + +static const ComputeShaderSPIRV COMPUTE_SHADER_SPIRV_16[] = { +#define COMPUTE_SHADER(name, pascal_name, descriptor_set_layout) { (uint32_t*)CS##pascal_name##SPIRV16, FFX_CACAO_ARRAY_SIZE(CS##pascal_name##SPIRV16) }, + COMPUTE_SHADERS +#undef COMPUTE_SHADER +}; +#endif + +#ifdef FFX_CACAO_ENABLE_D3D12 +static const ComputeShaderDXIL COMPUTE_SHADER_DXIL[] = { +#define COMPUTE_SHADER(name, pascal_name, descriptor_set_layout) { CS##pascal_name##DXIL, sizeof(CS##pascal_name##DXIL) }, + COMPUTE_SHADERS +#undef COMPUTE_SHADER +}; +#endif + +static const ComputeShaderMetaData COMPUTE_SHADER_META_DATA[] = { +#define COMPUTE_SHADER(name, pascal_name, descriptor_set_layout) { "FFX_CACAO_"#pascal_name, DSL_##descriptor_set_layout, "FFX_CACAO_CS_"#name, 
"FFX_CACAO_RS_"#name }, + COMPUTE_SHADERS +#undef COMPUTE_SHADER +}; + + +// ================================================================================= +// DirectX 12 +// ================================================================================= + +#ifdef FFX_CACAO_ENABLE_D3D12 + +static inline FFX_CACAO_Status hresultToFFX_CACAO_Status(HRESULT hr) +{ + switch (hr) + { + case E_FAIL: return FFX_CACAO_STATUS_FAILED; + case E_INVALIDARG: return FFX_CACAO_STATUS_INVALID_ARGUMENT; + case E_OUTOFMEMORY: return FFX_CACAO_STATUS_OUT_OF_MEMORY; + case E_NOTIMPL: return FFX_CACAO_STATUS_INVALID_ARGUMENT; + case S_FALSE: return FFX_CACAO_STATUS_OK; + case S_OK: return FFX_CACAO_STATUS_OK; + default: return FFX_CACAO_STATUS_FAILED; + } +} + +static inline void SetName(ID3D12Object* obj, const char* name) +{ + if (name == NULL) + { + return; + } + + FFX_CACAO_ASSERT(obj != NULL); + wchar_t buffer[1024]; + swprintf(buffer, FFX_CACAO_ARRAY_SIZE(buffer), L"%S", name); + obj->SetName(buffer); +} + +static inline size_t AlignOffset(size_t uOffset, size_t uAlign) +{ + return ((uOffset + (uAlign - 1)) & ~(uAlign - 1)); +} + +static size_t GetPixelByteSize(DXGI_FORMAT fmt) +{ + switch (fmt) + { + case(DXGI_FORMAT_R10G10B10A2_TYPELESS): + case(DXGI_FORMAT_R10G10B10A2_UNORM): + case(DXGI_FORMAT_R10G10B10A2_UINT): + case(DXGI_FORMAT_R11G11B10_FLOAT): + case(DXGI_FORMAT_R8G8B8A8_TYPELESS): + case(DXGI_FORMAT_R8G8B8A8_UNORM): + case(DXGI_FORMAT_R8G8B8A8_UNORM_SRGB): + case(DXGI_FORMAT_R8G8B8A8_UINT): + case(DXGI_FORMAT_R8G8B8A8_SNORM): + case(DXGI_FORMAT_R8G8B8A8_SINT): + case(DXGI_FORMAT_B8G8R8A8_UNORM): + case(DXGI_FORMAT_B8G8R8X8_UNORM): + case(DXGI_FORMAT_R10G10B10_XR_BIAS_A2_UNORM): + case(DXGI_FORMAT_B8G8R8A8_TYPELESS): + case(DXGI_FORMAT_B8G8R8A8_UNORM_SRGB): + case(DXGI_FORMAT_B8G8R8X8_TYPELESS): + case(DXGI_FORMAT_B8G8R8X8_UNORM_SRGB): + case(DXGI_FORMAT_R16G16_TYPELESS): + case(DXGI_FORMAT_R16G16_FLOAT): + case(DXGI_FORMAT_R16G16_UNORM): + 
case(DXGI_FORMAT_R16G16_UINT): + case(DXGI_FORMAT_R16G16_SNORM): + case(DXGI_FORMAT_R16G16_SINT): + case(DXGI_FORMAT_R32_TYPELESS): + case(DXGI_FORMAT_D32_FLOAT): + case(DXGI_FORMAT_R32_FLOAT): + case(DXGI_FORMAT_R32_UINT): + case(DXGI_FORMAT_R32_SINT): + return 4; + + case(DXGI_FORMAT_BC1_TYPELESS): + case(DXGI_FORMAT_BC1_UNORM): + case(DXGI_FORMAT_BC1_UNORM_SRGB): + case(DXGI_FORMAT_BC4_TYPELESS): + case(DXGI_FORMAT_BC4_UNORM): + case(DXGI_FORMAT_BC4_SNORM): + case(DXGI_FORMAT_R16G16B16A16_FLOAT): + case(DXGI_FORMAT_R16G16B16A16_TYPELESS): + return 8; + + case(DXGI_FORMAT_BC2_TYPELESS): + case(DXGI_FORMAT_BC2_UNORM): + case(DXGI_FORMAT_BC2_UNORM_SRGB): + case(DXGI_FORMAT_BC3_TYPELESS): + case(DXGI_FORMAT_BC3_UNORM): + case(DXGI_FORMAT_BC3_UNORM_SRGB): + case(DXGI_FORMAT_BC5_TYPELESS): + case(DXGI_FORMAT_BC5_UNORM): + case(DXGI_FORMAT_BC5_SNORM): + case(DXGI_FORMAT_BC6H_TYPELESS): + case(DXGI_FORMAT_BC6H_UF16): + case(DXGI_FORMAT_BC6H_SF16): + case(DXGI_FORMAT_BC7_TYPELESS): + case(DXGI_FORMAT_BC7_UNORM): + case(DXGI_FORMAT_BC7_UNORM_SRGB): + case(DXGI_FORMAT_R32G32B32A32_FLOAT): + case(DXGI_FORMAT_R32G32B32A32_TYPELESS): + return 16; + + default: + FFX_CACAO_ASSERT(0); + break; + } + return 0; +} + +// ================================================================================================= +// GpuTimer implementation +// ================================================================================================= + +#ifdef FFX_CACAO_ENABLE_PROFILING +#define GPU_TIMER_MAX_VALUES_PER_FRAME (FFX_CACAO_ARRAY_SIZE(((FFX_CACAO_DetailedTiming*)0)->timestamps)) + +typedef struct D3D12Timestamp { + TimestampID timestampID; + uint64_t value; +} D3D12Timestamp; + +typedef struct GpuTimer { + ID3D12Resource *buffer; + ID3D12QueryHeap *queryHeap; + uint32_t currentFrame; + uint32_t collectFrame; + struct { + uint32_t len; + D3D12Timestamp timestamps[NUM_TIMESTAMPS]; + } timestampBuffers[NUM_TIMESTAMP_BUFFERS]; + +} GpuTimer; + +static FFX_CACAO_Status 
gpuTimerInit(GpuTimer* gpuTimer, ID3D12Device* device) +{ + memset(gpuTimer, 0, sizeof(*gpuTimer)); + + D3D12_QUERY_HEAP_DESC queryHeapDesc = {}; + queryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + queryHeapDesc.Count = GPU_TIMER_MAX_VALUES_PER_FRAME * NUM_TIMESTAMP_BUFFERS; + queryHeapDesc.NodeMask = 0; + HRESULT hr = device->CreateQueryHeap(&queryHeapDesc, IID_PPV_ARGS(&gpuTimer->queryHeap)); + if (FAILED(hr)) + { + return hresultToFFX_CACAO_Status(hr); + } + + hr = device->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(sizeof(uint64_t) * NUM_TIMESTAMP_BUFFERS * GPU_TIMER_MAX_VALUES_PER_FRAME), + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(&gpuTimer->buffer)); + if (FAILED(hr)) + { + FFX_CACAO_ASSERT(gpuTimer->queryHeap); + gpuTimer->queryHeap->Release(); + return hresultToFFX_CACAO_Status(hr); + } + + SetName(gpuTimer->buffer, "CACAO::GPUTimer::buffer"); + + return FFX_CACAO_STATUS_OK; +} + +static void gpuTimerDestroy(GpuTimer* gpuTimer) +{ + FFX_CACAO_ASSERT(gpuTimer->buffer); + FFX_CACAO_ASSERT(gpuTimer->queryHeap); + gpuTimer->buffer->Release(); + gpuTimer->queryHeap->Release(); +} + +static void gpuTimerStartFrame(GpuTimer* gpuTimer) +{ + uint32_t frame = gpuTimer->currentFrame = (gpuTimer->currentFrame + 1) % NUM_TIMESTAMP_BUFFERS; + gpuTimer->timestampBuffers[frame].len = 0; + + uint32_t collectFrame = gpuTimer->collectFrame = (frame + 1) % NUM_TIMESTAMP_BUFFERS; + + uint32_t numMeasurements = gpuTimer->timestampBuffers[collectFrame].len; + if (!numMeasurements) + { + return; + } + + uint32_t start = GPU_TIMER_MAX_VALUES_PER_FRAME * collectFrame; + uint32_t end = GPU_TIMER_MAX_VALUES_PER_FRAME * (collectFrame + 1); + + D3D12_RANGE readRange; + readRange.Begin = start * sizeof(uint64_t); + readRange.End = end * sizeof(uint64_t); + uint64_t *timingsInTicks = NULL; + gpuTimer->buffer->Map(0, &readRange, (void**)&timingsInTicks); + + for (uint32_t i = 
0; i < numMeasurements; ++i) + { + gpuTimer->timestampBuffers[collectFrame].timestamps[i].value = timingsInTicks[start + i]; + } + + D3D12_RANGE writtenRange = {}; + writtenRange.Begin = 0; + writtenRange.End = 0; + gpuTimer->buffer->Unmap(0, &writtenRange); +} + +static void gpuTimerGetTimestamp(GpuTimer* gpuTimer, ID3D12GraphicsCommandList* commandList, TimestampID timestampID) +{ + uint32_t frame = gpuTimer->currentFrame; + uint32_t curTimestamp = gpuTimer->timestampBuffers[frame].len++; + FFX_CACAO_ASSERT(curTimestamp < GPU_TIMER_MAX_VALUES_PER_FRAME); + gpuTimer->timestampBuffers[frame].timestamps[curTimestamp].timestampID = timestampID; + commandList->EndQuery(gpuTimer->queryHeap, D3D12_QUERY_TYPE_TIMESTAMP, frame * GPU_TIMER_MAX_VALUES_PER_FRAME + curTimestamp); +} + +static void gpuTimerEndFrame(GpuTimer* gpuTimer, ID3D12GraphicsCommandList* commandList) +{ + uint32_t frame = gpuTimer->currentFrame; + uint32_t numTimestamps = gpuTimer->timestampBuffers[frame].len; + commandList->ResolveQueryData( + gpuTimer->queryHeap, + D3D12_QUERY_TYPE_TIMESTAMP, + frame * GPU_TIMER_MAX_VALUES_PER_FRAME, + numTimestamps, + gpuTimer->buffer, + frame * GPU_TIMER_MAX_VALUES_PER_FRAME * sizeof(uint64_t)); +} + +static void gpuTimerCollectTimings(GpuTimer* gpuTimer, FFX_CACAO_DetailedTiming* timings) +{ + uint32_t frame = gpuTimer->collectFrame; + uint32_t numTimestamps = timings->numTimestamps = gpuTimer->timestampBuffers[frame].len; + + uint64_t prevTimeTicks = gpuTimer->timestampBuffers[frame].timestamps[0].value; + for (uint32_t i = 1; i < numTimestamps; ++i) + { + uint64_t thisTimeTicks = gpuTimer->timestampBuffers[frame].timestamps[i].value; + FFX_CACAO_Timestamp *t = &timings->timestamps[i]; + t->label = TIMESTAMP_NAMES[gpuTimer->timestampBuffers[frame].timestamps[i].timestampID]; + t->ticks = thisTimeTicks - prevTimeTicks; + prevTimeTicks = thisTimeTicks; + } + + timings->timestamps[0].label = "FFX_CACAO_TOTAL"; + timings->timestamps[0].ticks = prevTimeTicks - 
gpuTimer->timestampBuffers[frame].timestamps[0].value; +} +#endif + +// ================================================================================================= +// CbvSrvUav implementation +// ================================================================================================= + +typedef struct CbvSrvUav { + uint32_t size; + uint32_t descriptorSize; + D3D12_CPU_DESCRIPTOR_HANDLE cpuDescriptor; + D3D12_GPU_DESCRIPTOR_HANDLE gpuDescriptor; + D3D12_CPU_DESCRIPTOR_HANDLE cpuVisibleCpuDescriptor; +} CbvSrvUav; + +static D3D12_CPU_DESCRIPTOR_HANDLE cbvSrvUavGetCpu(CbvSrvUav* cbvSrvUav, uint32_t i) +{ + D3D12_CPU_DESCRIPTOR_HANDLE cpuDescriptor = cbvSrvUav->cpuDescriptor; + cpuDescriptor.ptr += i * cbvSrvUav->descriptorSize; + return cpuDescriptor; +} + +static D3D12_CPU_DESCRIPTOR_HANDLE cbvSrvUavGetCpuVisibleCpu(CbvSrvUav* cbvSrvUav, uint32_t i) +{ + D3D12_CPU_DESCRIPTOR_HANDLE cpuDescriptor = cbvSrvUav->cpuVisibleCpuDescriptor; + cpuDescriptor.ptr += i * cbvSrvUav->descriptorSize; + return cpuDescriptor; +} + +static D3D12_GPU_DESCRIPTOR_HANDLE cbvSrvUavGetGpu(CbvSrvUav* cbvSrvUav, uint32_t i) +{ + D3D12_GPU_DESCRIPTOR_HANDLE gpuDescriptor = cbvSrvUav->gpuDescriptor; + gpuDescriptor.ptr += i * cbvSrvUav->descriptorSize; + return gpuDescriptor; +} + +// ================================================================================================= +// CbvSrvUavHeap implementation +// ================================================================================================= + +typedef struct CbvSrvUavHeap { + uint32_t index; + uint32_t descriptorCount; + uint32_t descriptorElementSize; + ID3D12DescriptorHeap *heap; + ID3D12DescriptorHeap *cpuVisibleHeap; +} ResourceViewHeap; + +static FFX_CACAO_Status cbvSrvUavHeapInit(CbvSrvUavHeap* cbvSrvUavHeap, ID3D12Device* device, uint32_t descriptorCount) +{ + FFX_CACAO_ASSERT(cbvSrvUavHeap); + FFX_CACAO_ASSERT(device); + + cbvSrvUavHeap->descriptorCount = descriptorCount; + cbvSrvUavHeap->index = 
0; + + cbvSrvUavHeap->descriptorElementSize = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + + D3D12_DESCRIPTOR_HEAP_DESC descHeap; + descHeap.NumDescriptors = descriptorCount; + descHeap.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + descHeap.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + descHeap.NodeMask = 0; + + HRESULT hr = device->CreateDescriptorHeap(&descHeap, IID_PPV_ARGS(&cbvSrvUavHeap->heap)); + if (FAILED(hr)) + { + return hresultToFFX_CACAO_Status(hr); + } + + SetName(cbvSrvUavHeap->heap, "FFX_CACAO_CbvSrvUavHeap"); + + D3D12_DESCRIPTOR_HEAP_DESC cpuVisibleDescHeap; + cpuVisibleDescHeap.NumDescriptors = descriptorCount; + cpuVisibleDescHeap.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + cpuVisibleDescHeap.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + cpuVisibleDescHeap.NodeMask = 0; + + hr = device->CreateDescriptorHeap(&cpuVisibleDescHeap, IID_PPV_ARGS(&cbvSrvUavHeap->cpuVisibleHeap)); + if (FAILED(hr)) + { + FFX_CACAO_ASSERT(cbvSrvUavHeap->heap); + cbvSrvUavHeap->heap->Release(); + return hresultToFFX_CACAO_Status(hr); + } + + SetName(cbvSrvUavHeap->cpuVisibleHeap, "FFX_CACAO_CbvSrvUavCpuVisibleHeap"); + return FFX_CACAO_STATUS_OK; +} + +static void cbvSrvUavHeapDestroy(CbvSrvUavHeap* cbvSrvUavHeap) +{ + FFX_CACAO_ASSERT(cbvSrvUavHeap); + FFX_CACAO_ASSERT(cbvSrvUavHeap->heap); + FFX_CACAO_ASSERT(cbvSrvUavHeap->cpuVisibleHeap); + cbvSrvUavHeap->heap->Release(); + cbvSrvUavHeap->cpuVisibleHeap->Release(); +} + +static void cbvSrvUavHeapAllocDescriptor(CbvSrvUavHeap* cbvSrvUavHeap, CbvSrvUav* cbvSrvUav, uint32_t size) +{ + FFX_CACAO_ASSERT(cbvSrvUavHeap); + FFX_CACAO_ASSERT(cbvSrvUav); + FFX_CACAO_ASSERT(cbvSrvUavHeap->index + size <= cbvSrvUavHeap->descriptorCount); + + D3D12_CPU_DESCRIPTOR_HANDLE cpuView = cbvSrvUavHeap->heap->GetCPUDescriptorHandleForHeapStart(); + cpuView.ptr += cbvSrvUavHeap->index * cbvSrvUavHeap->descriptorElementSize; + + D3D12_GPU_DESCRIPTOR_HANDLE gpuView = 
cbvSrvUavHeap->heap->GetGPUDescriptorHandleForHeapStart(); + gpuView.ptr += cbvSrvUavHeap->index * cbvSrvUavHeap->descriptorElementSize; + + D3D12_CPU_DESCRIPTOR_HANDLE cpuVisibleCpuView = cbvSrvUavHeap->cpuVisibleHeap->GetCPUDescriptorHandleForHeapStart(); + cpuVisibleCpuView.ptr += cbvSrvUavHeap->index * cbvSrvUavHeap->descriptorElementSize; + + cbvSrvUavHeap->index += size; + + cbvSrvUav->size = size; + cbvSrvUav->descriptorSize = cbvSrvUavHeap->descriptorElementSize; + cbvSrvUav->cpuDescriptor = cpuView; + cbvSrvUav->gpuDescriptor = gpuView; + cbvSrvUav->cpuVisibleCpuDescriptor = cpuVisibleCpuView; +} + +// ================================================================================================= +// ConstantBufferRing implementation +// ================================================================================================= + +typedef struct ConstantBufferRing { + size_t pageSize; + size_t totalSize; + size_t currentOffset; + uint32_t currentPage; + uint32_t numPages; + char *data; + ID3D12Resource *buffer; +} ConstantBufferRing; + +static FFX_CACAO_Status constantBufferRingInit(ConstantBufferRing* constantBufferRing, ID3D12Device* device, uint32_t numPages, size_t pageSize) +{ + FFX_CACAO_ASSERT(constantBufferRing); + FFX_CACAO_ASSERT(device); + + pageSize = AlignOffset(pageSize, 256); + size_t totalSize = numPages * pageSize; + char *data = NULL; + ID3D12Resource *buffer = NULL; + + HRESULT hr = device->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD), + D3D12_HEAP_FLAG_NONE, + &CD3DX12_RESOURCE_DESC::Buffer(totalSize), + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(&buffer)); + if (FAILED(hr)) + { + return hresultToFFX_CACAO_Status(hr); + } + + SetName(buffer, "DynamicBufferRing::m_pBuffer"); + + buffer->Map(0, NULL, (void**)&data); + + constantBufferRing->pageSize = pageSize; + constantBufferRing->totalSize = totalSize; + constantBufferRing->currentOffset = 0; + constantBufferRing->currentPage = 0; 
+ constantBufferRing->numPages = numPages; + constantBufferRing->data = data; + constantBufferRing->buffer = buffer; + + return FFX_CACAO_STATUS_OK; +} + +static void constantBufferRingDestroy(ConstantBufferRing* constantBufferRing) +{ + FFX_CACAO_ASSERT(constantBufferRing); + FFX_CACAO_ASSERT(constantBufferRing->buffer); + constantBufferRing->buffer->Release(); +} + +static void constantBufferRingStartFrame(ConstantBufferRing* constantBufferRing) +{ + FFX_CACAO_ASSERT(constantBufferRing); + constantBufferRing->currentPage = (constantBufferRing->currentPage + 1) % constantBufferRing->numPages; + constantBufferRing->currentOffset = 0; +} + +static void constantBufferRingAlloc(ConstantBufferRing* constantBufferRing, size_t size, void **data, D3D12_GPU_VIRTUAL_ADDRESS *bufferViewDesc) +{ + FFX_CACAO_ASSERT(constantBufferRing); + size = AlignOffset(size, 256); + FFX_CACAO_ASSERT(constantBufferRing->currentOffset + size <= constantBufferRing->pageSize); + + size_t memOffset = constantBufferRing->pageSize * constantBufferRing->currentPage + constantBufferRing->currentOffset; + *data = constantBufferRing->data + memOffset; + constantBufferRing->currentOffset += size; + + *bufferViewDesc = constantBufferRing->buffer->GetGPUVirtualAddress() + memOffset; +} + +// ================================================================================================= +// Texture implementation +// ================================================================================================= + +typedef struct Texture { + ID3D12Resource *resource; + DXGI_FORMAT format; + uint32_t width; + uint32_t height; + uint32_t arraySize; + uint32_t mipMapCount; +} Texture; + +static FFX_CACAO_Status textureInit(Texture* texture, ID3D12Device* device, const char* name, const CD3DX12_RESOURCE_DESC* desc, D3D12_RESOURCE_STATES initialState, const D3D12_CLEAR_VALUE* clearValue) +{ + FFX_CACAO_ASSERT(texture); + FFX_CACAO_ASSERT(device); + FFX_CACAO_ASSERT(name); + FFX_CACAO_ASSERT(desc); + + 
HRESULT hr = device->CreateCommittedResource( + &CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT), + D3D12_HEAP_FLAG_NONE, + desc, + initialState, + clearValue, + IID_PPV_ARGS(&texture->resource)); + if (FAILED(hr)) + { + return hresultToFFX_CACAO_Status(hr); + } + + texture->format = desc->Format; + texture->width = (uint32_t)desc->Width; + texture->height = desc->Height; + texture->arraySize = desc->DepthOrArraySize; + texture->mipMapCount = desc->MipLevels; + + SetName(texture->resource, name); + + return FFX_CACAO_STATUS_OK; +} + +static void textureDestroy(Texture* texture) +{ + FFX_CACAO_ASSERT(texture); + FFX_CACAO_ASSERT(texture->resource); + texture->resource->Release(); +} + +static void textureCreateSrvFromDesc(Texture* texture, uint32_t index, CbvSrvUav* srv, const D3D12_SHADER_RESOURCE_VIEW_DESC* srvDesc) +{ + FFX_CACAO_ASSERT(texture); + FFX_CACAO_ASSERT(srv); + FFX_CACAO_ASSERT(srvDesc); + + ID3D12Device* device; + texture->resource->GetDevice(__uuidof(*device), (void**)&device); + + device->CreateShaderResourceView(texture->resource, srvDesc, cbvSrvUavGetCpu(srv, index)); + device->CreateShaderResourceView(texture->resource, srvDesc, cbvSrvUavGetCpuVisibleCpu(srv, index)); + + device->Release(); +} + +static void textureCreateUavFromDesc(Texture* texture, uint32_t index, CbvSrvUav* uav, const D3D12_UNORDERED_ACCESS_VIEW_DESC* uavDesc) +{ + FFX_CACAO_ASSERT(texture); + FFX_CACAO_ASSERT(uav); + FFX_CACAO_ASSERT(uavDesc); + + ID3D12Device* device; + texture->resource->GetDevice(__uuidof(*device), (void**)&device); + + device->CreateUnorderedAccessView(texture->resource, NULL, uavDesc, cbvSrvUavGetCpu(uav, index)); + device->CreateUnorderedAccessView(texture->resource, NULL, uavDesc, cbvSrvUavGetCpuVisibleCpu(uav, index)); + + device->Release(); +} + +// ================================================================================================= +// CACAO implementation +// 
================================================================================================= + +struct FFX_CACAO_D3D12Context { + FFX_CACAO_Settings settings; + FFX_CACAO_Bool useDownsampledSsao; + + ID3D12Device *device; + CbvSrvUavHeap cbvSrvUavHeap; + +#ifdef FFX_CACAO_ENABLE_PROFILING + GpuTimer gpuTimer; +#endif + + ConstantBufferRing constantBufferRing; + FFX_CACAO_BufferSizeInfo bufferSizeInfo; + ID3D12Resource *outputResource; + + Texture loadCounter; + + CbvSrvUav loadCounterUav; // required for LoadCounter clear + + ID3D12RootSignature *csRootSignatures[NUM_COMPUTE_SHADERS]; + ID3D12PipelineState *computeShader[NUM_COMPUTE_SHADERS]; + + ID3D12Resource *textures[NUM_TEXTURES]; + CbvSrvUav inputDescriptors[NUM_DESCRIPTOR_SETS]; + CbvSrvUav outputDescriptors[NUM_DESCRIPTOR_SETS]; +}; + +static inline FFX_CACAO_D3D12Context* getAlignedD3D12ContextPointer(FFX_CACAO_D3D12Context* ptr) +{ + uintptr_t tmp = (uintptr_t)ptr; + tmp = (tmp + alignof(FFX_CACAO_D3D12Context) - 1) & (~(alignof(FFX_CACAO_D3D12Context) - 1)); + return (FFX_CACAO_D3D12Context*)tmp; +} +#endif + +#ifdef FFX_CACAO_ENABLE_VULKAN +// ================================================================================================= +// CACAO vulkan implementation +// ================================================================================================= + + + +#define MAX_DESCRIPTOR_BINDINGS 32 + + + +#define NUM_BACK_BUFFERS 3 +#define NUM_SAMPLERS 5 +typedef struct FFX_CACAO_VkContext { + FFX_CACAO_Settings settings; + FFX_CACAO_Bool useDownsampledSsao; + FFX_CACAO_BufferSizeInfo bufferSizeInfo; + +#ifdef FFX_CACAO_ENABLE_PROFILING + VkQueryPool timestampQueryPool; + uint32_t collectBuffer; + struct { + TimestampID timestamps[NUM_TIMESTAMPS]; + uint64_t timings[NUM_TIMESTAMPS]; + uint32_t numTimestamps; + } timestampQueries[NUM_BACK_BUFFERS]; +#endif + + VkPhysicalDevice physicalDevice; + VkDevice device; + PFN_vkCmdDebugMarkerBeginEXT vkCmdDebugMarkerBegin; + 
PFN_vkCmdDebugMarkerEndEXT vkCmdDebugMarkerEnd; + PFN_vkSetDebugUtilsObjectNameEXT vkSetDebugUtilsObjectName; + + + VkDescriptorSetLayout descriptorSetLayouts[NUM_DESCRIPTOR_SET_LAYOUTS]; + VkPipelineLayout pipelineLayouts[NUM_DESCRIPTOR_SET_LAYOUTS]; + + VkShaderModule computeShaders[NUM_COMPUTE_SHADERS]; + VkPipeline computePipelines[NUM_COMPUTE_SHADERS]; + + VkDescriptorSet descriptorSets[NUM_BACK_BUFFERS][NUM_DESCRIPTOR_SETS]; + VkDescriptorPool descriptorPool; + + VkSampler samplers[NUM_SAMPLERS]; + + VkImage textures[NUM_TEXTURES]; + VkDeviceMemory textureMemory[NUM_TEXTURES]; + VkImageView shaderResourceViews[NUM_SHADER_RESOURCE_VIEWS]; + VkImageView unorderedAccessViews[NUM_UNORDERED_ACCESS_VIEWS]; + + VkImage loadCounter; + VkDeviceMemory loadCounterMemory; + VkImageView loadCounterView; + + VkImage output; + + uint32_t currentConstantBuffer; + VkBuffer constantBuffer[NUM_BACK_BUFFERS][4]; + VkDeviceMemory constantBufferMemory[NUM_BACK_BUFFERS][4]; +} FFX_CACAO_VkContext; + +static inline FFX_CACAO_VkContext* getAlignedVkContextPointer(FFX_CACAO_VkContext* ptr) +{ + uintptr_t tmp = (uintptr_t)ptr; + tmp = (tmp + alignof(FFX_CACAO_VkContext) - 1) & (~(alignof(FFX_CACAO_VkContext) - 1)); + return (FFX_CACAO_VkContext*)tmp; +} +#endif + +// ================================================================================= +// Interface +// ================================================================================= + +#ifdef __cplusplus +extern "C" +{ +#endif + +#ifdef FFX_CACAO_ENABLE_D3D12 +size_t FFX_CACAO_D3D12GetContextSize() +{ + return sizeof(FFX_CACAO_D3D12Context) + alignof(FFX_CACAO_D3D12Context) - 1; +} + +FFX_CACAO_Status FFX_CACAO_D3D12InitContext(FFX_CACAO_D3D12Context* context, ID3D12Device* device) +{ + if (context == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + if (device == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedD3D12ContextPointer(context); + + uint32_t numInputDescriptorsInited = 0; 
+ uint32_t numOutputDescriptorsInited = 0; + uint32_t numRootSignaturesInited = 0; + uint32_t numComputeShadersInited = 0; + +#define COMPUTE_SHADER_INIT(name, entryPoint, uavSize, srvSize) \ + errorStatus = computeShaderInit(&context->name, device, #entryPoint, entryPoint ## DXIL, sizeof(entryPoint ## DXIL), uavSize, srvSize, samplers, FFX_CACAO_ARRAY_SIZE(samplers)); \ + if (errorStatus) \ + { \ + goto error_create_ ## entryPoint; \ + } +#define ERROR_COMPUTE_SHADER_DESTROY(name, entryPoint) \ + computeShaderDestroy(&context->name); \ +error_create_ ## entryPoint: + + FFX_CACAO_Status errorStatus = FFX_CACAO_STATUS_FAILED; + + context->device = device; + CbvSrvUavHeap *cbvSrvUavHeap = &context->cbvSrvUavHeap; + errorStatus = cbvSrvUavHeapInit(cbvSrvUavHeap, device, 512); + if (errorStatus) + { + goto error_create_cbv_srv_uav_heap; + } + errorStatus = constantBufferRingInit(&context->constantBufferRing, device, 5, 1024 * 5); + if (errorStatus) + { + goto error_create_constant_buffer_ring; + } +#ifdef FFX_CACAO_ENABLE_PROFILING + errorStatus = gpuTimerInit(&context->gpuTimer, device); + if (errorStatus) + { + goto error_create_gpu_timer; + } +#endif + + D3D12_STATIC_SAMPLER_DESC samplers[5] = { }; + + samplers[0].Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + samplers[0].AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplers[0].AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplers[0].AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplers[0].ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + samplers[0].BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + samplers[0].MinLOD = 0.0f; + samplers[0].MaxLOD = D3D12_FLOAT32_MAX; + samplers[0].MipLODBias = 0; + samplers[0].MaxAnisotropy = 1; + samplers[0].ShaderRegister = 0; + samplers[0].RegisterSpace = 0; + samplers[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + samplers[1].Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + samplers[1].AddressU = D3D12_TEXTURE_ADDRESS_MODE_MIRROR; + samplers[1].AddressV = 
D3D12_TEXTURE_ADDRESS_MODE_MIRROR; + samplers[1].AddressW = D3D12_TEXTURE_ADDRESS_MODE_MIRROR; + samplers[1].ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + samplers[1].BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + samplers[1].MinLOD = 0.0f; + samplers[1].MaxLOD = D3D12_FLOAT32_MAX; + samplers[1].MipLODBias = 0; + samplers[1].MaxAnisotropy = 1; + samplers[1].ShaderRegister = 1; + samplers[1].RegisterSpace = 0; + samplers[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + samplers[2].Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + samplers[2].AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplers[2].AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplers[2].AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplers[2].ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + samplers[2].BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + samplers[2].MinLOD = 0.0f; + samplers[2].MaxLOD = D3D12_FLOAT32_MAX; + samplers[2].MipLODBias = 0; + samplers[2].MaxAnisotropy = 1; + samplers[2].ShaderRegister = 2; + samplers[2].RegisterSpace = 0; + samplers[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + samplers[3].Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + samplers[3].AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplers[3].AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplers[3].AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplers[3].ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + samplers[3].BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + samplers[3].MinLOD = 0.0f; + samplers[3].MaxLOD = D3D12_FLOAT32_MAX; + samplers[3].MipLODBias = 0; + samplers[3].MaxAnisotropy = 1; + samplers[3].ShaderRegister = 3; + samplers[3].RegisterSpace = 0; + samplers[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + samplers[4].Filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + samplers[4].AddressU = D3D12_TEXTURE_ADDRESS_MODE_BORDER; + samplers[4].AddressV = D3D12_TEXTURE_ADDRESS_MODE_BORDER; + samplers[4].AddressW = D3D12_TEXTURE_ADDRESS_MODE_BORDER; + 
samplers[4].ComparisonFunc = D3D12_COMPARISON_FUNC_ALWAYS; + samplers[4].BorderColor = D3D12_STATIC_BORDER_COLOR_TRANSPARENT_BLACK; + samplers[4].MinLOD = 0.0f; + samplers[4].MaxLOD = D3D12_FLOAT32_MAX; + samplers[4].MipLODBias = 0; + samplers[4].MaxAnisotropy = 1; + samplers[4].ShaderRegister = 4; + samplers[4].RegisterSpace = 0; + samplers[4].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + errorStatus = textureInit(&context->loadCounter, device, "CACAO::m_loadCounter", &CD3DX12_RESOURCE_DESC::Tex1D(DXGI_FORMAT_R32_UINT, 1, 1, 1, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, NULL); + if (errorStatus) + { + goto error_create_load_counter_texture; + } + + // create uav for load counter + { + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_R32_UINT; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; + uavDesc.Texture1D.MipSlice = 0; + + cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->loadCounterUav, 1); // required for clearing the load counter + textureCreateUavFromDesc(&context->loadCounter, 0, &context->loadCounterUav, &uavDesc); + } + + for (; numInputDescriptorsInited < NUM_DESCRIPTOR_SETS; ++numInputDescriptorsInited) + { + uint32_t size = DESCRIPTOR_SET_LAYOUT_META_DATA[DESCRIPTOR_SET_META_DATA[numInputDescriptorsInited].descriptorSetLayoutID].numInputs; + cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->inputDescriptors[numInputDescriptorsInited], size); + } + + for (; numOutputDescriptorsInited < NUM_DESCRIPTOR_SETS; ++numOutputDescriptorsInited) + { + uint32_t size = DESCRIPTOR_SET_LAYOUT_META_DATA[DESCRIPTOR_SET_META_DATA[numOutputDescriptorsInited].descriptorSetLayoutID].numOutputs; + cbvSrvUavHeapAllocDescriptor(cbvSrvUavHeap, &context->outputDescriptors[numOutputDescriptorsInited], size); + } + + for (; numRootSignaturesInited < NUM_COMPUTE_SHADERS; ++numRootSignaturesInited) + { + ComputeShaderMetaData metaData = COMPUTE_SHADER_META_DATA[numRootSignaturesInited]; + 
DescriptorSetLayoutMetaData dslMetaData = DESCRIPTOR_SET_LAYOUT_META_DATA[metaData.descriptorSetLayoutID]; + + CD3DX12_DESCRIPTOR_RANGE DescRange[4]; + CD3DX12_ROOT_PARAMETER RTSlot[4]; + + // we'll always have a constant buffer + int parameterCount = 0; + DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_CBV, 1, 0); + RTSlot[parameterCount++].InitAsConstantBufferView(0, 0, D3D12_SHADER_VISIBILITY_ALL); + + // if we have a UAV table + if (dslMetaData.numOutputs > 0) + { + DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_UAV, dslMetaData.numOutputs, 0); + RTSlot[parameterCount].InitAsDescriptorTable(1, &DescRange[parameterCount], D3D12_SHADER_VISIBILITY_ALL); + ++parameterCount; + } + + // if we have a SRV table + if (dslMetaData.numInputs > 0) + { + DescRange[parameterCount].Init(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, dslMetaData.numInputs, 0); + RTSlot[parameterCount].InitAsDescriptorTable(1, &DescRange[parameterCount], D3D12_SHADER_VISIBILITY_ALL); + ++parameterCount; + } + + // the root signature contains 3 slots to be used + CD3DX12_ROOT_SIGNATURE_DESC descRootSignature = CD3DX12_ROOT_SIGNATURE_DESC(); + descRootSignature.NumParameters = parameterCount; + descRootSignature.pParameters = RTSlot; + descRootSignature.NumStaticSamplers = FFX_CACAO_ARRAY_SIZE(samplers); + descRootSignature.pStaticSamplers = samplers; + + // deny uneccessary access to certain pipeline stages + descRootSignature.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + ID3DBlob *outBlob, *errorBlob = NULL; + + HRESULT hr = D3D12SerializeRootSignature(&descRootSignature, D3D_ROOT_SIGNATURE_VERSION_1, &outBlob, &errorBlob); + if (FAILED(hr)) + { + errorStatus = hresultToFFX_CACAO_Status(hr); + goto error_init_root_signature; + } + + if (errorBlob) + { + errorBlob->Release(); + if (outBlob) + { + outBlob->Release(); + } + errorStatus = FFX_CACAO_STATUS_FAILED; + goto error_init_root_signature; + } + + hr = device->CreateRootSignature(0, outBlob->GetBufferPointer(), 
outBlob->GetBufferSize(), IID_PPV_ARGS(&context->csRootSignatures[numRootSignaturesInited])); + if (FAILED(hr)) + { + outBlob->Release(); + errorStatus = hresultToFFX_CACAO_Status(hr); + goto error_init_root_signature; + } + + SetName(context->csRootSignatures[numRootSignaturesInited], metaData.rootSignatureName); + + outBlob->Release(); + } + + for (; numComputeShadersInited < NUM_COMPUTE_SHADERS; ++numComputeShadersInited) + { + ComputeShaderMetaData metaData = COMPUTE_SHADER_META_DATA[numComputeShadersInited]; + + D3D12_SHADER_BYTECODE shaderByteCode = {}; + shaderByteCode.pShaderBytecode = COMPUTE_SHADER_DXIL[numComputeShadersInited].dxil; + shaderByteCode.BytecodeLength = COMPUTE_SHADER_DXIL[numComputeShadersInited].len; + + D3D12_COMPUTE_PIPELINE_STATE_DESC descPso = {}; + descPso.CS = shaderByteCode; + descPso.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + descPso.pRootSignature = context->csRootSignatures[numComputeShadersInited]; + descPso.NodeMask = 0; + + HRESULT hr = device->CreateComputePipelineState(&descPso, IID_PPV_ARGS(&context->computeShader[numComputeShadersInited])); + if (FAILED(hr)) + { + goto error_init_compute_shader; + } + + SetName(context->computeShader[numComputeShadersInited], metaData.objectName); + } + + return FFX_CACAO_STATUS_OK; + +error_init_compute_shader: + for (uint32_t i = 0; i < numComputeShadersInited; ++i) + { + context->computeShader[i]->Release(); + } + +error_init_root_signature: + for (uint32_t i = 0; i < numRootSignaturesInited; ++i) + { + context->csRootSignatures[i]->Release(); + } + +error_create_load_counter_texture: + + +#ifdef FFX_CACAO_ENABLE_PROFILING + gpuTimerDestroy(&context->gpuTimer); +error_create_gpu_timer: +#endif + constantBufferRingDestroy(&context->constantBufferRing); +error_create_constant_buffer_ring: + cbvSrvUavHeapDestroy(&context->cbvSrvUavHeap); +error_create_cbv_srv_uav_heap: + + return errorStatus; +} + +FFX_CACAO_Status FFX_CACAO_D3D12DestroyContext(FFX_CACAO_D3D12Context* context) +{ + if 
(context == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedD3D12ContextPointer(context); + + for (uint32_t i = 0; i < NUM_COMPUTE_SHADERS; ++i) + { + context->computeShader[i]->Release(); + } + + for (uint32_t i = 0; i < NUM_COMPUTE_SHADERS; ++i) + { + context->csRootSignatures[i]->Release(); + } + + textureDestroy(&context->loadCounter); + +#ifdef FFX_CACAO_ENABLE_PROFILING + gpuTimerDestroy(&context->gpuTimer); +#endif + constantBufferRingDestroy(&context->constantBufferRing); + cbvSrvUavHeapDestroy(&context->cbvSrvUavHeap); + + return FFX_CACAO_STATUS_OK; +} + +FFX_CACAO_Status FFX_CACAO_D3D12InitScreenSizeDependentResources(FFX_CACAO_D3D12Context* context, const FFX_CACAO_D3D12ScreenSizeInfo* info) +{ + if (context == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + if (info == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedD3D12ContextPointer(context); + + uint32_t numTexturesInited = 0; + uint32_t numInputBindingsInited = 0; + uint32_t numOutputBindingsInited = 0; + + FFX_CACAO_Bool useDownsampledSsao = info->useDownsampledSsao; + context->useDownsampledSsao = useDownsampledSsao; + FFX_CACAO_Status errorStatus; + + ID3D12Device *device = context->device; + + FFX_CACAO_BufferSizeInfo *bsi = &context->bufferSizeInfo; + FFX_CACAO_UpdateBufferSizeInfo(info->width, info->height, useDownsampledSsao, bsi); + + // ======================================= + // Init debug SRVs/UAVs + + context->outputResource = info->outputResource; + + // ======================================= + // Init textures + + for (; numTexturesInited < NUM_TEXTURES; ++numTexturesInited) + { + TextureMetaData metaData = TEXTURE_META_DATA[numTexturesInited]; + + DXGI_FORMAT format = TEXTURE_FORMAT_LOOKUP_D3D12[metaData.format]; + uint32_t width = *(uint32_t*)(((uint8_t*)bsi) + metaData.widthOffset); + uint32_t height = *(uint32_t*)(((uint8_t*)bsi) + metaData.heightOffset); + + D3D12_HEAP_PROPERTIES heapProperties = 
CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT); + D3D12_RESOURCE_DESC desc = CD3DX12_RESOURCE_DESC::Tex2D(format, width, height, metaData.arraySize, metaData.numMips, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + + HRESULT hr = device->CreateCommittedResource(&heapProperties, D3D12_HEAP_FLAG_NONE, &desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, NULL, IID_PPV_ARGS(&context->textures[numTexturesInited])); + + if (FAILED(hr)) + { + errorStatus = hresultToFFX_CACAO_Status(hr); + goto error_init_textures; + } + + SetName(context->textures[numTexturesInited], metaData.name); + } + + for (; numInputBindingsInited < NUM_INPUT_DESCRIPTOR_BINDINGS; ++numInputBindingsInited) + { + InputDescriptorBindingMetaData metaData = INPUT_DESCRIPTOR_BINDING_META_DATA[numInputBindingsInited]; + DescriptorSetID ds = metaData.descriptorID; + ShaderResourceViewMetaData srvMetaData = SRV_META_DATA[metaData.srvID]; + + D3D12_CPU_DESCRIPTOR_HANDLE descriptor = context->inputDescriptors[ds].cpuDescriptor; + descriptor.ptr += metaData.bindingNumber * context->inputDescriptors[ds].descriptorSize; + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = TEXTURE_FORMAT_LOOKUP_D3D12[TEXTURE_META_DATA[srvMetaData.texture].format]; + srvDesc.ViewDimension = VIEW_TYPE_LOOKUP_D3D12_SRV[srvMetaData.viewType]; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + switch (srvDesc.ViewDimension) + { + case D3D12_SRV_DIMENSION_TEXTURE2D: + srvDesc.Texture2D.MostDetailedMip = srvMetaData.mostDetailedMip; + srvDesc.Texture2D.MipLevels = srvMetaData.mipLevels; + break; + case D3D12_SRV_DIMENSION_TEXTURE2DARRAY: + srvDesc.Texture2DArray.MostDetailedMip = srvMetaData.mostDetailedMip; + srvDesc.Texture2DArray.MipLevels = srvMetaData.mipLevels; + srvDesc.Texture2DArray.FirstArraySlice = srvMetaData.firstArraySlice; + srvDesc.Texture2DArray.ArraySize = srvMetaData.arraySize; + break; + default: + FFX_CACAO_ASSERT(0); + break; + } + 
device->CreateShaderResourceView(context->textures[srvMetaData.texture], &srvDesc, descriptor); + } + + for (; numOutputBindingsInited < NUM_OUTPUT_DESCRIPTOR_BINDINGS; ++numOutputBindingsInited) + { + OutputDescriptorBindingMetaData metaData = OUTPUT_DESCRIPTOR_BINDING_META_DATA[numOutputBindingsInited]; + DescriptorSetID ds = metaData.descriptorID; + UnorderedAccessViewMetaData uavMetaData = UAV_META_DATA[metaData.uavID]; + + D3D12_CPU_DESCRIPTOR_HANDLE descriptor = context->outputDescriptors[ds].cpuDescriptor; + descriptor.ptr += metaData.bindingNumber * context->outputDescriptors[ds].descriptorSize; + + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = TEXTURE_FORMAT_LOOKUP_D3D12[TEXTURE_META_DATA[uavMetaData.textureID].format]; + uavDesc.ViewDimension = VIEW_TYPE_LOOKUP_D3D12_UAV[uavMetaData.viewType]; + switch (uavDesc.ViewDimension) + { + case D3D12_UAV_DIMENSION_TEXTURE2D: + uavDesc.Texture2D.MipSlice = uavMetaData.mostDetailedMip; + break; + case D3D12_UAV_DIMENSION_TEXTURE2DARRAY: + uavDesc.Texture2DArray.MipSlice = uavMetaData.mostDetailedMip; + uavDesc.Texture2DArray.FirstArraySlice = uavMetaData.firstArraySlice; + uavDesc.Texture2DArray.ArraySize = uavMetaData.arraySize; + break; + default: + FFX_CACAO_ASSERT(0); + break; + } + device->CreateUnorderedAccessView(context->textures[uavMetaData.textureID], NULL, &uavDesc, descriptor); + } + + // misc inputs + { + D3D12_CPU_DESCRIPTOR_HANDLE descriptor; + + // depth buffer input + descriptor = context->inputDescriptors[DS_PREPARE_DEPTHS].cpuDescriptor; + device->CreateShaderResourceView(info->depthBufferResource, &info->depthBufferSrvDesc, descriptor); + + descriptor = context->inputDescriptors[DS_PREPARE_DEPTHS_MIPS].cpuDescriptor; + device->CreateShaderResourceView(info->depthBufferResource, &info->depthBufferSrvDesc, descriptor); + + descriptor = context->inputDescriptors[DS_PREPARE_NORMALS].cpuDescriptor; + device->CreateShaderResourceView(info->depthBufferResource, 
&info->depthBufferSrvDesc, descriptor); + + descriptor = context->inputDescriptors[DS_BILATERAL_UPSAMPLE_PING].cpuDescriptor; + descriptor.ptr += context->cbvSrvUavHeap.descriptorElementSize; + device->CreateShaderResourceView(info->depthBufferResource, &info->depthBufferSrvDesc, descriptor); + + descriptor = context->inputDescriptors[DS_BILATERAL_UPSAMPLE_PONG].cpuDescriptor; + descriptor.ptr += context->cbvSrvUavHeap.descriptorElementSize; + device->CreateShaderResourceView(info->depthBufferResource, &info->depthBufferSrvDesc, descriptor); + + // normal buffer input + if (info->normalBufferResource) + { + descriptor = context->inputDescriptors[DS_PREPARE_NORMALS_FROM_INPUT_NORMALS].cpuDescriptor; + device->CreateShaderResourceView(info->normalBufferResource, &info->normalBufferSrvDesc, descriptor); + } + + // ssao buffer output + descriptor = context->outputDescriptors[DS_BILATERAL_UPSAMPLE_PING].cpuDescriptor; + device->CreateUnorderedAccessView(info->outputResource, NULL, &info->outputUavDesc, descriptor); + + descriptor = context->outputDescriptors[DS_BILATERAL_UPSAMPLE_PONG].cpuDescriptor; + device->CreateUnorderedAccessView(info->outputResource, NULL, &info->outputUavDesc, descriptor); + + descriptor = context->outputDescriptors[DS_APPLY_PING].cpuDescriptor; + device->CreateUnorderedAccessView(info->outputResource, NULL, &info->outputUavDesc, descriptor); + + descriptor = context->outputDescriptors[DS_APPLY_PONG].cpuDescriptor; + device->CreateUnorderedAccessView(info->outputResource, NULL, &info->outputUavDesc, descriptor); + + // load counter input + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = context->loadCounter.format; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE1D; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Texture1D.MostDetailedMip = 0; + srvDesc.Texture1D.MipLevels = 1; + + for (uint32_t pass = 0; pass < 4; ++pass) + { + descriptor = 
context->inputDescriptors[DS_GENERATE_ADAPTIVE_0 + pass].cpuDescriptor; + descriptor.ptr += 2 * context->cbvSrvUavHeap.descriptorElementSize; + device->CreateShaderResourceView(context->loadCounter.resource, &srvDesc, descriptor); + } + + // load counter output + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = context->loadCounter.format; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE1D; + uavDesc.Texture1D.MipSlice = 0; + + descriptor = context->outputDescriptors[DS_POSTPROCESS_IMPORTANCE_MAP_B].cpuDescriptor; + descriptor.ptr += 1 * context->cbvSrvUavHeap.descriptorElementSize; + device->CreateUnorderedAccessView(context->loadCounter.resource, NULL, &uavDesc, descriptor); + + descriptor = context->outputDescriptors[DS_CLEAR_LOAD_COUNTER].cpuDescriptor; + device->CreateUnorderedAccessView(context->loadCounter.resource, NULL, &uavDesc, descriptor); + } + + + return FFX_CACAO_STATUS_OK; + +error_init_textures: + for (uint32_t i = 0; i < numTexturesInited; ++i) + { + context->textures[i]->Release(); + } + + return errorStatus; +} + +FFX_CACAO_Status FFX_CACAO_D3D12DestroyScreenSizeDependentResources(FFX_CACAO_D3D12Context* context) +{ + if (context == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedD3D12ContextPointer(context); + + for (uint32_t i = 0; i < NUM_TEXTURES; ++i) + { + context->textures[i]->Release(); + } + + return FFX_CACAO_STATUS_OK; +} + +FFX_CACAO_Status FFX_CACAO_D3D12UpdateSettings(FFX_CACAO_D3D12Context* context, const FFX_CACAO_Settings* settings) +{ + if (context == NULL || settings == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedD3D12ContextPointer(context); + + memcpy(&context->settings, settings, sizeof(*settings)); + + return FFX_CACAO_STATUS_OK; +} + +static inline void computeShaderDraw(FFX_CACAO_D3D12Context* context, ComputeShaderID computeShaderID, ID3D12GraphicsCommandList* commandList, D3D12_GPU_VIRTUAL_ADDRESS constantBuffer, DescriptorSetID 
descriptorSetID, uint32_t width, uint32_t height, uint32_t depth) +{ + FFX_CACAO_ASSERT(computeShaderID); + FFX_CACAO_ASSERT(commandList); + + DescriptorSetMetaData dsMetaData = DESCRIPTOR_SET_META_DATA[descriptorSetID]; + DescriptorSetLayoutMetaData dslMetaData = DESCRIPTOR_SET_LAYOUT_META_DATA[dsMetaData.descriptorSetLayoutID]; + + commandList->SetComputeRootSignature(context->csRootSignatures[computeShaderID]); + + int params = 0; + commandList->SetComputeRootConstantBufferView(params++, constantBuffer); + if (dslMetaData.numOutputs) + { + commandList->SetComputeRootDescriptorTable(params++, context->outputDescriptors[descriptorSetID].gpuDescriptor); + } + if (dslMetaData.numInputs) + { + commandList->SetComputeRootDescriptorTable(params++, context->inputDescriptors[descriptorSetID].gpuDescriptor); + } + + commandList->SetPipelineState(context->computeShader[computeShaderID]); + commandList->Dispatch(width, height, depth); +} + +FFX_CACAO_Status FFX_CACAO_D3D12Draw(FFX_CACAO_D3D12Context* context, ID3D12GraphicsCommandList* commandList, const FFX_CACAO_Matrix4x4* proj, const FFX_CACAO_Matrix4x4* normalsToView) +{ + if (context == NULL || commandList == NULL || proj == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedD3D12ContextPointer(context); + + +#ifdef FFX_CACAO_ENABLE_PROFILING +#define GET_TIMESTAMP(name) gpuTimerGetTimestamp(&context->gpuTimer, commandList, TIMESTAMP_##name) +#else +#define GET_TIMESTAMP(name) +#endif + FFX_CACAO_BufferSizeInfo *bsi = &context->bufferSizeInfo; + + + USER_MARKER("FidelityFX CACAO"); + + constantBufferRingStartFrame(&context->constantBufferRing); + +#ifdef FFX_CACAO_ENABLE_PROFILING + gpuTimerStartFrame(&context->gpuTimer); +#endif + + GET_TIMESTAMP(BEGIN); + + // set the descriptor heaps + { + ID3D12DescriptorHeap *descriptorHeaps[] = { context->cbvSrvUavHeap.heap }; + commandList->SetDescriptorHeaps(FFX_CACAO_ARRAY_SIZE(descriptorHeaps), descriptorHeaps); + } + + // clear load counter + { + 
UINT clearValue[] = { 0, 0, 0, 0 }; + commandList->ClearUnorderedAccessViewUint(context->loadCounterUav.gpuDescriptor, context->loadCounterUav.cpuVisibleCpuDescriptor, context->loadCounter.resource, clearValue, 0, NULL); + } + + // move this to initialisation + D3D12_GPU_VIRTUAL_ADDRESS cbCACAOHandle; + FFX_CACAO_Constants *pCACAOConsts; + D3D12_GPU_VIRTUAL_ADDRESS cbCACAOPerPassHandle[4]; + FFX_CACAO_Constants *pPerPassConsts[4]; + + // upload constant buffers + { + constantBufferRingAlloc(&context->constantBufferRing, sizeof(*pCACAOConsts), (void**)&pCACAOConsts, &cbCACAOHandle); + FFX_CACAO_UpdateConstants(pCACAOConsts, &context->settings, bsi, proj, normalsToView); + + for (int i = 0; i < 4; ++i) + { + constantBufferRingAlloc(&context->constantBufferRing, sizeof(*pPerPassConsts[0]), (void**)&pPerPassConsts[i], &cbCACAOPerPassHandle[i]); + FFX_CACAO_UpdateConstants(pPerPassConsts[i], &context->settings, bsi, proj, normalsToView); + FFX_CACAO_UpdatePerPassConstants(pPerPassConsts[i], &context->settings, &context->bufferSizeInfo, i); + } + } + + // prepare depths, normals and mips + { + USER_MARKER("Prepare downsampled depths, normals and mips"); + + + switch (context->settings.qualityLevel) + { + case FFX_CACAO_QUALITY_LOWEST: { + uint32_t dispatchWidth = dispatchSize(FFX_CACAO_PREPARE_DEPTHS_HALF_WIDTH, bsi->deinterleavedDepthBufferWidth); + uint32_t dispatchHeight = dispatchSize(FFX_CACAO_PREPARE_DEPTHS_HALF_HEIGHT, bsi->deinterleavedDepthBufferHeight); + ComputeShaderID prepareDepthsHalf = context->useDownsampledSsao ? 
CS_PREPARE_DOWNSAMPLED_DEPTHS_HALF : CS_PREPARE_NATIVE_DEPTHS_HALF; + computeShaderDraw(context, prepareDepthsHalf, commandList, cbCACAOHandle, DS_PREPARE_DEPTHS, dispatchWidth, dispatchHeight, 1); + break; + } + case FFX_CACAO_QUALITY_LOW: { + uint32_t dispatchWidth = dispatchSize(FFX_CACAO_PREPARE_DEPTHS_WIDTH, bsi->deinterleavedDepthBufferWidth); + uint32_t dispatchHeight = dispatchSize(FFX_CACAO_PREPARE_DEPTHS_HEIGHT, bsi->deinterleavedDepthBufferHeight); + ComputeShaderID prepareDepths = context->useDownsampledSsao ? CS_PREPARE_DOWNSAMPLED_DEPTHS : CS_PREPARE_NATIVE_DEPTHS; + computeShaderDraw(context, prepareDepths, commandList, cbCACAOHandle, DS_PREPARE_DEPTHS, dispatchWidth, dispatchHeight, 1); + break; + } + default: { + uint32_t dispatchWidth = dispatchSize(FFX_CACAO_PREPARE_DEPTHS_AND_MIPS_WIDTH, bsi->deinterleavedDepthBufferWidth); + uint32_t dispatchHeight = dispatchSize(FFX_CACAO_PREPARE_DEPTHS_AND_MIPS_HEIGHT, bsi->deinterleavedDepthBufferHeight); + ComputeShaderID prepareDepthsAndMips = context->useDownsampledSsao ? CS_PREPARE_DOWNSAMPLED_DEPTHS_AND_MIPS : CS_PREPARE_NATIVE_DEPTHS_AND_MIPS; + computeShaderDraw(context, prepareDepthsAndMips, commandList, cbCACAOHandle, DS_PREPARE_DEPTHS_MIPS, dispatchWidth, dispatchHeight, 1); + break; + } + } + + if (context->settings.generateNormals) + { + uint32_t dispatchWidth = dispatchSize(FFX_CACAO_PREPARE_NORMALS_WIDTH, bsi->ssaoBufferWidth); + uint32_t dispatchHeight = dispatchSize(FFX_CACAO_PREPARE_NORMALS_HEIGHT, bsi->ssaoBufferHeight); + ComputeShaderID prepareNormals = context->useDownsampledSsao ? 
CS_PREPARE_DOWNSAMPLED_NORMALS : CS_PREPARE_NATIVE_NORMALS; + computeShaderDraw(context, prepareNormals, commandList, cbCACAOHandle, DS_PREPARE_NORMALS, dispatchWidth, dispatchHeight, 1); + } + else + { + uint32_t dispatchWidth = dispatchSize(PREPARE_NORMALS_FROM_INPUT_NORMALS_WIDTH, bsi->ssaoBufferWidth); + uint32_t dispatchHeight = dispatchSize(PREPARE_NORMALS_FROM_INPUT_NORMALS_HEIGHT, bsi->ssaoBufferHeight); + ComputeShaderID prepareNormalsFromInputNormals = context->useDownsampledSsao ? CS_PREPARE_DOWNSAMPLED_NORMALS_FROM_INPUT_NORMALS : CS_PREPARE_NATIVE_NORMALS_FROM_INPUT_NORMALS; + computeShaderDraw(context, prepareNormalsFromInputNormals, commandList, cbCACAOHandle, DS_PREPARE_NORMALS_FROM_INPUT_NORMALS, dispatchWidth, dispatchHeight, 1); + } + + GET_TIMESTAMP(PREPARE); + } + + // deinterleaved depths and normals are now read only resources, also used in the next stage + { + D3D12_RESOURCE_BARRIER resourceBarriers[] = { + CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_DEINTERLEAVED_DEPTHS], D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_DEINTERLEAVED_NORMALS], D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + }; + commandList->ResourceBarrier(FFX_CACAO_ARRAY_SIZE(resourceBarriers), resourceBarriers); + } + + // base pass for highest quality setting + if (context->settings.qualityLevel == FFX_CACAO_QUALITY_HIGHEST) + { + USER_MARKER("Generate High Quality Base Pass"); + + // SSAO + { + USER_MARKER("SSAO"); + + for (int pass = 0; pass < 4; ++pass) + { + uint32_t dispatchWidth = dispatchSize(FFX_CACAO_GENERATE_WIDTH, bsi->ssaoBufferWidth); + uint32_t dispatchHeight = dispatchSize(FFX_CACAO_GENERATE_HEIGHT, bsi->ssaoBufferHeight); + DescriptorSetID ds = (DescriptorSetID)(DS_GENERATE_ADAPTIVE_BASE_0 + pass); + computeShaderDraw(context, CS_GENERATE_Q3_BASE, commandList, cbCACAOPerPassHandle[pass], ds, 
dispatchWidth, dispatchHeight, 1); + } + GET_TIMESTAMP(BASE_SSAO_PASS); + } + + // results written by base pass are now a reaad only resource, used in next stage + commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_SSAO_BUFFER_PONG], D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE)); + + // generate importance map + { + USER_MARKER("Importance Map"); + + CD3DX12_RESOURCE_BARRIER barriers[2]; + UINT barrierCount; + + uint32_t dispatchWidth = dispatchSize(IMPORTANCE_MAP_WIDTH, bsi->importanceMapWidth); + uint32_t dispatchHeight = dispatchSize(IMPORTANCE_MAP_HEIGHT, bsi->importanceMapHeight); + + computeShaderDraw(context, CS_GENERATE_IMPORTANCE_MAP, commandList, cbCACAOHandle, DS_GENERATE_IMPORTANCE_MAP, dispatchWidth, dispatchHeight, 1); + + barrierCount = 0; + barriers[barrierCount++] = CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_IMPORTANCE_MAP], D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(barrierCount, barriers); + + computeShaderDraw(context, CS_POSTPROCESS_IMPORTANCE_MAP_A, commandList, cbCACAOHandle, DS_POSTPROCESS_IMPORTANCE_MAP_A, dispatchWidth, dispatchHeight, 1); + + barrierCount = 0; + barriers[barrierCount++] = CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_IMPORTANCE_MAP], D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + barriers[barrierCount++] = CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_IMPORTANCE_MAP_PONG], D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(barrierCount, barriers); + + computeShaderDraw(context, CS_POSTPROCESS_IMPORTANCE_MAP_B, commandList, cbCACAOHandle, DS_POSTPROCESS_IMPORTANCE_MAP_B, dispatchWidth, dispatchHeight, 1); + + barrierCount = 0; + barriers[barrierCount++] = 
CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_IMPORTANCE_MAP], D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + barriers[barrierCount++] = CD3DX12_RESOURCE_BARRIER::Transition(context->loadCounter.resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE); + commandList->ResourceBarrier(barrierCount, barriers); + + GET_TIMESTAMP(IMPORTANCE_MAP); + } + } + + int blurPassCount = context->settings.blurPassCount; + blurPassCount = FFX_CACAO_CLAMP(blurPassCount, 0, MAX_BLUR_PASSES); + + // main ssao generation + { + USER_MARKER("Generate SSAO"); + + // ComputeShader *generate = &context->generateSSAO[FFX_CACAO_MAX(0, context->settings.qualityLevel - 1)]; + ComputeShaderID generate = (ComputeShaderID)(CS_GENERATE_Q0 + FFX_CACAO_MAX(0, context->settings.qualityLevel - 1)); + + uint32_t dispatchWidth, dispatchHeight, dispatchDepth; + + switch (context->settings.qualityLevel) + { + case FFX_CACAO_QUALITY_LOWEST: + case FFX_CACAO_QUALITY_LOW: + case FFX_CACAO_QUALITY_MEDIUM: + dispatchWidth = dispatchSize(FFX_CACAO_GENERATE_SPARSE_WIDTH, bsi->ssaoBufferWidth); + dispatchWidth = (dispatchWidth + 4) / 5; + dispatchHeight = dispatchSize(FFX_CACAO_GENERATE_SPARSE_HEIGHT, bsi->ssaoBufferHeight); + dispatchDepth = 5; + break; + case FFX_CACAO_QUALITY_HIGH: + case FFX_CACAO_QUALITY_HIGHEST: + dispatchWidth = dispatchSize(FFX_CACAO_GENERATE_WIDTH, bsi->ssaoBufferWidth); + dispatchHeight = dispatchSize(FFX_CACAO_GENERATE_HEIGHT, bsi->ssaoBufferHeight); + dispatchDepth = 1; + break; + } + + for (int pass = 0; pass < 4; ++pass) + { + if (context->settings.qualityLevel == FFX_CACAO_QUALITY_LOWEST && (pass == 1 || pass == 2)) + { + continue; + } + + DescriptorSetID ds = context->settings.qualityLevel == FFX_CACAO_QUALITY_HIGHEST ? 
(DescriptorSetID)(DS_GENERATE_ADAPTIVE_0 + pass) : (DescriptorSetID)(DS_GENERATE_0 + pass); + computeShaderDraw(context, generate, commandList, cbCACAOPerPassHandle[pass], ds, dispatchWidth, dispatchHeight, dispatchDepth); + } + + GET_TIMESTAMP(GENERATE_SSAO); + } + + // de-interleaved blur + if (blurPassCount) + { + // only need to transition pong to writable if we didn't already use it in the base pass + CD3DX12_RESOURCE_BARRIER barriers[] = { + CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_SSAO_BUFFER_PING], D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_SSAO_BUFFER_PONG], D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS), + }; + commandList->ResourceBarrier(context->settings.qualityLevel == FFX_CACAO_QUALITY_HIGHEST ? 2 : 1, barriers); + + USER_MARKER("Deinterleaved blur"); + + for (int pass = 0; pass < 4; ++pass) + { + if (context->settings.qualityLevel == FFX_CACAO_QUALITY_LOWEST && (pass == 1 || pass == 2)) + { + continue; + } + + uint32_t w = 4 * FFX_CACAO_BLUR_WIDTH - 2 * blurPassCount; + uint32_t h = 3 * FFX_CACAO_BLUR_HEIGHT - 2 * blurPassCount; + uint32_t blurPassIndex = blurPassCount - 1; + uint32_t dispatchWidth = dispatchSize(w, bsi->ssaoBufferWidth); + uint32_t dispatchHeight = dispatchSize(h, bsi->ssaoBufferHeight); + ComputeShaderID edgeSensitiveBlur = (ComputeShaderID)(CS_EDGE_SENSITIVE_BLUR_1 + blurPassCount - 1); + DescriptorSetID ds = (DescriptorSetID)(DS_EDGE_SENSITIVE_BLUR_0 + pass); + computeShaderDraw(context, edgeSensitiveBlur, commandList, cbCACAOPerPassHandle[pass], ds, dispatchWidth, dispatchHeight, 1); + } + + GET_TIMESTAMP(EDGE_SENSITIVE_BLUR); + + commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_SSAO_BUFFER_PONG], D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE)); + } + else + { + 
commandList->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_SSAO_BUFFER_PING], D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE)); + } + + + if (context->useDownsampledSsao) + { + USER_MARKER("Upscale"); + + DescriptorSetID ds = blurPassCount ? DS_BILATERAL_UPSAMPLE_PONG : DS_BILATERAL_UPSAMPLE_PING; + ComputeShaderID upscaler; + switch (context->settings.qualityLevel) + { + case FFX_CACAO_QUALITY_LOWEST: + upscaler = CS_UPSCALE_BILATERAL_5X5_HALF; + break; + case FFX_CACAO_QUALITY_LOW: + case FFX_CACAO_QUALITY_MEDIUM: + upscaler = CS_UPSCALE_BILATERAL_5X5_NON_SMART; + break; + case FFX_CACAO_QUALITY_HIGH: + case FFX_CACAO_QUALITY_HIGHEST: + upscaler = CS_UPSCALE_BILATERAL_5X5_SMART; + break; + } + uint32_t dispatchWidth = dispatchSize(2 * FFX_CACAO_BILATERAL_UPSCALE_WIDTH, bsi->inputOutputBufferWidth); + uint32_t dispatchHeight = dispatchSize(2 * FFX_CACAO_BILATERAL_UPSCALE_HEIGHT, bsi->inputOutputBufferHeight); + computeShaderDraw(context, upscaler, commandList, cbCACAOHandle, ds, dispatchWidth, dispatchHeight, 1); + + GET_TIMESTAMP(BILATERAL_UPSAMPLE); + } + else + { + USER_MARKER("Create Output"); + DescriptorSetID ds = blurPassCount ? 
DS_APPLY_PONG : DS_APPLY_PING; + uint32_t dispatchWidth = dispatchSize(FFX_CACAO_APPLY_WIDTH, bsi->inputOutputBufferWidth); + uint32_t dispatchHeight = dispatchSize(FFX_CACAO_APPLY_HEIGHT, bsi->inputOutputBufferHeight); + switch (context->settings.qualityLevel) + { + case FFX_CACAO_QUALITY_LOWEST: + computeShaderDraw(context, CS_NON_SMART_HALF_APPLY, commandList, cbCACAOHandle, ds, dispatchWidth, dispatchHeight, 1); + break; + case FFX_CACAO_QUALITY_LOW: + computeShaderDraw(context, CS_NON_SMART_APPLY, commandList, cbCACAOHandle, ds, dispatchWidth, dispatchHeight, 1); + break; + default: + computeShaderDraw(context, CS_APPLY, commandList, cbCACAOHandle, ds, dispatchWidth, dispatchHeight, 1); + break; + } + GET_TIMESTAMP(APPLY); + } + + // end frame resource barrier + { + uint32_t numBarriers = 0; + D3D12_RESOURCE_BARRIER resourceBarriers[10] = {}; + resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_DEINTERLEAVED_DEPTHS], D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_DEINTERLEAVED_NORMALS], D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->outputResource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_GENERIC_READ); + resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_SSAO_BUFFER_PING], D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + if (context->settings.qualityLevel == FFX_CACAO_QUALITY_HIGHEST || blurPassCount) + { + resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_SSAO_BUFFER_PONG], D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } + if (context->settings.qualityLevel == 
FFX_CACAO_QUALITY_HIGHEST) + { + resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_IMPORTANCE_MAP], D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->textures[TEXTURE_IMPORTANCE_MAP_PONG], D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + resourceBarriers[numBarriers++] = CD3DX12_RESOURCE_BARRIER::Transition(context->loadCounter.resource, D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } + commandList->ResourceBarrier(numBarriers, resourceBarriers); + } + +#ifdef FFX_CACAO_ENABLE_PROFILING + gpuTimerEndFrame(&context->gpuTimer, commandList); +#endif + + return FFX_CACAO_STATUS_OK; + +#undef GET_TIMESTAMP +} + +#ifdef FFX_CACAO_ENABLE_PROFILING +FFX_CACAO_Status FFX_CACAO_D3D12GetDetailedTimings(FFX_CACAO_D3D12Context* context, FFX_CACAO_DetailedTiming* timings) +{ + if (context == NULL || timings == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedD3D12ContextPointer(context); + + gpuTimerCollectTimings(&context->gpuTimer, timings); + + return FFX_CACAO_STATUS_OK; +} +#endif +#endif + +#ifdef FFX_CACAO_ENABLE_VULKAN +inline static void setObjectName(VkDevice device, FFX_CACAO_VkContext* context, VkObjectType type, uint64_t handle, const char* name) +{ + if (!context->vkSetDebugUtilsObjectName) + { + return; + } + + VkDebugUtilsObjectNameInfoEXT info = {}; + info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT; + info.pNext = NULL; + info.objectType = type; + info.objectHandle = handle; + info.pObjectName = name; + + VkResult result = context->vkSetDebugUtilsObjectName(device, &info); + FFX_CACAO_ASSERT(result == VK_SUCCESS); +} + +inline static uint32_t getBestMemoryHeapIndex(VkPhysicalDevice physicalDevice, VkMemoryRequirements memoryRequirements, VkMemoryPropertyFlags 
desiredProperties) +{ + VkPhysicalDeviceMemoryProperties memoryProperties; + vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memoryProperties); + + uint32_t chosenMemoryTypeIndex = VK_MAX_MEMORY_TYPES; + for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; ++i) + { + uint32_t typeBit = 1 << i; + // can we allocate to memory of this type + if (memoryRequirements.memoryTypeBits & typeBit) + { + VkMemoryType currentMemoryType = memoryProperties.memoryTypes[i]; + // do we want to allocate to memory of this type + if ((currentMemoryType.propertyFlags & desiredProperties) == desiredProperties) + { + chosenMemoryTypeIndex = i; + break; + } + } + } + return chosenMemoryTypeIndex; +} + +size_t FFX_CACAO_VkGetContextSize() +{ + return sizeof(FFX_CACAO_VkContext) + alignof(FFX_CACAO_VkContext) - 1; +} + +FFX_CACAO_Status FFX_CACAO_VkInitContext(FFX_CACAO_VkContext* context, const FFX_CACAO_VkCreateInfo* info) +{ + if (context == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + if (info == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedVkContextPointer(context); + memset(context, 0, sizeof(*context)); + + VkDevice device = info->device; + VkPhysicalDevice physicalDevice = info->physicalDevice; + VkResult result; + FFX_CACAO_Bool use16Bit = info->flags & FFX_CACAO_VK_CREATE_USE_16_BIT ? 
FFX_CACAO_TRUE : FFX_CACAO_FALSE; + FFX_CACAO_Status errorStatus = FFX_CACAO_STATUS_FAILED; + + context->device = device; + context->physicalDevice = physicalDevice; + + if (info->flags & FFX_CACAO_VK_CREATE_USE_DEBUG_MARKERS) + { + context->vkCmdDebugMarkerBegin = (PFN_vkCmdDebugMarkerBeginEXT)vkGetDeviceProcAddr(device, "vkCmdDebugMarkerBeginEXT"); + context->vkCmdDebugMarkerEnd = (PFN_vkCmdDebugMarkerEndEXT)vkGetDeviceProcAddr(device, "vkCmdDebugMarkerEndEXT"); + } + if (info->flags & FFX_CACAO_VK_CREATE_USE_DEBUG_MARKERS) + { + context->vkSetDebugUtilsObjectName = (PFN_vkSetDebugUtilsObjectNameEXT)vkGetDeviceProcAddr(device, "vkSetDebugUtilsObjectNameEXT"); + } + + uint32_t numSamplersInited = 0; + uint32_t numDescriptorSetLayoutsInited = 0; + uint32_t numPipelineLayoutsInited = 0; + uint32_t numShaderModulesInited = 0; + uint32_t numPipelinesInited = 0; + uint32_t numConstantBackBuffersInited = 0; + + VkSampler samplers[NUM_SAMPLERS]; + { + VkSamplerCreateInfo samplerCreateInfo = {}; + samplerCreateInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + samplerCreateInfo.pNext = NULL; + samplerCreateInfo.flags = 0; + samplerCreateInfo.magFilter = VK_FILTER_LINEAR; + samplerCreateInfo.minFilter = VK_FILTER_LINEAR; + samplerCreateInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + samplerCreateInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCreateInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCreateInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + samplerCreateInfo.mipLodBias = 0.0f; + samplerCreateInfo.anisotropyEnable = VK_FALSE; + samplerCreateInfo.compareEnable = VK_FALSE; + samplerCreateInfo.minLod = -1000.0f; + samplerCreateInfo.maxLod = 1000.0f; + samplerCreateInfo.unnormalizedCoordinates = VK_FALSE; + + result = vkCreateSampler(device, &samplerCreateInfo, NULL, &samplers[numSamplersInited]); + if (result != VK_SUCCESS) + { + goto error_init_samplers; + } + setObjectName(device, context, VK_OBJECT_TYPE_SAMPLER, 
(uint64_t)samplers[numSamplersInited], "FFX_CACAO_POINT_CLAMP_SAMPLER"); + ++numSamplersInited; + + samplerCreateInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + samplerCreateInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + samplerCreateInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + + result = vkCreateSampler(device, &samplerCreateInfo, NULL, &samplers[numSamplersInited]); + if (result != VK_SUCCESS) + { + goto error_init_samplers; + } + setObjectName(device, context, VK_OBJECT_TYPE_SAMPLER, (uint64_t)samplers[numSamplersInited], "FFX_CACAO_POINT_MIRROR_SAMPLER"); + ++numSamplersInited; + + samplerCreateInfo.magFilter = VK_FILTER_LINEAR; + samplerCreateInfo.minFilter = VK_FILTER_LINEAR; + samplerCreateInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + samplerCreateInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCreateInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + samplerCreateInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE; + + result = vkCreateSampler(device, &samplerCreateInfo, NULL, &samplers[numSamplersInited]); + if (result != VK_SUCCESS) + { + goto error_init_samplers; + } + setObjectName(device, context, VK_OBJECT_TYPE_SAMPLER, (uint64_t)samplers[numSamplersInited], "FFX_CACAO_LINEAR_CLAMP_SAMPLER"); + ++numSamplersInited; + + samplerCreateInfo.magFilter = VK_FILTER_NEAREST; + samplerCreateInfo.minFilter = VK_FILTER_NEAREST; + samplerCreateInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + + result = vkCreateSampler(device, &samplerCreateInfo, NULL, &samplers[numSamplersInited]); + if (result != VK_SUCCESS) + { + goto error_init_samplers; + } + setObjectName(device, context, VK_OBJECT_TYPE_SAMPLER, (uint64_t)samplers[numSamplersInited], "FFX_CACAO_VIEWSPACE_DEPTH_TAP_SAMPLER"); + ++numSamplersInited; + + samplerCreateInfo.magFilter = VK_FILTER_NEAREST; + samplerCreateInfo.minFilter = VK_FILTER_NEAREST; + samplerCreateInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + 
samplerCreateInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + samplerCreateInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + samplerCreateInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT; + + result = vkCreateSampler(device, &samplerCreateInfo, NULL, &samplers[numSamplersInited]); + if (result != VK_SUCCESS) + { + goto error_init_samplers; + } + setObjectName(device, context, VK_OBJECT_TYPE_SAMPLER, (uint64_t)samplers[numSamplersInited], "FFX_CACAO_REAL_POINT_CLAMP_SAMPLER"); + ++numSamplersInited; + + for (uint32_t i = 0; i < FFX_CACAO_ARRAY_SIZE(samplers); ++i) + { + context->samplers[i] = samplers[i]; + } + } + + // create descriptor set layouts + for ( ; numDescriptorSetLayoutsInited < NUM_DESCRIPTOR_SET_LAYOUTS; ++numDescriptorSetLayoutsInited) + { + VkDescriptorSetLayout descriptorSetLayout; + DescriptorSetLayoutMetaData dslMetaData = DESCRIPTOR_SET_LAYOUT_META_DATA[numDescriptorSetLayoutsInited]; + + VkDescriptorSetLayoutBinding bindings[MAX_DESCRIPTOR_BINDINGS] = {}; + uint32_t numBindings = 0; + for (uint32_t samplerBinding = 0; samplerBinding < FFX_CACAO_ARRAY_SIZE(samplers); ++samplerBinding) + { + VkDescriptorSetLayoutBinding binding = {}; + binding.binding = samplerBinding; + binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; + binding.descriptorCount = 1; + binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + binding.pImmutableSamplers = &samplers[samplerBinding]; + bindings[numBindings++] = binding; + } + + // constant buffer binding + { + VkDescriptorSetLayoutBinding binding = {}; + binding.binding = 10; + binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + binding.descriptorCount = 1; + binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + binding.pImmutableSamplers = NULL; + bindings[numBindings++] = binding; + } + + for (uint32_t inputBinding = 0; inputBinding < dslMetaData.numInputs; ++inputBinding) + { + VkDescriptorSetLayoutBinding binding = {}; + binding.binding = 20 + inputBinding; + 
binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + binding.descriptorCount = 1; + binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + binding.pImmutableSamplers = NULL; + bindings[numBindings++] = binding; + } + + for (uint32_t outputBinding = 0; outputBinding < dslMetaData.numOutputs; ++outputBinding) + { + VkDescriptorSetLayoutBinding binding = {}; + binding.binding = 30 + outputBinding; // g_PrepareDepthsOut register(u0) + binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + binding.descriptorCount = 1; + binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + binding.pImmutableSamplers = NULL; + bindings[numBindings++] = binding; + } + + VkDescriptorSetLayoutCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + info.pNext = NULL; + info.flags = 0; + info.bindingCount = numBindings; + info.pBindings = bindings; + + result = vkCreateDescriptorSetLayout(device, &info, NULL, &descriptorSetLayout); + if (result != VK_SUCCESS) + { + goto error_init_descriptor_set_layouts; + } + setObjectName(device, context, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, (uint64_t)descriptorSetLayout, dslMetaData.name); + + context->descriptorSetLayouts[numDescriptorSetLayoutsInited] = descriptorSetLayout; + } + + // create pipeline layouts + for ( ; numPipelineLayoutsInited < NUM_DESCRIPTOR_SET_LAYOUTS; ++numPipelineLayoutsInited) + { + VkPipelineLayout pipelineLayout; + + DescriptorSetLayoutMetaData dslMetaData = DESCRIPTOR_SET_LAYOUT_META_DATA[numPipelineLayoutsInited]; + + VkPipelineLayoutCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + info.pNext = NULL; + info.flags = 0; + info.setLayoutCount = 1; + info.pSetLayouts = &context->descriptorSetLayouts[numPipelineLayoutsInited]; + info.pushConstantRangeCount = 0; + info.pPushConstantRanges = NULL; + + result = vkCreatePipelineLayout(device, &info, NULL, &pipelineLayout); + if (result != VK_SUCCESS) + { + goto error_init_pipeline_layouts; + } + 
setObjectName(device, context, VK_OBJECT_TYPE_PIPELINE_LAYOUT, (uint64_t)pipelineLayout, dslMetaData.name); + + context->pipelineLayouts[numPipelineLayoutsInited] = pipelineLayout; + } + + for ( ; numShaderModulesInited < NUM_COMPUTE_SHADERS; ++numShaderModulesInited) + { + VkShaderModule shaderModule; + ComputeShaderMetaData csMetaData = COMPUTE_SHADER_META_DATA[numShaderModulesInited]; + + VkShaderModuleCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + info.pNext = 0; + info.flags = 0; + ComputeShaderSPIRV spirv = use16Bit ? COMPUTE_SHADER_SPIRV_16[numShaderModulesInited] : COMPUTE_SHADER_SPIRV_32[numShaderModulesInited]; + info.codeSize = spirv.len; + info.pCode = spirv.spirv; + + result = vkCreateShaderModule(device, &info, NULL, &shaderModule); + if (result != VK_SUCCESS) + { + goto error_init_shader_modules; + } + setObjectName(device, context, VK_OBJECT_TYPE_SHADER_MODULE, (uint64_t)shaderModule, csMetaData.objectName); + + context->computeShaders[numShaderModulesInited] = shaderModule; + } + + for ( ; numPipelinesInited < NUM_COMPUTE_SHADERS; ++numPipelinesInited) + { + VkPipeline pipeline; + ComputeShaderMetaData csMetaData = COMPUTE_SHADER_META_DATA[numPipelinesInited]; + + VkPipelineShaderStageCreateInfo stageInfo = {}; + stageInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + stageInfo.pNext = NULL; + stageInfo.flags = 0; + stageInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT; + stageInfo.module = context->computeShaders[numPipelinesInited]; + stageInfo.pName = csMetaData.name; + stageInfo.pSpecializationInfo = NULL; + + VkComputePipelineCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + info.pNext = NULL; + info.flags = 0; + info.stage = stageInfo; + info.layout = context->pipelineLayouts[csMetaData.descriptorSetLayoutID]; + info.basePipelineHandle = VK_NULL_HANDLE; + info.basePipelineIndex = 0; + + result = vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &info, NULL, 
&pipeline); + if (result != VK_SUCCESS) + { + goto error_init_pipelines; + } + setObjectName(device, context, VK_OBJECT_TYPE_PIPELINE, (uint64_t)pipeline, csMetaData.objectName); + + context->computePipelines[numPipelinesInited] = pipeline; + } + + // create descriptor pool + { + VkDescriptorPool descriptorPool; + + VkDescriptorPoolSize poolSizes[4] = {}; + poolSizes[0].type = VK_DESCRIPTOR_TYPE_SAMPLER; + poolSizes[0].descriptorCount = NUM_BACK_BUFFERS * NUM_DESCRIPTOR_SETS * 5; + poolSizes[1].type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + poolSizes[1].descriptorCount = NUM_BACK_BUFFERS * NUM_DESCRIPTOR_SETS * 7; + poolSizes[2].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + poolSizes[2].descriptorCount = NUM_BACK_BUFFERS * NUM_DESCRIPTOR_SETS * 4; + poolSizes[3].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + poolSizes[3].descriptorCount = NUM_BACK_BUFFERS * NUM_DESCRIPTOR_SETS * 1; + + VkDescriptorPoolCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + info.pNext = NULL; + info.flags = 0; + info.maxSets = NUM_BACK_BUFFERS * NUM_DESCRIPTOR_SETS; + info.poolSizeCount = FFX_CACAO_ARRAY_SIZE(poolSizes); + info.pPoolSizes = poolSizes; + + result = vkCreateDescriptorPool(device, &info, NULL, &descriptorPool); + if (result != VK_SUCCESS) + { + goto error_init_descriptor_pool; + } + setObjectName(device, context, VK_OBJECT_TYPE_DESCRIPTOR_POOL, (uint64_t)descriptorPool, "FFX_CACAO_DESCRIPTOR_POOL"); + + context->descriptorPool = descriptorPool; + } + + // allocate descriptor sets + { + VkDescriptorSetLayout descriptorSetLayouts[NUM_DESCRIPTOR_SETS]; + for (uint32_t i = 0; i < NUM_DESCRIPTOR_SETS; ++i) { + descriptorSetLayouts[i] = context->descriptorSetLayouts[DESCRIPTOR_SET_META_DATA[i].descriptorSetLayoutID]; + } + + for (uint32_t i = 0; i < NUM_BACK_BUFFERS; ++i) { + VkDescriptorSetAllocateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + info.pNext = NULL; + info.descriptorPool = context->descriptorPool; + 
info.descriptorSetCount = FFX_CACAO_ARRAY_SIZE(descriptorSetLayouts); // FFX_CACAO_ARRAY_SIZE(context->descriptorSetLayouts); + info.pSetLayouts = descriptorSetLayouts; // context->descriptorSetLayouts; + + result = vkAllocateDescriptorSets(device, &info, context->descriptorSets[i]); + if (result != VK_SUCCESS) + { + goto error_allocate_descriptor_sets; + } + } + + char name[1024]; + for (uint32_t j = 0; j < NUM_BACK_BUFFERS; ++j) { + for (uint32_t i = 0; i < NUM_DESCRIPTOR_SETS; ++i) { + DescriptorSetMetaData dsMetaData = DESCRIPTOR_SET_META_DATA[i]; + snprintf(name, FFX_CACAO_ARRAY_SIZE(name), "%s_%u", dsMetaData.name, j); + setObjectName(device, context, VK_OBJECT_TYPE_DESCRIPTOR_SET, (uint64_t)context->descriptorSets[j][i], name); + } + } + } + + // assign memory to constant buffers + for ( ; numConstantBackBuffersInited < NUM_BACK_BUFFERS; ++numConstantBackBuffersInited) + { + for (uint32_t j = 0; j < 4; ++j) + { + VkBuffer buffer = context->constantBuffer[numConstantBackBuffersInited][j]; + + VkBufferCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + info.pNext = NULL; + info.flags = 0; + info.size = sizeof(FFX_CACAO_Constants); + info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT; + info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + info.queueFamilyIndexCount = 0; + info.pQueueFamilyIndices = NULL; + + result = vkCreateBuffer(device, &info, NULL, &buffer); + if (result != VK_SUCCESS) + { + goto error_init_constant_buffers; + } + char name[1024]; + snprintf(name, FFX_CACAO_ARRAY_SIZE(name), "FFX_CACAO_CONSTANT_BUFFER_PASS_%u_BACK_BUFFER_%u", j, numConstantBackBuffersInited); + setObjectName(device, context, VK_OBJECT_TYPE_BUFFER, (uint64_t)buffer, name); + + VkMemoryRequirements memoryRequirements; + vkGetBufferMemoryRequirements(device, buffer, &memoryRequirements); + + uint32_t chosenMemoryTypeIndex = getBestMemoryHeapIndex(physicalDevice, memoryRequirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (chosenMemoryTypeIndex == VK_MAX_MEMORY_TYPES) + { + vkDestroyBuffer(device, buffer, NULL); + goto error_init_constant_buffers; + } + + VkMemoryAllocateInfo allocationInfo = {}; + allocationInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + allocationInfo.pNext = NULL; + allocationInfo.allocationSize = memoryRequirements.size; + allocationInfo.memoryTypeIndex = chosenMemoryTypeIndex; + + VkDeviceMemory memory; + result = vkAllocateMemory(device, &allocationInfo, NULL, &memory); + if (result != VK_SUCCESS) + { + vkDestroyBuffer(device, buffer, NULL); + goto error_init_constant_buffers; + } + + result = vkBindBufferMemory(device, buffer, memory, 0); + if (result != VK_SUCCESS) + { + vkDestroyBuffer(device, buffer, NULL); + goto error_init_constant_buffers; + } + + context->constantBufferMemory[numConstantBackBuffersInited][j] = memory; + context->constantBuffer[numConstantBackBuffersInited][j] = buffer; + } + } + + // create load counter VkImage + { + VkImage image = VK_NULL_HANDLE; + + VkImageCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + info.pNext = NULL; + info.flags = 0; + info.imageType = VK_IMAGE_TYPE_1D; + info.format = VK_FORMAT_R32_UINT; + info.extent.width = 1; + info.extent.height = 1; + info.extent.depth = 1; + info.mipLevels = 1; + info.arrayLayers = 1; + info.samples = VK_SAMPLE_COUNT_1_BIT; + info.tiling = VK_IMAGE_TILING_OPTIMAL; + info.usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + info.queueFamilyIndexCount = 0; + info.pQueueFamilyIndices = NULL; + info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + result = vkCreateImage(device, &info, NULL, &image); + if (result != VK_SUCCESS) + { + goto error_init_load_counter_image; + } + + setObjectName(device, context, VK_OBJECT_TYPE_IMAGE, (uint64_t)image, "FFX_CACAO_LOAD_COUNTER"); + + VkMemoryRequirements memoryRequirements; + 
vkGetImageMemoryRequirements(device, image, &memoryRequirements); + + uint32_t chosenMemoryTypeIndex = getBestMemoryHeapIndex(physicalDevice, memoryRequirements, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (chosenMemoryTypeIndex == VK_MAX_MEMORY_TYPES) + { + vkDestroyImage(device, image, NULL); + goto error_init_load_counter_image; + } + + VkMemoryAllocateInfo allocationInfo = {}; + allocationInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + allocationInfo.pNext = NULL; + allocationInfo.allocationSize = memoryRequirements.size; + allocationInfo.memoryTypeIndex = chosenMemoryTypeIndex; + + VkDeviceMemory memory; + result = vkAllocateMemory(device, &allocationInfo, NULL, &memory); + if (result != VK_SUCCESS) + { + vkDestroyImage(device, image, NULL); + goto error_init_load_counter_image; + } + + result = vkBindImageMemory(device, image, memory, 0); + if (result != VK_SUCCESS) + { + vkDestroyImage(device, image, NULL); + goto error_init_load_counter_image; + } + + context->loadCounter = image; + context->loadCounterMemory = memory; + } + + // create load counter view + { + VkImageView imageView; + + VkImageViewCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + info.pNext = NULL; + info.flags = 0; + info.image = context->loadCounter; + info.viewType = VK_IMAGE_VIEW_TYPE_1D; + info.format = VK_FORMAT_R32_UINT; + info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; + info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; + info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; + info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; + info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + info.subresourceRange.baseMipLevel = 0; + info.subresourceRange.levelCount = 1; + info.subresourceRange.baseArrayLayer = 0; + info.subresourceRange.layerCount = 1; + + result = vkCreateImageView(device, &info, NULL, &imageView); + if (result != VK_SUCCESS) + { + goto error_init_load_counter_view; + } + + context->loadCounterView = imageView; + } + +#ifdef 
FFX_CACAO_ENABLE_PROFILING + // create timestamp query pool + { + VkQueryPool queryPool = VK_NULL_HANDLE; + + VkQueryPoolCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO; + info.pNext = NULL; + info.flags = 0; + info.queryType = VK_QUERY_TYPE_TIMESTAMP; + info.queryCount = NUM_TIMESTAMPS * NUM_BACK_BUFFERS; + + result = vkCreateQueryPool(device, &info, NULL, &queryPool); + if (result != VK_SUCCESS) + { + goto error_init_query_pool; + } + + context->timestampQueryPool = queryPool; + } +#endif + + return FFX_CACAO_STATUS_OK; + +#ifdef FFX_CACAO_ENABLE_PROFILING + vkDestroyQueryPool(device, context->timestampQueryPool, NULL); +error_init_query_pool: +#endif + + vkDestroyImageView(device, context->loadCounterView, NULL); +error_init_load_counter_view: + vkDestroyImage(device, context->loadCounter, NULL); + vkFreeMemory(device, context->loadCounterMemory, NULL); +error_init_load_counter_image: + +error_init_constant_buffers: + for (uint32_t i = 0; i < numConstantBackBuffersInited; ++i) + { + for (uint32_t j = 0; j < 4; ++j) + { + vkDestroyBuffer(device, context->constantBuffer[i][j], NULL); + vkFreeMemory(device, context->constantBufferMemory[i][j], NULL); + } + } + +error_allocate_descriptor_sets: + vkDestroyDescriptorPool(device, context->descriptorPool, NULL); +error_init_descriptor_pool: + +error_init_pipelines: + for (uint32_t i = 0; i < numPipelinesInited; ++i) + { + vkDestroyPipeline(device, context->computePipelines[i], NULL); + } + +error_init_shader_modules: + for (uint32_t i = 0; i < numShaderModulesInited; ++i) + { + vkDestroyShaderModule(device, context->computeShaders[i], NULL); + } + +error_init_pipeline_layouts: + for (uint32_t i = 0; i < numPipelineLayoutsInited; ++i) + { + vkDestroyPipelineLayout(device, context->pipelineLayouts[i], NULL); + } + +error_init_descriptor_set_layouts: + for (uint32_t i = 0; i < numDescriptorSetLayoutsInited; ++i) + { + vkDestroyDescriptorSetLayout(device, context->descriptorSetLayouts[i], 
NULL); + } + + +error_init_samplers: + for (uint32_t i = 0; i < numSamplersInited; ++i) + { + vkDestroySampler(device, context->samplers[i], NULL); + } + + return errorStatus; +} + +FFX_CACAO_Status FFX_CACAO_VkDestroyContext(FFX_CACAO_VkContext* context) +{ + if (context == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedVkContextPointer(context); + + VkDevice device = context->device; + +#ifdef FFX_CACAO_ENABLE_PROFILING + vkDestroyQueryPool(device, context->timestampQueryPool, NULL); +#endif + + vkDestroyImageView(device, context->loadCounterView, NULL); + vkDestroyImage(device, context->loadCounter, NULL); + vkFreeMemory(device, context->loadCounterMemory, NULL); + + for (uint32_t i = 0; i < NUM_BACK_BUFFERS; ++i) + { + for (uint32_t j = 0; j < 4; ++j) + { + vkDestroyBuffer(device, context->constantBuffer[i][j], NULL); + vkFreeMemory(device, context->constantBufferMemory[i][j], NULL); + } + } + + vkDestroyDescriptorPool(device, context->descriptorPool, NULL); + + for (uint32_t i = 0; i < NUM_COMPUTE_SHADERS; ++i) + { + vkDestroyPipeline(device, context->computePipelines[i], NULL); + } + + for (uint32_t i = 0; i < NUM_COMPUTE_SHADERS; ++i) + { + vkDestroyShaderModule(device, context->computeShaders[i], NULL); + } + + for (uint32_t i = 0; i < NUM_DESCRIPTOR_SET_LAYOUTS; ++i) + { + vkDestroyPipelineLayout(device, context->pipelineLayouts[i], NULL); + } + + for(uint32_t i = 0; i < NUM_DESCRIPTOR_SET_LAYOUTS; ++i) + { + vkDestroyDescriptorSetLayout(device, context->descriptorSetLayouts[i], NULL); + } + + + for (uint32_t i = 0; i < FFX_CACAO_ARRAY_SIZE(context->samplers); ++i) + { + vkDestroySampler(device, context->samplers[i], NULL); + } + + return FFX_CACAO_STATUS_OK; +} + +FFX_CACAO_Status FFX_CACAO_VkInitScreenSizeDependentResources(FFX_CACAO_VkContext* context, const FFX_CACAO_VkScreenSizeInfo* info) +{ + if (context == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + if (info == NULL) + { + return 
FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedVkContextPointer(context); + + FFX_CACAO_Bool useDownsampledSsao = info->useDownsampledSsao; + context->useDownsampledSsao = useDownsampledSsao; + context->output = info->output; + + VkDevice device = context->device; + VkPhysicalDevice physicalDevice = context->physicalDevice; + VkPhysicalDeviceMemoryProperties memoryProperties; + vkGetPhysicalDeviceMemoryProperties(physicalDevice, &memoryProperties); + VkResult result; + + FFX_CACAO_BufferSizeInfo *bsi = &context->bufferSizeInfo; + FFX_CACAO_UpdateBufferSizeInfo(info->width, info->height, useDownsampledSsao, bsi); + + FFX_CACAO_Status errorStatus = FFX_CACAO_STATUS_FAILED; + uint32_t numTextureImagesInited = 0; + uint32_t numTextureMemoriesInited = 0; + uint32_t numSrvsInited = 0; + uint32_t numUavsInited = 0; + + // create images for textures + for ( ; numTextureImagesInited < NUM_TEXTURES; ++numTextureImagesInited) + { + TextureMetaData metaData = TEXTURE_META_DATA[numTextureImagesInited]; + VkImage image = VK_NULL_HANDLE; + + VkImageCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + info.pNext = NULL; + info.flags = 0; + info.imageType = VK_IMAGE_TYPE_2D; + info.format = TEXTURE_FORMAT_LOOKUP_VK[metaData.format]; + info.extent.width = *(uint32_t*)((uint8_t*)bsi + metaData.widthOffset); + info.extent.height = *(uint32_t*)((uint8_t*)bsi + metaData.heightOffset); + info.extent.depth = 1; + info.mipLevels = metaData.numMips; + info.arrayLayers = metaData.arraySize; + info.samples = VK_SAMPLE_COUNT_1_BIT; + info.tiling = VK_IMAGE_TILING_OPTIMAL; + info.usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; + info.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + info.queueFamilyIndexCount = 0; + info.pQueueFamilyIndices = NULL; + info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + result = vkCreateImage(device, &info, NULL, &image); + if (result != VK_SUCCESS) + { + goto error_init_texture_images; + } + + setObjectName(device, 
context, VK_OBJECT_TYPE_IMAGE, (uint64_t)image, metaData.name); + + context->textures[numTextureImagesInited] = image; + } + + // allocate memory for textures + for ( ; numTextureMemoriesInited < NUM_TEXTURES; ++numTextureMemoriesInited) + { + VkImage image = context->textures[numTextureMemoriesInited]; + + VkMemoryRequirements memoryRequirements; + vkGetImageMemoryRequirements(device, image, &memoryRequirements); + + uint32_t chosenMemoryTypeIndex = getBestMemoryHeapIndex(physicalDevice, memoryRequirements, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + if (chosenMemoryTypeIndex == VK_MAX_MEMORY_TYPES) + { + goto error_init_texture_memories; + } + + VkMemoryAllocateInfo allocationInfo = {}; + allocationInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + allocationInfo.pNext = NULL; + allocationInfo.allocationSize = memoryRequirements.size; + allocationInfo.memoryTypeIndex = chosenMemoryTypeIndex; + + VkDeviceMemory memory; + result = vkAllocateMemory(device, &allocationInfo, NULL, &memory); + if (result != VK_SUCCESS) + { + goto error_init_texture_memories; + } + + result = vkBindImageMemory(device, image, memory, 0); + if (result != VK_SUCCESS) + { + vkFreeMemory(device, memory, NULL); + goto error_init_texture_memories; + } + + context->textureMemory[numTextureMemoriesInited] = memory; + } + + // create srv image views + for ( ; numSrvsInited < NUM_SHADER_RESOURCE_VIEWS; ++numSrvsInited) + { + VkImageView imageView; + ShaderResourceViewMetaData srvMetaData = SRV_META_DATA[numSrvsInited]; + + VkImageViewCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + info.pNext = NULL; + info.flags = 0; + info.image = context->textures[srvMetaData.texture]; + info.viewType = VIEW_TYPE_LOOKUP_VK[srvMetaData.viewType]; + info.format = TEXTURE_FORMAT_LOOKUP_VK[TEXTURE_META_DATA[srvMetaData.texture].format]; + info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; + info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; + info.components.b = 
VK_COMPONENT_SWIZZLE_IDENTITY; + info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; + info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + info.subresourceRange.baseMipLevel = srvMetaData.mostDetailedMip; + info.subresourceRange.levelCount = srvMetaData.mipLevels; + info.subresourceRange.baseArrayLayer = srvMetaData.firstArraySlice; + info.subresourceRange.layerCount = srvMetaData.arraySize; + + result = vkCreateImageView(device, &info, NULL, &imageView); + if (result != VK_SUCCESS) + { + goto error_init_srvs; + } + + context->shaderResourceViews[numSrvsInited] = imageView; + } + + // create uav image views + for ( ; numUavsInited < NUM_UNORDERED_ACCESS_VIEWS; ++numUavsInited) + { + VkImageView imageView; + UnorderedAccessViewMetaData uavMetaData = UAV_META_DATA[numUavsInited]; + + VkImageViewCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + info.pNext = NULL; + info.flags = 0; + info.image = context->textures[uavMetaData.textureID]; + info.viewType = VIEW_TYPE_LOOKUP_VK[uavMetaData.viewType]; + info.format = TEXTURE_FORMAT_LOOKUP_VK[TEXTURE_META_DATA[uavMetaData.textureID].format]; + info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY; + info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY; + info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY; + info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY; + info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + info.subresourceRange.baseMipLevel = uavMetaData.mostDetailedMip; + info.subresourceRange.levelCount = 1; + info.subresourceRange.baseArrayLayer = uavMetaData.firstArraySlice; + info.subresourceRange.layerCount = uavMetaData.arraySize; + + result = vkCreateImageView(device, &info, NULL, &imageView); + if (result != VK_SUCCESS) + { + goto error_init_uavs; + } + + context->unorderedAccessViews[numUavsInited] = imageView; + } + + // update descriptor sets from table + for (uint32_t i = 0; i < NUM_BACK_BUFFERS; ++i) { + VkDescriptorImageInfo 
imageInfos[NUM_INPUT_DESCRIPTOR_BINDINGS + NUM_OUTPUT_DESCRIPTOR_BINDINGS] = {}; + VkDescriptorImageInfo *curImageInfo = imageInfos; + VkWriteDescriptorSet writes[NUM_INPUT_DESCRIPTOR_BINDINGS + NUM_OUTPUT_DESCRIPTOR_BINDINGS] = {}; + VkWriteDescriptorSet *curWrite = writes; + + // write input descriptor bindings + for (uint32_t j = 0; j < NUM_INPUT_DESCRIPTOR_BINDINGS; ++j) + { + InputDescriptorBindingMetaData bindingMetaData = INPUT_DESCRIPTOR_BINDING_META_DATA[j]; + + curImageInfo->sampler = VK_NULL_HANDLE; + curImageInfo->imageView = context->shaderResourceViews[bindingMetaData.srvID]; + curImageInfo->imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + curWrite->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + curWrite->pNext = NULL; + curWrite->dstSet = context->descriptorSets[i][bindingMetaData.descriptorID]; + curWrite->dstBinding = 20 + bindingMetaData.bindingNumber; + curWrite->descriptorCount = 1; + curWrite->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + curWrite->pImageInfo = curImageInfo; + + ++curWrite; ++curImageInfo; + } + + // write output descriptor bindings + for (uint32_t j = 0; j < NUM_OUTPUT_DESCRIPTOR_BINDINGS; ++j) + { + OutputDescriptorBindingMetaData bindingMetaData = OUTPUT_DESCRIPTOR_BINDING_META_DATA[j]; + + curImageInfo->sampler = VK_NULL_HANDLE; + curImageInfo->imageView = context->unorderedAccessViews[bindingMetaData.uavID]; + curImageInfo->imageLayout = VK_IMAGE_LAYOUT_GENERAL; + + curWrite->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + curWrite->pNext = VK_NULL_HANDLE; + curWrite->dstSet = context->descriptorSets[i][bindingMetaData.descriptorID]; + curWrite->dstBinding = 30 + bindingMetaData.bindingNumber; + curWrite->descriptorCount = 1; + curWrite->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + curWrite->pImageInfo = curImageInfo; + + ++curWrite; ++curImageInfo; + } + + vkUpdateDescriptorSets(device, FFX_CACAO_ARRAY_SIZE(writes), writes, 0, NULL); + } + + // update descriptor sets with inputs + for 
(uint32_t i = 0; i < NUM_BACK_BUFFERS; ++i) { +#define MAX_NUM_MISC_INPUT_DESCRIPTORS 32 + + VkDescriptorImageInfo imageInfos[MAX_NUM_MISC_INPUT_DESCRIPTORS] = {}; + VkWriteDescriptorSet writes[MAX_NUM_MISC_INPUT_DESCRIPTORS] = {}; + + for (uint32_t i = 0; i < FFX_CACAO_ARRAY_SIZE(writes); ++i) + { + VkDescriptorImageInfo *imageInfo = imageInfos + i; + VkWriteDescriptorSet *write = writes + i; + + imageInfo->sampler = VK_NULL_HANDLE; + + write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write->pNext = NULL; + write->descriptorCount = 1; + write->pImageInfo = imageInfo; + } + + uint32_t cur = 0; + + // register(t0) -> 20 + // register(u0) -> 30 + imageInfos[cur].imageView = info->depthView; + imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + writes[cur].dstSet = context->descriptorSets[i][DS_PREPARE_DEPTHS]; + writes[cur].dstBinding = 20; + writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + ++cur; + + imageInfos[cur].imageView = info->depthView; + imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + writes[cur].dstSet = context->descriptorSets[i][DS_PREPARE_DEPTHS_MIPS]; + writes[cur].dstBinding = 20; + writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + ++cur; + + imageInfos[cur].imageView = info->depthView; + imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + writes[cur].dstSet = context->descriptorSets[i][DS_PREPARE_NORMALS]; + writes[cur].dstBinding = 20; + writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + ++cur; + + imageInfos[cur].imageView = info->depthView; + imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + writes[cur].dstSet = context->descriptorSets[i][DS_BILATERAL_UPSAMPLE_PING]; + writes[cur].dstBinding = 21; + writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + ++cur; + + imageInfos[cur].imageView = info->depthView; + imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + 
writes[cur].dstSet = context->descriptorSets[i][DS_BILATERAL_UPSAMPLE_PONG]; + writes[cur].dstBinding = 21; + writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + ++cur; + + imageInfos[cur].imageView = info->outputView; + imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + writes[cur].dstSet = context->descriptorSets[i][DS_BILATERAL_UPSAMPLE_PING]; + writes[cur].dstBinding = 30; + writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + ++cur; + + imageInfos[cur].imageView = info->outputView; + imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + writes[cur].dstSet = context->descriptorSets[i][DS_BILATERAL_UPSAMPLE_PONG]; + writes[cur].dstBinding = 30; + writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + ++cur; + + imageInfos[cur].imageView = info->outputView; + imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + writes[cur].dstSet = context->descriptorSets[i][DS_APPLY_PING]; + writes[cur].dstBinding = 30; + writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + ++cur; + + imageInfos[cur].imageView = info->outputView; + imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + writes[cur].dstSet = context->descriptorSets[i][DS_APPLY_PONG]; + writes[cur].dstBinding = 30; + writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + ++cur; + + imageInfos[cur].imageView = context->loadCounterView; + imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + writes[cur].dstSet = context->descriptorSets[i][DS_POSTPROCESS_IMPORTANCE_MAP_B]; + writes[cur].dstBinding = 31; + writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + ++cur; + + imageInfos[cur].imageView = context->loadCounterView; + imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + writes[cur].dstSet = context->descriptorSets[i][DS_CLEAR_LOAD_COUNTER]; + writes[cur].dstBinding = 30; + writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + ++cur; + + for (uint32_t pass = 0; pass < 4; ++pass) + { + imageInfos[cur].imageView = 
context->loadCounterView; + imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_GENERAL; + writes[cur].dstSet = context->descriptorSets[i][(DescriptorSetID)(DS_GENERATE_ADAPTIVE_0 + pass)]; + writes[cur].dstBinding = 22; + writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + ++cur; + } + + if (info->normalsView) { + imageInfos[cur].imageView = info->normalsView; + imageInfos[cur].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + writes[cur].dstSet = context->descriptorSets[i][DS_PREPARE_NORMALS_FROM_INPUT_NORMALS]; + writes[cur].dstBinding = 20; + writes[cur].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; + ++cur; + } + + FFX_CACAO_ASSERT(cur <= MAX_NUM_MISC_INPUT_DESCRIPTORS); + vkUpdateDescriptorSets(device, cur, writes, 0, NULL); + } + + // update descriptor sets with constant buffers + for (uint32_t i = 0; i < NUM_BACK_BUFFERS; ++i) { + VkDescriptorBufferInfo bufferInfos[NUM_DESCRIPTOR_SETS] = {}; + VkDescriptorBufferInfo *curBufferInfo = bufferInfos; + VkWriteDescriptorSet writes[NUM_DESCRIPTOR_SETS] = {}; + VkWriteDescriptorSet *curWrite = writes; + + for (uint32_t j = 0; j < NUM_DESCRIPTOR_SETS; ++j) + { + DescriptorSetMetaData dsMetaData = DESCRIPTOR_SET_META_DATA[j]; + + curBufferInfo->buffer = context->constantBuffer[i][dsMetaData.pass]; + curBufferInfo->offset = 0; + curBufferInfo->range = VK_WHOLE_SIZE; + + curWrite->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + curWrite->pNext = NULL; + curWrite->dstSet = context->descriptorSets[i][j]; + curWrite->dstBinding = 10; + curWrite->dstArrayElement = 0; + curWrite->descriptorCount = 1; + curWrite->descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + curWrite->pBufferInfo = curBufferInfo; + + ++curWrite; + ++curBufferInfo; + } + + vkUpdateDescriptorSets(device, FFX_CACAO_ARRAY_SIZE(writes), writes, 0, NULL); + } + + return FFX_CACAO_STATUS_OK; + +error_init_uavs: + for (uint32_t i = 0; i < numUavsInited; ++i) + { + vkDestroyImageView(device, context->unorderedAccessViews[i], NULL); + } + 
+error_init_srvs: + for (uint32_t i = 0; i < numSrvsInited; ++i) + { + vkDestroyImageView(device, context->shaderResourceViews[i], NULL); + } + +error_init_texture_memories: + for (uint32_t i = 0; i < numTextureMemoriesInited; ++i) + { + vkFreeMemory(device, context->textureMemory[i], NULL); + } + +error_init_texture_images: + for (uint32_t i = 0; i < numTextureImagesInited; ++i) + { + vkDestroyImage(device, context->textures[i], NULL); + } + + return errorStatus; +} + +FFX_CACAO_Status FFX_CACAO_VkDestroyScreenSizeDependentResources(FFX_CACAO_VkContext* context) +{ + if (context == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedVkContextPointer(context); + + VkDevice device = context->device; + + for (uint32_t i = 0; i < NUM_UNORDERED_ACCESS_VIEWS; ++i) + { + vkDestroyImageView(device, context->unorderedAccessViews[i], NULL); + } + + for (uint32_t i = 0; i < NUM_SHADER_RESOURCE_VIEWS; ++i) + { + vkDestroyImageView(device, context->shaderResourceViews[i], NULL); + } + + for (uint32_t i = 0; i < NUM_TEXTURES; ++i) + { + vkFreeMemory(device, context->textureMemory[i], NULL); + } + + for (uint32_t i = 0; i < NUM_TEXTURES; ++i) + { + vkDestroyImage(device, context->textures[i], NULL); + } + + return FFX_CACAO_STATUS_OK; +} + +FFX_CACAO_Status FFX_CACAO_VkUpdateSettings(FFX_CACAO_VkContext* context, const FFX_CACAO_Settings* settings) +{ + if (context == NULL || settings == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedVkContextPointer(context); + + memcpy(&context->settings, settings, sizeof(*settings)); + + return FFX_CACAO_STATUS_OK; +} + +static inline void computeDispatch(FFX_CACAO_VkContext* context, VkCommandBuffer cb, DescriptorSetID ds, ComputeShaderID cs, uint32_t width, uint32_t height, uint32_t depth) +{ + DescriptorSetLayoutID dsl = DESCRIPTOR_SET_META_DATA[ds].descriptorSetLayoutID; + vkCmdBindDescriptorSets(cb, VK_PIPELINE_BIND_POINT_COMPUTE, context->pipelineLayouts[dsl], 0, 1, 
&context->descriptorSets[context->currentConstantBuffer][ds], 0, NULL); + vkCmdBindPipeline(cb, VK_PIPELINE_BIND_POINT_COMPUTE, context->computePipelines[cs]); + vkCmdDispatch(cb, width, height, depth); +} + +typedef struct BarrierList +{ + uint32_t len; + VkImageMemoryBarrier barriers[32]; +} BarrierList; + +static inline void pushBarrier(BarrierList* barrierList, VkImage image, VkImageLayout oldLayout, VkImageLayout newLayout, VkAccessFlags srcAccessFlags, VkAccessFlags dstAccessFlags) +{ + FFX_CACAO_ASSERT(barrierList->len < FFX_CACAO_ARRAY_SIZE(barrierList->barriers)); + VkImageMemoryBarrier barrier = {}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = NULL; + barrier.srcAccessMask = srcAccessFlags; + barrier.dstAccessMask = dstAccessFlags; + barrier.oldLayout = oldLayout; + barrier.newLayout = newLayout; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + barrier.image = image; + barrierList->barriers[barrierList->len++] = barrier; +} + +static inline void beginDebugMarker(FFX_CACAO_VkContext* context, VkCommandBuffer cb, const char* name) +{ + if (context->vkCmdDebugMarkerBegin) + { + VkDebugMarkerMarkerInfoEXT info = {}; + info.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT; + info.pNext = NULL; + info.pMarkerName = name; + info.color[0] = 1.0f; + info.color[1] = 0.0f; + info.color[2] = 0.0f; + info.color[3] = 1.0f; + + context->vkCmdDebugMarkerBegin(cb, &info); + } +} + +static inline void endDebugMarker(FFX_CACAO_VkContext* context, VkCommandBuffer cb) +{ + if (context->vkCmdDebugMarkerEnd) + { + context->vkCmdDebugMarkerEnd(cb); + } +} + +FFX_CACAO_Status 
FFX_CACAO_VkDraw(FFX_CACAO_VkContext* context, VkCommandBuffer cb, const FFX_CACAO_Matrix4x4* proj, const FFX_CACAO_Matrix4x4* normalsToView) +{ + if (context == NULL || cb == VK_NULL_HANDLE || proj == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedVkContextPointer(context); + + FFX_CACAO_Settings *settings = &context->settings; + FFX_CACAO_BufferSizeInfo *bsi = &context->bufferSizeInfo; + VkDevice device = context->device; + VkDescriptorSet *ds = context->descriptorSets[context->currentConstantBuffer]; + VkImage *tex = context->textures; + VkResult result; + BarrierList barrierList; + + uint32_t curBuffer = context->currentConstantBuffer; + curBuffer = (curBuffer + 1) % NUM_BACK_BUFFERS; + context->currentConstantBuffer = curBuffer; +#ifdef FFX_CACAO_ENABLE_PROFILING + { + uint32_t collectBuffer = context->collectBuffer = (curBuffer + 1) % NUM_BACK_BUFFERS; + if (uint32_t numQueries = context->timestampQueries[collectBuffer].numTimestamps) + { + uint32_t offset = collectBuffer * NUM_TIMESTAMPS; + vkGetQueryPoolResults(device, context->timestampQueryPool, offset, numQueries, numQueries * sizeof(uint64_t), context->timestampQueries[collectBuffer].timings, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT); + } + } +#endif + + beginDebugMarker(context, cb, "FidelityFX CACAO"); + + // update constant buffer + + for (uint32_t i = 0; i < 4; ++i) + { + VkDeviceMemory memory = context->constantBufferMemory[curBuffer][i]; + void *data = NULL; + result = vkMapMemory(device, memory, 0, VK_WHOLE_SIZE, 0, &data); + FFX_CACAO_ASSERT(result == VK_SUCCESS); + FFX_CACAO_UpdateConstants((FFX_CACAO_Constants*)data, settings, bsi, proj, normalsToView); + FFX_CACAO_UpdatePerPassConstants((FFX_CACAO_Constants*)data, settings, bsi, i); + vkUnmapMemory(device, memory); + } + +#ifdef FFX_CACAO_ENABLE_PROFILING + uint32_t queryPoolOffset = curBuffer * NUM_TIMESTAMPS; + uint32_t numTimestamps = 0; + vkCmdResetQueryPool(cb, context->timestampQueryPool, queryPoolOffset, 
NUM_TIMESTAMPS); +#define GET_TIMESTAMP(name) \ + context->timestampQueries[curBuffer].timestamps[numTimestamps] = TIMESTAMP_##name; \ + vkCmdWriteTimestamp(cb, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, context->timestampQueryPool, queryPoolOffset + numTimestamps++); +#else +#define GET_TIMESTAMP(name) +#endif + + GET_TIMESTAMP(BEGIN) + + barrierList.len = 0; + pushBarrier(&barrierList, tex[TEXTURE_DEINTERLEAVED_DEPTHS], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); + pushBarrier(&barrierList, tex[TEXTURE_DEINTERLEAVED_NORMALS], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); + pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PING], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); + pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PONG], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); + pushBarrier(&barrierList, tex[TEXTURE_IMPORTANCE_MAP], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); + pushBarrier(&barrierList, tex[TEXTURE_IMPORTANCE_MAP_PONG], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); + pushBarrier(&barrierList, context->loadCounter, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); + vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); + + // prepare depths, normals and mips + { + beginDebugMarker(context, cb, "Prepare downsampled depths, normals and mips"); + + // clear load counter + computeDispatch(context, cb, DS_CLEAR_LOAD_COUNTER, CS_CLEAR_LOAD_COUNTER, 1, 1, 1); + + switch (context->settings.qualityLevel) + { + case FFX_CACAO_QUALITY_LOWEST: { + uint32_t dispatchWidth = dispatchSize(FFX_CACAO_PREPARE_DEPTHS_HALF_WIDTH, bsi->deinterleavedDepthBufferWidth); + uint32_t dispatchHeight = 
dispatchSize(FFX_CACAO_PREPARE_DEPTHS_HALF_HEIGHT, bsi->deinterleavedDepthBufferHeight); + ComputeShaderID csPrepareDepthsHalf = context->useDownsampledSsao ? CS_PREPARE_DOWNSAMPLED_DEPTHS_HALF : CS_PREPARE_NATIVE_DEPTHS_HALF; + computeDispatch(context, cb, DS_PREPARE_DEPTHS, csPrepareDepthsHalf, dispatchWidth, dispatchHeight, 1); + break; + } + case FFX_CACAO_QUALITY_LOW: { + uint32_t dispatchWidth = dispatchSize(FFX_CACAO_PREPARE_DEPTHS_WIDTH, bsi->deinterleavedDepthBufferWidth); + uint32_t dispatchHeight = dispatchSize(FFX_CACAO_PREPARE_DEPTHS_HEIGHT, bsi->deinterleavedDepthBufferHeight); + ComputeShaderID csPrepareDepths = context->useDownsampledSsao ? CS_PREPARE_DOWNSAMPLED_DEPTHS : CS_PREPARE_NATIVE_DEPTHS; + computeDispatch(context, cb, DS_PREPARE_DEPTHS, csPrepareDepths, dispatchWidth, dispatchHeight, 1); + break; + } + default: { + uint32_t dispatchWidth = dispatchSize(FFX_CACAO_PREPARE_DEPTHS_AND_MIPS_WIDTH, bsi->deinterleavedDepthBufferWidth); + uint32_t dispatchHeight = dispatchSize(FFX_CACAO_PREPARE_DEPTHS_AND_MIPS_HEIGHT, bsi->deinterleavedDepthBufferHeight); + ComputeShaderID csPrepareDepthsAndMips = context->useDownsampledSsao ? CS_PREPARE_DOWNSAMPLED_DEPTHS_AND_MIPS : CS_PREPARE_NATIVE_DEPTHS_AND_MIPS; + computeDispatch(context, cb, DS_PREPARE_DEPTHS_MIPS, csPrepareDepthsAndMips, dispatchWidth, dispatchHeight, 1); + break; + } + } + + if (context->settings.generateNormals) + { + uint32_t dispatchWidth = dispatchSize(FFX_CACAO_PREPARE_NORMALS_WIDTH, bsi->ssaoBufferWidth); + uint32_t dispatchHeight = dispatchSize(FFX_CACAO_PREPARE_NORMALS_HEIGHT, bsi->ssaoBufferHeight); + ComputeShaderID csPrepareNormals = context->useDownsampledSsao ? 
CS_PREPARE_DOWNSAMPLED_NORMALS : CS_PREPARE_NATIVE_NORMALS; + computeDispatch(context, cb, DS_PREPARE_NORMALS, csPrepareNormals, dispatchWidth, dispatchHeight, 1); + } + else + { + uint32_t dispatchWidth = dispatchSize(PREPARE_NORMALS_FROM_INPUT_NORMALS_WIDTH, bsi->ssaoBufferWidth); + uint32_t dispatchHeight = dispatchSize(PREPARE_NORMALS_FROM_INPUT_NORMALS_HEIGHT, bsi->ssaoBufferHeight); + ComputeShaderID csPrepareNormalsFromInputNormals = context->useDownsampledSsao ? CS_PREPARE_DOWNSAMPLED_NORMALS_FROM_INPUT_NORMALS : CS_PREPARE_NATIVE_NORMALS_FROM_INPUT_NORMALS; + computeDispatch(context, cb, DS_PREPARE_NORMALS_FROM_INPUT_NORMALS, csPrepareNormalsFromInputNormals, dispatchWidth, dispatchHeight, 1); + } + + endDebugMarker(context, cb); + GET_TIMESTAMP(PREPARE) + } + + barrierList.len = 0; + pushBarrier(&barrierList, tex[TEXTURE_DEINTERLEAVED_DEPTHS], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + pushBarrier(&barrierList, tex[TEXTURE_DEINTERLEAVED_NORMALS], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + pushBarrier(&barrierList, context->loadCounter, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT); + vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); + + // base pass for highest quality setting + if (context->settings.qualityLevel == FFX_CACAO_QUALITY_HIGHEST) + { + beginDebugMarker(context, cb, "Generate High Quality Base Pass"); + + // SSAO + { + beginDebugMarker(context, cb, "Base SSAO"); + + uint32_t dispatchWidth = dispatchSize(FFX_CACAO_GENERATE_WIDTH, bsi->ssaoBufferWidth); + uint32_t dispatchHeight = dispatchSize(FFX_CACAO_GENERATE_HEIGHT, bsi->ssaoBufferHeight); + + for (int pass = 0; pass < 4; ++pass) + { 
+ computeDispatch(context, cb, (DescriptorSetID)(DS_GENERATE_ADAPTIVE_BASE_0 + pass), CS_GENERATE_Q3_BASE, dispatchWidth, dispatchHeight, 1); + } + + endDebugMarker(context, cb); + } + + GET_TIMESTAMP(BASE_SSAO_PASS) + + barrierList.len = 0; + pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PONG], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); + + // generate importance map + { + beginDebugMarker(context, cb, "Importance Map"); + + uint32_t dispatchWidth = dispatchSize(IMPORTANCE_MAP_WIDTH, bsi->importanceMapWidth); + uint32_t dispatchHeight = dispatchSize(IMPORTANCE_MAP_HEIGHT, bsi->importanceMapHeight); + + computeDispatch(context, cb, DS_GENERATE_IMPORTANCE_MAP, CS_GENERATE_IMPORTANCE_MAP, dispatchWidth, dispatchHeight, 1); + + barrierList.len = 0; + pushBarrier(&barrierList, tex[TEXTURE_IMPORTANCE_MAP], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); + + computeDispatch(context, cb, DS_POSTPROCESS_IMPORTANCE_MAP_A, CS_POSTPROCESS_IMPORTANCE_MAP_A, dispatchWidth, dispatchHeight, 1); + + barrierList.len = 0; + pushBarrier(&barrierList, tex[TEXTURE_IMPORTANCE_MAP], VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL, VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_SHADER_WRITE_BIT); + pushBarrier(&barrierList, tex[TEXTURE_IMPORTANCE_MAP_PONG], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 
barrierList.len, barrierList.barriers); + + computeDispatch(context, cb, DS_POSTPROCESS_IMPORTANCE_MAP_B, CS_POSTPROCESS_IMPORTANCE_MAP_B, dispatchWidth, dispatchHeight, 1); + + endDebugMarker(context, cb); + } + + endDebugMarker(context, cb); + GET_TIMESTAMP(IMPORTANCE_MAP) + + barrierList.len = 0; + pushBarrier(&barrierList, tex[TEXTURE_IMPORTANCE_MAP], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + pushBarrier(&barrierList, context->loadCounter, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_GENERAL, VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_SHADER_READ_BIT); + vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); + } + + // main ssao generation + { + beginDebugMarker(context, cb, "Generate SSAO"); + + ComputeShaderID generateCS = (ComputeShaderID)(CS_GENERATE_Q0 + FFX_CACAO_MAX(0, context->settings.qualityLevel - 1)); + + uint32_t dispatchWidth, dispatchHeight, dispatchDepth; + + switch (context->settings.qualityLevel) + { + case FFX_CACAO_QUALITY_LOWEST: + case FFX_CACAO_QUALITY_LOW: + case FFX_CACAO_QUALITY_MEDIUM: + dispatchWidth = dispatchSize(FFX_CACAO_GENERATE_SPARSE_WIDTH, bsi->ssaoBufferWidth); + dispatchWidth = (dispatchWidth + 4) / 5; + dispatchHeight = dispatchSize(FFX_CACAO_GENERATE_SPARSE_HEIGHT, bsi->ssaoBufferHeight); + dispatchDepth = 5; + break; + case FFX_CACAO_QUALITY_HIGH: + case FFX_CACAO_QUALITY_HIGHEST: + dispatchWidth = dispatchSize(FFX_CACAO_GENERATE_WIDTH, bsi->ssaoBufferWidth); + dispatchHeight = dispatchSize(FFX_CACAO_GENERATE_HEIGHT, bsi->ssaoBufferHeight); + dispatchDepth = 1; + break; + } + + for (int pass = 0; pass < 4; ++pass) + { + if (context->settings.qualityLevel == FFX_CACAO_QUALITY_LOWEST && (pass == 1 || pass == 2)) + { + continue; + } + + DescriptorSetID descriptorSetID = context->settings.qualityLevel == 
FFX_CACAO_QUALITY_HIGHEST ? DS_GENERATE_ADAPTIVE_0 : DS_GENERATE_0; + descriptorSetID = (DescriptorSetID)(descriptorSetID + pass); + + computeDispatch(context, cb, descriptorSetID, generateCS, dispatchWidth, dispatchHeight, dispatchDepth); + } + + endDebugMarker(context, cb); + GET_TIMESTAMP(GENERATE_SSAO) + } + + uint32_t blurPassCount = context->settings.blurPassCount; + blurPassCount = FFX_CACAO_CLAMP(blurPassCount, 0, MAX_BLUR_PASSES); + + // de-interleaved blur + if (blurPassCount) + { + barrierList.len = 0; + pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PING], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PONG], VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); + vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); + + beginDebugMarker(context, cb, "Deinterleaved Blur"); + + uint32_t w = 4 * FFX_CACAO_BLUR_WIDTH - 2 * blurPassCount; + uint32_t h = 3 * FFX_CACAO_BLUR_HEIGHT - 2 * blurPassCount; + uint32_t dispatchWidth = dispatchSize(w, bsi->ssaoBufferWidth); + uint32_t dispatchHeight = dispatchSize(h, bsi->ssaoBufferHeight); + + for (int pass = 0; pass < 4; ++pass) + { + if (context->settings.qualityLevel == FFX_CACAO_QUALITY_LOWEST && (pass == 1 || pass == 2)) + { + continue; + } + + ComputeShaderID blurShaderID = (ComputeShaderID)(CS_EDGE_SENSITIVE_BLUR_1 + blurPassCount - 1); + DescriptorSetID descriptorSetID = (DescriptorSetID)(DS_EDGE_SENSITIVE_BLUR_0 + pass); + computeDispatch(context, cb, descriptorSetID, blurShaderID, dispatchWidth, dispatchHeight, 1); + } + + endDebugMarker(context, cb); + GET_TIMESTAMP(EDGE_SENSITIVE_BLUR) + + barrierList.len = 0; + pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PONG], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, 
VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + pushBarrier(&barrierList, context->output, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); + vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); + } + else + { + barrierList.len = 0; + pushBarrier(&barrierList, tex[TEXTURE_SSAO_BUFFER_PING], VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + pushBarrier(&barrierList, context->output, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_GENERAL, 0, VK_ACCESS_SHADER_WRITE_BIT); + vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); + } + + + if (context->useDownsampledSsao) + { + beginDebugMarker(context, cb, "Bilateral Upsample"); + + uint32_t dispatchWidth = dispatchSize(2 * FFX_CACAO_BILATERAL_UPSCALE_WIDTH, bsi->inputOutputBufferWidth); + uint32_t dispatchHeight = dispatchSize(2 * FFX_CACAO_BILATERAL_UPSCALE_HEIGHT, bsi->inputOutputBufferHeight); + + DescriptorSetID descriptorSetID = blurPassCount ? 
DS_BILATERAL_UPSAMPLE_PONG : DS_BILATERAL_UPSAMPLE_PING; + ComputeShaderID upscaler; + switch (context->settings.qualityLevel) + { + case FFX_CACAO_QUALITY_LOWEST: + upscaler = CS_UPSCALE_BILATERAL_5X5_HALF; + break; + case FFX_CACAO_QUALITY_LOW: + case FFX_CACAO_QUALITY_MEDIUM: + upscaler = CS_UPSCALE_BILATERAL_5X5_NON_SMART; + break; + case FFX_CACAO_QUALITY_HIGH: + case FFX_CACAO_QUALITY_HIGHEST: + upscaler = CS_UPSCALE_BILATERAL_5X5_SMART; + break; + } + + computeDispatch(context, cb, descriptorSetID, upscaler, dispatchWidth, dispatchHeight, 1); + + endDebugMarker(context, cb); + GET_TIMESTAMP(BILATERAL_UPSAMPLE) + } + else + { + beginDebugMarker(context, cb, "Reinterleave"); + + uint32_t dispatchWidth = dispatchSize(FFX_CACAO_APPLY_WIDTH, bsi->inputOutputBufferWidth); + uint32_t dispatchHeight = dispatchSize(FFX_CACAO_APPLY_HEIGHT, bsi->inputOutputBufferHeight); + + DescriptorSetID descriptorSetID = blurPassCount ? DS_APPLY_PONG : DS_APPLY_PING; + + switch (context->settings.qualityLevel) + { + case FFX_CACAO_QUALITY_LOWEST: + computeDispatch(context, cb, descriptorSetID, CS_NON_SMART_HALF_APPLY, dispatchWidth, dispatchHeight, 1); + break; + case FFX_CACAO_QUALITY_LOW: + computeDispatch(context, cb, descriptorSetID, CS_NON_SMART_APPLY, dispatchWidth, dispatchHeight, 1); + break; + default: + computeDispatch(context, cb, descriptorSetID, CS_APPLY, dispatchWidth, dispatchHeight, 1); + break; + } + + endDebugMarker(context, cb); + GET_TIMESTAMP(APPLY) + } + + endDebugMarker(context, cb); + + barrierList.len = 0; + pushBarrier(&barrierList, context->output, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT); + vkCmdPipelineBarrier(cb, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, barrierList.len, barrierList.barriers); + +#ifdef FFX_CACAO_ENABLE_PROFILING + context->timestampQueries[curBuffer].numTimestamps = numTimestamps; +#endif + + return 
FFX_CACAO_STATUS_OK; +} + +#ifdef FFX_CACAO_ENABLE_PROFILING +FFX_CACAO_Status FFX_CACAO_VkGetDetailedTimings(FFX_CACAO_VkContext* context, FFX_CACAO_DetailedTiming* timings) +{ + if (context == NULL || timings == NULL) + { + return FFX_CACAO_STATUS_INVALID_POINTER; + } + context = getAlignedVkContextPointer(context); + + uint32_t bufferIndex = context->collectBuffer; + uint32_t numTimestamps = context->timestampQueries[bufferIndex].numTimestamps; + uint64_t prevTime = context->timestampQueries[bufferIndex].timings[0]; + for (uint32_t i = 1; i < numTimestamps; ++i) + { + TimestampID timestampID = context->timestampQueries[bufferIndex].timestamps[i]; + timings->timestamps[i].label = TIMESTAMP_NAMES[timestampID]; + uint64_t time = context->timestampQueries[bufferIndex].timings[i]; + timings->timestamps[i].ticks = time - prevTime; + prevTime = time; + } + timings->timestamps[0].label = "FFX_CACAO_TOTAL"; + timings->timestamps[0].ticks = prevTime - context->timestampQueries[bufferIndex].timings[0]; + timings->numTimestamps = numTimestamps; + + return FFX_CACAO_STATUS_OK; +} +#endif +#endif + +#ifdef __cplusplus +} +#endif diff --git a/license.txt b/license.txt index 6adade0..cc2870e 100644 --- a/license.txt +++ b/license.txt @@ -1,4 +1,4 @@ -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/readme.md b/readme.md index dbbcd64..f59a2bd 100644 --- a/readme.md +++ b/readme.md @@ -1,14 +1,17 @@ # FidelityFX CACAO -Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +Copyright (c) 2020-2021 Advanced Micro Devices, Inc. All rights reserved. 
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE @@ -28,12 +31,8 @@ You can find the binaries for FidelityFX CACAO in the release section on GitHub. # Sponza Model Issue -In the provided Sponza model for the FFX CACAO sample, at the bottom of some curtains, -there is an issue with incorrect normals -in the mesh causing ambient occlusion to be incorrectly calculated as light in places where occlusion -should be dark. This is a known issue with the mesh, and not an issue with FFX CACAO itself. +In the provided Sponza model for the FidelityFX CACAO sample, at the bottom of some curtains, there is an issue with incorrect normals in the mesh causing ambient occlusion to be incorrectly calculated as light in places where occlusion should be dark. This is a known issue with the mesh, and not an issue with FidelityFX CACAO itself. # Notices -CACAO is a modification of the Adaptive Screen Space Ambient Occlusion (ASSAO) algorithm that was developed by Intel. -The original implementation can be found [here](https://github.com/GameTechDev/ASSAO). +FidelityFX CACAO is a modification of the Adaptive Screen Space Ambient Occlusion (ASSAO) algorithm that was developed by Intel. 
The original implementation can be found [here](https://github.com/GameTechDev/ASSAO). diff --git a/sample/libs/cauldron b/sample/libs/cauldron index e850540..5a82a0c 160000 --- a/sample/libs/cauldron +++ b/sample/libs/cauldron @@ -1 +1 @@ -Subproject commit e85054054ae65f92c52f8cdde988d1b448f8dbf2 +Subproject commit 5a82a0ce20ea74e27e5e8818b78f0317d2e36157 diff --git a/sample/src/Common/FFX_CACAO_Common.h b/sample/src/Common/Common.h similarity index 83% rename from sample/src/Common/FFX_CACAO_Common.h rename to sample/src/Common/Common.h index b5a53af..62a0c49 100644 --- a/sample/src/Common/FFX_CACAO_Common.h +++ b/sample/src/Common/Common.h @@ -1,6 +1,6 @@ // AMD Sample sample code // -// Copyright(c) 2020 Advanced Micro Devices, Inc.All rights reserved. +// Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights @@ -20,20 +20,29 @@ #include "ffx_cacao.h" -struct FfxCacaoPreset +struct Preset { -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION bool useDownsampledSsao; -#endif - FfxCacaoSettings settings; + FFX_CACAO_Settings settings; }; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION -static const char *FFX_CACAO_PRESET_NAMES[] = { "Native - High Quality", "Native - Medium Quality", "Native - Low Quality", "Downsampled - High Quality", "Downsampled - Medium Quality", "Downsampled - Low Quality", "Custom" }; +static const char *FFX_CACAO_PRESET_NAMES[] = { + "Native - Adaptive Quality", + "Native - High Quality", + "Native - Medium Quality", + "Native - Low Quality", + "Native - Lowest Quality", + "Downsampled - Adaptive Quality", + "Downsampled - High Quality", + "Downsampled - Medium Quality", + "Downsampled - Low Quality", + "Downsampled - Lowest Quality", + "Custom" +}; -static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { - 
// Native - High Quality +static const Preset FFX_CACAO_PRESETS[] = { + // Native - Adaptive Quality { /* useDownsampledSsao */ false, { @@ -56,7 +65,7 @@ static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { /* bilateralSimilarityDistanceSigma */ 0.1f, } }, - // Native - Medium Quality + // Native - High Quality { /* useDownsampledSsao */ false, { @@ -67,7 +76,7 @@ static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { /* horizonAngleThreshold */ 0.06f, /* fadeOutFrom */ 20.0f, /* fadeOutTo */ 40.0f, - /* qualityLevel */ FFX_CACAO_QUALITY_MEDIUM, + /* qualityLevel */ FFX_CACAO_QUALITY_HIGH, /* adaptiveQualityLimit */ 0.75f, /* blurPassCount */ 2, /* sharpness */ 0.98f, @@ -79,7 +88,7 @@ static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { /* bilateralSimilarityDistanceSigma */ 0.1f, } }, - // Native - Low Quality + // Native - Medium Quality { /* useDownsampledSsao */ false, { @@ -90,9 +99,9 @@ static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { /* horizonAngleThreshold */ 0.06f, /* fadeOutFrom */ 20.0f, /* fadeOutTo */ 40.0f, - /* qualityLevel */ FFX_CACAO_QUALITY_LOWEST, + /* qualityLevel */ FFX_CACAO_QUALITY_MEDIUM, /* adaptiveQualityLimit */ 0.75f, - /* blurPassCount */ 4, + /* blurPassCount */ 2, /* sharpness */ 0.98f, /* temporalSupersamplingAngleOffset */ 0.0f, /* temporalSupersamplingRadiusOffset */ 0.0f, @@ -102,9 +111,9 @@ static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { /* bilateralSimilarityDistanceSigma */ 0.1f, } }, - // Downsampled - High Quality + // Native - Low Quality { - /* useDownsampledSsao */ true, + /* useDownsampledSsao */ false, { /* radius */ 1.2f, /* shadowMultiplier */ 1.0f, @@ -113,9 +122,9 @@ static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { /* horizonAngleThreshold */ 0.06f, /* fadeOutFrom */ 20.0f, /* fadeOutTo */ 40.0f, - /* qualityLevel */ FFX_CACAO_QUALITY_HIGHEST, + /* qualityLevel */ FFX_CACAO_QUALITY_LOW, /* adaptiveQualityLimit */ 0.75f, - /* blurPassCount */ 2, + /* blurPassCount */ 6, /* sharpness */ 0.98f, /* 
temporalSupersamplingAngleOffset */ 0.0f, /* temporalSupersamplingRadiusOffset */ 0.0f, @@ -125,9 +134,9 @@ static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { /* bilateralSimilarityDistanceSigma */ 0.1f, } }, - // Downsampled - Medium Quality + // Native - Lowest Quality { - /* useDownsampledSsao */ true, + /* useDownsampledSsao */ false, { /* radius */ 1.2f, /* shadowMultiplier */ 1.0f, @@ -136,19 +145,19 @@ static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { /* horizonAngleThreshold */ 0.06f, /* fadeOutFrom */ 20.0f, /* fadeOutTo */ 40.0f, - /* qualityLevel */ FFX_CACAO_QUALITY_MEDIUM, + /* qualityLevel */ FFX_CACAO_QUALITY_LOWEST, /* adaptiveQualityLimit */ 0.75f, - /* blurPassCount */ 3, + /* blurPassCount */ 6, /* sharpness */ 0.98f, /* temporalSupersamplingAngleOffset */ 0.0f, /* temporalSupersamplingRadiusOffset */ 0.0f, /* detailShadowStrength */ 0.5f, /* generateNormals */ FFX_CACAO_FALSE, /* bilateralSigmaSquared */ 5.0f, - /* bilateralSimilarityDistanceSigma */ 0.2f, + /* bilateralSimilarityDistanceSigma */ 0.1f, } }, - // Downsampled - Low Quality + // Downsampled - Highest Quality { /* useDownsampledSsao */ true, { @@ -159,25 +168,21 @@ static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { /* horizonAngleThreshold */ 0.06f, /* fadeOutFrom */ 20.0f, /* fadeOutTo */ 40.0f, - /* qualityLevel */ FFX_CACAO_QUALITY_LOWEST, + /* qualityLevel */ FFX_CACAO_QUALITY_HIGHEST, /* adaptiveQualityLimit */ 0.75f, - /* blurPassCount */ 6, + /* blurPassCount */ 2, /* sharpness */ 0.98f, /* temporalSupersamplingAngleOffset */ 0.0f, /* temporalSupersamplingRadiusOffset */ 0.0f, /* detailShadowStrength */ 0.5f, /* generateNormals */ FFX_CACAO_FALSE, - /* bilateralSigmaSquared */ 8.0f, - /* bilateralSimilarityDistanceSigma */ 0.8f, + /* bilateralSigmaSquared */ 5.0f, + /* bilateralSimilarityDistanceSigma */ 0.1f, } - } -}; -#else -static const char *FFX_CACAO_PRESET_NAMES[] = { "High Quality", "Medium Quality", "Low Quality", "Custom" }; - -static const FfxCacaoPreset 
FFX_CACAO_PRESETS[] = { - // High Quality + }, + // Downsampled - High Quality { + /* useDownsampledSsao */ true, { /* radius */ 1.2f, /* shadowMultiplier */ 1.0f, @@ -186,7 +191,7 @@ static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { /* horizonAngleThreshold */ 0.06f, /* fadeOutFrom */ 20.0f, /* fadeOutTo */ 40.0f, - /* qualityLevel */ FFX_CACAO_QUALITY_HIGHEST, + /* qualityLevel */ FFX_CACAO_QUALITY_HIGH, /* adaptiveQualityLimit */ 0.75f, /* blurPassCount */ 2, /* sharpness */ 0.98f, @@ -198,8 +203,9 @@ static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { /* bilateralSimilarityDistanceSigma */ 0.1f, } }, - // Medium Quality + // Downsampled - Medium Quality { + /* useDownsampledSsao */ true, { /* radius */ 1.2f, /* shadowMultiplier */ 1.0f, @@ -220,8 +226,32 @@ static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { /* bilateralSimilarityDistanceSigma */ 0.2f, } }, - // Low Quality + // Downsampled - Low Quality { + /* useDownsampledSsao */ true, + { + /* radius */ 1.2f, + /* shadowMultiplier */ 1.0f, + /* shadowPower */ 1.50f, + /* shadowClamp */ 0.98f, + /* horizonAngleThreshold */ 0.06f, + /* fadeOutFrom */ 20.0f, + /* fadeOutTo */ 40.0f, + /* qualityLevel */ FFX_CACAO_QUALITY_LOW, + /* adaptiveQualityLimit */ 0.75f, + /* blurPassCount */ 6, + /* sharpness */ 0.98f, + /* temporalSupersamplingAngleOffset */ 0.0f, + /* temporalSupersamplingRadiusOffset */ 0.0f, + /* detailShadowStrength */ 0.5f, + /* generateNormals */ FFX_CACAO_FALSE, + /* bilateralSigmaSquared */ 8.0f, + /* bilateralSimilarityDistanceSigma */ 0.8f, + } + }, + // Downsampled - Lowest Quality + { + /* useDownsampledSsao */ true, { /* radius */ 1.2f, /* shadowMultiplier */ 1.0f, @@ -243,4 +273,3 @@ static const FfxCacaoPreset FFX_CACAO_PRESETS[] = { } } }; -#endif \ No newline at end of file diff --git a/sample/src/Common/FFX_CACAO_Sample.json b/sample/src/Common/SampleSettings.json similarity index 100% rename from sample/src/Common/FFX_CACAO_Sample.json rename to 
sample/src/Common/SampleSettings.json diff --git a/sample/src/DX12/CMakeLists.txt b/sample/src/DX12/CMakeLists.txt index 34a3656..fd9e8f6 100644 --- a/sample/src/DX12/CMakeLists.txt +++ b/sample/src/DX12/CMakeLists.txt @@ -5,14 +5,16 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/../../common.cmake) add_compile_options(/MP) set(sources - FFX_CACAO_Sample.cpp - FFX_CACAO_Sample.h + Sample.cpp + Sample.h SampleRenderer.cpp SampleRenderer.h ../../../ffx-cacao/src/ffx_cacao_defines.h ../../../ffx-cacao/src/ffx_cacao.cpp ../../../ffx-cacao/inc/ffx_cacao.h - ../Common/FFX_CACAO_Common.h + ../../../ffx-cacao/src/ffx_cacao_impl.cpp + ../../../ffx-cacao/inc/ffx_cacao_impl.h + ../Common/Common.h stdafx.cpp stdafx.h) @@ -22,7 +24,7 @@ set(shaders ${CMAKE_CURRENT_SOURCE_DIR}/Apply_CACAO.hlsl) set(config - ${CMAKE_CURRENT_SOURCE_DIR}/../Common/FFX_CACAO_Sample.json + ${CMAKE_CURRENT_SOURCE_DIR}/../Common/SampleSettings.json ) source_group("Sources" FILES ${sources}) diff --git a/sample/src/DX12/FFX_CACAO_Sample.cpp b/sample/src/DX12/Sample.cpp similarity index 68% rename from sample/src/DX12/FFX_CACAO_Sample.cpp rename to sample/src/DX12/Sample.cpp index 7940cd9..72d284b 100644 --- a/sample/src/DX12/FFX_CACAO_Sample.cpp +++ b/sample/src/DX12/Sample.cpp @@ -1,6 +1,6 @@ // AMD SampleDX12 sample code -// -// Copyright(c) 2018 Advanced Micro Devices, Inc.All rights reserved. +// +// Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 
// Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights @@ -19,8 +19,10 @@ #include "stdafx.h" -#include "FFX_CACAO_Sample.h" -#include "FFX_CACAO_Common.h" +#include "Sample.h" +#include "Common.h" + +#include "ffx_cacao_impl.h" static inline void SetWindowClientSize(HWND hWnd, LONG width, LONG height) { @@ -39,13 +41,13 @@ static inline void SetWindowClientSize(HWND hWnd, LONG width, LONG height) const bool VALIDATION_ENABLED = false; -FfxCacaoSample::FfxCacaoSample(LPCSTR name) : FrameworkWindows(name) +Sample::Sample(LPCSTR name) : FrameworkWindows(name) { - m_lastFrameTime = MillisecondsNow(); - m_time = 0; - m_bPlay = true; + m_lastFrameTime = MillisecondsNow(); + m_time = 0; + m_bPlay = true; - m_pGltfLoader = NULL; + m_pGltfLoader = NULL; } //-------------------------------------------------------------------------------------- @@ -53,7 +55,7 @@ FfxCacaoSample::FfxCacaoSample(LPCSTR name) : FrameworkWindows(name) // OnParseCommandLine // //-------------------------------------------------------------------------------------- -void FfxCacaoSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool *pbFullScreen) +void Sample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool *pbFullScreen) { // set some default values *pWidth = 1920; @@ -85,7 +87,7 @@ void FfxCacaoSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint3 // read config file (and override values from commandline if so) // { - std::ifstream f("FFX_CACAO_Sample.json"); + std::ifstream f("SampleSettings.json"); if (!f) { MessageBox(NULL, "Config file not found!\n", "Cauldron Panic!", MB_ICONERROR); @@ -130,18 +132,12 @@ void FfxCacaoSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint3 #ifdef FFX_CACAO_ENABLE_PROFILING if 
(m_isBenchmarking) { -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION bool downsampled = FFX_CACAO_PRESETS[m_presetIndex].useDownsampledSsao; -#endif uint32_t quality = FFX_CACAO_PRESETS[m_presetIndex].settings.qualityLevel; m_benchmarkScreenWidth = *pWidth; m_benchmarkScreenHeight = *pHeight; m_benchmarkWarmUpFramesToRun = 100; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION snprintf(m_benchmarkFilename, _countof(m_benchmarkFilename), "FFX_CACAO_DX12_Benchmark_%s_%ux%u_Q%u.csv", downsampled ? "downsampled" : "native", *pWidth, *pHeight, quality); -#else - snprintf(m_benchmarkFilename, _countof(m_benchmarkFilename), "FFX_CACAO_DX12_Benchmark_downsampled_%ux%u_Q%u.csv", *pWidth, *pHeight, quality); -#endif m_vsyncEnabled = false; m_isGpuValidationLayerEnabled = false; m_isCpuValidationLayerEnabled = false; @@ -154,7 +150,7 @@ void FfxCacaoSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint3 // OnCreate // //-------------------------------------------------------------------------------------- -void FfxCacaoSample::OnCreate(HWND hWnd) +void Sample::OnCreate(HWND hWnd) { m_hWnd = hWnd; @@ -162,68 +158,69 @@ void FfxCacaoSample::OnCreate(HWND hWnd) m_cameraControlSelected = 1; m_state.cacaoSettings = FFX_CACAO_PRESETS[m_presetIndex].settings; + m_state.useDownsampledSSAO = FFX_CACAO_PRESETS[m_presetIndex].useDownsampledSsao; DWORD dwAttrib = GetFileAttributes("..\\media\\"); - if ((dwAttrib == INVALID_FILE_ATTRIBUTES) || ((dwAttrib & FILE_ATTRIBUTE_DIRECTORY)) == 0) - { - MessageBox(NULL, "Media files not found!\n\nPlease check the readme on how to get the media files.", "Cauldron Panic!", MB_ICONERROR); - exit(0); - } + if ((dwAttrib == INVALID_FILE_ATTRIBUTES) || ((dwAttrib & FILE_ATTRIBUTE_DIRECTORY)) == 0) + { + MessageBox(NULL, "Media files not found!\n\nPlease check the readme on how to get the media files.", "Cauldron Panic!", MB_ICONERROR); + exit(0); + } m_fullscreen = false; - // Create Device - // - m_device.OnCreate("myapp", "myEngine", 
m_isCpuValidationLayerEnabled, m_isGpuValidationLayerEnabled, hWnd); - m_device.CreatePipelineCache(); + // Create Device + // + m_device.OnCreate("FfxCacaoSample", "Cauldron", m_isCpuValidationLayerEnabled, m_isGpuValidationLayerEnabled, hWnd); + m_device.CreatePipelineCache(); - //init the shader compiler + //init the shader compiler InitDirectXCompiler(); - CreateShaderCache(); - - // Create Swapchain - // - - // Init FS2 and choose format - fsHdrInit(m_device.GetAGSContext(), m_device.GetAGSGPUInfo(), hWnd); - - uint32_t dwNumberOfBackBuffers = 2; - m_swapChain.OnCreate(&m_device, dwNumberOfBackBuffers, hWnd); - - // Create a instance of the renderer and initialize it, we need to do that for each GPU - // - m_Node = new SampleRenderer(); - m_Node->OnCreate(&m_device, &m_swapChain); - - // init GUI (non gfx stuff) - // - ImGUI_Init((void *)hWnd); - - // Init Camera, looking at the origin - // - m_roll = 0.0f; - m_pitch = 0.0f; - m_distance = 3.5f; - - // init GUI state - m_state.toneMapper = 0; - m_state.skyDomeType = 0; - m_state.exposure = 1.0f; - m_state.iblFactor = 10.0f; - m_state.emmisiveFactor = 1.0f; - m_state.bDrawLightFrustum = false; - m_state.bDrawBoundingBoxes = false; - m_state.camera.LookAt(m_roll, m_pitch, m_distance, XMVectorSet(0, 0, 0, 0)); - - m_state.spotlightCount = 1; - - m_state.spotlight[0].intensity = 5.0f; - m_state.spotlight[0].color = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f); - m_state.spotlight[0].light.SetFov(XM_PI / 2.0f, 1024, 1024, 0.1f, 100.0f); - m_state.spotlight[0].light.LookAt(XM_PI / 2.0f, 0.58f, 3.5f, XMVectorSet(0, 0, 0, 0)); - - m_state.bUseCACAO = true; - m_state.bDisplayCacaoDirectly = true; + CreateShaderCache(); + + // Create Swapchain + // + + // Init FS2 and choose format + fsHdrInit(m_device.GetAGSContext(), m_device.GetAGSGPUInfo(), hWnd); + + uint32_t dwNumberOfBackBuffers = 2; + m_swapChain.OnCreate(&m_device, dwNumberOfBackBuffers, hWnd); + + // Create a instance of the renderer and initialize it, we need to do 
that for each GPU + // + m_node = new SampleRenderer(); + m_node->OnCreate(&m_device, &m_swapChain); + + // init GUI (non gfx stuff) + // + ImGUI_Init((void *)hWnd); + + // Init Camera, looking at the origin + // + m_roll = 0.0f; + m_pitch = 0.0f; + m_distance = 3.5f; + + // init GUI state + m_state.toneMapper = 0; + m_state.skyDomeType = 0; + m_state.exposure = 1.0f; + m_state.iblFactor = 10.0f; + m_state.emmisiveFactor = 1.0f; + m_state.drawLightFrustum = false; + m_state.drawBoundingBoxes = false; + m_state.camera.LookAt(m_roll, m_pitch, m_distance, XMVectorSet(0, 0, 0, 0)); + + m_state.spotlightCount = 1; + + m_state.spotlight[0].intensity = 5.0f; + m_state.spotlight[0].color = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f); + m_state.spotlight[0].light.SetFov(XM_PI / 2.0f, 1024, 1024, 0.1f, 100.0f); + m_state.spotlight[0].light.LookAt(XM_PI / 2.0f, 0.58f, 3.5f, XMVectorSet(0, 0, 0, 0)); + + m_state.useCACAO = true; + m_state.displayCacaoDirectly = true; } //-------------------------------------------------------------------------------------- @@ -231,38 +228,38 @@ void FfxCacaoSample::OnCreate(HWND hWnd) // OnDestroy // //-------------------------------------------------------------------------------------- -void FfxCacaoSample::OnDestroy() +void Sample::OnDestroy() { #ifdef FFX_CACAO_ENABLE_PROFILING m_isBenchmarking = false; #endif - ImGUI_Shutdown(); + ImGUI_Shutdown(); - m_device.GPUFlush(); + m_device.GPUFlush(); - // Fullscreen state should always be false before exiting the app. - m_swapChain.SetFullScreen(false); + // Fullscreen state should always be false before exiting the app. 
+ m_swapChain.SetFullScreen(false); - m_Node->UnloadScene(); - m_Node->OnDestroyWindowSizeDependentResources(); - m_Node->OnDestroy(); + m_node->UnloadScene(); + m_node->OnDestroyWindowSizeDependentResources(); + m_node->OnDestroy(); - delete m_Node; + delete m_node; - m_swapChain.OnDestroyWindowSizeDependentResources(); - m_swapChain.OnDestroy(); + m_swapChain.OnDestroyWindowSizeDependentResources(); + m_swapChain.OnDestroy(); - //shut down the shader compiler - DestroyShaderCache(&m_device); + //shut down the shader compiler + DestroyShaderCache(&m_device); - if (m_pGltfLoader) - { - delete m_pGltfLoader; - m_pGltfLoader = NULL; - } + if (m_pGltfLoader) + { + delete m_pGltfLoader; + m_pGltfLoader = NULL; + } - m_device.OnDestroy(); + m_device.OnDestroy(); } //-------------------------------------------------------------------------------------- @@ -270,10 +267,10 @@ void FfxCacaoSample::OnDestroy() // OnEvent // //-------------------------------------------------------------------------------------- -bool FfxCacaoSample::OnEvent(MSG msg) +bool Sample::OnEvent(MSG msg) { - if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) - return true; + if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) + return true; return true; } @@ -283,11 +280,11 @@ bool FfxCacaoSample::OnEvent(MSG msg) // SetFullScreen // //-------------------------------------------------------------------------------------- -void FfxCacaoSample::SetFullScreen(bool fullscreen) +void Sample::SetFullScreen(bool fullscreen) { - m_device.GPUFlush(); + m_device.GPUFlush(); - m_swapChain.SetFullScreen(fullscreen); + m_swapChain.SetFullScreen(fullscreen); } //-------------------------------------------------------------------------------------- @@ -295,7 +292,7 @@ void FfxCacaoSample::SetFullScreen(bool fullscreen) // OnResize // //-------------------------------------------------------------------------------------- -void FfxCacaoSample::OnResize(uint32_t width, 
uint32_t height, bool force) +void Sample::OnResize(uint32_t width, uint32_t height, bool force) { #ifdef FFX_CACAO_ENABLE_PROFILING if (m_isBenchmarking && !m_benchmarkWarmUpFramesToRun) @@ -308,38 +305,38 @@ void FfxCacaoSample::OnResize(uint32_t width, uint32_t height, bool force) } #endif - if (m_Width != width || m_Height != height || force) - { - // Flush GPU - // - m_device.GPUFlush(); - - // If resizing but no minimizing - // - if (m_Width > 0 && m_Height > 0) - { - if (m_Node!=NULL) - { - m_Node->OnDestroyWindowSizeDependentResources(); - } - m_swapChain.OnDestroyWindowSizeDependentResources(); - } - - m_Width = width; - m_Height = height; - - // if resizing but not minimizing the recreate it with the new size - // - if (m_Width > 0 && m_Height > 0) - { - m_swapChain.OnCreateWindowSizeDependentResources(m_Width, m_Height, m_vsyncEnabled, DISPLAYMODE_SDR); - if (m_Node != NULL) - { - m_Node->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); - } - } - } - m_state.camera.SetFov(XM_PI / 4, m_Width, m_Height, 0.1f, 1000.0f); + if (m_Width != width || m_Height != height || force) + { + // Flush GPU + // + m_device.GPUFlush(); + + // If resizing but no minimizing + // + if (m_Width > 0 && m_Height > 0) + { + if (m_node!=NULL) + { + m_node->OnDestroyWindowSizeDependentResources(); + } + m_swapChain.OnDestroyWindowSizeDependentResources(); + } + + m_Width = width; + m_Height = height; + + // if resizing but not minimizing the recreate it with the new size + // + if (m_Width > 0 && m_Height > 0) + { + m_swapChain.OnCreateWindowSizeDependentResources(m_Width, m_Height, m_vsyncEnabled, DISPLAYMODE_SDR); + if (m_node != NULL) + { + m_node->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); + } + } + } + m_state.camera.SetFov(XM_PI / 4, m_Width, m_Height, 0.1f, 1000.0f); } //-------------------------------------------------------------------------------------- @@ -347,7 +344,7 @@ void FfxCacaoSample::OnResize(uint32_t width, 
uint32_t height, bool force) // BuildUI, also loads the scene! // //-------------------------------------------------------------------------------------- -void FfxCacaoSample::BuildUI() +void Sample::BuildUI() { if (m_requiresLoad) { @@ -357,12 +354,12 @@ void FfxCacaoSample::BuildUI() // release everything and load the GLTF, just the light json data, the rest (textures and geometry) will be done in the main loop if (m_pGltfLoader != NULL) { - m_Node->UnloadScene(); - m_Node->OnDestroyWindowSizeDependentResources(); - m_Node->OnDestroy(); + m_node->UnloadScene(); + m_node->OnDestroyWindowSizeDependentResources(); + m_node->OnDestroy(); m_pGltfLoader->Unload(); - m_Node->OnCreate(&m_device, &m_swapChain); - m_Node->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); + m_node->OnCreate(&m_device, &m_swapChain); + m_node->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); } delete(m_pGltfLoader); @@ -458,12 +455,10 @@ void FfxCacaoSample::BuildUI() if (ImGui::Combo("Preset", &m_presetIndex, FFX_CACAO_PRESET_NAMES, _countof(FFX_CACAO_PRESET_NAMES)) && m_presetIndex < _countof(FFX_CACAO_PRESETS)) { m_state.cacaoSettings = FFX_CACAO_PRESETS[m_presetIndex].settings; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - m_state.bUseDownsampledSSAO = FFX_CACAO_PRESETS[m_presetIndex].useDownsampledSsao; -#endif + m_state.useDownsampledSSAO = FFX_CACAO_PRESETS[m_presetIndex].useDownsampledSsao; } - FfxCacaoSettings *settings = &m_state.cacaoSettings; + FFX_CACAO_Settings *settings = &m_state.cacaoSettings; ImGui::SliderFloat("Radius", &settings->radius, 0.0f, 10.0f); ImGui::SliderFloat("Shadow Multiplier", &settings->shadowMultiplier, 0.0f, 5.0f); ImGui::SliderFloat("Shadow Power", &settings->shadowPower, 0.5f, 5.0f); @@ -474,7 +469,7 @@ void FfxCacaoSample::BuildUI() const char *qualityLevels[] = { "Lowest", "Low", "Medium", "High", "Highest" }; int idx = (int)settings->qualityLevel; ImGui::Combo("Quality Level", &idx, qualityLevels, 
_countof(qualityLevels)); - settings->qualityLevel = (FfxCacaoQuality)idx; + settings->qualityLevel = (FFX_CACAO_Quality)idx; if (settings->qualityLevel == FFX_CACAO_QUALITY_HIGHEST) { ImGui::SliderFloat("Adaptive Quality Limit", &settings->adaptiveQualityLimit, 0.5f, 1.0f); @@ -486,49 +481,43 @@ void FfxCacaoSample::BuildUI() bool generateNormals = settings->generateNormals ? true : false; ImGui::Checkbox("Generate Normal Buffer From Depth Buffer", &generateNormals); settings->generateNormals = generateNormals ? FFX_CACAO_TRUE : FFX_CACAO_FALSE; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - ImGui::Checkbox("Use Downsampled SSAO", &m_state.bUseDownsampledSSAO); - if (m_state.bUseDownsampledSSAO) -#endif + ImGui::Checkbox("Use Downsampled SSAO", &m_state.useDownsampledSSAO); + if (m_state.useDownsampledSSAO) { ImGui::SliderFloat("Bilateral Sigma Squared", &settings->bilateralSigmaSquared, 0.0f, 10.0f); ImGui::SliderFloat("Bilateral Similarity Distance Sigma", &settings->bilateralSimilarityDistanceSigma, 0.1f, 1.0f); } - ImGui::Checkbox("Display FFX CACAO Output Directly", &m_state.bDisplayCacaoDirectly); - if (!m_state.bDisplayCacaoDirectly) + ImGui::Checkbox("Display FFX CACAO Output Directly", &m_state.displayCacaoDirectly); + if (!m_state.displayCacaoDirectly) { - ImGui::Checkbox("Use FFX CACAO", &m_state.bUseCACAO); + ImGui::Checkbox("Use FFX CACAO", &m_state.useCACAO); } - m_state.bUseCACAO |= m_state.bDisplayCacaoDirectly; + m_state.useCACAO |= m_state.displayCacaoDirectly; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - if (m_presetIndex < _countof(FFX_CACAO_PRESETS) && (memcmp(&m_state.cacaoSettings, &FFX_CACAO_PRESETS[m_presetIndex].settings, sizeof(m_state.cacaoSettings)) || m_state.bUseDownsampledSSAO != FFX_CACAO_PRESETS[m_presetIndex].useDownsampledSsao)) -#else - if (m_presetIndex < _countof(FFX_CACAO_PRESETS) && memcmp(&m_state.cacaoSettings, &FFX_CACAO_PRESETS[m_presetIndex].settings, sizeof(m_state.cacaoSettings))) -#endif + if (m_presetIndex < 
_countof(FFX_CACAO_PRESETS) && (memcmp(&m_state.cacaoSettings, &FFX_CACAO_PRESETS[m_presetIndex].settings, sizeof(m_state.cacaoSettings)) || m_state.useDownsampledSSAO != FFX_CACAO_PRESETS[m_presetIndex].useDownsampledSsao)) { m_presetIndex = _countof(FFX_CACAO_PRESETS); } } #ifdef FFX_CACAO_ENABLE_PROFILING - if (m_vsyncEnabled || !m_state.bUseCACAO) + if (m_vsyncEnabled || !m_state.useCACAO || m_isCpuValidationLayerEnabled || m_isGpuValidationLayerEnabled) { - ImGui::CollapsingHeader("Profiler Disabled (enable CACAO and turn off vsync)"); + ImGui::CollapsingHeader("Profiler Disabled (enable CACAO and turn off vsync and validation)"); } else { bool displayProfiling = ImGui::CollapsingHeader("Profiler", ImGuiTreeNodeFlags_DefaultOpen); - FfxCacaoDetailedTiming timings; + FFX_CACAO_DetailedTiming timings; uint64_t gpuTicksPerMicrosecond; - m_Node->GetCacaoTimings(&m_state, &timings, &gpuTicksPerMicrosecond); + m_node->GetCacaoTimings(&m_state, &timings, &gpuTicksPerMicrosecond); gpuTicksPerMicrosecond /= 1000000; for (uint32_t i = 0; i < timings.numTimestamps; ++i) { - FfxCacaoTimestamp *t = &timings.timestamps[i]; + FFX_CACAO_Timestamp *t = &timings.timestamps[i]; if (displayProfiling) { ImGui::Text("%-32s: %7.1f us", t->label, ((double)t->ticks) / ((double)gpuTicksPerMicrosecond)); @@ -573,7 +562,7 @@ void FfxCacaoSample::BuildUI() else if (m_cameraControlSelected > 1) { // Use a camera from the GLTF - // + // m_pGltfLoader->GetCamera(m_cameraControlSelected - 2, &m_state.camera); m_roll = m_state.camera.GetYaw(); m_pitch = m_state.camera.GetPitch(); @@ -586,7 +575,7 @@ void FfxCacaoSample::BuildUI() // OnRender, updates the state from the UI, animates, transforms and renders the scene // //-------------------------------------------------------------------------------------- -void FfxCacaoSample::OnRender() +void Sample::OnRender() { // Get timings // @@ -608,14 +597,14 @@ void FfxCacaoSample::OnRender() if (m_loadingStage) { - // LoadScene needs to be called a 
number of times, the scene is not fully loaded until it returns 0 - // This is done so we can display a progress bar when the scene is loading - m_loadingStage = m_Node->LoadScene(m_pGltfLoader, m_loadingStage); + // LoadScene needs to be called a number of times, the scene is not fully loaded until it returns 0 + // This is done so we can display a progress bar when the scene is loading + m_loadingStage = m_node->LoadScene(m_pGltfLoader, m_loadingStage); if (m_loadingStage == 0) { m_time = 0.0f; } - } + } #if FFX_CACAO_ENABLE_PROFILING else if (m_pGltfLoader && m_isBenchmarking) { @@ -634,8 +623,8 @@ void FfxCacaoSample::OnRender() } uint64_t gpuTicksPerSecond; - FfxCacaoDetailedTiming timings = {}; - m_Node->GetCacaoTimings(&m_state, &timings, &gpuTicksPerSecond); + FFX_CACAO_DetailedTiming timings = {}; + m_node->GetCacaoTimings(&m_state, &timings, &gpuTicksPerSecond); double microsecondsPerGpuTick = 1000000.0 / (double)gpuTicksPerSecond; if (timings.numTimestamps) @@ -667,21 +656,21 @@ void FfxCacaoSample::OnRender() } - // Animate and transform the scene - // - if (m_pGltfLoader) - { - m_pGltfLoader->SetAnimationTime(0, m_time); - m_pGltfLoader->TransformScene(0, XMMatrixIdentity()); - } + // Animate and transform the scene + // + if (m_pGltfLoader) + { + m_pGltfLoader->SetAnimationTime(0, m_time); + m_pGltfLoader->TransformScene(0, XMMatrixIdentity()); + } - m_state.time = m_time; + m_state.time = m_time; - // Do Render frame using AFR - // - m_Node->OnRender(&m_state, &m_swapChain); + // Do Render frame using AFR + // + m_node->OnRender(&m_state, &m_swapChain); - m_swapChain.Present(); + m_swapChain.Present(); } //-------------------------------------------------------------------------------------- @@ -690,14 +679,14 @@ void FfxCacaoSample::OnRender() // //-------------------------------------------------------------------------------------- int WINAPI WinMain(HINSTANCE hInstance, - HINSTANCE hPrevInstance, - LPSTR lpCmdLine, - int nCmdShow) + HINSTANCE 
hPrevInstance, + LPSTR lpCmdLine, + int nCmdShow) { - LPCSTR Name = "FFX CACAO DirectX 12 Sample v1.0"; - uint32_t Width = 1280; - uint32_t Height = 720; + LPCSTR Name = "FFX CACAO DirectX 12 Sample v1.2"; + uint32_t Width = 1280; + uint32_t Height = 720; - // create new DX sample - return RunFramework(hInstance, lpCmdLine, nCmdShow, new FfxCacaoSample(Name)); + // create new DX sample + return RunFramework(hInstance, lpCmdLine, nCmdShow, new Sample(Name)); } diff --git a/sample/src/DX12/FFX_CACAO_Sample.h b/sample/src/DX12/Sample.h similarity index 53% rename from sample/src/DX12/FFX_CACAO_Sample.h rename to sample/src/DX12/Sample.h index c313d02..302c377 100644 --- a/sample/src/DX12/FFX_CACAO_Sample.h +++ b/sample/src/DX12/Sample.h @@ -1,6 +1,6 @@ // AMD SampleDX12 sample code // -// Copyright(c) 2017 Advanced Micro Devices, Inc.All rights reserved. +// Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights @@ -20,51 +20,49 @@ #include "SampleRenderer.h" -#include "ffx_cacao.h" - -class FfxCacaoSample : public FrameworkWindows +class Sample : public FrameworkWindows { public: - FfxCacaoSample(LPCSTR name); - void OnCreate(HWND hWnd); - void OnDestroy(); + Sample(LPCSTR name); + void OnCreate(HWND hWnd); + void OnDestroy(); void BuildUI(); void OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool *pbFullScreen); void OnRender(); - bool OnEvent(MSG msg); + bool OnEvent(MSG msg); void OnResize(uint32_t width, uint32_t height) { OnResize(width, height, false); } - void OnResize(uint32_t Width, uint32_t Height, bool force); - void SetFullScreen(bool fullscreen); - + void OnResize(uint32_t Width, uint32_t Height, bool force); + void SetFullScreen(bool fullscreen); + private: - HWND m_hWnd; + HWND 
m_hWnd; - Device m_device; - SwapChain m_swapChain; + Device m_device; + SwapChain m_swapChain; - GLTFCommon *m_pGltfLoader = NULL; + GLTFCommon *m_pGltfLoader = NULL; - SampleRenderer *m_Node = NULL; - SampleRenderer::State m_state; + SampleRenderer *m_node = NULL; + SampleRenderer::State m_state; - int m_loadingStage = 0; - bool m_requiresLoad = true; - int m_preset; + int m_loadingStage = 0; + bool m_requiresLoad = true; + int m_preset; - float m_distance; - float m_roll; - float m_pitch; + float m_distance; + float m_roll; + float m_pitch; - float m_time; // WallClock in seconds. - double m_deltaTime; // The elapsed time in milliseconds since the previous frame. - double m_lastFrameTime; + float m_time; // WallClock in seconds. + double m_deltaTime; // The elapsed time in milliseconds since the previous frame. + double m_lastFrameTime; - bool m_isCapturing = false; - bool m_vsyncEnabled = false; - int m_cameraControlSelected = 0; - bool m_bPlay; - bool m_displayGUI; - bool m_fullscreen; + bool m_isCapturing = false; + bool m_vsyncEnabled = false; + int m_cameraControlSelected = 0; + bool m_bPlay; + bool m_displayGUI; + bool m_fullscreen; // json config file json m_jsonConfigFile; @@ -77,10 +75,10 @@ class FfxCacaoSample : public FrameworkWindows int m_presetIndex = 0; #ifdef FFX_CACAO_ENABLE_PROFILING - char m_benchmarkFilename[1024]; - bool m_isBenchmarking; - uint32_t m_benchmarkScreenWidth; - uint32_t m_benchmarkScreenHeight; - uint32_t m_benchmarkWarmUpFramesToRun; + char m_benchmarkFilename[1024]; + bool m_isBenchmarking; + uint32_t m_benchmarkScreenWidth; + uint32_t m_benchmarkScreenHeight; + uint32_t m_benchmarkWarmUpFramesToRun; #endif }; diff --git a/sample/src/DX12/SampleRenderer.cpp b/sample/src/DX12/SampleRenderer.cpp index 8154356..5055f2f 100644 --- a/sample/src/DX12/SampleRenderer.cpp +++ b/sample/src/DX12/SampleRenderer.cpp @@ -1,6 +1,6 @@ // AMD SampleDX12 sample code // -// Copyright(c) 2018 Advanced Micro Devices, Inc.All rights reserved. 
+// Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights @@ -28,70 +28,68 @@ //-------------------------------------------------------------------------------------- void SampleRenderer::OnCreate(Device* pDevice, SwapChain *pSwapChain) { - m_pDevice = pDevice; - - // Initialize helpers - - // Create all the heaps for the resources views - const uint32_t cbvDescriptorCount = 3000; - const uint32_t srvDescriptorCount = 3000; - const uint32_t uavDescriptorCount = 100; - const uint32_t dsvDescriptorCount = 100; - const uint32_t rtvDescriptorCount = 1000; - const uint32_t samplerDescriptorCount = 50; - m_resourceViewHeaps.OnCreate(pDevice, cbvDescriptorCount, srvDescriptorCount, uavDescriptorCount, dsvDescriptorCount, rtvDescriptorCount, samplerDescriptorCount); - - // Create a commandlist ring for the Direct queue - // We are queuing (backBufferCount + 0.5) frames, so we need to triple buffer the command lists - uint32_t commandListsPerBackBuffer = 8; - m_CommandListRing.OnCreate(pDevice, backBufferCount + 1, commandListsPerBackBuffer, pDevice->GetGraphicsQueue()->GetDesc()); - - // Create a 'dynamic' constant buffer - const uint32_t constantBuffersMemSize = 20 * 1024 * 1024; - m_ConstantBufferRing.OnCreate(pDevice, backBufferCount, constantBuffersMemSize, &m_resourceViewHeaps); - - // Create a 'static' pool for vertices, indices and constant buffers - const uint32_t staticGeometryMemSize = 128 * 1024 * 1024; - m_VidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, USE_VID_MEM, "StaticGeom"); - - // initialize the GPU time stamps module - m_GPUTimer.OnCreate(pDevice, backBufferCount); - - // Quick helper to upload resources, it has it's own commandList and uses suballocation. 
- // for 4K textures we'll need 100Megs - const uint32_t uploadHeapMemSize = 1000 * 1024 * 1024; - m_UploadHeap.OnCreate(pDevice, uploadHeapMemSize); // initialize an upload heap (uses suballocation for faster results) - - // Create the depth buffer views - m_resourceViewHeaps.AllocDSVDescriptor(1, &m_depthBufferDSV); - m_resourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_depthBufferSRV); - - // Create a Shadowmap atlas to hold 4 cascades/spotlights - m_ShadowMap.InitDepthStencil(pDevice, "m_pShadowMap", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, 2 * 1024, 2 * 1024, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)); - m_resourceViewHeaps.AllocDSVDescriptor(1, &m_ShadowMapDSV); - m_resourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_ShadowMapSRV); - m_ShadowMap.CreateDSV(0, &m_ShadowMapDSV); - m_ShadowMap.CreateSRV(0, &m_ShadowMapSRV); - - m_skyDome.OnCreate(pDevice, &m_UploadHeap, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, "..\\media\\envmaps\\papermill\\diffuse.dds", "..\\media\\envmaps\\papermill\\specular.dds", DXGI_FORMAT_R16G16B16A16_FLOAT, 4); - m_skyDomeProc.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT, 4); - m_wireframe.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT, 4); - m_wireframeBox.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool); - m_downSample.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT); - m_bloom.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT); - m_motionBlur.OnCreate(pDevice, &m_resourceViewHeaps, "motionBlur.hlsl", "main", 1, 2, 8, 8, 1); - - size_t cacaoSize = ffxCacaoD3D12GetContextSize(); - FfxCacaoStatus status; - -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - m_pFfxCacaoContextNative = 
(FfxCacaoD3D12Context*)malloc(cacaoSize); - status = ffxCacaoD3D12InitContext(m_pFfxCacaoContextNative, pDevice->GetDevice()); + m_pDevice = pDevice; + + // Initialize helpers + + // Create all the heaps for the resources views + const uint32_t cbvDescriptorCount = 3000; + const uint32_t srvDescriptorCount = 3000; + const uint32_t uavDescriptorCount = 100; + const uint32_t dsvDescriptorCount = 100; + const uint32_t rtvDescriptorCount = 1000; + const uint32_t samplerDescriptorCount = 50; + m_resourceViewHeaps.OnCreate(pDevice, cbvDescriptorCount, srvDescriptorCount, uavDescriptorCount, dsvDescriptorCount, rtvDescriptorCount, samplerDescriptorCount); + + // Create a commandlist ring for the Direct queue + // We are queuing (backBufferCount + 0.5) frames, so we need to triple buffer the command lists + uint32_t commandListsPerBackBuffer = 8; + m_commandListRing.OnCreate(pDevice, backBufferCount + 1, commandListsPerBackBuffer, pDevice->GetGraphicsQueue()->GetDesc()); + + // Create a 'dynamic' constant buffer + const uint32_t constantBuffersMemSize = 20 * 1024 * 1024; + m_constantBufferRing.OnCreate(pDevice, backBufferCount, constantBuffersMemSize, &m_resourceViewHeaps); + + // Create a 'static' pool for vertices, indices and constant buffers + const uint32_t staticGeometryMemSize = 128 * 1024 * 1024; + m_vidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, USE_VID_MEM, "StaticGeom"); + + // initialize the GPU time stamps module + m_gpuTimer.OnCreate(pDevice, backBufferCount); + + // Quick helper to upload resources, it has it's own commandList and uses suballocation. 
+ // for 4K textures we'll need 100Megs + const uint32_t uploadHeapMemSize = 1000 * 1024 * 1024; + m_uploadHeap.OnCreate(pDevice, uploadHeapMemSize); // initialize an upload heap (uses suballocation for faster results) + + // Create the depth buffer views + m_resourceViewHeaps.AllocDSVDescriptor(1, &m_depthBufferDSV); + m_resourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_depthBufferSRV); + + // Create a Shadowmap atlas to hold 4 cascades/spotlights + m_shadowMap.InitDepthStencil(pDevice, "m_pShadowMap", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, 2 * 1024, 2 * 1024, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)); + m_resourceViewHeaps.AllocDSVDescriptor(1, &m_shadowMapDSV); + m_resourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_shadowMapSRV); + m_shadowMap.CreateDSV(0, &m_shadowMapDSV); + m_shadowMap.CreateSRV(0, &m_shadowMapSRV); + + m_skyDome.OnCreate(pDevice, &m_uploadHeap, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, "..\\media\\envmaps\\papermill\\diffuse.dds", "..\\media\\envmaps\\papermill\\specular.dds", DXGI_FORMAT_R16G16B16A16_FLOAT, 4); + m_skyDomeProc.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT, 4); + m_wireframe.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT, 4); + m_wireframeBox.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool); + m_downSample.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT); + m_bloom.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, DXGI_FORMAT_R16G16B16A16_FLOAT); + m_motionBlur.OnCreate(pDevice, &m_resourceViewHeaps, "motionBlur.hlsl", "main", 1, 2, 8, 8, 1); + + size_t cacaoSize = FFX_CACAO_D3D12GetContextSize(); + FFX_CACAO_Status status; + + m_pCACAOContextNative = (FFX_CACAO_D3D12Context*)malloc(cacaoSize); + status = 
FFX_CACAO_D3D12InitContext(m_pCACAOContextNative, pDevice->GetDevice()); assert(status == FFX_CACAO_STATUS_OK); -#endif - m_pFfxCacaoContextDownsampled = (FfxCacaoD3D12Context*)malloc(cacaoSize); - status = ffxCacaoD3D12InitContext(m_pFfxCacaoContextDownsampled, pDevice->GetDevice()); + m_pCACAOContextDownsampled = (FFX_CACAO_D3D12Context*)malloc(cacaoSize); + status = FFX_CACAO_D3D12InitContext(m_pCACAOContextDownsampled, pDevice->GetDevice()); assert(status == FFX_CACAO_STATUS_OK); D3D12_STATIC_SAMPLER_DESC SamplerDesc = {}; @@ -109,23 +107,23 @@ void SampleRenderer::OnCreate(Device* pDevice, SwapChain *pSwapChain) SamplerDesc.RegisterSpace = 0; SamplerDesc.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; - m_applyDirect.OnCreate(pDevice, "Apply_CACAO.hlsl", &m_resourceViewHeaps, &m_VidMemBufferPool, 1, 1, &SamplerDesc, pSwapChain->GetFormat()); // DXGI_FORMAT_R16G16B16A16_FLOAT); + m_cacaoApplyDirect.OnCreate(pDevice, "Apply_CACAO.hlsl", &m_resourceViewHeaps, &m_vidMemBufferPool, 1, 1, &SamplerDesc, pSwapChain->GetFormat()); // DXGI_FORMAT_R16G16B16A16_FLOAT); // Create tonemapping pass - m_cacaoUavClear.OnCreate(pDevice, &m_resourceViewHeaps, "Apply_CACAO.hlsl", "CSClear", 1, 0, 8, 8, 1); - m_toneMapping.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, pSwapChain->GetFormat()); + m_cacaoUAVClear.OnCreate(pDevice, &m_resourceViewHeaps, "Apply_CACAO.hlsl", "CSClear", 1, 0, 8, 8, 1); + m_toneMapping.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, pSwapChain->GetFormat()); - // Initialize UI rendering resources - m_ImGUI.OnCreate(pDevice, &m_UploadHeap, &m_resourceViewHeaps, &m_ConstantBufferRing, pSwapChain->GetFormat()); + // Initialize UI rendering resources + m_imGUI.OnCreate(pDevice, &m_uploadHeap, &m_resourceViewHeaps, &m_constantBufferRing, pSwapChain->GetFormat()); - m_resourceViewHeaps.AllocRTVDescriptor(1, &m_HDRRTV); - m_resourceViewHeaps.AllocRTVDescriptor(1, &m_HDRRTVMSAA); + 
m_resourceViewHeaps.AllocRTVDescriptor(1, &m_hdrRTV); + m_resourceViewHeaps.AllocRTVDescriptor(1, &m_hdrRTVMSAA); - m_resourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_HDRSRV); + m_resourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_hdrSRV); // CACAO stuff - m_resourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_applyDirectInput); - m_resourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_FfxCacaoOutputSRV); - m_resourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_FfxCacaoOutputUAV); + m_resourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_cacaoApplyDirectInput); + m_resourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_cacaoOutputSRV); + m_resourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_cacaoOutputUAV); // Deferred non msaa pass m_resourceViewHeaps.AllocDSVDescriptor(1, &m_depthBufferNonMsaaDSV); @@ -133,10 +131,10 @@ void SampleRenderer::OnCreate(Device* pDevice, SwapChain *pSwapChain) m_resourceViewHeaps.AllocRTVDescriptor(1, &m_normalBufferNonMsaaRTV); m_resourceViewHeaps.AllocCBV_SRV_UAVDescriptor(1, &m_normalBufferNonMsaaSRV); - // Make sure upload heap has finished uploading before continuing + // Make sure upload heap has finished uploading before continuing #if (USE_VID_MEM==true) - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); - m_UploadHeap.FlushAndFinish(); + m_vidMemBufferPool.UploadData(m_uploadHeap.GetCommandList()); + m_uploadHeap.FlushAndFinish(); #endif } @@ -147,38 +145,36 @@ void SampleRenderer::OnCreate(Device* pDevice, SwapChain *pSwapChain) //-------------------------------------------------------------------------------------- void SampleRenderer::OnDestroy() { - m_ImGUI.OnDestroy(); - m_toneMapping.OnDestroy(); - m_taa.OnDestroy(); - m_motionBlur.OnDestroy(); - m_sharpen.OnDestroy(); - m_bloom.OnDestroy(); - -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - ffxCacaoD3D12DestroyContext(m_pFfxCacaoContextNative); - free(m_pFfxCacaoContextNative); -#endif - ffxCacaoD3D12DestroyContext(m_pFfxCacaoContextDownsampled); - 
free(m_pFfxCacaoContextDownsampled); - m_cacaoUavClear.OnDestroy(); - m_applyDirect.OnDestroy(); + m_imGUI.OnDestroy(); + m_toneMapping.OnDestroy(); + m_taa.OnDestroy(); + m_motionBlur.OnDestroy(); + m_sharpen.OnDestroy(); + m_bloom.OnDestroy(); + + FFX_CACAO_D3D12DestroyContext(m_pCACAOContextNative); + free(m_pCACAOContextNative); + FFX_CACAO_D3D12DestroyContext(m_pCACAOContextDownsampled); + free(m_pCACAOContextDownsampled); + m_cacaoUAVClear.OnDestroy(); + m_cacaoApplyDirect.OnDestroy(); m_downSample.OnDestroy(); - m_wireframeBox.OnDestroy(); - m_wireframe.OnDestroy(); - m_skyDomeProc.OnDestroy(); - m_skyDome.OnDestroy(); - m_ShadowMap.OnDestroy(); + m_wireframeBox.OnDestroy(); + m_wireframe.OnDestroy(); + m_skyDomeProc.OnDestroy(); + m_skyDome.OnDestroy(); + m_shadowMap.OnDestroy(); #if USE_SHADOWMASK - m_shadowResolve.OnDestroy(); + m_shadowResolve.OnDestroy(); #endif - m_UploadHeap.OnDestroy(); - m_GPUTimer.OnDestroy(); - m_VidMemBufferPool.OnDestroy(); - m_ConstantBufferRing.OnDestroy(); - m_CommandListRing.OnDestroy(); - m_resourceViewHeaps.OnDestroy(); + m_uploadHeap.OnDestroy(); + m_gpuTimer.OnDestroy(); + m_vidMemBufferPool.OnDestroy(); + m_constantBufferRing.OnDestroy(); + m_commandListRing.OnDestroy(); + m_resourceViewHeaps.OnDestroy(); } //-------------------------------------------------------------------------------------- @@ -188,22 +184,22 @@ void SampleRenderer::OnDestroy() //-------------------------------------------------------------------------------------- void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height) { - m_Width = Width; - m_Height = Height; + m_width = Width; + m_height = Height; - // Set the viewport - // - m_viewPort = { 0.0f, 0.0f, static_cast(Width), static_cast(Height), 0.0f, 1.0f }; + // Set the viewport + // + m_viewPort = { 0.0f, 0.0f, static_cast(Width), static_cast(Height), 0.0f, 1.0f }; - // Create scissor rectangle - // - m_RectScissor = { 0, 0, (LONG)Width, 
(LONG)Height }; + // Create scissor rectangle + // + m_rectScissor = { 0, 0, (LONG)Width, (LONG)Height }; - // Create depth buffer - // + // Create depth buffer + // m_depthBuffer.InitDepthStencil(m_pDevice, "depthbuffer", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, Width, Height, 1, 1, 4, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)); m_depthBuffer.CreateDSV(0, &m_depthBufferDSV); - m_depthBuffer.CreateSRV(0, &m_depthBufferSRV); + m_depthBuffer.CreateSRV(0, &m_depthBufferSRV); m_depthBufferNonMsaa.InitDepthStencil(m_pDevice, "depthBufferNonMSAA", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R32_TYPELESS, Width, Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)); m_depthBufferNonMsaa.CreateDSV(0, &m_depthBufferNonMsaaDSV); @@ -213,29 +209,29 @@ void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, m_normalBufferNonMsaa.CreateRTV(0, &m_normalBufferNonMsaaRTV); m_normalBufferNonMsaa.CreateSRV(0, &m_normalBufferNonMsaaSRV); - // Create Texture + RTV with x4 MSAA - // - CD3DX12_RESOURCE_DESC RDescMSAA = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R16G16B16A16_FLOAT, Width, Height, 1, 1, 4, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); - m_HDRMSAA.InitRenderTarget(m_pDevice, "HDRMSAA", &RDescMSAA, D3D12_RESOURCE_STATE_RENDER_TARGET); - m_HDRMSAA.CreateRTV(0, &m_HDRRTVMSAA); + // Create Texture + RTV with x4 MSAA + // + CD3DX12_RESOURCE_DESC RDescMSAA = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R16G16B16A16_FLOAT, Width, Height, 1, 1, 4, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET); + m_hdrMSAA.InitRenderTarget(m_pDevice, "HDRMSAA", &RDescMSAA, D3D12_RESOURCE_STATE_RENDER_TARGET); + m_hdrMSAA.CreateRTV(0, &m_hdrRTVMSAA); - // Create Texture + RTV, to hold the resolved scene - // - CD3DX12_RESOURCE_DESC RDesc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R16G16B16A16_FLOAT, Width, Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); - m_HDR.InitRenderTarget(m_pDevice, "HDR", &RDesc, 
D3D12_RESOURCE_STATE_RENDER_TARGET); - m_HDR.CreateSRV(0, &m_HDRSRV); - m_HDR.CreateRTV(0, &m_HDRRTV); + // Create Texture + RTV, to hold the resolved scene + // + CD3DX12_RESOURCE_DESC RDesc = CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R16G16B16A16_FLOAT, Width, Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS); + m_hdr.InitRenderTarget(m_pDevice, "HDR", &RDesc, D3D12_RESOURCE_STATE_RENDER_TARGET); + m_hdr.CreateSRV(0, &m_hdrSRV); + m_hdr.CreateRTV(0, &m_hdrRTV); - m_FfxCacaoOutput.Init(m_pDevice, "cacaoOutput", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8_UNORM, Width, Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), D3D12_RESOURCE_STATE_GENERIC_READ, NULL); - m_FfxCacaoOutput.CreateSRV(0, &m_FfxCacaoOutputSRV); - m_FfxCacaoOutput.CreateUAV(0, &m_FfxCacaoOutputUAV); + m_cacaoOutput.Init(m_pDevice, "cacaoOutput", &CD3DX12_RESOURCE_DESC::Tex2D(DXGI_FORMAT_R8_UNORM, Width, Height, 1, 1, 1, 0, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS), D3D12_RESOURCE_STATE_GENERIC_READ, NULL); + m_cacaoOutput.CreateSRV(0, &m_cacaoOutputSRV); + m_cacaoOutput.CreateUAV(0, &m_cacaoOutputUAV); if (m_gltfPBR) { - m_gltfPBR->OnUpdateWindowSizeDependentResources(&m_FfxCacaoOutput); + m_gltfPBR->OnUpdateWindowSizeDependentResources(&m_cacaoOutput); } - FfxCacaoD3D12ScreenSizeInfo cacaoScreenSizeDependentInfo; + FFX_CACAO_D3D12ScreenSizeInfo cacaoScreenSizeDependentInfo; cacaoScreenSizeDependentInfo.width = Width; cacaoScreenSizeDependentInfo.height = Height; @@ -249,8 +245,8 @@ void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, cacaoScreenSizeDependentInfo.normalBufferSrvDesc.Texture2D.PlaneSlice = 0; cacaoScreenSizeDependentInfo.normalBufferSrvDesc.Texture2D.ResourceMinLODClamp = 0.0f; - cacaoScreenSizeDependentInfo.outputResource = m_FfxCacaoOutput.GetResource(); - cacaoScreenSizeDependentInfo.outputUavDesc.Format = m_FfxCacaoOutput.GetFormat(); + 
cacaoScreenSizeDependentInfo.outputResource = m_cacaoOutput.GetResource(); + cacaoScreenSizeDependentInfo.outputUavDesc.Format = m_cacaoOutput.GetFormat(); cacaoScreenSizeDependentInfo.outputUavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; cacaoScreenSizeDependentInfo.outputUavDesc.Texture2D.MipSlice = 0; cacaoScreenSizeDependentInfo.outputUavDesc.Texture2D.PlaneSlice = 0; @@ -264,25 +260,21 @@ void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, cacaoScreenSizeDependentInfo.depthBufferSrvDesc.Texture2D.PlaneSlice = 0; cacaoScreenSizeDependentInfo.depthBufferSrvDesc.Texture2D.ResourceMinLODClamp = 0.0f; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION cacaoScreenSizeDependentInfo.useDownsampledSsao = FFX_CACAO_FALSE; - ffxCacaoD3D12InitScreenSizeDependentResources(m_pFfxCacaoContextNative, &cacaoScreenSizeDependentInfo); + FFX_CACAO_D3D12InitScreenSizeDependentResources(m_pCACAOContextNative, &cacaoScreenSizeDependentInfo); cacaoScreenSizeDependentInfo.useDownsampledSsao = FFX_CACAO_TRUE; - ffxCacaoD3D12InitScreenSizeDependentResources(m_pFfxCacaoContextDownsampled, &cacaoScreenSizeDependentInfo); -#else - ffxCacaoD3D12InitScreenSizeDependentResources(m_pFfxCacaoContextDownsampled, &cacaoScreenSizeDependentInfo); -#endif + FFX_CACAO_D3D12InitScreenSizeDependentResources(m_pCACAOContextDownsampled, &cacaoScreenSizeDependentInfo); - m_FfxCacaoOutput.CreateSRV(0, &m_applyDirectInput); + m_cacaoOutput.CreateSRV(0, &m_cacaoApplyDirectInput); - m_applyDirect.UpdatePipeline(pSwapChain->GetFormat()); + m_cacaoApplyDirect.UpdatePipeline(pSwapChain->GetFormat()); - // update bloom and downscaling effect - // - m_downSample.OnCreateWindowSizeDependentResources(m_Width, m_Height, &m_HDR, 5); //downsample the HDR texture 5 times - m_bloom.OnCreateWindowSizeDependentResources(m_Width / 2, m_Height / 2, m_downSample.GetTexture(), 5, &m_HDR); - m_toneMapping.UpdatePipelines(pSwapChain->GetFormat()); - m_ImGUI.UpdatePipeline(pSwapChain->GetFormat()); + 
// update bloom and downscaling effect + // + m_downSample.OnCreateWindowSizeDependentResources(m_width, m_height, &m_hdr, 5); //downsample the HDR texture 5 times + m_bloom.OnCreateWindowSizeDependentResources(m_width / 2, m_height / 2, m_downSample.GetTexture(), 5, &m_hdr); + m_toneMapping.UpdatePipelines(pSwapChain->GetFormat()); + m_imGUI.UpdatePipeline(pSwapChain->GetFormat()); } //-------------------------------------------------------------------------------------- @@ -292,26 +284,24 @@ void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, //-------------------------------------------------------------------------------------- void SampleRenderer::OnDestroyWindowSizeDependentResources() { - m_bloom.OnDestroyWindowSizeDependentResources(); - m_downSample.OnDestroyWindowSizeDependentResources(); + m_bloom.OnDestroyWindowSizeDependentResources(); + m_downSample.OnDestroyWindowSizeDependentResources(); -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - ffxCacaoD3D12DestroyScreenSizeDependentResources(m_pFfxCacaoContextNative); -#endif - ffxCacaoD3D12DestroyScreenSizeDependentResources(m_pFfxCacaoContextDownsampled); - m_FfxCacaoOutput.OnDestroy(); + FFX_CACAO_D3D12DestroyScreenSizeDependentResources(m_pCACAOContextNative); + FFX_CACAO_D3D12DestroyScreenSizeDependentResources(m_pCACAOContextDownsampled); + m_cacaoOutput.OnDestroy(); - m_HDR.OnDestroy(); - m_HDRMSAA.OnDestroy(); - m_HistoryBuffer.OnDestroy(); - m_TAABuffer.OnDestroy(); + m_hdr.OnDestroy(); + m_hdrMSAA.OnDestroy(); + m_historyBuffer.OnDestroy(); + m_taaBuffer.OnDestroy(); #if USE_SHADOWMASK - m_ShadowMask.OnDestroy(); + m_ShadowMask.OnDestroy(); #endif m_normalBufferNonMsaa.OnDestroy(); m_depthBufferNonMsaa.OnDestroy(); - m_depthBuffer.OnDestroy(); + m_depthBuffer.OnDestroy(); } @@ -322,53 +312,53 @@ void SampleRenderer::OnDestroyWindowSizeDependentResources() //-------------------------------------------------------------------------------------- int 
SampleRenderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) { - // show loading progress - // - ImGui::OpenPopup("Loading"); - if (ImGui::BeginPopupModal("Loading", NULL, ImGuiWindowFlags_AlwaysAutoResize)) - { - float progress = (float)stage / 13.0f; - ImGui::ProgressBar(progress, ImVec2(0.f, 0.f), NULL); - ImGui::EndPopup(); - } - - // Loading stages - // - if (stage == 0) - { - } - else if (stage == 5) - { - Profile p("m_pGltfLoader->Load"); - - m_pGLTFTexturesAndBuffers = new GLTFTexturesAndBuffers(); - m_pGLTFTexturesAndBuffers->OnCreate(m_pDevice, pGLTFCommon, &m_UploadHeap, &m_VidMemBufferPool, &m_ConstantBufferRing); - } - else if (stage == 6) - { - Profile p("LoadTextures"); - - // here we are loading onto the GPU all the textures and the inverse matrices - // this data will be used to create the PBR and Depth passes - m_pGLTFTexturesAndBuffers->LoadTextures(); - } - else if (stage == 7) - { - { - Profile p("m_gltfDepth->OnCreate"); - - //create the glTF's textures, VBs, IBs, shaders and descriptors for this particular pass - m_gltfDepth = new GltfDepthPass(); - m_gltfDepth->OnCreate( - m_pDevice, - &m_UploadHeap, - &m_resourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers - ); - } - } + // show loading progress + // + ImGui::OpenPopup("Loading"); + if (ImGui::BeginPopupModal("Loading", NULL, ImGuiWindowFlags_AlwaysAutoResize)) + { + float progress = (float)stage / 13.0f; + ImGui::ProgressBar(progress, ImVec2(0.f, 0.f), NULL); + ImGui::EndPopup(); + } + + // Loading stages + // + if (stage == 0) + { + } + else if (stage == 5) + { + Profile p("m_pGltfLoader->Load"); + + m_pGLTFTexturesAndBuffers = new GLTFTexturesAndBuffers(); + m_pGLTFTexturesAndBuffers->OnCreate(m_pDevice, pGLTFCommon, &m_uploadHeap, &m_vidMemBufferPool, &m_constantBufferRing); + } + else if (stage == 6) + { + Profile p("LoadTextures"); + + // here we are loading onto the GPU all the textures and the inverse matrices + // this data will be used to 
create the PBR and Depth passes + m_pGLTFTexturesAndBuffers->LoadTextures(); + } + else if (stage == 7) + { + { + Profile p("m_gltfDepth->OnCreate"); + + //create the glTF's textures, VBs, IBs, shaders and descriptors for this particular pass + m_gltfDepth = new GltfDepthPass(); + m_gltfDepth->OnCreate( + m_pDevice, + &m_uploadHeap, + &m_resourceViewHeaps, + &m_constantBufferRing, + &m_vidMemBufferPool, + m_pGLTFTexturesAndBuffers + ); + } + } else if (stage == 8) { Profile p("m_gltfPBR->OnCreate (Non MSAA)"); @@ -377,10 +367,10 @@ int SampleRenderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) m_gltfPBRNonMsaa = new GltfPbrPass(); m_gltfPBRNonMsaa->OnCreate( m_pDevice, - &m_UploadHeap, + &m_uploadHeap, &m_resourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, + &m_constantBufferRing, + &m_vidMemBufferPool, m_pGLTFTexturesAndBuffers, &m_skyDome, false, @@ -392,67 +382,67 @@ int SampleRenderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) 1 ); } - else if (stage == 9) - { - Profile p("m_gltfPBR->OnCreate"); - - // same thing as above but for the PBR pass - m_gltfPBR = new GltfPbrPass(); - m_gltfPBR->OnCreate( - m_pDevice, - &m_UploadHeap, - &m_resourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers, - &m_skyDome, + else if (stage == 9) + { + Profile p("m_gltfPBR->OnCreate"); + + // same thing as above but for the PBR pass + m_gltfPBR = new GltfPbrPass(); + m_gltfPBR->OnCreate( + m_pDevice, + &m_uploadHeap, + &m_resourceViewHeaps, + &m_constantBufferRing, + &m_vidMemBufferPool, + m_pGLTFTexturesAndBuffers, + &m_skyDome, true, - false, - DXGI_FORMAT_R16G16B16A16_FLOAT, + false, + DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, 4 - ); - m_gltfPBR->OnUpdateWindowSizeDependentResources(&m_FfxCacaoOutput); - } - else if (stage == 10) - { - Profile p("m_gltfBBox->OnCreate"); - - // just a bounding box pass that will draw boundingboxes instead of the geometry itself - 
m_gltfBBox = new GltfBBoxPass(); - m_gltfBBox->OnCreate( - m_pDevice, - &m_UploadHeap, - &m_resourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers, - &m_wireframe - ); + ); + m_gltfPBR->OnUpdateWindowSizeDependentResources(&m_cacaoOutput); + } + else if (stage == 10) + { + Profile p("m_gltfBBox->OnCreate"); + + // just a bounding box pass that will draw boundingboxes instead of the geometry itself + m_gltfBBox = new GltfBBoxPass(); + m_gltfBBox->OnCreate( + m_pDevice, + &m_uploadHeap, + &m_resourceViewHeaps, + &m_constantBufferRing, + &m_vidMemBufferPool, + m_pGLTFTexturesAndBuffers, + &m_wireframe + ); #if (USE_VID_MEM==true) - // we are borrowing the upload heap command list for uploading to the GPU the IBs and VBs - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); + // we are borrowing the upload heap command list for uploading to the GPU the IBs and VBs + m_vidMemBufferPool.UploadData(m_uploadHeap.GetCommandList()); #endif - } - else if (stage == 11) - { - Profile p("Flush"); + } + else if (stage == 11) + { + Profile p("Flush"); - m_UploadHeap.FlushAndFinish(); + m_uploadHeap.FlushAndFinish(); #if (USE_VID_MEM==true) - //once everything is uploaded we dont need he upload heaps anymore - m_VidMemBufferPool.FreeUploadHeap(); + //once everything is uploaded we dont need he upload heaps anymore + m_vidMemBufferPool.FreeUploadHeap(); #endif - // tell caller that we are done loading the map - return 0; - } + // tell caller that we are done loading the map + return 0; + } - stage++; - return stage; + stage++; + return stage; } //-------------------------------------------------------------------------------------- @@ -468,34 +458,34 @@ void SampleRenderer::UnloadScene() delete m_gltfPBRNonMsaa; m_gltfPBRNonMsaa = NULL; } - + if (m_gltfPBR) - { - m_gltfPBR->OnDestroy(); - delete m_gltfPBR; - m_gltfPBR = NULL; - } - - if (m_gltfDepth) - { - m_gltfDepth->OnDestroy(); - delete m_gltfDepth; - m_gltfDepth = NULL; - } - - 
if (m_gltfBBox) - { - m_gltfBBox->OnDestroy(); - delete m_gltfBBox; - m_gltfBBox = NULL; - } - - if (m_pGLTFTexturesAndBuffers) - { - m_pGLTFTexturesAndBuffers->OnDestroy(); - delete m_pGLTFTexturesAndBuffers; - m_pGLTFTexturesAndBuffers = NULL; - } + { + m_gltfPBR->OnDestroy(); + delete m_gltfPBR; + m_gltfPBR = NULL; + } + + if (m_gltfDepth) + { + m_gltfDepth->OnDestroy(); + delete m_gltfDepth; + m_gltfDepth = NULL; + } + + if (m_gltfBBox) + { + m_gltfBBox->OnDestroy(); + delete m_gltfBBox; + m_gltfBBox = NULL; + } + + if (m_pGLTFTexturesAndBuffers) + { + m_pGLTFTexturesAndBuffers->OnDestroy(); + delete m_pGLTFTexturesAndBuffers; + m_pGLTFTexturesAndBuffers = NULL; + } } @@ -506,147 +496,147 @@ void SampleRenderer::UnloadScene() //-------------------------------------------------------------------------------------- void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) { - // Timing values - // - UINT64 gpuTicksPerSecond; - m_pDevice->GetGraphicsQueue()->GetTimestampFrequency(&gpuTicksPerSecond); - - // Let our resource managers do some house keeping - // - m_ConstantBufferRing.OnBeginFrame(); - m_GPUTimer.OnBeginFrame(gpuTicksPerSecond, &m_TimeStamps); - - // Sets the perFrame data (Camera and lights data), override as necessary and set them as constant buffers -------------- - // - per_frame *pPerFrame = NULL; - if (m_pGLTFTexturesAndBuffers) - { - pPerFrame = m_pGLTFTexturesAndBuffers->m_pGLTFCommon->SetPerFrameData(pState->camera); - pPerFrame->invScreenResolution[0] = 1.0f / (float)m_Width; - pPerFrame->invScreenResolution[1] = 1.0f / (float)m_Height; - - //apply jittering to the camera - if (m_HasTAA) - { - static uint32_t sampleIndex=0; - - static const auto CalculateHaltonNumber = [](uint32_t index, uint32_t base) - { - float f = 1.0f, result = 0.0f; - - for (uint32_t i = index; i > 0;) - { - f /= static_cast(base); - result = result + f * static_cast(i % base); - i = static_cast(floorf(static_cast(i) / static_cast(base))); - } - - return 
result; - }; - - sampleIndex = (sampleIndex + 1) % 16; // 16x TAA - } - - //override gltf camera with ours - pPerFrame->mCameraViewProj = pState->camera.GetView() * pState->camera.GetProjection(); - pPerFrame->mInverseCameraViewProj = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); - pPerFrame->cameraPos = pState->camera.GetPosition(); - pPerFrame->iblFactor = pState->iblFactor; - pPerFrame->emmisiveFactor = pState->emmisiveFactor; - - //if the gltf doesn't have any lights set some spotlights - if (pPerFrame->lightCount == 0) - { - pPerFrame->lightCount = pState->spotlightCount; - for (uint32_t i = 0; i < pState->spotlightCount; i++) - { - GetXYZ(pPerFrame->lights[i].color, pState->spotlight[i].color); - GetXYZ(pPerFrame->lights[i].position, pState->spotlight[i].light.GetPosition()); - GetXYZ(pPerFrame->lights[i].direction, pState->spotlight[i].light.GetDirection()); - - pPerFrame->lights[i].range = 15.0f; // in meters - pPerFrame->lights[i].type = LightType_Spot; - pPerFrame->lights[i].intensity = pState->spotlight[i].intensity; - pPerFrame->lights[i].innerConeCos = cosf(pState->spotlight[i].light.GetFovV() * 0.9f / 2.0f); - pPerFrame->lights[i].outerConeCos = cosf(pState->spotlight[i].light.GetFovV() / 2.0f); - pPerFrame->lights[i].mLightViewProj = pState->spotlight[i].light.GetView() * pState->spotlight[i].light.GetProjection(); - } - } - - // Up to 4 spotlights can have shadowmaps. Each spot the light has a shadowMap index which is used to find the shadowmap in the atlas - // Additionally, directional lights shadows can be raytraced. 
- uint32_t shadowMapIndex = 0; - for (uint32_t i = 0; i < pPerFrame->lightCount; i++) - { - if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Spot)) - { - pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // set the shadowmap index so the color pass knows which shadow map to use - pPerFrame->lights[i].depthBias = 70.0f / 100000.0f; - } - else - { - pPerFrame->lights[i].shadowMapIndex = -1; // no shadow for this light - } - } - - m_pGLTFTexturesAndBuffers->SetPerFrameConstants(); - - m_pGLTFTexturesAndBuffers->SetSkinningMatricesForSkeletons(); - } - - // command buffer calls - // - ID3D12GraphicsCommandList* pCmdLst1 = m_CommandListRing.GetNewCommandList(); - - m_GPUTimer.GetTimeStamp(pCmdLst1, "Begin Frame"); - - // Clear GBuffer and depth stencil - // - pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(pSwapChain->GetCurrentBackBufferResource(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET)); - - // Clears ----------------------------------------------------------------------- - // - pCmdLst1->ClearDepthStencilView(m_ShadowMapDSV.GetCPU(), D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Clear shadow map"); - - float clearColor[] = { 0.0f, 0.0f, 0.0f, 0.0f }; - pCmdLst1->ClearRenderTargetView(m_HDRRTVMSAA.GetCPU(), clearColor, 0, nullptr); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Clear HDR"); - - pCmdLst1->ClearDepthStencilView(m_depthBufferDSV.GetCPU(), D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Clear depth"); + // Timing values + // + UINT64 gpuTicksPerSecond; + m_pDevice->GetGraphicsQueue()->GetTimestampFrequency(&gpuTicksPerSecond); + + // Let our resource managers do some house keeping + // + m_constantBufferRing.OnBeginFrame(); + m_gpuTimer.OnBeginFrame(gpuTicksPerSecond, &m_timeStamps); + + // Sets the perFrame data (Camera and lights data), override as necessary and set them as constant buffers -------------- + // + per_frame 
*pPerFrame = NULL; + if (m_pGLTFTexturesAndBuffers) + { + pPerFrame = m_pGLTFTexturesAndBuffers->m_pGLTFCommon->SetPerFrameData(pState->camera); + pPerFrame->invScreenResolution[0] = 1.0f / (float)m_width; + pPerFrame->invScreenResolution[1] = 1.0f / (float)m_height; + + //apply jittering to the camera + if (m_hasTAA) + { + static uint32_t sampleIndex=0; + + static const auto CalculateHaltonNumber = [](uint32_t index, uint32_t base) + { + float f = 1.0f, result = 0.0f; + + for (uint32_t i = index; i > 0;) + { + f /= static_cast(base); + result = result + f * static_cast(i % base); + i = static_cast(floorf(static_cast(i) / static_cast(base))); + } + + return result; + }; + + sampleIndex = (sampleIndex + 1) % 16; // 16x TAA + } + + //override gltf camera with ours + pPerFrame->mCameraViewProj = pState->camera.GetView() * pState->camera.GetProjection(); + pPerFrame->mInverseCameraViewProj = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); + pPerFrame->cameraPos = pState->camera.GetPosition(); + pPerFrame->iblFactor = pState->iblFactor; + pPerFrame->emmisiveFactor = pState->emmisiveFactor; + + //if the gltf doesn't have any lights set some spotlights + if (pPerFrame->lightCount == 0) + { + pPerFrame->lightCount = pState->spotlightCount; + for (uint32_t i = 0; i < pState->spotlightCount; i++) + { + GetXYZ(pPerFrame->lights[i].color, pState->spotlight[i].color); + GetXYZ(pPerFrame->lights[i].position, pState->spotlight[i].light.GetPosition()); + GetXYZ(pPerFrame->lights[i].direction, pState->spotlight[i].light.GetDirection()); + + pPerFrame->lights[i].range = 15.0f; // in meters + pPerFrame->lights[i].type = LightType_Spot; + pPerFrame->lights[i].intensity = pState->spotlight[i].intensity; + pPerFrame->lights[i].innerConeCos = cosf(pState->spotlight[i].light.GetFovV() * 0.9f / 2.0f); + pPerFrame->lights[i].outerConeCos = cosf(pState->spotlight[i].light.GetFovV() / 2.0f); + pPerFrame->lights[i].mLightViewProj = pState->spotlight[i].light.GetView() * 
pState->spotlight[i].light.GetProjection(); + } + } + + // Up to 4 spotlights can have shadowmaps. Each spot the light has a shadowMap index which is used to find the shadowmap in the atlas + // Additionally, directional lights shadows can be raytraced. + uint32_t shadowMapIndex = 0; + for (uint32_t i = 0; i < pPerFrame->lightCount; i++) + { + if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Spot)) + { + pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // set the shadowmap index so the color pass knows which shadow map to use + pPerFrame->lights[i].depthBias = 70.0f / 100000.0f; + } + else + { + pPerFrame->lights[i].shadowMapIndex = -1; // no shadow for this light + } + } + + m_pGLTFTexturesAndBuffers->SetPerFrameConstants(); + + m_pGLTFTexturesAndBuffers->SetSkinningMatricesForSkeletons(); + } + + // command buffer calls + // + ID3D12GraphicsCommandList* pCmdLst1 = m_commandListRing.GetNewCommandList(); + + m_gpuTimer.GetTimeStamp(pCmdLst1, "Begin Frame"); + + // Clear GBuffer and depth stencil + // + pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(pSwapChain->GetCurrentBackBufferResource(), D3D12_RESOURCE_STATE_PRESENT, D3D12_RESOURCE_STATE_RENDER_TARGET)); + + // Clears ----------------------------------------------------------------------- + // + pCmdLst1->ClearDepthStencilView(m_shadowMapDSV.GetCPU(), D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr); + m_gpuTimer.GetTimeStamp(pCmdLst1, "Clear shadow map"); + + float clearColor[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + pCmdLst1->ClearRenderTargetView(m_hdrRTVMSAA.GetCPU(), clearColor, 0, nullptr); + m_gpuTimer.GetTimeStamp(pCmdLst1, "Clear HDR"); + + pCmdLst1->ClearDepthStencilView(m_depthBufferDSV.GetCPU(), D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, nullptr); + m_gpuTimer.GetTimeStamp(pCmdLst1, "Clear depth"); pCmdLst1->ClearDepthStencilView(m_depthBufferNonMsaaDSV.GetCPU(), D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, NULL); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Clear depth (Non MSAA)"); - - 
// Render to shadow map atlas for spot lights ------------------------------------------ - // - if (m_gltfDepth && pPerFrame != NULL) - { - uint32_t shadowMapIndex = 0; - for (uint32_t i = 0; i < pPerFrame->lightCount; i++) - { - if (pPerFrame->lights[i].type != LightType_Spot) - continue; - - // Set the RT's quadrant where to render the shadomap (these viewport offsets need to match the ones in shadowFiltering.h) - uint32_t viewportOffsetsX[4] = { 0, 1, 0, 1 }; - uint32_t viewportOffsetsY[4] = { 0, 0, 1, 1 }; - uint32_t viewportWidth = m_ShadowMap.GetWidth() / 2; - uint32_t viewportHeight = m_ShadowMap.GetHeight() / 2; - SetViewportAndScissor(pCmdLst1, viewportOffsetsX[i] * viewportWidth, viewportOffsetsY[i] * viewportHeight, viewportWidth, viewportHeight); - pCmdLst1->OMSetRenderTargets(0, NULL, true, &m_ShadowMapDSV.GetCPU()); - - GltfDepthPass::per_frame *cbDepthPerFrame = m_gltfDepth->SetPerFrameConstants(); - cbDepthPerFrame->mViewProj = pPerFrame->lights[i].mLightViewProj; - - m_gltfDepth->Draw(pCmdLst1); - - m_GPUTimer.GetTimeStamp(pCmdLst1, "Shadow map"); - shadowMapIndex++; - } - } - pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_ShadowMap.GetResource(), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)); + m_gpuTimer.GetTimeStamp(pCmdLst1, "Clear depth (Non MSAA)"); + + // Render to shadow map atlas for spot lights ------------------------------------------ + // + if (m_gltfDepth && pPerFrame != NULL) + { + uint32_t shadowMapIndex = 0; + for (uint32_t i = 0; i < pPerFrame->lightCount; i++) + { + if (pPerFrame->lights[i].type != LightType_Spot) + continue; + + // Set the RT's quadrant where to render the shadomap (these viewport offsets need to match the ones in shadowFiltering.h) + uint32_t viewportOffsetsX[4] = { 0, 1, 0, 1 }; + uint32_t viewportOffsetsY[4] = { 0, 0, 1, 1 }; + uint32_t viewportWidth = m_shadowMap.GetWidth() / 2; + uint32_t viewportHeight = m_shadowMap.GetHeight() / 2; + 
SetViewportAndScissor(pCmdLst1, viewportOffsetsX[i] * viewportWidth, viewportOffsetsY[i] * viewportHeight, viewportWidth, viewportHeight); + pCmdLst1->OMSetRenderTargets(0, NULL, true, &m_shadowMapDSV.GetCPU()); + + GltfDepthPass::per_frame *cbDepthPerFrame = m_gltfDepth->SetPerFrameConstants(); + cbDepthPerFrame->mViewProj = pPerFrame->lights[i].mLightViewProj; + + m_gltfDepth->Draw(pCmdLst1); + + m_gpuTimer.GetTimeStamp(pCmdLst1, "Shadow map"); + shadowMapIndex++; + } + } + pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_shadowMap.GetResource(), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)); // Render normal/depth buffer if (pPerFrame) @@ -654,13 +644,13 @@ void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) // Render Scene to the MSAA HDR RT ------------------------------------------------ // pCmdLst1->RSSetViewports(1, &m_viewPort); - pCmdLst1->RSSetScissorRects(1, &m_RectScissor); + pCmdLst1->RSSetScissorRects(1, &m_rectScissor); pCmdLst1->OMSetRenderTargets(1, &m_normalBufferNonMsaaRTV.GetCPU(), true, &m_depthBufferNonMsaaDSV.GetCPU()); // Render normal/depth buffer if (m_gltfPBRNonMsaa) { - m_gltfPBRNonMsaa->Draw(pCmdLst1, &m_ShadowMapSRV); + m_gltfPBRNonMsaa->Draw(pCmdLst1, &m_shadowMapSRV); } // resource barriers @@ -672,9 +662,9 @@ void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) pCmdLst1->ResourceBarrier(_countof(barriers), barriers); } - if (pState->bUseCACAO) + if (pState->useCACAO) { - FfxCacaoMatrix4x4 proj, normalsWorldToView; + FFX_CACAO_Matrix4x4 proj, normalsWorldToView; { XMFLOAT4X4 p; XMMATRIX xProj = pState->camera.GetProjection(); @@ -692,25 +682,21 @@ void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) normalsWorldToView.elements[3][0] = p._41; normalsWorldToView.elements[3][1] = p._42; normalsWorldToView.elements[3][2] = p._43; normalsWorldToView.elements[3][3] = p._44; } - FfxCacaoD3D12Context *context = NULL; -#ifdef 
FFX_CACAO_ENABLE_NATIVE_RESOLUTION - context = pState->bUseDownsampledSSAO ? m_pFfxCacaoContextDownsampled : m_pFfxCacaoContextNative; -#else - context = m_pFfxCacaoContextDownsampled; -#endif + FFX_CACAO_D3D12Context *context = NULL; + context = pState->useDownsampledSSAO ? m_pCACAOContextDownsampled : m_pCACAOContextNative; - pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_FfxCacaoOutput.GetResource(), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_UNORDERED_ACCESS)); + pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_cacaoOutput.GetResource(), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_UNORDERED_ACCESS)); - ffxCacaoD3D12UpdateSettings(context, &pState->cacaoSettings); - ffxCacaoD3D12Draw(context, pCmdLst1, &proj, &normalsWorldToView); + FFX_CACAO_D3D12UpdateSettings(context, &pState->cacaoSettings); + FFX_CACAO_D3D12Draw(context, pCmdLst1, &proj, &normalsWorldToView); } else { - pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_FfxCacaoOutput.GetResource(), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_UNORDERED_ACCESS)); + pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_cacaoOutput.GetResource(), D3D12_RESOURCE_STATE_GENERIC_READ, D3D12_RESOURCE_STATE_UNORDERED_ACCESS)); uint32_t dummy = 0; - D3D12_GPU_VIRTUAL_ADDRESS dummyConstantBufferAddress = m_ConstantBufferRing.AllocConstantBuffer(sizeof(dummy), &dummy); - m_cacaoUavClear.Draw(pCmdLst1, dummyConstantBufferAddress, &m_FfxCacaoOutputUAV, NULL, m_Width, m_Height, 1); - pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_FfxCacaoOutput.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_GENERIC_READ)); + D3D12_GPU_VIRTUAL_ADDRESS dummyConstantBufferAddress = m_constantBufferRing.AllocConstantBuffer(sizeof(dummy), &dummy); + m_cacaoUAVClear.Draw(pCmdLst1, dummyConstantBufferAddress, &m_cacaoOutputUAV, NULL, m_width, m_height, 1); + pCmdLst1->ResourceBarrier(1, 
&CD3DX12_RESOURCE_BARRIER::Transition(m_cacaoOutput.GetResource(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_GENERIC_READ)); } // resource barriers @@ -723,199 +709,195 @@ void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) } } - // Render Scene to the MSAA HDR RT ------------------------------------------------ - // - pCmdLst1->RSSetViewports(1, &m_viewPort); - pCmdLst1->RSSetScissorRects(1, &m_RectScissor); - pCmdLst1->OMSetRenderTargets(1, &m_HDRRTVMSAA.GetCPU(), true, &m_depthBufferDSV.GetCPU()); - - if (pPerFrame != NULL) - { - // Render skydome - // - if (pState->skyDomeType == 1) - { - XMMATRIX clipToView = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); - m_skyDome.Draw(pCmdLst1, clipToView); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Skydome"); - } - else if (pState->skyDomeType == 0) - { - SkyDomeProc::Constants skyDomeConstants; - skyDomeConstants.invViewProj = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); - skyDomeConstants.vSunDirection = XMVectorSet(1.0f, 0.05f, 0.0f, 0.0f); - skyDomeConstants.turbidity = 10.0f; - skyDomeConstants.rayleigh = 2.0f; - skyDomeConstants.mieCoefficient = 0.005f; - skyDomeConstants.mieDirectionalG = 0.8f; - skyDomeConstants.luminance = 1.0f; - skyDomeConstants.sun = false; - m_skyDomeProc.Draw(pCmdLst1, skyDomeConstants); - - m_GPUTimer.GetTimeStamp(pCmdLst1, "Skydome proc"); - } - - // Render scene to color buffer - // - if (m_gltfPBR && pPerFrame != NULL) - { - //set per frame constant buffer values - m_gltfPBR->Draw(pCmdLst1, &m_ShadowMapSRV); - } - - // draw object's bounding boxes - // - if (m_gltfBBox && pPerFrame != NULL) - { - if (pState->bDrawBoundingBoxes) - { - m_gltfBBox->Draw(pCmdLst1, pPerFrame->mCameraViewProj); - - m_GPUTimer.GetTimeStamp(pCmdLst1, "Bounding Box"); - } - } - - // draw light's frustums - // - if (pState->bDrawLightFrustum && pPerFrame != NULL) - { - UserMarker marker(pCmdLst1, "light frustrums"); - - XMVECTOR vCenter = XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f); - 
XMVECTOR vRadius = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f); - XMVECTOR vColor = XMVectorSet(1.0f, 1.0f, 1.0f, 1.0f); - for (uint32_t i = 0; i < pPerFrame->lightCount; i++) - { - XMMATRIX spotlightMatrix = XMMatrixInverse(NULL, pPerFrame->lights[i].mLightViewProj); - XMMATRIX worldMatrix = spotlightMatrix * pPerFrame->mCameraViewProj; - m_wireframeBox.Draw(pCmdLst1, &m_wireframe, worldMatrix, vCenter, vRadius, vColor); - } - - m_GPUTimer.GetTimeStamp(pCmdLst1, "Light's frustum"); - } - } - pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_ShadowMap.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE)); - // pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_depthBuffer.GetResource(), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)); - - m_GPUTimer.GetTimeStamp(pCmdLst1, "Rendering scene"); - - // Resolve MSAA ------------------------------------------------------------------------ - // - { - UserMarker marker(pCmdLst1, "Resolving MSAA"); - - D3D12_RESOURCE_BARRIER preResolve[2] = { - CD3DX12_RESOURCE_BARRIER::Transition(m_HDR.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_RESOLVE_DEST), - CD3DX12_RESOURCE_BARRIER::Transition(m_HDRMSAA.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_RESOLVE_SOURCE) - }; - pCmdLst1->ResourceBarrier(2, preResolve); - - pCmdLst1->ResolveSubresource(m_HDR.GetResource(), 0, m_HDRMSAA.GetResource(), 0, DXGI_FORMAT_R16G16B16A16_FLOAT); - - D3D12_RESOURCE_BARRIER postResolve[2] = { - CD3DX12_RESOURCE_BARRIER::Transition(m_HDR.GetResource(), D3D12_RESOURCE_STATE_RESOLVE_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), - CD3DX12_RESOURCE_BARRIER::Transition(m_HDRMSAA.GetResource(), D3D12_RESOURCE_STATE_RESOLVE_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET) - }; - pCmdLst1->ResourceBarrier(2, postResolve); - - m_GPUTimer.GetTimeStamp(pCmdLst1, "Resolve MSAA"); - } - - // Post 
proc--------------------------------------------------------------------------- - // + // Render Scene to the MSAA HDR RT ------------------------------------------------ + // + pCmdLst1->RSSetViewports(1, &m_viewPort); + pCmdLst1->RSSetScissorRects(1, &m_rectScissor); + pCmdLst1->OMSetRenderTargets(1, &m_hdrRTVMSAA.GetCPU(), true, &m_depthBufferDSV.GetCPU()); + + if (pPerFrame != NULL) + { + // Render skydome + // + if (pState->skyDomeType == 1) + { + XMMATRIX clipToView = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); + m_skyDome.Draw(pCmdLst1, clipToView); + m_gpuTimer.GetTimeStamp(pCmdLst1, "Skydome"); + } + else if (pState->skyDomeType == 0) + { + SkyDomeProc::Constants skyDomeConstants; + skyDomeConstants.invViewProj = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); + skyDomeConstants.vSunDirection = XMVectorSet(1.0f, 0.05f, 0.0f, 0.0f); + skyDomeConstants.turbidity = 10.0f; + skyDomeConstants.rayleigh = 2.0f; + skyDomeConstants.mieCoefficient = 0.005f; + skyDomeConstants.mieDirectionalG = 0.8f; + skyDomeConstants.luminance = 1.0f; + skyDomeConstants.sun = false; + m_skyDomeProc.Draw(pCmdLst1, skyDomeConstants); + + m_gpuTimer.GetTimeStamp(pCmdLst1, "Skydome proc"); + } + + // Render scene to color buffer + // + if (m_gltfPBR && pPerFrame != NULL) + { + //set per frame constant buffer values + m_gltfPBR->Draw(pCmdLst1, &m_shadowMapSRV); + } + + // draw object's bounding boxes + // + if (m_gltfBBox && pPerFrame != NULL) + { + if (pState->drawBoundingBoxes) + { + m_gltfBBox->Draw(pCmdLst1, pPerFrame->mCameraViewProj); + + m_gpuTimer.GetTimeStamp(pCmdLst1, "Bounding Box"); + } + } + + // draw light's frustums + // + if (pState->drawLightFrustum && pPerFrame != NULL) + { + UserMarker marker(pCmdLst1, "light frustrums"); + + XMVECTOR vCenter = XMVectorSet(0.0f, 0.0f, 0.0f, 0.0f); + XMVECTOR vRadius = XMVectorSet(1.0f, 1.0f, 1.0f, 0.0f); + XMVECTOR vColor = XMVectorSet(1.0f, 1.0f, 1.0f, 1.0f); + for (uint32_t i = 0; i < pPerFrame->lightCount; i++) + { + 
XMMATRIX spotlightMatrix = XMMatrixInverse(NULL, pPerFrame->lights[i].mLightViewProj); + XMMATRIX worldMatrix = spotlightMatrix * pPerFrame->mCameraViewProj; + m_wireframeBox.Draw(pCmdLst1, &m_wireframe, worldMatrix, vCenter, vRadius, vColor); + } + + m_gpuTimer.GetTimeStamp(pCmdLst1, "Light's frustum"); + } + } + pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_shadowMap.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE)); + // pCmdLst1->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_depthBuffer.GetResource(), D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE)); + + m_gpuTimer.GetTimeStamp(pCmdLst1, "Rendering scene"); + + // Resolve MSAA ------------------------------------------------------------------------ + // + { + UserMarker marker(pCmdLst1, "Resolving MSAA"); + + D3D12_RESOURCE_BARRIER preResolve[2] = { + CD3DX12_RESOURCE_BARRIER::Transition(m_hdr.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_RESOLVE_DEST), + CD3DX12_RESOURCE_BARRIER::Transition(m_hdrMSAA.GetResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_RESOLVE_SOURCE) + }; + pCmdLst1->ResourceBarrier(2, preResolve); + + pCmdLst1->ResolveSubresource(m_hdr.GetResource(), 0, m_hdrMSAA.GetResource(), 0, DXGI_FORMAT_R16G16B16A16_FLOAT); + + D3D12_RESOURCE_BARRIER postResolve[2] = { + CD3DX12_RESOURCE_BARRIER::Transition(m_hdr.GetResource(), D3D12_RESOURCE_STATE_RESOLVE_DEST, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE), + CD3DX12_RESOURCE_BARRIER::Transition(m_hdrMSAA.GetResource(), D3D12_RESOURCE_STATE_RESOLVE_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET) + }; + pCmdLst1->ResourceBarrier(2, postResolve); + + m_gpuTimer.GetTimeStamp(pCmdLst1, "Resolve MSAA"); + } + + // Post proc--------------------------------------------------------------------------- + // if (0) - { - // Bloom, takes HDR as input and applies bloom to it. 
- // - { - D3D12_CPU_DESCRIPTOR_HANDLE renderTargets[] = { m_HDRRTV.GetCPU() }; - pCmdLst1->OMSetRenderTargets(ARRAYSIZE(renderTargets), renderTargets, false, NULL); + { + // Bloom, takes HDR as input and applies bloom to it. + // + { + D3D12_CPU_DESCRIPTOR_HANDLE renderTargets[] = { m_hdrRTV.GetCPU() }; + pCmdLst1->OMSetRenderTargets(ARRAYSIZE(renderTargets), renderTargets, false, NULL); - m_downSample.Draw(pCmdLst1); - //m_downSample.Gui(); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Downsample"); + m_downSample.Draw(pCmdLst1); + //m_downSample.Gui(); + m_gpuTimer.GetTimeStamp(pCmdLst1, "Downsample"); - m_bloom.Draw(pCmdLst1, &m_HDR); - //m_bloom.Gui(); - m_GPUTimer.GetTimeStamp(pCmdLst1, "Bloom"); - } - } + m_bloom.Draw(pCmdLst1, &m_hdr); + //m_bloom.Gui(); + m_gpuTimer.GetTimeStamp(pCmdLst1, "Bloom"); + } + } - // submit command buffer + // submit command buffer - ThrowIfFailed(pCmdLst1->Close()); - ID3D12CommandList* CmdListList1[] = { pCmdLst1 }; - m_pDevice->GetGraphicsQueue()->ExecuteCommandLists(1, CmdListList1); + ThrowIfFailed(pCmdLst1->Close()); + ID3D12CommandList* CmdListList1[] = { pCmdLst1 }; + m_pDevice->GetGraphicsQueue()->ExecuteCommandLists(1, CmdListList1); - // Wait for swapchain (we are going to render to it) ----------------------------------- - // - pSwapChain->WaitForSwapChain(); + // Wait for swapchain (we are going to render to it) ----------------------------------- + // + pSwapChain->WaitForSwapChain(); - m_CommandListRing.OnBeginFrame(); + m_commandListRing.OnBeginFrame(); - ID3D12GraphicsCommandList* pCmdLst2 = m_CommandListRing.GetNewCommandList(); + ID3D12GraphicsCommandList* pCmdLst2 = m_commandListRing.GetNewCommandList(); - // Tonemapping ------------------------------------------------------------------------ - // - if (pState->bDisplayCacaoDirectly) + // Tonemapping ------------------------------------------------------------------------ + // + if (pState->displayCacaoDirectly) { pCmdLst2->RSSetViewports(1, &m_viewPort); - 
pCmdLst2->RSSetScissorRects(1, &m_RectScissor); + pCmdLst2->RSSetScissorRects(1, &m_rectScissor); pCmdLst2->OMSetRenderTargets(1, pSwapChain->GetCurrentBackBufferRTV(), true, NULL); - m_applyDirect.Draw(pCmdLst2, 1, &m_applyDirectInput, NULL); + m_cacaoApplyDirect.Draw(pCmdLst2, 1, &m_cacaoApplyDirectInput, NULL); } else { - pCmdLst2->RSSetViewports(1, &m_viewPort); - pCmdLst2->RSSetScissorRects(1, &m_RectScissor); - pCmdLst2->OMSetRenderTargets(1, pSwapChain->GetCurrentBackBufferRTV(), true, NULL); + pCmdLst2->RSSetViewports(1, &m_viewPort); + pCmdLst2->RSSetScissorRects(1, &m_rectScissor); + pCmdLst2->OMSetRenderTargets(1, pSwapChain->GetCurrentBackBufferRTV(), true, NULL); - m_toneMapping.Draw(pCmdLst2, &m_HDRSRV, pState->exposure, pState->toneMapper); - m_GPUTimer.GetTimeStamp(pCmdLst2, "Tone mapping"); + m_toneMapping.Draw(pCmdLst2, &m_hdrSRV, pState->exposure, pState->toneMapper); + m_gpuTimer.GetTimeStamp(pCmdLst2, "Tone mapping"); - pCmdLst2->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_HDR.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET)); - } + pCmdLst2->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(m_hdr.GetResource(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET)); + } - // Render HUD ------------------------------------------------------------------------ - // - { - pCmdLst2->RSSetViewports(1, &m_viewPort); - pCmdLst2->RSSetScissorRects(1, &m_RectScissor); - pCmdLst2->OMSetRenderTargets(1, pSwapChain->GetCurrentBackBufferRTV(), true, NULL); + // Render HUD ------------------------------------------------------------------------ + // + { + pCmdLst2->RSSetViewports(1, &m_viewPort); + pCmdLst2->RSSetScissorRects(1, &m_rectScissor); + pCmdLst2->OMSetRenderTargets(1, pSwapChain->GetCurrentBackBufferRTV(), true, NULL); - m_ImGUI.Draw(pCmdLst2); + m_imGUI.Draw(pCmdLst2); - m_GPUTimer.GetTimeStamp(pCmdLst2, "ImGUI rendering"); - } + 
m_gpuTimer.GetTimeStamp(pCmdLst2, "ImGUI rendering"); + } - // Transition swapchain into present mode + // Transition swapchain into present mode - pCmdLst2->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(pSwapChain->GetCurrentBackBufferResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT)); + pCmdLst2->ResourceBarrier(1, &CD3DX12_RESOURCE_BARRIER::Transition(pSwapChain->GetCurrentBackBufferResource(), D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_PRESENT)); - m_GPUTimer.OnEndFrame(); + m_gpuTimer.OnEndFrame(); - m_GPUTimer.CollectTimings(pCmdLst2); + m_gpuTimer.CollectTimings(pCmdLst2); - // Close & Submit the command list ---------------------------------------------------- - // - ThrowIfFailed(pCmdLst2->Close()); + // Close & Submit the command list ---------------------------------------------------- + // + ThrowIfFailed(pCmdLst2->Close()); - ID3D12CommandList* CmdListList2[] = { pCmdLst2 }; - m_pDevice->GetGraphicsQueue()->ExecuteCommandLists(1, CmdListList2); + ID3D12CommandList* CmdListList2[] = { pCmdLst2 }; + m_pDevice->GetGraphicsQueue()->ExecuteCommandLists(1, CmdListList2); } #ifdef FFX_CACAO_ENABLE_PROFILING -void SampleRenderer::GetCacaoTimings(State *pState, FfxCacaoDetailedTiming* timings, uint64_t* gpuTicksPerSecond) +void SampleRenderer::GetCacaoTimings(State *pState, FFX_CACAO_DetailedTiming* timings, uint64_t* gpuTicksPerSecond) { - FfxCacaoD3D12Context *context = NULL; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - context = pState->bUseDownsampledSSAO ? m_pFfxCacaoContextDownsampled : m_pFfxCacaoContextNative; -#else - context = m_pFfxCacaoContextDownsampled; -#endif + FFX_CACAO_D3D12Context *context = NULL; + context = pState->useDownsampledSSAO ? 
m_pCACAOContextDownsampled : m_pCACAOContextNative; - ffxCacaoD3D12GetDetailedTimings(context, timings); + FFX_CACAO_D3D12GetDetailedTimings(context, timings); m_pDevice->GetGraphicsQueue()->GetTimestampFrequency(gpuTicksPerSecond); } #endif diff --git a/sample/src/DX12/SampleRenderer.h b/sample/src/DX12/SampleRenderer.h index 221828c..be6840b 100644 --- a/sample/src/DX12/SampleRenderer.h +++ b/sample/src/DX12/SampleRenderer.h @@ -1,6 +1,6 @@ // AMD SampleDX12 sample code -// -// Copyright(c) 2018 Advanced Micro Devices, Inc.All rights reserved. +// +// Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights @@ -18,7 +18,7 @@ // THE SOFTWARE. #pragma once -#include "ffx_cacao.h" +#include "ffx_cacao_impl.h" static const int backBufferCount = 2; @@ -32,148 +32,144 @@ using namespace CAULDRON_DX12; class SampleRenderer { public: - struct Spotlight - { - Camera light; - XMVECTOR color; - float intensity; - }; - - struct State - { - float time; - Camera camera; - - float exposure; - float iblFactor; - float emmisiveFactor; - - int toneMapper; - int skyDomeType; - bool bDrawBoundingBoxes; - - uint32_t spotlightCount; - Spotlight spotlight[4]; - bool bDrawLightFrustum; - -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - bool bUseDownsampledSSAO; -#endif - bool bDisplayCacaoDirectly; - bool bUseCACAO; - FfxCacaoSettings cacaoSettings; - }; + struct Spotlight + { + Camera light; + XMVECTOR color; + float intensity; + }; + + struct State + { + float time; + Camera camera; + + float exposure; + float iblFactor; + float emmisiveFactor; - void OnCreate(Device* pDevice, SwapChain *pSwapChain); - void OnDestroy(); + int toneMapper; + int skyDomeType; + bool drawBoundingBoxes; - void OnCreateWindowSizeDependentResources(SwapChain 
*pSwapChain, uint32_t Width, uint32_t Height); - void OnDestroyWindowSizeDependentResources(); + uint32_t spotlightCount; + Spotlight spotlight[4]; + bool drawLightFrustum; - int LoadScene(GLTFCommon *pGLTFCommon, int stage = 0); - void UnloadScene(); + bool useDownsampledSSAO; + bool displayCacaoDirectly; + bool useCACAO; + FFX_CACAO_Settings cacaoSettings; + }; - const std::vector &GetTimingValues() { return m_TimeStamps; } + void OnCreate(Device* pDevice, SwapChain *pSwapChain); + void OnDestroy(); - void OnRender(State *pState, SwapChain *pSwapChain); + void OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height); + void OnDestroyWindowSizeDependentResources(); + + int LoadScene(GLTFCommon *pGLTFCommon, int stage = 0); + void UnloadScene(); + + const std::vector &GetTimingValues() { return m_timeStamps; } + + void OnRender(State *pState, SwapChain *pSwapChain); #ifdef FFX_CACAO_ENABLE_PROFILING - void GetCacaoTimings(State *pState, FfxCacaoDetailedTiming* timings, uint64_t* gpuTicksPerSecong); + void GetCacaoTimings(State *pState, FFX_CACAO_DetailedTiming* timings, uint64_t* gpuTicksPerSecong); #endif private: - Device *m_pDevice; - - uint32_t m_Width; - uint32_t m_Height; - uint32_t m_HalfWidth; - uint32_t m_HalfHeight; - D3D12_VIEWPORT m_viewPort; - D3D12_RECT m_RectScissor; - bool m_HasTAA = false; - - // Initialize helper classes - ResourceViewHeaps m_resourceViewHeaps; - UploadHeap m_UploadHeap; - DynamicBufferRing m_ConstantBufferRing; - StaticBufferPool m_VidMemBufferPool; - CommandListRing m_CommandListRing; - GPUTimestamps m_GPUTimer; - - //gltf passes + Device *m_pDevice; + + uint32_t m_width; + uint32_t m_height; + uint32_t m_halfWidth; + uint32_t m_halfHeight; + D3D12_VIEWPORT m_viewPort; + D3D12_RECT m_rectScissor; + bool m_hasTAA = false; + + // Initialize helper classes + ResourceViewHeaps m_resourceViewHeaps; + UploadHeap m_uploadHeap; + DynamicBufferRing m_constantBufferRing; + StaticBufferPool 
m_vidMemBufferPool; + CommandListRing m_commandListRing; + GPUTimestamps m_gpuTimer; + + //gltf passes GltfPbrPass *m_gltfPBRNonMsaa; - GltfPbrPass *m_gltfPBR; - GltfBBoxPass *m_gltfBBox; - GltfDepthPass *m_gltfDepth; - GLTFTexturesAndBuffers *m_pGLTFTexturesAndBuffers; - - // effects - Bloom m_bloom; - SkyDome m_skyDome; - DownSamplePS m_downSample; - SkyDomeProc m_skyDomeProc; - ToneMapping m_toneMapping; - PostProcCS m_motionBlur; - Sharpen m_sharpen; - TAA m_taa; + GltfPbrPass *m_gltfPBR; + GltfBBoxPass *m_gltfBBox; + GltfDepthPass *m_gltfDepth; + GLTFTexturesAndBuffers *m_pGLTFTexturesAndBuffers; + + // effects + Bloom m_bloom; + SkyDome m_skyDome; + DownSamplePS m_downSample; + SkyDomeProc m_skyDomeProc; + ToneMapping m_toneMapping; + PostProcCS m_motionBlur; + Sharpen m_sharpen; + TAA m_taa; // ================================ // CACAO stuff - CBV_SRV_UAV m_applyDirectInput; - PostProcPS m_applyDirect; - PostProcCS m_cacaoUavClear; + CBV_SRV_UAV m_cacaoApplyDirectInput; + PostProcPS m_cacaoApplyDirect; + PostProcCS m_cacaoUAVClear; - Texture m_FfxCacaoOutput; - CBV_SRV_UAV m_FfxCacaoOutputUAV; - CBV_SRV_UAV m_FfxCacaoOutputSRV; + Texture m_cacaoOutput; + CBV_SRV_UAV m_cacaoOutputUAV; + CBV_SRV_UAV m_cacaoOutputSRV; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - FfxCacaoD3D12Context *m_pFfxCacaoContextNative; -#endif - FfxCacaoD3D12Context *m_pFfxCacaoContextDownsampled; + FFX_CACAO_D3D12Context *m_pCACAOContextNative; + FFX_CACAO_D3D12Context *m_pCACAOContextDownsampled; - // GUI - ImGUI m_ImGUI; + // GUI + ImGUI m_imGUI; // Deferred pass buffers - Texture m_depthBufferNonMsaa; - DSV m_depthBufferNonMsaaDSV; - CBV_SRV_UAV m_depthBufferNonMsaaSRV; - - Texture m_normalBufferNonMsaa; - RTV m_normalBufferNonMsaaRTV; - CBV_SRV_UAV m_normalBufferNonMsaaSRV; - - // depth buffer - Texture m_depthBuffer; - DSV m_depthBufferDSV; - CBV_SRV_UAV m_depthBufferSRV; - - // TAA buffer - Texture m_TAABuffer; - CBV_SRV_UAV m_TAABufferSRV; - CBV_SRV_UAV m_TAABufferUAV; - 
CBV_SRV_UAV m_TAAInputsSRV; - Texture m_HistoryBuffer; - RTV m_HistoryBufferRTV; - - // shadowmaps - Texture m_ShadowMap; - DSV m_ShadowMapDSV; - CBV_SRV_UAV m_ShadowMapSRV; - - // MSAA RT - Texture m_HDRMSAA; - RTV m_HDRRTVMSAA; - - // Resolved RT - Texture m_HDR; - CBV_SRV_UAV m_HDRSRV; - RTV m_HDRRTV; - - // widgets - Wireframe m_wireframe; - WireframeBox m_wireframeBox; - - std::vector m_TimeStamps; + Texture m_depthBufferNonMsaa; + DSV m_depthBufferNonMsaaDSV; + CBV_SRV_UAV m_depthBufferNonMsaaSRV; + + Texture m_normalBufferNonMsaa; + RTV m_normalBufferNonMsaaRTV; + CBV_SRV_UAV m_normalBufferNonMsaaSRV; + + // depth buffer + Texture m_depthBuffer; + DSV m_depthBufferDSV; + CBV_SRV_UAV m_depthBufferSRV; + + // TAA buffer + Texture m_taaBuffer; + CBV_SRV_UAV m_taaBufferSRV; + CBV_SRV_UAV m_taaBufferUAV; + CBV_SRV_UAV m_taaInputsSRV; + Texture m_historyBuffer; + RTV m_historyBufferRTV; + + // shadowmaps + Texture m_shadowMap; + DSV m_shadowMapDSV; + CBV_SRV_UAV m_shadowMapSRV; + + // MSAA RT + Texture m_hdrMSAA; + RTV m_hdrRTVMSAA; + + // Resolved RT + Texture m_hdr; + CBV_SRV_UAV m_hdrSRV; + RTV m_hdrRTV; + + // widgets + Wireframe m_wireframe; + WireframeBox m_wireframeBox; + + std::vector m_timeStamps; }; diff --git a/sample/src/VK/CMakeLists.txt b/sample/src/VK/CMakeLists.txt index ece063b..583084c 100644 --- a/sample/src/VK/CMakeLists.txt +++ b/sample/src/VK/CMakeLists.txt @@ -4,14 +4,16 @@ include(${CMAKE_CURRENT_SOURCE_DIR}/../../common.cmake) add_compile_options(/MP) set(sources - FFX_CACAO_Sample.cpp - FFX_CACAO_Sample.h + Sample.cpp + Sample.h SampleRenderer.cpp SampleRenderer.h ../../../ffx-cacao/src/ffx_cacao_defines.h ../../../ffx-cacao/src/ffx_cacao.cpp ../../../ffx-cacao/inc/ffx_cacao.h - ../Common/FFX_CACAO_Common.h + ../../../ffx-cacao/src/ffx_cacao_impl.cpp + ../../../ffx-cacao/inc/ffx_cacao_impl.h + ../Common/Common.h stdafx.cpp stdafx.h) @@ -21,7 +23,7 @@ set(shaders ${CMAKE_CURRENT_SOURCE_DIR}/Apply_CACAO_Direct.glsl) set(config - 
${CMAKE_CURRENT_SOURCE_DIR}/../Common/FFX_CACAO_Sample.json + ${CMAKE_CURRENT_SOURCE_DIR}/../Common/SampleSettings.json ) copyCommand("${shaders}" ${CMAKE_HOME_DIRECTORY}/bin/ShaderLibVK) diff --git a/sample/src/VK/FFX_CACAO_Sample.h b/sample/src/VK/FFX_CACAO_Sample.h deleted file mode 100644 index 1b7c9ce..0000000 --- a/sample/src/VK/FFX_CACAO_Sample.h +++ /dev/null @@ -1,96 +0,0 @@ -// AMD SampleVK sample code -// -// Copyright(c) 2018 Advanced Micro Devices, Inc.All rights reserved. -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files(the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions : -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. -#pragma once - -#include "SampleRenderer.h" - -// -// This is the main class, it manages the state of the sample and does all the high level work without touching the GPU directly. -// This class uses the GPU via the the SampleRenderer class. We would have a SampleRenderer instance for each GPU. 
-// -// This class takes care of: -// -// - loading a scene (just the CPU data) -// - updating the camera -// - keeping track of time -// - handling the keyboard -// - updating the animation -// - building the UI (but do not renders it) -// - uses the SampleRenderer to update all the state to the GPU and do the rendering -// - -class FfxCacaoSample : public FrameworkWindows -{ -public: - FfxCacaoSample(LPCSTR name); - void OnCreate(HWND hWnd); - void OnDestroy(); - void BuildUI(); - void OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool *pbFullScreen); - void OnRender(); - bool OnEvent(MSG msg); - void OnResize(uint32_t Width, uint32_t Height) { OnResize(Width, Height, DISPLAYMODE_SDR, false); } - void OnResize(uint32_t Width, uint32_t Height, DisplayModes displayMode, bool force); - void SetFullScreen(bool fullscreen); - -private: - Device m_device; - SwapChain m_swapChain; - - DisplayModes m_currentDisplayMode; - std::vector m_displayModesAvailable; - std::vector m_displayModesNamesAvailable; - - GLTFCommon *m_pGltfLoader = NULL; - bool m_loadingScene = false; - - SampleRenderer *m_Node = NULL; - SampleRenderer::State m_state; - - float m_distance; - float m_roll; - float m_pitch; - - float m_microsecondsPerGpuTick; - float m_time; // WallClock in seconds. 
- double m_lastFrameTime; - float m_timeStep = 0; - int m_cameraControlSelected = 0; - - // json config file - json m_jsonConfigFile; - std::vector m_sceneNames; - int m_activeScene; - int m_activeCamera; - bool m_isCpuValidationLayerEnabled; - bool m_isGpuValidationLayerEnabled; - - bool m_vsyncEnabled = false; - bool m_bPlay; - bool m_requiresLoad = true; - int m_presetIndex = 3; - -#ifdef FFX_CACAO_ENABLE_PROFILING - char m_benchmarkFilename[1024]; - bool m_isBenchmarking; - uint32_t m_benchmarkScreenWidth; - uint32_t m_benchmarkScreenHeight; - uint32_t m_benchmarkWarmUpFramesToRun; -#endif -}; \ No newline at end of file diff --git a/sample/src/VK/FFX_CACAO_Sample.cpp b/sample/src/VK/Sample.cpp similarity index 57% rename from sample/src/VK/FFX_CACAO_Sample.cpp rename to sample/src/VK/Sample.cpp index 2fc5a38..3924664 100644 --- a/sample/src/VK/FFX_CACAO_Sample.cpp +++ b/sample/src/VK/Sample.cpp @@ -1,6 +1,6 @@ // AMD SampleVK sample code -// -// Copyright(c) 2018 Advanced Micro Devices, Inc.All rights reserved. +// +// Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 
// Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights @@ -19,17 +19,17 @@ #include "stdafx.h" -#include "FFX_CACAO_Sample.h" -#include "FFX_CACAO_Common.h" +#include "Sample.h" +#include "Common.h" -FfxCacaoSample::FfxCacaoSample(LPCSTR name) : FrameworkWindows(name) +Sample::Sample(LPCSTR name) : FrameworkWindows(name) { - m_lastFrameTime = MillisecondsNow(); - m_time = 0; - m_bPlay = true; + m_lastFrameTime = MillisecondsNow(); + m_time = 0; + m_bPlay = true; - m_pGltfLoader = NULL; - m_currentDisplayMode = DISPLAYMODE_SDR; + m_pGltfLoader = NULL; + m_currentDisplayMode = DISPLAYMODE_SDR; } //-------------------------------------------------------------------------------------- @@ -37,7 +37,7 @@ FfxCacaoSample::FfxCacaoSample(LPCSTR name) : FrameworkWindows(name) // OnParseCommandLine // //-------------------------------------------------------------------------------------- -void FfxCacaoSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool *pbFullScreen) +void Sample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool *pbFullScreen) { // set some default values *pWidth = 1920; @@ -68,7 +68,7 @@ void FfxCacaoSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint3 // read config file (and override values from commandline if so) // { - std::ifstream f("FFX_CACAO_Sample.json"); + std::ifstream f("SampleSettings.json"); if (!f) { MessageBox(NULL, "Config file not found!\n", "Cauldron Panic!", MB_ICONERROR); @@ -89,7 +89,7 @@ void FfxCacaoSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint3 json globals = m_jsonConfigFile["globals"]; process(globals); - + // get the list of scenes for (const auto & scene : m_jsonConfigFile["scenes"]) m_sceneNames.push_back(scene["name"]); @@ -113,18 +113,12 @@ void 
FfxCacaoSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint3 #ifdef FFX_CACAO_ENABLE_PROFILING if (m_isBenchmarking) { -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION bool downsampled = FFX_CACAO_PRESETS[m_presetIndex].useDownsampledSsao; -#endif uint32_t quality = FFX_CACAO_PRESETS[m_presetIndex].settings.qualityLevel; m_benchmarkScreenWidth = *pWidth; m_benchmarkScreenHeight = *pHeight; m_benchmarkWarmUpFramesToRun = 100; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION snprintf(m_benchmarkFilename, _countof(m_benchmarkFilename), "FFX_CACAO_Vulkan_Benchmark_%s_%ux%u_Q%u.csv", downsampled ? "downsampled" : "native", *pWidth, *pHeight, quality); -#else - snprintf(m_benchmarkFilename, _countof(m_benchmarkFilename), "FFX_CACAO_Vulkan_Benchmark_downsampled_%ux%u_Q%u.csv", *pWidth, *pHeight, quality); -#endif m_vsyncEnabled = false; m_isGpuValidationLayerEnabled = false; m_isCpuValidationLayerEnabled = false; @@ -138,60 +132,58 @@ void FfxCacaoSample::OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint3 // OnCreate // //-------------------------------------------------------------------------------------- -void FfxCacaoSample::OnCreate(HWND hWnd) +void Sample::OnCreate(HWND hWnd) { - // Create Device - // - m_device.OnCreate("myapp", "myEngine", m_isCpuValidationLayerEnabled, m_isGpuValidationLayerEnabled, hWnd); - m_device.CreatePipelineCache(); - + // Create Device + // + m_device.OnCreate("FfxCacaoSample", "Cauldron", m_isCpuValidationLayerEnabled, m_isGpuValidationLayerEnabled, hWnd); + m_device.CreatePipelineCache(); + VkPhysicalDeviceProperties physicalDeviceProperties; vkGetPhysicalDeviceProperties(m_device.GetPhysicalDevice(), &physicalDeviceProperties); m_microsecondsPerGpuTick = 1e-3f * physicalDeviceProperties.limits.timestampPeriod; - //init the shader compiler + //init the shader compiler InitDirectXCompiler(); - CreateShaderCache(); + CreateShaderCache(); - // Create Swapchain - // + // Create Swapchain + // uint32_t dwNumberOfBackBuffers = 2; 
- m_swapChain.OnCreate(&m_device, dwNumberOfBackBuffers, hWnd); - - // Create a instance of the renderer and initialize it, we need to do that for each GPU - // - m_Node = new SampleRenderer(); - m_Node->OnCreate(&m_device, &m_swapChain); - - // init GUI (non gfx stuff) - // - ImGUI_Init((void *)hWnd); - - // Init Camera, looking at the origin - // - m_roll = 0.0f; - m_pitch = 0.0f; - m_distance = 3.5f; - - // init GUI state - m_state.toneMapper = 0; - m_state.m_useTAA = false; // no TAA in VK - m_state.skyDomeType = 0; - m_state.exposure = 1.0f; - m_state.iblFactor = 2.0f; - m_state.emmisiveFactor = 1.0f; - m_state.bDrawLightFrustum = false; - m_state.bDrawBoundingBoxes = false; - m_state.camera.LookAt(m_roll, m_pitch, m_distance, XMVectorSet(0, 0, 0, 0)); - - m_state.m_useCacao = true; - m_state.m_dispalyCacaoDirectly = true; - - m_state.m_cacaoSettings = FFX_CACAO_PRESETS[m_presetIndex].settings; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - m_state.m_useDownsampledSsao = FFX_CACAO_PRESETS[m_presetIndex].useDownsampledSsao; -#endif + m_swapChain.OnCreate(&m_device, dwNumberOfBackBuffers, hWnd); + + // Create a instance of the renderer and initialize it, we need to do that for each GPU + // + m_node = new SampleRenderer(); + m_node->OnCreate(&m_device, &m_swapChain); + + // init GUI (non gfx stuff) + // + ImGUI_Init((void *)hWnd); + + // Init Camera, looking at the origin + // + m_roll = 0.0f; + m_pitch = 0.0f; + m_distance = 3.5f; + + // init GUI state + m_state.toneMapper = 0; + m_state.useTAA = false; // no TAA in VK + m_state.skyDomeType = 0; + m_state.exposure = 1.0f; + m_state.iblFactor = 2.0f; + m_state.emmisiveFactor = 1.0f; + m_state.drawLightFrustum = false; + m_state.drawBoundingBoxes = false; + m_state.camera.LookAt(m_roll, m_pitch, m_distance, XMVectorSet(0, 0, 0, 0)); + + m_state.useCacao = true; + m_state.dispalyCacaoDirectly = true; + + m_state.cacaoSettings = FFX_CACAO_PRESETS[m_presetIndex].settings; + m_state.useDownsampledSsao = 
FFX_CACAO_PRESETS[m_presetIndex].useDownsampledSsao; } //-------------------------------------------------------------------------------------- @@ -199,39 +191,39 @@ void FfxCacaoSample::OnCreate(HWND hWnd) // OnDestroy // //-------------------------------------------------------------------------------------- -void FfxCacaoSample::OnDestroy() +void Sample::OnDestroy() { #ifdef FFX_CACAO_ENABLE_PROFILING m_isBenchmarking = false; #endif - ImGUI_Shutdown(); + ImGUI_Shutdown(); - m_device.GPUFlush(); + m_device.GPUFlush(); - // Fullscreen state should always be false before exiting the app. - m_swapChain.SetFullScreen(false); + // Fullscreen state should always be false before exiting the app. + m_swapChain.SetFullScreen(false); - m_Node->UnloadScene(); - m_Node->OnDestroyWindowSizeDependentResources(); - m_Node->OnDestroy(); + m_node->UnloadScene(); + m_node->OnDestroyWindowSizeDependentResources(); + m_node->OnDestroy(); - delete m_Node; + delete m_node; - m_swapChain.OnDestroyWindowSizeDependentResources(); - m_swapChain.OnDestroy(); + m_swapChain.OnDestroyWindowSizeDependentResources(); + m_swapChain.OnDestroy(); - //shut down the shader compiler - DestroyShaderCache(&m_device); + //shut down the shader compiler + DestroyShaderCache(&m_device); - if (m_pGltfLoader) - { - delete m_pGltfLoader; - m_pGltfLoader = NULL; - } + if (m_pGltfLoader) + { + delete m_pGltfLoader; + m_pGltfLoader = NULL; + } - m_device.DestroyPipelineCache(); - m_device.OnDestroy(); + m_device.DestroyPipelineCache(); + m_device.OnDestroy(); } //-------------------------------------------------------------------------------------- @@ -239,11 +231,11 @@ void FfxCacaoSample::OnDestroy() // OnEvent, win32 sends us events and we forward them to ImGUI // //-------------------------------------------------------------------------------------- -bool FfxCacaoSample::OnEvent(MSG msg) +bool Sample::OnEvent(MSG msg) { - if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) - return 
true; - return true; + if (ImGUI_WndProcHandler(msg.hwnd, msg.message, msg.wParam, msg.lParam)) + return true; + return true; } //-------------------------------------------------------------------------------------- @@ -251,16 +243,16 @@ bool FfxCacaoSample::OnEvent(MSG msg) // SetFullScreen // //-------------------------------------------------------------------------------------- -void FfxCacaoSample::SetFullScreen(bool fullscreen) +void Sample::SetFullScreen(bool fullscreen) { - m_device.GPUFlush(); + m_device.GPUFlush(); if (!fullscreen) { m_currentDisplayMode = DISPLAYMODE_SDR; } - m_swapChain.SetFullScreen(fullscreen); + m_swapChain.SetFullScreen(fullscreen); } //-------------------------------------------------------------------------------------- @@ -268,7 +260,7 @@ void FfxCacaoSample::SetFullScreen(bool fullscreen) // OnResize // //-------------------------------------------------------------------------------------- -void FfxCacaoSample::OnResize(uint32_t width, uint32_t height, DisplayModes displayMode, bool force) +void Sample::OnResize(uint32_t width, uint32_t height, DisplayModes displayMode, bool force) { #ifdef FFX_CACAO_ENABLE_PROFILING if (m_isBenchmarking && !m_benchmarkWarmUpFramesToRun) @@ -281,39 +273,39 @@ void FfxCacaoSample::OnResize(uint32_t width, uint32_t height, DisplayModes disp } #endif - if (m_Width != width || m_Height != height || m_currentDisplayMode != displayMode || force) - { - // Flush GPU - // - m_device.GPUFlush(); - - // If resizing but no minimizing - // - if (m_Width > 0 && m_Height > 0) - { - if (m_Node != NULL) - { - m_Node->OnDestroyWindowSizeDependentResources(); - } - m_swapChain.OnDestroyWindowSizeDependentResources(); - } - - m_Width = width; - m_Height = height; - m_currentDisplayMode = displayMode; - - // if resizing but not minimizing the recreate it with the new size - // - if (m_Width > 0 && m_Height > 0) - { - m_swapChain.OnCreateWindowSizeDependentResources(m_Width, m_Height, m_vsyncEnabled, 
m_currentDisplayMode); - if (m_Node != NULL) - { - m_Node->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); - } - } - } - m_state.camera.SetFov(XM_PI / 4, m_Width, m_Height, 0.1f, 1000.0f); + if (m_Width != width || m_Height != height || m_currentDisplayMode != displayMode || force) + { + // Flush GPU + // + m_device.GPUFlush(); + + // If resizing but no minimizing + // + if (m_Width > 0 && m_Height > 0) + { + if (m_node != NULL) + { + m_node->OnDestroyWindowSizeDependentResources(); + } + m_swapChain.OnDestroyWindowSizeDependentResources(); + } + + m_Width = width; + m_Height = height; + m_currentDisplayMode = displayMode; + + // if resizing but not minimizing the recreate it with the new size + // + if (m_Width > 0 && m_Height > 0) + { + m_swapChain.OnCreateWindowSizeDependentResources(m_Width, m_Height, m_vsyncEnabled, m_currentDisplayMode); + if (m_node != NULL) + { + m_node->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); + } + } + } + m_state.camera.SetFov(XM_PI / 4, m_Width, m_Height, 0.1f, 1000.0f); } //-------------------------------------------------------------------------------------- @@ -321,20 +313,20 @@ void FfxCacaoSample::OnResize(uint32_t width, uint32_t height, DisplayModes disp // BuildUI, also loads the scene! 
// //-------------------------------------------------------------------------------------- -void FfxCacaoSample::BuildUI() +void Sample::BuildUI() { - ImGuiStyle& style = ImGui::GetStyle(); - style.FrameBorderSize = 1.0f; + ImGuiStyle& style = ImGui::GetStyle(); + style.FrameBorderSize = 1.0f; - ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_FirstUseEver); - ImGui::SetNextWindowSize(ImVec2(250, 700), ImGuiCond_FirstUseEver); + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_FirstUseEver); + ImGui::SetNextWindowSize(ImVec2(250, 700), ImGuiCond_FirstUseEver); - bool opened = true; - ImGui::Begin("CACAO Sample", &opened); + bool opened = true; + ImGui::Begin("CACAO Sample", &opened); - if (ImGui::CollapsingHeader("Sample Settings", ImGuiTreeNodeFlags_DefaultOpen)) - { - ImGui::Text("Resolution : %ix%i", m_Width, m_Height); + if (ImGui::CollapsingHeader("Sample Settings", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Text("Resolution : %ix%i", m_Width, m_Height); const char *cameraControls = "Orbit\0WASD\0"; ImGui::Combo("Camera", &m_cameraControlSelected, cameraControls); @@ -343,7 +335,7 @@ void FfxCacaoSample::BuildUI() { OnResize(m_Width, m_Height, DISPLAYMODE_SDR, true); } - } + } if (m_requiresLoad) { @@ -353,12 +345,12 @@ void FfxCacaoSample::BuildUI() // release everything and load the GLTF, just the light json data, the rest (textures and geometry) will be done in the main loop if (m_pGltfLoader != NULL) { - m_Node->UnloadScene(); - m_Node->OnDestroyWindowSizeDependentResources(); - m_Node->OnDestroy(); + m_node->UnloadScene(); + m_node->OnDestroyWindowSizeDependentResources(); + m_node->OnDestroy(); m_pGltfLoader->Unload(); - m_Node->OnCreate(&m_device, &m_swapChain); - m_Node->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); + m_node->OnCreate(&m_device, &m_swapChain); + m_node->OnCreateWindowSizeDependentResources(&m_swapChain, m_Width, m_Height); } delete(m_pGltfLoader); @@ -374,7 +366,7 @@ void FfxCacaoSample::BuildUI() #define 
LOAD(j, key, val) val = j.value(key, val) // global settings - LOAD(scene, "TAA", m_state.m_useTAA); + LOAD(scene, "TAA", m_state.useTAA); LOAD(scene, "toneMapper", m_state.toneMapper); LOAD(scene, "skyDomeType", m_state.skyDomeType); LOAD(scene, "exposure", m_state.exposure); @@ -435,14 +427,12 @@ void FfxCacaoSample::BuildUI() { if (ImGui::Combo("Preset", &m_presetIndex, FFX_CACAO_PRESET_NAMES, _countof(FFX_CACAO_PRESET_NAMES)) && m_presetIndex < _countof(FFX_CACAO_PRESETS)) { - FfxCacaoPreset preset = FFX_CACAO_PRESETS[m_presetIndex]; - m_state.m_cacaoSettings = preset.settings; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - m_state.m_useDownsampledSsao = preset.useDownsampledSsao; -#endif + Preset preset = FFX_CACAO_PRESETS[m_presetIndex]; + m_state.cacaoSettings = preset.settings; + m_state.useDownsampledSsao = preset.useDownsampledSsao; } - FfxCacaoSettings *settings = &m_state.m_cacaoSettings; + FFX_CACAO_Settings *settings = &m_state.cacaoSettings; ImGui::SliderFloat("Radius", &settings->radius, 0.0f, 10.0f); ImGui::SliderFloat("Shadow Multiplier", &settings->shadowMultiplier, 0.0f, 5.0f); ImGui::SliderFloat("Shadow Power", &settings->shadowPower, 0.5f, 5.0f); @@ -453,7 +443,7 @@ void FfxCacaoSample::BuildUI() int qualityIndex = settings->qualityLevel; char *qualityLevels = "Lowest\0Low\0Medium\0High\0Highest\0" ; ImGui::Combo("Quality Level", &qualityIndex, qualityLevels); - settings->qualityLevel = (FfxCacaoQuality)qualityIndex; + settings->qualityLevel = (FFX_CACAO_Quality)qualityIndex; if (settings->qualityLevel == FFX_CACAO_QUALITY_HIGHEST) { ImGui::SliderFloat("Adaptive Quality Level", &settings->adaptiveQualityLimit, 0.5f, 1.0f); @@ -464,39 +454,33 @@ void FfxCacaoSample::BuildUI() bool generateNormals = settings->generateNormals; ImGui::Checkbox("Generate Normal Buffer From Depth Buffer", &generateNormals); settings->generateNormals = generateNormals ? 
FFX_CACAO_TRUE : FFX_CACAO_FALSE; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - ImGui::Checkbox("Use Downsampled SSAO", &m_state.m_useDownsampledSsao); - if (m_state.m_useDownsampledSsao) -#endif + ImGui::Checkbox("Use Downsampled SSAO", &m_state.useDownsampledSsao); + if (m_state.useDownsampledSsao) { ImGui::SliderFloat("Bilateral Sigma Squared", &settings->bilateralSigmaSquared, 0.0f, 10.0f); ImGui::SliderFloat("Bilateral Similarity Distance Sigma", &settings->bilateralSimilarityDistanceSigma, 0.1f, 1.0f); } - ImGui::Checkbox("Display FFX CACAO Output Directly", &m_state.m_dispalyCacaoDirectly); - if (!m_state.m_dispalyCacaoDirectly) + ImGui::Checkbox("Display FFX CACAO Output Directly", &m_state.dispalyCacaoDirectly); + if (!m_state.dispalyCacaoDirectly) { - ImGui::Checkbox("Use FFX CACAO", &m_state.m_useCacao); + ImGui::Checkbox("Use FFX CACAO", &m_state.useCacao); } - m_state.m_useCacao |= m_state.m_dispalyCacaoDirectly; + m_state.useCacao |= m_state.dispalyCacaoDirectly; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - if (m_presetIndex < _countof(FFX_CACAO_PRESETS) && (memcmp(&FFX_CACAO_PRESETS[m_presetIndex].settings, &m_state.m_cacaoSettings, sizeof(m_state.m_cacaoSettings)) || (FFX_CACAO_PRESETS[m_presetIndex].useDownsampledSsao != m_state.m_useDownsampledSsao))) -#else - if (m_presetIndex < _countof(FFX_CACAO_PRESETS) && memcmp(&FFX_CACAO_PRESETS[m_presetIndex].settings, &m_state.m_cacaoSettings, sizeof(m_state.m_cacaoSettings))) -#endif + if (m_presetIndex < _countof(FFX_CACAO_PRESETS) && (memcmp(&FFX_CACAO_PRESETS[m_presetIndex].settings, &m_state.cacaoSettings, sizeof(m_state.cacaoSettings)) || (FFX_CACAO_PRESETS[m_presetIndex].useDownsampledSsao != m_state.useDownsampledSsao))) { m_presetIndex = _countof(FFX_CACAO_PRESETS); } } #ifdef FFX_CACAO_ENABLE_PROFILING - if (m_state.m_useCacao && !m_vsyncEnabled) + if (m_state.useCacao && !m_vsyncEnabled && !m_isCpuValidationLayerEnabled && !m_isGpuValidationLayerEnabled) { if (ImGui::CollapsingHeader("Profiler", 
ImGuiTreeNodeFlags_DefaultOpen)) { - FfxCacaoDetailedTiming timings = {}; - m_Node->GetCacaoTimingValues(&m_state, &timings); + FFX_CACAO_DetailedTiming timings = {}; + m_node->GetCacaoTimingValues(&m_state, &timings); for (uint32_t i = 0; i < timings.numTimestamps; ++i) { const char *name = timings.timestamps[i].label; @@ -508,58 +492,58 @@ void FfxCacaoSample::BuildUI() } else { - ImGui::CollapsingHeader("Profiler Disabled (enable CACAO and turn off vsync)"); + ImGui::CollapsingHeader("Profiler Disabled (enable CACAO and turn off vsync and validation)"); } #endif - ImGui::End(); - - // Sets Camera based on UI selection (WASD, Orbit or any of the GLTF cameras) - // - ImGuiIO& io = ImGui::GetIO(); - { - //If the mouse was not used by the GUI then it's for the camera - // - if (io.WantCaptureMouse) - { - io.MouseDelta.x = 0; - io.MouseDelta.y = 0; - io.MouseWheel = 0; - } - else if ((io.KeyCtrl == false) && (io.MouseDown[0] == true)) - { - m_roll -= io.MouseDelta.x / 100.f; - m_pitch += io.MouseDelta.y / 100.f; - } - - // Choose camera movement depending on setting - // - if (m_cameraControlSelected == 0) - { - // Orbiting - // - m_distance -= (float)io.MouseWheel / 3.0f; - m_distance = std::max(m_distance, 0.1f); - - bool panning = (io.KeyCtrl == true) && (io.MouseDown[0] == true); - - m_state.camera.UpdateCameraPolar(m_roll, m_pitch, panning ? -io.MouseDelta.x / 100.0f : 0.0f, panning ? 
io.MouseDelta.y / 100.0f : 0.0f, m_distance); - } - else if (m_cameraControlSelected == 1) - { - // WASD - // - m_state.camera.UpdateCameraWASD(m_roll, m_pitch, io.KeysDown, io.DeltaTime); - } - else if (m_cameraControlSelected > 1) - { - // Use a camera from the GLTF - // - m_pGltfLoader->GetCamera(m_cameraControlSelected - 2, &m_state.camera); - m_roll = m_state.camera.GetYaw(); - m_pitch = m_state.camera.GetPitch(); - } - } + ImGui::End(); + + // Sets Camera based on UI selection (WASD, Orbit or any of the GLTF cameras) + // + ImGuiIO& io = ImGui::GetIO(); + { + //If the mouse was not used by the GUI then it's for the camera + // + if (io.WantCaptureMouse) + { + io.MouseDelta.x = 0; + io.MouseDelta.y = 0; + io.MouseWheel = 0; + } + else if ((io.KeyCtrl == false) && (io.MouseDown[0] == true)) + { + m_roll -= io.MouseDelta.x / 100.f; + m_pitch += io.MouseDelta.y / 100.f; + } + + // Choose camera movement depending on setting + // + if (m_cameraControlSelected == 0) + { + // Orbiting + // + m_distance -= (float)io.MouseWheel / 3.0f; + m_distance = std::max(m_distance, 0.1f); + + bool panning = (io.KeyCtrl == true) && (io.MouseDown[0] == true); + + m_state.camera.UpdateCameraPolar(m_roll, m_pitch, panning ? -io.MouseDelta.x / 100.0f : 0.0f, panning ? 
io.MouseDelta.y / 100.0f : 0.0f, m_distance); + } + else if (m_cameraControlSelected == 1) + { + // WASD + // + m_state.camera.UpdateCameraWASD(m_roll, m_pitch, io.KeysDown, io.DeltaTime); + } + else if (m_cameraControlSelected > 1) + { + // Use a camera from the GLTF + // + m_pGltfLoader->GetCamera(m_cameraControlSelected - 2, &m_state.camera); + m_roll = m_state.camera.GetYaw(); + m_pitch = m_state.camera.GetPitch(); + } + } } //-------------------------------------------------------------------------------------- @@ -567,39 +551,39 @@ void FfxCacaoSample::BuildUI() // OnRender, updates the state from the UI, animates, transforms and renders the scene // //-------------------------------------------------------------------------------------- -void FfxCacaoSample::OnRender() +void Sample::OnRender() { - // Get timings - // - double timeNow = MillisecondsNow(); - float deltaTime = (m_timeStep == 0.0f) ? (float)(timeNow - m_lastFrameTime) : m_timeStep; - m_lastFrameTime = timeNow; - - // Set animation time - // - if (m_bPlay) - { - m_time += (float)deltaTime / 1000.0f; - } - - ImGUI_UpdateIO(); - ImGui::NewFrame(); - - if (m_loadingScene) - { - // the scene loads in chuncks, that way we can show a progress bar - static int loadingStage = 0; - loadingStage = m_Node->LoadScene(m_pGltfLoader, loadingStage); - if (loadingStage == 0) - { - m_time = 0; - m_loadingScene = false; - } - } + // Get timings + // + double timeNow = MillisecondsNow(); + float deltaTime = (m_timeStep == 0.0f) ? 
(float)(timeNow - m_lastFrameTime) : m_timeStep; + m_lastFrameTime = timeNow; + + // Set animation time + // + if (m_bPlay) + { + m_time += (float)deltaTime / 1000.0f; + } + + ImGUI_UpdateIO(); + ImGui::NewFrame(); + + if (m_loadingScene) + { + // the scene loads in chuncks, that way we can show a progress bar + static int loadingStage = 0; + loadingStage = m_node->LoadScene(m_pGltfLoader, loadingStage); + if (loadingStage == 0) + { + m_time = 0; + m_loadingScene = false; + } + } #ifdef FFX_CACAO_ENABLE_PROFILING - else if (m_pGltfLoader && m_isBenchmarking) - { - // benchmarking takes control of the time, and exits the app when the animation is done + else if (m_pGltfLoader && m_isBenchmarking) + { + // benchmarking takes control of the time, and exits the app when the animation is done if (m_benchmarkWarmUpFramesToRun) { @@ -613,8 +597,8 @@ void FfxCacaoSample::OnRender() exit(0); } - FfxCacaoDetailedTiming timings = {}; - m_Node->GetCacaoTimingValues(&m_state, &timings); + FFX_CACAO_DetailedTiming timings = {}; + m_node->GetCacaoTimingValues(&m_state, &timings); if (timings.numTimestamps) { @@ -633,35 +617,35 @@ void FfxCacaoSample::OnRender() m_time = BenchmarkLoop(timestamps, &m_state.camera, (const std::string**)&pFilename); } } - } + } #endif - else - { - // Build the UI. Note that the rendering of the UI happens later. - BuildUI(); + else + { + // Build the UI. Note that the rendering of the UI happens later. 
+ BuildUI(); if (m_bPlay) { m_time += (float)deltaTime / 1000.0f; } - } + } - // Animate and transform the scene - // - if (m_pGltfLoader) - { - m_pGltfLoader->SetAnimationTime(0, m_time); - m_pGltfLoader->TransformScene(0, XMMatrixIdentity()); - } + // Animate and transform the scene + // + if (m_pGltfLoader) + { + m_pGltfLoader->SetAnimationTime(0, m_time); + m_pGltfLoader->TransformScene(0, XMMatrixIdentity()); + } - m_state.time = m_time; + m_state.time = m_time; - // Do Render frame using AFR - // - m_Node->OnRender(&m_state, &m_swapChain); + // Do Render frame using AFR + // + m_node->OnRender(&m_state, &m_swapChain); - m_swapChain.Present(); + m_swapChain.Present(); } @@ -671,13 +655,13 @@ void FfxCacaoSample::OnRender() // //-------------------------------------------------------------------------------------- int WINAPI WinMain(HINSTANCE hInstance, - HINSTANCE hPrevInstance, - LPSTR lpCmdLine, - int nCmdShow) + HINSTANCE hPrevInstance, + LPSTR lpCmdLine, + int nCmdShow) { - LPCSTR Name = "FFX CACAO Vulkan Sample v1.0"; + LPCSTR Name = "FFX CACAO Vulkan Sample v1.2"; - // create new Vulkan sample - return RunFramework(hInstance, lpCmdLine, nCmdShow, new FfxCacaoSample(Name)); + // create new Vulkan sample + return RunFramework(hInstance, lpCmdLine, nCmdShow, new Sample(Name)); } diff --git a/sample/src/VK/Sample.h b/sample/src/VK/Sample.h new file mode 100644 index 0000000..560276f --- /dev/null +++ b/sample/src/VK/Sample.h @@ -0,0 +1,96 @@ +// AMD SampleVK sample code +// +// Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. 
+// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files(the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions : +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. +#pragma once + +#include "SampleRenderer.h" + +// +// This is the main class, it manages the state of the sample and does all the high level work without touching the GPU directly. +// This class uses the GPU via the the SampleRenderer class. We would have a SampleRenderer instance for each GPU. 
+// +// This class takes care of: +// +// - loading a scene (just the CPU data) +// - updating the camera +// - keeping track of time +// - handling the keyboard +// - updating the animation +// - building the UI (but do not renders it) +// - uses the SampleRenderer to update all the state to the GPU and do the rendering +// + +class Sample : public FrameworkWindows +{ +public: + Sample(LPCSTR name); + void OnCreate(HWND hWnd); + void OnDestroy(); + void BuildUI(); + void OnParseCommandLine(LPSTR lpCmdLine, uint32_t* pWidth, uint32_t* pHeight, bool *pbFullScreen); + void OnRender(); + bool OnEvent(MSG msg); + void OnResize(uint32_t Width, uint32_t Height) { OnResize(Width, Height, DISPLAYMODE_SDR, false); } + void OnResize(uint32_t Width, uint32_t Height, DisplayModes displayMode, bool force); + void SetFullScreen(bool fullscreen); + +private: + Device m_device; + SwapChain m_swapChain; + + DisplayModes m_currentDisplayMode; + std::vector m_displayModesAvailable; + std::vector m_displayModesNamesAvailable; + + GLTFCommon *m_pGltfLoader = NULL; + bool m_loadingScene = false; + + SampleRenderer *m_node = NULL; + SampleRenderer::State m_state; + + float m_distance; + float m_roll; + float m_pitch; + + float m_microsecondsPerGpuTick; + float m_time; // WallClock in seconds. 
+ double m_lastFrameTime; + float m_timeStep = 0; + int m_cameraControlSelected = 0; + + // json config file + json m_jsonConfigFile; + std::vector m_sceneNames; + int m_activeScene; + int m_activeCamera; + bool m_isCpuValidationLayerEnabled; + bool m_isGpuValidationLayerEnabled; + + bool m_vsyncEnabled = false; + bool m_bPlay; + bool m_requiresLoad = true; + int m_presetIndex = 3; + +#ifdef FFX_CACAO_ENABLE_PROFILING + char m_benchmarkFilename[1024]; + bool m_isBenchmarking; + uint32_t m_benchmarkScreenWidth; + uint32_t m_benchmarkScreenHeight; + uint32_t m_benchmarkWarmUpFramesToRun; +#endif +}; diff --git a/sample/src/VK/SampleRenderer.cpp b/sample/src/VK/SampleRenderer.cpp index ae4f518..f5c36d8 100644 --- a/sample/src/VK/SampleRenderer.cpp +++ b/sample/src/VK/SampleRenderer.cpp @@ -1,6 +1,6 @@ // AMD SampleVK sample code // -// Copyright(c) 2018 Advanced Micro Devices, Inc.All rights reserved. +// Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights @@ -28,75 +28,75 @@ //-------------------------------------------------------------------------------------- void SampleRenderer::OnCreate(Device *pDevice, SwapChain *pSwapChain) { - m_pDevice = pDevice; - - // Initialize helpers - - // Create all the heaps for the resources views - const uint32_t cbvDescriptorCount = 2000; - const uint32_t srvDescriptorCount = 2000; - const uint32_t uavDescriptorCount = 10; - const uint32_t samplerDescriptorCount = 20; - m_resourceViewHeaps.OnCreate(pDevice, cbvDescriptorCount, srvDescriptorCount, uavDescriptorCount, samplerDescriptorCount); - - // Create a commandlist ring for the Direct queue - uint32_t commandListsPerBackBuffer = 8; - m_CommandListRing.OnCreate(pDevice, backBufferCount, commandListsPerBackBuffer); - - // Create 
a 'dynamic' constant buffer - const uint32_t constantBuffersMemSize = 20 * 1024 * 1024; - m_ConstantBufferRing.OnCreate(pDevice, backBufferCount, constantBuffersMemSize, "Uniforms"); - - // Create a 'static' pool for vertices and indices - const uint32_t staticGeometryMemSize = 128 * 1024 * 1024; - const uint32_t systemGeometryMemSize = 32 * 1024; - m_VidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, USE_VID_MEM, "StaticGeom"); - m_SysMemBufferPool.OnCreate(pDevice, systemGeometryMemSize, false, "PostProcGeom"); - - // initialize the GPU time stamps module - m_GPUTimer.OnCreate(pDevice, backBufferCount); - - // Quick helper to upload resources, it has it's own commandList and uses suballocation. - // for 4K textures we'll need 100Megs - const uint32_t uploadHeapMemSize = 1000 * 1024 * 1024; - m_UploadHeap.OnCreate(pDevice, staticGeometryMemSize); // initialize an upload heap (uses suballocation for faster results) - - // Create a 2Kx2K Shadowmap atlas to hold 4 cascades/spotlights - m_shadowMap.InitDepthStencil(m_pDevice, 2 * 1024, 2 * 1024, VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, "ShadowMap"); - m_shadowMap.CreateSRV(&m_shadowMapSRV); - m_shadowMap.CreateDSV(&m_shadowMapDSV); - - // Create render pass shadow, will clear contents - // - { - VkAttachmentDescription depthAttachments; - AttachClearBeforeUse(m_shadowMap.GetFormat(), VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &depthAttachments); - m_render_pass_shadow = CreateRenderPassOptimal(m_pDevice->GetDevice(), 0, NULL, &depthAttachments); - - // Create frame buffer, its size is now window dependant so we can do this here. 
- // - VkImageView attachmentViews[1] = { m_shadowMapDSV }; - VkFramebufferCreateInfo fb_info = {}; - fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - fb_info.pNext = NULL; - fb_info.renderPass = m_render_pass_shadow; - fb_info.attachmentCount = 1; - fb_info.pAttachments = attachmentViews; - fb_info.width = m_shadowMap.GetWidth(); - fb_info.height = m_shadowMap.GetHeight(); - fb_info.layers = 1; - VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_pFrameBuffer_shadow); - assert(res == VK_SUCCESS); - } - - // Create HDR MSAA render pass + clear, for the sky, PBR and Wireframe passes - // - { - VkAttachmentDescription colorAttachment, depthAttachment; - AttachClearBeforeUse(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_4_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &colorAttachment); + m_pDevice = pDevice; + + // Initialize helpers + + // Create all the heaps for the resources views + const uint32_t cbvDescriptorCount = 2000; + const uint32_t srvDescriptorCount = 2000; + const uint32_t uavDescriptorCount = 10; + const uint32_t samplerDescriptorCount = 20; + m_resourceViewHeaps.OnCreate(pDevice, cbvDescriptorCount, srvDescriptorCount, uavDescriptorCount, samplerDescriptorCount); + + // Create a commandlist ring for the Direct queue + uint32_t commandListsPerBackBuffer = 8; + m_commandListRing.OnCreate(pDevice, backBufferCount, commandListsPerBackBuffer); + + // Create a 'dynamic' constant buffer + const uint32_t constantBuffersMemSize = 20 * 1024 * 1024; + m_constantBufferRing.OnCreate(pDevice, backBufferCount, constantBuffersMemSize, "Uniforms"); + + // Create a 'static' pool for vertices and indices + const uint32_t staticGeometryMemSize = 128 * 1024 * 1024; + const uint32_t systemGeometryMemSize = 32 * 1024; + m_vidMemBufferPool.OnCreate(pDevice, staticGeometryMemSize, USE_VID_MEM, "StaticGeom"); + m_sysMemBufferPool.OnCreate(pDevice, systemGeometryMemSize, false, "PostProcGeom"); + + // 
initialize the GPU time stamps module + m_gpuTimer.OnCreate(pDevice, backBufferCount); + + // Quick helper to upload resources, it has it's own commandList and uses suballocation. + // for 4K textures we'll need 100Megs + const uint32_t uploadHeapMemSize = 1000 * 1024 * 1024; + m_uploadHeap.OnCreate(pDevice, staticGeometryMemSize); // initialize an upload heap (uses suballocation for faster results) + + // Create a 2Kx2K Shadowmap atlas to hold 4 cascades/spotlights + m_shadowMap.InitDepthStencil(m_pDevice, 2 * 1024, 2 * 1024, VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, "ShadowMap"); + m_shadowMap.CreateSRV(&m_shadowMapSRV); + m_shadowMap.CreateDSV(&m_shadowMapDSV); + + // Create render pass shadow, will clear contents + // + { + VkAttachmentDescription depthAttachments; + AttachClearBeforeUse(m_shadowMap.GetFormat(), VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &depthAttachments); + m_renderPassShadow = CreateRenderPassOptimal(m_pDevice->GetDevice(), 0, NULL, &depthAttachments); + + // Create frame buffer, its size is now window dependant so we can do this here. 
+ // + VkImageView attachmentViews[1] = { m_shadowMapDSV }; + VkFramebufferCreateInfo fb_info = {}; + fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + fb_info.pNext = NULL; + fb_info.renderPass = m_renderPassShadow; + fb_info.attachmentCount = 1; + fb_info.pAttachments = attachmentViews; + fb_info.width = m_shadowMap.GetWidth(); + fb_info.height = m_shadowMap.GetHeight(); + fb_info.layers = 1; + VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_pFrameBufferShadow); + assert(res == VK_SUCCESS); + } + + // Create HDR MSAA render pass + clear, for the sky, PBR and Wireframe passes + // + { + VkAttachmentDescription colorAttachment, depthAttachment; + AttachClearBeforeUse(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_4_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, &colorAttachment); AttachClearBeforeUse(VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_4_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, &depthAttachment); - m_render_pass_HDR_MSAA = CreateRenderPassOptimal(m_pDevice->GetDevice(), 1, &colorAttachment, &depthAttachment); - } + m_renderPassHDRMSAA = CreateRenderPassOptimal(m_pDevice->GetDevice(), 1, &colorAttachment, &depthAttachment); + } // Create non msaa render pass // @@ -104,43 +104,37 @@ void SampleRenderer::OnCreate(Device *pDevice, SwapChain *pSwapChain) VkAttachmentDescription colorAttachment, depthAttachment; AttachClearBeforeUse(VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachment); AttachClearBeforeUse(VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &depthAttachment); - m_render_pass_non_msaa = CreateRenderPassOptimal(m_pDevice->GetDevice(), 1, &colorAttachment, &depthAttachment); + m_renderPassNonMSAA = CreateRenderPassOptimal(m_pDevice->GetDevice(), 1, &colorAttachment, &depthAttachment); } - // 
Create HDR render pass, for the GUI - // - { - VkAttachmentDescription colorAttachment; - AttachBlending(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachment); - m_render_pass_PBR_HDR = CreateRenderPassOptimal(m_pDevice->GetDevice(), 1, &colorAttachment, NULL); - } - - m_skyDome.OnCreate(pDevice, m_render_pass_HDR_MSAA, &m_UploadHeap, VK_FORMAT_R16G16B16A16_SFLOAT, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, "..\\media\\envmaps\\papermill\\diffuse.dds", "..\\media\\envmaps\\papermill\\specular.dds", VK_SAMPLE_COUNT_4_BIT); - m_skyDomeProc.OnCreate(pDevice, m_render_pass_HDR_MSAA, &m_UploadHeap, VK_FORMAT_R16G16B16A16_SFLOAT, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_SAMPLE_COUNT_4_BIT); - m_wireframe.OnCreate(pDevice, m_render_pass_HDR_MSAA, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_SAMPLE_COUNT_4_BIT); - m_wireframeBox.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool); - m_downSample.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_FORMAT_R16G16B16A16_SFLOAT); - m_bloom.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing, &m_VidMemBufferPool, VK_FORMAT_R16G16B16A16_SFLOAT); - - // Create tonemapping pass - m_toneMappingCS.OnCreate(pDevice, &m_resourceViewHeaps, &m_ConstantBufferRing); - m_toneMappingPS.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), &m_resourceViewHeaps, &m_VidMemBufferPool, &m_ConstantBufferRing); - m_colorConversionPS.OnCreate(pDevice, pSwapChain->GetRenderPass(), &m_resourceViewHeaps, &m_VidMemBufferPool, &m_ConstantBufferRing); - - // Initialize UI rendering resources - m_ImGUI.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), &m_UploadHeap, &m_ConstantBufferRing); - - // Make sure upload heap has finished uploading before continuing -#if (USE_VID_MEM==true) - 
m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); - m_UploadHeap.FlushAndFinish(); -#endif + // Create HDR render pass, for the GUI + // + { + VkAttachmentDescription colorAttachment; + AttachBlending(VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, &colorAttachment); + m_renderPassPBRHDR = CreateRenderPassOptimal(m_pDevice->GetDevice(), 1, &colorAttachment, NULL); + } + + m_skyDome.OnCreate(pDevice, m_renderPassHDRMSAA, &m_uploadHeap, VK_FORMAT_R16G16B16A16_SFLOAT, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, "..\\media\\envmaps\\papermill\\diffuse.dds", "..\\media\\envmaps\\papermill\\specular.dds", VK_SAMPLE_COUNT_4_BIT); + m_skyDomeProc.OnCreate(pDevice, m_renderPassHDRMSAA, &m_uploadHeap, VK_FORMAT_R16G16B16A16_SFLOAT, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, VK_SAMPLE_COUNT_4_BIT); + m_wireframe.OnCreate(pDevice, m_renderPassHDRMSAA, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, VK_SAMPLE_COUNT_4_BIT); + m_wireframeBox.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool); + m_downSample.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, VK_FORMAT_R16G16B16A16_SFLOAT); + m_bloom.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing, &m_vidMemBufferPool, VK_FORMAT_R16G16B16A16_SFLOAT); + + // Create tonemapping pass + m_toneMappingCS.OnCreate(pDevice, &m_resourceViewHeaps, &m_constantBufferRing); + m_toneMappingPS.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), &m_resourceViewHeaps, &m_vidMemBufferPool, &m_constantBufferRing); + m_colorConversionPS.OnCreate(pDevice, pSwapChain->GetRenderPass(), &m_resourceViewHeaps, &m_vidMemBufferPool, &m_constantBufferRing); + + // Initialize UI rendering resources + m_imGUI.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), &m_uploadHeap, &m_constantBufferRing); // 
======================================================================= // CACAO - size_t cacaoContextSize = ffxCacaoVkGetContextSize(); - FfxCacaoVkCreateInfo info = {}; + size_t cacaoContextSize = FFX_CACAO_VkGetContextSize(); + FFX_CACAO_VkCreateInfo info = {}; info.physicalDevice = pDevice->GetPhysicalDevice(); info.device = pDevice->GetDevice(); info.flags = FFX_CACAO_VK_CREATE_USE_DEBUG_MARKERS | FFX_CACAO_VK_CREATE_NAME_OBJECTS; @@ -148,12 +142,10 @@ void SampleRenderer::OnCreate(Device *pDevice, SwapChain *pSwapChain) { info.flags |= FFX_CACAO_VK_CREATE_USE_16_BIT; } -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - m_cacaoContextNative = (FfxCacaoVkContext*)malloc(cacaoContextSize); - ffxCacaoVkInitContext(m_cacaoContextNative, &info); -#endif - m_cacaoContextDownsampled = (FfxCacaoVkContext*)malloc(cacaoContextSize); - ffxCacaoVkInitContext(m_cacaoContextDownsampled, &info); + m_cacaoContextNative = (FFX_CACAO_VkContext*)malloc(cacaoContextSize); + FFX_CACAO_VkInitContext(m_cacaoContextNative, &info); + m_cacaoContextDownsampled = (FFX_CACAO_VkContext*)malloc(cacaoContextSize); + FFX_CACAO_VkInitContext(m_cacaoContextDownsampled, &info); // create direct output PS descriptor set layout { @@ -171,7 +163,7 @@ void SampleRenderer::OnCreate(Device *pDevice, SwapChain *pSwapChain) bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; bindings[1].pImmutableSamplers = NULL; - bool succeeded = m_resourceViewHeaps.CreateDescriptorSetLayout(&bindings, &m_directOutputDescriptorSetLayout); + bool succeeded = m_resourceViewHeaps.CreateDescriptorSetLayout(&bindings, &m_cacaoApplyDirectDescriptorSetLayout); assert(succeeded); } @@ -188,17 +180,23 @@ void SampleRenderer::OnCreate(Device *pDevice, SwapChain *pSwapChain) info.minLod = -1000; info.maxLod = 1000; info.maxAnisotropy = 1.0f; - VkResult res = vkCreateSampler(m_pDevice->GetDevice(), &info, NULL, &m_directOutputSampler); + VkResult res = vkCreateSampler(m_pDevice->GetDevice(), &info, NULL, 
&m_cacaoApplyDirectSampler); assert(res == VK_SUCCESS); } // alloc direct output PS descriptor sets - for (uint32_t i = 0; i < _countof(m_directOutputDescriptorSets); ++i) + for (uint32_t i = 0; i < _countof(m_cacaoApplyDirectDescriptorSets); ++i) { - m_resourceViewHeaps.AllocDescriptor(m_directOutputDescriptorSetLayout, &m_directOutputDescriptorSets[i]); + m_resourceViewHeaps.AllocDescriptor(m_cacaoApplyDirectDescriptorSetLayout, &m_cacaoApplyDirectDescriptorSets[i]); } - m_directOutputPS.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), "Apply_CACAO_Direct.glsl", "main", "", &m_VidMemBufferPool, &m_ConstantBufferRing, m_directOutputDescriptorSetLayout); + m_cacaoApplyDirectPS.OnCreate(m_pDevice, pSwapChain->GetRenderPass(), "Apply_CACAO_Direct.glsl", "main", "", &m_vidMemBufferPool, &m_constantBufferRing, m_cacaoApplyDirectDescriptorSetLayout); + + // Make sure upload heap has finished uploading before continuing +#if (USE_VID_MEM==true) + m_vidMemBufferPool.UploadData(m_uploadHeap.GetCommandList()); + m_uploadHeap.FlushAndFinish(); +#endif } //-------------------------------------------------------------------------------------- @@ -208,45 +206,43 @@ void SampleRenderer::OnCreate(Device *pDevice, SwapChain *pSwapChain) //-------------------------------------------------------------------------------------- void SampleRenderer::OnDestroy() { - m_directOutputPS.OnDestroy(); - vkDestroySampler(m_pDevice->GetDevice(), m_directOutputSampler, NULL); + m_cacaoApplyDirectPS.OnDestroy(); + vkDestroySampler(m_pDevice->GetDevice(), m_cacaoApplyDirectSampler, NULL); - ffxCacaoVkDestroyContext(m_cacaoContextDownsampled); + FFX_CACAO_VkDestroyContext(m_cacaoContextDownsampled); free(m_cacaoContextDownsampled); -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - ffxCacaoVkDestroyContext(m_cacaoContextNative); + FFX_CACAO_VkDestroyContext(m_cacaoContextNative); free(m_cacaoContextNative); -#endif - m_ImGUI.OnDestroy(); - m_colorConversionPS.OnDestroy(); - m_toneMappingPS.OnDestroy(); - 
m_toneMappingCS.OnDestroy(); - m_bloom.OnDestroy(); - m_downSample.OnDestroy(); - m_wireframeBox.OnDestroy(); - m_wireframe.OnDestroy(); - m_skyDomeProc.OnDestroy(); - m_skyDome.OnDestroy(); - m_shadowMap.OnDestroy(); - - vkDestroyImageView(m_pDevice->GetDevice(), m_shadowMapDSV, nullptr); - vkDestroyImageView(m_pDevice->GetDevice(), m_shadowMapSRV, nullptr); - - vkDestroyRenderPass(m_pDevice->GetDevice(), m_render_pass_non_msaa, NULL); - vkDestroyRenderPass(m_pDevice->GetDevice(), m_render_pass_shadow, nullptr); - vkDestroyRenderPass(m_pDevice->GetDevice(), m_render_pass_PBR_HDR, nullptr); - vkDestroyRenderPass(m_pDevice->GetDevice(), m_render_pass_HDR_MSAA, nullptr); - - vkDestroyFramebuffer(m_pDevice->GetDevice(), m_pFrameBuffer_shadow, nullptr); - - m_UploadHeap.OnDestroy(); - m_GPUTimer.OnDestroy(); - m_VidMemBufferPool.OnDestroy(); - m_SysMemBufferPool.OnDestroy(); - m_ConstantBufferRing.OnDestroy(); - m_resourceViewHeaps.OnDestroy(); - m_CommandListRing.OnDestroy(); + m_imGUI.OnDestroy(); + m_colorConversionPS.OnDestroy(); + m_toneMappingPS.OnDestroy(); + m_toneMappingCS.OnDestroy(); + m_bloom.OnDestroy(); + m_downSample.OnDestroy(); + m_wireframeBox.OnDestroy(); + m_wireframe.OnDestroy(); + m_skyDomeProc.OnDestroy(); + m_skyDome.OnDestroy(); + m_shadowMap.OnDestroy(); + + vkDestroyImageView(m_pDevice->GetDevice(), m_shadowMapDSV, nullptr); + vkDestroyImageView(m_pDevice->GetDevice(), m_shadowMapSRV, nullptr); + + vkDestroyRenderPass(m_pDevice->GetDevice(), m_renderPassNonMSAA, NULL); + vkDestroyRenderPass(m_pDevice->GetDevice(), m_renderPassShadow, nullptr); + vkDestroyRenderPass(m_pDevice->GetDevice(), m_renderPassPBRHDR, nullptr); + vkDestroyRenderPass(m_pDevice->GetDevice(), m_renderPassHDRMSAA, nullptr); + + vkDestroyFramebuffer(m_pDevice->GetDevice(), m_pFrameBufferShadow, nullptr); + + m_uploadHeap.OnDestroy(); + m_gpuTimer.OnDestroy(); + m_vidMemBufferPool.OnDestroy(); + m_sysMemBufferPool.OnDestroy(); + m_constantBufferRing.OnDestroy(); + 
m_resourceViewHeaps.OnDestroy(); + m_commandListRing.OnDestroy(); } //-------------------------------------------------------------------------------------- @@ -256,27 +252,27 @@ void SampleRenderer::OnDestroy() //-------------------------------------------------------------------------------------- void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height) { - m_Width = Width; - m_Height = Height; - - // Set the viewport - // - m_viewport.x = 0; - m_viewport.y = (float)Height; - m_viewport.width = (float)Width; - m_viewport.height = -(float)(Height); - m_viewport.minDepth = (float)0.0f; - m_viewport.maxDepth = (float)1.0f; - - // Create scissor rectangle - // - m_rectScissor.extent.width = Width; - m_rectScissor.extent.height = Height; - m_rectScissor.offset.x = 0; - m_rectScissor.offset.y = 0; - - // Create depth buffer - // + m_width = Width; + m_height = Height; + + // Set the viewport + // + m_viewport.x = 0; + m_viewport.y = (float)Height; + m_viewport.width = (float)Width; + m_viewport.height = -(float)(Height); + m_viewport.minDepth = (float)0.0f; + m_viewport.maxDepth = (float)1.0f; + + // Create scissor rectangle + // + m_rectScissor.extent.width = Width; + m_rectScissor.extent.height = Height; + m_rectScissor.offset.x = 0; + m_rectScissor.offset.y = 0; + + // Create depth buffer + // { VkImageCreateInfo image_info = {}; image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; @@ -298,72 +294,72 @@ void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, image_info.tiling = VK_IMAGE_TILING_OPTIMAL; m_depthBuffer.Init(m_pDevice, &image_info, "DepthBuffer"); } - m_depthBuffer.CreateDSV(&m_depthBufferDSV); - - // Create Texture + RTV with x4 MSAA - // - m_HDRMSAA.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_4_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT), 
false, "HDRMSAA"); - m_HDRMSAA.CreateRTV(&m_HDRMSAASRV); - - // Create Texture + RTV, to hold the resolved scene - // - m_HDR.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT), false, "HDR"); - m_HDR.CreateSRV(&m_HDRSRV); - m_HDR.CreateSRV(&m_HDRUAV); - - // Create framebuffer for the MSAA RT - // - { - VkImageView attachments_PBR_HDR_MSAA[] = { m_HDRMSAASRV, m_depthBufferDSV }; - VkImageView attachments_PBR_HDR[1] = { m_HDRSRV }; - - VkFramebufferCreateInfo fb_info = {}; - fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; - fb_info.pNext = NULL; - fb_info.attachmentCount = _countof(attachments_PBR_HDR_MSAA); - fb_info.pAttachments = attachments_PBR_HDR_MSAA; - fb_info.width = Width; - fb_info.height = Height; - fb_info.layers = 1; - - VkResult res; - - fb_info.renderPass = m_render_pass_HDR_MSAA; - res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_pFrameBuffer_HDR_MSAA); - assert(res == VK_SUCCESS); - - fb_info.attachmentCount = 1; - fb_info.pAttachments = attachments_PBR_HDR; - fb_info.renderPass = m_render_pass_PBR_HDR; - res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_pFrameBuffer_PBR_HDR); - assert(res == VK_SUCCESS); - } - - // update bloom and downscaling effect - // - m_downSample.OnCreateWindowSizeDependentResources(m_Width, m_Height, &m_HDR, 6); //downsample the HDR texture 6 times - m_bloom.OnCreateWindowSizeDependentResources(m_Width / 2, m_Height / 2, m_downSample.GetTexture(), 6, &m_HDR); - - // update the pipelines if the swapchain render pass has changed (for example when the format of the swapchain changes) - // - m_colorConversionPS.UpdatePipelines(pSwapChain->GetRenderPass(), pSwapChain->GetDisplayMode()); - m_toneMappingPS.UpdatePipelines(pSwapChain->GetRenderPass()); - - 
m_ImGUI.UpdatePipeline((pSwapChain->GetDisplayMode() == DISPLAYMODE_SDR) ? pSwapChain->GetRenderPass() : m_render_pass_PBR_HDR); + m_depthBuffer.CreateDSV(&m_depthBufferDSV); + + // Create Texture + RTV with x4 MSAA + // + m_hdrMSAA.InitRenderTarget(m_pDevice, m_width, m_height, VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_4_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT), false, "HDRMSAA"); + m_hdrMSAA.CreateRTV(&m_hdrMSAASRV); + + // Create Texture + RTV, to hold the resolved scene + // + m_hdr.InitRenderTarget(m_pDevice, m_width, m_height, VK_FORMAT_R16G16B16A16_SFLOAT, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_STORAGE_BIT), false, "HDR"); + m_hdr.CreateSRV(&m_hdrSRV); + m_hdr.CreateSRV(&m_hdrUAV); + + // Create framebuffer for the MSAA RT + // + { + VkImageView attachments_PBR_HDR_MSAA[] = { m_hdrMSAASRV, m_depthBufferDSV }; + VkImageView attachments_PBR_HDR[1] = { m_hdrSRV }; + + VkFramebufferCreateInfo fb_info = {}; + fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + fb_info.pNext = NULL; + fb_info.attachmentCount = _countof(attachments_PBR_HDR_MSAA); + fb_info.pAttachments = attachments_PBR_HDR_MSAA; + fb_info.width = Width; + fb_info.height = Height; + fb_info.layers = 1; + + VkResult res; + + fb_info.renderPass = m_renderPassHDRMSAA; + res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_pFrameBufferHDRMSAA); + assert(res == VK_SUCCESS); + + fb_info.attachmentCount = 1; + fb_info.pAttachments = attachments_PBR_HDR; + fb_info.renderPass = m_renderPassPBRHDR; + res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_pFrameBufferPBRHDR); + assert(res == VK_SUCCESS); + } + + // update bloom and downscaling effect + // + m_downSample.OnCreateWindowSizeDependentResources(m_width, m_height, &m_hdr, 6); //downsample the HDR texture 
6 times + m_bloom.OnCreateWindowSizeDependentResources(m_width / 2, m_height / 2, m_downSample.GetTexture(), 6, &m_hdr); + + // update the pipelines if the swapchain render pass has changed (for example when the format of the swapchain changes) + // + m_colorConversionPS.UpdatePipelines(pSwapChain->GetRenderPass(), pSwapChain->GetDisplayMode()); + m_toneMappingPS.UpdatePipelines(pSwapChain->GetRenderPass()); + + m_imGUI.UpdatePipeline((pSwapChain->GetDisplayMode() == DISPLAYMODE_SDR) ? pSwapChain->GetRenderPass() : m_renderPassPBRHDR); // ========================================================== // CACAO - m_NormalBufferNonMsaa.InitRenderTarget(m_pDevice, m_Width, m_Height, VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT), false, "NormalBufferNonMsaa"); - m_NormalBufferNonMsaa.CreateRTV(&m_NormalBufferNonMsaaView); + m_normalBufferNonMsaa.InitRenderTarget(m_pDevice, m_width, m_height, VK_FORMAT_A2B10G10R10_UNORM_PACK32, VK_SAMPLE_COUNT_1_BIT, (VkImageUsageFlags)(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT), false, "NormalBufferNonMsaa"); + m_normalBufferNonMsaa.CreateRTV(&m_normalBufferNonMsaaView); - m_DepthBufferNonMsaa.InitDepthStencil(m_pDevice, Width, Height, VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, "DepthBufferNonMsaa"); - m_DepthBufferNonMsaa.CreateSRV(&m_DepthBufferNonMsaaView); + m_depthBufferNonMsaa.InitDepthStencil(m_pDevice, Width, Height, VK_FORMAT_D32_SFLOAT, VK_SAMPLE_COUNT_1_BIT, "DepthBufferNonMsaa"); + m_depthBufferNonMsaa.CreateSRV(&m_depthBufferNonMsaaView); // Create framebuffer for the MSAA RT // { - VkImageView attachments[] = { m_NormalBufferNonMsaaView, m_DepthBufferNonMsaaView }; + VkImageView attachments[] = { m_normalBufferNonMsaaView, m_depthBufferNonMsaaView }; VkFramebufferCreateInfo fb_info = {}; fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; @@ -373,9 +369,9 @@ void 
SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, fb_info.width = Width; fb_info.height = Height; fb_info.layers = 1; - fb_info.renderPass = m_render_pass_non_msaa; + fb_info.renderPass = m_renderPassNonMSAA; - VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_pFrameBuffer_non_msaa); + VkResult res = vkCreateFramebuffer(m_pDevice->GetDevice(), &fb_info, NULL, &m_pFrameBufferNonMSAA); assert(res == VK_SUCCESS); } @@ -407,25 +403,21 @@ void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, m_gltfPBR->OnUpdateWindowSizeDependentResources(m_cacaoOutputSRV); } - FfxCacaoVkScreenSizeInfo ssi = {}; + FFX_CACAO_VkScreenSizeInfo ssi = {}; ssi.width = Width; ssi.height = Height; - ssi.depthView = m_DepthBufferNonMsaaView; - ssi.normalsView = m_NormalBufferNonMsaaView; + ssi.depthView = m_depthBufferNonMsaaView; + ssi.normalsView = m_normalBufferNonMsaaView; ssi.output = m_cacaoOutput.Resource(); ssi.outputView = m_cacaoOutputSRV; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION ssi.useDownsampledSsao = FFX_CACAO_TRUE; - ffxCacaoVkInitScreenSizeDependentResources(m_cacaoContextDownsampled, &ssi); + FFX_CACAO_VkInitScreenSizeDependentResources(m_cacaoContextDownsampled, &ssi); ssi.useDownsampledSsao = FFX_CACAO_FALSE; - ffxCacaoVkInitScreenSizeDependentResources(m_cacaoContextNative, &ssi); -#else - ffxCacaoVkInitScreenSizeDependentResources(m_cacaoContextDownsampled, &ssi); -#endif + FFX_CACAO_VkInitScreenSizeDependentResources(m_cacaoContextNative, &ssi); - m_directOutputPS.UpdatePipeline(pSwapChain->GetRenderPass()); + m_cacaoApplyDirectPS.UpdatePipeline(pSwapChain->GetRenderPass()); } //-------------------------------------------------------------------------------------- @@ -435,34 +427,32 @@ void SampleRenderer::OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, //-------------------------------------------------------------------------------------- void 
SampleRenderer::OnDestroyWindowSizeDependentResources() { -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - ffxCacaoVkDestroyScreenSizeDependentResources(m_cacaoContextNative); -#endif - ffxCacaoVkDestroyScreenSizeDependentResources(m_cacaoContextDownsampled); + FFX_CACAO_VkDestroyScreenSizeDependentResources(m_cacaoContextNative); + FFX_CACAO_VkDestroyScreenSizeDependentResources(m_cacaoContextDownsampled); - vkDestroyImageView(m_pDevice->GetDevice(), m_NormalBufferNonMsaaView, NULL); - m_NormalBufferNonMsaa.OnDestroy(); - vkDestroyImageView(m_pDevice->GetDevice(), m_DepthBufferNonMsaaView, NULL); - m_DepthBufferNonMsaa.OnDestroy(); + vkDestroyImageView(m_pDevice->GetDevice(), m_normalBufferNonMsaaView, NULL); + m_normalBufferNonMsaa.OnDestroy(); + vkDestroyImageView(m_pDevice->GetDevice(), m_depthBufferNonMsaaView, NULL); + m_depthBufferNonMsaa.OnDestroy(); vkDestroyImageView(m_pDevice->GetDevice(), m_cacaoOutputSRV, NULL); m_cacaoOutput.OnDestroy(); - m_bloom.OnDestroyWindowSizeDependentResources(); - m_downSample.OnDestroyWindowSizeDependentResources(); + m_bloom.OnDestroyWindowSizeDependentResources(); + m_downSample.OnDestroyWindowSizeDependentResources(); - m_HDR.OnDestroy(); - m_HDRMSAA.OnDestroy(); - m_depthBuffer.OnDestroy(); + m_hdr.OnDestroy(); + m_hdrMSAA.OnDestroy(); + m_depthBuffer.OnDestroy(); - vkDestroyFramebuffer(m_pDevice->GetDevice(), m_pFrameBuffer_non_msaa, NULL); - vkDestroyFramebuffer(m_pDevice->GetDevice(), m_pFrameBuffer_HDR_MSAA, nullptr); - vkDestroyFramebuffer(m_pDevice->GetDevice(), m_pFrameBuffer_PBR_HDR, nullptr); + vkDestroyFramebuffer(m_pDevice->GetDevice(), m_pFrameBufferNonMSAA, NULL); + vkDestroyFramebuffer(m_pDevice->GetDevice(), m_pFrameBufferHDRMSAA, nullptr); + vkDestroyFramebuffer(m_pDevice->GetDevice(), m_pFrameBufferPBRHDR, nullptr); - vkDestroyImageView(m_pDevice->GetDevice(), m_depthBufferDSV, nullptr); - vkDestroyImageView(m_pDevice->GetDevice(), m_HDRMSAASRV, nullptr); - vkDestroyImageView(m_pDevice->GetDevice(), 
m_HDRSRV, nullptr); - vkDestroyImageView(m_pDevice->GetDevice(), m_HDRUAV, nullptr); + vkDestroyImageView(m_pDevice->GetDevice(), m_depthBufferDSV, nullptr); + vkDestroyImageView(m_pDevice->GetDevice(), m_hdrMSAASRV, nullptr); + vkDestroyImageView(m_pDevice->GetDevice(), m_hdrSRV, nullptr); + vkDestroyImageView(m_pDevice->GetDevice(), m_hdrUAV, nullptr); } //-------------------------------------------------------------------------------------- @@ -472,86 +462,86 @@ void SampleRenderer::OnDestroyWindowSizeDependentResources() //-------------------------------------------------------------------------------------- int SampleRenderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) { - // show loading progress - // - ImGui::OpenPopup("Loading"); - if (ImGui::BeginPopupModal("Loading", NULL, ImGuiWindowFlags_AlwaysAutoResize)) - { - float progress = (float)stage / 12.0f; - ImGui::ProgressBar(progress, ImVec2(0.f, 0.f), NULL); - ImGui::EndPopup(); - } - - // Loading stages - // - if (stage == 0) - { - } - else if (stage == 5) - { - Profile p("m_pGltfLoader->Load"); - - m_pGLTFTexturesAndBuffers = new GLTFTexturesAndBuffers(); - m_pGLTFTexturesAndBuffers->OnCreate(m_pDevice, pGLTFCommon, &m_UploadHeap, &m_VidMemBufferPool, &m_ConstantBufferRing); - } - else if (stage == 6) - { - Profile p("LoadTextures"); - - // here we are loading onto the GPU all the textures and the inverse matrices - // this data will be used to create the PBR and Depth passes - m_pGLTFTexturesAndBuffers->LoadTextures(); - } - else if (stage == 7) - { - Profile p("m_gltfDepth->OnCreate"); - - //create the glTF's textures, VBs, IBs, shaders and descriptors for this particular pass - m_gltfDepth = new GltfDepthPass(); - m_gltfDepth->OnCreate( - m_pDevice, - m_render_pass_shadow, - &m_UploadHeap, - &m_resourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers - ); + // show loading progress + // + ImGui::OpenPopup("Loading"); + if (ImGui::BeginPopupModal("Loading", 
NULL, ImGuiWindowFlags_AlwaysAutoResize)) + { + float progress = (float)stage / 12.0f; + ImGui::ProgressBar(progress, ImVec2(0.f, 0.f), NULL); + ImGui::EndPopup(); + } + + // Loading stages + // + if (stage == 0) + { + } + else if (stage == 5) + { + Profile p("m_pGltfLoader->Load"); + + m_pGLTFTexturesAndBuffers = new GLTFTexturesAndBuffers(); + m_pGLTFTexturesAndBuffers->OnCreate(m_pDevice, pGLTFCommon, &m_uploadHeap, &m_vidMemBufferPool, &m_constantBufferRing); + } + else if (stage == 6) + { + Profile p("LoadTextures"); + + // here we are loading onto the GPU all the textures and the inverse matrices + // this data will be used to create the PBR and Depth passes + m_pGLTFTexturesAndBuffers->LoadTextures(); + } + else if (stage == 7) + { + Profile p("m_gltfDepth->OnCreate"); + + //create the glTF's textures, VBs, IBs, shaders and descriptors for this particular pass + m_gltfDepth = new GltfDepthPass(); + m_gltfDepth->OnCreate( + m_pDevice, + m_renderPassShadow, + &m_uploadHeap, + &m_resourceViewHeaps, + &m_constantBufferRing, + &m_vidMemBufferPool, + m_pGLTFTexturesAndBuffers + ); #if (USE_VID_MEM==true) - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); - m_UploadHeap.FlushAndFinish(); + m_vidMemBufferPool.UploadData(m_uploadHeap.GetCommandList()); + m_uploadHeap.FlushAndFinish(); #endif - } - else if (stage == 8) - { - Profile p("m_gltfPBR->OnCreate"); - - // same thing as above but for the PBR pass - m_gltfPBR = new GltfPbrPass(); - m_gltfPBR->OnCreate( - m_pDevice, - m_render_pass_HDR_MSAA, - &m_UploadHeap, - &m_resourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers, - &m_skyDome, + } + else if (stage == 8) + { + Profile p("m_gltfPBR->OnCreate"); + + // same thing as above but for the PBR pass + m_gltfPBR = new GltfPbrPass(); + m_gltfPBR->OnCreate( + m_pDevice, + m_renderPassHDRMSAA, + &m_uploadHeap, + &m_resourceViewHeaps, + &m_constantBufferRing, + &m_vidMemBufferPool, + m_pGLTFTexturesAndBuffers, + 
&m_skyDome, true, // we will pass in a buffer with AO - m_shadowMapSRV, - true, // Exports ForwardPass - false, // Won't export Specular Roughness - false, // Won't export Diffuse Color + m_shadowMapSRV, + true, // Exports ForwardPass + false, // Won't export Specular Roughness + false, // Won't export Diffuse Color false, // export normals - VK_SAMPLE_COUNT_4_BIT - ); + VK_SAMPLE_COUNT_4_BIT + ); m_gltfPBR->OnUpdateWindowSizeDependentResources(m_cacaoOutputSRV); #if (USE_VID_MEM==true) - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); - m_UploadHeap.FlushAndFinish(); + m_vidMemBufferPool.UploadData(m_uploadHeap.GetCommandList()); + m_uploadHeap.FlushAndFinish(); #endif - } + } else if (stage == 9) { Profile p("m_gltfPBR->OnCreate (Non MSAA)"); @@ -559,11 +549,11 @@ int SampleRenderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) m_gltfPbrNonMsaa = new GltfPbrPass(); m_gltfPbrNonMsaa->OnCreate( m_pDevice, - m_render_pass_non_msaa, - &m_UploadHeap, + m_renderPassNonMSAA, + &m_uploadHeap, &m_resourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, + &m_constantBufferRing, + &m_vidMemBufferPool, m_pGLTFTexturesAndBuffers, &m_skyDome, false, // We won't pass in a buffer with AO @@ -575,42 +565,42 @@ int SampleRenderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) VK_SAMPLE_COUNT_1_BIT ); } - else if (stage == 10) - { - Profile p("m_gltfBBox->OnCreate"); - - // just a bounding box pass that will draw boundingboxes instead of the geometry itself - m_gltfBBox = new GltfBBoxPass(); - m_gltfBBox->OnCreate( - m_pDevice, - m_render_pass_HDR_MSAA, - &m_resourceViewHeaps, - &m_ConstantBufferRing, - &m_VidMemBufferPool, - m_pGLTFTexturesAndBuffers, - &m_wireframe - ); + else if (stage == 10) + { + Profile p("m_gltfBBox->OnCreate"); + + // just a bounding box pass that will draw boundingboxes instead of the geometry itself + m_gltfBBox = new GltfBBoxPass(); + m_gltfBBox->OnCreate( + m_pDevice, + m_renderPassHDRMSAA, + &m_resourceViewHeaps, + 
&m_constantBufferRing, + &m_vidMemBufferPool, + m_pGLTFTexturesAndBuffers, + &m_wireframe + ); #if (USE_VID_MEM==true) - // we are borrowing the upload heap command list for uploading to the GPU the IBs and VBs - m_VidMemBufferPool.UploadData(m_UploadHeap.GetCommandList()); + // we are borrowing the upload heap command list for uploading to the GPU the IBs and VBs + m_vidMemBufferPool.UploadData(m_uploadHeap.GetCommandList()); #endif - } - else if (stage == 11) - { - Profile p("Flush"); + } + else if (stage == 11) + { + Profile p("Flush"); - m_UploadHeap.FlushAndFinish(); + m_uploadHeap.FlushAndFinish(); #if (USE_VID_MEM==true) - //once everything is uploaded we dont need the upload heaps anymore - m_VidMemBufferPool.FreeUploadHeap(); + //once everything is uploaded we dont need the upload heaps anymore + m_vidMemBufferPool.FreeUploadHeap(); #endif - // tell caller that we are done loading the map - return 0; - } + // tell caller that we are done loading the map + return 0; + } - stage++; - return stage; + stage++; + return stage; } //-------------------------------------------------------------------------------------- @@ -620,14 +610,14 @@ int SampleRenderer::LoadScene(GLTFCommon *pGLTFCommon, int stage) //-------------------------------------------------------------------------------------- void SampleRenderer::UnloadScene() { - m_pDevice->GPUFlush(); + m_pDevice->GPUFlush(); - if (m_gltfPBR) - { - m_gltfPBR->OnDestroy(); - delete m_gltfPBR; - m_gltfPBR = NULL; - } + if (m_gltfPBR) + { + m_gltfPBR->OnDestroy(); + delete m_gltfPBR; + m_gltfPBR = NULL; + } if (m_gltfPbrNonMsaa) { @@ -636,26 +626,26 @@ void SampleRenderer::UnloadScene() m_gltfPbrNonMsaa = NULL; } - if (m_gltfDepth) - { - m_gltfDepth->OnDestroy(); - delete m_gltfDepth; - m_gltfDepth = NULL; - } - - if (m_gltfBBox) - { - m_gltfBBox->OnDestroy(); - delete m_gltfBBox; - m_gltfBBox = NULL; - } - - if (m_pGLTFTexturesAndBuffers) - { - m_pGLTFTexturesAndBuffers->OnDestroy(); - delete 
m_pGLTFTexturesAndBuffers; - m_pGLTFTexturesAndBuffers = NULL; - } + if (m_gltfDepth) + { + m_gltfDepth->OnDestroy(); + delete m_gltfDepth; + m_gltfDepth = NULL; + } + + if (m_gltfBBox) + { + m_gltfBBox->OnDestroy(); + delete m_gltfBBox; + m_gltfBBox = NULL; + } + + if (m_pGLTFTexturesAndBuffers) + { + m_pGLTFTexturesAndBuffers->OnDestroy(); + delete m_pGLTFTexturesAndBuffers; + m_pGLTFTexturesAndBuffers = NULL; + } } //-------------------------------------------------------------------------------------- @@ -665,117 +655,117 @@ void SampleRenderer::UnloadScene() //-------------------------------------------------------------------------------------- void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) { - // Let our resource managers do some house keeping - // - m_ConstantBufferRing.OnBeginFrame(); - - // command buffer calls - // - VkCommandBuffer cmdBuf1 = m_CommandListRing.GetNewCommandList(); - - { - VkCommandBufferBeginInfo cmd_buf_info; - cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - cmd_buf_info.pNext = NULL; - cmd_buf_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - cmd_buf_info.pInheritanceInfo = NULL; - VkResult res = vkBeginCommandBuffer(cmdBuf1, &cmd_buf_info); - assert(res == VK_SUCCESS); - } - - m_GPUTimer.OnBeginFrame(cmdBuf1, &m_TimeStamps); - - // Sets the perFrame data - // - per_frame *pPerFrame = NULL; - if (m_pGLTFTexturesAndBuffers) - { - // fill as much as possible using the GLTF (camera, lights, ...) 
- pPerFrame = m_pGLTFTexturesAndBuffers->m_pGLTFCommon->SetPerFrameData(pState->camera); - - // Set some lighting factors - pPerFrame->iblFactor = pState->iblFactor; - pPerFrame->emmisiveFactor = pState->emmisiveFactor; - pPerFrame->invScreenResolution[0] = 1.0f / ((float)m_Width); - pPerFrame->invScreenResolution[1] = 1.0f / ((float)m_Height); - - // Set shadowmaps bias and an index that indicates the rectangle of the atlas in which depth will be rendered - uint32_t shadowMapIndex = 0; - for (uint32_t i = 0; i < pPerFrame->lightCount; i++) - { - if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Spot)) - { - pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // set the shadowmap index - pPerFrame->lights[i].depthBias = 70.0f / 100000.0f; - } - else if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Directional)) - { - pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // set the shadowmap index - pPerFrame->lights[i].depthBias = 1000.0f / 100000.0f; - } - else - { - pPerFrame->lights[i].shadowMapIndex = -1; // no shadow for this light - } - } - - m_pGLTFTexturesAndBuffers->SetPerFrameConstants(); - m_pGLTFTexturesAndBuffers->SetSkinningMatricesForSkeletons(); - } - - // Render to shadow map atlas for spot lights ------------------------------------------ - // - if (m_gltfDepth && pPerFrame != NULL) - { - SetPerfMarkerBegin(cmdBuf1, "ShadowPass"); - - VkClearValue depth_clear_values[1]; - depth_clear_values[0].depthStencil.depth = 1.0f; - depth_clear_values[0].depthStencil.stencil = 0; - - { - VkRenderPassBeginInfo rp_begin; - rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - rp_begin.pNext = NULL; - rp_begin.renderPass = m_render_pass_shadow; - rp_begin.framebuffer = m_pFrameBuffer_shadow; - rp_begin.renderArea.offset.x = 0; - rp_begin.renderArea.offset.y = 0; - rp_begin.renderArea.extent.width = m_shadowMap.GetWidth(); - rp_begin.renderArea.extent.height = m_shadowMap.GetHeight(); - rp_begin.clearValueCount = 
1; - rp_begin.pClearValues = depth_clear_values; - - vkCmdBeginRenderPass(cmdBuf1, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); - m_GPUTimer.GetTimeStamp(cmdBuf1, "Clear Shadow Map"); - } - - uint32_t shadowMapIndex = 0; - for (uint32_t i = 0; i < pPerFrame->lightCount; i++) - { - if (!(pPerFrame->lights[i].type == LightType_Spot || pPerFrame->lights[i].type == LightType_Directional)) - continue; - - // Set the RT's quadrant where to render the shadomap (these viewport offsets need to match the ones in shadowFiltering.h) - uint32_t viewportOffsetsX[4] = { 0, 1, 0, 1 }; - uint32_t viewportOffsetsY[4] = { 0, 0, 1, 1 }; - uint32_t viewportWidth = m_shadowMap.GetWidth() / 2; - uint32_t viewportHeight = m_shadowMap.GetHeight() / 2; - SetViewportAndScissor(cmdBuf1, viewportOffsetsX[shadowMapIndex] * viewportWidth, viewportOffsetsY[shadowMapIndex] * viewportHeight, viewportWidth, viewportHeight); - - //set per frame constant buffer values - GltfDepthPass::per_frame *cbPerFrame = m_gltfDepth->SetPerFrameConstants(); - cbPerFrame->mViewProj = pPerFrame->lights[i].mLightViewProj; - - m_gltfDepth->Draw(cmdBuf1); - - m_GPUTimer.GetTimeStamp(cmdBuf1, "Shadow maps"); - shadowMapIndex++; - } - vkCmdEndRenderPass(cmdBuf1); - - SetPerfMarkerEnd(cmdBuf1); - } + // Let our resource managers do some house keeping + // + m_constantBufferRing.OnBeginFrame(); + + // command buffer calls + // + VkCommandBuffer cmdBuf1 = m_commandListRing.GetNewCommandList(); + + { + VkCommandBufferBeginInfo cmd_buf_info; + cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cmd_buf_info.pNext = NULL; + cmd_buf_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + cmd_buf_info.pInheritanceInfo = NULL; + VkResult res = vkBeginCommandBuffer(cmdBuf1, &cmd_buf_info); + assert(res == VK_SUCCESS); + } + + m_gpuTimer.OnBeginFrame(cmdBuf1, &m_timeStamps); + + // Sets the perFrame data + // + per_frame *pPerFrame = NULL; + if (m_pGLTFTexturesAndBuffers) + { + // fill as much as possible using the 
GLTF (camera, lights, ...) + pPerFrame = m_pGLTFTexturesAndBuffers->m_pGLTFCommon->SetPerFrameData(pState->camera); + + // Set some lighting factors + pPerFrame->iblFactor = pState->iblFactor; + pPerFrame->emmisiveFactor = pState->emmisiveFactor; + pPerFrame->invScreenResolution[0] = 1.0f / ((float)m_width); + pPerFrame->invScreenResolution[1] = 1.0f / ((float)m_height); + + // Set shadowmaps bias and an index that indicates the rectangle of the atlas in which depth will be rendered + uint32_t shadowMapIndex = 0; + for (uint32_t i = 0; i < pPerFrame->lightCount; i++) + { + if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Spot)) + { + pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // set the shadowmap index + pPerFrame->lights[i].depthBias = 70.0f / 100000.0f; + } + else if ((shadowMapIndex < 4) && (pPerFrame->lights[i].type == LightType_Directional)) + { + pPerFrame->lights[i].shadowMapIndex = shadowMapIndex++; // set the shadowmap index + pPerFrame->lights[i].depthBias = 1000.0f / 100000.0f; + } + else + { + pPerFrame->lights[i].shadowMapIndex = -1; // no shadow for this light + } + } + + m_pGLTFTexturesAndBuffers->SetPerFrameConstants(); + m_pGLTFTexturesAndBuffers->SetSkinningMatricesForSkeletons(); + } + + // Render to shadow map atlas for spot lights ------------------------------------------ + // + if (m_gltfDepth && pPerFrame != NULL) + { + SetPerfMarkerBegin(cmdBuf1, "ShadowPass"); + + VkClearValue depth_clear_values[1]; + depth_clear_values[0].depthStencil.depth = 1.0f; + depth_clear_values[0].depthStencil.stencil = 0; + + { + VkRenderPassBeginInfo rp_begin; + rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rp_begin.pNext = NULL; + rp_begin.renderPass = m_renderPassShadow; + rp_begin.framebuffer = m_pFrameBufferShadow; + rp_begin.renderArea.offset.x = 0; + rp_begin.renderArea.offset.y = 0; + rp_begin.renderArea.extent.width = m_shadowMap.GetWidth(); + rp_begin.renderArea.extent.height = m_shadowMap.GetHeight(); + 
rp_begin.clearValueCount = 1; + rp_begin.pClearValues = depth_clear_values; + + vkCmdBeginRenderPass(cmdBuf1, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + m_gpuTimer.GetTimeStamp(cmdBuf1, "Clear Shadow Map"); + } + + uint32_t shadowMapIndex = 0; + for (uint32_t i = 0; i < pPerFrame->lightCount; i++) + { + if (!(pPerFrame->lights[i].type == LightType_Spot || pPerFrame->lights[i].type == LightType_Directional)) + continue; + + // Set the RT's quadrant where to render the shadomap (these viewport offsets need to match the ones in shadowFiltering.h) + uint32_t viewportOffsetsX[4] = { 0, 1, 0, 1 }; + uint32_t viewportOffsetsY[4] = { 0, 0, 1, 1 }; + uint32_t viewportWidth = m_shadowMap.GetWidth() / 2; + uint32_t viewportHeight = m_shadowMap.GetHeight() / 2; + SetViewportAndScissor(cmdBuf1, viewportOffsetsX[shadowMapIndex] * viewportWidth, viewportOffsetsY[shadowMapIndex] * viewportHeight, viewportWidth, viewportHeight); + + //set per frame constant buffer values + GltfDepthPass::per_frame *cbPerFrame = m_gltfDepth->SetPerFrameConstants(); + cbPerFrame->mViewProj = pPerFrame->lights[i].mLightViewProj; + + m_gltfDepth->Draw(cmdBuf1); + + m_gpuTimer.GetTimeStamp(cmdBuf1, "Shadow maps"); + shadowMapIndex++; + } + vkCmdEndRenderPass(cmdBuf1); + + SetPerfMarkerEnd(cmdBuf1); + } // =============================================================================================== // CACAO stuff @@ -786,7 +776,7 @@ void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) // if (m_gltfPbrNonMsaa && pPerFrame) { - m_GPUTimer.GetTimeStamp(cmdBuf1, "PBR Non MSAA"); + m_gpuTimer.GetTimeStamp(cmdBuf1, "PBR Non MSAA"); SetPerfMarkerBegin(cmdBuf1, "PBR Non MSAA pass"); { @@ -801,23 +791,23 @@ void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) VkRenderPassBeginInfo rp_begin; rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; rp_begin.pNext = NULL; - rp_begin.renderPass = m_render_pass_non_msaa; - rp_begin.framebuffer = m_pFrameBuffer_non_msaa; + 
rp_begin.renderPass = m_renderPassNonMSAA; + rp_begin.framebuffer = m_pFrameBufferNonMSAA; rp_begin.renderArea.offset.x = 0; rp_begin.renderArea.offset.y = 0; - rp_begin.renderArea.extent.width = m_NormalBufferNonMsaa.GetWidth(); - rp_begin.renderArea.extent.height = m_NormalBufferNonMsaa.GetHeight(); + rp_begin.renderArea.extent.width = m_normalBufferNonMsaa.GetWidth(); + rp_begin.renderArea.extent.height = m_normalBufferNonMsaa.GetHeight(); rp_begin.clearValueCount = _countof(clearValues); rp_begin.pClearValues = clearValues; vkCmdBeginRenderPass(cmdBuf1, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); - m_GPUTimer.GetTimeStamp(cmdBuf1, "Clear Depth Buffer Non MSAA"); + m_gpuTimer.GetTimeStamp(cmdBuf1, "Clear Depth Buffer Non MSAA"); } - SetViewportAndScissor(cmdBuf1, 0, 0, m_NormalBufferNonMsaa.GetWidth(), m_NormalBufferNonMsaa.GetHeight()); + SetViewportAndScissor(cmdBuf1, 0, 0, m_normalBufferNonMsaa.GetWidth(), m_normalBufferNonMsaa.GetHeight()); m_gltfPbrNonMsaa->Draw(cmdBuf1); - m_GPUTimer.GetTimeStamp(cmdBuf1, "GLTF PBR Non MSAA"); + m_gpuTimer.GetTimeStamp(cmdBuf1, "GLTF PBR Non MSAA"); vkCmdEndRenderPass(cmdBuf1); @@ -825,8 +815,8 @@ void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) } // call CACAO - if (pState->m_useCacao && m_gltfPbrNonMsaa && pPerFrame) { - FfxCacaoMatrix4x4 proj, normalsWorldToView; + if (pState->useCacao && m_gltfPbrNonMsaa && pPerFrame) { + FFX_CACAO_Matrix4x4 proj, normalsWorldToView; { XMFLOAT4X4 p; XMMATRIX xProj = pState->camera.GetProjection(); @@ -844,24 +834,18 @@ void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) normalsWorldToView.elements[3][0] = p._41; normalsWorldToView.elements[3][1] = p._42; normalsWorldToView.elements[3][2] = p._43; normalsWorldToView.elements[3][3] = p._44; } -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - ffxCacaoVkUpdateSettings(m_cacaoContextNative, &pState->m_cacaoSettings); -#endif - ffxCacaoVkUpdateSettings(m_cacaoContextDownsampled, &pState->m_cacaoSettings); + 
FFX_CACAO_VkUpdateSettings(m_cacaoContextNative, &pState->cacaoSettings); + FFX_CACAO_VkUpdateSettings(m_cacaoContextDownsampled, &pState->cacaoSettings); - FfxCacaoStatus status = FFX_CACAO_STATUS_OK; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - if (pState->m_useDownsampledSsao) + FFX_CACAO_Status status = FFX_CACAO_STATUS_OK; + if (pState->useDownsampledSsao) { - status = ffxCacaoVkDraw(m_cacaoContextDownsampled, cmdBuf1, &proj, &normalsWorldToView); + status = FFX_CACAO_VkDraw(m_cacaoContextDownsampled, cmdBuf1, &proj, &normalsWorldToView); } else { - status = ffxCacaoVkDraw(m_cacaoContextNative, cmdBuf1, &proj, &normalsWorldToView); + status = FFX_CACAO_VkDraw(m_cacaoContextNative, cmdBuf1, &proj, &normalsWorldToView); } -#else - status = ffxCacaoVkDraw(m_cacaoContextDownsampled, cmdBuf1, &proj, &normalsWorldToView); -#endif assert(status == FFX_CACAO_STATUS_OK); } else @@ -901,374 +885,374 @@ void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) vkCmdPipelineBarrier(cmdBuf1, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &barrier); } - // Render Scene to the MSAA HDR RT ------------------------------------------------ - // - - { - SetPerfMarkerBegin(cmdBuf1, "Color pass"); - m_GPUTimer.GetTimeStamp(cmdBuf1, "before color RP"); - VkClearValue clear_values[2]; - clear_values[0].color.float32[0] = 0.0f; - clear_values[0].color.float32[1] = 0.0f; - clear_values[0].color.float32[2] = 0.0f; - clear_values[0].color.float32[3] = 0.0f; - clear_values[1].depthStencil.depth = 1.0f; - clear_values[1].depthStencil.stencil = 0; - - VkRenderPassBeginInfo rp_begin; - rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - rp_begin.pNext = NULL; - rp_begin.renderPass = m_render_pass_HDR_MSAA; - rp_begin.framebuffer = m_pFrameBuffer_HDR_MSAA; - rp_begin.renderArea.offset.x = 0; - rp_begin.renderArea.offset.y = 0; - rp_begin.renderArea.extent.width = m_Width; - rp_begin.renderArea.extent.height = m_Height; - 
rp_begin.clearValueCount = 2; - rp_begin.pClearValues = clear_values; - - vkCmdBeginRenderPass(cmdBuf1, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); - - vkCmdSetScissor(cmdBuf1, 0, 1, &m_rectScissor); - vkCmdSetViewport(cmdBuf1, 0, 1, &m_viewport); - m_GPUTimer.GetTimeStamp(cmdBuf1, "after color RP"); - } - - if (pPerFrame != NULL) - { - // Render skydome - // - if (pState->skyDomeType == 1) - { - XMMATRIX clipToView = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); - m_skyDome.Draw(cmdBuf1, clipToView); - - m_GPUTimer.GetTimeStamp(cmdBuf1, "Skydome cube"); - } - else if (pState->skyDomeType == 0) - { - SkyDomeProc::Constants skyDomeConstants; - skyDomeConstants.invViewProj = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); - skyDomeConstants.vSunDirection = XMVectorSet(1.0f, 0.05f, 0.0f, 0.0f); - skyDomeConstants.turbidity = 10.0f; - skyDomeConstants.rayleigh = 2.0f; - skyDomeConstants.mieCoefficient = 0.005f; - skyDomeConstants.mieDirectionalG = 0.8f; - skyDomeConstants.luminance = 1.0f; - skyDomeConstants.sun = false; - m_skyDomeProc.Draw(cmdBuf1, skyDomeConstants); - - m_GPUTimer.GetTimeStamp(cmdBuf1, "Skydome Proc"); - } - - // Render scene to color buffer - // - if (m_gltfPBR && pPerFrame != NULL) - { - m_gltfPBR->Draw(cmdBuf1); - m_GPUTimer.GetTimeStamp(cmdBuf1, "PBR Forward"); - } - - // draw object's bounding boxes - // - if (m_gltfBBox && pPerFrame != NULL) - { - if (pState->bDrawBoundingBoxes) - { - m_gltfBBox->Draw(cmdBuf1, pPerFrame->mCameraViewProj); - - m_GPUTimer.GetTimeStamp(cmdBuf1, "Bounding Box"); - } - } - - // draw light's frustums - // - if (pState->bDrawLightFrustum && pPerFrame != NULL) - { - SetPerfMarkerBegin(cmdBuf1, "light frustrums"); - - XMVECTOR vCenter = XMVectorSet(0.0f, 0.0f, 0.5f, 0.0f); - XMVECTOR vRadius = XMVectorSet(1.0f, 1.0f, 0.5f, 0.0f); - XMVECTOR vColor = XMVectorSet(1.0f, 1.0f, 1.0f, 1.0f); - for (uint32_t i = 0; i < pPerFrame->lightCount; i++) - { - XMMATRIX spotlightMatrix = XMMatrixInverse(NULL, 
pPerFrame->lights[i].mLightViewProj); - XMMATRIX worldMatrix = spotlightMatrix * pPerFrame->mCameraViewProj; - m_wireframeBox.Draw(cmdBuf1, &m_wireframe, worldMatrix, vCenter, vRadius, vColor); - } - - m_GPUTimer.GetTimeStamp(cmdBuf1, "Light's frustum"); - - SetPerfMarkerEnd(cmdBuf1); - } - } - - { - vkCmdEndRenderPass(cmdBuf1); - SetPerfMarkerEnd(cmdBuf1); - } - - // Resolve MSAA ------------------------------------------------------------------------ - // Ideally this resolve should be part of the previous rende pass, that would save a decompression - // - { - SetPerfMarkerBegin(cmdBuf1, "Resolving MSAA"); - { - VkImageMemoryBarrier barrier[2] = {}; - barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier[0].pNext = NULL; - barrier[0].srcAccessMask = 0; - barrier[0].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier[0].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - barrier[0].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barrier[0].subresourceRange.baseMipLevel = 0; - barrier[0].subresourceRange.levelCount = 1; - barrier[0].subresourceRange.baseArrayLayer = 0; - barrier[0].subresourceRange.layerCount = 1; - barrier[0].image = m_HDR.Resource(); - - barrier[1].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier[1].pNext = NULL; - barrier[1].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - barrier[1].dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barrier[1].oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - barrier[1].newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; - barrier[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier[1].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barrier[1].subresourceRange.baseMipLevel = 0; - barrier[1].subresourceRange.levelCount = 
1; - barrier[1].subresourceRange.baseArrayLayer = 0; - barrier[1].subresourceRange.layerCount = 1; - barrier[1].image = m_HDRMSAA.Resource(); - - vkCmdPipelineBarrier(cmdBuf1, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 0, NULL, 2, barrier); - } - - { - VkImageResolve re = {}; - re.srcOffset.x = 0; - re.srcOffset.y = 0; - re.extent.width = m_Width; - re.extent.height = m_Height; - re.extent.depth = 1; - re.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - re.srcSubresource.layerCount = 1; - re.dstOffset.x = 0; - re.dstOffset.y = 0; - re.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - re.dstSubresource.layerCount = 1; - vkCmdResolveImage(cmdBuf1, m_HDRMSAA.Resource(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_HDR.Resource(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &re); - } - - { - VkImageMemoryBarrier barrier[2] = {}; - barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier[0].pNext = NULL; - barrier[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; - barrier[0].dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier[0].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; - barrier[0].newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; - barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barrier[0].subresourceRange.baseMipLevel = 0; - barrier[0].subresourceRange.levelCount = 1; - barrier[0].subresourceRange.baseArrayLayer = 0; - barrier[0].subresourceRange.layerCount = 1; - barrier[0].image = m_HDR.Resource(); - - barrier[1].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier[1].pNext = NULL; - barrier[1].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; - barrier[1].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - barrier[1].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; - barrier[1].newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - 
barrier[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier[1].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barrier[1].subresourceRange.baseMipLevel = 0; - barrier[1].subresourceRange.levelCount = 1; - barrier[1].subresourceRange.baseArrayLayer = 0; - barrier[1].subresourceRange.layerCount = 1; - barrier[1].image = m_HDRMSAA.Resource(); - - vkCmdPipelineBarrier(cmdBuf1, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, NULL, 0, NULL, 2, barrier); - } - - m_GPUTimer.GetTimeStamp(cmdBuf1, "Resolve MSAA"); - SetPerfMarkerEnd(cmdBuf1); - } - - // Post proc--------------------------------------------------------------------------- - // - - // Bloom, takes HDR as input and applies bloom to it. - // + // Render Scene to the MSAA HDR RT ------------------------------------------------ + // + + { + SetPerfMarkerBegin(cmdBuf1, "Color pass"); + m_gpuTimer.GetTimeStamp(cmdBuf1, "before color RP"); + VkClearValue clear_values[2]; + clear_values[0].color.float32[0] = 0.0f; + clear_values[0].color.float32[1] = 0.0f; + clear_values[0].color.float32[2] = 0.0f; + clear_values[0].color.float32[3] = 0.0f; + clear_values[1].depthStencil.depth = 1.0f; + clear_values[1].depthStencil.stencil = 0; + + VkRenderPassBeginInfo rp_begin; + rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rp_begin.pNext = NULL; + rp_begin.renderPass = m_renderPassHDRMSAA; + rp_begin.framebuffer = m_pFrameBufferHDRMSAA; + rp_begin.renderArea.offset.x = 0; + rp_begin.renderArea.offset.y = 0; + rp_begin.renderArea.extent.width = m_width; + rp_begin.renderArea.extent.height = m_height; + rp_begin.clearValueCount = 2; + rp_begin.pClearValues = clear_values; + + vkCmdBeginRenderPass(cmdBuf1, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + + vkCmdSetScissor(cmdBuf1, 0, 1, &m_rectScissor); + vkCmdSetViewport(cmdBuf1, 0, 1, &m_viewport); + 
m_gpuTimer.GetTimeStamp(cmdBuf1, "after color RP"); + } + + if (pPerFrame != NULL) + { + // Render skydome + // + if (pState->skyDomeType == 1) + { + XMMATRIX clipToView = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); + m_skyDome.Draw(cmdBuf1, clipToView); + + m_gpuTimer.GetTimeStamp(cmdBuf1, "Skydome cube"); + } + else if (pState->skyDomeType == 0) + { + SkyDomeProc::Constants skyDomeConstants; + skyDomeConstants.invViewProj = XMMatrixInverse(NULL, pPerFrame->mCameraViewProj); + skyDomeConstants.vSunDirection = XMVectorSet(1.0f, 0.05f, 0.0f, 0.0f); + skyDomeConstants.turbidity = 10.0f; + skyDomeConstants.rayleigh = 2.0f; + skyDomeConstants.mieCoefficient = 0.005f; + skyDomeConstants.mieDirectionalG = 0.8f; + skyDomeConstants.luminance = 1.0f; + skyDomeConstants.sun = false; + m_skyDomeProc.Draw(cmdBuf1, skyDomeConstants); + + m_gpuTimer.GetTimeStamp(cmdBuf1, "Skydome Proc"); + } + + // Render scene to color buffer + // + if (m_gltfPBR && pPerFrame != NULL) + { + m_gltfPBR->Draw(cmdBuf1); + m_gpuTimer.GetTimeStamp(cmdBuf1, "PBR Forward"); + } + + // draw object's bounding boxes + // + if (m_gltfBBox && pPerFrame != NULL) + { + if (pState->drawBoundingBoxes) + { + m_gltfBBox->Draw(cmdBuf1, pPerFrame->mCameraViewProj); + + m_gpuTimer.GetTimeStamp(cmdBuf1, "Bounding Box"); + } + } + + // draw light's frustums + // + if (pState->drawLightFrustum && pPerFrame != NULL) + { + SetPerfMarkerBegin(cmdBuf1, "light frustrums"); + + XMVECTOR vCenter = XMVectorSet(0.0f, 0.0f, 0.5f, 0.0f); + XMVECTOR vRadius = XMVectorSet(1.0f, 1.0f, 0.5f, 0.0f); + XMVECTOR vColor = XMVectorSet(1.0f, 1.0f, 1.0f, 1.0f); + for (uint32_t i = 0; i < pPerFrame->lightCount; i++) + { + XMMATRIX spotlightMatrix = XMMatrixInverse(NULL, pPerFrame->lights[i].mLightViewProj); + XMMATRIX worldMatrix = spotlightMatrix * pPerFrame->mCameraViewProj; + m_wireframeBox.Draw(cmdBuf1, &m_wireframe, worldMatrix, vCenter, vRadius, vColor); + } + + m_gpuTimer.GetTimeStamp(cmdBuf1, "Light's frustum"); + + 
SetPerfMarkerEnd(cmdBuf1); + } + } + + { + vkCmdEndRenderPass(cmdBuf1); + SetPerfMarkerEnd(cmdBuf1); + } + + // Resolve MSAA ------------------------------------------------------------------------ + // Ideally this resolve should be part of the previous rende pass, that would save a decompression + // + { + SetPerfMarkerBegin(cmdBuf1, "Resolving MSAA"); + { + VkImageMemoryBarrier barrier[2] = {}; + barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier[0].pNext = NULL; + barrier[0].srcAccessMask = 0; + barrier[0].dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier[0].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + barrier[0].newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier[0].subresourceRange.baseMipLevel = 0; + barrier[0].subresourceRange.levelCount = 1; + barrier[0].subresourceRange.baseArrayLayer = 0; + barrier[0].subresourceRange.layerCount = 1; + barrier[0].image = m_hdr.Resource(); + + barrier[1].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier[1].pNext = NULL; + barrier[1].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier[1].dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier[1].oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + barrier[1].newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + barrier[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier[1].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier[1].subresourceRange.baseMipLevel = 0; + barrier[1].subresourceRange.levelCount = 1; + barrier[1].subresourceRange.baseArrayLayer = 0; + barrier[1].subresourceRange.layerCount = 1; + barrier[1].image = m_hdrMSAA.Resource(); + + vkCmdPipelineBarrier(cmdBuf1, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 
0, NULL, 0, NULL, 2, barrier); + } + + { + VkImageResolve re = {}; + re.srcOffset.x = 0; + re.srcOffset.y = 0; + re.extent.width = m_width; + re.extent.height = m_height; + re.extent.depth = 1; + re.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + re.srcSubresource.layerCount = 1; + re.dstOffset.x = 0; + re.dstOffset.y = 0; + re.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + re.dstSubresource.layerCount = 1; + vkCmdResolveImage(cmdBuf1, m_hdrMSAA.Resource(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_hdr.Resource(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &re); + } + + { + VkImageMemoryBarrier barrier[2] = {}; + barrier[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier[0].pNext = NULL; + barrier[0].srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier[0].dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier[0].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + barrier[0].newLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + barrier[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier[0].subresourceRange.baseMipLevel = 0; + barrier[0].subresourceRange.levelCount = 1; + barrier[0].subresourceRange.baseArrayLayer = 0; + barrier[0].subresourceRange.layerCount = 1; + barrier[0].image = m_hdr.Resource(); + + barrier[1].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier[1].pNext = NULL; + barrier[1].srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier[1].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier[1].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + barrier[1].newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + barrier[1].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier[1].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier[1].subresourceRange.baseMipLevel = 0; + 
barrier[1].subresourceRange.levelCount = 1; + barrier[1].subresourceRange.baseArrayLayer = 0; + barrier[1].subresourceRange.layerCount = 1; + barrier[1].image = m_hdrMSAA.Resource(); + + vkCmdPipelineBarrier(cmdBuf1, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, NULL, 0, NULL, 2, barrier); + } + + m_gpuTimer.GetTimeStamp(cmdBuf1, "Resolve MSAA"); + SetPerfMarkerEnd(cmdBuf1); + } + + // Post proc--------------------------------------------------------------------------- + // + + // Bloom, takes HDR as input and applies bloom to it. + // if (0) - { - SetPerfMarkerBegin(cmdBuf1, "post proc"); - - // Downsample pass - m_downSample.Draw(cmdBuf1); - // m_downSample.Gui(); - m_GPUTimer.GetTimeStamp(cmdBuf1, "Downsample"); - - // Bloom pass (needs the downsampled data) - m_bloom.Draw(cmdBuf1); - // m_bloom.Gui(); - m_GPUTimer.GetTimeStamp(cmdBuf1, "Bloom"); - - SetPerfMarkerEnd(cmdBuf1); - } - - // If using FreeSyncHDR we need to to the tonemapping in-place and then apply the GUI, later we'll apply the color conversion into the swapchain - // - if (pSwapChain->GetDisplayMode() != DISPLAYMODE_SDR && !pState->m_dispalyCacaoDirectly) - { - // In place Tonemapping ------------------------------------------------------------------------ - // - { - { - VkImageMemoryBarrier barrier = {}; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.pNext = NULL; - barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; - barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barrier.subresourceRange.baseMipLevel = 0; - barrier.subresourceRange.levelCount = 1; - barrier.subresourceRange.baseArrayLayer = 
0; - barrier.subresourceRange.layerCount = 1; - barrier.image = m_HDR.Resource(); - vkCmdPipelineBarrier(cmdBuf1, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &barrier); - } - - m_toneMappingCS.Draw(cmdBuf1, m_HDRUAV, pState->exposure, pState->toneMapper, m_Width, m_Height); - - { - VkImageMemoryBarrier barrier = {}; - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.pNext = NULL; - barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; - barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; - barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; - barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; - barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; - barrier.subresourceRange.baseMipLevel = 0; - barrier.subresourceRange.levelCount = 1; - barrier.subresourceRange.baseArrayLayer = 0; - barrier.subresourceRange.layerCount = 1; - barrier.image = m_HDR.Resource(); - vkCmdPipelineBarrier(cmdBuf1, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &barrier); - } - } - - // Render HUD ------------------------------------------------------------------------ - // - { - // prepare render pass - { - VkRenderPassBeginInfo rp_begin = {}; - rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - rp_begin.pNext = NULL; - rp_begin.renderPass = m_render_pass_PBR_HDR; - rp_begin.framebuffer = m_pFrameBuffer_PBR_HDR; - rp_begin.renderArea.offset.x = 0; - rp_begin.renderArea.offset.y = 0; - rp_begin.renderArea.extent.width = m_Width; - rp_begin.renderArea.extent.height = m_Height; - rp_begin.clearValueCount = 0; - rp_begin.pClearValues = NULL; - vkCmdBeginRenderPass(cmdBuf1, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); - } - - vkCmdSetScissor(cmdBuf1, 0, 1, &m_rectScissor); - vkCmdSetViewport(cmdBuf1, 0, 1, &m_viewport); - - 
m_ImGUI.Draw(cmdBuf1); - - vkCmdEndRenderPass(cmdBuf1); - - m_GPUTimer.GetTimeStamp(cmdBuf1, "ImGUI Rendering"); - } - } - - // submit command buffer - { - VkResult res = vkEndCommandBuffer(cmdBuf1); - assert(res == VK_SUCCESS); - - VkSubmitInfo submit_info; - submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info.pNext = NULL; - submit_info.waitSemaphoreCount = 0; - submit_info.pWaitSemaphores = NULL; - submit_info.pWaitDstStageMask = NULL; - submit_info.commandBufferCount = 1; - submit_info.pCommandBuffers = &cmdBuf1; - submit_info.signalSemaphoreCount = 0; - submit_info.pSignalSemaphores = NULL; - res = vkQueueSubmit(m_pDevice->GetGraphicsQueue(), 1, &submit_info, VK_NULL_HANDLE); - assert(res == VK_SUCCESS); - } - - // Wait for swapchain (we are going to render to it) ----------------------------------- - // - int imageIndex = pSwapChain->WaitForSwapChain(); - - m_CommandListRing.OnBeginFrame(); - - VkCommandBuffer cmdBuf2 = m_CommandListRing.GetNewCommandList(); - - { - VkCommandBufferBeginInfo cmd_buf_info; - cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; - cmd_buf_info.pNext = NULL; - cmd_buf_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; - cmd_buf_info.pInheritanceInfo = NULL; - VkResult res = vkBeginCommandBuffer(cmdBuf2, &cmd_buf_info); - assert(res == VK_SUCCESS); - } - - SetPerfMarkerBegin(cmdBuf2, "rendering to swap chain"); - - // prepare render pass - { - VkRenderPassBeginInfo rp_begin = {}; - rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - rp_begin.pNext = NULL; - rp_begin.renderPass = pSwapChain->GetRenderPass(); - rp_begin.framebuffer = pSwapChain->GetFramebuffer(imageIndex); - rp_begin.renderArea.offset.x = 0; - rp_begin.renderArea.offset.y = 0; - rp_begin.renderArea.extent.width = m_Width; - rp_begin.renderArea.extent.height = m_Height; - rp_begin.clearValueCount = 0; - rp_begin.pClearValues = NULL; - vkCmdBeginRenderPass(cmdBuf2, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); - } - - 
vkCmdSetScissor(cmdBuf2, 0, 1, &m_rectScissor); - vkCmdSetViewport(cmdBuf2, 0, 1, &m_viewport); - - if (!pState->m_dispalyCacaoDirectly) + { + SetPerfMarkerBegin(cmdBuf1, "post proc"); + + // Downsample pass + m_downSample.Draw(cmdBuf1); + // m_downSample.Gui(); + m_gpuTimer.GetTimeStamp(cmdBuf1, "Downsample"); + + // Bloom pass (needs the downsampled data) + m_bloom.Draw(cmdBuf1); + // m_bloom.Gui(); + m_gpuTimer.GetTimeStamp(cmdBuf1, "Bloom"); + + SetPerfMarkerEnd(cmdBuf1); + } + + // If using FreeSyncHDR we need to to the tonemapping in-place and then apply the GUI, later we'll apply the color conversion into the swapchain + // + if (pSwapChain->GetDisplayMode() != DISPLAYMODE_SDR && !pState->dispalyCacaoDirectly) + { + // In place Tonemapping ------------------------------------------------------------------------ + // + { + { + VkImageMemoryBarrier barrier = {}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = NULL; + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + barrier.image = m_hdr.Resource(); + vkCmdPipelineBarrier(cmdBuf1, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &barrier); + } + + m_toneMappingCS.Draw(cmdBuf1, m_hdrUAV, pState->exposure, pState->toneMapper, m_width, m_height); + + { + VkImageMemoryBarrier barrier = {}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.pNext = NULL; + barrier.srcAccessMask = 
VK_ACCESS_SHADER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = 1; + barrier.image = m_hdr.Resource(); + vkCmdPipelineBarrier(cmdBuf1, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, NULL, 0, NULL, 1, &barrier); + } + } + + // Render HUD ------------------------------------------------------------------------ + // + { + // prepare render pass + { + VkRenderPassBeginInfo rp_begin = {}; + rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rp_begin.pNext = NULL; + rp_begin.renderPass = m_renderPassPBRHDR; + rp_begin.framebuffer = m_pFrameBufferPBRHDR; + rp_begin.renderArea.offset.x = 0; + rp_begin.renderArea.offset.y = 0; + rp_begin.renderArea.extent.width = m_width; + rp_begin.renderArea.extent.height = m_height; + rp_begin.clearValueCount = 0; + rp_begin.pClearValues = NULL; + vkCmdBeginRenderPass(cmdBuf1, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + } + + vkCmdSetScissor(cmdBuf1, 0, 1, &m_rectScissor); + vkCmdSetViewport(cmdBuf1, 0, 1, &m_viewport); + + m_imGUI.Draw(cmdBuf1); + + vkCmdEndRenderPass(cmdBuf1); + + m_gpuTimer.GetTimeStamp(cmdBuf1, "ImGUI Rendering"); + } + } + + // submit command buffer + { + VkResult res = vkEndCommandBuffer(cmdBuf1); + assert(res == VK_SUCCESS); + + VkSubmitInfo submit_info; + submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info.pNext = NULL; + submit_info.waitSemaphoreCount = 0; + submit_info.pWaitSemaphores = NULL; + submit_info.pWaitDstStageMask = NULL; + submit_info.commandBufferCount = 1; + 
submit_info.pCommandBuffers = &cmdBuf1; + submit_info.signalSemaphoreCount = 0; + submit_info.pSignalSemaphores = NULL; + res = vkQueueSubmit(m_pDevice->GetGraphicsQueue(), 1, &submit_info, VK_NULL_HANDLE); + assert(res == VK_SUCCESS); + } + + // Wait for swapchain (we are going to render to it) ----------------------------------- + // + int imageIndex = pSwapChain->WaitForSwapChain(); + + m_commandListRing.OnBeginFrame(); + + VkCommandBuffer cmdBuf2 = m_commandListRing.GetNewCommandList(); + + { + VkCommandBufferBeginInfo cmd_buf_info; + cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + cmd_buf_info.pNext = NULL; + cmd_buf_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + cmd_buf_info.pInheritanceInfo = NULL; + VkResult res = vkBeginCommandBuffer(cmdBuf2, &cmd_buf_info); + assert(res == VK_SUCCESS); + } + + SetPerfMarkerBegin(cmdBuf2, "rendering to swap chain"); + + // prepare render pass + { + VkRenderPassBeginInfo rp_begin = {}; + rp_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + rp_begin.pNext = NULL; + rp_begin.renderPass = pSwapChain->GetRenderPass(); + rp_begin.framebuffer = pSwapChain->GetFramebuffer(imageIndex); + rp_begin.renderArea.offset.x = 0; + rp_begin.renderArea.offset.y = 0; + rp_begin.renderArea.extent.width = m_width; + rp_begin.renderArea.extent.height = m_height; + rp_begin.clearValueCount = 0; + rp_begin.pClearValues = NULL; + vkCmdBeginRenderPass(cmdBuf2, &rp_begin, VK_SUBPASS_CONTENTS_INLINE); + } + + vkCmdSetScissor(cmdBuf2, 0, 1, &m_rectScissor); + vkCmdSetViewport(cmdBuf2, 0, 1, &m_viewport); + + if (!pState->dispalyCacaoDirectly) { if (pSwapChain->GetDisplayMode() != DISPLAYMODE_SDR) { - if (!pState->m_dispalyCacaoDirectly) + if (!pState->dispalyCacaoDirectly) { - m_colorConversionPS.Draw(cmdBuf2, m_HDRSRV); - m_GPUTimer.GetTimeStamp(cmdBuf2, "Color conversion"); + m_colorConversionPS.Draw(cmdBuf2, m_hdrSRV); + m_gpuTimer.GetTimeStamp(cmdBuf2, "Color conversion"); } } else @@ -1277,16 +1261,16 @@ 
void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) // { { - m_toneMappingPS.Draw(cmdBuf2, m_HDRSRV, pState->exposure, pState->toneMapper); - m_GPUTimer.GetTimeStamp(cmdBuf2, "Tone mapping"); + m_toneMappingPS.Draw(cmdBuf2, m_hdrSRV, pState->exposure, pState->toneMapper); + m_gpuTimer.GetTimeStamp(cmdBuf2, "Tone mapping"); } } // Render HUD ------------------------------------------------------------------------ // { - m_ImGUI.Draw(cmdBuf2); - m_GPUTimer.GetTimeStamp(cmdBuf2, "ImGUI Rendering"); + m_imGUI.Draw(cmdBuf2); + m_gpuTimer.GetTimeStamp(cmdBuf2, "ImGUI Rendering"); } } } @@ -1296,72 +1280,68 @@ void SampleRenderer::OnRender(State *pState, SwapChain *pSwapChain) VkDescriptorBufferInfo cbDummyConstantBuffer; uint32_t *dummy; - m_ConstantBufferRing.AllocConstantBuffer(sizeof(*dummy), (void **)&dummy, &cbDummyConstantBuffer); + m_constantBufferRing.AllocConstantBuffer(sizeof(*dummy), (void **)&dummy, &cbDummyConstantBuffer); *dummy = 0; - VkDescriptorSet descriptorSet = m_directOutputDescriptorSets[m_curBackBuffer]; + VkDescriptorSet descriptorSet = m_cacaoApplyDirectDescriptorSets[m_curBackBuffer]; // modify Descriptor set - SetDescriptorSet(m_pDevice->GetDevice(), 1, m_cacaoOutputSRV, &m_directOutputSampler, descriptorSet); - m_ConstantBufferRing.SetDescriptorSet(0, sizeof(*dummy), descriptorSet); + SetDescriptorSet(m_pDevice->GetDevice(), 1, m_cacaoOutputSRV, &m_cacaoApplyDirectSampler, descriptorSet); + m_constantBufferRing.SetDescriptorSet(0, sizeof(*dummy), descriptorSet); // Draw! 
- m_directOutputPS.Draw(cmdBuf2, cbDummyConstantBuffer, descriptorSet); + m_cacaoApplyDirectPS.Draw(cmdBuf2, cbDummyConstantBuffer, descriptorSet); SetPerfMarkerEnd(cmdBuf2); - m_ImGUI.Draw(cmdBuf2); - m_GPUTimer.GetTimeStamp(cmdBuf2, "ImGUI Rendering"); + m_imGUI.Draw(cmdBuf2); + m_gpuTimer.GetTimeStamp(cmdBuf2, "ImGUI Rendering"); } - SetPerfMarkerEnd(cmdBuf2); - - m_GPUTimer.OnEndFrame(); - - vkCmdEndRenderPass(cmdBuf2); - - // Close & Submit the command list ---------------------------------------------------- - // - { - VkResult res = vkEndCommandBuffer(cmdBuf2); - assert(res == VK_SUCCESS); - - VkSemaphore ImageAvailableSemaphore; - VkSemaphore RenderFinishedSemaphores; - VkFence CmdBufExecutedFences; - pSwapChain->GetSemaphores(&ImageAvailableSemaphore, &RenderFinishedSemaphores, &CmdBufExecutedFences); - - VkPipelineStageFlags submitWaitStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - VkSubmitInfo submit_info2; - submit_info2.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; - submit_info2.pNext = NULL; - submit_info2.waitSemaphoreCount = 1; - submit_info2.pWaitSemaphores = &ImageAvailableSemaphore; - submit_info2.pWaitDstStageMask = &submitWaitStage; - submit_info2.commandBufferCount = 1; - submit_info2.pCommandBuffers = &cmdBuf2; - submit_info2.signalSemaphoreCount = 1; - submit_info2.pSignalSemaphores = &RenderFinishedSemaphores; - - res = vkQueueSubmit(m_pDevice->GetGraphicsQueue(), 1, &submit_info2, CmdBufExecutedFences); - assert(res == VK_SUCCESS); - } + SetPerfMarkerEnd(cmdBuf2); + + m_gpuTimer.OnEndFrame(); + + vkCmdEndRenderPass(cmdBuf2); + + // Close & Submit the command list ---------------------------------------------------- + // + { + VkResult res = vkEndCommandBuffer(cmdBuf2); + assert(res == VK_SUCCESS); + + VkSemaphore ImageAvailableSemaphore; + VkSemaphore RenderFinishedSemaphores; + VkFence CmdBufExecutedFences; + pSwapChain->GetSemaphores(&ImageAvailableSemaphore, &RenderFinishedSemaphores, &CmdBufExecutedFences); + + VkPipelineStageFlags 
submitWaitStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + VkSubmitInfo submit_info2; + submit_info2.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submit_info2.pNext = NULL; + submit_info2.waitSemaphoreCount = 1; + submit_info2.pWaitSemaphores = &ImageAvailableSemaphore; + submit_info2.pWaitDstStageMask = &submitWaitStage; + submit_info2.commandBufferCount = 1; + submit_info2.pCommandBuffers = &cmdBuf2; + submit_info2.signalSemaphoreCount = 1; + submit_info2.pSignalSemaphores = &RenderFinishedSemaphores; + + res = vkQueueSubmit(m_pDevice->GetGraphicsQueue(), 1, &submit_info2, CmdBufExecutedFences); + assert(res == VK_SUCCESS); + } } #ifdef FFX_CACAO_ENABLE_PROFILING -void SampleRenderer::GetCacaoTimingValues(State* pState, FfxCacaoDetailedTiming* timings) +void SampleRenderer::GetCacaoTimingValues(State* pState, FFX_CACAO_DetailedTiming* timings) { -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - if (pState->m_useDownsampledSsao) + if (pState->useDownsampledSsao) { - ffxCacaoVkGetDetailedTimings(m_cacaoContextDownsampled, timings); + FFX_CACAO_VkGetDetailedTimings(m_cacaoContextDownsampled, timings); } else { - ffxCacaoVkGetDetailedTimings(m_cacaoContextNative, timings); + FFX_CACAO_VkGetDetailedTimings(m_cacaoContextNative, timings); } -#else - ffxCacaoVkGetDetailedTimings(m_cacaoContextDownsampled, timings); -#endif } #endif diff --git a/sample/src/VK/SampleRenderer.h b/sample/src/VK/SampleRenderer.h index dd8eec9..f3ab168 100644 --- a/sample/src/VK/SampleRenderer.h +++ b/sample/src/VK/SampleRenderer.h @@ -1,6 +1,6 @@ // AMD SampleVK sample code -// -// Copyright(c) 2018 Advanced Micro Devices, Inc.All rights reserved. +// +// Copyright(c) 2021 Advanced Micro Devices, Inc.All rights reserved. // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files(the "Software"), to deal // in the Software without restriction, including without limitation the rights @@ -18,7 +18,7 @@ // THE SOFTWARE. 
#pragma once -#include "ffx_cacao.h" +#include "ffx_cacao_impl.h" // We are queuing (backBufferCount + 0.5) frames, so we need to triple buffer the resources that get modified each frame static const int backBufferCount = 3; @@ -34,122 +34,118 @@ using namespace CAULDRON_VK; class SampleRenderer { public: - struct Spotlight - { - Camera light; - XMVECTOR color; - float intensity; - }; + struct Spotlight + { + Camera light; + XMVECTOR color; + float intensity; + }; - struct State - { - float time; - Camera camera; + struct State + { + float time; + Camera camera; - float exposure; - float iblFactor; - float emmisiveFactor; + float exposure; + float iblFactor; + float emmisiveFactor; - int toneMapper; - int skyDomeType; - bool bDrawBoundingBoxes; + int toneMapper; + int skyDomeType; + bool drawBoundingBoxes; - bool m_useTAA; + bool useTAA; - bool bDrawLightFrustum; + bool drawLightFrustum; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - bool m_useDownsampledSsao; -#endif - FfxCacaoSettings m_cacaoSettings; - bool m_useCacao; - bool m_dispalyCacaoDirectly; + bool useDownsampledSsao; + FFX_CACAO_Settings cacaoSettings; + bool useCacao; + bool dispalyCacaoDirectly; }; - void OnCreate(Device *pDevice, SwapChain *pSwapChain); - void OnDestroy(); + void OnCreate(Device *pDevice, SwapChain *pSwapChain); + void OnDestroy(); - void OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height); - void OnDestroyWindowSizeDependentResources(); + void OnCreateWindowSizeDependentResources(SwapChain *pSwapChain, uint32_t Width, uint32_t Height); + void OnDestroyWindowSizeDependentResources(); - int LoadScene(GLTFCommon *pGLTFCommon, int stage = 0); - void UnloadScene(); + int LoadScene(GLTFCommon *pGLTFCommon, int stage = 0); + void UnloadScene(); #ifdef FFX_CACAO_ENABLE_PROFILING - void GetCacaoTimingValues(State* pState, FfxCacaoDetailedTiming* timings); + void GetCacaoTimingValues(State* pState, FFX_CACAO_DetailedTiming* timings); #endif - const 
std::vector &GetTimingValues() { return m_TimeStamps; } + const std::vector &GetTimingValues() { return m_timeStamps; } - void OnRender(State *pState, SwapChain *pSwapChain); + void OnRender(State *pState, SwapChain *pSwapChain); private: - Device *m_pDevice; + Device *m_pDevice; -#ifdef FFX_CACAO_ENABLE_NATIVE_RESOLUTION - FfxCacaoVkContext *m_cacaoContextNative; -#endif - FfxCacaoVkContext *m_cacaoContextDownsampled; + FFX_CACAO_VkContext *m_cacaoContextNative; + FFX_CACAO_VkContext *m_cacaoContextDownsampled; - uint32_t m_Width; - uint32_t m_Height; + uint32_t m_width; + uint32_t m_height; - VkRect2D m_rectScissor; - VkViewport m_viewport; + VkRect2D m_rectScissor; + VkViewport m_viewport; - // Initialize helper classes - ResourceViewHeaps m_resourceViewHeaps; - UploadHeap m_UploadHeap; - DynamicBufferRing m_ConstantBufferRing; - StaticBufferPool m_VidMemBufferPool; - StaticBufferPool m_SysMemBufferPool; - CommandListRing m_CommandListRing; - GPUTimestamps m_GPUTimer; + // Initialize helper classes + ResourceViewHeaps m_resourceViewHeaps; + UploadHeap m_uploadHeap; + DynamicBufferRing m_constantBufferRing; + StaticBufferPool m_vidMemBufferPool; + StaticBufferPool m_sysMemBufferPool; + CommandListRing m_commandListRing; + GPUTimestamps m_gpuTimer; - //gltf passes - GltfPbrPass *m_gltfPBR; + //gltf passes + GltfPbrPass *m_gltfPBR; GltfPbrPass *m_gltfPbrNonMsaa; - GltfBBoxPass *m_gltfBBox; - GltfDepthPass *m_gltfDepth; - GLTFTexturesAndBuffers *m_pGLTFTexturesAndBuffers; - - // effects - Bloom m_bloom; - SkyDome m_skyDome; - DownSamplePS m_downSample; - SkyDomeProc m_skyDomeProc; - ToneMapping m_toneMappingPS; - ToneMappingCS m_toneMappingCS; - ColorConversionPS m_colorConversionPS; - - // GUI - ImGUI m_ImGUI; - - // Temporary render targets - - // depth buffer - Texture m_depthBuffer; - VkImageView m_depthBufferDSV; - - // shadowmaps - Texture m_shadowMap; - VkImageView m_shadowMapDSV; - VkImageView m_shadowMapSRV; - - // MSAA RT - Texture m_HDRMSAA; - VkImageView 
m_HDRMSAASRV; + GltfBBoxPass *m_gltfBBox; + GltfDepthPass *m_gltfDepth; + GLTFTexturesAndBuffers *m_pGLTFTexturesAndBuffers; + + // effects + Bloom m_bloom; + SkyDome m_skyDome; + DownSamplePS m_downSample; + SkyDomeProc m_skyDomeProc; + ToneMapping m_toneMappingPS; + ToneMappingCS m_toneMappingCS; + ColorConversionPS m_colorConversionPS; + + // GUI + ImGUI m_imGUI; + + // Temporary render targets + + // depth buffer + Texture m_depthBuffer; + VkImageView m_depthBufferDSV; + + // shadowmaps + Texture m_shadowMap; + VkImageView m_shadowMapDSV; + VkImageView m_shadowMapSRV; + + // MSAA RT + Texture m_hdrMSAA; + VkImageView m_hdrMSAASRV; // Non MSAA - Texture m_NormalBufferNonMsaa; - Texture m_DepthBufferNonMsaa; - VkImageView m_NormalBufferNonMsaaView; - VkImageView m_DepthBufferNonMsaaView; + Texture m_normalBufferNonMsaa; + Texture m_depthBufferNonMsaa; + VkImageView m_normalBufferNonMsaaView; + VkImageView m_depthBufferNonMsaaView; - // Resolved RT - Texture m_HDR; - VkImageView m_HDRSRV; - VkImageView m_HDRUAV; + // Resolved RT + Texture m_hdr; + VkImageView m_hdrSRV; + VkImageView m_hdrUAV; // CACAO Texture m_cacaoOutput; @@ -160,25 +156,25 @@ class SampleRenderer uint32_t m_curBackBuffer; - VkSampler m_directOutputSampler; - VkDescriptorSet m_directOutputDescriptorSets[backBufferCount]; - VkDescriptorSetLayout m_directOutputDescriptorSetLayout; - PostProcPS m_directOutputPS; + VkSampler m_cacaoApplyDirectSampler; + VkDescriptorSet m_cacaoApplyDirectDescriptorSets[backBufferCount]; + VkDescriptorSetLayout m_cacaoApplyDirectDescriptorSetLayout; + PostProcPS m_cacaoApplyDirectPS; - // widgets - Wireframe m_wireframe; - WireframeBox m_wireframeBox; + // widgets + Wireframe m_wireframe; + WireframeBox m_wireframeBox; - VkRenderPass m_render_pass_shadow; - VkRenderPass m_render_pass_HDR_MSAA; - VkRenderPass m_render_pass_PBR_HDR; - VkRenderPass m_render_pass_non_msaa; + VkRenderPass m_renderPassShadow; + VkRenderPass m_renderPassHDRMSAA; + VkRenderPass 
m_renderPassPBRHDR; + VkRenderPass m_renderPassNonMSAA; - VkFramebuffer m_pFrameBuffer_shadow; - VkFramebuffer m_pFrameBuffer_HDR_MSAA; - VkFramebuffer m_pFrameBuffer_PBR_HDR; - VkFramebuffer m_pFrameBuffer_non_msaa; + VkFramebuffer m_pFrameBufferShadow; + VkFramebuffer m_pFrameBufferHDRMSAA; + VkFramebuffer m_pFrameBufferPBRHDR; + VkFramebuffer m_pFrameBufferNonMSAA; - std::vector m_TimeStamps; + std::vector m_timeStamps; };