From e5fb4e68a65054f92e9f4aaf979edeecc606bbfd Mon Sep 17 00:00:00 2001
From: Niklas Haas
Date: Fri, 21 Apr 2023 15:22:14 +0200
Subject: [PATCH] vulkan: switch to VK_KHR_synchronization2

Largely boilerplate changes, but more inherently future-compatible.
---
 src/tests/vulkan.c      |  2 +-
 src/vulkan/command.c    | 77 ++++++++++++++++----------------
 src/vulkan/command.h    | 34 +++++---------
 src/vulkan/common.h     |  8 ++--
 src/vulkan/context.c    | 11 +++--
 src/vulkan/gpu.c        |  8 ++--
 src/vulkan/gpu.h        |  8 ++--
 src/vulkan/gpu_buf.c    | 85 +++++++++++++++++++----------------
 src/vulkan/gpu_pass.c   | 63 ++++++++++++++------------
 src/vulkan/gpu_tex.c    | 98 +++++++++++++++++++++++------------------
 src/vulkan/utils.h      |  4 +-
 src/vulkan/utils_gen.py |  6 +--
 12 files changed, 206 insertions(+), 198 deletions(-)

diff --git a/src/tests/vulkan.c b/src/tests/vulkan.c
index 9fae4ba8..5b839ded 100644
--- a/src/tests/vulkan.c
+++ b/src/tests/vulkan.c
@@ -53,7 +53,7 @@ static void vulkan_interop_tests(pl_vulkan pl_vk,
     struct vk_cmd *cmd = vk_cmd_begin(vk, vk->pool_graphics, NULL);
     REQUIRE(cmd);
     struct pl_sync_vk *sync_vk = PL_PRIV(sync);
-    vk_cmd_sig(cmd, (pl_vulkan_sem){ sync_vk->signal });
+    vk_cmd_sig(cmd, VK_PIPELINE_STAGE_2_NONE, (pl_vulkan_sem){ sync_vk->signal });
     REQUIRE(vk_cmd_submit(vk, &cmd));
 
     // Do something with the image again to "import" it
diff --git a/src/vulkan/command.c b/src/vulkan/command.c
index 59c09c9b..dc8aae40 100644
--- a/src/vulkan/command.c
+++ b/src/vulkan/command.c
@@ -51,10 +51,7 @@ static void vk_cmd_reset(struct vk_ctx *vk, struct vk_cmd *cmd)
 
     cmd->callbacks.num = 0;
     cmd->deps.num = 0;
-    cmd->depstages.num = 0;
-    cmd->depvalues.num = 0;
     cmd->sigs.num = 0;
-    cmd->sigvalues.num = 0;
 }
 
 static void vk_cmd_destroy(struct vk_ctx *vk, struct vk_cmd *cmd)
@@ -130,33 +127,40 @@ void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback,
     });
 }
 
-void vk_cmd_dep(struct vk_cmd *cmd, VkPipelineStageFlags stage, pl_vulkan_sem dep)
+void vk_cmd_dep(struct vk_cmd *cmd, VkPipelineStageFlags2 stage, pl_vulkan_sem dep)
 {
-    assert(cmd->deps.num == cmd->depstages.num);
-    assert(cmd->deps.num == cmd->depvalues.num);
-    PL_ARRAY_APPEND(cmd, cmd->deps, dep.sem);
-    PL_ARRAY_APPEND(cmd, cmd->depvalues, dep.value);
-    PL_ARRAY_APPEND(cmd, cmd->depstages, stage);
+    PL_ARRAY_APPEND(cmd, cmd->deps, (VkSemaphoreSubmitInfo) {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+        .semaphore = dep.sem,
+        .value = dep.value,
+        .stageMask = stage,
+    });
 }
 
-void vk_cmd_sig(struct vk_cmd *cmd, pl_vulkan_sem sig)
+void vk_cmd_sig(struct vk_cmd *cmd, VkPipelineStageFlags2 stage, pl_vulkan_sem sig)
 {
+    VkSemaphoreSubmitInfo sinfo = {
+        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+        .semaphore = sig.sem,
+        .value = sig.value,
+        .stageMask = stage,
+    };
+
     // Try updating existing semaphore signal operations in-place
     for (int i = 0; i < cmd->sigs.num; i++) {
-        if (cmd->sigs.elem[i] == sig.sem) {
-            cmd->sigvalues.elem[i] = PL_MAX(cmd->sigvalues.elem[i], sig.value);
+        if (cmd->sigs.elem[i].semaphore == sig.sem) {
+            pl_assert(sig.value > cmd->sigs.elem[i].value);
+            cmd->sigs.elem[i] = sinfo;
             return;
         }
     }
 
-    assert(cmd->sigs.num == cmd->sigvalues.num);
-    PL_ARRAY_APPEND(cmd, cmd->sigs, sig.sem);
-    PL_ARRAY_APPEND(cmd, cmd->sigvalues, sig.value);
+    PL_ARRAY_APPEND(cmd, cmd->sigs, sinfo);
 }
 
 struct vk_sync_scope vk_sem_barrier(struct vk_ctx *vk, struct vk_cmd *cmd,
-                                    struct vk_sem *sem, VkPipelineStageFlags stage,
-                                    VkAccessFlags access, bool is_trans)
+                                    struct vk_sem *sem, VkPipelineStageFlags2 stage,
+                                    VkAccessFlags2 access, bool is_trans)
 {
     bool is_write = (access & vk_access_write) || is_trans;
 
@@ -299,7 +303,7 @@ struct vk_cmd *vk_cmd_begin(struct vk_ctx *vk, struct vk_cmdpool *pool,
     PL_VK_NAME(SEMAPHORE, cmd->sync.sem, debug_tag);
 
     cmd->sync.value++;
-    vk_cmd_sig(cmd, cmd->sync);
+    vk_cmd_sig(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, cmd->sync);
 
     return cmd;
 
 error:
@@ -320,24 +324,17 @@ bool vk_cmd_submit(struct vk_ctx *vk, struct vk_cmd **pcmd)
 
     VK(vk->EndCommandBuffer(cmd->buf));
 
-    VkTimelineSemaphoreSubmitInfo tinfo = {
-        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
-        .waitSemaphoreValueCount = cmd->depvalues.num,
-        .pWaitSemaphoreValues = cmd->depvalues.elem,
-        .signalSemaphoreValueCount = cmd->sigvalues.num,
-        .pSignalSemaphoreValues = cmd->sigvalues.elem,
-    };
-
-    VkSubmitInfo sinfo = {
-        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
-        .pNext = &tinfo,
-        .commandBufferCount = 1,
-        .pCommandBuffers = &cmd->buf,
-        .waitSemaphoreCount = cmd->deps.num,
-        .pWaitSemaphores = cmd->deps.elem,
-        .pWaitDstStageMask = cmd->depstages.elem,
-        .signalSemaphoreCount = cmd->sigs.num,
-        .pSignalSemaphores = cmd->sigs.elem,
+    VkSubmitInfo2 sinfo = {
+        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
+        .waitSemaphoreInfoCount = cmd->deps.num,
+        .pWaitSemaphoreInfos = cmd->deps.elem,
+        .signalSemaphoreInfoCount = cmd->sigs.num,
+        .pSignalSemaphoreInfos = cmd->sigs.elem,
+        .commandBufferInfoCount = 1,
+        .pCommandBufferInfos = &(VkCommandBufferSubmitInfo) {
+            .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
+            .commandBuffer = cmd->buf,
+        },
     };
 
     if (pl_msg_test(vk->log, PL_LOG_TRACE)) {
@@ -345,20 +342,20 @@ bool vk_cmd_submit(struct vk_ctx *vk, struct vk_cmd **pcmd)
                  (void *) cmd->buf, (void *) cmd->queue, pool->qf);
         for (int n = 0; n < cmd->deps.num; n++) {
             PL_TRACE(vk, " waits on semaphore 0x%"PRIx64" = %"PRIu64,
-                     (uint64_t) cmd->deps.elem[n], cmd->depvalues.elem[n]);
+                     (uint64_t) cmd->deps.elem[n].semaphore, cmd->deps.elem[n].value);
         }
         for (int n = 0; n < cmd->sigs.num; n++) {
             PL_TRACE(vk, " signals semaphore 0x%"PRIx64" = %"PRIu64,
-                     (uint64_t) cmd->sigs.elem[n], cmd->sigvalues.elem[n]);
+                     (uint64_t) cmd->sigs.elem[n].semaphore, cmd->sigs.elem[n].value);
         }
         if (cmd->callbacks.num)
             PL_TRACE(vk, " signals %d callbacks", cmd->callbacks.num);
     }
 
     vk->lock_queue(vk->queue_ctx, pool->qf, cmd->qindex);
-    VkResult res = vk->QueueSubmit(cmd->queue, 1, &sinfo, VK_NULL_HANDLE);
+    VkResult res = vk->QueueSubmit2(cmd->queue, 1, &sinfo, VK_NULL_HANDLE);
     vk->unlock_queue(vk->queue_ctx, pool->qf, cmd->qindex);
-    PL_VK_ASSERT(res, "vkQueueSubmit");
+    PL_VK_ASSERT(res, "vkQueueSubmit2");
 
     pl_mutex_lock(&vk->lock);
     PL_ARRAY_APPEND(vk->alloc, vk->cmds_pending, cmd);
diff --git a/src/vulkan/command.h b/src/vulkan/command.h
index 734d3c4c..ec171ca4 100644
--- a/src/vulkan/command.h
+++ b/src/vulkan/command.h
@@ -45,15 +45,9 @@ struct vk_cmd {
     VkQueue queue;          // the submission queue (for recording/pending)
     int qindex;             // the index of `queue` in `pool`
     VkCommandBuffer buf;    // the command buffer itself
-    // The semaphores represent dependencies that need to complete before
-    // this command can be executed. These are *not* owned by the vk_cmd
-    PL_ARRAY(VkSemaphore) deps;
-    PL_ARRAY(VkPipelineStageFlags) depstages;
-    PL_ARRAY(uint64_t) depvalues;
-    // The signals represent semaphores that fire once the command finishes
-    // executing. These are also not owned by the vk_cmd
-    PL_ARRAY(VkSemaphore) sigs;
-    PL_ARRAY(uint64_t) sigvalues;
+    // Command dependencies and signals. Not owned by the vk_cmd.
+    PL_ARRAY(VkSemaphoreSubmitInfo) deps;
+    PL_ARRAY(VkSemaphoreSubmitInfo) sigs;
     // "Callbacks" to fire once a command completes. These are used for
     // multiple purposes, ranging from resource deallocation to fencing.
     PL_ARRAY(struct vk_callback) callbacks;
@@ -66,18 +60,18 @@ void vk_cmd_callback(struct vk_cmd *cmd, vk_cb callback,
 
 // Associate a raw dependency for the current command. This semaphore must
 // signal by the corresponding stage before the command may execute.
-void vk_cmd_dep(struct vk_cmd *cmd, VkPipelineStageFlags stage, pl_vulkan_sem dep);
+void vk_cmd_dep(struct vk_cmd *cmd, VkPipelineStageFlags2 stage, pl_vulkan_sem dep);
 
 // Associate a raw signal with the current command. This semaphore will signal
-// after the command completes.
-void vk_cmd_sig(struct vk_cmd *cmd, pl_vulkan_sem sig);
+// after the given stage completes.
+void vk_cmd_sig(struct vk_cmd *cmd, VkPipelineStageFlags2 stage, pl_vulkan_sem sig);
 
 // Synchronization scope
 struct vk_sync_scope {
     pl_vulkan_sem sync;         // semaphore of last access
     VkQueue queue;              // source queue of last access
-    VkPipelineStageFlags stage; // stage bitmask of last access
-    VkAccessFlags access;       // access type bitmask
+    VkPipelineStageFlags2 stage;// stage bitmask of last access
+    VkAccessFlags2 access;      // access type bitmask
 };
 
 // Synchronization primitive
@@ -85,22 +79,14 @@ struct vk_sem {
     struct vk_sync_scope read, write;
 };
 
-static inline void vk_sem_init(struct vk_sem *sem)
-{
-    *sem = (struct vk_sem) {
-        .write.stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-        .read.stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-    };
-};
-
 // Updates the `vk_sem` state for a given access. If `is_trans` is set, this
 // access is treated as a write (since it alters the resource's state).
 //
 // Returns a struct describing the previous access to a resource. A pipeline
 // barrier is only required if the previous access scope is nonzero.
 struct vk_sync_scope vk_sem_barrier(struct vk_ctx *vk, struct vk_cmd *cmd,
-                                    struct vk_sem *sem, VkPipelineStageFlags stage,
-                                    VkAccessFlags access, bool is_trans);
+                                    struct vk_sem *sem, VkPipelineStageFlags2 stage,
+                                    VkAccessFlags2 access, bool is_trans);
 
 // Command pool / queue family hybrid abstraction
 struct vk_cmdpool {
diff --git a/src/vulkan/common.h b/src/vulkan/common.h
index 6db42b1d..dcfefb6c 100644
--- a/src/vulkan/common.h
+++ b/src/vulkan/common.h
@@ -139,16 +139,14 @@ struct vk_ctx {
     PL_VK_FUN(CmdDrawIndexed);
     PL_VK_FUN(CmdEndDebugUtilsLabelEXT);
    PL_VK_FUN(CmdEndRenderPass);
-    PL_VK_FUN(CmdPipelineBarrier);
+    PL_VK_FUN(CmdPipelineBarrier2);
     PL_VK_FUN(CmdPushConstants);
     PL_VK_FUN(CmdPushDescriptorSetKHR);
     PL_VK_FUN(CmdResetQueryPool);
-    PL_VK_FUN(CmdSetEvent);
     PL_VK_FUN(CmdSetScissor);
     PL_VK_FUN(CmdSetViewport);
     PL_VK_FUN(CmdUpdateBuffer);
-    PL_VK_FUN(CmdWaitEvents);
-    PL_VK_FUN(CmdWriteTimestamp);
+    PL_VK_FUN(CmdWriteTimestamp2);
     PL_VK_FUN(CreateBuffer);
     PL_VK_FUN(CreateBufferView);
     PL_VK_FUN(CreateCommandPool);
@@ -212,7 +210,7 @@ struct vk_ctx {
     PL_VK_FUN(InvalidateMappedMemoryRanges);
     PL_VK_FUN(MapMemory);
     PL_VK_FUN(QueuePresentKHR);
-    PL_VK_FUN(QueueSubmit);
+    PL_VK_FUN(QueueSubmit2);
     PL_VK_FUN(QueueWaitIdle);
     PL_VK_FUN(ResetEvent);
     PL_VK_FUN(ResetFences);
diff --git a/src/vulkan/context.c b/src/vulkan/context.c
index 3565351d..c728ef5f 100644
--- a/src/vulkan/context.c
+++ b/src/vulkan/context.c
@@ -235,7 +235,8 @@ const VkPhysicalDeviceFeatures2 pl_vulkan_recommended_features = {
 // Required features
 static const VkPhysicalDeviceVulkan13Features required_vk13 = {
     .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
-    .maintenance4 = true
+    .maintenance4 = true,
+    .synchronization2 = true,
 };
 
 static const VkPhysicalDeviceVulkan12Features required_vk12 = {
@@ -283,15 +284,13 @@ static const struct vk_fun vk_dev_funs[] = {
     PL_VK_DEV_FUN(CmdDrawIndexed),
     PL_VK_DEV_FUN(CmdEndDebugUtilsLabelEXT),
     PL_VK_DEV_FUN(CmdEndRenderPass),
-    PL_VK_DEV_FUN(CmdPipelineBarrier),
+    PL_VK_DEV_FUN(CmdPipelineBarrier2),
     PL_VK_DEV_FUN(CmdPushConstants),
     PL_VK_DEV_FUN(CmdResetQueryPool),
-    PL_VK_DEV_FUN(CmdSetEvent),
     PL_VK_DEV_FUN(CmdSetScissor),
     PL_VK_DEV_FUN(CmdSetViewport),
     PL_VK_DEV_FUN(CmdUpdateBuffer),
-    PL_VK_DEV_FUN(CmdWaitEvents),
-    PL_VK_DEV_FUN(CmdWriteTimestamp),
+    PL_VK_DEV_FUN(CmdWriteTimestamp2),
     PL_VK_DEV_FUN(CreateBuffer),
     PL_VK_DEV_FUN(CreateBufferView),
     PL_VK_DEV_FUN(CreateCommandPool),
@@ -344,7 +343,7 @@ static const struct vk_fun vk_dev_funs[] = {
     PL_VK_DEV_FUN(GetQueryPoolResults),
     PL_VK_DEV_FUN(InvalidateMappedMemoryRanges),
     PL_VK_DEV_FUN(MapMemory),
-    PL_VK_DEV_FUN(QueueSubmit),
+    PL_VK_DEV_FUN(QueueSubmit2),
     PL_VK_DEV_FUN(QueueWaitIdle),
     PL_VK_DEV_FUN(ResetEvent),
     PL_VK_DEV_FUN(ResetFences),
diff --git a/src/vulkan/gpu.c b/src/vulkan/gpu.c
index 4a444dab..0f27cb54 100644
--- a/src/vulkan/gpu.c
+++ b/src/vulkan/gpu.c
@@ -133,8 +133,8 @@ static void timer_begin(pl_gpu gpu, struct vk_cmd *cmd, pl_timer timer)
         vk->ResetQueryPool(vk->dev, timer->qpool, timer->index_write, 2);
     }
 
-    vk->CmdWriteTimestamp(cmd->buf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
-                          timer->qpool, timer->index_write);
+    vk->CmdWriteTimestamp2(cmd->buf, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                           timer->qpool, timer->index_write);
 
     p->cmd_timer = timer;
 }
@@ -207,8 +207,8 @@ bool _end_cmd(pl_gpu gpu, struct vk_cmd **pcmd, bool submit)
     if (p->cmd_timer) {
         pl_timer timer = p->cmd_timer;
-        vk->CmdWriteTimestamp(cmd->buf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
-                              timer->qpool, timer->index_write + 1);
+        vk->CmdWriteTimestamp2(cmd->buf, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+                               timer->qpool, timer->index_write + 1);
 
         timer->pending |= timer_bit(timer->index_write);
         vk_cmd_callback(cmd, (vk_cb) timer_end_cb, timer,
diff --git a/src/vulkan/gpu.h b/src/vulkan/gpu.h
index e9d53e28..7ca130aa 100644
--- a/src/vulkan/gpu.h
+++ b/src/vulkan/gpu.h
@@ -127,8 +127,8 @@ bool vk_tex_upload(pl_gpu, const struct pl_tex_transfer_params *);
 bool vk_tex_download(pl_gpu, const struct pl_tex_transfer_params *);
 bool vk_tex_poll(pl_gpu, pl_tex, uint64_t timeout);
 bool vk_tex_export(pl_gpu, pl_tex, pl_sync);
-void vk_tex_barrier(pl_gpu, struct vk_cmd *, pl_tex, VkPipelineStageFlags,
-                    VkAccessFlags, VkImageLayout, uint32_t qf);
+void vk_tex_barrier(pl_gpu, struct vk_cmd *, pl_tex, VkPipelineStageFlags2,
+                    VkAccessFlags2, VkImageLayout, uint32_t qf);
 
 struct pl_buf_vk {
     pl_rc_t rc;
@@ -152,8 +152,8 @@ bool vk_buf_export(pl_gpu, pl_buf);
 bool vk_buf_poll(pl_gpu, pl_buf, uint64_t timeout);
 
 // Helper to ease buffer barrier creation. (`offset` is relative to pl_buf)
-void vk_buf_barrier(pl_gpu, struct vk_cmd *, pl_buf, VkPipelineStageFlags,
-                    VkAccessFlags, size_t offset, size_t size, bool export);
+void vk_buf_barrier(pl_gpu, struct vk_cmd *, pl_buf, VkPipelineStageFlags2,
+                    VkAccessFlags2, size_t offset, size_t size, bool export);
 
 // Flush visible writes to a buffer made by the API
 void vk_buf_flush(pl_gpu, struct vk_cmd *, pl_buf, size_t offset, size_t size);
diff --git a/src/vulkan/gpu_buf.c b/src/vulkan/gpu_buf.c
index c82e8c91..8b8985a7 100644
--- a/src/vulkan/gpu_buf.c
+++ b/src/vulkan/gpu_buf.c
@@ -18,7 +18,7 @@
 #include "gpu.h"
 
 void vk_buf_barrier(pl_gpu gpu, struct vk_cmd *cmd, pl_buf buf,
-                    VkPipelineStageFlags stage, VkAccessFlags access,
+                    VkPipelineStageFlags2 stage, VkAccessFlags2 access,
                     size_t offset, size_t size, bool export)
 {
     struct pl_vk *p = PL_PRIV(gpu);
@@ -49,20 +49,26 @@ void vk_buf_barrier(pl_gpu gpu, struct vk_cmd *cmd, pl_buf buf,
     // CONCURRENT buffers require transitioning to/from IGNORED, EXCLUSIVE
     // buffers require transitioning to/from the concrete QF index
     uint32_t qf = vk->pools.num > 1 ? VK_QUEUE_FAMILY_IGNORED : cmd->pool->qf;
-    VkBufferMemoryBarrier barr = {
-        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
-        .srcQueueFamilyIndex = buf_vk->exported ? VK_QUEUE_FAMILY_EXTERNAL_KHR : qf,
-        .dstQueueFamilyIndex = export ? VK_QUEUE_FAMILY_EXTERNAL_KHR : qf,
-        .srcAccessMask = last.access,
-        .dstAccessMask = access,
-        .buffer = buf_vk->mem.buf,
-        .offset = buf_vk->mem.offset + offset,
-        .size = size,
-    };
-
-    if (last.access || barr.srcQueueFamilyIndex != barr.dstQueueFamilyIndex) {
-        vk->CmdPipelineBarrier(cmd->buf, last.stage, stage, 0, 0, NULL,
-                               1, &barr, 0, NULL);
+    uint32_t src_qf = buf_vk->exported ? VK_QUEUE_FAMILY_EXTERNAL_KHR : qf;
+    uint32_t dst_qf = export ? VK_QUEUE_FAMILY_EXTERNAL_KHR : qf;
+
+    if (last.access || src_qf != dst_qf) {
+        vk->CmdPipelineBarrier2(cmd->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .bufferMemoryBarrierCount = 1,
+            .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) {
+                .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+                .srcStageMask = last.stage,
+                .srcAccessMask = last.access,
+                .dstStageMask = stage,
+                .dstAccessMask = access,
+                .srcQueueFamilyIndex = src_qf,
+                .dstQueueFamilyIndex = dst_qf,
+                .buffer = buf_vk->mem.buf,
+                .offset = buf_vk->mem.offset + offset,
+                .size = size,
+            },
+        });
     }
 
     buf_vk->needs_flush = false;
@@ -97,7 +103,6 @@ pl_buf vk_buf_create(pl_gpu gpu, const struct pl_buf_params *params)
     struct pl_buf_vk *buf_vk = PL_PRIV(buf);
     pl_rc_init(&buf_vk->rc);
-    vk_sem_init(&buf_vk->sem);
 
     struct vk_malloc_params mparams = {
         .reqs = {
@@ -283,21 +288,23 @@ void vk_buf_flush(pl_gpu gpu, struct vk_cmd *cmd, pl_buf buf,
     if (!can_read && !can_write)
         return;
 
-    VkBufferMemoryBarrier buffBarrier = {
-        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
-        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-        .srcAccessMask = buf_vk->sem.write.access,
-        .dstAccessMask = (can_read ? VK_ACCESS_HOST_READ_BIT : 0)
-                       | (can_write ? VK_ACCESS_HOST_WRITE_BIT : 0),
-        .buffer = buf_vk->mem.buf,
-        .offset = buf_vk->mem.offset + offset,
-        .size = size,
-    };
-
-    vk->CmdPipelineBarrier(cmd->buf, buf_vk->sem.write.stage,
-                           VK_PIPELINE_STAGE_HOST_BIT, 0,
-                           0, NULL, 1, &buffBarrier, 0, NULL);
+    vk->CmdPipelineBarrier2(cmd->buf, &(VkDependencyInfo) {
+        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+        .bufferMemoryBarrierCount = 1,
+        .pBufferMemoryBarriers = &(VkBufferMemoryBarrier2) {
+            .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+            .srcStageMask = buf_vk->sem.write.stage,
+            .srcAccessMask = buf_vk->sem.write.access,
+            .dstStageMask = VK_PIPELINE_STAGE_2_HOST_BIT,
+            .dstAccessMask = (can_read ? VK_ACCESS_2_HOST_READ_BIT : 0)
+                           | (can_write ? VK_ACCESS_2_HOST_WRITE_BIT : 0),
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .buffer = buf_vk->mem.buf,
+            .offset = buf_vk->mem.offset + offset,
+            .size = size,
+        },
+    });
 
     // We need to hold on to the buffer until this barrier completes
     vk_cmd_callback(cmd, (vk_cb) invalidate_buf, gpu, buf);
@@ -348,8 +355,8 @@ void vk_buf_write(pl_gpu gpu, pl_buf buf, size_t offset,
             return;
         }
 
-        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                       VK_ACCESS_TRANSFER_WRITE_BIT, offset, size, false);
+        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT,
+                       VK_ACCESS_2_TRANSFER_WRITE_BIT, offset, size, false);
 
         // Vulkan requires `size` to be a multiple of 4, so we need to make
         // sure to handle the end separately if the original data is not
@@ -425,10 +432,10 @@ void vk_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset,
         return;
     }
 
-    vk_buf_barrier(gpu, cmd, dst, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                   VK_ACCESS_TRANSFER_WRITE_BIT, dst_offset, size, false);
-    vk_buf_barrier(gpu, cmd, src, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                   VK_ACCESS_TRANSFER_READ_BIT, src_offset, size, false);
+    vk_buf_barrier(gpu, cmd, dst, VK_PIPELINE_STAGE_2_COPY_BIT,
+                   VK_ACCESS_2_TRANSFER_WRITE_BIT, dst_offset, size, false);
+    vk_buf_barrier(gpu, cmd, src, VK_PIPELINE_STAGE_2_COPY_BIT,
+                   VK_ACCESS_2_TRANSFER_READ_BIT, src_offset, size, false);
 
     VkBufferCopy region = {
         .srcOffset = src_vk->mem.offset + src_offset,
@@ -457,8 +464,8 @@ bool vk_buf_export(pl_gpu gpu, pl_buf buf)
 
     // For the queue family ownership transfer, we can ignore all pipeline
     // stages since the synchronization via fences/semaphores is required
-    vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0,
-                   0, buf->params.size, true);
+    vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_NONE, 0, 0,
+                   buf->params.size, true);
 
     return CMD_SUBMIT(&cmd);
 
diff --git a/src/vulkan/gpu_pass.c b/src/vulkan/gpu_pass.c
index 92b78ead..2aaf8337 100644
--- a/src/vulkan/gpu_pass.c
+++ b/src/vulkan/gpu_pass.c
@@ -664,10 +664,9 @@ no_descriptors: ;
     return pass;
 }
 
-static const VkPipelineStageFlags passStages[] = {
-    [PL_PASS_RASTER] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
-                       VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
-    [PL_PASS_COMPUTE] = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
+static const VkPipelineStageFlags2 shaderStages[] = {
+    [PL_PASS_RASTER] = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
+    [PL_PASS_COMPUTE] = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
 };
 
 static void vk_update_descriptor(pl_gpu gpu, struct vk_cmd *cmd, pl_pass pass,
@@ -687,11 +686,11 @@ static void vk_update_descriptor(pl_gpu gpu, struct vk_cmd *cmd, pl_pass pass,
         .descriptorType = dsType[desc->type],
     };
 
-    static const VkAccessFlags access[PL_DESC_ACCESS_COUNT] = {
-        [PL_DESC_ACCESS_READONLY] = VK_ACCESS_SHADER_READ_BIT,
-        [PL_DESC_ACCESS_WRITEONLY] = VK_ACCESS_SHADER_WRITE_BIT,
-        [PL_DESC_ACCESS_READWRITE] = VK_ACCESS_SHADER_READ_BIT |
-                                     VK_ACCESS_SHADER_WRITE_BIT,
+    static const VkAccessFlags2 storageAccess[PL_DESC_ACCESS_COUNT] = {
+        [PL_DESC_ACCESS_READONLY] = VK_ACCESS_2_SHADER_STORAGE_READ_BIT,
+        [PL_DESC_ACCESS_WRITEONLY] = VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
+        [PL_DESC_ACCESS_READWRITE] = VK_ACCESS_2_SHADER_STORAGE_READ_BIT |
+                                     VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT,
     };
 
     switch (desc->type) {
         pl_tex tex = db.object;
         struct pl_tex_vk *tex_vk = PL_PRIV(tex);
 
-        vk_tex_barrier(gpu, cmd, tex, passStages[pass->params.type],
-                       VK_ACCESS_SHADER_READ_BIT,
+        vk_tex_barrier(gpu, cmd, tex, shaderStages[pass->params.type],
+                       VK_ACCESS_2_SHADER_SAMPLED_READ_BIT,
                        VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
                        VK_QUEUE_FAMILY_IGNORED);
 
@@ -718,8 +717,8 @@ static void vk_update_descriptor(pl_gpu gpu, struct vk_cmd *cmd, pl_pass pass,
         pl_tex tex = db.object;
         struct pl_tex_vk *tex_vk = PL_PRIV(tex);
 
-        vk_tex_barrier(gpu, cmd, tex, passStages[pass->params.type],
-                       access[desc->access], VK_IMAGE_LAYOUT_GENERAL,
+        vk_tex_barrier(gpu, cmd, tex, shaderStages[pass->params.type],
+                       storageAccess[desc->access], VK_IMAGE_LAYOUT_GENERAL,
                        VK_QUEUE_FAMILY_IGNORED);
 
         VkDescriptorImageInfo *iinfo = &pass_vk->dsiinfo[idx];
@@ -736,8 +735,12 @@ static void vk_update_descriptor(pl_gpu gpu, struct vk_cmd *cmd, pl_pass pass,
         pl_buf buf = db.object;
         struct pl_buf_vk *buf_vk = PL_PRIV(buf);
 
-        vk_buf_barrier(gpu, cmd, buf, passStages[pass->params.type],
-                       access[desc->access], 0, buf->params.size, false);
+        VkAccessFlags2 access = VK_ACCESS_2_UNIFORM_READ_BIT;
+        if (desc->type == PL_DESC_BUF_STORAGE)
+            access = storageAccess[desc->access];
+
+        vk_buf_barrier(gpu, cmd, buf, shaderStages[pass->params.type],
+                       access, 0, buf->params.size, false);
 
         VkDescriptorBufferInfo *binfo = &pass_vk->dsbinfo[idx];
         *binfo = (VkDescriptorBufferInfo) {
@@ -754,8 +757,12 @@ static void vk_update_descriptor(pl_gpu gpu, struct vk_cmd *cmd, pl_pass pass,
         pl_buf buf = db.object;
         struct pl_buf_vk *buf_vk = PL_PRIV(buf);
 
-        vk_buf_barrier(gpu, cmd, buf, passStages[pass->params.type],
-                       access[desc->access], 0, buf->params.size, false);
+        VkAccessFlags2 access = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT;
+        if (desc->type == PL_DESC_BUF_TEXEL_STORAGE)
+            access = storageAccess[desc->access];
+
+        vk_buf_barrier(gpu, cmd, buf, shaderStages[pass->params.type],
+                       access, 0, buf->params.size, false);
 
         wds->pTexelBufferView = &buf_vk->view;
         return;
@@ -921,20 +928,22 @@ void vk_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params)
 
         // In the edge case that vert = index buffer, we need to synchronize
         // for both flags simultaneously
-        VkAccessFlags vbo_flags = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;
-        if (index == vert)
-            vbo_flags |= VK_ACCESS_INDEX_READ_BIT;
+        VkPipelineStageFlags2 vbo_stage = VK_PIPELINE_STAGE_2_VERTEX_ATTRIBUTE_INPUT_BIT;
+        VkAccessFlags2 vbo_flags = VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT;
+        if (index == vert) {
+            vbo_stage |= VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT;
+            vbo_flags |= VK_ACCESS_2_INDEX_READ_BIT;
+        }
 
-        vk_buf_barrier(gpu, cmd, vert, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
-                       vbo_flags, 0, vert->params.size, false);
+        vk_buf_barrier(gpu, cmd, vert, vbo_stage, vbo_flags, 0, vert->params.size, false);
 
         VkDeviceSize offset = vert_vk->mem.offset + params->buf_offset;
         vk->CmdBindVertexBuffers(cmd->buf, 0, 1, &vert_vk->mem.buf, &offset);
 
         if (index) {
             if (index != vert) {
-                vk_buf_barrier(gpu, cmd, index, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT,
-                               VK_ACCESS_INDEX_READ_BIT, 0, index->params.size,
+                vk_buf_barrier(gpu, cmd, index, VK_PIPELINE_STAGE_2_INDEX_INPUT_BIT,
+                               VK_ACCESS_2_INDEX_READ_BIT, 0, index->params.size,
                                false);
             }
 
@@ -949,11 +958,11 @@
         }
 
-        VkAccessFlags fbo_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+        VkAccessFlags2 fbo_access = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
         if (pass->params.load_target)
-            fbo_access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
+            fbo_access |= VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT;
 
-        vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+        vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
                        fbo_access, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                        VK_QUEUE_FAMILY_IGNORED);
 
diff --git a/src/vulkan/gpu_tex.c b/src/vulkan/gpu_tex.c
index dba6d1ea..cee53e7c 100644
--- a/src/vulkan/gpu_tex.c
+++ b/src/vulkan/gpu_tex.c
@@ -18,7 +18,7 @@
 #include "gpu.h"
 
 void vk_tex_barrier(pl_gpu gpu, struct vk_cmd *cmd, pl_tex tex,
-                    VkPipelineStageFlags stage, VkAccessFlags access,
+                    VkPipelineStageFlags2 stage, VkAccessFlags2 access,
                     VkImageLayout layout, uint32_t qf)
 {
     struct pl_vk *p = PL_PRIV(gpu);
@@ -41,14 +41,16 @@ void vk_tex_barrier(pl_gpu gpu, struct vk_cmd *cmd, pl_tex tex,
     bool is_trans = layout != tex_vk->layout, is_xfer = qf != tex_vk->qf;
     last = vk_sem_barrier(vk, cmd, &tex_vk->sem, stage, access, is_trans || is_xfer);
 
-    VkImageMemoryBarrier barr = {
-        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+    VkImageMemoryBarrier2 barr = {
+        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
+        .srcStageMask = last.stage,
+        .srcAccessMask = last.access,
+        .dstStageMask = stage,
+        .dstAccessMask = access,
         .oldLayout = tex_vk->layout,
         .newLayout = layout,
         .srcQueueFamilyIndex = tex_vk->qf,
         .dstQueueFamilyIndex = qf,
-        .srcAccessMask = last.access,
-        .dstAccessMask = access,
         .image = tex_vk->img,
         .subresourceRange = {
             .aspectMask = tex_vk->aspect,
@@ -63,8 +65,11 @@
     }
 
     if (last.access || is_trans || is_xfer) {
-        vk->CmdPipelineBarrier(cmd->buf, last.stage, stage, 0, 0, NULL,
-                               0, NULL, 1, &barr);
+        vk->CmdPipelineBarrier2(cmd->buf, &(VkDependencyInfo) {
+            .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+            .imageMemoryBarrierCount = 1,
+            .pImageMemoryBarriers = &barr,
+        });
     }
 
     tex_vk->qf = qf;
@@ -127,7 +132,6 @@ static bool vk_init_image(pl_gpu gpu, pl_tex tex, pl_debug_tag debug_tag)
     pl_rc_init(&tex_vk->rc);
     if (tex_vk->num_planes)
         return true;
-    vk_sem_init(&tex_vk->sem);
     tex_vk->layout = VK_IMAGE_LAYOUT_UNDEFINED;
     tex_vk->transfer_queue = GRAPHICS;
     tex_vk->qf = VK_QUEUE_FAMILY_IGNORED; // will be set on first use, if needed
@@ -687,8 +691,8 @@ void vk_tex_clear_ex(pl_gpu gpu, pl_tex tex, const union pl_clear_color color)
     if (!cmd)
         return;
 
-    vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                   VK_ACCESS_TRANSFER_WRITE_BIT,
+    vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_CLEAR_BIT,
+                   VK_ACCESS_2_TRANSFER_WRITE_BIT,
                    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                    VK_QUEUE_FAMILY_IGNORED);
 
@@ -732,19 +736,19 @@ void vk_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params)
     if (!cmd)
         return;
 
-    vk_tex_barrier(gpu, cmd, params->src, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                   VK_ACCESS_TRANSFER_READ_BIT,
-                   VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
-                   VK_QUEUE_FAMILY_IGNORED);
-
-    vk_tex_barrier(gpu, cmd, params->dst, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                   VK_ACCESS_TRANSFER_WRITE_BIT,
-                   VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
-                   VK_QUEUE_FAMILY_IGNORED);
-
     // When the blit operation doesn't require scaling, we can use the more
     // efficient vkCmdCopyImage instead of vkCmdBlitImage
     if (!requires_scaling) {
+        vk_tex_barrier(gpu, cmd, params->src, VK_PIPELINE_STAGE_2_COPY_BIT,
+                       VK_ACCESS_2_TRANSFER_READ_BIT,
+                       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                       VK_QUEUE_FAMILY_IGNORED);
+
+        vk_tex_barrier(gpu, cmd, params->dst, VK_PIPELINE_STAGE_2_COPY_BIT,
+                       VK_ACCESS_2_TRANSFER_WRITE_BIT,
+                       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                       VK_QUEUE_FAMILY_IGNORED);
+
         pl_rect3d_normalize(&src_rc);
 
         VkImageCopy region = {
@@ -768,6 +772,16 @@ void vk_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params)
         vk->CmdCopyImage(cmd->buf, src_vk->img, src_vk->layout,
                          dst_vk->img, dst_vk->layout, 1, &region);
     } else {
+        vk_tex_barrier(gpu, cmd, params->src, VK_PIPELINE_STAGE_2_BLIT_BIT,
+                       VK_ACCESS_2_TRANSFER_READ_BIT,
+                       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                       VK_QUEUE_FAMILY_IGNORED);
+
+        vk_tex_barrier(gpu, cmd, params->dst, VK_PIPELINE_STAGE_2_BLIT_BIT,
+                       VK_ACCESS_2_TRANSFER_WRITE_BIT,
+                       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                       VK_QUEUE_FAMILY_IGNORED);
+
         VkImageBlit region = {
             .srcSubresource = {
                 .aspectMask = src_vk->aspect,
@@ -914,11 +928,11 @@ bool vk_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params)
             .size = size,
         };
 
-        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                       VK_ACCESS_TRANSFER_READ_BIT, params->buf_offset, size,
+        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT,
+                       VK_ACCESS_2_TRANSFER_READ_BIT, params->buf_offset, size,
                        false);
-        vk_buf_barrier(gpu, cmd, tbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                       VK_ACCESS_TRANSFER_WRITE_BIT, 0, size, false);
+        vk_buf_barrier(gpu, cmd, tbuf, VK_PIPELINE_STAGE_2_COPY_BIT,
+                       VK_ACCESS_2_TRANSFER_WRITE_BIT, 0, size, false);
 
         vk->CmdCopyBuffer(cmd->buf, buf_vk->mem.buf, tbuf_vk->mem.buf,
                           1, &region);
@@ -957,11 +971,11 @@ bool vk_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params)
         if (!cmd)
             goto error;
 
-        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                       VK_ACCESS_TRANSFER_READ_BIT, params->buf_offset, size,
+        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT,
+                       VK_ACCESS_2_TRANSFER_READ_BIT, params->buf_offset, size,
                        false);
-        vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                       VK_ACCESS_TRANSFER_WRITE_BIT,
+        vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_COPY_BIT,
+                       VK_ACCESS_2_TRANSFER_WRITE_BIT,
                        VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                        VK_QUEUE_FAMILY_IGNORED);
         vk->CmdCopyBufferToImage(cmd->buf, buf_vk->mem.buf, tex_vk->img,
@@ -1039,10 +1053,10 @@ bool vk_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params)
             .size = size,
         };
 
-        vk_buf_barrier(gpu, cmd, tbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                       VK_ACCESS_TRANSFER_READ_BIT, 0, size, false);
-        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                       VK_ACCESS_TRANSFER_WRITE_BIT, params->buf_offset, size,
+        vk_buf_barrier(gpu, cmd, tbuf, VK_PIPELINE_STAGE_2_COPY_BIT,
+                       VK_ACCESS_2_TRANSFER_READ_BIT, 0, size, false);
+        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT,
+                       VK_ACCESS_2_TRANSFER_WRITE_BIT, params->buf_offset, size,
                        false);
         vk->CmdCopyBuffer(cmd->buf, tbuf_vk->mem.buf, buf_vk->mem.buf,
                           1, &region);
@@ -1076,11 +1090,11 @@ bool vk_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params)
         if (!cmd)
             goto error;
 
-        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                       VK_ACCESS_TRANSFER_WRITE_BIT, params->buf_offset, size,
+        vk_buf_barrier(gpu, cmd, buf, VK_PIPELINE_STAGE_2_COPY_BIT,
+                       VK_ACCESS_2_TRANSFER_WRITE_BIT, params->buf_offset, size,
                        false);
-        vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                       VK_ACCESS_TRANSFER_READ_BIT,
+        vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_COPY_BIT,
+                       VK_ACCESS_2_TRANSFER_READ_BIT,
                        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                        VK_QUEUE_FAMILY_IGNORED);
         vk->CmdCopyImageToBuffer(cmd->buf, tex_vk->img, tex_vk->layout,
@@ -1143,13 +1157,13 @@ bool vk_tex_export(pl_gpu gpu, pl_tex tex, pl_sync sync)
     if (!cmd)
         goto error;
 
-    vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+    vk_tex_barrier(gpu, cmd, tex, VK_PIPELINE_STAGE_2_NONE,
                    0, VK_IMAGE_LAYOUT_GENERAL, VK_QUEUE_FAMILY_EXTERNAL);
 
     // Make the next barrier appear as though coming from a different queue
     tex_vk->sem.write.queue = tex_vk->sem.read.queue = NULL;
 
-    vk_cmd_sig(cmd, (pl_vulkan_sem){ sync_vk->wait });
+    vk_cmd_sig(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, (pl_vulkan_sem){ sync_vk->wait });
 
     if (!CMD_SUBMIT(&cmd))
         goto error;
@@ -1339,19 +1353,17 @@ bool pl_vulkan_hold_ex(pl_gpu gpu, const struct pl_vulkan_hold_params *params)
     bool may_invalidate = true;
     if (!tex_vk->num_planes) {
         may_invalidate &= tex_vk->may_invalidate;
-        vk_tex_barrier(gpu, cmd, params->tex,
-                       VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+        vk_tex_barrier(gpu, cmd, params->tex, VK_PIPELINE_STAGE_2_NONE,
                        0, layout, params->qf);
     }
 
     for (int i = 0; i < tex_vk->num_planes; i++) {
         may_invalidate &= tex_vk->planes[i]->may_invalidate;
         vk_tex_barrier(gpu, cmd, params->tex->planes[i],
-                       VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
-                       0, layout, params->qf);
+                       VK_PIPELINE_STAGE_2_NONE, 0, layout, params->qf);
     }
 
-    vk_cmd_sig(cmd, params->semaphore);
+    vk_cmd_sig(cmd, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, params->semaphore);
 
     bool ok = CMD_SUBMIT(&cmd);
     if (!tex_vk->num_planes) {
diff --git a/src/vulkan/utils.h b/src/vulkan/utils.h
index def05efb..cb1c5f59 100644
--- a/src/vulkan/utils.h
+++ b/src/vulkan/utils.h
@@ -40,8 +40,8 @@ VkExternalMemoryHandleTypeFlagBitsKHR vk_mem_handle_type(enum pl_handle_type);
 VkExternalSemaphoreHandleTypeFlagBitsKHR vk_sync_handle_type(enum pl_handle_type);
 
 // Bitmask of all access flags that imply a read/write operation, respectively
-extern const VkAccessFlags vk_access_read;
-extern const VkAccessFlags vk_access_write;
+extern const VkAccessFlags2 vk_access_read;
+extern const VkAccessFlags2 vk_access_write;
 
 // Check for compatibility of a VkExternalMemoryProperties
 bool vk_external_mem_check(struct vk_ctx *vk,
diff --git a/src/vulkan/utils_gen.py b/src/vulkan/utils_gen.py
index ab17d273..df7e4dbb 100644
--- a/src/vulkan/utils_gen.py
+++ b/src/vulkan/utils_gen.py
@@ -165,8 +165,8 @@
     }
 }
 
-const VkAccessFlags vk_access_read = {{ '0x%x' % vkaccess.read }}LLU;
-const VkAccessFlags vk_access_write = {{ '0x%x' % vkaccess.write }}LLU;
+const VkAccessFlags2 vk_access_read = {{ '0x%x' % vkaccess.read }}LLU;
+const VkAccessFlags2 vk_access_write = {{ '0x%x' % vkaccess.write }}LLU;
 """)
 
 class Obj(object):
@@ -246,7 +246,7 @@ def get_vkstructs(registry):
 
 def get_vkaccess(registry):
     access = Obj(read = 0, write = 0)
-    for e in registry.findall_enum('VkAccessFlagBits'):
+    for e in registry.findall_enum('VkAccessFlagBits2'):
        if '_READ_' in e.attrib['name']:
            access.read |= 1 << int(e.attrib['bitpos'])
        if '_WRITE_' in e.attrib['name']:
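For readers less familiar with VK_KHR_synchronization2, the submission path that the new vk_cmd_submit() reduces to looks roughly like the following minimal standalone sketch. It is not taken from the patch: the function name submit_one and the handles queue, cmdbuf and sem are placeholders, and it assumes a Vulkan 1.3 device created with the synchronization2 (and, for the timeline values, timelineSemaphore) features enabled, matching what context.c now requires.

#include <vulkan/vulkan.h>

// Illustrative only: submit one command buffer with vkQueueSubmit2, waiting on
// and signalling a single timeline semaphore.
VkResult submit_one(VkQueue queue, VkCommandBuffer cmdbuf, VkSemaphore sem,
                    uint64_t wait_value, uint64_t signal_value)
{
    // Each wait/signal now carries its own stage mask and timeline value,
    // replacing the old pWaitDstStageMask array and
    // VkTimelineSemaphoreSubmitInfo pNext chain.
    VkSemaphoreSubmitInfo wait = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
        .semaphore = sem,
        .value = wait_value,
        .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
    };

    VkSemaphoreSubmitInfo signal = {
        .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
        .semaphore = sem,
        .value = signal_value,
        .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
    };

    VkCommandBufferSubmitInfo cb = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
        .commandBuffer = cmdbuf,
    };

    VkSubmitInfo2 sinfo = {
        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2,
        .waitSemaphoreInfoCount = 1,
        .pWaitSemaphoreInfos = &wait,
        .commandBufferInfoCount = 1,
        .pCommandBufferInfos = &cb,
        .signalSemaphoreInfoCount = 1,
        .pSignalSemaphoreInfos = &signal,
    };

    return vkQueueSubmit2(queue, 1, &sinfo, VK_NULL_HANDLE);
}

Because the stage mask and timeline value live directly in VkSemaphoreSubmitInfo, vk_cmd can store one array per direction and drop its separate depstages, depvalues and sigvalues arrays, which is the bulk of the command.c/command.h diff above.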
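The barrier conversions in gpu_buf.c and gpu_tex.c all follow the same shape: the source and destination stage masks move out of the vkCmdPipelineBarrier call and into the barrier struct itself, wrapped in a VkDependencyInfo. As a reference, here is a minimal standalone sketch of an image layout transition in this style; it is not from the patch, and cmdbuf and img are placeholder handles.

#include <vulkan/vulkan.h>

// Illustrative only: transition an image from UNDEFINED to TRANSFER_DST with
// vkCmdPipelineBarrier2, using an empty source scope.
void transition_to_transfer_dst(VkCommandBuffer cmdbuf, VkImage img)
{
    VkImageMemoryBarrier2 barr = {
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
        .srcStageMask = VK_PIPELINE_STAGE_2_NONE,       // nothing to wait for
        .srcAccessMask = 0,
        .dstStageMask = VK_PIPELINE_STAGE_2_COPY_BIT,   // block upcoming copies
        .dstAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
        .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
        .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = img,
        .subresourceRange = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .levelCount = 1,
            .layerCount = 1,
        },
    };

    VkDependencyInfo dep = {
        .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
        .imageMemoryBarrierCount = 1,
        .pImageMemoryBarriers = &barr,
    };

    vkCmdPipelineBarrier2(cmdbuf, &dep);
}

Since an unused source scope can now be expressed as VK_PIPELINE_STAGE_2_NONE, a zero-initialized vk_sem already describes "no previous access", which is presumably why the patch can drop vk_sem_init() and its TOP_OF_PIPE placeholder stages.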