diff options
author | Lynne <dev@lynne.ee> | 2024-09-22 13:43:33 +0200 |
---|---|---|
committer | Lynne <dev@lynne.ee> | 2024-09-23 13:41:07 +0200 |
commit | bc36fe6f1fc5244d5fc85c3b763015f58d50b358 (patch) | |
tree | 5ae5b8cbec5ec473b03f4546f7f54124ccc6e100 /libavfilter | |
parent | 8a7af4aa497f20c3f3d1632fd43436780fb3f4f8 (diff) | |
download | ffmpeg-bc36fe6f1fc5244d5fc85c3b763015f58d50b358.tar.gz |
vulkan: use push descriptors where possible
Push descriptors are in theory slightly faster, but come with
limitations for which we have to check.
Either way, they're not difficult to implement, so even though
no one should be using peasant-tier descriptors, do it anyway.
Diffstat (limited to 'libavfilter')
-rw-r--r-- | libavfilter/vf_nlmeans_vulkan.c | 16 |
1 file changed, 9 insertions, 7 deletions
diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c
index 9d96efa27b..f0a7353d5c 100644
--- a/libavfilter/vf_nlmeans_vulkan.c
+++ b/libavfilter/vf_nlmeans_vulkan.c
@@ -687,14 +687,16 @@ static int denoise_pass(NLMeansVulkanContext *s, FFVkExecContext *exec,
     VkBufferMemoryBarrier2 buf_bar[8];
     int nb_buf_bar = 0;
 
+    DenoisePushData pd = {
+        { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
+    };
+
     /* Denoise pass pipeline */
     ff_vk_exec_bind_pipeline(vkctx, exec, &s->pl_denoise);
 
     /* Push data */
     ff_vk_update_push_exec(vkctx, exec, &s->pl_denoise, VK_SHADER_STAGE_COMPUTE_BIT,
-                           0, sizeof(DenoisePushData), &(DenoisePushData) {
-                               { ws_stride[0], ws_stride[1], ws_stride[2], ws_stride[3] },
-                           });
+                           0, sizeof(pd), &pd);
 
     buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
         .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
@@ -970,6 +972,10 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
             offsets_dispatched,
         };
 
+        /* Push data */
+        ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT,
+                               0, sizeof(pd), &pd);
+
         if (offsets_dispatched) {
             nb_buf_bar = 0;
             buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
@@ -995,10 +1001,6 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in)
             integral_vk->access = buf_bar[1].dstAccessMask;
         }
 
-        /* Push data */
-        ff_vk_update_push_exec(vkctx, exec, &s->pl_weights, VK_SHADER_STAGE_COMPUTE_BIT,
-                               0, sizeof(pd), &pd);
-
         wg_invoc = FFMIN((s->nb_offsets - offsets_dispatched)/TYPE_ELEMS, s->opts.t);
         wg_invoc = FFMIN(wg_invoc, vkctx->props.properties.limits.maxComputeWorkGroupCount[2]);
 