diff options
author | Lynne <dev@lynne.ee> | 2024-09-22 07:24:59 +0200 |
---|---|---|
committer | Lynne <dev@lynne.ee> | 2024-09-23 13:40:38 +0200 |
commit | 8a7af4aa497f20c3f3d1632fd43436780fb3f4f8 (patch) | |
tree | 74a158b36182b2bfce3c294bba83fa17f6137e7c | |
parent | 3d75ba749589c3b6f6b737a1cb9381e8a4b54088 (diff) | |
download | ffmpeg-8a7af4aa497f20c3f3d1632fd43436780fb3f4f8.tar.gz |
vulkan: add support for regular descriptor pools
This permits:
- The use of Vulkan filtering on many more devices
- Better debugging due to lack of descriptor buffer support in layers
Much of the changes here are due to a requirement that updates to
descriptors must happen between the command buffer being waited on,
and the pipeline not being bound.
We routinely did it the other way around, by updating only after
we bind the pipeline.
-rw-r--r-- | libavfilter/vf_gblur_vulkan.c | 2 | ||||
-rw-r--r-- | libavfilter/vf_nlmeans_vulkan.c | 21 | ||||
-rw-r--r-- | libavfilter/vulkan_filter.c | 120 | ||||
-rw-r--r-- | libavutil/hwcontext_vulkan.c | 6 | ||||
-rw-r--r-- | libavutil/vulkan.c | 385 | ||||
-rw-r--r-- | libavutil/vulkan.h | 89 | ||||
-rw-r--r-- | libavutil/vulkan_functions.h | 3 |
7 files changed, 394 insertions, 232 deletions
diff --git a/libavfilter/vf_gblur_vulkan.c b/libavfilter/vf_gblur_vulkan.c index 546e2828b0..d0fb6c9940 100644 --- a/libavfilter/vf_gblur_vulkan.c +++ b/libavfilter/vf_gblur_vulkan.c @@ -189,7 +189,7 @@ static int init_gblur_pipeline(GBlurVulkanContext *s, FFVulkanPipeline *pl, RET(ff_vk_unmap_buffer(&s->vkctx, params_buf, 1)); RET(ff_vk_set_descriptor_buffer(&s->vkctx, pl, NULL, 1, 0, 0, - params_buf->address, params_buf->size, + params_buf, 0, params_buf->size, VK_FORMAT_UNDEFINED)); fail: diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c index cd44f5e4db..9d96efa27b 100644 --- a/libavfilter/vf_nlmeans_vulkan.c +++ b/libavfilter/vf_nlmeans_vulkan.c @@ -657,7 +657,7 @@ static av_cold int init_filter(AVFilterContext *ctx) spv, desc, planes)); RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, NULL, 1, 0, 0, - s->xyoffsets_buf.address, s->xyoffsets_buf.size, + &s->xyoffsets_buf, 0, s->xyoffsets_buf.size, VK_FORMAT_UNDEFINED)); do { @@ -751,8 +751,8 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) /* Weights/sums */ AVBufferRef *ws_buf = NULL; FFVkBuffer *ws_vk; - VkDeviceAddress weights_addr[4]; - VkDeviceAddress sums_addr[4]; + VkDeviceSize weights_addr[4]; + VkDeviceSize sums_addr[4]; uint32_t ws_stride[4]; size_t ws_size[4]; size_t ws_total_size = 0; @@ -810,8 +810,8 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) return err; ws_vk = (FFVkBuffer *)ws_buf->data; - weights_addr[0] = ws_vk->address; - sums_addr[0] = ws_vk->address + ws_total_size; + weights_addr[0] = 0; + sums_addr[0] = ws_total_size; for (int i = 1; i < desc->nb_components; i++) { weights_addr[i] = weights_addr[i - 1] + ws_size[i - 1]; sums_addr[i] = sums_addr[i - 1] + ws_size[i - 1]; @@ -844,9 +844,6 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) /* Input frame prep */ RET(ff_vk_create_imageviews(vkctx, exec, in_views, in)); - ff_vk_update_descriptor_img_array(vkctx, &s->pl_weights, exec, in, in_views, 0, 0, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - s->sampler); ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, @@ -934,10 +931,10 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) s->sampler); for (int i = 0; i < desc->nb_components; i++) { RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 0, 0, - weights_addr[i], ws_size[i], + ws_vk, weights_addr[i], ws_size[i], VK_FORMAT_UNDEFINED)); RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_weights, exec, 0, 1 + i*2 + 1, 0, - sums_addr[i], ws_size[i], + ws_vk, sums_addr[i], ws_size[i], VK_FORMAT_UNDEFINED)); } @@ -949,10 +946,10 @@ static int nlmeans_vulkan_filter_frame(AVFilterLink *link, AVFrame *in) VK_IMAGE_LAYOUT_GENERAL, s->sampler); for (int i = 0; i < desc->nb_components; i++) { RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 0, 2 + i*2 + 0, 0, - weights_addr[i], ws_size[i], + ws_vk, weights_addr[i], ws_size[i], VK_FORMAT_UNDEFINED)); RET(ff_vk_set_descriptor_buffer(&s->vkctx, &s->pl_denoise, exec, 0, 2 + i*2 + 1, 0, - sums_addr[i], ws_size[i], + ws_vk, sums_addr[i], ws_size[i], VK_FORMAT_UNDEFINED)); } diff --git a/libavfilter/vulkan_filter.c b/libavfilter/vulkan_filter.c index c31d42b91a..2c6ab72849 100644 --- a/libavfilter/vulkan_filter.c +++ b/libavfilter/vulkan_filter.c @@ -152,18 +152,6 @@ skip: s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, vk_dev->nb_enabled_dev_extensions); - /** - * libplacebo does not use descriptor buffers. - */ - if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) && - strcmp(avctx->filter->name, "libplacebo")) { - av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires that " - "the %s extension is supported!\n", - VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME); - av_buffer_unref(&frames_ref); - return AVERROR(EINVAL); - } - err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); if (err < 0) { av_buffer_unref(&frames_ref); @@ -264,12 +252,13 @@ int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, FFVkExecContext *exec = ff_vk_exec_get(e); ff_vk_exec_start(vkctx, exec); - ff_vk_exec_bind_pipeline(vkctx, exec, pl); - - if (push_src) - ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, - 0, push_size, push_src); - + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f)); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, !!in_f, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); if (in_f) { RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, @@ -278,27 +267,28 @@ int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, in_views, 0, 0, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, sampler); - ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - VK_QUEUE_FAMILY_IGNORED); } - RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f)); - ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, !!in_f, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); + /* Bind pipeline, update push data */ + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + /* Add data sync barriers */ ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL, VK_QUEUE_FAMILY_IGNORED); + if (in_f) + ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, @@ -380,12 +370,6 @@ int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, VkImageView *src_views = !i ? in_views : tmp_views; VkImageView *dst_views = !i ? tmp_views : out_views; - ff_vk_exec_bind_pipeline(vkctx, exec, pl); - - if (push_src) - ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, - 0, push_size, push_src); - ff_vk_update_descriptor_img_array(vkctx, pl, exec, src_f, src_views, 0, 0, !i ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL, @@ -394,6 +378,12 @@ int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, VK_IMAGE_LAYOUT_GENERAL, VK_NULL_HANDLE); + /* Bind pipeline, update push data */ + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + vk->CmdDispatch(exec->buf, FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], @@ -422,32 +412,47 @@ int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, FFVkExecContext *exec = ff_vk_exec_get(e); ff_vk_exec_start(vkctx, exec); - /* Inputs */ + /* Add deps and create temporary imageviews */ + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); for (int i = 0; i < nb_in; i++) { RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i], VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i])); - - ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, - VK_ACCESS_SHADER_READ_BIT, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - VK_QUEUE_FAMILY_IGNORED); } - /* Output */ - RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, - VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, - VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); - RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); + /* Update descriptor sets */ + ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, nb_in, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + for (int i = 0; i < nb_in; i++) + ff_vk_update_descriptor_img_array(vkctx, pl, exec, in[i], in_views[i], 0, i, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + sampler); + + /* Bind pipeline, update push data */ + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + /* Add data sync barriers */ ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL, VK_QUEUE_FAMILY_IGNORED); + for (int i = 0; i < nb_in; i++) + ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, @@ -455,21 +460,6 @@ int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, .imageMemoryBarrierCount = nb_img_bar, }); - ff_vk_exec_bind_pipeline(vkctx, exec, pl); - - if (push_src) - ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, - 0, push_size, push_src); - - for (int i = 0; i < nb_in; i++) - ff_vk_update_descriptor_img_array(vkctx, pl, exec, in[i], in_views[i], 0, i, - VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - sampler); - - ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, nb_in, - VK_IMAGE_LAYOUT_GENERAL, - VK_NULL_HANDLE); - vk->CmdDispatch(exec->buf, FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c index 5e56a215e8..7e7d9cb70b 100644 --- a/libavutil/hwcontext_vulkan.c +++ b/libavutil/hwcontext_vulkan.c @@ -578,6 +578,12 @@ static int check_extensions(AVHWDeviceContext *ctx, int dev, AVDictionary *opts, for (int i = 0; i < optional_exts_num; i++) { tstr = optional_exts[i].name; found = 0; + + if (dev && debug_mode && + !strcmp(tstr, VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME)) { + continue; + } + for (int j = 0; j < sup_ext_count; j++) { if (!strcmp(tstr, sup_ext[j].extensionName)) { found = 1; diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index e3fb70da46..7ea4c33619 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -1561,7 +1561,8 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, .bindingCount = nb, .pBindings = set->binding, - .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, + .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ? + VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : 0x0, }; for (int i = 0; i < nb; i++) { @@ -1589,13 +1590,35 @@ int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, return AVERROR_EXTERNAL; } - vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, *layout, &set->layout_size); + if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { + vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, *layout, &set->layout_size); + set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment); - set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment); - - for (int i = 0; i < nb; i++) - vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, *layout, - i, &set->binding_offset[i]); + for (int i = 0; i < nb; i++) + vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, *layout, + i, &set->binding_offset[i]); + } else { + for (int i = 0; i < nb; i++) { + int j; + VkDescriptorPoolSize *desc_pool_size; + for (j = 0; j < pl->nb_desc_pool_size; j++) + if (pl->desc_pool_size[j].type == desc[i].type) + break; + if (j >= pl->nb_desc_pool_size) { + desc_pool_size = av_realloc_array(pl->desc_pool_size, + sizeof(*desc_pool_size), + pl->nb_desc_pool_size + 1); + if (!desc_pool_size) + return AVERROR(ENOMEM); + + pl->desc_pool_size = desc_pool_size; + pl->nb_desc_pool_size++; + memset(&desc_pool_size[j], 0, sizeof(VkDescriptorPoolSize)); + } + pl->desc_pool_size[j].type = desc[i].type; + pl->desc_pool_size[j].descriptorCount += FFMAX(desc[i].elems, 1); + } + } set->singular = singular; set->nb_bindings = nb; @@ -1643,38 +1666,102 @@ int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, { int err; - pl->desc_bind = av_calloc(pl->nb_descriptor_sets, sizeof(*pl->desc_bind)); - if (!pl->desc_bind) - return AVERROR(ENOMEM); + if (!pl->nb_descriptor_sets) + return 0; - pl->bound_buffer_indices = av_calloc(pl->nb_descriptor_sets, - sizeof(*pl->bound_buffer_indices)); - if (!pl->bound_buffer_indices) - return AVERROR(ENOMEM); + if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { + pl->desc_bind = av_calloc(pl->nb_descriptor_sets, sizeof(*pl->desc_bind)); + if (!pl->desc_bind) + return AVERROR(ENOMEM); - for (int i = 0; i < pl->nb_descriptor_sets; i++) { - FFVulkanDescriptorSet *set = &pl->desc_set[i]; - int nb = set->singular ? 1 : pool->pool_size; - - err = ff_vk_create_buf(s, &set->buf, set->aligned_size*nb, - NULL, NULL, set->usage, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); - if (err < 0) - return err; + pl->bound_buffer_indices = av_calloc(pl->nb_descriptor_sets, + sizeof(*pl->bound_buffer_indices)); + if (!pl->bound_buffer_indices) + return AVERROR(ENOMEM); - err = ff_vk_map_buffer(s, &set->buf, &set->desc_mem, 0); - if (err < 0) - return err; + for (int i = 0; i < pl->nb_descriptor_sets; i++) { + FFVulkanDescriptorSet *set = &pl->desc_set[i]; + int nb = set->singular ? 1 : pool->pool_size; + + err = ff_vk_create_buf(s, &set->buf, set->aligned_size*nb, + NULL, NULL, set->usage, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (err < 0) + return err; + + err = ff_vk_map_buffer(s, &set->buf, &set->desc_mem, 0); + if (err < 0) + return err; + + pl->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT, + .usage = set->usage, + .address = set->buf.address, + }; - pl->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT, - .usage = set->usage, - .address = set->buf.address, + pl->bound_buffer_indices[i] = i; + } + } else { + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkDescriptorSetLayout *tmp_layouts; + VkDescriptorSetAllocateInfo set_alloc_info; + VkDescriptorPoolCreateInfo pool_create_info; + + for (int i = 0; i < pl->nb_desc_pool_size; i++) + pl->desc_pool_size[i].descriptorCount *= pool->pool_size; + + pool_create_info = (VkDescriptorPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .flags = 0, + .pPoolSizes = pl->desc_pool_size, + .poolSizeCount = pl->nb_desc_pool_size, + .maxSets = pl->nb_descriptor_sets*pool->pool_size, }; - pl->bound_buffer_indices[i] = i; + ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, + s->hwctx->alloc, &pl->desc_pool); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to create descriptor pool: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + tmp_layouts = av_malloc_array(pool_create_info.maxSets, sizeof(*tmp_layouts)); + if (!tmp_layouts) + return AVERROR(ENOMEM); + + /* Colate each execution context's descriptor set layouts */ + for (int i = 0; i < pool->pool_size; i++) + for (int j = 0; j < pl->nb_descriptor_sets; j++) + tmp_layouts[i*pl->nb_descriptor_sets + j] = pl->desc_layout[j]; + + set_alloc_info = (VkDescriptorSetAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = pl->desc_pool, + .pSetLayouts = tmp_layouts, + .descriptorSetCount = pool_create_info.maxSets, + }; + + pl->desc_sets = av_malloc_array(pool_create_info.maxSets, + sizeof(*tmp_layouts)); + if (!pl->desc_sets) { + av_free(tmp_layouts); + return AVERROR(ENOMEM); + } + ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &set_alloc_info, + pl->desc_sets); + av_free(tmp_layouts); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n", + ff_vk_ret2str(ret)); + av_freep(&pl->desc_sets); + return AVERROR_EXTERNAL; + } + + pl->assoc_pool = pool; } return 0; @@ -1696,94 +1783,150 @@ static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e, vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc); } +static inline void update_set_pool_write(FFVulkanContext *s, + FFVulkanPipeline *pl, + FFVkExecContext *e, + FFVulkanDescriptorSet *desc_set, int set, + VkWriteDescriptorSet *write_info) +{ + FFVulkanFunctions *vk = &s->vkfn; + if (desc_set->singular) { + for (int i = 0; i < pl->assoc_pool->pool_size; i++) { + write_info->dstSet = pl->desc_sets[i*pl->nb_descriptor_sets + set]; + vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL); + } + } else { + write_info->dstSet = pl->desc_sets[e->idx*pl->nb_descriptor_sets + set]; + vk->UpdateDescriptorSets(s->hwctx->act_dev, 1, write_info, 0, NULL); + } +} + static int vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl, FFVkExecContext *e, int set, int bind, int offs, VkImageView view, VkImageLayout layout, VkSampler sampler) { FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; - VkDescriptorGetInfoEXT desc_get_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, - .type = desc_set->binding[bind].descriptorType, - }; - VkDescriptorImageInfo desc_img_info = { - .imageView = view, - .sampler = sampler, - .imageLayout = layout, - }; - size_t desc_size; - switch (desc_get_info.type) { - case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: - desc_get_info.data.pSampledImage = &desc_img_info; - desc_size = s->desc_buf_props.sampledImageDescriptorSize; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - desc_get_info.data.pStorageImage = &desc_img_info; - desc_size = s->desc_buf_props.storageImageDescriptorSize; - break; - case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: - desc_get_info.data.pInputAttachmentImage = &desc_img_info; - desc_size = s->desc_buf_props.inputAttachmentDescriptorSize; - break; - case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: - desc_get_info.data.pCombinedImageSampler = &desc_img_info; - desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize; - break; - default: - av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", - set, bind, desc_get_info.type); - return AVERROR(EINVAL); - break; - }; + if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorImageInfo desc_img_info = { + .imageView = view, + .sampler = sampler, + .imageLayout = layout, + }; + size_t desc_size; + + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + desc_get_info.data.pSampledImage = &desc_img_info; + desc_size = s->desc_buf_props.sampledImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + desc_get_info.data.pStorageImage = &desc_img_info; + desc_size = s->desc_buf_props.storageImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + desc_get_info.data.pInputAttachmentImage = &desc_img_info; + desc_size = s->desc_buf_props.inputAttachmentDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + desc_get_info.data.pCombinedImageSampler = &desc_img_info; + desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size); + update_set_descriptor(s, e, desc_set, bind, offs, + &desc_get_info, desc_size); + } else { + VkDescriptorImageInfo desc_pool_write_info_img = { + .sampler = sampler, + .imageView = view, + .imageLayout = layout, + }; + VkWriteDescriptorSet desc_pool_write_info = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = bind, + .descriptorCount = 1, + .dstArrayElement = offs, + .descriptorType = desc_set->binding[bind].descriptorType, + .pImageInfo = &desc_pool_write_info_img, + }; + update_set_pool_write(s, pl, e, desc_set, set, &desc_pool_write_info); + } return 0; } int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkExecContext *e, int set, int bind, int offs, - VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt) + FFVkExecContext *e, int set, int bind, int elem, + FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, + VkFormat fmt) { FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; - VkDescriptorGetInfoEXT desc_get_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, - .type = desc_set->binding[bind].descriptorType, - }; - VkDescriptorAddressInfoEXT desc_buf_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT, - .address = addr, - .range = len, - .format = fmt, - }; - size_t desc_size; - switch (desc_get_info.type) { - case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: - desc_get_info.data.pUniformBuffer = &desc_buf_info; - desc_size = s->desc_buf_props.uniformBufferDescriptorSize; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: - desc_get_info.data.pStorageBuffer = &desc_buf_info; - desc_size = s->desc_buf_props.storageBufferDescriptorSize; - break; - case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: - desc_get_info.data.pUniformTexelBuffer = &desc_buf_info; - desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize; - break; - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: - desc_get_info.data.pStorageTexelBuffer = &desc_buf_info; - desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize; - break; - default: - av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", - set, bind, desc_get_info.type); - return AVERROR(EINVAL); - break; - }; + if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorAddressInfoEXT desc_buf_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_ADDRESS_INFO_EXT, + .address = buf->address + offset, + .range = len, + .format = fmt, + }; + size_t desc_size; + + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + desc_get_info.data.pUniformBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + desc_get_info.data.pStorageBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + desc_get_info.data.pUniformTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + desc_get_info.data.pStorageTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size); + update_set_descriptor(s, e, desc_set, bind, elem, &desc_get_info, desc_size); + } else { + VkDescriptorBufferInfo desc_pool_write_info_buf = { + .buffer = buf->buf, + .offset = offset, + .range = len, + }; + VkWriteDescriptorSet desc_pool_write_info = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstBinding = bind, + .descriptorCount = 1, + .dstArrayElement = elem, + .descriptorType = desc_set->binding[bind].descriptorType, + .pBufferInfo = &desc_pool_write_info_buf, + }; + update_set_pool_write(s, pl, e, desc_set, set, &desc_pool_write_info); + } return 0; } @@ -1852,7 +1995,8 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, pipeline_create_info = (VkComputePipelineCreateInfo) { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, + .flags = (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) ? + VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT : 0x0, .layout = pl->pipeline_layout, .stage = shd->shader, }; @@ -1884,15 +2028,22 @@ void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline); if (pl->nb_descriptor_sets) { - for (int i = 0; i < pl->nb_descriptor_sets; i++) - offsets[i] = pl->desc_set[i].singular ? 0 : pl->desc_set[i].aligned_size*e->idx; - - /* Bind descriptor buffers */ - vk->CmdBindDescriptorBuffersEXT(e->buf, pl->nb_descriptor_sets, pl->desc_bind); - /* Binding offsets */ - vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout, - 0, pl->nb_descriptor_sets, - pl->bound_buffer_indices, offsets); + if (s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) { + for (int i = 0; i < pl->nb_descriptor_sets; i++) + offsets[i] = pl->desc_set[i].singular ? 0 : pl->desc_set[i].aligned_size*e->idx; + + /* Bind descriptor buffers */ + vk->CmdBindDescriptorBuffersEXT(e->buf, pl->nb_descriptor_sets, pl->desc_bind); + /* Binding offsets */ + vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout, + 0, pl->nb_descriptor_sets, + pl->bound_buffer_indices, offsets); + } else { + vk->CmdBindDescriptorSets(e->buf, pl->bind_point, pl->pipeline_layout, + 0, pl->nb_descriptor_sets, + &pl->desc_sets[e->idx*pl->nb_descriptor_sets], + 0, NULL); + } } } @@ -1920,7 +2071,13 @@ void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl) vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i], s->hwctx->alloc); + if (pl->desc_pool) + vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool, + s->hwctx->alloc); + + av_freep(&pl->desc_pool_size); av_freep(&pl->desc_layout); + av_freep(&pl->desc_sets); av_freep(&pl->desc_set); av_freep(&pl->desc_bind); av_freep(&pl->bound_buffer_indices); diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 35e3488690..7009104a8f 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -112,44 +112,6 @@ typedef struct FFVkQueueFamilyCtx { int nb_queues; } FFVkQueueFamilyCtx; -typedef struct FFVulkanDescriptorSet { - FFVkBuffer buf; - uint8_t *desc_mem; - VkDeviceSize layout_size; - VkDeviceSize aligned_size; /* descriptorBufferOffsetAlignment */ - VkDeviceSize total_size; /* Once registered to an exec context */ - VkBufferUsageFlags usage; - - VkDescriptorSetLayoutBinding *binding; - VkDeviceSize *binding_offset; - int nb_bindings; - - /* Descriptor set is shared between all submissions */ - int singular; -} FFVulkanDescriptorSet; - -typedef struct FFVulkanPipeline { - VkPipelineBindPoint bind_point; - - /* Contexts */ - VkPipelineLayout pipeline_layout; - VkPipeline pipeline; - - /* Push consts */ - VkPushConstantRange *push_consts; - int push_consts_num; - - /* Workgroup */ - int wg_size[3]; - - /* Descriptor buffer */ - VkDescriptorSetLayout *desc_layout; - FFVulkanDescriptorSet *desc_set; - VkDescriptorBufferBindingInfoEXT *desc_bind; - uint32_t *bound_buffer_indices; - int nb_descriptor_sets; -} FFVulkanPipeline; - typedef struct FFVkExecContext { uint32_t idx; const struct FFVkExecPool *parent; @@ -226,6 +188,52 @@ typedef struct FFVkExecPool { size_t qd_size; } FFVkExecPool; +typedef struct FFVulkanDescriptorSet { + FFVkBuffer buf; + uint8_t *desc_mem; + VkDeviceSize layout_size; + VkDeviceSize aligned_size; /* descriptorBufferOffsetAlignment */ + VkDeviceSize total_size; /* Once registered to an exec context */ + VkBufferUsageFlags usage; + + VkDescriptorSetLayoutBinding *binding; + VkDeviceSize *binding_offset; + int nb_bindings; + + /* Descriptor set is shared between all submissions */ + int singular; +} FFVulkanDescriptorSet; + +typedef struct FFVulkanPipeline { + VkPipelineBindPoint bind_point; + + /* Contexts */ + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; + + /* Push consts */ + VkPushConstantRange *push_consts; + int push_consts_num; + + /* Workgroup */ + int wg_size[3]; + + /* Descriptor buffer */ + VkDescriptorSetLayout *desc_layout; + FFVulkanDescriptorSet *desc_set; + VkDescriptorBufferBindingInfoEXT *desc_bind; + uint32_t *bound_buffer_indices; + int nb_descriptor_sets; + + /* Descriptor pool */ + VkDescriptorSet *desc_sets; + VkDescriptorPool desc_pool; + VkDescriptorPoolSize *desc_pool_size; + int nb_desc_pool_size; + int total_desc_sets; + FFVkExecPool *assoc_pool; +} FFVulkanPipeline; + typedef struct FFVulkanContext { const AVClass *class; void *log_parent; @@ -508,8 +516,9 @@ void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, FFVulkanPipeline *pl); int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkExecContext *e, int set, int bind, int offs, - VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt); + FFVkExecContext *e, int set, int bind, int elem, + FFVkBuffer *buf, VkDeviceSize offset, VkDeviceSize len, + VkFormat fmt); void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl, FFVkExecContext *e, AVFrame *f, diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index 6aeaf4e79a..da555b37c7 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -176,6 +176,9 @@ typedef enum FFVulkanExtensions { MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSetWithTemplate) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateDescriptorUpdateTemplate) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, DestroyDescriptorUpdateTemplate) \ + \ + /* Descriptors */ \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, UpdateDescriptorSets) \ \ /* Queries */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateQueryPool) \ |