diff options
author | Lynne <dev@lynne.ee> | 2022-12-29 21:16:21 +0100 |
---|---|---|
committer | Lynne <dev@lynne.ee> | 2023-05-29 00:41:48 +0200 |
commit | b5eaeb1f134d973836d6e3dced271713fb97f234 (patch) | |
tree | ed4ca1f3e40ae29469adb9f9b503acf7b9ad103e | |
parent | 721b71da4aac9396a0788b9aaae4b05824a4a302 (diff) | |
download | ffmpeg-b5eaeb1f134d973836d6e3dced271713fb97f234.tar.gz |
vulkan: rewrite to support all necessary features
This commit rewrites the majority of vulkan.c to enable its use
as general-purpose high-level utility code, usable for decoding,
encoding, and filtering of video frames.
The dependency system was rewritten to simplify management of
execution.
The image handling system was rewritten to accommodate multiplane
images.
Due to how related all the new features were, this is a single
commit.
-rw-r--r-- | libavutil/vulkan.c | 2163 | ||||
-rw-r--r-- | libavutil/vulkan.h | 516 | ||||
-rw-r--r-- | libavutil/vulkan_functions.h | 1 |
3 files changed, 1357 insertions, 1323 deletions
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c index 0a92f894fa..6dd4de0343 100644 --- a/libavutil/vulkan.c +++ b/libavutil/vulkan.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -21,33 +23,6 @@ #include "vulkan.h" #include "vulkan_loader.h" -#if CONFIG_LIBGLSLANG -#include "vulkan_glslang.c" -#elif CONFIG_LIBSHADERC -#include "vulkan_shaderc.c" -#endif - -/* Generic macro for creating contexts which need to keep their addresses - * if another context is created. */ -#define FN_CREATING(ctx, type, shortname, array, num) \ -static av_always_inline type *create_ ##shortname(ctx *dctx) \ -{ \ - type **array, *sctx = av_mallocz(sizeof(*sctx)); \ - if (!sctx) \ - return NULL; \ - \ - array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\ - if (!array) { \ - av_free(sctx); \ - return NULL; \ - } \ - \ - dctx->array = array; \ - dctx->array[dctx->num++] = sctx; \ - \ - return sctx; \ -} - const VkComponentMapping ff_comp_identity_map = { .r = VK_COMPONENT_SWIZZLE_IDENTITY, .g = VK_COMPONENT_SWIZZLE_IDENTITY, @@ -110,43 +85,50 @@ const char *ff_vk_ret2str(VkResult res) int ff_vk_load_props(FFVulkanContext *s) { - uint32_t qc = 0; FFVulkanFunctions *vk = &s->vkfn; + s->hprops = (VkPhysicalDeviceExternalMemoryHostPropertiesEXT) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, + }; + s->desc_buf_props = (VkPhysicalDeviceDescriptorBufferPropertiesEXT) { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT, + .pNext = &s->hprops, + }; s->driver_props = (VkPhysicalDeviceDriverProperties) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, + .pNext = &s->desc_buf_props, }; s->props = (VkPhysicalDeviceProperties2) { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, .pNext = &s->driver_props, }; - vk->GetPhysicalDeviceProperties2(s->hwctx->phys_dev, &s->props); 
vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); - vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props); if (s->qf_props) return 0; - s->qf_props = av_calloc(qc, sizeof(*s->qf_props)); + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, NULL); + + s->qf_props = av_calloc(s->tot_nb_qfs, sizeof(*s->qf_props)); if (!s->qf_props) return AVERROR(ENOMEM); - s->query_props = av_calloc(qc, sizeof(*s->query_props)); + s->query_props = av_calloc(s->tot_nb_qfs, sizeof(*s->query_props)); if (!s->qf_props) { av_freep(&s->qf_props); return AVERROR(ENOMEM); } - s->video_props = av_calloc(qc, sizeof(*s->video_props)); + s->video_props = av_calloc(s->tot_nb_qfs, sizeof(*s->video_props)); if (!s->video_props) { av_freep(&s->qf_props); av_freep(&s->query_props); return AVERROR(ENOMEM); } - for (uint32_t i = 0; i < qc; i++) { + for (uint32_t i = 0; i < s->tot_nb_qfs; i++) { s->query_props[i] = (VkQueueFamilyQueryResultStatusPropertiesKHR) { .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR, }; @@ -160,37 +142,12 @@ int ff_vk_load_props(FFVulkanContext *s) }; } - vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &qc, s->qf_props); + vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, &s->tot_nb_qfs, s->qf_props); return 0; } -void ff_vk_qf_fill(FFVulkanContext *s) -{ - s->nb_qfs = 0; - - /* Simply fills in all unique queues into s->qfs */ - if (s->hwctx->queue_family_index >= 0) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index; - if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index; - if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index && - s->qfs[1] != s->hwctx->queue_family_comp_index)) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index; - if (s->hwctx->queue_family_decode_index >= 0 && - (s->qfs[0] != s->hwctx->queue_family_decode_index && - s->qfs[1] != 
s->hwctx->queue_family_decode_index && - s->qfs[2] != s->hwctx->queue_family_decode_index)) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index; - if (s->hwctx->queue_family_encode_index >= 0 && - (s->qfs[0] != s->hwctx->queue_family_encode_index && - s->qfs[1] != s->hwctx->queue_family_encode_index && - s->qfs[2] != s->hwctx->queue_family_encode_index && - s->qfs[3] != s->hwctx->queue_family_encode_index)) - s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; -} - -int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) +static int vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) { int ret, num; @@ -226,24 +183,552 @@ int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb) } int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, - VkQueueFlagBits dev_family, int nb_queues) + VkQueueFlagBits dev_family) { - int ret; + /* Fill in queue families from context if not done yet */ + if (!s->nb_qfs) { + s->nb_qfs = 0; + + /* Simply fills in all unique queues into s->qfs */ + if (s->hwctx->queue_family_index >= 0) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_index; + if (!s->nb_qfs || s->qfs[0] != s->hwctx->queue_family_tx_index) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_tx_index; + if (!s->nb_qfs || (s->qfs[0] != s->hwctx->queue_family_comp_index && + s->qfs[1] != s->hwctx->queue_family_comp_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_comp_index; + if (s->hwctx->queue_family_decode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_decode_index && + s->qfs[1] != s->hwctx->queue_family_decode_index && + s->qfs[2] != s->hwctx->queue_family_decode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_decode_index; + if (s->hwctx->queue_family_encode_index >= 0 && + (s->qfs[0] != s->hwctx->queue_family_encode_index && + s->qfs[1] != s->hwctx->queue_family_encode_index && + s->qfs[2] != s->hwctx->queue_family_encode_index && + s->qfs[3] != 
s->hwctx->queue_family_encode_index)) + s->qfs[s->nb_qfs++] = s->hwctx->queue_family_encode_index; + } + + return (qf->queue_family = vk_qf_get_index(s, dev_family, &qf->nb_queues)); +} - ret = qf->queue_family = ff_vk_qf_get_index(s, dev_family, &qf->actual_queues); +void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool) +{ + FFVulkanFunctions *vk = &s->vkfn; - if (!nb_queues) - qf->nb_queues = qf->actual_queues; - else - qf->nb_queues = nb_queues; + for (int i = 0; i < pool->pool_size; i++) { + FFVkExecContext *e = &pool->contexts[i]; - return ret; + if (e->fence) { + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + vk->DestroyFence(s->hwctx->act_dev, e->fence, s->hwctx->alloc); + } + + ff_vk_exec_discard_deps(s, e); + + av_free(e->frame_deps); + av_free(e->buf_deps); + av_free(e->queue_family_dst); + av_free(e->layout_dst); + av_free(e->access_dst); + av_free(e->frame_update); + av_free(e->frame_locked); + av_free(e->sem_sig); + av_free(e->sem_sig_val_dst); + av_free(e->sem_wait); + } + + if (pool->cmd_bufs) + vk->FreeCommandBuffers(s->hwctx->act_dev, pool->cmd_buf_pool, + pool->pool_size, pool->cmd_bufs); + if (pool->cmd_buf_pool) + vk->DestroyCommandPool(s->hwctx->act_dev, pool->cmd_buf_pool, s->hwctx->alloc); + if (pool->query_pool) + vk->DestroyQueryPool(s->hwctx->act_dev, pool->query_pool, s->hwctx->alloc); + + av_free(pool->query_data); + av_free(pool->cmd_bufs); + av_free(pool->contexts); } -int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf) +int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + FFVkExecPool *pool, int nb_contexts, + int nb_queries, VkQueryType query_type, int query_64bit, + const void *query_create_pnext) { - qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues; - return qf->cur_queue; + int err; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + + VkCommandPoolCreateInfo cqueue_create; + VkCommandBufferAllocateInfo cbuf_create; + + atomic_init(&pool->idx, 0); + + /* Create command pool */ 
+ cqueue_create = (VkCommandPoolCreateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT | + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = qf->queue_family, + }; + ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, + s->hwctx->alloc, &pool->cmd_buf_pool); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Allocate space for command buffers */ + pool->cmd_bufs = av_malloc(nb_contexts*sizeof(*pool->cmd_bufs)); + if (!pool->cmd_bufs) { + err = AVERROR(ENOMEM); + goto fail; + } + + /* Allocate command buffer */ + cbuf_create = (VkCommandBufferAllocateInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandPool = pool->cmd_buf_pool, + .commandBufferCount = nb_contexts, + }; + ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, + pool->cmd_bufs); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + /* Query pool */ + if (nb_queries) { + VkQueryPoolCreateInfo query_pool_info = { + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .pNext = query_create_pnext, + .queryType = query_type, + .queryCount = nb_queries*nb_contexts, + }; + ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info, + s->hwctx->alloc, &pool->query_pool); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Query pool alloc failure: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + pool->nb_queries = nb_queries; + pool->query_status_stride = 2; + pool->query_results = nb_queries; + pool->query_statuses = 0; /* if radv supports it, nb_queries; */ + +#if 0 /* CONFIG_VULKAN_ENCODE */ + /* Video encode quieries produce two results per query */ + if (query_type == 
VK_QUERY_TYPE_VIDEO_ENCODE_FEEDBACK_KHR) { + pool->query_status_stride = 3; /* skip,skip,result,skip,skip,result */ + pool->query_results *= 2; + } else +#endif + if (query_type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { + pool->query_status_stride = 1; + pool->query_results = 0; + pool->query_statuses = nb_queries; + } + + pool->qd_size = (pool->query_results + pool->query_statuses)*(query_64bit ? 8 : 4); + + /* Allocate space for the query data */ + pool->query_data = av_calloc(nb_contexts, pool->qd_size); + if (!pool->query_data) { + err = AVERROR(ENOMEM); + goto fail; + } + } + + /* Allocate space for the contexts */ + pool->contexts = av_calloc(nb_contexts, sizeof(*pool->contexts)); + if (!pool->contexts) { + err = AVERROR(ENOMEM); + goto fail; + } + + pool->pool_size = nb_contexts; + + /* Init contexts */ + for (int i = 0; i < pool->pool_size; i++) { + FFVkExecContext *e = &pool->contexts[i]; + + /* Fence */ + VkFenceCreateInfo fence_create = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + .flags = VK_FENCE_CREATE_SIGNALED_BIT, + }; + ret = vk->CreateFence(s->hwctx->act_dev, &fence_create, s->hwctx->alloc, + &e->fence); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create submission fence: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + e->idx = i; + e->parent = pool; + + /* Query data */ + e->query_data = ((uint8_t *)pool->query_data) + pool->qd_size*i; + e->query_idx = nb_queries*i; + + /* Command buffer */ + e->buf = pool->cmd_bufs[i]; + + /* Queue index distribution */ + e->qi = i % qf->nb_queues; + e->qf = qf->queue_family; + vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, + e->qi, &e->queue); + } + + return 0; + +fail: + ff_vk_exec_pool_free(s, pool); + return err; +} + +VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, + void **data, int64_t *status) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + const FFVkExecPool *pool = e->parent; + + int32_t *res32 = e->query_data; + 
int64_t *res64 = e->query_data; + int64_t res = 0; + VkQueryResultFlags qf = 0; + + qf |= pool->query_64bit ? + VK_QUERY_RESULT_64_BIT : 0x0; + qf |= pool->query_statuses ? + VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; + + ret = vk->GetQueryPoolResults(s->hwctx->act_dev, pool->query_pool, + e->query_idx, + pool->nb_queries, + pool->qd_size, e->query_data, + pool->query_64bit ? 8 : 4, qf); + if (ret != VK_SUCCESS) + return ret; + + if (pool->query_statuses && pool->query_64bit) { + for (int i = 0; i < pool->query_statuses; i++) { + res = (res64[i] < res) || (res >= 0 && res64[i] > res) ? + res64[i] : res; + res64 += pool->query_status_stride; + } + } else if (pool->query_statuses) { + for (int i = 0; i < pool->query_statuses; i++) { + res = (res32[i] < res) || (res >= 0 && res32[i] > res) ? + res32[i] : res; + res32 += pool->query_status_stride; + } + } + + if (data) + *data = e->query_data; + if (status) + *status = res; + + return VK_SUCCESS; +} + +FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool) +{ + int idx = atomic_fetch_add_explicit(&pool->idx, 1, memory_order_relaxed); + idx %= pool->pool_size; + return &pool->contexts[idx]; +} + +void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e) +{ + FFVulkanFunctions *vk = &s->vkfn; + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); +} + +int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + const FFVkExecPool *pool = e->parent; + + VkCommandBufferBeginInfo cmd_start = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + + /* Create the fence and don't wait for it initially */ + vk->WaitForFences(s->hwctx->act_dev, 1, &e->fence, VK_TRUE, UINT64_MAX); + vk->ResetFences(s->hwctx->act_dev, 1, &e->fence); + + /* Discard queue dependencies */ + ff_vk_exec_discard_deps(s, e); + + ret = vk->BeginCommandBuffer(e->buf, &cmd_start); + if (ret != VK_SUCCESS) { + 
av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + if (pool->nb_queries) + vk->CmdResetQueryPool(e->buf, pool->query_pool, + e->query_idx, pool->nb_queries); + + return 0; +} + +void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e) +{ + for (int j = 0; j < e->nb_buf_deps; j++) + av_buffer_unref(&e->buf_deps[j]); + e->nb_buf_deps = 0; + + for (int j = 0; j < e->nb_frame_deps; j++) { + AVFrame *f = e->frame_deps[j]; + if (e->frame_locked[j]) { + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + vkfc->unlock_frame(hwfc, vkf); + e->frame_locked[j] = 0; + } + e->frame_update[j] = 0; + if (f->buf[0]) + av_frame_free(&e->frame_deps[j]); + } + e->nb_frame_deps = 0; + + e->sem_wait_cnt = 0; + e->sem_sig_cnt = 0; + e->sem_sig_val_dst_cnt = 0; +} + +int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, + AVBufferRef **deps, int nb_deps, int ref) +{ + AVBufferRef **dst = av_fast_realloc(e->buf_deps, &e->buf_deps_alloc_size, + (e->nb_buf_deps + nb_deps) * sizeof(*dst)); + if (!dst) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + + e->buf_deps = dst; + + for (int i = 0; i < nb_deps; i++) { + e->buf_deps[e->nb_buf_deps] = ref ? 
av_buffer_ref(deps[i]) : deps[i]; + if (!e->buf_deps[e->nb_buf_deps]) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + e->nb_buf_deps++; + } + + return 0; +} + +int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkPipelineStageFlagBits2 wait_stage, + VkPipelineStageFlagBits2 signal_stage) +{ + uint8_t *frame_locked; + uint8_t *frame_update; + AVFrame **frame_deps; + VkImageLayout *layout_dst; + uint32_t *queue_family_dst; + VkAccessFlagBits *access_dst; + + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + int nb_images = ff_vk_count_images(vkf); + + /* Don't add duplicates */ + for (int i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + return 1; + +#define ARR_REALLOC(str, arr, alloc_s, cnt) \ + do { \ + arr = av_fast_realloc(str->arr, alloc_s, (cnt + 1)*sizeof(*arr)); \ + if (!arr) { \ + ff_vk_exec_discard_deps(s, e); \ + return AVERROR(ENOMEM); \ + } \ + str->arr = arr; \ + } while (0) + + ARR_REALLOC(e, layout_dst, &e->layout_dst_alloc, e->nb_frame_deps); + ARR_REALLOC(e, queue_family_dst, &e->queue_family_dst_alloc, e->nb_frame_deps); + ARR_REALLOC(e, access_dst, &e->access_dst_alloc, e->nb_frame_deps); + + ARR_REALLOC(e, frame_locked, &e->frame_locked_alloc_size, e->nb_frame_deps); + ARR_REALLOC(e, frame_update, &e->frame_update_alloc_size, e->nb_frame_deps); + ARR_REALLOC(e, frame_deps, &e->frame_deps_alloc_size, e->nb_frame_deps); + + e->frame_deps[e->nb_frame_deps] = f->buf[0] ? 
av_frame_clone(f) : f; + if (!e->frame_deps[e->nb_frame_deps]) { + ff_vk_exec_discard_deps(s, e); + return AVERROR(ENOMEM); + } + + vkfc->lock_frame(hwfc, vkf); + e->frame_locked[e->nb_frame_deps] = 1; + e->frame_update[e->nb_frame_deps] = 0; + e->nb_frame_deps++; + + for (int i = 0; i < nb_images; i++) { + VkSemaphoreSubmitInfo *sem_wait; + VkSemaphoreSubmitInfo *sem_sig; + uint64_t **sem_sig_val_dst; + + ARR_REALLOC(e, sem_wait, &e->sem_wait_alloc, e->sem_wait_cnt); + ARR_REALLOC(e, sem_sig, &e->sem_sig_alloc, e->sem_sig_cnt); + ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); + + e->sem_wait[e->sem_wait_cnt++] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = vkf->sem[i], + .value = vkf->sem_value[i], + .stageMask = wait_stage, + }; + + e->sem_sig[e->sem_sig_cnt++] = (VkSemaphoreSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .semaphore = vkf->sem[i], + .value = vkf->sem_value[i] + 1, + .stageMask = signal_stage, + }; + + e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = &vkf->sem_value[i]; + e->sem_sig_val_dst_cnt++; + } + + return 0; +} + +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar) +{ + int i; + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == f->data[0]) + break; + av_assert0(i < e->nb_frame_deps); + + /* Don't update duplicates */ + if (nb_img_bar && !e->frame_update[i]) + (*nb_img_bar)++; + + e->queue_family_dst[i] = bar->dstQueueFamilyIndex; + e->access_dst[i] = bar->dstAccessMask; + e->layout_dst[i] = bar->newLayout; + e->frame_update[i] = 1; +} + +int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *dst, uint64_t *dst_val, + AVFrame *f) +{ + uint64_t **sem_sig_val_dst; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + + /* Reject unknown frames */ + int i; + for (i = 0; i < e->nb_frame_deps; i++) + if 
(e->frame_deps[i]->data[0] == f->data[0]) + break; + if (i == e->nb_frame_deps) + return AVERROR(EINVAL); + + ARR_REALLOC(e, sem_sig_val_dst, &e->sem_sig_val_dst_alloc, e->sem_sig_val_dst_cnt); + + *dst = vkf->sem[0]; + *dst_val = vkf->sem_value[0]; + + e->sem_sig_val_dst[e->sem_sig_val_dst_cnt] = dst_val; + e->sem_sig_val_dst_cnt++; + + return 0; +} + +int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e) +{ + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkCommandBufferSubmitInfo cmd_buf_info = (VkCommandBufferSubmitInfo) { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO, + .commandBuffer = e->buf, + }; + VkSubmitInfo2 submit_info = (VkSubmitInfo2) { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2, + .pCommandBufferInfos = &cmd_buf_info, + .commandBufferInfoCount = 1, + .pWaitSemaphoreInfos = e->sem_wait, + .waitSemaphoreInfoCount = e->sem_wait_cnt, + .pSignalSemaphoreInfos = e->sem_sig, + .signalSemaphoreInfoCount = e->sem_sig_cnt, + }; + + ret = vk->EndCommandBuffer(e->buf); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", + ff_vk_ret2str(ret)); + ff_vk_exec_discard_deps(s, e); + return AVERROR_EXTERNAL; + } + + s->hwctx->lock_queue(s->device, e->qf, e->qi); + ret = vk->QueueSubmit2(e->queue, 1, &submit_info, e->fence); + s->hwctx->unlock_queue(s->device, e->qf, e->qi); + + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", + ff_vk_ret2str(ret)); + ff_vk_exec_discard_deps(s, e); + return AVERROR_EXTERNAL; + } + + for (int i = 0; i < e->sem_sig_val_dst_cnt; i++) + *e->sem_sig_val_dst[i] += 1; + + /* Unlock all frames */ + for (int j = 0; j < e->nb_frame_deps; j++) { + if (e->frame_locked[j]) { + AVFrame *f = e->frame_deps[j]; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + + if (e->frame_update[j]) { + int nb_images = ff_vk_count_images(vkf); + 
for (int i = 0; i < nb_images; i++) { + vkf->layout[i] = e->layout_dst[j]; + vkf->access[i] = e->access_dst[j]; + vkf->queue_family[i] = e->queue_family_dst[j]; + } + } + vkfc->unlock_frame(hwfc, vkf); + e->frame_locked[j] = 0; + } + } + + return 0; } int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, @@ -322,6 +807,10 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, but should be ok */ }; + VkMemoryAllocateFlagsInfo alloc_flags = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT, + }; VkBufferMemoryRequirementsInfo2 req_desc = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2, }; @@ -351,11 +840,18 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, /* In case the implementation prefers/requires dedicated allocation */ use_ded_mem = ded_req.prefersDedicatedAllocation | ded_req.requiresDedicatedAllocation; - if (use_ded_mem) + if (use_ded_mem) { ded_alloc.buffer = buf->buf; + ded_alloc.pNext = alloc_pNext; + alloc_pNext = &ded_alloc; + } + + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + alloc_flags.pNext = alloc_pNext; + alloc_pNext = &alloc_flags; + } - err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, - use_ded_mem ? 
&ded_alloc : (void *)ded_alloc.pNext, + err = ff_vk_alloc_mem(s, &req.memoryRequirements, flags, alloc_pNext, &buf->flags, &buf->mem); if (err) return err; @@ -367,27 +863,72 @@ int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, return AVERROR_EXTERNAL; } + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) { + VkBufferDeviceAddressInfo address_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + .buffer = buf->buf, + }; + buf->address = vk->GetBufferDeviceAddress(s->hwctx->act_dev, &address_info); + } + buf->size = size; return 0; } -int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], +static void destroy_avvkbuf(void *opaque, uint8_t *data) +{ + FFVulkanContext *s = opaque; + FFVkBuffer *buf = (FFVkBuffer *)data; + ff_vk_free_buf(s, buf); + av_free(buf); +} + +int ff_vk_create_avbuf(FFVulkanContext *s, AVBufferRef **ref, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags) +{ + int err; + AVBufferRef *buf; + FFVkBuffer *vkb = av_mallocz(sizeof(*vkb)); + if (!vkb) + return AVERROR(ENOMEM); + + err = ff_vk_create_buf(s, vkb, size, pNext, alloc_pNext, usage, flags); + if (err < 0) { + av_free(vkb); + return err; + } + + buf = av_buffer_create((uint8_t *)vkb, sizeof(*vkb), destroy_avvkbuf, s, 0); + if (!buf) { + destroy_avvkbuf(s, (uint8_t *)vkb); + return AVERROR(ENOMEM); + } + + *ref = buf; + + return 0; +} + +int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[], int nb_buffers, int invalidate) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - VkMappedMemoryRange *inval_list = NULL; + VkMappedMemoryRange inval_list[64]; int inval_count = 0; for (int i = 0; i < nb_buffers; i++) { - ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0, - VK_WHOLE_SIZE, 0, (void **)&mem[i]); + void *dst; + ret = vk->MapMemory(s->hwctx->act_dev, buf[i]->mem, 0, + VK_WHOLE_SIZE, 0, &dst); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Failed to 
map buffer memory: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } + mem[i] = dst; } if (!invalidate) @@ -396,16 +937,12 @@ int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], for (int i = 0; i < nb_buffers; i++) { const VkMappedMemoryRange ival_buf = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i].mem, + .memory = buf[i]->mem, .size = VK_WHOLE_SIZE, }; - if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) continue; - inval_list = av_fast_realloc(s->scratch, &s->scratch_size, - (++inval_count)*sizeof(*inval_list)); - if (!inval_list) - return AVERROR(ENOMEM); - inval_list[inval_count - 1] = ival_buf; + inval_list[inval_count++] = ival_buf; } if (inval_count) { @@ -421,29 +958,25 @@ int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], return 0; } -int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, +int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers, int flush) { int err = 0; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - VkMappedMemoryRange *flush_list = NULL; + VkMappedMemoryRange flush_list[64]; int flush_count = 0; if (flush) { for (int i = 0; i < nb_buffers; i++) { const VkMappedMemoryRange flush_buf = { .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE, - .memory = buf[i].mem, + .memory = buf[i]->mem, .size = VK_WHOLE_SIZE, }; - if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) + if (buf[i]->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) continue; - flush_list = av_fast_realloc(s->scratch, &s->scratch_size, - (++flush_count)*sizeof(*flush_list)); - if (!flush_list) - return AVERROR(ENOMEM); - flush_list[flush_count - 1] = flush_buf; + flush_list[flush_count++] = flush_buf; } } @@ -458,7 +991,7 @@ int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, } for (int i = 0; i < nb_buffers; i++) - vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem); + 
vk->UnmapMemory(s->hwctx->act_dev, buf[i]->mem); return err; } @@ -470,547 +1003,109 @@ void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf) if (!buf || !s->hwctx) return; + if (buf->mapped_mem) + ff_vk_unmap_buffer(s, buf, 0); if (buf->buf != VK_NULL_HANDLE) vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc); if (buf->mem != VK_NULL_HANDLE) vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc); } -int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *f, int idx, - int width, int height, VkFormat fmt, VkImageTiling tiling, - VkImageUsageFlagBits usage, VkImageCreateFlags flags, - void *create_pnext, VkDeviceMemory *mem, void *alloc_pnext) -{ - int err; - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - AVVulkanDeviceContext *hwctx = s->hwctx; - - VkExportSemaphoreCreateInfo ext_sem_info = { - .sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, -#ifdef _WIN32 - .handleTypes = IsWindows8OrGreater() - ? VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT - : VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT, -#else - .handleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT, -#endif - }; - - VkSemaphoreTypeCreateInfo sem_type_info = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, -#ifdef _WIN32 - .pNext = s->extensions & FF_VK_EXT_EXTERNAL_WIN32_SEM ? &ext_sem_info : NULL, -#else - .pNext = s->extensions & FF_VK_EXT_EXTERNAL_FD_SEM ? 
&ext_sem_info : NULL, -#endif - .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE, - .initialValue = 0, - }; - - VkSemaphoreCreateInfo sem_spawn = { - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, - .pNext = &sem_type_info, - }; - - /* Create the image */ - VkImageCreateInfo create_info = { - .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, - .pNext = create_pnext, - .imageType = VK_IMAGE_TYPE_2D, - .format = fmt, - .extent.depth = 1, - .mipLevels = 1, - .arrayLayers = 1, - .flags = flags, - .tiling = tiling, - .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .usage = usage, - .samples = VK_SAMPLE_COUNT_1_BIT, - .pQueueFamilyIndices = s->qfs, - .queueFamilyIndexCount = s->nb_qfs, - .sharingMode = s->nb_qfs > 1 ? VK_SHARING_MODE_CONCURRENT : - VK_SHARING_MODE_EXCLUSIVE, - }; - - ret = vk->CreateImage(hwctx->act_dev, &create_info, - hwctx->alloc, &f->img[0]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Image creation failure: %s\n", - ff_vk_ret2str(ret)); - err = AVERROR(EINVAL); - goto fail; - } - - /* Create semaphore */ - ret = vk->CreateSemaphore(hwctx->act_dev, &sem_spawn, - hwctx->alloc, &f->sem[0]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create semaphore: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - f->queue_family[0] = s->nb_qfs > 1 ? 
VK_QUEUE_FAMILY_IGNORED : s->qfs[0]; - f->layout[0] = create_info.initialLayout; - f->access[0] = 0x0; - f->sem_value[0] = 0; - - f->flags = 0x0; - f->tiling = tiling; - - return 0; - -fail: - return err; -} - -int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, - VkShaderStageFlagBits stage) +static void free_data_buf(void *opaque, uint8_t *data) { - VkPushConstantRange *pc; - - pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts), - pl->push_consts_num + 1); - if (!pl->push_consts) - return AVERROR(ENOMEM); - - pc = &pl->push_consts[pl->push_consts_num++]; - memset(pc, 0, sizeof(*pc)); - - pc->stageFlags = stage; - pc->offset = offset; - pc->size = size; - - return 0; + FFVulkanContext *ctx = opaque; + FFVkBuffer *buf = (FFVkBuffer *)data; + ff_vk_free_buf(ctx, buf); + av_free(data); } -FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num) -int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, - FFVkQueueFamilyCtx *qf) +static AVBufferRef *alloc_data_buf(void *opaque, size_t size) { - VkResult ret; - FFVkExecContext *e; - FFVulkanFunctions *vk = &s->vkfn; - - VkCommandPoolCreateInfo cqueue_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, - .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, - .queueFamilyIndex = qf->queue_family, - }; - VkCommandBufferAllocateInfo cbuf_create = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, - .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, - .commandBufferCount = qf->nb_queues, - }; - - e = create_exec_ctx(s); - if (!e) - return AVERROR(ENOMEM); - - e->qf = qf; - - e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues)); - if (!e->queues) - return AVERROR(ENOMEM); - - e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs)); - if (!e->bufs) - return AVERROR(ENOMEM); - - /* Create command pool */ - ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create, - s->hwctx->alloc, &e->pool); - if (ret != VK_SUCCESS) { 
- av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - cbuf_create.commandPool = e->pool; - - /* Allocate command buffer */ - ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - for (int i = 0; i < qf->nb_queues; i++) { - FFVkQueueCtx *q = &e->queues[i]; - vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family, - i % qf->actual_queues, &q->queue); - } - - *ctx = e; + AVBufferRef *ref; + uint8_t *buf = av_mallocz(size); + if (!buf) + return NULL; - return 0; + ref = av_buffer_create(buf, size, free_data_buf, opaque, 0); + if (!ref) + av_free(buf); + return ref; } -int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e, - int nb_queries, VkQueryType type, - int elem_64bits, void *create_pnext) +int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, + AVBufferRef **buf, VkBufferUsageFlags usage, + void *create_pNext, size_t size, + VkMemoryPropertyFlagBits mem_props) { - VkResult ret; - size_t qd_size; - int nb_results = nb_queries; - int nb_statuses = 0 /* Once RADV has support, = nb_queries */; - int status_stride = 2; - int result_elem_size = elem_64bits ? 
8 : 4; - FFVulkanFunctions *vk = &s->vkfn; - VkQueryPoolCreateInfo query_pool_info = { - .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, - .pNext = create_pnext, - .queryType = type, - .queryCount = nb_queries*e->qf->nb_queues, - }; - - if (e->query.pool) - return AVERROR(EINVAL); + int err; + AVBufferRef *ref; + FFVkBuffer *data; - /* Video encode quieries produce two results per query */ - if (type == VK_QUERY_TYPE_VIDEO_ENCODE_BITSTREAM_BUFFER_RANGE_KHR) { - status_stride = 3; /* skip,skip,result,skip,skip,result */ - nb_results *= 2; - } else if (type == VK_QUERY_TYPE_RESULT_STATUS_ONLY_KHR) { - status_stride = 1; - nb_results *= 0; + if (!(*buf_pool)) { + *buf_pool = av_buffer_pool_init2(sizeof(FFVkBuffer), ctx, + alloc_data_buf, NULL); + if (!(*buf_pool)) + return AVERROR(ENOMEM); } - qd_size = nb_results*result_elem_size + nb_statuses*result_elem_size; - - e->query.data = av_mallocz(e->qf->nb_queues*qd_size); - if (!e->query.data) + *buf = ref = av_buffer_pool_get(*buf_pool); + if (!ref) return AVERROR(ENOMEM); - ret = vk->CreateQueryPool(s->hwctx->act_dev, &query_pool_info, - s->hwctx->alloc, &e->query.pool); - if (ret != VK_SUCCESS) - return AVERROR_EXTERNAL; - - e->query.data_per_queue = qd_size; - e->query.nb_queries = nb_queries; - e->query.nb_results = nb_results; - e->query.nb_statuses = nb_statuses; - e->query.elem_64bits = elem_64bits; - e->query.status_stride = status_stride; - - return 0; -} - -int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e, - int query_idx, void **data, int64_t *status) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - uint8_t *qd; - int32_t *res32; - int64_t *res64; - int64_t res = 0; - VkQueryResultFlags qf = 0; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; + data = (FFVkBuffer *)ref->data; + data->stage = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT; + data->access = VK_ACCESS_2_NONE; - if (!q->submitted) { - *data = NULL; + if (data->size >= size) return 0; - } - - qd = e->query.data + 
e->qf->cur_queue*e->query.data_per_queue; - qf |= e->query.nb_results && e->query.nb_statuses ? - VK_QUERY_RESULT_WITH_STATUS_BIT_KHR : 0x0; - qf |= e->query.elem_64bits ? VK_QUERY_RESULT_64_BIT : 0x0; - res32 = (int32_t *)(qd + e->query.nb_results*4); - res64 = (int64_t *)(qd + e->query.nb_results*8); - - ret = vk->GetQueryPoolResults(s->hwctx->act_dev, e->query.pool, - query_idx, - e->query.nb_queries, - e->query.data_per_queue, qd, - e->query.elem_64bits ? 8 : 4, qf); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to perform query: %s!\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - if (e->query.nb_statuses && e->query.elem_64bits) { - for (int i = 0; i < e->query.nb_queries; i++) { - res = (res64[i] < res) || (res >= 0 && res64[i] > res) ? - res64[i] : res; - res64 += e->query.status_stride; - } - } else if (e->query.nb_statuses) { - for (int i = 0; i < e->query.nb_queries; i++) { - res = (res32[i] < res) || (res >= 0 && res32[i] > res) ? - res32[i] : res; - res32 += e->query.status_stride; - } - } - - if (data) - *data = qd; - if (status) - *status = res; - - return 0; -} - -void ff_vk_discard_exec_deps(FFVkExecContext *e) -{ - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - q->nb_buf_deps = 0; - - for (int j = 0; j < q->nb_frame_deps; j++) - av_frame_free(&q->frame_deps[j]); - q->nb_frame_deps = 0; - - e->sem_wait_cnt = 0; - e->sem_sig_cnt = 0; -} - -int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e) -{ - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - VkCommandBufferBeginInfo cmd_start = { - .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, - .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, - }; - - /* Create the fence and don't wait for it initially */ - if (!q->fence) { - VkFenceCreateInfo fence_spawn = { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - }; - ret 
= vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc, - &q->fence); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to queue frame fence: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } else if (!q->synchronous) { - vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); - } - - q->synchronous = 0; - /* Discard queue dependencies */ - ff_vk_discard_exec_deps(e); + ff_vk_free_buf(ctx, data); + memset(data, 0, sizeof(*data)); - ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } + av_log(ctx, AV_LOG_DEBUG, "Allocating buffer of %lu bytes for pool %p\n", + size, *buf_pool); - if (e->query.pool) { - e->query.idx = e->qf->cur_queue*e->query.nb_queries; - vk->CmdResetQueryPool(e->bufs[e->qf->cur_queue], e->query.pool, - e->query.idx, e->query.nb_queries); + err = ff_vk_create_buf(ctx, data, size, + create_pNext, NULL, usage, + mem_props); + if (err < 0) { + av_buffer_unref(&ref); + return err; } - return 0; -} - -VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e) -{ - return e->bufs[e->qf->cur_queue]; -} - -int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, - VkPipelineStageFlagBits in_wait_dst_flag) -{ - AVFrame **dst; - AVVkFrame *f = (AVVkFrame *)frame->data[0]; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data; - int planes = av_pix_fmt_count_planes(fc->sw_format); - - for (int i = 0; i < planes; i++) { - e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait)); - if (!e->sem_wait) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc, - (e->sem_wait_cnt 
+ 1)*sizeof(*e->sem_wait_dst)); - if (!e->sem_wait_dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); + if (mem_props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) { + err = ff_vk_map_buffer(ctx, data, &data->mapped_mem, 0); + if (err < 0) { + av_buffer_unref(&ref); + return err; } - - e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc, - (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val)); - if (!e->sem_wait_val) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig)); - if (!e->sem_sig) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val)); - if (!e->sem_sig_val) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc, - (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst)); - if (!e->sem_sig_val_dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - e->sem_wait[e->sem_wait_cnt] = f->sem[i]; - e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag; - e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i]; - e->sem_wait_cnt++; - - e->sem_sig[e->sem_sig_cnt] = f->sem[i]; - e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1; - e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i]; - e->sem_sig_cnt++; } - dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size, - (q->nb_frame_deps + 1) * sizeof(*dst)); - if (!dst) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - - q->frame_deps = dst; - q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame); - if (!q->frame_deps[q->nb_frame_deps]) { - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); - } - q->nb_frame_deps++; - return 0; } -int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e) -{ - VkResult ret; - 
FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = { - .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, - .pWaitSemaphoreValues = e->sem_wait_val, - .pSignalSemaphoreValues = e->sem_sig_val, - .waitSemaphoreValueCount = e->sem_wait_cnt, - .signalSemaphoreValueCount = e->sem_sig_cnt, - }; - - VkSubmitInfo s_info = { - .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, - .pNext = &s_timeline_sem_info, - - .commandBufferCount = 1, - .pCommandBuffers = &e->bufs[e->qf->cur_queue], - - .pWaitSemaphores = e->sem_wait, - .pWaitDstStageMask = e->sem_wait_dst, - .waitSemaphoreCount = e->sem_wait_cnt, - - .pSignalSemaphores = e->sem_sig, - .signalSemaphoreCount = e->sem_sig_cnt, - }; - - ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - s->hwctx->lock_queue((AVHWDeviceContext *)s->device_ref->data, - e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues); - - ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence); - - s->hwctx->unlock_queue((AVHWDeviceContext *)s->device_ref->data, - e->qf->queue_family, e->qf->cur_queue % e->qf->actual_queues); - - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - - for (int i = 0; i < e->sem_sig_cnt; i++) - *e->sem_sig_val_dst[i] += 1; - - e->query.idx = e->qf->cur_queue*e->query.nb_queries; - q->submitted = 1; - - return 0; -} - -void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) -{ - FFVulkanFunctions *vk = &s->vkfn; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - if (!q->submitted) - return; - - vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); - q->synchronous = 1; -} - -int 
ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef **deps, int nb_deps) +int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, + VkShaderStageFlagBits stage) { - AVBufferRef **dst; - FFVkQueueCtx *q = &e->queues[e->qf->cur_queue]; - - if (!deps || !nb_deps) - return 0; + VkPushConstantRange *pc; - dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size, - (q->nb_buf_deps + nb_deps) * sizeof(*dst)); - if (!dst) - goto err; + pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts), + pl->push_consts_num + 1); + if (!pl->push_consts) + return AVERROR(ENOMEM); - q->buf_deps = dst; + pc = &pl->push_consts[pl->push_consts_num++]; + memset(pc, 0, sizeof(*pc)); - for (int i = 0; i < nb_deps; i++) { - q->buf_deps[q->nb_buf_deps] = deps[i]; - if (!q->buf_deps[q->nb_buf_deps]) - goto err; - q->nb_buf_deps++; - } + pc->stageFlags = stage; + pc->offset = offset; + pc->size = size; return 0; - -err: - ff_vk_discard_exec_deps(e); - return AVERROR(ENOMEM); } -FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num) -FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, - int unnorm_coords, VkFilter filt) +int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, + int unnorm_coords, VkFilter filt) { VkResult ret; FFVulkanFunctions *vk = &s->vkfn; @@ -1030,22 +1125,15 @@ FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, .unnormalizedCoordinates = unnorm_coords, }; - FFVkSampler *sctx = create_sampler(s); - if (!sctx) - return NULL; - ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info, - s->hwctx->alloc, &sctx->sampler[0]); + s->hwctx->alloc, sampler); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n", ff_vk_ret2str(ret)); - return NULL; + return AVERROR_EXTERNAL; } - for (int i = 1; i < 4; i++) - sctx->sampler[i] = sctx->sampler[0]; - - return sctx; + return 0; } int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt) @@ -1068,79 +1156,139 @@ const char 
*ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt) } typedef struct ImageViewCtx { - VkImageView view; + VkImageView views[AV_NUM_DATA_POINTERS]; + int nb_views; } ImageViewCtx; -static void destroy_imageview(void *opaque, uint8_t *data) +static void destroy_imageviews(void *opaque, uint8_t *data) { FFVulkanContext *s = opaque; FFVulkanFunctions *vk = &s->vkfn; ImageViewCtx *iv = (ImageViewCtx *)data; - vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc); + for (int i = 0; i < iv->nb_views; i++) + vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); + av_free(iv); } -int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e, - VkImageView *v, VkImage img, VkFormat fmt, - const VkComponentMapping map) +int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, + VkImageView views[AV_NUM_DATA_POINTERS], + AVFrame *f) { int err; + VkResult ret; AVBufferRef *buf; FFVulkanFunctions *vk = &s->vkfn; - - VkImageViewCreateInfo imgview_spawn = { - .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, - .pNext = NULL, - .image = img, - .viewType = VK_IMAGE_VIEW_TYPE_2D, - .format = fmt, - .components = map, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }, - }; + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + const VkFormat *rep_fmts = av_vkfmt_from_pixfmt(hwfc->sw_format); + AVVkFrame *vkf = (AVVkFrame *)f->data[0]; + const int nb_images = ff_vk_count_images(vkf); + const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); ImageViewCtx *iv = av_mallocz(sizeof(*iv)); + if (!iv) + return AVERROR(ENOMEM); - VkResult ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn, - s->hwctx->alloc, &iv->view); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; + for (int i = 0; i < nb_planes; i++) { + 
VkImageAspectFlags plane_aspect[] = { VK_IMAGE_ASPECT_COLOR_BIT, + VK_IMAGE_ASPECT_PLANE_0_BIT, + VK_IMAGE_ASPECT_PLANE_1_BIT, + VK_IMAGE_ASPECT_PLANE_2_BIT, }; + + VkImageViewCreateInfo view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .pNext = NULL, + .image = vkf->img[FFMIN(i, nb_images - 1)], + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = rep_fmts[i], + .components = ff_comp_identity_map, + .subresourceRange = { + .aspectMask = plane_aspect[(nb_planes != nb_images) + + i*(nb_planes != nb_images)], + .levelCount = 1, + .layerCount = 1, + }, + }; + + ret = vk->CreateImageView(s->hwctx->act_dev, &view_create_info, + s->hwctx->alloc, &iv->views[i]); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n", + ff_vk_ret2str(ret)); + err = AVERROR_EXTERNAL; + goto fail; + } + + iv->nb_views++; } - buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0); + buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageviews, s, 0); if (!buf) { - destroy_imageview(s, (uint8_t *)iv); - return AVERROR(ENOMEM); + err = AVERROR(ENOMEM); + goto fail; } /* Add to queue dependencies */ - err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1); - if (err) { + err = ff_vk_exec_add_dep_buf(s, e, &buf, 1, 0); + if (err < 0) av_buffer_unref(&buf); - return err; - } - *v = iv->view; + memcpy(views, iv->views, nb_planes*sizeof(*views)); - return 0; + return err; + +fail: + for (int i = 0; i < iv->nb_views; i++) + vk->DestroyImageView(s->hwctx->act_dev, iv->views[i], s->hwctx->alloc); + av_free(iv); + return err; } -FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num) -FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name, - VkShaderStageFlags stage) +void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, + VkPipelineStageFlags src_stage, + VkPipelineStageFlags dst_stage, + VkAccessFlagBits new_access, + 
VkImageLayout new_layout, + uint32_t new_qf) { - FFVkSPIRVShader *shd = create_shader(pl); - if (!shd) - return NULL; + int i, found; + AVVkFrame *vkf = (AVVkFrame *)pic->data[0]; + const int nb_images = ff_vk_count_images(vkf); + for (i = 0; i < e->nb_frame_deps; i++) + if (e->frame_deps[i]->data[0] == pic->data[0]) + break; + found = (i < e->nb_frame_deps) && (e->frame_update[i]) ? i : -1; + + for (int i = 0; i < nb_images; i++) { + bar[*nb_bar] = (VkImageMemoryBarrier2) { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2, + .pNext = NULL, + .srcStageMask = src_stage, + .dstStageMask = dst_stage, + .srcAccessMask = found >= 0 ? e->access_dst[found] : vkf->access[i], + .dstAccessMask = new_access, + .oldLayout = found >= 0 ? e->layout_dst[found] : vkf->layout[0], + .newLayout = new_layout, + .srcQueueFamilyIndex = found >= 0 ? e->queue_family_dst[found] : vkf->queue_family[0], + .dstQueueFamilyIndex = new_qf, + .image = vkf->img[i], + .subresourceRange = (VkImageSubresourceRange) { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .layerCount = 1, + .levelCount = 1, + }, + }; + *nb_bar += 1; + } + ff_vk_exec_update_frame(s, e, pic, &bar[*nb_bar - nb_images], NULL); +} + +int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, + VkShaderStageFlags stage) +{ av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED); shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; @@ -1151,22 +1299,24 @@ FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name, GLSLF(0, #version %i ,460); GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) ); GLSLC(0, ); + GLSLC(0, #extension GL_EXT_buffer_reference : require ); + GLSLC(0, #extension GL_EXT_buffer_reference2 : require ); - return shd; + return 0; } -void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]) +void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z) { - shd->local_size[0] = local_size[0]; - 
shd->local_size[1] = local_size[1]; - shd->local_size[2] = local_size[2]; + shd->local_size[0] = x; + shd->local_size[1] = y; + shd->local_size[2] = z; av_bprintf(&shd->src, "layout (local_size_x = %i, " "local_size_y = %i, local_size_z = %i) in;\n\n", shd->local_size[0], shd->local_size[1], shd->local_size[2]); } -void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio) +void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio) { int line = 0; const char *p = shd->src.str; @@ -1188,36 +1338,24 @@ void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio) av_bprint_finalize(&buf, NULL); } -int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, - const char *entrypoint) +void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd) +{ + FFVulkanFunctions *vk = &s->vkfn; + av_bprint_finalize(&shd->src, NULL); + + if (shd->shader.module) + vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, s->hwctx->alloc); +} + +int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd, + uint8_t *spirv, size_t spirv_size, const char *entrypoint) { - int err; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; VkShaderModuleCreateInfo shader_create; - uint8_t *spirv; - size_t spirv_size; - void *priv; shd->shader.pName = entrypoint; - if (!s->spirv_compiler) { -#if CONFIG_LIBGLSLANG - s->spirv_compiler = ff_vk_glslang_init(); -#elif CONFIG_LIBSHADERC - s->spirv_compiler = ff_vk_shaderc_init(); -#else - return AVERROR(ENOSYS); -#endif - if (!s->spirv_compiler) - return AVERROR(ENOMEM); - } - - err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv, - &spirv_size, entrypoint, &priv); - if (err < 0) - return err; - av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! 
Size: %zu bytes\n", shd->name, spirv_size); @@ -1229,11 +1367,8 @@ int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL, &shd->shader.module); - - s->spirv_compiler->free_shader(s->spirv_compiler, &priv); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n", + av_log(s, AV_LOG_VERBOSE, "Error creating shader module: %s\n", ff_vk_ret2str(ret)); return AVERROR_EXTERNAL; } @@ -1262,132 +1397,88 @@ static const struct descriptor_props { [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER] = { sizeof(VkBufferView), "imageBuffer", 1, 0, 0, 0, }, }; -int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, - int num, int only_print_to_shader) +int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd, + FFVulkanDescriptorSetBinding *desc, int nb, + int read_only, int print_to_shader_only) { VkResult ret; - VkDescriptorSetLayout *layout; + int has_sampler = 0; FFVulkanFunctions *vk = &s->vkfn; + FFVulkanDescriptorSet *set; + VkDescriptorSetLayoutCreateInfo desc_create_layout; - if (only_print_to_shader) + if (print_to_shader_only) goto print; - pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout), - pl->desc_layout_num + pl->qf->nb_queues); - if (!pl->desc_layout) + /* Actual layout allocated for the pipeline */ + set = av_realloc_array(pl->desc_set, sizeof(*pl->desc_set), + pl->nb_descriptor_sets + 1); + if (!set) return AVERROR(ENOMEM); + pl->desc_set = set; + set = &set[pl->nb_descriptor_sets]; + memset(set, 0, sizeof(*set)); - pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized, - sizeof(*pl->desc_set_initialized), - pl->descriptor_sets_num + 1); - if (!pl->desc_set_initialized) + set->binding = av_calloc(nb, sizeof(*set->binding)); + if (!set->binding) return AVERROR(ENOMEM); - 
pl->desc_set_initialized[pl->descriptor_sets_num] = 0; - layout = &pl->desc_layout[pl->desc_layout_num]; - - { /* Create descriptor set layout descriptions */ - VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 }; - VkDescriptorSetLayoutBinding *desc_binding; - - desc_binding = av_mallocz(sizeof(*desc_binding)*num); - if (!desc_binding) - return AVERROR(ENOMEM); - - for (int i = 0; i < num; i++) { - desc_binding[i].binding = i; - desc_binding[i].descriptorType = desc[i].type; - desc_binding[i].descriptorCount = FFMAX(desc[i].elems, 1); - desc_binding[i].stageFlags = desc[i].stages; - desc_binding[i].pImmutableSamplers = desc[i].sampler ? - desc[i].sampler->sampler : - NULL; - } - - desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; - desc_create_layout.pBindings = desc_binding; - desc_create_layout.bindingCount = num; - - for (int i = 0; i < pl->qf->nb_queues; i++) { - ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, - s->hwctx->alloc, &layout[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor set " - "layout: %s\n", ff_vk_ret2str(ret)); - av_free(desc_binding); - return AVERROR_EXTERNAL; - } - } - - av_free(desc_binding); + set->binding_offset = av_calloc(nb, sizeof(*set->binding_offset)); + if (!set->binding_offset) { + av_freep(&set->binding); + return AVERROR(ENOMEM); } - { /* Pool each descriptor by type and update pool counts */ - for (int i = 0; i < num; i++) { - int j; - for (j = 0; j < pl->pool_size_desc_num; j++) - if (pl->pool_size_desc[j].type == desc[i].type) - break; - if (j >= pl->pool_size_desc_num) { - pl->pool_size_desc = av_realloc_array(pl->pool_size_desc, - sizeof(*pl->pool_size_desc), - ++pl->pool_size_desc_num); - if (!pl->pool_size_desc) - return AVERROR(ENOMEM); - memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize)); - } - pl->pool_size_desc[j].type = desc[i].type; - pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 
1)*pl->qf->nb_queues; - } - } + desc_create_layout = (VkDescriptorSetLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = nb, + .pBindings = set->binding, + .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, + }; - { /* Create template creation struct */ - VkDescriptorUpdateTemplateCreateInfo *dt; - VkDescriptorUpdateTemplateEntry *des_entries; + for (int i = 0; i < nb; i++) { + set->binding[i].binding = i; + set->binding[i].descriptorType = desc[i].type; + set->binding[i].descriptorCount = FFMAX(desc[i].elems, 1); + set->binding[i].stageFlags = desc[i].stages; + set->binding[i].pImmutableSamplers = desc[i].samplers; - /* Freed after descriptor set initialization */ - des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry)); - if (!des_entries) - return AVERROR(ENOMEM); + if (desc[i].type == VK_DESCRIPTOR_TYPE_SAMPLER || + desc[i].type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) + has_sampler |= 1; + } - for (int i = 0; i < num; i++) { - des_entries[i].dstBinding = i; - des_entries[i].descriptorType = desc[i].type; - des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1); - des_entries[i].dstArrayElement = 0; - des_entries[i].offset = ((uint8_t *)desc[i].updater) - (uint8_t *)s; - des_entries[i].stride = descriptor_props[desc[i].type].struct_size; - } + set->usage = VK_BUFFER_USAGE_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + if (has_sampler) + set->usage |= VK_BUFFER_USAGE_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT; - pl->desc_template_info = av_realloc_array(pl->desc_template_info, - sizeof(*pl->desc_template_info), - pl->total_descriptor_sets + pl->qf->nb_queues); - if (!pl->desc_template_info) - return AVERROR(ENOMEM); + ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout, + s->hwctx->alloc, &set->layout); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to init descriptor set layout: %s", + ff_vk_ret2str(ret)); + 
return AVERROR_EXTERNAL; + } - dt = &pl->desc_template_info[pl->total_descriptor_sets]; - memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues); + vk->GetDescriptorSetLayoutSizeEXT(s->hwctx->act_dev, set->layout, &set->layout_size); - for (int i = 0; i < pl->qf->nb_queues; i++) { - dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO; - dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET; - dt[i].descriptorSetLayout = layout[i]; - dt[i].pDescriptorUpdateEntries = des_entries; - dt[i].descriptorUpdateEntryCount = num; - } - } + set->aligned_size = FFALIGN(set->layout_size, s->desc_buf_props.descriptorBufferOffsetAlignment); - pl->descriptor_sets_num++; + for (int i = 0; i < nb; i++) + vk->GetDescriptorSetLayoutBindingOffsetEXT(s->hwctx->act_dev, set->layout, + i, &set->binding_offset[i]); - pl->desc_layout_num += pl->qf->nb_queues; - pl->total_descriptor_sets += pl->qf->nb_queues; + set->read_only = read_only; + set->nb_bindings = nb; + pl->nb_descriptor_sets++; print: /* Write shader info */ - for (int i = 0; i < num; i++) { + for (int i = 0; i < nb; i++) { const struct descriptor_props *prop = &descriptor_props[desc[i].type]; - GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i); + GLSLA("layout (set = %i, binding = %i", pl->nb_descriptor_sets - 1, i); if (desc[i].mem_layout) GLSLA(", %s", desc[i].mem_layout); @@ -1412,185 +1503,268 @@ print: else if (desc[i].elems > 0) GLSLA("[%i]", desc[i].elems); - GLSLA(";\n"); + GLSLA(";"); + GLSLA("\n"); } GLSLA("\n"); return 0; } -void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - int set_id) +int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, + FFVulkanPipeline *pl) { - FFVulkanFunctions *vk = &s->vkfn; + int err; - /* If a set has never been updated, update all queues' sets. 
*/ - if (!pl->desc_set_initialized[set_id]) { - for (int i = 0; i < pl->qf->nb_queues; i++) { - int idx = set_id*pl->qf->nb_queues + i; - vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, - pl->desc_set[idx], - pl->desc_template[idx], - s); - } - pl->desc_set_initialized[set_id] = 1; - return; - } + pl->desc_bind = av_calloc(pl->nb_descriptor_sets, sizeof(*pl->desc_bind)); + if (!pl->desc_bind) + return AVERROR(ENOMEM); - set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue; + pl->bound_buffer_indices = av_calloc(pl->nb_descriptor_sets, + sizeof(*pl->bound_buffer_indices)); + if (!pl->bound_buffer_indices) + return AVERROR(ENOMEM); + + for (int i = 0; i < pl->nb_descriptor_sets; i++) { + FFVulkanDescriptorSet *set = &pl->desc_set[i]; + int nb = set->read_only ? 1 : pool->pool_size; + + err = ff_vk_create_buf(s, &set->buf, set->aligned_size*nb, + NULL, NULL, set->usage, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (err < 0) + return err; + + err = ff_vk_map_buffer(s, &set->buf, &set->desc_mem, 0); + if (err < 0) + return err; + + pl->desc_bind[i] = (VkDescriptorBufferBindingInfoEXT) { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_INFO_EXT, + .usage = set->usage, + .address = set->buf.address, + }; - vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev, - pl->desc_set[set_id], - pl->desc_template[set_id], - s); + pl->bound_buffer_indices[i] = i; + } + + return 0; } -void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, - VkShaderStageFlagBits stage, int offset, - size_t size, void *src) +static inline void update_set_descriptor(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanDescriptorSet *set, + int bind_idx, int array_idx, + VkDescriptorGetInfoEXT *desc_get_info, + size_t desc_size) { FFVulkanFunctions *vk = &s->vkfn; + const size_t exec_offset = set->read_only ? 
0 : set->aligned_size*e->idx; + void *desc = set->desc_mem + /* Base */ + exec_offset + /* Execution context */ + set->binding_offset[bind_idx] + /* Descriptor binding */ + array_idx*desc_size; /* Array position */ - vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout, - stage, offset, size, src); + vk->GetDescriptorEXT(s->hwctx->act_dev, desc_get_info, desc_size, desc); } -int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl) +int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkSampler *sampler) { - VkResult ret; - FFVulkanFunctions *vk = &s->vkfn; - - pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging)); - if (!pl->desc_staging) - return AVERROR(ENOMEM); + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; - { /* Init descriptor set pool */ - VkDescriptorPoolCreateInfo pool_create_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, - .poolSizeCount = pl->pool_size_desc_num, - .pPoolSizes = pl->pool_size_desc, - .maxSets = pl->total_descriptor_sets, - }; + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_SAMPLER: + desc_get_info.data.pSampler = sampler; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info, - s->hwctx->alloc, &pl->desc_pool); - av_freep(&pl->pool_size_desc); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor set " - "pool: %s\n", ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, + s->desc_buf_props.samplerDescriptorSize); - { /* Allocate 
descriptor sets */ - VkDescriptorSetAllocateInfo alloc_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = pl->desc_pool, - .descriptorSetCount = pl->total_descriptor_sets, - .pSetLayouts = pl->desc_layout, - }; + return 0; +} - pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set)); - if (!pl->desc_set) - return AVERROR(ENOMEM); +int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkImageView view, VkImageLayout layout, VkSampler sampler) +{ + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorImageInfo desc_img_info = { + .imageView = view, + .sampler = sampler, + .imageLayout = layout, + }; + size_t desc_size; - ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info, - pl->desc_set); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + desc_get_info.data.pSampledImage = &desc_img_info; + desc_size = s->desc_buf_props.sampledImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + desc_get_info.data.pStorageImage = &desc_img_info; + desc_size = s->desc_buf_props.storageImageDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: + desc_get_info.data.pInputAttachmentImage = &desc_img_info; + desc_size = s->desc_buf_props.inputAttachmentDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: + desc_get_info.data.pCombinedImageSampler = &desc_img_info; + desc_size = s->desc_buf_props.combinedImageSamplerDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, 
desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - { /* Finally create the pipeline layout */ - VkPipelineLayoutCreateInfo spawn_pipeline_layout = { - .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, - .pSetLayouts = (VkDescriptorSetLayout *)pl->desc_staging, - .pushConstantRangeCount = pl->push_consts_num, - .pPushConstantRanges = pl->push_consts, - }; + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size); - for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) - pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i]; + return 0; +} - ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout, - s->hwctx->alloc, &pl->pipeline_layout); - av_freep(&pl->push_consts); - pl->push_consts_num = 0; - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", - ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } +int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt) +{ + FFVulkanDescriptorSet *desc_set = &pl->desc_set[set]; + VkDescriptorGetInfoEXT desc_get_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_GET_INFO_EXT, + .type = desc_set->binding[bind].descriptorType, + }; + VkDescriptorAddressInfoEXT desc_buf_info = { + .address = addr, + .range = len, + .format = fmt, + }; + size_t desc_size; - { /* Descriptor template (for tightly packed descriptors) */ - VkDescriptorUpdateTemplateCreateInfo *dt; + switch (desc_get_info.type) { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + desc_get_info.data.pUniformBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + desc_get_info.data.pStorageBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + 
desc_get_info.data.pUniformTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.uniformTexelBufferDescriptorSize; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + desc_get_info.data.pStorageTexelBuffer = &desc_buf_info; + desc_size = s->desc_buf_props.storageTexelBufferDescriptorSize; + break; + default: + av_log(s, AV_LOG_ERROR, "Invalid descriptor type at set %i binding %i: %i!\n", + set, bind, desc_get_info.type); + return AVERROR(EINVAL); + break; + }; - pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template)); - if (!pl->desc_template) - return AVERROR(ENOMEM); + update_set_descriptor(s, e, desc_set, bind, offs, &desc_get_info, desc_size); - /* Create update templates for the descriptor sets */ - for (int i = 0; i < pl->total_descriptor_sets; i++) { - dt = &pl->desc_template_info[i]; - dt->pipelineLayout = pl->pipeline_layout; - ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev, - dt, s->hwctx->alloc, - &pl->desc_template[i]); - if (ret != VK_SUCCESS) { - av_log(s, AV_LOG_ERROR, "Unable to init descriptor " - "template: %s\n", ff_vk_ret2str(ret)); - return AVERROR_EXTERNAL; - } - } + return 0; +} - /* Free the duplicated memory used for the template entries */ - for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) { - dt = &pl->desc_template_info[i]; - av_free((void *)dt->pDescriptorUpdateEntries); - } +void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, AVFrame *f, + VkImageView *views, int set, int binding, + VkImageLayout layout, VkSampler sampler) +{ + AVHWFramesContext *hwfc = (AVHWFramesContext *)f->hw_frames_ctx->data; + const int nb_planes = av_pix_fmt_count_planes(hwfc->sw_format); - av_freep(&pl->desc_template_info); - } + for (int i = 0; i < nb_planes; i++) + ff_vk_set_descriptor_image(s, pl, e, set, binding, i, + views[i], layout, sampler); +} - return 0; +void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, + 
FFVulkanPipeline *pl, + VkShaderStageFlagBits stage, + int offset, size_t size, void *src) +{ + FFVulkanFunctions *vk = &s->vkfn; + vk->CmdPushConstants(e->buf, pl->pipeline_layout, + stage, offset, size, src); } -FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num) -FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf) +static int init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl) { - FFVulkanPipeline *pl = create_pipeline(s); - if (pl) - pl->qf = qf; + VkResult ret; + FFVulkanFunctions *vk = &s->vkfn; + VkPipelineLayoutCreateInfo pipeline_layout_info; + + VkDescriptorSetLayout *desc_layouts = av_malloc(pl->nb_descriptor_sets* + sizeof(desc_layouts)); + if (!desc_layouts) + return AVERROR(ENOMEM); + + for (int i = 0; i < pl->nb_descriptor_sets; i++) + desc_layouts[i] = pl->desc_set[i].layout; - return pl; + /* Finally create the pipeline layout */ + pipeline_layout_info = (VkPipelineLayoutCreateInfo) { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .pSetLayouts = desc_layouts, + .setLayoutCount = pl->nb_descriptor_sets, + .pushConstantRangeCount = pl->push_consts_num, + .pPushConstantRanges = pl->push_consts, + }; + + ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &pipeline_layout_info, + s->hwctx->alloc, &pl->pipeline_layout); + av_free(desc_layouts); + if (ret != VK_SUCCESS) { + av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n", + ff_vk_ret2str(ret)); + return AVERROR_EXTERNAL; + } + + return 0; } -int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) +int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd) { - int i; + int err; VkResult ret; FFVulkanFunctions *vk = &s->vkfn; - VkComputePipelineCreateInfo pipe = { + VkComputePipelineCreateInfo pipeline_create_info; + + err = init_pipeline_layout(s, pl); + if (err < 0) + return err; + + pipeline_create_info = (VkComputePipelineCreateInfo) { .sType = 
VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .flags = VK_PIPELINE_CREATE_DESCRIPTOR_BUFFER_BIT_EXT, .layout = pl->pipeline_layout, + .stage = shd->shader, }; - for (i = 0; i < pl->shaders_num; i++) { - if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) { - pipe.stage = pl->shaders[i]->shader; - break; - } - } - if (i == pl->shaders_num) { - av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n"); - return AVERROR(EINVAL); - } - - ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe, + ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, + &pipeline_create_info, s->hwctx->alloc, &pl->pipeline); if (ret != VK_SUCCESS) { av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n", @@ -1599,157 +1773,68 @@ int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) } pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE; + pl->wg_size[0] = shd->local_size[0]; + pl->wg_size[1] = shd->local_size[1]; + pl->wg_size[2] = shd->local_size[2]; return 0; } -void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e, +void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, FFVulkanPipeline *pl) { FFVulkanFunctions *vk = &s->vkfn; + VkDeviceSize offsets[1024]; - vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline); - - for (int i = 0; i < pl->descriptor_sets_num; i++) - pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue]; - - vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point, - pl->pipeline_layout, 0, - pl->descriptor_sets_num, - (VkDescriptorSet *)pl->desc_staging, - 0, NULL); - - e->bound_pl = pl; -} - -static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e) -{ - FFVulkanFunctions *vk = &s->vkfn; - - /* Make sure all queues have finished executing */ - for (int i = 0; i < e->qf->nb_queues; i++) { - FFVkQueueCtx *q = &e->queues[i]; - - if (q->fence) { - vk->WaitForFences(s->hwctx->act_dev, 1, 
&q->fence, VK_TRUE, UINT64_MAX); - vk->ResetFences(s->hwctx->act_dev, 1, &q->fence); - } - - /* Free the fence */ - if (q->fence) - vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc); + /* Bind pipeline */ + vk->CmdBindPipeline(e->buf, pl->bind_point, pl->pipeline); - /* Free buffer dependencies */ - for (int j = 0; j < q->nb_buf_deps; j++) - av_buffer_unref(&q->buf_deps[j]); - av_free(q->buf_deps); + if (pl->nb_descriptor_sets) { + for (int i = 0; i < pl->nb_descriptor_sets; i++) + offsets[i] = pl->desc_set[i].read_only ? 0 : pl->desc_set[i].aligned_size*e->idx; - /* Free frame dependencies */ - for (int j = 0; j < q->nb_frame_deps; j++) - av_frame_free(&q->frame_deps[j]); - av_free(q->frame_deps); + /* Bind descriptor buffers */ + vk->CmdBindDescriptorBuffersEXT(e->buf, pl->nb_descriptor_sets, pl->desc_bind); + /* Binding offsets */ + vk->CmdSetDescriptorBufferOffsetsEXT(e->buf, pl->bind_point, pl->pipeline_layout, + 0, pl->nb_descriptor_sets, + pl->bound_buffer_indices, offsets); } - - if (e->bufs) - vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs); - if (e->pool) - vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc); - if (e->query.pool) - vk->DestroyQueryPool(s->hwctx->act_dev, e->query.pool, s->hwctx->alloc); - - av_freep(&e->query.data); - av_freep(&e->bufs); - av_freep(&e->queues); - av_freep(&e->sem_sig); - av_freep(&e->sem_sig_val); - av_freep(&e->sem_sig_val_dst); - av_freep(&e->sem_wait); - av_freep(&e->sem_wait_dst); - av_freep(&e->sem_wait_val); - av_free(e); } -static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl) +void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl) { FFVulkanFunctions *vk = &s->vkfn; - for (int i = 0; i < pl->shaders_num; i++) { - FFVkSPIRVShader *shd = pl->shaders[i]; - av_bprint_finalize(&shd->src, NULL); - vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module, - s->hwctx->alloc); - av_free(shd); - } - - 
vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc); - vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout, - s->hwctx->alloc); + if (pl->pipeline) + vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc); + if (pl->pipeline_layout) + vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout, + s->hwctx->alloc); - for (int i = 0; i < pl->desc_layout_num; i++) { - if (pl->desc_template && pl->desc_template[i]) - vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i], - s->hwctx->alloc); - if (pl->desc_layout && pl->desc_layout[i]) - vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i], + for (int i = 0; i < pl->nb_descriptor_sets; i++) { + FFVulkanDescriptorSet *set = &pl->desc_set[i]; + if (set->buf.mem) + ff_vk_unmap_buffer(s, &set->buf, 0); + ff_vk_free_buf(s, &set->buf); + if (set->layout) + vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, set->layout, s->hwctx->alloc); + av_free(set->binding); + av_free(set->binding_offset); } - /* Also frees the descriptor sets */ - if (pl->desc_pool) - vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool, - s->hwctx->alloc); - - av_freep(&pl->desc_staging); av_freep(&pl->desc_set); - av_freep(&pl->shaders); - av_freep(&pl->desc_layout); - av_freep(&pl->desc_template); - av_freep(&pl->desc_set_initialized); + av_freep(&pl->desc_bind); av_freep(&pl->push_consts); pl->push_consts_num = 0; - - /* Only freed in case of failure */ - av_freep(&pl->pool_size_desc); - if (pl->desc_template_info) { - for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) { - VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i]; - av_free((void *)dt->pDescriptorUpdateEntries); - } - av_freep(&pl->desc_template_info); - } - - av_free(pl); } void ff_vk_uninit(FFVulkanContext *s) { - FFVulkanFunctions *vk = &s->vkfn; - av_freep(&s->query_props); av_freep(&s->qf_props); av_freep(&s->video_props); - if (s->spirv_compiler) - 
s->spirv_compiler->uninit(&s->spirv_compiler); - - for (int i = 0; i < s->exec_ctx_num; i++) - free_exec_ctx(s, s->exec_ctx[i]); - av_freep(&s->exec_ctx); - - for (int i = 0; i < s->samplers_num; i++) { - vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0], - s->hwctx->alloc); - av_free(s->samplers[i]); - } - av_freep(&s->samplers); - - for (int i = 0; i < s->pipelines_num; i++) - free_pipeline(s, s->pipelines[i]); - av_freep(&s->pipelines); - - av_freep(&s->scratch); - s->scratch_size = 0; - - av_buffer_unref(&s->device_ref); av_buffer_unref(&s->frames_ref); } diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h index 3f887a782e..7f31ced41d 100644 --- a/libavutil/vulkan.h +++ b/libavutil/vulkan.h @@ -21,6 +21,8 @@ #define VK_NO_PROTOTYPES +#include <stdatomic.h> + #include "pixdesc.h" #include "bprint.h" #include "hwcontext.h" @@ -28,11 +30,6 @@ #include "hwcontext_vulkan.h" #include "vulkan_loader.h" -#define FF_VK_DEFAULT_USAGE_FLAGS (VK_IMAGE_USAGE_SAMPLED_BIT | \ - VK_IMAGE_USAGE_STORAGE_BIT | \ - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | \ - VK_IMAGE_USAGE_TRANSFER_DST_BIT) - /* GLSL management macros */ #define INDENT(N) INDENT_##N #define INDENT_0 @@ -57,6 +54,8 @@ goto fail; \ } while (0) +#define DUP_SAMPLER(x) { x, x, x, x } + typedef struct FFVkSPIRVShader { const char *name; /* Name for id/debugging purposes */ AVBPrint src; @@ -64,19 +63,6 @@ typedef struct FFVkSPIRVShader { VkPipelineShaderStageCreateInfo shader; } FFVkSPIRVShader; -typedef struct FFVkSPIRVCompiler { - void *priv; - int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx, - struct FFVkSPIRVShader *shd, uint8_t **data, - size_t *size, const char *entrypoint, void **opaque); - void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque); - void (*uninit)(struct FFVkSPIRVCompiler **ctx); -} FFVkSPIRVCompiler; - -typedef struct FFVkSampler { - VkSampler sampler[4]; -} FFVkSampler; - typedef struct FFVulkanDescriptorSetBinding { const char *name; VkDescriptorType type; 
@@ -86,8 +72,7 @@ typedef struct FFVulkanDescriptorSetBinding { uint32_t dimensions; /* Needed for e.g. sampler%iD */ uint32_t elems; /* 0 - scalar, 1 or more - vector */ VkShaderStageFlags stages; - FFVkSampler *sampler; /* Sampler to use for all elems */ - void *updater; /* Pointer to VkDescriptor*Info */ + VkSampler samplers[4]; /* Sampler to use for all elems */ } FFVulkanDescriptorSetBinding; typedef struct FFVkBuffer { @@ -95,119 +80,133 @@ typedef struct FFVkBuffer { VkDeviceMemory mem; VkMemoryPropertyFlagBits flags; size_t size; + VkDeviceAddress address; + + /* Local use only */ + VkPipelineStageFlags2 stage; + VkAccessFlags2 access; + + /* Only valid when allocated via ff_vk_get_pooled_buffer with HOST_VISIBLE */ + uint8_t *mapped_mem; } FFVkBuffer; typedef struct FFVkQueueFamilyCtx { int queue_family; int nb_queues; - int cur_queue; - int actual_queues; } FFVkQueueFamilyCtx; -typedef struct FFVulkanPipeline { - FFVkQueueFamilyCtx *qf; +typedef struct FFVulkanDescriptorSet { + VkDescriptorSetLayout layout; + FFVkBuffer buf; + uint8_t *desc_mem; + VkDeviceSize layout_size; + VkDeviceSize aligned_size; /* descriptorBufferOffsetAlignment */ + VkDeviceSize total_size; /* Once registered to an exec context */ + VkBufferUsageFlags usage; + + VkDescriptorSetLayoutBinding *binding; + VkDeviceSize *binding_offset; + int nb_bindings; + int read_only; +} FFVulkanDescriptorSet; + +typedef struct FFVulkanPipeline { VkPipelineBindPoint bind_point; /* Contexts */ VkPipelineLayout pipeline_layout; VkPipeline pipeline; - /* Shaders */ - FFVkSPIRVShader **shaders; - int shaders_num; - /* Push consts */ VkPushConstantRange *push_consts; int push_consts_num; + /* Workgroup */ + int wg_size[3]; + /* Descriptors */ - VkDescriptorSetLayout *desc_layout; - VkDescriptorPool desc_pool; - VkDescriptorSet *desc_set; -#if VK_USE_64_BIT_PTR_DEFINES == 1 - void **desc_staging; -#else - uint64_t *desc_staging; -#endif - VkDescriptorSetLayoutBinding **desc_binding; - 
VkDescriptorUpdateTemplate *desc_template; - int *desc_set_initialized; - int desc_layout_num; - int descriptor_sets_num; - int total_descriptor_sets; - int pool_size_desc_num; - - /* Temporary, used to store data in between initialization stages */ - VkDescriptorUpdateTemplateCreateInfo *desc_template_info; - VkDescriptorPoolSize *pool_size_desc; + FFVulkanDescriptorSet *desc_set; + VkDescriptorBufferBindingInfoEXT *desc_bind; + uint32_t *bound_buffer_indices; + int nb_descriptor_sets; } FFVulkanPipeline; -typedef struct FFVkQueueCtx { - VkFence fence; +typedef struct FFVkExecContext { + int idx; + const struct FFVkExecPool *parent; + + /* Queue for the execution context */ VkQueue queue; + int qf; + int qi; + + /* Command buffer for the context */ + VkCommandBuffer buf; + + /* Fence for the command buffer */ + VkFence fence; - int synchronous; - int submitted; + void *query_data; + int query_idx; /* Buffer dependencies */ AVBufferRef **buf_deps; int nb_buf_deps; - int buf_deps_alloc_size; + unsigned int buf_deps_alloc_size; /* Frame dependencies */ AVFrame **frame_deps; + unsigned int frame_deps_alloc_size; int nb_frame_deps; - int frame_deps_alloc_size; -} FFVkQueueCtx; - -typedef struct FFVkExecContext { - FFVkQueueFamilyCtx *qf; - VkCommandPool pool; - VkCommandBuffer *bufs; - FFVkQueueCtx *queues; - - struct { - int idx; - VkQueryPool pool; - uint8_t *data; - - int nb_queries; - int nb_results; - int nb_statuses; - int elem_64bits; - size_t data_per_queue; - int status_stride; - } query; + VkSemaphoreSubmitInfo *sem_wait; + unsigned int sem_wait_alloc; + int sem_wait_cnt; - AVBufferRef ***deps; - int *nb_deps; - int *dep_alloc_size; + VkSemaphoreSubmitInfo *sem_sig; + unsigned int sem_sig_alloc; + int sem_sig_cnt; - FFVulkanPipeline *bound_pl; + uint64_t **sem_sig_val_dst; + unsigned int sem_sig_val_dst_alloc; + int sem_sig_val_dst_cnt; - VkSemaphore *sem_wait; - int sem_wait_alloc; /* Allocated sem_wait */ - int sem_wait_cnt; + uint8_t *frame_locked; + 
unsigned int frame_locked_alloc_size; - uint64_t *sem_wait_val; - int sem_wait_val_alloc; + VkAccessFlagBits *access_dst; + unsigned int access_dst_alloc; - VkPipelineStageFlagBits *sem_wait_dst; - int sem_wait_dst_alloc; /* Allocated sem_wait_dst */ + VkImageLayout *layout_dst; + unsigned int layout_dst_alloc; - VkSemaphore *sem_sig; - int sem_sig_alloc; /* Allocated sem_sig */ - int sem_sig_cnt; + uint32_t *queue_family_dst; + unsigned int queue_family_dst_alloc; - uint64_t *sem_sig_val; - int sem_sig_val_alloc; - - uint64_t **sem_sig_val_dst; - int sem_sig_val_dst_alloc; + uint8_t *frame_update; + unsigned int frame_update_alloc_size; } FFVkExecContext; +typedef struct FFVkExecPool { + FFVkQueueFamilyCtx *qf; + FFVkExecContext *contexts; + atomic_int_least64_t idx; + + VkCommandPool cmd_buf_pool; + VkCommandBuffer *cmd_bufs; + int pool_size; + + VkQueryPool query_pool; + void *query_data; + int query_results; + int query_statuses; + int query_64bit; + int query_status_stride; + int nb_queries; + size_t qd_size; +} FFVkExecPool; + typedef struct FFVulkanContext { const AVClass *class; /* Filters and encoders use this */ @@ -216,14 +215,17 @@ typedef struct FFVulkanContext { VkPhysicalDeviceProperties2 props; VkPhysicalDeviceDriverProperties driver_props; VkPhysicalDeviceMemoryProperties mprops; + VkPhysicalDeviceExternalMemoryHostPropertiesEXT hprops; + VkPhysicalDeviceDescriptorBufferPropertiesEXT desc_buf_props; VkQueueFamilyQueryResultStatusPropertiesKHR *query_props; VkQueueFamilyVideoPropertiesKHR *video_props; VkQueueFamilyProperties2 *qf_props; + int tot_nb_qfs; - AVBufferRef *device_ref; AVHWDeviceContext *device; AVVulkanDeviceContext *hwctx; + AVBufferRef *input_frames_ref; AVBufferRef *frames_ref; AVHWFramesContext *frames; AVVulkanFramesContext *hwfc; @@ -231,28 +233,11 @@ typedef struct FFVulkanContext { uint32_t qfs[5]; int nb_qfs; - FFVkSPIRVCompiler *spirv_compiler; - /* Properties */ int output_width; int output_height; enum AVPixelFormat 
output_format; enum AVPixelFormat input_format; - - /* Samplers */ - FFVkSampler **samplers; - int samplers_num; - - /* Exec contexts */ - FFVkExecContext **exec_ctx; - int exec_ctx_num; - - /* Pipelines (each can have 1 shader of each type) */ - FFVulkanPipeline **pipelines; - int pipelines_num; - - void *scratch; /* Scratch memory used only in functions */ - unsigned int scratch_size; } FFVulkanContext; /* Identity mapping - r = r, b = b, g = g, a = a */ @@ -264,244 +249,207 @@ extern const VkComponentMapping ff_comp_identity_map; const char *ff_vk_ret2str(VkResult res); /** - * Loads props/mprops/driver_props - */ -int ff_vk_load_props(FFVulkanContext *s); - -/** - * Returns 1 if the image is any sort of supported RGB + * Returns 1 if pixfmt is a usable RGB format. */ int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt); /** - * Gets the glsl format string for a pixel format + * Returns the format to use for images in shaders. */ const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt); /** - * Setup the queue families from the hardware device context. - * Necessary for image creation to work. - */ -void ff_vk_qf_fill(FFVulkanContext *s); - -/** - * Allocate device memory. - */ -int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, - VkMemoryPropertyFlagBits req_flags, void *alloc_extension, - VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); - -/** - * Get a queue family index and the number of queues. nb is optional. + * Loads props/mprops/driver_props */ -int ff_vk_qf_get_index(FFVulkanContext *s, VkQueueFlagBits dev_family, int *nb); +int ff_vk_load_props(FFVulkanContext *s); /** - * Initialize a queue family with a specific number of queues. - * If nb_queues == 0, use however many queues the queue family has. + * Chooses a QF and loads it into a context. */ int ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, - VkQueueFlagBits dev_family, int nb_queues); - -/** - * Rotate through the queues in a queue family. 
- */ -int ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf); - -/** - * Create a Vulkan sampler, will be auto-freed in ff_vk_filter_uninit() - */ -FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s, int unnorm_coords, - VkFilter filt); + VkQueueFlagBits dev_family); /** - * Create an imageview. - * Guaranteed to remain alive until the queue submission has finished executing, - * and will be destroyed after that. + * Allocates/frees an execution pool. + * ff_vk_exec_pool_init_desc() MUST be called if ff_vk_exec_descriptor_set_add() + * has been called. */ -int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e, - VkImageView *v, VkImage img, VkFormat fmt, - const VkComponentMapping map); +int ff_vk_exec_pool_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf, + FFVkExecPool *pool, int nb_contexts, + int nb_queries, VkQueryType query_type, int query_64bit, + const void *query_create_pnext); +void ff_vk_exec_pool_free(FFVulkanContext *s, FFVkExecPool *pool); /** - * Define a push constant for a given stage into a pipeline. - * Must be called before the pipeline layout has been initialized. + * Retrieve an execution pool. Threadsafe. */ -int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, - VkShaderStageFlagBits stage); +FFVkExecContext *ff_vk_exec_get(FFVkExecPool *pool); /** - * Inits a pipeline. Everything in it will be auto-freed when calling - * ff_vk_filter_uninit(). + * Performs nb_queries queries and returns their results and statuses. + * Execution must have been waited on to produce valid results. */ -FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf); +VkResult ff_vk_exec_get_query(FFVulkanContext *s, FFVkExecContext *e, + void **data, int64_t *status); /** - * Inits a shader for a specific pipeline. Will be auto-freed on uninit. + * Start/submit/wait an execution. + * ff_vk_exec_start() always waits on a submission, so using ff_vk_exec_wait() + * is not necessary (unless using it is just better). 
*/ -FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name, - VkShaderStageFlags stage); +int ff_vk_exec_start(FFVulkanContext *s, FFVkExecContext *e); +int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext *e); +void ff_vk_exec_wait(FFVulkanContext *s, FFVkExecContext *e); /** - * Writes the workgroup size for a shader. + * Execution dependency management. + * Can attach buffers to executions that will only be unref'd once the + * buffer has finished executing. + * Adding a frame dep will *lock the frame*, until either the dependencies + * are discarded, the execution is submitted, or a failure happens. + * update_frame will update the frame's properties before it is unlocked, + * only if submission was successful. */ -void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3]); +int ff_vk_exec_add_dep_buf(FFVulkanContext *s, FFVkExecContext *e, + AVBufferRef **deps, int nb_deps, int ref); +int ff_vk_exec_add_dep_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkPipelineStageFlagBits2 wait_stage, + VkPipelineStageFlagBits2 signal_stage); +void ff_vk_exec_update_frame(FFVulkanContext *s, FFVkExecContext *e, AVFrame *f, + VkImageMemoryBarrier2 *bar, uint32_t *nb_img_bar); +int ff_vk_exec_mirror_sem_value(FFVulkanContext *s, FFVkExecContext *e, + VkSemaphore *dst, uint64_t *dst_val, + AVFrame *f); +void ff_vk_exec_discard_deps(FFVulkanContext *s, FFVkExecContext *e); /** - * Adds a descriptor set to the shader and registers them in the pipeline. + * Create an imageview and add it as a dependency to an execution. */ -int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc, - int num, int only_print_to_shader); +int ff_vk_create_imageviews(FFVulkanContext *s, FFVkExecContext *e, + VkImageView views[AV_NUM_DATA_POINTERS], + AVFrame *f); -/** - * Compiles the shader, entrypoint must be set to "main". 
- */ -int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd, - const char *entrypoint); +void ff_vk_frame_barrier(FFVulkanContext *s, FFVkExecContext *e, + AVFrame *pic, VkImageMemoryBarrier2 *bar, int *nb_bar, + VkPipelineStageFlags src_stage, + VkPipelineStageFlags dst_stage, + VkAccessFlagBits new_access, + VkImageLayout new_layout, + uint32_t new_qf); /** - * Pretty print shader, mainly used by shader compilers. + * Memory/buffer/image allocation helpers. */ -void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio); - -/** - * Initializes the pipeline layout after all shaders and descriptor sets have - * been finished. - */ -int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl); +int ff_vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req, + VkMemoryPropertyFlagBits req_flags, void *alloc_extension, + VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem); +int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); +int ff_vk_create_avbuf(FFVulkanContext *s, AVBufferRef **ref, size_t size, + void *pNext, void *alloc_pNext, + VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); /** - * Initializes a compute pipeline. Will pick the first shader with the - * COMPUTE flag set. + * Buffer management code. */ -int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl); +int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer **buf, uint8_t *mem[], + int nb_buffers, int invalidate); +int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer **buf, int nb_buffers, + int flush); -/** - * Updates a descriptor set via the updaters defined. - * Can be called immediately after pipeline creation, but must be called - * at least once before queue submission. 
- */ -void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl, - int set_id); +static inline int ff_vk_map_buffer(FFVulkanContext *s, FFVkBuffer *buf, uint8_t **mem, + int invalidate) +{ + return ff_vk_map_buffers(s, (FFVkBuffer *[]){ buf }, mem, + 1, invalidate); +} -/** - * Init an execution context for command recording and queue submission. - * WIll be auto-freed on uninit. - */ -int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx, - FFVkQueueFamilyCtx *qf); +static inline int ff_vk_unmap_buffer(FFVulkanContext *s, FFVkBuffer *buf, int flush) +{ + return ff_vk_unmap_buffers(s, (FFVkBuffer *[]){ buf }, 1, flush); +} -/** - * Create a query pool for a command context. - * elem_64bits exists to troll driver devs for compliance. All results - * and statuses returned should be 32 bits, unless this is set, then it's 64bits. - */ -int ff_vk_create_exec_ctx_query_pool(FFVulkanContext *s, FFVkExecContext *e, - int nb_queries, VkQueryType type, - int elem_64bits, void *create_pnext); +void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); -/** - * Get results for query. - * Returns the status of the query. - * Sets *res to the status of the queries. - */ -int ff_vk_get_exec_ctx_query_results(FFVulkanContext *s, FFVkExecContext *e, - int query_idx, void **data, int64_t *status); +/** Initialize a pool and create AVBufferRefs containing FFVkBuffer. + * Threadsafe to use. Buffers are automatically mapped on creation if + * VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT is set in mem_props. Users should + * synchronize access themselvesd. Mainly meant for device-local buffers. */ +int ff_vk_get_pooled_buffer(FFVulkanContext *ctx, AVBufferPool **buf_pool, + AVBufferRef **buf, VkBufferUsageFlags usage, + void *create_pNext, size_t size, + VkMemoryPropertyFlagBits mem_props); /** - * Begin recording to the command buffer. Previous execution must have been - * completed, which ff_vk_submit_exec_queue() will ensure. + * Create a sampler. 
*/ -int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e); +int ff_vk_init_sampler(FFVulkanContext *s, VkSampler *sampler, + int unnorm_coords, VkFilter filt); /** - * Add a command to bind the completed pipeline and its descriptor sets. - * Must be called after ff_vk_start_exec_recording() and before submission. + * Shader management. */ -void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e, - FFVulkanPipeline *pl); +int ff_vk_shader_init(FFVulkanPipeline *pl, FFVkSPIRVShader *shd, const char *name, + VkShaderStageFlags stage); +void ff_vk_shader_set_compute_sizes(FFVkSPIRVShader *shd, int x, int y, int z); +void ff_vk_shader_print(void *ctx, FFVkSPIRVShader *shd, int prio); +int ff_vk_shader_create(FFVulkanContext *s, FFVkSPIRVShader *shd, + uint8_t *spirv, size_t spirv_size, const char *entrypoint); +void ff_vk_shader_free(FFVulkanContext *s, FFVkSPIRVShader *shd); /** - * Updates push constants. - * Must be called after binding a pipeline if any push constants were defined. + * Add/update push constants for execution. */ +int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size, + VkShaderStageFlagBits stage); void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e, - VkShaderStageFlagBits stage, int offset, - size_t size, void *src); + FFVulkanPipeline *pl, + VkShaderStageFlagBits stage, + int offset, size_t size, void *src); /** - * Gets the command buffer to use for this submission from the exe context. + * Add descriptor to a pipeline. Must be called before pipeline init. */ -VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e); +int ff_vk_pipeline_descriptor_set_add(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd, + FFVulkanDescriptorSetBinding *desc, int nb, + int read_only, int print_to_shader_only); -/** - * Adds a generic AVBufferRef as a queue depenency. 
- */ -int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e, - AVBufferRef **deps, int nb_deps); - -/** - * Discards all queue dependencies - */ -void ff_vk_discard_exec_deps(FFVkExecContext *e); +/* Initialize/free a pipeline. */ +int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkSPIRVShader *shd); +void ff_vk_pipeline_free(FFVulkanContext *s, FFVulkanPipeline *pl); /** - * Adds a frame as a queue dependency. This also manages semaphore signalling. - * Must be called before submission. + * Register a pipeline with an exec pool. + * Pool may be NULL if all descriptor sets are read-only. */ -int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame, - VkPipelineStageFlagBits in_wait_dst_flag); +int ff_vk_exec_pipeline_register(FFVulkanContext *s, FFVkExecPool *pool, + FFVulkanPipeline *pl); -/** - * Submits a command buffer to the queue for execution. Will not block. - */ -int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e); - -/** - * Wait on a command buffer's execution. Mainly useful for debugging and - * development. - */ -void ff_vk_wait_on_exec_ctx(FFVulkanContext *s, FFVkExecContext *e); - -/** - * Create a VkBuffer with the specified parameters. - */ -int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size, - void *pNext, void *alloc_pNext, - VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags); - -/** - * Maps the buffer to userspace. Set invalidate to 1 if reading the contents - * is necessary. - */ -int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[], - int nb_buffers, int invalidate); - -/** - * Unmaps the buffer from userspace. Set flush to 1 to write and sync. - */ -int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers, - int flush); +/* Bind pipeline */ +void ff_vk_exec_bind_pipeline(FFVulkanContext *s, FFVkExecContext *e, + FFVulkanPipeline *pl); -/** - * Frees a buffer. 
- */ -void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf); +/* Update sampler/image/buffer descriptors. e may be NULL for read-only descriptors. */ +int ff_vk_set_descriptor_sampler(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkSampler *sampler); +int ff_vk_set_descriptor_image(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkImageView view, VkImageLayout layout, VkSampler sampler); +int ff_vk_set_descriptor_buffer(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, int set, int bind, int offs, + VkDeviceAddress addr, VkDeviceSize len, VkFormat fmt); -/** - * Creates an image, allocates and binds memory in the given - * idx value of the dst frame. If mem is non-NULL, then no memory will be - * allocated, but instead the given memory will be bound to the image. - */ -int ff_vk_image_create(FFVulkanContext *s, AVVkFrame *dst, int idx, - int width, int height, VkFormat fmt, VkImageTiling tiling, - VkImageUsageFlagBits usage, VkImageCreateFlags flags, - void *create_pnext, - VkDeviceMemory *mem, void *alloc_pnext); +void ff_vk_update_descriptor_img_array(FFVulkanContext *s, FFVulkanPipeline *pl, + FFVkExecContext *e, AVFrame *f, + VkImageView *views, int set, int binding, + VkImageLayout layout, VkSampler sampler); /** - * Frees the main Vulkan context. + * Frees main context. */ void ff_vk_uninit(FFVulkanContext *s); diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h index e06d097807..c81e12f27e 100644 --- a/libavutil/vulkan_functions.h +++ b/libavutil/vulkan_functions.h @@ -93,6 +93,7 @@ typedef enum FFVulkanExtensions { /* Queue */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetDeviceQueue) \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit) \ + MACRO(1, 1, FF_VK_EXT_NO_FLAG, QueueSubmit2) \ \ /* Fences */ \ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CreateFence) \ |