diff options
author | Lynne <dev@lynne.ee> | 2023-02-17 03:10:58 +0100 |
---|---|---|
committer | Lynne <dev@lynne.ee> | 2023-05-29 00:41:51 +0200 |
commit | 05ce6473acb34c2b79311e314dd15867863b56b8 (patch) | |
tree | bc816d6317c2abba842877d6d670e4e839f01f50 /libavfilter | |
parent | 51b7fe81be5892b0c2590443b3ff2d684878a83c (diff) | |
download | ffmpeg-05ce6473acb34c2b79311e314dd15867863b56b8.tar.gz |
lavfi: add lavfi-only Vulkan infrastructure
Diffstat (limited to 'libavfilter')
-rw-r--r-- | libavfilter/Makefile | 6 | ||||
-rw-r--r-- | libavfilter/vulkan_filter.c | 480 | ||||
-rw-r--r-- | libavfilter/vulkan_filter.h | 39 | ||||
-rw-r--r-- | libavfilter/vulkan_glslang.c | 283 | ||||
-rw-r--r-- | libavfilter/vulkan_shaderc.c | 124 | ||||
-rw-r--r-- | libavfilter/vulkan_spirv.h | 45 |
6 files changed, 864 insertions, 113 deletions
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 19283a71de..c4b52d0257 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -623,6 +623,10 @@ OBJS-$(CONFIG_AVSYNCTEST_FILTER) += src_avsynctest.o OBJS-$(CONFIG_AMOVIE_FILTER) += src_movie.o OBJS-$(CONFIG_MOVIE_FILTER) += src_movie.o +# vulkan libs +OBJS-$(CONFIG_LIBGLSLANG) += vulkan_glslang.o +OBJS-$(CONFIG_LIBSHADERC) += vulkan_shaderc.o + # Objects duplicated from other libraries for shared builds SHLIBOBJS += log2_tab.o @@ -636,6 +640,8 @@ SKIPHEADERS-$(CONFIG_QSVVPP) += qsvvpp.h stack_internal.h SKIPHEADERS-$(CONFIG_OPENCL) += opencl.h SKIPHEADERS-$(CONFIG_VAAPI) += vaapi_vpp.h stack_internal.h SKIPHEADERS-$(CONFIG_VULKAN) += vulkan.h vulkan_filter.h +SKIPHEADERS-$(CONFIG_LIBSHADERC) += vulkan_spirv.h +SKIPHEADERS-$(CONFIG_LIBGLSLANG) += vulkan_spirv.h TOOLS = graph2dot TESTPROGS = drawutils filtfmts formats integral diff --git a/libavfilter/vulkan_filter.c b/libavfilter/vulkan_filter.c index e22541bd23..b4d8f952b5 100644 --- a/libavfilter/vulkan_filter.c +++ b/libavfilter/vulkan_filter.c @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -18,107 +20,186 @@ #include "vulkan_filter.h" -static int vulkan_filter_set_device(AVFilterContext *avctx, - AVBufferRef *device) +int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s, + AVBufferRef *frames_ref, + int width, int height, enum AVPixelFormat sw_format) { - FFVulkanContext *s = avctx->priv; + int err; + AVHWFramesContext *frames_ctx; + AVHWDeviceContext *device_ctx; + AVVulkanFramesContext *vk_frames; + AVVulkanDeviceContext *vk_dev; + AVBufferRef *device_ref = avctx->hw_device_ctx; + + /* Check if context is reusable as-is */ + if (frames_ref) { + int no_storage = 0; + FFVulkanFunctions *vk; + const VkFormat *sub = av_vkfmt_from_pixfmt(sw_format); + + frames_ctx = (AVHWFramesContext *)frames_ref->data; + device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data; + vk_frames = frames_ctx->hwctx; + vk_dev = device_ctx->hwctx; + + /* Basic format validation */ + if (width != frames_ctx->width || + height != frames_ctx->height || + sw_format != frames_ctx->sw_format || + (vk_frames->tiling != VK_IMAGE_TILING_LINEAR && + vk_frames->tiling != VK_IMAGE_TILING_OPTIMAL) || + !(vk_frames->usage & VK_IMAGE_USAGE_SAMPLED_BIT)) { + goto skip; + } - av_buffer_unref(&s->device_ref); + if (vk_frames->usage & VK_IMAGE_USAGE_STORAGE_BIT) + goto accept; - s->device_ref = av_buffer_ref(device); - if (!s->device_ref) - return AVERROR(ENOMEM); + s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, + vk_dev->nb_enabled_dev_extensions); + err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); + if (err < 0) + return err; + vk = &s->vkfn; + + /* Check if the subformats can do storage */ + for (int i = 0; sub[i] != VK_FORMAT_UNDEFINED; i++) { + VkFormatProperties2 prop = { + .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2, + }; + vk->GetPhysicalDeviceFormatProperties2(vk_dev->phys_dev, sub[i], + &prop); + + if (vk_frames->tiling == VK_IMAGE_TILING_LINEAR) { + no_storage |= !(prop.formatProperties.linearTilingFeatures & + VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); + } else { + no_storage |= !(prop.formatProperties.optimalTilingFeatures & + VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT); + } + } - s->device = (AVHWDeviceContext*)s->device_ref->data; - s->hwctx = s->device->hwctx; + /* Check if it's usable */ + if (no_storage) { +skip: + device_ref = frames_ctx->device_ref; + frames_ref = NULL; + } else { +accept: + frames_ref = av_buffer_ref(frames_ref); + if (!frames_ref) + return AVERROR(ENOMEM); + } + } - return 0; -} + if (!frames_ref) { + if (!device_ref) { + av_log(avctx, AV_LOG_ERROR, + "Vulkan filtering requires a device context!\n"); + return AVERROR(EINVAL); + } -static int vulkan_filter_set_frames(AVFilterContext *avctx, - AVBufferRef *frames) -{ - FFVulkanContext *s = avctx->priv; + frames_ref = av_hwframe_ctx_alloc(device_ref); - av_buffer_unref(&s->frames_ref); + frames_ctx = (AVHWFramesContext *)frames_ref->data; + frames_ctx->format = AV_PIX_FMT_VULKAN; + frames_ctx->sw_format = sw_format; + frames_ctx->width = width; + frames_ctx->height = height; - s->frames_ref = av_buffer_ref(frames); - if (!s->frames_ref) - return AVERROR(ENOMEM); + vk_frames = frames_ctx->hwctx; + vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL; + vk_frames->usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT; - return 0; + err = av_hwframe_ctx_init(frames_ref); + if (err < 0) { + av_buffer_unref(&frames_ref); + return err; + } + + device_ctx = (AVHWDeviceContext *)frames_ctx->device_ref->data; + vk_dev = device_ctx->hwctx; + } + + s->extensions = ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions, + vk_dev->nb_enabled_dev_extensions); + + /** + * libplacebo does not use descriptor buffers. + */ + if (!(s->extensions & FF_VK_EXT_DESCRIPTOR_BUFFER) && + strcmp(avctx->filter->name, "libplacebo")) { + av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires that " + "the %s extension is supported!\n", + VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME); + av_buffer_unref(&frames_ref); + return AVERROR(EINVAL); + } + + err = ff_vk_load_functions(device_ctx, &s->vkfn, s->extensions, 1, 1); + if (err < 0) { + av_buffer_unref(&frames_ref); + return err; + } + + s->frames_ref = frames_ref; + s->frames = frames_ctx; + s->hwfc = vk_frames; + s->device = device_ctx; + s->hwctx = device_ctx->hwctx; + + err = ff_vk_load_props(s); + if (err < 0) + av_buffer_unref(&s->frames_ref); + + return err; } int ff_vk_filter_config_input(AVFilterLink *inlink) { - int err; - AVFilterContext *avctx = inlink->dst; - FFVulkanContext *s = avctx->priv; - FFVulkanFunctions *vk = &s->vkfn; AVHWFramesContext *input_frames; + AVFilterContext *avctx = inlink->dst; + FFVulkanContext *s = inlink->dst->priv; if (!inlink->hw_frames_ctx) { - av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " + av_log(inlink->dst, AV_LOG_ERROR, "Vulkan filtering requires a " "hardware frames context on the input.\n"); return AVERROR(EINVAL); } - /* Extract the device and default output format from the first input. */ - if (avctx->inputs[0] != inlink) - return 0; - input_frames = (AVHWFramesContext *)inlink->hw_frames_ctx->data; if (input_frames->format != AV_PIX_FMT_VULKAN) return AVERROR(EINVAL); - err = vulkan_filter_set_device(avctx, input_frames->device_ref); - if (err < 0) - return err; - err = vulkan_filter_set_frames(avctx, inlink->hw_frames_ctx); - if (err < 0) - return err; - - s->extensions = ff_vk_extensions_to_mask(s->hwctx->enabled_dev_extensions, - s->hwctx->nb_enabled_dev_extensions); - - err = ff_vk_load_functions(s->device, &s->vkfn, s->extensions, 1, 1); - if (err < 0) - return err; + /* Extract the device and default output format from the first input. */ + if (avctx->inputs[0] != inlink) + return 0; - vk->GetPhysicalDeviceProperties(s->hwctx->phys_dev, &s->props); - vk->GetPhysicalDeviceMemoryProperties(s->hwctx->phys_dev, &s->mprops); + /* Save the ref, without reffing it */ + s->input_frames_ref = inlink->hw_frames_ctx; - /* Default output parameters match input parameters. */ - s->input_format = input_frames->sw_format; - if (s->output_format == AV_PIX_FMT_NONE) - s->output_format = input_frames->sw_format; - if (!s->output_width) - s->output_width = inlink->w; - if (!s->output_height) - s->output_height = inlink->h; + /* Defaults */ + s->output_format = input_frames->sw_format; + s->output_width = inlink->w; + s->output_height = inlink->h; return 0; } -int ff_vk_filter_config_output_inplace(AVFilterLink *outlink) +int ff_vk_filter_config_output(AVFilterLink *outlink) { int err; - AVFilterContext *avctx = outlink->src; - FFVulkanContext *s = avctx->priv; + FFVulkanContext *s = outlink->src->priv; av_buffer_unref(&outlink->hw_frames_ctx); - if (!s->device_ref) { - if (!avctx->hw_device_ctx) { - av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " - "Vulkan device.\n"); - return AVERROR(EINVAL); - } - - err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx); - if (err < 0) - return err; - } + err = ff_vk_filter_init_context(outlink->src, s, s->input_frames_ref, + s->output_width, s->output_height, + s->output_format); + if (err < 0) + return err; outlink->hw_frames_ctx = av_buffer_ref(s->frames_ref); if (!outlink->hw_frames_ctx) @@ -127,65 +208,246 @@ int ff_vk_filter_config_output_inplace(AVFilterLink *outlink) outlink->w = s->output_width; outlink->h = s->output_height; - return 0; + return err; } -int ff_vk_filter_config_output(AVFilterLink *outlink) +int ff_vk_filter_init(AVFilterContext *avctx) { - int err; - AVFilterContext *avctx = outlink->src; FFVulkanContext *s = avctx->priv; - AVBufferRef *output_frames_ref; - AVHWFramesContext *output_frames; - - av_buffer_unref(&outlink->hw_frames_ctx); - if (!s->device_ref) { - if (!avctx->hw_device_ctx) { - av_log(avctx, AV_LOG_ERROR, "Vulkan filtering requires a " - "Vulkan device.\n"); - return AVERROR(EINVAL); - } + s->output_format = AV_PIX_FMT_NONE; - err = vulkan_filter_set_device(avctx, avctx->hw_device_ctx); - if (err < 0) - return err; - } + return 0; +} - output_frames_ref = av_hwframe_ctx_alloc(s->device_ref); - if (!output_frames_ref) { - err = AVERROR(ENOMEM); - goto fail; +int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f, + VkSampler sampler, void *push_src, size_t push_size) +{ + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(e); + ff_vk_exec_start(vkctx, exec); + + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + if (in_f) { + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in_f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in_f)); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, in_f, in_views, 0, 0, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + sampler); + ff_vk_frame_barrier(vkctx, exec, in_f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); } - output_frames = (AVHWFramesContext*)output_frames_ref->data; - output_frames->format = AV_PIX_FMT_VULKAN; - output_frames->sw_format = s->output_format; - output_frames->width = s->output_width; - output_frames->height = s->output_height; + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out_f, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out_f)); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, out_f, out_views, 0, !!in_f, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + ff_vk_frame_barrier(vkctx, exec, out_f, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], + FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], + pl->wg_size[2]); + + return ff_vk_exec_submit(vkctx, exec); +fail: + ff_vk_exec_discard_deps(vkctx, exec); + return err; +} - err = av_hwframe_ctx_init(output_frames_ref); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Failed to initialise output " - "frames: %d.\n", err); - goto fail; +int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pls[2], + AVFrame *out, AVFrame *tmp, AVFrame *in, + VkSampler sampler, void *push_src, size_t push_size) +{ + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[AV_NUM_DATA_POINTERS]; + VkImageView tmp_views[AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[37]; + int nb_img_bar = 0; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(e); + ff_vk_exec_start(vkctx, exec); + + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, tmp, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + + RET(ff_vk_create_imageviews(vkctx, exec, in_views, in)); + RET(ff_vk_create_imageviews(vkctx, exec, tmp_views, tmp)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); + + ff_vk_frame_barrier(vkctx, exec, in, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); + ff_vk_frame_barrier(vkctx, exec, tmp, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + for (int i = 0; i < 2; i++) { + FFVulkanPipeline *pl = pls[i]; + AVFrame *src_f = !i ? in : tmp; + AVFrame *dst_f = !i ? tmp : out; + VkImageView *src_views = !i ? in_views : tmp_views; + VkImageView *dst_views = !i ? tmp_views : out_views; + + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + ff_vk_update_descriptor_img_array(vkctx, pl, exec, src_f, src_views, 0, 0, + !i ? VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL : + VK_IMAGE_LAYOUT_GENERAL, + sampler); + ff_vk_update_descriptor_img_array(vkctx, pl, exec, dst_f, dst_views, 0, 1, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], + FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], + pl->wg_size[2]); } - outlink->hw_frames_ctx = output_frames_ref; - outlink->w = s->output_width; - outlink->h = s->output_height; - - return 0; + return ff_vk_exec_submit(vkctx, exec); fail: - av_buffer_unref(&output_frames_ref); + ff_vk_exec_discard_deps(vkctx, exec); return err; } -int ff_vk_filter_init(AVFilterContext *avctx) +int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, + AVFrame *out, AVFrame *in[], int nb_in, + VkSampler sampler, void *push_src, size_t push_size) { - FFVulkanContext *s = avctx->priv; - - s->output_format = AV_PIX_FMT_NONE; + int err = 0; + FFVulkanFunctions *vk = &vkctx->vkfn; + VkImageView in_views[16][AV_NUM_DATA_POINTERS]; + VkImageView out_views[AV_NUM_DATA_POINTERS]; + VkImageMemoryBarrier2 img_bar[128]; + int nb_img_bar = 0; + + /* Update descriptors and init the exec context */ + FFVkExecContext *exec = ff_vk_exec_get(e); + ff_vk_exec_start(vkctx, exec); + + /* Inputs */ + for (int i = 0; i < nb_in; i++) { + RET(ff_vk_exec_add_dep_frame(vkctx, exec, in[i], + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, in_views[i], in[i])); + + ff_vk_frame_barrier(vkctx, exec, in[i], img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_READ_BIT, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED); + } - return 0; + /* Output */ + RET(ff_vk_exec_add_dep_frame(vkctx, exec, out, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT)); + RET(ff_vk_create_imageviews(vkctx, exec, out_views, out)); + ff_vk_frame_barrier(vkctx, exec, out, img_bar, &nb_img_bar, + VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, + VK_ACCESS_SHADER_WRITE_BIT, + VK_IMAGE_LAYOUT_GENERAL, + VK_QUEUE_FAMILY_IGNORED); + + vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) { + .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO, + .pImageMemoryBarriers = img_bar, + .imageMemoryBarrierCount = nb_img_bar, + }); + + ff_vk_exec_bind_pipeline(vkctx, exec, pl); + + if (push_src) + ff_vk_update_push_exec(vkctx, exec, pl, VK_SHADER_STAGE_COMPUTE_BIT, + 0, push_size, push_src); + + for (int i = 0; i < nb_in; i++) + ff_vk_update_descriptor_img_array(vkctx, pl, exec, in[i], in_views[i], 0, i, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + sampler); + + ff_vk_update_descriptor_img_array(vkctx, pl, exec, out, out_views, 0, nb_in, + VK_IMAGE_LAYOUT_GENERAL, + VK_NULL_HANDLE); + + vk->CmdDispatch(exec->buf, + FFALIGN(vkctx->output_width, pl->wg_size[0])/pl->wg_size[0], + FFALIGN(vkctx->output_height, pl->wg_size[1])/pl->wg_size[1], + pl->wg_size[2]); + + return ff_vk_exec_submit(vkctx, exec); +fail: + ff_vk_exec_discard_deps(vkctx, exec); + return err; } diff --git a/libavfilter/vulkan_filter.h b/libavfilter/vulkan_filter.h index bfdb9b2d7d..d2c14601d9 100644 --- a/libavfilter/vulkan_filter.h +++ b/libavfilter/vulkan_filter.h @@ -1,4 +1,6 @@ /* + * Copyright (c) Lynne + * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or @@ -26,9 +28,38 @@ /** * General lavfi IO functions */ -int ff_vk_filter_init (AVFilterContext *avctx); -int ff_vk_filter_config_input (AVFilterLink *inlink); -int ff_vk_filter_config_output (AVFilterLink *outlink); -int ff_vk_filter_config_output_inplace(AVFilterLink *outlink); +int ff_vk_filter_init (AVFilterContext *avctx); +int ff_vk_filter_config_input (AVFilterLink *inlink); +int ff_vk_filter_config_output(AVFilterLink *outlink); + +/** + * Can be called manually, if not using ff_vk_filter_config_output. + */ +int ff_vk_filter_init_context(AVFilterContext *avctx, FFVulkanContext *s, + AVBufferRef *frames_ref, + int width, int height, enum AVPixelFormat sw_format); + +/** + * Submit a compute shader with a zero/one input and single out for execution. + */ +int ff_vk_filter_process_simple(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, AVFrame *out_f, AVFrame *in_f, + VkSampler sampler, void *push_src, size_t push_size); + +/** + * Submit a compute shader with a single in and single out with 2 stages. + */ +int ff_vk_filter_process_2pass(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pls[2], + AVFrame *out, AVFrame *tmp, AVFrame *in, + VkSampler sampler, void *push_src, size_t push_size); + +/** + * Up to 16 inputs, one output + */ +int ff_vk_filter_process_Nin(FFVulkanContext *vkctx, FFVkExecPool *e, + FFVulkanPipeline *pl, + AVFrame *out, AVFrame *in[], int nb_in, + VkSampler sampler, void *push_src, size_t push_size); #endif /* AVFILTER_VULKAN_FILTER_H */ diff --git a/libavfilter/vulkan_glslang.c b/libavfilter/vulkan_glslang.c new file mode 100644 index 0000000000..845a530ee0 --- /dev/null +++ b/libavfilter/vulkan_glslang.c @@ -0,0 +1,283 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <pthread.h> + +#include <glslang/build_info.h> +#include <glslang/Include/glslang_c_interface.h> + +#include "vulkan_spirv.h" +#include "libavutil/mem.h" +#include "libavutil/avassert.h" + +static pthread_mutex_t glslc_mutex = PTHREAD_MUTEX_INITIALIZER; +static int glslc_refcount = 0; + +static const glslang_resource_t glslc_resource_limits = { + .max_lights = 32, + .max_clip_planes = 6, + .max_texture_units = 32, + .max_texture_coords = 32, + .max_vertex_attribs = 64, + .max_vertex_uniform_components = 4096, + .max_varying_floats = 64, + .max_vertex_texture_image_units = 32, + .max_combined_texture_image_units = 80, + .max_texture_image_units = 32, + .max_fragment_uniform_components = 4096, + .max_draw_buffers = 32, + .max_vertex_uniform_vectors = 128, + .max_varying_vectors = 8, + .max_fragment_uniform_vectors = 16, + .max_vertex_output_vectors = 16, + .max_fragment_input_vectors = 15, + .min_program_texel_offset = -8, + .max_program_texel_offset = 7, + .max_clip_distances = 8, + .max_compute_work_group_count_x = 65535, + .max_compute_work_group_count_y = 65535, + .max_compute_work_group_count_z = 65535, + .max_compute_work_group_size_x = 1024, + .max_compute_work_group_size_y = 1024, + .max_compute_work_group_size_z = 64, + .max_compute_uniform_components = 1024, + .max_compute_texture_image_units = 16, + .max_compute_image_uniforms = 8, + .max_compute_atomic_counters = 8, + .max_compute_atomic_counter_buffers = 1, + .max_varying_components = 60, + .max_vertex_output_components = 64, + .max_geometry_input_components = 64, + .max_geometry_output_components = 128, + .max_fragment_input_components = 128, + .max_image_units = 8, + .max_combined_image_units_and_fragment_outputs = 8, + .max_combined_shader_output_resources = 8, + .max_image_samples = 0, + .max_vertex_image_uniforms = 0, + .max_tess_control_image_uniforms = 0, + .max_tess_evaluation_image_uniforms = 0, + .max_geometry_image_uniforms = 0, + .max_fragment_image_uniforms = 8, + .max_combined_image_uniforms = 8, + .max_geometry_texture_image_units = 16, + .max_geometry_output_vertices = 256, + .max_geometry_total_output_components = 1024, + .max_geometry_uniform_components = 1024, + .max_geometry_varying_components = 64, + .max_tess_control_input_components = 128, + .max_tess_control_output_components = 128, + .max_tess_control_texture_image_units = 16, + .max_tess_control_uniform_components = 1024, + .max_tess_control_total_output_components = 4096, + .max_tess_evaluation_input_components = 128, + .max_tess_evaluation_output_components = 128, + .max_tess_evaluation_texture_image_units = 16, + .max_tess_evaluation_uniform_components = 1024, + .max_tess_patch_components = 120, + .max_patch_vertices = 32, + .max_tess_gen_level = 64, + .max_viewports = 16, + .max_vertex_atomic_counters = 0, + .max_tess_control_atomic_counters = 0, + .max_tess_evaluation_atomic_counters = 0, + .max_geometry_atomic_counters = 0, + .max_fragment_atomic_counters = 8, + .max_combined_atomic_counters = 8, + .max_atomic_counter_bindings = 1, + .max_vertex_atomic_counter_buffers = 0, + .max_tess_control_atomic_counter_buffers = 0, + .max_tess_evaluation_atomic_counter_buffers = 0, + .max_geometry_atomic_counter_buffers = 0, + .max_fragment_atomic_counter_buffers = 1, + .max_combined_atomic_counter_buffers = 1, + .max_atomic_counter_buffer_size = 16384, + .max_transform_feedback_buffers = 4, + .max_transform_feedback_interleaved_components = 64, + .max_cull_distances = 8, + .max_combined_clip_and_cull_distances = 8, + .max_samples = 4, + .max_mesh_output_vertices_nv = 256, + .max_mesh_output_primitives_nv = 512, + .max_mesh_work_group_size_x_nv = 32, + .max_mesh_work_group_size_y_nv = 1, + .max_mesh_work_group_size_z_nv = 1, + .max_task_work_group_size_x_nv = 32, + .max_task_work_group_size_y_nv = 1, + .max_task_work_group_size_z_nv = 1, + .max_mesh_view_count_nv = 4, + .maxDualSourceDrawBuffersEXT = 1, + + .limits = { + .non_inductive_for_loops = 1, + .while_loops = 1, + .do_while_loops = 1, + .general_uniform_indexing = 1, + .general_attribute_matrix_vector_indexing = 1, + .general_varying_indexing = 1, + .general_sampler_indexing = 1, + .general_variable_indexing = 1, + .general_constant_matrix_vector_indexing = 1, + } +}; + +static int glslc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, + FFVkSPIRVShader *shd, uint8_t **data, + size_t *size, const char *entrypoint, + void **opaque) +{ + const char *messages; + glslang_shader_t *glslc_shader; + glslang_program_t *glslc_program; + + static const glslang_stage_t glslc_stage[] = { + [VK_SHADER_STAGE_VERTEX_BIT] = GLSLANG_STAGE_VERTEX, + [VK_SHADER_STAGE_FRAGMENT_BIT] = GLSLANG_STAGE_FRAGMENT, + [VK_SHADER_STAGE_COMPUTE_BIT] = GLSLANG_STAGE_COMPUTE, + }; + + const glslang_input_t glslc_input = { + .language = GLSLANG_SOURCE_GLSL, + .stage = glslc_stage[shd->shader.stage], + .client = GLSLANG_CLIENT_VULKAN, + /* GLSLANG_TARGET_VULKAN_1_2 before 11.6 resulted in targeting 1.0 */ +#if (((GLSLANG_VERSION_MAJOR) > 11) || ((GLSLANG_VERSION_MAJOR) == 11 && \ + (((GLSLANG_VERSION_MINOR) > 6) || ((GLSLANG_VERSION_MINOR) == 6 && \ + ((GLSLANG_VERSION_PATCH) > 0))))) + .client_version = GLSLANG_TARGET_VULKAN_1_2, + .target_language_version = GLSLANG_TARGET_SPV_1_5, +#else + .client_version = GLSLANG_TARGET_VULKAN_1_1, + .target_language_version = GLSLANG_TARGET_SPV_1_3, +#endif + .target_language = GLSLANG_TARGET_SPV, + .code = shd->src.str, + .default_version = 460, + .default_profile = GLSLANG_NO_PROFILE, + .force_default_version_and_profile = false, + .forward_compatible = false, + .messages = GLSLANG_MSG_DEFAULT_BIT, + .resource = &glslc_resource_limits, + }; + + av_assert0(glslc_refcount); + + *opaque = NULL; + + if (!(glslc_shader = glslang_shader_create(&glslc_input))) + return AVERROR(ENOMEM); + + if (!glslang_shader_preprocess(glslc_shader, &glslc_input)) { + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); + av_log(avctx, AV_LOG_ERROR, "Unable to preprocess shader: %s (%s)!\n", + glslang_shader_get_info_log(glslc_shader), + glslang_shader_get_info_debug_log(glslc_shader)); + glslang_shader_delete(glslc_shader); + return AVERROR(EINVAL); + } + + if (!glslang_shader_parse(glslc_shader, &glslc_input)) { + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); + av_log(avctx, AV_LOG_ERROR, "Unable to parse shader: %s (%s)!\n", + glslang_shader_get_info_log(glslc_shader), + glslang_shader_get_info_debug_log(glslc_shader)); + glslang_shader_delete(glslc_shader); + return AVERROR(EINVAL); + } + + if (!(glslc_program = glslang_program_create())) { + glslang_shader_delete(glslc_shader); + return AVERROR(EINVAL); + } + + glslang_program_add_shader(glslc_program, glslc_shader); + + if (!glslang_program_link(glslc_program, GLSLANG_MSG_SPV_RULES_BIT | + GLSLANG_MSG_VULKAN_RULES_BIT)) { + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); + av_log(avctx, AV_LOG_ERROR, "Unable to link shader: %s (%s)!\n", + glslang_program_get_info_log(glslc_program), + glslang_program_get_info_debug_log(glslc_program)); + glslang_program_delete(glslc_program); + glslang_shader_delete(glslc_shader); + return AVERROR(EINVAL); + } + + glslang_program_SPIRV_generate(glslc_program, glslc_input.stage); + + messages = glslang_program_SPIRV_get_messages(glslc_program); + if (messages) { + ff_vk_shader_print(avctx, shd, AV_LOG_WARNING); + av_log(avctx, AV_LOG_WARNING, "%s\n", messages); + } else { + ff_vk_shader_print(avctx, shd, AV_LOG_VERBOSE); + } + + glslang_shader_delete(glslc_shader); + + *size = glslang_program_SPIRV_get_size(glslc_program) * sizeof(unsigned int); + *data = (void *)glslang_program_SPIRV_get_ptr(glslc_program); + *opaque = glslc_program; + + return 0; +} + +static void glslc_shader_free(FFVkSPIRVCompiler *ctx, void **opaque) +{ + if (!opaque || !*opaque) + return; + + av_assert0(glslc_refcount); + glslang_program_delete(*opaque); + *opaque = NULL; +} + +static void glslc_uninit(FFVkSPIRVCompiler **ctx) +{ + if (!ctx || !*ctx) + return; + + pthread_mutex_lock(&glslc_mutex); + if (glslc_refcount && (--glslc_refcount == 0)) + glslang_finalize_process(); + pthread_mutex_unlock(&glslc_mutex); + + av_freep(ctx); +} + +FFVkSPIRVCompiler *ff_vk_glslang_init(void) +{ + FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); + if (!ret) + return NULL; + + ret->compile_shader = glslc_shader_compile; + ret->free_shader = glslc_shader_free; + ret->uninit = glslc_uninit; + + pthread_mutex_lock(&glslc_mutex); + if (!glslc_refcount++) { + if (!glslang_initialize_process()) { + av_freep(&ret); + glslc_refcount--; + } + } + pthread_mutex_unlock(&glslc_mutex); + + return ret; +} diff --git a/libavfilter/vulkan_shaderc.c b/libavfilter/vulkan_shaderc.c new file mode 100644 index 0000000000..38be1030ad --- /dev/null +++ b/libavfilter/vulkan_shaderc.c @@ -0,0 +1,124 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <shaderc/shaderc.h> + +#include "libavutil/mem.h" +#include "vulkan_spirv.h" + +static int shdc_shader_compile(FFVkSPIRVCompiler *ctx, void *avctx, + FFVkSPIRVShader *shd, uint8_t **data, + size_t *size, const char *entrypoint, + void **opaque) +{ + int loglevel, err, warn, ret; + const char *status, *message; + shaderc_compilation_result_t res; + static const char *shdc_result[] = { + [shaderc_compilation_status_success] = "success", + [shaderc_compilation_status_invalid_stage] = "invalid stage", + [shaderc_compilation_status_compilation_error] = "error", + [shaderc_compilation_status_internal_error] = "internal error", + [shaderc_compilation_status_null_result_object] = "no result", + [shaderc_compilation_status_invalid_assembly] = "invalid assembly", + }; + static const shaderc_shader_kind shdc_kind[] = { + [VK_SHADER_STAGE_VERTEX_BIT] = shaderc_glsl_vertex_shader, + [VK_SHADER_STAGE_FRAGMENT_BIT] = shaderc_glsl_fragment_shader, + [VK_SHADER_STAGE_COMPUTE_BIT] = shaderc_glsl_compute_shader, + }; + + shaderc_compile_options_t opts = shaderc_compile_options_initialize(); + *opaque = NULL; + if (!opts) + return AVERROR(ENOMEM); + + shaderc_compile_options_set_target_env(opts, shaderc_target_env_vulkan, + shaderc_env_version_vulkan_1_2); + shaderc_compile_options_set_target_spirv(opts, shaderc_spirv_version_1_5); + shaderc_compile_options_set_optimization_level(opts, + shaderc_optimization_level_performance); + + res = shaderc_compile_into_spv((shaderc_compiler_t)ctx->priv, + shd->src.str, strlen(shd->src.str), + shdc_kind[shd->shader.stage], + shd->name, entrypoint, opts); + shaderc_compile_options_release(opts); + + ret = shaderc_result_get_compilation_status(res); + err = shaderc_result_get_num_errors(res); + warn = shaderc_result_get_num_warnings(res); + message = shaderc_result_get_error_message(res); + + loglevel = err ? AV_LOG_ERROR : warn ? AV_LOG_WARNING : AV_LOG_VERBOSE; + + ff_vk_shader_print(avctx, shd, loglevel); + if (message && (err || warn)) + av_log(avctx, loglevel, "%s\n", message); + status = ret < FF_ARRAY_ELEMS(shdc_result) ? shdc_result[ret] : "unknown"; + av_log(avctx, loglevel, "shaderc compile status '%s' (%d errors, %d warnings)\n", + status, err, warn); + + if (err > 0) + return AVERROR(EINVAL); + + *data = (uint8_t *)shaderc_result_get_bytes(res); + *size = shaderc_result_get_length(res); + *opaque = res; + + return 0; +} + +static void shdc_shader_free(FFVkSPIRVCompiler *ctx, void **opaque) +{ + if (!opaque || !*opaque) + return; + + shaderc_result_release((shaderc_compilation_result_t)*opaque); + *opaque = NULL; +} + +static void shdc_uninit(FFVkSPIRVCompiler **ctx) +{ + FFVkSPIRVCompiler *s; + + if (!ctx || !*ctx) + return; + + s = *ctx; + + shaderc_compiler_release((shaderc_compiler_t)s->priv); + av_freep(ctx); +} + +FFVkSPIRVCompiler *ff_vk_shaderc_init(void) +{ + FFVkSPIRVCompiler *ret = av_mallocz(sizeof(*ret)); + if (!ret) + return NULL; + + ret->compile_shader = shdc_shader_compile; + ret->free_shader = shdc_shader_free; + ret->uninit = shdc_uninit; + + ret->priv = (void *)shaderc_compiler_initialize(); + if (!ret->priv) + av_freep(&ret); + + return ret; +} diff --git a/libavfilter/vulkan_spirv.h b/libavfilter/vulkan_spirv.h new file mode 100644 index 0000000000..5638cd9696 --- /dev/null +++ b/libavfilter/vulkan_spirv.h @@ -0,0 +1,45 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_VULKAN_SPIRV_H +#define AVFILTER_VULKAN_SPIRV_H + +#include "libavutil/vulkan.h" + +#include "vulkan.h" +#include "config.h" + +typedef struct FFVkSPIRVCompiler { + void *priv; + int (*compile_shader)(struct FFVkSPIRVCompiler *ctx, void *avctx, + struct FFVkSPIRVShader *shd, uint8_t **data, + size_t *size, const char *entrypoint, void **opaque); + void (*free_shader)(struct FFVkSPIRVCompiler *ctx, void **opaque); + void (*uninit)(struct FFVkSPIRVCompiler **ctx); +} FFVkSPIRVCompiler; + +#if CONFIG_LIBGLSLANG +FFVkSPIRVCompiler *ff_vk_glslang_init(void); +#define ff_vk_spirv_init ff_vk_glslang_init +#endif +#if CONFIG_LIBSHADERC +FFVkSPIRVCompiler *ff_vk_shaderc_init(void); +#define ff_vk_spirv_init ff_vk_shaderc_init +#endif + +#endif /* AVFILTER_VULKAN_H */ |