aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLynne <dev@lynne.ee>2024-11-20 08:10:22 +0100
committerLynne <dev@lynne.ee>2024-11-26 14:14:15 +0100
commitd8f301cdf263fd05c6c5a3d46ee62143f117d41d (patch)
treee82e1f0ebe3566f98ae4dd25b92e5c4844156d42
parent4fefc6e80c21bbc63e4dee3bd7332f04ca811698 (diff)
downloadffmpeg-d8f301cdf263fd05c6c5a3d46ee62143f117d41d.tar.gz
ffv1enc_vulkan: switch to receive_packet
This allows the encoder to fully saturate all queues the GPU has, giving a good 10% in certain cases and resolutions. This also improves error resilience if an allocation fails, and properly cleans up after itself if it does.
-rw-r--r--libavcodec/ffv1enc_vulkan.c425
-rw-r--r--libavutil/vulkan_functions.h1
2 files changed, 318 insertions, 108 deletions
diff --git a/libavcodec/ffv1enc_vulkan.c b/libavcodec/ffv1enc_vulkan.c
index 12e95d1cb5..3ec03b4566 100644
--- a/libavcodec/ffv1enc_vulkan.c
+++ b/libavcodec/ffv1enc_vulkan.c
@@ -19,6 +19,7 @@
*/
#include "libavutil/crc.h"
+#include "libavutil/mem.h"
#include "libavutil/vulkan.h"
#include "libavutil/vulkan_spirv.h"
@@ -36,13 +37,38 @@
#define LG_ALIGN_W 32
#define LG_ALIGN_H 32
+typedef struct VulkanEncodeFFv1FrameData {
+ /* Output data */
+ AVBufferRef *out_data_ref;
+
+ /* Results data */
+ AVBufferRef *results_data_ref;
+
+ /* Copied from the source */
+ int64_t pts;
+ int64_t duration;
+ void *frame_opaque;
+ AVBufferRef *frame_opaque_ref;
+
+ int key_frame;
+} VulkanEncodeFFv1FrameData;
+
typedef struct VulkanEncodeFFv1Context {
FFV1Context ctx;
+ AVFrame *frame;
FFVulkanContext s;
FFVkQueueFamilyCtx qf;
FFVkExecPool exec_pool;
+ FFVkQueueFamilyCtx transfer_qf;
+ FFVkExecPool transfer_exec_pool;
+
+ VkBufferCopy *buf_regions;
+ VulkanEncodeFFv1FrameData *exec_ctx_info;
+ int in_flight;
+ int async_depth;
+
FFVulkanShader setup;
FFVulkanShader reset;
FFVulkanShader rct;
@@ -59,6 +85,7 @@ typedef struct VulkanEncodeFFv1Context {
/* Output data buffer */
AVBufferPool *out_data_pool;
+ AVBufferPool *pkt_data_pool;
/* Temporary data buffer */
AVBufferPool *tmp_data_pool;
@@ -271,15 +298,16 @@ fail:
return err;
}
-static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
- const AVFrame *pict, int *got_packet)
+static int vulkan_encode_ffv1_submit_frame(AVCodecContext *avctx,
+ FFVkExecContext *exec,
+ const AVFrame *pict)
{
int err;
VulkanEncodeFFv1Context *fv = avctx->priv_data;
FFV1Context *f = &fv->ctx;
FFVulkanFunctions *vk = &fv->s.vkfn;
- FFVkExecContext *exec;
+ VulkanEncodeFFv1FrameData *fd = exec->opaque;
FFv1VkParameters pd;
AVFrame *intermediate_frame = NULL;
@@ -298,14 +326,10 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
/* Output data */
size_t maxsize;
- AVBufferRef *out_data_ref;
FFVkBuffer *out_data_buf;
- uint8_t *buf_p;
/* Results data */
- AVBufferRef *results_data_ref;
FFVkBuffer *results_data_buf;
- uint64_t *sc;
int has_inter = avctx->gop_size > 1;
uint32_t context_count = f->context_count[f->context_model];
@@ -316,44 +340,36 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
AVFrame *enc_in = (AVFrame *)pict;
VkImageView *enc_in_views = in_views;
- VkMappedMemoryRange invalidate_data[2];
- int nb_invalidate_data = 0;
-
VkImageMemoryBarrier2 img_bar[37];
int nb_img_bar = 0;
VkBufferMemoryBarrier2 buf_bar[8];
int nb_buf_bar = 0;
- if (!pict)
- return 0;
-
- exec = ff_vk_exec_get(&fv->s, &fv->exec_pool);
+ /* Start recording */
ff_vk_exec_start(&fv->s, exec);
/* Frame state */
f->cur_enc_frame = pict;
if (avctx->gop_size == 0 || f->picture_number % avctx->gop_size == 0) {
av_buffer_unref(&fv->keyframe_slice_data_ref);
- f->key_frame = 1;
+ f->key_frame = fd->key_frame = 1;
f->gob_count++;
} else {
- f->key_frame = 0;
+ f->key_frame = fd->key_frame = 0;
}
- f->max_slice_count = f->num_h_slices * f->num_v_slices;
f->slice_count = f->max_slice_count;
/* Allocate temporary data buffer */
tmp_data_size = f->slice_count*CONTEXT_SIZE;
- err = ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool,
- &tmp_data_ref,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
- VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
- NULL, tmp_data_size,
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
- if (err < 0)
- return err;
+ RET(ff_vk_get_pooled_buffer(&fv->s, &fv->tmp_data_pool,
+ &tmp_data_ref,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ NULL, tmp_data_size,
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
tmp_data_buf = (FFVkBuffer *)tmp_data_ref->data;
+ ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0);
/* Allocate slice buffer data */
if (f->ac == AC_GOLOMB_RICE)
@@ -368,35 +384,33 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
slice_state_size += slice_data_size;
slice_state_size = FFALIGN(slice_state_size, 8);
+ /* Allocate slice data buffer */
slice_data_ref = fv->keyframe_slice_data_ref;
if (!slice_data_ref) {
- /* Allocate slice data buffer */
- err = ff_vk_get_pooled_buffer(&fv->s, &fv->slice_data_pool,
- &slice_data_ref,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
- VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
- NULL, slice_state_size*f->slice_count,
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
- if (err < 0)
- return err;
+ RET(ff_vk_get_pooled_buffer(&fv->s, &fv->slice_data_pool,
+ &slice_data_ref,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ NULL, slice_state_size*f->slice_count,
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
/* Only save it if we're going to use it again */
if (has_inter)
fv->keyframe_slice_data_ref = slice_data_ref;
}
slice_data_buf = (FFVkBuffer *)slice_data_ref->data;
+ ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter);
/* Allocate results buffer */
- err = ff_vk_get_pooled_buffer(&fv->s, &fv->results_data_pool,
- &results_data_ref,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
- VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
- NULL, 2*f->slice_count*sizeof(uint64_t),
- VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
- if (err < 0)
- return err;
- results_data_buf = (FFVkBuffer *)results_data_ref->data;
+ RET(ff_vk_get_pooled_buffer(&fv->s, &fv->results_data_pool,
+ &fd->results_data_ref,
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ NULL, 2*f->slice_count*sizeof(uint64_t),
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));
+ results_data_buf = (FFVkBuffer *)fd->results_data_ref->data;
+ ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->results_data_ref, 1, 1);
/* Output buffer size */
maxsize = avctx->width*avctx->height*(1 + f->transparency);
@@ -414,26 +428,17 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
maxsize += FF_INPUT_BUFFER_MIN_SIZE;
/* Allocate output buffer */
- err = ff_vk_get_pooled_buffer(&fv->s, &fv->out_data_pool,
- &out_data_ref,
- VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
- VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
- NULL, maxsize,
- VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
- VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
- if (err < 0)
- return err;
-
- out_data_buf = (FFVkBuffer *)out_data_ref->data;
- pkt->data = out_data_buf->mapped_mem;
- pkt->size = out_data_buf->size;
- pkt->buf = out_data_ref;
-
- /* Add dependencies */
- ff_vk_exec_add_dep_buf(&fv->s, exec, &tmp_data_ref, 1, 0);
- ff_vk_exec_add_dep_buf(&fv->s, exec, &results_data_ref, 1, 0);
- ff_vk_exec_add_dep_buf(&fv->s, exec, &slice_data_ref, 1, has_inter);
- ff_vk_exec_add_dep_buf(&fv->s, exec, &out_data_ref, 1, 1);
+ RET(ff_vk_get_pooled_buffer(&fv->s, &fv->out_data_pool,
+ &fd->out_data_ref,
+ VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
+ VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
+ VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
+ NULL, maxsize,
+ VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT));
+ out_data_buf = (FFVkBuffer *)fd->out_data_ref->data;
+ ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 1);
+
+ /* Prepare input frame */
RET(ff_vk_exec_add_dep_frame(&fv->s, exec, enc_in,
VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT));
@@ -650,70 +655,224 @@ static int vulkan_encode_ffv1_frame(AVCodecContext *avctx, AVPacket *pkt,
if (err < 0)
return err;
+ f->picture_number++;
+
+ /* This, if needed, was referenced by the execution context
+ * as it was declared as a dependency. */
+ av_frame_free(&intermediate_frame);
+ return 0;
+
+fail:
+ av_frame_free(&intermediate_frame);
+ ff_vk_exec_discard_deps(&fv->s, exec);
+
+ return err;
+}
+
+static int download_slices(AVCodecContext *avctx,
+ VkBufferCopy *buf_regions, int nb_regions,
+ VulkanEncodeFFv1FrameData *fd,
+ AVBufferRef *pkt_data_ref)
+{
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFVulkanFunctions *vk = &fv->s.vkfn;
+ FFVkExecContext *exec;
+
+ FFVkBuffer *out_data_buf = (FFVkBuffer *)fd->out_data_ref->data;
+ FFVkBuffer *pkt_data_buf = (FFVkBuffer *)pkt_data_ref->data;
+
+ VkBufferMemoryBarrier2 buf_bar[8];
+ int nb_buf_bar = 0;
+
+ /* Transfer the slices */
+ exec = ff_vk_exec_get(&fv->s, &fv->transfer_exec_pool);
+ ff_vk_exec_start(&fv->s, exec);
+
+ ff_vk_exec_add_dep_buf(&fv->s, exec, &fd->out_data_ref, 1, 0);
+ fd->out_data_ref = NULL; /* Ownership passed */
+
+ ff_vk_exec_add_dep_buf(&fv->s, exec, &pkt_data_ref, 1, 1);
+
+ /* Ensure the output buffer is finished */
+ buf_bar[nb_buf_bar++] = (VkBufferMemoryBarrier2) {
+ .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER_2,
+ .srcStageMask = out_data_buf->stage,
+ .dstStageMask = VK_PIPELINE_STAGE_2_TRANSFER_BIT,
+ .srcAccessMask = out_data_buf->access,
+ .dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT,
+ .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+ .buffer = out_data_buf->buf,
+ .size = VK_WHOLE_SIZE,
+ .offset = 0,
+ };
+ vk->CmdPipelineBarrier2(exec->buf, &(VkDependencyInfo) {
+ .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
+ .pBufferMemoryBarriers = buf_bar,
+ .bufferMemoryBarrierCount = nb_buf_bar,
+ });
+ out_data_buf->stage = buf_bar[0].dstStageMask;
+ out_data_buf->access = buf_bar[0].dstAccessMask;
+ nb_buf_bar = 0;
+
+ vk->CmdCopyBuffer(exec->buf,
+ out_data_buf->buf, pkt_data_buf->buf,
+ nb_regions, buf_regions);
+
+ /* Submit */
+ err = ff_vk_exec_submit(&fv->s, exec);
+ if (err < 0)
+ return err;
+
/* We need the encoded data immediately */
ff_vk_exec_wait(&fv->s, exec);
- av_frame_free(&intermediate_frame);
/* Invalidate slice/output data if needed */
- if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
- invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) {
+ if (!(pkt_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+ VkMappedMemoryRange invalidate_data = {
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
- .memory = results_data_buf->mem,
+ .memory = pkt_data_buf->mem,
.offset = 0,
.size = VK_WHOLE_SIZE,
};
- if (!(out_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
- invalidate_data[nb_invalidate_data++] = (VkMappedMemoryRange) {
+ vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev,
+ 1, &invalidate_data);
+ }
+
+ return 0;
+}
+
+static int get_packet(AVCodecContext *avctx, FFVkExecContext *exec,
+ AVPacket *pkt)
+{
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ FFV1Context *f = &fv->ctx;
+ FFVulkanFunctions *vk = &fv->s.vkfn;
+
+ /* Packet data */
+ AVBufferRef *pkt_data_ref;
+ FFVkBuffer *pkt_data_buf;
+
+ VulkanEncodeFFv1FrameData *fd = exec->opaque;
+
+ FFVkBuffer *results_data_buf = (FFVkBuffer *)fd->results_data_ref->data;
+ uint64_t *sc;
+
+ /* Make sure encoding's done */
+ ff_vk_exec_wait(&fv->s, exec);
+
+ /* Invalidate slice/output data if needed */
+ if (!(results_data_buf->flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+ VkMappedMemoryRange invalidate_data = {
.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
- .memory = out_data_buf->mem,
+ .memory = results_data_buf->mem,
.offset = 0,
.size = VK_WHOLE_SIZE,
};
- if (nb_invalidate_data)
vk->InvalidateMappedMemoryRanges(fv->s.hwctx->act_dev,
- nb_invalidate_data, invalidate_data);
-
- /* First slice is in-place */
- buf_p = pkt->data;
- sc = &((uint64_t *)results_data_buf->mapped_mem)[0];
- av_log(avctx, AV_LOG_DEBUG, "Slice size = %"PRIu64" (max %i), src offset = %"PRIu64"\n",
- sc[0], pkt->size / f->slice_count, sc[1]);
- av_assert0(sc[0] < pd.slice_size_max);
- av_assert0(sc[0] < (1 << 24));
- buf_p += sc[0];
-
- /* We have to copy the rest */
- for (int i = 1; i < f->slice_count; i++) {
- uint64_t bytes;
- uint8_t *bs_start;
+ 1, &invalidate_data);
+ }
+ /* Calculate final size */
+ pkt->size = 0;
+ for (int i = 0; i < f->slice_count; i++) {
sc = &((uint64_t *)results_data_buf->mapped_mem)[i*2];
- bytes = sc[0];
- bs_start = pkt->data + sc[1];
-
- av_log(avctx, AV_LOG_DEBUG, "Slice %i size = %"PRIu64" (max %"PRIu64"), "
+ av_log(avctx, AV_LOG_DEBUG, "Slice %i size = %"PRIu64", "
"src offset = %"PRIu64"\n",
- i, bytes, pd.slice_size_max, sc[1]);
- av_assert0(bytes < pd.slice_size_max);
- av_assert0(bytes < (1 << 24));
+ i, sc[0], sc[1]);
+
+ fv->buf_regions[i] = (VkBufferCopy) {
+ .srcOffset = sc[1],
+ .dstOffset = pkt->size,
+ .size = sc[0],
+ };
+ pkt->size += sc[0];
+ }
+ av_log(avctx, AV_LOG_VERBOSE, "Encoded data: %iMiB\n", pkt->size / (1024*1024));
+ av_buffer_unref(&fd->results_data_ref); /* No need for this buffer anymore */
+
+ /* Allocate packet buffer */
+ err = ff_vk_get_pooled_buffer(&fv->s, &fv->pkt_data_pool,
+ &pkt_data_ref,
+ VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+ NULL, pkt->size,
+ VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
+ VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
+ if (err < 0)
+ return err;
+ pkt_data_buf = (FFVkBuffer *)pkt_data_ref->data;
+
+ /* Setup packet data */
+ pkt->data = pkt_data_buf->mapped_mem;
+ pkt->buf = pkt_data_ref;
- memmove(buf_p, bs_start, bytes);
+ pkt->pts = fd->pts;
+ pkt->dts = fd->pts;
+ pkt->duration = fd->duration;
+ pkt->flags |= AV_PKT_FLAG_KEY * fd->key_frame;
- buf_p += bytes;
+ if (avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
+ pkt->opaque = fd->frame_opaque;
+ pkt->opaque_ref = fd->frame_opaque_ref;
+ fd->frame_opaque_ref = NULL;
}
- f->picture_number++;
- pkt->size = buf_p - pkt->data;
- pkt->flags |= AV_PKT_FLAG_KEY * f->key_frame;
- *got_packet = 1;
+ return download_slices(avctx, fv->buf_regions, f->slice_count, fd,
+ pkt_data_ref);
+}
- av_log(avctx, AV_LOG_VERBOSE, "Total data = %i\n",
- pkt->size);
+static int vulkan_encode_ffv1_receive_packet(AVCodecContext *avctx,
+ AVPacket *pkt)
+{
+ int err;
+ VulkanEncodeFFv1Context *fv = avctx->priv_data;
+ VulkanEncodeFFv1FrameData *fd;
+ FFVkExecContext *exec;
+ AVFrame *frame;
-fail:
- /* Frames added as a dep are always referenced, so we only need to
- * clean this up. */
- av_frame_free(&intermediate_frame);
+ while (1) {
+ /* Roll an execution context */
+ exec = ff_vk_exec_get(&fv->s, &fv->exec_pool);
+
+ /* If it had a frame, immediately output it */
+ if (exec->had_submission) {
+ exec->had_submission = 0;
+ fv->in_flight--;
+ return get_packet(avctx, exec, pkt);
+ }
+
+ /* Get next frame to encode */
+ frame = fv->frame;
+ err = ff_encode_get_frame(avctx, frame);
+ if (err < 0 && err != AVERROR_EOF) {
+ return err;
+ } else if (err == AVERROR_EOF) {
+ if (!fv->in_flight)
+ return err;
+ continue;
+ }
+
+ /* Encode frame */
+ fd = exec->opaque;
+ fd->pts = frame->pts;
+ fd->duration = frame->duration;
+ if (avctx->flags & AV_CODEC_FLAG_COPY_OPAQUE) {
+ fd->frame_opaque = frame->opaque;
+ fd->frame_opaque_ref = frame->opaque_ref;
+ frame->opaque_ref = NULL;
+ }
+
+ err = vulkan_encode_ffv1_submit_frame(avctx, exec, frame);
+ av_frame_unref(frame);
+ if (err < 0)
+ return err;
+
+ fv->in_flight++;
+ if (fv->in_flight < fv->async_depth)
+ return AVERROR(EAGAIN);
+ }
return 0;
}
@@ -1441,8 +1600,23 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
return err;
}
+ if (!fv->async_depth)
+ fv->async_depth = fv->qf.nb_queues;
+
err = ff_vk_exec_pool_init(&fv->s, &fv->qf, &fv->exec_pool,
- 1, /* Single-threaded for now */
+ FFMIN(fv->qf.nb_queues, fv->async_depth),
+ 0, 0, 0, NULL);
+ if (err < 0)
+ return err;
+
+ err = ff_vk_qf_init(&fv->s, &fv->transfer_qf, VK_QUEUE_TRANSFER_BIT);
+ if (err < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Device has no transfer queues!\n");
+ return err;
+ }
+
+ err = ff_vk_exec_pool_init(&fv->s, &fv->transfer_qf, &fv->transfer_exec_pool,
+ 1,
0, 0, 0, NULL);
if (err < 0)
return err;
@@ -1510,6 +1684,24 @@ static av_cold int vulkan_encode_ffv1_init(AVCodecContext *avctx)
if (err < 0)
return err;
+ /* Temporary frame */
+ fv->frame = av_frame_alloc();
+ if (!fv->frame)
+ return AVERROR(ENOMEM);
+
+ /* Async data pool */
+ fv->async_depth = fv->exec_pool.pool_size;
+ fv->exec_ctx_info = av_calloc(fv->async_depth, sizeof(*fv->exec_ctx_info));
+ if (!fv->exec_ctx_info)
+ return AVERROR(ENOMEM);
+ for (int i = 0; i < fv->async_depth; i++)
+ fv->exec_pool.contexts[i].opaque = &fv->exec_ctx_info[i];
+
+ f->max_slice_count = f->num_h_slices * f->num_v_slices;
+ fv->buf_regions = av_malloc_array(f->max_slice_count, sizeof(*fv->buf_regions));
+ if (!fv->buf_regions)
+ return AVERROR(ENOMEM);
+
return 0;
}
@@ -1518,17 +1710,29 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
VulkanEncodeFFv1Context *fv = avctx->priv_data;
ff_vk_exec_pool_free(&fv->s, &fv->exec_pool);
+ ff_vk_exec_pool_free(&fv->s, &fv->transfer_exec_pool);
ff_vk_shader_free(&fv->s, &fv->enc);
ff_vk_shader_free(&fv->s, &fv->rct);
ff_vk_shader_free(&fv->s, &fv->reset);
ff_vk_shader_free(&fv->s, &fv->setup);
+ if (fv->exec_ctx_info) {
+ for (int i = 0; i < fv->async_depth; i++) {
+ VulkanEncodeFFv1FrameData *fd = &fv->exec_ctx_info[i];
+ av_buffer_unref(&fd->out_data_ref);
+ av_buffer_unref(&fd->results_data_ref);
+ av_buffer_unref(&fd->frame_opaque_ref);
+ }
+ }
+ av_free(fv->exec_ctx_info);
+
av_buffer_unref(&fv->intermediate_frames_ref);
av_buffer_pool_uninit(&fv->results_data_pool);
av_buffer_pool_uninit(&fv->out_data_pool);
+ av_buffer_pool_uninit(&fv->pkt_data_pool);
av_buffer_pool_uninit(&fv->tmp_data_pool);
av_buffer_unref(&fv->keyframe_slice_data_ref);
@@ -1538,6 +1742,8 @@ static av_cold int vulkan_encode_ffv1_close(AVCodecContext *avctx)
ff_vk_free_buf(&fv->s, &fv->rangecoder_static_buf);
ff_vk_free_buf(&fv->s, &fv->crc_tab_buf);
+ av_free(fv->buf_regions);
+ av_frame_free(&fv->frame);
ff_vk_uninit(&fv->s);
return 0;
@@ -1567,6 +1773,9 @@ static const AVOption vulkan_encode_ffv1_options[] = {
{ "force_pcm", "Code all slices with no prediction", OFFSET(force_pcm), AV_OPT_TYPE_BOOL,
{ .i64 = 0 }, 0, 1, VE },
+ { "async_depth", "Internal parallelization depth", OFFSET(async_depth), AV_OPT_TYPE_INT,
+ { .i64 = 0 }, 0, INT_MAX, VE },
+
{ NULL }
};
@@ -1594,7 +1803,7 @@ const FFCodec ff_ffv1_vulkan_encoder = {
.p.id = AV_CODEC_ID_FFV1,
.priv_data_size = sizeof(VulkanEncodeFFv1Context),
.init = &vulkan_encode_ffv1_init,
- FF_CODEC_ENCODE_CB(vulkan_encode_ffv1_frame),
+ FF_CODEC_RECEIVE_PACKET_CB(&vulkan_encode_ffv1_receive_packet),
.close = &vulkan_encode_ffv1_close,
.p.priv_class = &vulkan_encode_ffv1_class,
.p.capabilities = AV_CODEC_CAP_DELAY |
diff --git a/libavutil/vulkan_functions.h b/libavutil/vulkan_functions.h
index b1ae4d181e..eb6f6b01c3 100644
--- a/libavutil/vulkan_functions.h
+++ b/libavutil/vulkan_functions.h
@@ -146,6 +146,7 @@ typedef uint64_t FFVulkanExtensions;
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdPipelineBarrier) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyBufferToImage) \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyImageToBuffer) \
+ MACRO(1, 1, FF_VK_EXT_NO_FLAG, CmdCopyBuffer) \
\
/* Buffer */ \
MACRO(1, 1, FF_VK_EXT_NO_FLAG, GetBufferMemoryRequirements2) \