aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLynne <dev@lynne.ee>2022-03-15 23:00:32 +0100
committerLynne <dev@lynne.ee>2023-05-29 00:41:37 +0200
commit46a77c6496a54d0079e6a2108a0dfffead861ac8 (patch)
treee80fdb9894d2026efcd684cd829fd0898a3c5359
parent2a1fd2814f7c92a9c10b58ff584381da64a57189 (diff)
downloadffmpeg-46a77c6496a54d0079e6a2108a0dfffead861ac8.tar.gz
hwcontext_vulkan: support threadsafe queue and frame operations
-rw-r--r--libavutil/hwcontext_vulkan.c191
-rw-r--r--libavutil/hwcontext_vulkan.h38
2 files changed, 180 insertions, 49 deletions
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 2434a80090..b511836b7b 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -27,6 +27,7 @@
#include <dlfcn.h>
#endif
+#include "thread.h"
#include <unistd.h>
#include "config.h"
@@ -92,8 +93,10 @@ typedef struct VulkanDevicePriv {
VkPhysicalDeviceVulkan13Features device_features_1_3;
/* Queues */
- uint32_t qfs[5];
- int num_qfs;
+ pthread_mutex_t **qf_mutex;
+ uint32_t nb_tot_qfs;
+ uint32_t img_qfs[5];
+ uint32_t nb_img_qfs;
/* Debug callback */
VkDebugUtilsMessengerEXT debug_ctx;
@@ -127,6 +130,8 @@ typedef struct VulkanFramesPriv {
} VulkanFramesPriv;
typedef struct AVVkFrameInternal {
+ pthread_mutex_t update_mutex;
+
#if CONFIG_CUDA
/* Importing external memory into cuda is really expensive so we keep the
* memory imported all the time */
@@ -1305,6 +1310,12 @@ static void vulkan_device_free(AVHWDeviceContext *ctx)
if (p->libvulkan)
dlclose(p->libvulkan);
+ for (uint32_t i = 0; i < p->nb_tot_qfs; i++) {
+ pthread_mutex_destroy(p->qf_mutex[i]);
+ av_freep(&p->qf_mutex[i]);
+ }
+ av_freep(&p->qf_mutex);
+
RELEASE_PROPS(hwctx->enabled_inst_extensions, hwctx->nb_enabled_inst_extensions);
RELEASE_PROPS(hwctx->enabled_dev_extensions, hwctx->nb_enabled_dev_extensions);
}
@@ -1437,13 +1448,26 @@ end:
return err;
}
+static void lock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
+{
+ VulkanDevicePriv *p = ctx->internal->priv;
+ pthread_mutex_lock(&p->qf_mutex[queue_family][index]);
+}
+
+static void unlock_queue(AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index)
+{
+ VulkanDevicePriv *p = ctx->internal->priv;
+ pthread_mutex_unlock(&p->qf_mutex[queue_family][index]);
+}
+
static int vulkan_device_init(AVHWDeviceContext *ctx)
{
int err;
- uint32_t queue_num;
+ uint32_t qf_num;
AVVulkanDeviceContext *hwctx = ctx->hwctx;
VulkanDevicePriv *p = ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ VkQueueFamilyProperties *qf;
int graph_index, comp_index, tx_index, enc_index, dec_index;
/* Set device extension flags */
@@ -1482,12 +1506,37 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
p->dev_is_nvidia = (p->props.properties.vendorID == 0x10de);
p->dev_is_intel = (p->props.properties.vendorID == 0x8086);
- vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &queue_num, NULL);
- if (!queue_num) {
+ vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, NULL);
+ if (!qf_num) {
av_log(ctx, AV_LOG_ERROR, "Failed to get queues!\n");
return AVERROR_EXTERNAL;
}
+ qf = av_malloc_array(qf_num, sizeof(VkQueueFamilyProperties));
+ if (!qf)
+ return AVERROR(ENOMEM);
+
+ vk->GetPhysicalDeviceQueueFamilyProperties(hwctx->phys_dev, &qf_num, qf);
+
+ p->qf_mutex = av_calloc(qf_num, sizeof(*p->qf_mutex));
+ if (!p->qf_mutex)
+ return AVERROR(ENOMEM);
+ p->nb_tot_qfs = qf_num;
+
+ for (uint32_t i = 0; i < qf_num; i++) {
+ p->qf_mutex[i] = av_calloc(qf[i].queueCount, sizeof(**p->qf_mutex));
+ if (!p->qf_mutex[i])
+ return AVERROR(ENOMEM);
+ for (uint32_t j = 0; j < qf[i].queueCount; j++) {
+ err = pthread_mutex_init(&p->qf_mutex[i][j], NULL);
+ if (err != 0) {
+ av_log(ctx, AV_LOG_ERROR, "pthread_mutex_init failed : %s\n",
+ av_err2str(err));
+ return AVERROR(err);
+ }
+ }
+ }
+
graph_index = hwctx->queue_family_index;
comp_index = hwctx->queue_family_comp_index;
tx_index = hwctx->queue_family_tx_index;
@@ -1502,9 +1551,9 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
return AVERROR(EINVAL); \
} else if (fidx < 0 || ctx_qf < 0) { \
break; \
- } else if (ctx_qf >= queue_num) { \
+ } else if (ctx_qf >= qf_num) { \
av_log(ctx, AV_LOG_ERROR, "Invalid %s family index %i (device has %i families)!\n", \
- type, ctx_qf, queue_num); \
+ type, ctx_qf, qf_num); \
return AVERROR(EINVAL); \
} \
\
@@ -1521,7 +1570,7 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
tx_index = (ctx_qf == tx_index) ? -1 : tx_index; \
enc_index = (ctx_qf == enc_index) ? -1 : enc_index; \
dec_index = (ctx_qf == dec_index) ? -1 : dec_index; \
- p->qfs[p->num_qfs++] = ctx_qf; \
+ p->img_qfs[p->nb_img_qfs++] = ctx_qf; \
} while (0)
CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index, hwctx->nb_graphics_queues);
@@ -1532,6 +1581,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
#undef CHECK_QUEUE
+ if (!hwctx->lock_queue)
+ hwctx->lock_queue = lock_queue;
+ if (!hwctx->unlock_queue)
+ hwctx->unlock_queue = unlock_queue;
+
/* Get device capabilities */
vk->GetPhysicalDeviceMemoryProperties(hwctx->phys_dev, &p->mprops);
@@ -1733,9 +1787,6 @@ static void vulkan_free_internal(AVVkFrame *f)
{
AVVkFrameInternal *internal = f->internal;
- if (!internal)
- return;
-
#if CONFIG_CUDA
if (internal->cuda_fc_ref) {
AVHWFramesContext *cuda_fc = (AVHWFramesContext *)internal->cuda_fc_ref->data;
@@ -1764,6 +1815,7 @@ static void vulkan_free_internal(AVVkFrame *f)
}
#endif
+ pthread_mutex_destroy(&internal->update_mutex);
av_freep(&f->internal);
}
@@ -1924,9 +1976,11 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
uint32_t src_qf, dst_qf;
VkImageLayout new_layout;
VkAccessFlags new_access;
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
const int planes = av_pix_fmt_count_planes(hwfc->sw_format);
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
+ AVFrame tmp = { .data[0] = (uint8_t *)frame };
uint64_t sem_sig_val[AV_NUM_DATA_POINTERS];
VkImageMemoryBarrier img_bar[AV_NUM_DATA_POINTERS] = { 0 };
@@ -1945,6 +1999,12 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
};
VkPipelineStageFlagBits wait_st[AV_NUM_DATA_POINTERS];
+
+ if ((err = wait_start_exec_ctx(hwfc, ectx)))
+ return err;
+
+ vkfc->lock_frame(hwfc, frame);
+
for (int i = 0; i < planes; i++) {
wait_st[i] = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
sem_sig_val[i] = frame->sem_value[i] + 1;
@@ -1981,9 +2041,6 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
break;
}
- if ((err = wait_start_exec_ctx(hwfc, ectx)))
- return err;
-
/* Change the image layout to something more optimal for writes.
* This also signals the newly created semaphore, making it usable
* for synchronization */
@@ -2009,7 +2066,10 @@ static int prepare_frame(AVHWFramesContext *hwfc, VulkanExecCtx *ectx,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0, 0, NULL, 0, NULL, planes, img_bar);
- return submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+ err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 0);
+ vkfc->unlock_frame(hwfc, frame);
+
+ return err;
}
static inline void get_plane_wh(int *w, int *h, enum AVPixelFormat format,
@@ -2091,10 +2151,10 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.usage = usage,
.samples = VK_SAMPLE_COUNT_1_BIT,
- .pQueueFamilyIndices = p->qfs,
- .queueFamilyIndexCount = p->num_qfs,
- .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
+ .pQueueFamilyIndices = p->img_qfs,
+ .queueFamilyIndexCount = p->nb_img_qfs,
+ .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+ VK_SHARING_MODE_EXCLUSIVE,
};
get_plane_wh(&create_info.extent.width, &create_info.extent.height,
@@ -2118,6 +2178,7 @@ static int create_frame(AVHWFramesContext *hwfc, AVVkFrame **frame,
return AVERROR_EXTERNAL;
}
+ f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
f->layout[i] = create_info.initialLayout;
f->access[i] = 0x0;
f->sem_value[i] = 0;
@@ -2162,10 +2223,10 @@ static void try_export_flags(AVHWFramesContext *hwfc,
VkPhysicalDeviceImageDrmFormatModifierInfoEXT phy_dev_mod_info = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT,
.pNext = NULL,
- .pQueueFamilyIndices = p->qfs,
- .queueFamilyIndexCount = p->num_qfs,
- .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
+ .pQueueFamilyIndices = p->img_qfs,
+ .queueFamilyIndexCount = p->nb_img_qfs,
+ .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+ VK_SHARING_MODE_EXCLUSIVE,
};
VkPhysicalDeviceExternalImageFormatInfo enext = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO,
@@ -2260,6 +2321,16 @@ fail:
return NULL;
}
+static void lock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
+{
+ pthread_mutex_lock(&vkf->internal->update_mutex);
+}
+
+static void unlock_frame(AVHWFramesContext *fc, AVVkFrame *vkf)
+{
+ pthread_mutex_unlock(&vkf->internal->update_mutex);
+}
+
static void vulkan_frames_uninit(AVHWFramesContext *hwfc)
{
VulkanFramesPriv *fp = hwfc->internal->priv;
@@ -2422,6 +2493,11 @@ static int vulkan_frames_init(AVHWFramesContext *hwfc)
return AVERROR(ENOMEM);
}
+ if (!hwctx->lock_frame)
+ hwctx->lock_frame = lock_frame;
+ if (!hwctx->unlock_frame)
+ hwctx->unlock_frame = unlock_frame;
+
return 0;
}
@@ -2728,10 +2804,10 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
.usage = VK_IMAGE_USAGE_SAMPLED_BIT |
VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
.samples = VK_SAMPLE_COUNT_1_BIT,
- .pQueueFamilyIndices = p->qfs,
- .queueFamilyIndexCount = p->num_qfs,
- .sharingMode = p->num_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
- VK_SHARING_MODE_EXCLUSIVE,
+ .pQueueFamilyIndices = p->img_qfs,
+ .queueFamilyIndexCount = p->nb_img_qfs,
+ .sharingMode = p->nb_img_qfs > 1 ? VK_SHARING_MODE_CONCURRENT :
+ VK_SHARING_MODE_EXCLUSIVE,
};
/* Image format verification */
@@ -2810,6 +2886,7 @@ static int vulkan_map_from_drm_frame_desc(AVHWFramesContext *hwfc, AVVkFrame **f
* offer us anything we could import and sync with, so instead
* just signal the semaphore we created. */
+ f->queue_family[i] = p->nb_img_qfs > 1 ? VK_QUEUE_FAMILY_IGNORED : p->img_qfs[0];
f->layout[i] = create_info.initialLayout;
f->access[i] = 0x0;
f->sem_value[i] = 0;
@@ -3018,20 +3095,12 @@ static int vulkan_export_to_cuda(AVHWFramesContext *hwfc,
CU_AD_FORMAT_UNSIGNED_INT8;
dst_f = (AVVkFrame *)frame->data[0];
-
dst_int = dst_f->internal;
- if (!dst_int || !dst_int->cuda_fc_ref) {
- if (!dst_f->internal)
- dst_f->internal = dst_int = av_mallocz(sizeof(*dst_f->internal));
-
- if (!dst_int)
- return AVERROR(ENOMEM);
+ if (!dst_int->cuda_fc_ref) {
dst_int->cuda_fc_ref = av_buffer_ref(cuda_hwfc);
- if (!dst_int->cuda_fc_ref) {
- av_freep(&dst_f->internal);
+ if (!dst_int->cuda_fc_ref)
return AVERROR(ENOMEM);
- }
for (int i = 0; i < planes; i++) {
CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
@@ -3705,13 +3774,14 @@ static int unmap_buffers(AVHWDeviceContext *ctx, AVBufferRef **bufs,
return err;
}
-static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
+static int transfer_image_buf(AVHWFramesContext *hwfc, AVFrame *f,
AVBufferRef **bufs, size_t *buf_offsets,
const int *buf_stride, int w,
int h, enum AVPixelFormat pix_fmt, int to_buf)
{
int err;
AVVkFrame *frame = (AVVkFrame *)f->data[0];
+ AVVulkanFramesContext *vkfc = hwfc->hwctx;
VulkanFramesPriv *fp = hwfc->internal->priv;
VulkanDevicePriv *p = hwfc->device_ctx->internal->priv;
FFVulkanFunctions *vk = &p->vkfn;
@@ -3746,11 +3816,13 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
.waitSemaphoreCount = planes,
};
- for (int i = 0; i < planes; i++)
- sem_signal_values[i] = frame->sem_value[i] + 1;
+ vkfc->lock_frame(hwfc, frame);
if ((err = wait_start_exec_ctx(hwfc, ectx)))
- return err;
+ goto end;
+
+ for (int i = 0; i < planes; i++)
+ sem_signal_values[i] = frame->sem_value[i] + 1;
/* Change the image layout to something more optimal for transfers */
for (int i = 0; i < planes; i++) {
@@ -3825,14 +3897,18 @@ static int transfer_image_buf(AVHWFramesContext *hwfc, const AVFrame *f,
if (!f->buf[ref])
break;
if ((err = add_buf_dep_exec_ctx(hwfc, ectx, &f->buf[ref], 1)))
- return err;
+ goto end;
}
if (ref && (err = add_buf_dep_exec_ctx(hwfc, ectx, bufs, planes)))
- return err;
- return submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
+ goto end;
+ err = submit_exec_ctx(hwfc, ectx, &s_info, frame, !ref);
} else {
- return submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
+ err = submit_exec_ctx(hwfc, ectx, &s_info, frame, 1);
}
+
+end:
+ vkfc->unlock_frame(hwfc, frame);
+ return err;
}
static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
@@ -3961,8 +4037,9 @@ static int vulkan_transfer_data(AVHWFramesContext *hwfc, const AVFrame *vkf,
}
/* Copy buffers into/from image */
- err = transfer_image_buf(hwfc, vkf, bufs, buf_offsets, tmp.linesize,
- swf->width, swf->height, swf->format, from);
+ err = transfer_image_buf(hwfc, (AVFrame *)vkf, bufs, buf_offsets,
+ tmp.linesize, swf->width, swf->height, swf->format,
+ from);
if (from) {
/* Map, copy buffer (which came FROM the VkImage) to the frame, unmap */
@@ -4143,7 +4220,25 @@ static int vulkan_frames_derive_to(AVHWFramesContext *dst_fc,
AVVkFrame *av_vk_frame_alloc(void)
{
- return av_mallocz(sizeof(AVVkFrame));
+ int err;
+ AVVkFrame *f = av_mallocz(sizeof(AVVkFrame));
+ if (!f)
+ return NULL;
+
+ f->internal = av_mallocz(sizeof(*f->internal));
+ if (!f->internal) {
+ av_free(f);
+ return NULL;
+ }
+
+ err = pthread_mutex_init(&f->internal->update_mutex, NULL);
+ if (err != 0) {
+ av_free(f->internal);
+ av_free(f);
+ return NULL;
+ }
+
+ return f;
}
const HWContextType ff_hwcontext_type_vulkan = {
diff --git a/libavutil/hwcontext_vulkan.h b/libavutil/hwcontext_vulkan.h
index 70c8379dc3..9fbf4b2160 100644
--- a/libavutil/hwcontext_vulkan.h
+++ b/libavutil/hwcontext_vulkan.h
@@ -27,6 +27,8 @@
#include "pixfmt.h"
#include "frame.h"
+typedef struct AVVkFrame AVVkFrame;
+
/**
* @file
* API-specific header for AV_HWDEVICE_TYPE_VULKAN.
@@ -135,6 +137,19 @@ typedef struct AVVulkanDeviceContext {
*/
int queue_family_decode_index;
int nb_decode_queues;
+
+ /**
+ * Locks a queue, preventing other threads from submitting any command
+ * buffers to this queue.
+ * If set to NULL, will be set to lavu-internal functions that utilize a
+ * mutex.
+ */
+ void (*lock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index);
+
+ /**
+ * Similar to lock_queue(), unlocks a queue. Must only be called after locking.
+ */
+ void (*unlock_queue)(struct AVHWDeviceContext *ctx, uint32_t queue_family, uint32_t index);
} AVVulkanDeviceContext;
/**
@@ -195,6 +210,21 @@ typedef struct AVVulkanFramesContext {
* av_hwframe_ctx_init().
*/
AVVkFrameFlags flags;
+
+ /**
+ * Locks a frame, preventing other threads from changing frame properties.
+ * Users SHOULD only ever lock just before command submission in order
+ * to get accurate frame properties, and unlock immediately after command
+ * submission without waiting for it to finish.
+ *
+ * If unset, will be set to lavu-internal functions that utilize a mutex.
+ */
+ void (*lock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
+
+ /**
+ * Similar to lock_frame(), unlocks a frame. Must only be called after locking.
+ */
+ void (*unlock_frame)(struct AVHWFramesContext *fc, AVVkFrame *vkf);
} AVVulkanFramesContext;
/*
@@ -210,7 +240,7 @@ typedef struct AVVulkanFramesContext {
* @note the size of this structure is not part of the ABI, to allocate
* you must use @av_vk_frame_alloc().
*/
-typedef struct AVVkFrame {
+struct AVVkFrame {
/**
* Vulkan images to which the memory is bound to.
*/
@@ -264,6 +294,12 @@ typedef struct AVVkFrame {
* Describes the binding offset of each plane to the VkDeviceMemory.
*/
ptrdiff_t offset[AV_NUM_DATA_POINTERS];
+
+ /**
+ * Queue family of the images. Must be VK_QUEUE_FAMILY_IGNORED if
+ * the image was allocated with the CONCURRENT concurrency option.
+ */
+ uint32_t queue_family[AV_NUM_DATA_POINTERS];
} AVVkFrame;
/**