about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Benjamin Cheng <ben@bcheng.me> 2025-08-08 10:38:04 -0400
committer: Lynne <dev@lynne.ee> 2025-08-08 14:45:58 +0000
commit: f7a5128109b302a193ea9eaaee4346239195a881 (patch)
tree: 425a58820ea928e124d70c2b179544c1002c1cac
parent: 4042609dcf172d084c1016445712bc750b25d8a3 (diff)
download: ffmpeg-f7a5128109b302a193ea9eaaee4346239195a881.tar.gz
vulkan_av1: Fix frame threading
Basically do the same thing that was done for VP9, and remove the vestigial frame_id_alloc_mask in the context.
-rw-r--r-- libavcodec/vulkan_av1.c 28
-rw-r--r-- libavcodec/vulkan_decode.c 1
-rw-r--r-- libavcodec/vulkan_decode.h 1
3 files changed, 11 insertions, 19 deletions
diff --git a/libavcodec/vulkan_av1.c b/libavcodec/vulkan_av1.c
index 6a6ca56783..afbd2347de 100644
--- a/libavcodec/vulkan_av1.c
+++ b/libavcodec/vulkan_av1.c
@@ -254,6 +254,7 @@ static int vk_av1_start_frame(AVCodecContext *avctx,
AV1DecContext *s = avctx->priv_data;
const AV1Frame *pic = &s->cur_frame;
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
+ uint32_t frame_id_alloc_mask = 0;
AV1VulkanDecodePicture *ap = pic->hwaccel_picture_private;
FFVulkanDecodePicture *vp = &ap->vp;
@@ -268,17 +269,24 @@ static int vk_av1_start_frame(AVCodecContext *avctx,
STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_WIENER,
STD_VIDEO_AV1_FRAME_RESTORATION_TYPE_SGRPROJ };
+ /* Use the current frame_ids in ref[] to decide occupied frame_ids */
+ for (int i = 0; i < STD_VIDEO_AV1_NUM_REF_FRAMES; i++) {
+ const AV1VulkanDecodePicture* rp = s->ref[i].hwaccel_picture_private;
+ if (rp)
+ frame_id_alloc_mask |= 1 << rp->frame_id;
+ }
+
if (!ap->frame_id_set) {
unsigned slot_idx = 0;
for (unsigned i = 0; i < 32; i++) {
- if (!(dec->frame_id_alloc_mask & (1 << i))) {
+ if (!(frame_id_alloc_mask & (1 << i))) {
slot_idx = i;
break;
}
}
ap->frame_id = slot_idx;
ap->frame_id_set = 1;
- dec->frame_id_alloc_mask |= (1 << slot_idx);
+ frame_id_alloc_mask |= (1 << slot_idx);
}
ap->ref_frame_sign_bias_mask = 0x0;
@@ -637,10 +645,6 @@ static void vk_av1_free_frame_priv(AVRefStructOpaque _hwctx, void *data)
AVHWDeviceContext *hwctx = _hwctx.nc;
AV1VulkanDecodePicture *ap = data;
- /* Workaround for a spec issue. */
- if (ap->frame_id_set)
- ap->dec->frame_id_alloc_mask &= ~(1 << ap->frame_id);
-
/* Free frame resources, this also destroys the session parameters. */
ff_vk_decode_free_frame(hwctx, &ap->vp);
}
@@ -662,15 +666,5 @@ const FFHWAccel ff_av1_vulkan_hwaccel = {
.uninit = &ff_vk_decode_uninit,
.frame_params = &ff_vk_frame_params,
.priv_data_size = sizeof(FFVulkanDecodeContext),
-
- /* NOTE: Threading is intentionally disabled here. Due to the design of Vulkan,
- * where frames are opaque to users, and mostly opaque for driver developers,
- * there's an issue with current hardware accelerator implementations of AV1,
- * where they require an internal index. With regular hwaccel APIs, this index
- * is given to users as an opaque handle directly. With Vulkan, due to increased
- * flexibility, this index cannot be present anywhere.
- * The current implementation tracks the index for the driver and submits it
- * as necessary information. Due to needing to modify the decoding context,
- * which is not thread-safe, on frame free, threading is disabled. */
- .caps_internal = HWACCEL_CAP_ASYNC_SAFE,
+ .caps_internal = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_THREAD_SAFE,
};
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index a62ad5bc90..fbdc7776b3 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -116,7 +116,6 @@ int ff_vk_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
dst_ctx->dedicated_dpb = src_ctx->dedicated_dpb;
dst_ctx->external_fg = src_ctx->external_fg;
- dst_ctx->frame_id_alloc_mask = src_ctx->frame_id_alloc_mask;
return 0;
}
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index bf51d5a170..3282859053 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -69,7 +69,6 @@ typedef struct FFVulkanDecodeContext {
int dedicated_dpb; /* Oddity #1 - separate DPB images */
int external_fg; /* Oddity #2 - hardware can't apply film grain */
- uint32_t frame_id_alloc_mask; /* For AV1 only */
/* Workaround for NVIDIA drivers tested with CTS version 1.3.8 for AV1.
* The tests were incorrect as the OrderHints were offset by 1. */