avcodec/d3d12va_decode: enable reference-only mode

The Reference-Only feature in DirectX 12 is a memory optimization technique designed for video decoding scenarios. This feature requires that reference resources be allocated with the D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY resource flag. Reference textures must also be kept separate from output textures. This feature is not supported in the current version of ffmpeg. Since AMD GPUs use this feature in their Direct3D 12 decoder, ffmpeg does not currently support Direct3D 12 decoding on AMD GPUs. Signed-off-by: Tong Wu <wutong1208@outlook.com>
author: Araz Iusubov <primeadvice@gmail.com> 2025-03-18 20:23:04 +0100
committer: Tong Wu <wutong1208@outlook.com> 2025-04-05 11:57:49 +0800
commit: ccf073e772d72b83e5973c62c4c4112cfa545a9c (patch)
tree: 9f74ff27caf5656414c84b0d59fe232254c3d2f7
parent: 37a319fb95bc558c9ed882a8577cab1ed5cc1fd8 (diff)
download: ffmpeg-ccf073e772d72b83e5973c62c4c4112cfa545a9c.tar.gz
2 files changed, 165 insertions, 12 deletions
diff --git a/libavcodec/d3d12va_decode.c b/libavcodec/d3d12va_decode.c
index 312d6e1ed7..4645ffe122 100644
--- a/libavcodec/d3d12va_decode.c
+++ b/libavcodec/d3d12va_decode.c
@@ -41,6 +41,101 @@ typedef struct HelperObjects {
     uint64_t fence_value;
 } HelperObjects;
 
+typedef struct ReferenceFrame {
+    ID3D12Resource *resource;        // texture created with the VIDEO_DECODE_REFERENCE_ONLY flag
+    int            used;             // 1 while handed out; reset once its output leaves ref_resources
+    ID3D12Resource *output_resource; // output texture this entry was paired with when handed out
+} ReferenceFrame;
+
+/**
+ * Hand out a reference-only texture for output_resource, reusing a free map entry or creating one.
+ * @return the texture, or NULL (with an error logged) if the map is missing, full, or creation fails
+ */
+static ID3D12Resource *get_reference_only_resource(AVCodecContext *avctx, ID3D12Resource *output_resource)
+{
+    D3D12VADecodeContext   *ctx          = D3D12VA_DECODE_CONTEXT(avctx);
+    AVD3D12VADeviceContext *device_hwctx = ctx->device_ctx;
+    int i;
+    D3D12_HEAP_PROPERTIES props = { .Type = D3D12_HEAP_TYPE_DEFAULT };
+    D3D12_RESOURCE_DESC desc;
+    ReferenceFrame *reference_only_map = ctx->reference_only_map;
+    if (reference_only_map == NULL) {
+        av_log(avctx, AV_LOG_ERROR, "Reference frames are not allocated!\n");
+        return NULL;
+    }
+
+    // find unused resource
+    for (i = 0; i < ctx->max_num_ref; i++) {
+        if (!reference_only_map[i].used && reference_only_map[i].resource != NULL) {
+            reference_only_map[i].used = 1;
+            reference_only_map[i].output_resource = output_resource;
+            return reference_only_map[i].resource;
+        }
+    }
+
+    // find space to allocate
+    for (i = 0; i < ctx->max_num_ref; i++) {
+        if (reference_only_map[i].resource == NULL)
+            break;
+    }
+
+    if (i == ctx->max_num_ref) {
+        av_log(avctx, AV_LOG_ERROR, "No space for new Reference frame!\n");
+        return NULL;
+    }
+
+    // allocate frame: same description as the output texture, with reference-only flags
+    output_resource->lpVtbl->GetDesc(output_resource, &desc);
+    desc.Flags = D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE;
+
+    if (FAILED(ID3D12Device_CreateCommittedResource(device_hwctx->device, &props, D3D12_HEAP_FLAG_NONE, &desc,
+        D3D12_RESOURCE_STATE_COMMON, NULL, &IID_ID3D12Resource, (void **)&reference_only_map[i].resource))) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to create D3D12 Reference Resource!\n");
+        return NULL;
+    }
+
+    reference_only_map[i].used = 1;
+    reference_only_map[i].output_resource = output_resource;
+
+    return reference_only_map[i].resource;
+}
+
+// Release every reference-only texture held in the map, then free both bookkeeping arrays.
+static void free_reference_only_resources(AVCodecContext *avctx)
+{
+    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
+    ReferenceFrame *frames = ctx->reference_only_map;
+    if (!frames)
+        return;
+    for (int idx = 0; idx < ctx->max_num_ref; idx++) {
+        if (frames[idx].resource) {
+            D3D12_OBJECT_RELEASE(frames[idx].resource);
+        }
+    }
+    av_freep(&ctx->reference_only_map);
+    av_freep(&ctx->ref_only_resources);
+}
+
+// Rebuild ref_only_resources as a shadow of ref_resources; map entries whose output is unreferenced become free.
+static void prepare_reference_only_resources(AVCodecContext *avctx)
+{
+    D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
+    ReferenceFrame *frames = ctx->reference_only_map;
+    if (!frames)
+        return;
+    memset(ctx->ref_only_resources, 0, ctx->max_num_ref * sizeof(*(ctx->ref_only_resources)));
+    for (int slot = 0; slot < ctx->max_num_ref; slot++) {
+        int ref = 0;
+        while (ref < ctx->max_num_ref &&
+               !(frames[slot].used && frames[slot].output_resource == ctx->ref_resources[ref]))
+            ref++;
+        if (ref < ctx->max_num_ref)
+            ctx->ref_only_resources[ref] = frames[slot].resource;
+        else
+            frames[slot].used = 0;
+    }
+}
+
 int ff_d3d12va_get_suitable_max_bitstream_size(AVCodecContext *avctx)
 {
     AVHWFramesContext *frames_ctx = D3D12VA_FRAMES_CONTEXT(avctx);
@@ -250,6 +345,18 @@ static int d3d12va_create_decoder(AVCodecContext *avctx)
         return AVERROR_PATCHWELCOME;
     }
 
+    ctx->reference_only_map = NULL;
+    ctx->ref_only_resources = NULL;
+    if (feature.ConfigurationFlags & D3D12_VIDEO_DECODE_CONFIGURATION_FLAG_REFERENCE_ONLY_ALLOCATIONS_REQUIRED) {
+        av_log(avctx, AV_LOG_VERBOSE, "Reference-Only Allocations are required for this D3D12 decoder configuration.\n");
+        ctx->reference_only_map = av_calloc(ctx->max_num_ref + 1, sizeof(ReferenceFrame));
+            if (!ctx->reference_only_map)
+                return AVERROR(ENOMEM);
+        ctx->ref_only_resources = av_calloc(ctx->max_num_ref, sizeof(*ctx->ref_only_resources));
+            if (!ctx->ref_only_resources)
+                return AVERROR(ENOMEM);
+    }
+
     desc = (D3D12_VIDEO_DECODER_DESC) {
         .NodeMask = 0,
         .Configuration = ctx->cfg,
@@ -394,6 +501,7 @@ av_cold int ff_d3d12va_decode_uninit(AVCodecContext *avctx)
 
         av_log(avctx, AV_LOG_VERBOSE, "Total number of command allocators reused: %d\n", num_allocator);
     }
+    free_reference_only_resources(avctx);
 
     av_fifo_freep2(&ctx->objects_queue);
 
@@ -412,14 +520,15 @@ static inline int d3d12va_update_reference_frames_state(AVCodecContext *avctx, D
                                                         ID3D12Resource *current_resource, int state_before, int state_end)
 {
     D3D12VADecodeContext *ctx = D3D12VA_DECODE_CONTEXT(avctx);
+    ID3D12Resource **ref_resources = ctx->ref_only_resources ? ctx->ref_only_resources : ctx->ref_resources;
 
     int num_barrier = 0;
     for (int i = 0; i < ctx->max_num_ref; i++) {
-        if (((ctx->used_mask >> i) & 0x1) && ctx->ref_resources[i] && ctx->ref_resources[i] != current_resource) {
+        if (((ctx->used_mask >> i) & 0x1) && ref_resources[i] && ref_resources[i] != current_resource) {
             barriers[num_barrier].Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
             barriers[num_barrier].Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
-            barriers[num_barrier].Transition = (D3D12_RESOURCE_TRANSITION_BARRIER){
-                .pResource   = ctx->ref_resources[i],
+            barriers[num_barrier].Transition = (D3D12_RESOURCE_TRANSITION_BARRIER) {
+                .pResource   = ref_resources[i],
                 .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
                 .StateBefore = state_before,
                 .StateAfter  = state_end,
@@ -440,8 +549,9 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
     D3D12VADecodeContext   *ctx               = D3D12VA_DECODE_CONTEXT(avctx);
     ID3D12Resource         *buffer            = NULL;
     ID3D12CommandAllocator *command_allocator = NULL;
-    AVD3D12VAFrame         *f                 = (AVD3D12VAFrame *)frame->data[0];
-    ID3D12Resource         *resource          = (ID3D12Resource *)f->texture;
+    AVD3D12VAFrame         *f                 = (AVD3D12VAFrame*)frame->data[0];
+    ID3D12Resource         *output_resource   = (ID3D12Resource*)f->texture;
+    ID3D12Resource         *ref_resource      = NULL;
 
     ID3D12VideoDecodeCommandList *cmd_list = ctx->command_list;
     D3D12_RESOURCE_BARRIER barriers[32] = { 0 };
@@ -466,25 +576,55 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
     D3D12_VIDEO_DECODE_OUTPUT_STREAM_ARGUMENTS output_args = {
         .ConversionArguments = { 0 },
         .OutputSubresource   = 0,
-        .pOutputTexture2D    = resource,
+        .pOutputTexture2D    = output_resource,
     };
 
+    memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref);
+    input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref;
+    input_args.ReferenceFrames.pSubresources = ctx->ref_subresources;
+
+    if (ctx->reference_only_map) {
+        ref_resource = get_reference_only_resource(avctx, output_resource);
+        if (ref_resource == NULL) {
+            av_log(avctx, AV_LOG_ERROR, "Failed to get reference frame!\n");
+            goto fail;
+        }
+        prepare_reference_only_resources(avctx);
+
+        output_args.ConversionArguments.Enable               = 1;
+        input_args.ReferenceFrames.ppTexture2Ds  = ctx->ref_only_resources;
+        output_args.ConversionArguments.pReferenceTexture2D  = ref_resource;
+        output_args.ConversionArguments.ReferenceSubresource = 0;
+    } else {
+        ref_resource = output_resource;
+        input_args.ReferenceFrames.ppTexture2Ds  = ctx->ref_resources;
+    }
+
     UINT num_barrier = 1;
     barriers[0] = (D3D12_RESOURCE_BARRIER) {
         .Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
         .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
         .Transition = {
-            .pResource   = resource,
+            .pResource   = output_resource,
             .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
             .StateBefore = D3D12_RESOURCE_STATE_COMMON,
             .StateAfter  = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
         },
     };
 
-    memset(ctx->ref_subresources, 0, sizeof(UINT) * ctx->max_num_ref);
-    input_args.ReferenceFrames.NumTexture2Ds = ctx->max_num_ref;
-    input_args.ReferenceFrames.ppTexture2Ds  = ctx->ref_resources;
-    input_args.ReferenceFrames.pSubresources = ctx->ref_subresources;
+    if (ctx->reference_only_map) {
+        barriers[1] = (D3D12_RESOURCE_BARRIER) {
+            .Type  = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
+            .Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE,
+            .Transition = {
+                .pResource   = ref_resource,
+                .Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES,
+                .StateBefore = D3D12_RESOURCE_STATE_COMMON,
+                .StateAfter  = D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE,
+            },
+        };
+        num_barrier++;
+    }
 
     ret = d3d12va_fence_completion(&f->sync_ctx);
     if (ret < 0)
@@ -505,7 +645,7 @@ int ff_d3d12va_common_end_frame(AVCodecContext *avctx, AVFrame *frame,
 
     DX_CHECK(ID3D12VideoDecodeCommandList_Reset(cmd_list, command_allocator));
 
-    num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[1], resource, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ);
+    num_barrier += d3d12va_update_reference_frames_state(avctx, &barriers[num_barrier], ref_resource, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_VIDEO_DECODE_READ);
 
     ID3D12VideoDecodeCommandList_ResourceBarrier(cmd_list, num_barrier, barriers);
 
diff --git a/libavcodec/d3d12va_decode.h b/libavcodec/d3d12va_decode.h
index b64994760a..c771004222 100644
--- a/libavcodec/d3d12va_decode.h
+++ b/libavcodec/d3d12va_decode.h
@@ -119,6 +119,19 @@ typedef struct D3D12VADecodeContext {
      * Private to the FFmpeg AVHWAccel implementation
      */
     unsigned report_id;
+
+    /**
+     * The Reference-Only feature in DirectX 12 is a memory optimization
+     * technique designed for video decoding/encoding scenarios.
+     * This feature requires that reference resources must be allocated
+     * with the `D3D12_RESOURCE_FLAG_VIDEO_DECODE_REFERENCE_ONLY` resource flag.
+     * Reference textures must also be separated from output textures.
+     * reference_only_map is used as storage for the reference-only frames.
+     * ref_only_resources is used as a shadow of ref_resources.
+     */
+    void *reference_only_map;
+    ID3D12Resource **ref_only_resources;
+
 } D3D12VADecodeContext;
 
 /**
author	Araz Iusubov <primeadvice@gmail.com>	2025-03-18 20:23:04 +0100
committer	Tong Wu <wutong1208@outlook.com>	2025-04-05 11:57:49 +0800
commit	ccf073e772d72b83e5973c62c4c4112cfa545a9c (patch)
tree	9f74ff27caf5656414c84b0d59fe232254c3d2f7
parent	37a319fb95bc558c9ed882a8577cab1ed5cc1fd8 (diff)
download	ffmpeg-ccf073e772d72b83e5973c62c4c4112cfa545a9c.tar.gz