diff options
author | Wenbin Chen <wenbin.chen-at-intel.com@ffmpeg.org> | 2022-02-18 11:07:47 +0800 |
---|---|---|
committer | Haihao Xiang <haihao.xiang@intel.com> | 2022-02-28 12:37:02 +0800 |
commit | d165ce22a4a7cc4ed60238ce8f3d5dcbbad3e266 (patch) | |
tree | 5cfcf2b49cce0ec67d1838a00dd2c83c4c165aaf | |
parent | e0ff86993052b49a64d434bac345e92fc149f446 (diff) | |
download | ffmpeg-d165ce22a4a7cc4ed60238ce8f3d5dcbbad3e266.tar.gz |
libavcodec/vaapi_encode: Add async_depth to vaapi_encoder to increase performance
Fix: #7706. After commit 5fdcf85bbffe7451c2, vaapi encoder's performance
decrease. The reason is that vaRenderPicture() and vaSyncBuffer() are
called at the same time (vaRenderPicture() always followed by a
vaSyncBuffer()). Now I changed them to be called in a asynchronous way,
which will make better use of hardware.
Async_depth is added to increase encoder's performance. The frames that
are sent to hardware are stored in a fifo. Encoder will sync output
after async fifo is full.
Signed-off-by: Wenbin Chen <wenbin.chen@intel.com>
Signed-off-by: Haihao Xiang <haihao.xiang@intel.com>
-rw-r--r-- | doc/encoders.texi | 6 | ||||
-rw-r--r-- | libavcodec/vaapi_encode.c | 64 | ||||
-rw-r--r-- | libavcodec/vaapi_encode.h | 16 |
3 files changed, 71 insertions, 15 deletions
diff --git a/doc/encoders.texi b/doc/encoders.texi index 9afb920f69..64e8c5d129 100644 --- a/doc/encoders.texi +++ b/doc/encoders.texi @@ -3591,6 +3591,12 @@ will refer only to P- or I-frames. When set to greater values multiple layers of B-frames will be present, frames in each layer only referring to frames in higher layers. +@item async_depth +Maximum processing parallelism. Increase this to improve single channel +performance. This option doesn't work if driver doesn't implement vaSyncBuffer +function. Please make sure there are enough hw_frames allocated if a large +number of async_depth is used. + @item rc_mode Set the rate control mode to use. A given driver may only support a subset of modes. diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c index 3f8c8ace2a..8c6e881702 100644 --- a/libavcodec/vaapi_encode.c +++ b/libavcodec/vaapi_encode.c @@ -965,8 +965,10 @@ static int vaapi_encode_pick_next(AVCodecContext *avctx, if (!pic && ctx->end_of_stream) { --b_counter; pic = ctx->pic_end; - if (pic->encode_issued) + if (pic->encode_complete) return AVERROR_EOF; + else if (pic->encode_issued) + return AVERROR(EAGAIN); } if (!pic) { @@ -1137,7 +1139,8 @@ static int vaapi_encode_send_frame(AVCodecContext *avctx, AVFrame *frame) if (ctx->input_order == ctx->decode_delay) ctx->dts_pts_diff = pic->pts - ctx->first_pts; if (ctx->output_delay > 0) - ctx->ts_ring[ctx->input_order % (3 * ctx->output_delay)] = pic->pts; + ctx->ts_ring[ctx->input_order % + (3 * ctx->output_delay + ctx->async_depth)] = pic->pts; pic->display_order = ctx->input_order; ++ctx->input_order; @@ -1191,18 +1194,47 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt) return AVERROR(EAGAIN); } - pic = NULL; - err = vaapi_encode_pick_next(avctx, &pic); - if (err < 0) - return err; - av_assert0(pic); + if (ctx->has_sync_buffer_func) { + pic = NULL; + + if (av_fifo_can_write(ctx->encode_fifo)) { + err = vaapi_encode_pick_next(avctx, &pic); + if (!err) { + av_assert0(pic); + pic->encode_order = ctx->encode_order + + av_fifo_can_read(ctx->encode_fifo); + err = vaapi_encode_issue(avctx, pic); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err); + return err; + } + av_fifo_write(ctx->encode_fifo, &pic, 1); + } + } - pic->encode_order = ctx->encode_order++; + if (!av_fifo_can_read(ctx->encode_fifo)) + return err; - err = vaapi_encode_issue(avctx, pic); - if (err < 0) { - av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err); - return err; + // More frames can be buffered + if (av_fifo_can_write(ctx->encode_fifo) && !ctx->end_of_stream) + return AVERROR(EAGAIN); + + av_fifo_read(ctx->encode_fifo, &pic, 1); + ctx->encode_order = pic->encode_order + 1; + } else { + pic = NULL; + err = vaapi_encode_pick_next(avctx, &pic); + if (err < 0) + return err; + av_assert0(pic); + + pic->encode_order = ctx->encode_order++; + + err = vaapi_encode_issue(avctx, pic); + if (err < 0) { + av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err); + return err; + } } err = vaapi_encode_output(avctx, pic, pkt); @@ -1220,7 +1252,7 @@ int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt) pkt->dts = ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff; } else { pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) % - (3 * ctx->output_delay)]; + (3 * ctx->output_delay + ctx->async_depth)]; } av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64" dts %"PRId64".\n", pkt->pts, pkt->dts); @@ -2541,6 +2573,11 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx) vas = vaSyncBuffer(ctx->hwctx->display, VA_INVALID_ID, 0); if (vas != VA_STATUS_ERROR_UNIMPLEMENTED) { ctx->has_sync_buffer_func = 1; + ctx->encode_fifo = av_fifo_alloc2(ctx->async_depth, + sizeof(VAAPIEncodePicture *), + 0); + if (!ctx->encode_fifo) + return AVERROR(ENOMEM); } #endif @@ -2581,6 +2618,7 @@ av_cold int ff_vaapi_encode_close(AVCodecContext *avctx) av_freep(&ctx->codec_sequence_params); av_freep(&ctx->codec_picture_params); + av_fifo_freep2(&ctx->encode_fifo); av_buffer_unref(&ctx->recon_frames_ref); av_buffer_unref(&ctx->input_frames_ref); diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h index 29d9e9b91c..1b40819c69 100644 --- a/libavcodec/vaapi_encode.h +++ b/libavcodec/vaapi_encode.h @@ -29,6 +29,7 @@ #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_vaapi.h" +#include "libavutil/fifo.h" #include "avcodec.h" #include "hwconfig.h" @@ -47,6 +48,7 @@ enum { MAX_TILE_ROWS = 22, // A.4.1: table A.6 allows at most 20 tile columns for any level. MAX_TILE_COLS = 20, + MAX_ASYNC_DEPTH = 64, }; extern const AVCodecHWConfigInternal *const ff_vaapi_encode_hw_configs[]; @@ -297,7 +299,8 @@ typedef struct VAAPIEncodeContext { // Timestamp handling. int64_t first_pts; int64_t dts_pts_diff; - int64_t ts_ring[MAX_REORDER_DELAY * 3]; + int64_t ts_ring[MAX_REORDER_DELAY * 3 + + MAX_ASYNC_DEPTH]; // Slice structure. int slice_block_rows; @@ -348,6 +351,10 @@ typedef struct VAAPIEncodeContext { // Whether the driver support vaSyncBuffer int has_sync_buffer_func; + // Store buffered pic + AVFifo *encode_fifo; + // Max number of frame buffered in encoder. + int async_depth; } VAAPIEncodeContext; enum { @@ -458,7 +465,12 @@ int ff_vaapi_encode_close(AVCodecContext *avctx); { "b_depth", \ "Maximum B-frame reference depth", \ OFFSET(common.desired_b_depth), AV_OPT_TYPE_INT, \ - { .i64 = 1 }, 1, INT_MAX, FLAGS } + { .i64 = 1 }, 1, INT_MAX, FLAGS }, \ + { "async_depth", "Maximum processing parallelism. " \ + "Increase this to improve single channel performance. This option " \ + "doesn't work if driver doesn't implement vaSyncBuffer function.", \ + OFFSET(common.async_depth), AV_OPT_TYPE_INT, \ + { .i64 = 2 }, 1, MAX_ASYNC_DEPTH, FLAGS } #define VAAPI_ENCODE_RC_MODE(name, desc) \ { #name, desc, 0, AV_OPT_TYPE_CONST, { .i64 = RC_MODE_ ## name }, \ |