aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGanapathy Kasi <gkasi@nvidia.com>2017-05-30 19:03:14 -0700
committerTimo Rothenpieler <timo@rothenpieler.org>2017-06-02 21:37:43 +0200
commit9b351d0d888dfc75bf4259f854a8b4920cc60119 (patch)
tree6dd503aad3adba727c745ae35f6a5f67cb8ae06c
parente5e01d24773dde50159df2ad616f3e16a8d2a650 (diff)
downloadffmpeg-9b351d0d888dfc75bf4259f854a8b4920cc60119.tar.gz
avcodec/nvenc: fix hw accelerated transcode with bframes
hw accelerated transcode (h264_cuvid -> h264_nvenc with -hwaccel cuvid) was broken after the filtergraph initialization was changed to intialize decoder first followed by encoder (commit af1761f7b5b1b72197dc40934953b775c2d951cc). During initialzing encoder with bframes, local buffers are allocated internally in encoder which fails since no cuda context is available. Now pushing the correct cuda context before encoder initialization fixes the issue. Also adding push/pop cuda ctx during create/destroy/map/unmap resources and destroy encoder session. Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
-rw-r--r--libavcodec/nvenc.c75
1 files changed, 75 insertions, 0 deletions
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 00541f4c49..b7957867d7 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -373,9 +373,21 @@ static av_cold int nvenc_check_device(AVCodecContext *avctx, int idx)
return 0;
fail3:
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
ctx->nvencoder = NULL;
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
fail2:
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
ctx->cu_context_internal = NULL;
@@ -951,6 +963,8 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
NV_ENC_PRESET_CONFIG preset_config = { 0 };
NVENCSTATUS nv_status = NV_ENC_SUCCESS;
AVCPBProperties *cpb_props;
+ CUresult cu_res;
+ CUcontext dummy;
int res = 0;
int dw, dh;
@@ -1038,7 +1052,20 @@ static av_cold int nvenc_setup_encoder(AVCodecContext *avctx)
if (res)
return res;
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
nv_status = p_nvenc->nvEncInitializeEncoder(ctx->nvencoder, &ctx->init_encode_params);
+
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
if (nv_status != NV_ENC_SUCCESS) {
return nvenc_print_error(avctx, nv_status, "InitializeEncoder failed");
}
@@ -1147,6 +1174,9 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
{
NvencContext *ctx = avctx->priv_data;
+ NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
+ CUresult cu_res;
+ CUcontext dummy;
int i, res;
ctx->surfaces = av_mallocz_array(ctx->nb_surfaces, sizeof(*ctx->surfaces));
@@ -1163,9 +1193,28 @@ static av_cold int nvenc_setup_surfaces(AVCodecContext *avctx)
if (!ctx->output_surface_ready_queue)
return AVERROR(ENOMEM);
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
for (i = 0; i < ctx->nb_surfaces; i++) {
if ((res = nvenc_alloc_surface(avctx, i)) < 0)
+ {
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
return res;
+ }
+ }
+
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
}
return 0;
@@ -1209,8 +1258,16 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
NvencContext *ctx = avctx->priv_data;
NvencDynLoadFunctions *dl_fn = &ctx->nvenc_dload_funcs;
NV_ENCODE_API_FUNCTION_LIST *p_nvenc = &dl_fn->nvenc_funcs;
+ CUresult cu_res;
+ CUcontext dummy;
int i;
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
/* the encoder has to be flushed before it can be closed */
if (ctx->nvencoder) {
NV_ENC_PIC_PARAMS params = { .version = NV_ENC_PIC_PARAMS_VER,
@@ -1251,6 +1308,12 @@ av_cold int ff_nvenc_encode_close(AVCodecContext *avctx)
p_nvenc->nvEncDestroyEncoder(ctx->nvencoder);
ctx->nvencoder = NULL;
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
if (ctx->cu_context_internal)
dl_fn->cuda_dl->cuCtxDestroy(ctx->cu_context_internal);
ctx->cu_context = ctx->cu_context_internal = NULL;
@@ -1785,8 +1848,20 @@ int ff_nvenc_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
if (output_ready(avctx, !frame)) {
av_fifo_generic_read(ctx->output_surface_ready_queue, &tmpoutsurf, sizeof(tmpoutsurf), NULL);
+ cu_res = dl_fn->cuda_dl->cuCtxPushCurrent(ctx->cu_context);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPushCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
res = process_output_surface(avctx, pkt, tmpoutsurf);
+ cu_res = dl_fn->cuda_dl->cuCtxPopCurrent(&dummy);
+ if (cu_res != CUDA_SUCCESS) {
+ av_log(avctx, AV_LOG_ERROR, "cuCtxPopCurrent failed\n");
+ return AVERROR_EXTERNAL;
+ }
+
if (res)
return res;