diff options
author | Philip Langdale <philipl@overt.org> | 2018-11-10 22:47:28 -0800 |
---|---|---|
committer | Philip Langdale <philipl@overt.org> | 2018-11-14 17:39:42 -0800 |
commit | 19d3d0c0570981ddc8a224f07d734ff75d76e234 (patch) | |
tree | 01f718e1878010605cd28d5947b676d42519dfc9 /libavfilter | |
parent | f0f2832a5ce93bad9b1d29f99df6bda2380fc41c (diff) | |
download | ffmpeg-19d3d0c0570981ddc8a224f07d734ff75d76e234.tar.gz |
avutil/hwcontext_cuda: Define and use common CHECK_CU()
We have a pattern of wrapping CUDA calls to print errors and
normalise return values that is used in a couple of places. To
avoid duplication and increase consistency, let's put the wrapper
implementation in a shared place and use it everywhere.
Affects:
* avcodec/cuviddec
* avcodec/nvdec
* avcodec/nvenc
* avfilter/vf_scale_cuda
* avfilter/vf_scale_npp
* avfilter/vf_thumbnail_cuda
* avfilter/vf_transpose_npp
* avfilter/vf_yadif_cuda
Diffstat (limited to 'libavfilter')
-rw-r--r-- | libavfilter/Makefile | 13 | ||||
-rw-r--r-- | libavfilter/cuda_check.c | 1 | ||||
-rw-r--r-- | libavfilter/vf_scale_cuda.c | 92 | ||||
-rw-r--r-- | libavfilter/vf_scale_npp.c | 12 | ||||
-rw-r--r-- | libavfilter/vf_thumbnail_cuda.c | 102 | ||||
-rw-r--r-- | libavfilter/vf_transpose_npp.c | 12 | ||||
-rw-r--r-- | libavfilter/vf_yadif_cuda.c | 97 |
7 files changed, 141 insertions, 188 deletions
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index 7c6fc836e5..a7ebd0221b 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -334,8 +334,9 @@ OBJS-$(CONFIG_ROBERTS_OPENCL_FILTER) += vf_convolution_opencl.o opencl.o OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale.o -OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o -OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o +OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o vf_scale_cuda.ptx.o \ + cuda_check.o +OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale.o cuda_check.o OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale.o vaapi_vpp.o OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o scale.o @@ -376,7 +377,8 @@ OBJS-$(CONFIG_TBLEND_FILTER) += vf_blend.o framesync.o OBJS-$(CONFIG_TELECINE_FILTER) += vf_telecine.o OBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o framesync.o OBJS-$(CONFIG_THUMBNAIL_FILTER) += vf_thumbnail.o -OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER) += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o +OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER) += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o \ + cuda_check.o OBJS-$(CONFIG_TILE_FILTER) += vf_tile.o OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o @@ -386,7 +388,7 @@ OBJS-$(CONFIG_TONEMAP_OPENCL_FILTER) += vf_tonemap_opencl.o colorspace.o opencl/tonemap.o opencl/colorspace_common.o OBJS-$(CONFIG_TPAD_FILTER) += vf_tpad.o OBJS-$(CONFIG_TRANSPOSE_FILTER) += vf_transpose.o -OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o +OBJS-$(CONFIG_TRANSPOSE_NPP_FILTER) += vf_transpose_npp.o cuda_check.o OBJS-$(CONFIG_TRIM_FILTER) += trim.o OBJS-$(CONFIG_UNPREMULTIPLY_FILTER) += vf_premultiply.o framesync.o OBJS-$(CONFIG_UNSHARP_FILTER) += vf_unsharp.o @@ -410,7 +412,8 @@ OBJS-$(CONFIG_WEAVE_FILTER) += vf_weave.o OBJS-$(CONFIG_XBR_FILTER) += vf_xbr.o OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o OBJS-$(CONFIG_YADIF_FILTER) += vf_yadif.o yadif_common.o -OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o yadif_common.o +OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o \ + yadif_common.o cuda_check.o OBJS-$(CONFIG_ZMQ_FILTER) += f_zmq.o OBJS-$(CONFIG_ZOOMPAN_FILTER) += vf_zoompan.o OBJS-$(CONFIG_ZSCALE_FILTER) += vf_zscale.o diff --git a/libavfilter/cuda_check.c b/libavfilter/cuda_check.c new file mode 100644 index 0000000000..a1ebb88882 --- /dev/null +++ b/libavfilter/cuda_check.c @@ -0,0 +1 @@ +#include "libavutil/cuda_check.c" diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c index 7b2b78c1ed..53b7aa9531 100644 --- a/libavfilter/vf_scale_cuda.c +++ b/libavfilter/vf_scale_cuda.c @@ -28,6 +28,7 @@ #include "libavutil/common.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda.h" +#include "libavutil/cuda_check.h" #include "libavutil/internal.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" @@ -52,6 +53,8 @@ static const enum AVPixelFormat supported_formats[] = { #define BLOCKX 32 #define BLOCKY 16 +#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) + typedef struct CUDAScaleContext { const AVClass *class; enum AVPixelFormat in_fmt; @@ -255,55 +258,48 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink) AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; - CUresult err; int w, h; int ret; extern char vf_scale_cuda_ptx[]; - err = cuCtxPushCurrent(cuda_ctx); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error pushing cuda context\n"); - ret = AVERROR_UNKNOWN; + ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); + if (ret < 0) goto fail; - } - err = cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error loading module data\n"); - ret = AVERROR_UNKNOWN; + ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_scale_cuda_ptx)); + if (ret < 0) goto fail; - } - cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar"); - cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2"); - cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4"); - cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort"); - cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2"); - cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4"); - - cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex"); - cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex"); - cuModuleGetTexRef(&s->cu_tex_uchar4, s->cu_module, "uchar4_tex"); - cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex"); - cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex"); - cuModuleGetTexRef(&s->cu_tex_ushort4, s->cu_module, "ushort4_tex"); - - cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_uchar4, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_ushort4, CU_TRSF_READ_AS_INTEGER); - - cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_uchar4, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_ushort4, CU_TR_FILTER_MODE_LINEAR); - - cuCtxPopCurrent(&dummy); + CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Subsample_Bilinear_uchar")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Subsample_Bilinear_uchar2")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar4, s->cu_module, "Subsample_Bilinear_uchar4")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Subsample_Bilinear_ushort")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Subsample_Bilinear_ushort2")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort4, s->cu_module, "Subsample_Bilinear_ushort4")); + + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar4, s->cu_module, "uchar4_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort4, s->cu_module, "ushort4_tex")); + + CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar4, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort4, CU_TRSF_READ_AS_INTEGER)); + + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar4, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort4, CU_TR_FILTER_MODE_LINEAR)); + + CHECK_CU(cuCtxPopCurrent(&dummy)); if ((ret = ff_scale_eval_dimensions(s, s->w_expr, s->h_expr, @@ -339,7 +335,7 @@ fail: return ret; } -static int call_resize_kernel(CUDAScaleContext *s, CUfunction func, CUtexref tex, int channels, +static int call_resize_kernel(CUDAScaleContext *ctx, CUfunction func, CUtexref tex, int channels, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, uint8_t *dst_dptr, int dst_width, int dst_height, int dst_pitch, int pixel_size) @@ -358,8 +354,9 @@ static int call_resize_kernel(CUDAScaleContext *s, CUfunction func, CUtexref tex desc.Format = CU_AD_FORMAT_UNSIGNED_INT16; } - cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size); - cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL); + CHECK_CU(cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch * pixel_size)); + CHECK_CU(cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, + BLOCKX, BLOCKY, 1, 0, 0, args_uchar, NULL)); return 0; } @@ -470,7 +467,6 @@ static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in) AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; AVFrame *out = NULL; - CUresult err; CUcontext dummy; int ret = 0; @@ -480,15 +476,13 @@ static int cudascale_filter_frame(AVFilterLink *link, AVFrame *in) goto fail; } - err = cuCtxPushCurrent(device_hwctx->cuda_ctx); - if (err != CUDA_SUCCESS) { - ret = AVERROR_UNKNOWN; + ret = CHECK_CU(cuCtxPushCurrent(device_hwctx->cuda_ctx)); + if (ret < 0) goto fail; - } ret = cudascale_scale(ctx, out, in); - cuCtxPopCurrent(&dummy); + CHECK_CU(cuCtxPopCurrent(&dummy)); if (ret < 0) goto fail; diff --git a/libavfilter/vf_scale_npp.c b/libavfilter/vf_scale_npp.c index 8a277ce8e1..a3e085764a 100644 --- a/libavfilter/vf_scale_npp.c +++ b/libavfilter/vf_scale_npp.c @@ -29,6 +29,7 @@ #include "libavutil/common.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/cuda_check.h" #include "libavutil/internal.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" @@ -39,6 +40,8 @@ #include "scale.h" #include "video.h" +#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x) + static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_NV12, @@ -498,7 +501,6 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in) AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; AVFrame *out = NULL; - CUresult err; CUcontext dummy; int ret = 0; @@ -511,15 +513,13 @@ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in) goto fail; } - err = device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx); - if (err != CUDA_SUCCESS) { - ret = AVERROR_UNKNOWN; + ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx)); + if (ret < 0) goto fail; - } ret = nppscale_scale(ctx, out, in); - device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy); + CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy)); if (ret < 0) goto fail; diff --git a/libavfilter/vf_thumbnail_cuda.c b/libavfilter/vf_thumbnail_cuda.c index 53df7e0bf7..22691e156f 100644 --- a/libavfilter/vf_thumbnail_cuda.c +++ b/libavfilter/vf_thumbnail_cuda.c @@ -24,12 +24,15 @@ #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda.h" +#include "libavutil/cuda_check.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" #include "avfilter.h" #include "internal.h" +#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) + #define HIST_SIZE (3*256) #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) #define BLOCKX 32 @@ -154,7 +157,7 @@ static AVFrame *get_best_frame(AVFilterContext *ctx) return picref; } -static int thumbnail_kernel(ThumbnailCudaContext *s, CUfunction func, CUtexref tex, int channels, +static int thumbnail_kernel(ThumbnailCudaContext *ctx, CUfunction func, CUtexref tex, int channels, int *histogram, uint8_t *src_dptr, int src_width, int src_height, int src_pitch, int pixel_size) { CUdeviceptr src_devptr = (CUdeviceptr)src_dptr; @@ -171,8 +174,10 @@ static int thumbnail_kernel(ThumbnailCudaContext *s, CUfunction func, CUtexref t desc.Format = CU_AD_FORMAT_UNSIGNED_INT16; } - cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch); - cuLaunchKernel(func, DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, 0, args, NULL); + CHECK_CU(cuTexRefSetAddress2D_v3(tex, &desc, src_devptr, src_pitch)); + CHECK_CU(cuLaunchKernel(func, + DIV_UP(src_width, BLOCKX), DIV_UP(src_height, BLOCKY), 1, + BLOCKX, BLOCKY, 1, 0, 0, args, NULL)); return 0; } @@ -235,7 +240,6 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) int *hist = s->frames[s->n].histogram; AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)s->hw_frames_ctx->data; AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx; - CUresult err; CUcontext dummy; CUDA_MEMCPY2D cpy = { 0 }; int ret = 0; @@ -243,11 +247,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) // keep a reference of each frame s->frames[s->n].buf = frame; - err = cuCtxPushCurrent(device_hwctx->cuda_ctx); - if (err != CUDA_SUCCESS) - return AVERROR_UNKNOWN; + ret = CHECK_CU(cuCtxPushCurrent(device_hwctx->cuda_ctx)); + if (ret < 0) + return ret; - cuMemsetD8(s->data, 0, HIST_SIZE * sizeof(int)); + CHECK_CU(cuMemsetD8(s->data, 0, HIST_SIZE * sizeof(int))); thumbnail(ctx, (int*)s->data, frame); @@ -260,11 +264,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) cpy.WidthInBytes = HIST_SIZE * sizeof(int); cpy.Height = 1; - err = cuMemcpy2D(&cpy); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n"); - return AVERROR_UNKNOWN; - } + ret = CHECK_CU(cuMemcpy2D(&cpy)); + if (ret < 0) + return ret; if (hw_frames_ctx->sw_format == AV_PIX_FMT_NV12 || hw_frames_ctx->sw_format == AV_PIX_FMT_YUV420P || hw_frames_ctx->sw_format == AV_PIX_FMT_P010LE || hw_frames_ctx->sw_format == AV_PIX_FMT_P016LE) @@ -274,7 +276,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *frame) hist[i] = 4 * hist[i]; } - cuCtxPopCurrent(&dummy); + CHECK_CU(cuCtxPopCurrent(&dummy)); if (ret < 0) return ret; @@ -292,12 +294,12 @@ static av_cold void uninit(AVFilterContext *ctx) ThumbnailCudaContext *s = ctx->priv; if (s->data) { - cuMemFree(s->data); + CHECK_CU(cuMemFree(s->data)); s->data = 0; } if (s->cu_module) { - cuModuleUnload(s->cu_module); + CHECK_CU(cuModuleUnload(s->cu_module)); s->cu_module = NULL; } @@ -340,49 +342,43 @@ static int config_props(AVFilterLink *inlink) AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data; AVCUDADeviceContext *device_hwctx = hw_frames_ctx->device_ctx->hwctx; CUcontext dummy, cuda_ctx = device_hwctx->cuda_ctx; - CUresult err; + int ret; extern char vf_thumbnail_cuda_ptx[]; - err = cuCtxPushCurrent(cuda_ctx); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error pushing cuda context\n"); - return AVERROR_UNKNOWN; - } + ret = CHECK_CU(cuCtxPushCurrent(cuda_ctx)); + if (ret < 0) + return ret; - err = cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error loading module data\n"); - return AVERROR_UNKNOWN; - } + ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx)); + if (ret < 0) + return ret; - cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar"); - cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2"); - cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort"); - cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2"); - - cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex"); - cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex"); - cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex"); - cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex"); - - cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER); - cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER); - - cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR); - cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR); - - err = cuMemAlloc(&s->data, HIST_SIZE * sizeof(int)); - if (err != CUDA_SUCCESS) { - av_log(ctx, AV_LOG_ERROR, "Error allocating cuda memory\n"); - return AVERROR_UNKNOWN; - } + CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "Thumbnail_uchar")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "Thumbnail_uchar2")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "Thumbnail_ushort")); + CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "Thumbnail_ushort2")); + + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar, s->cu_module, "uchar_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_uchar2, s->cu_module, "uchar2_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort, s->cu_module, "ushort_tex")); + CHECK_CU(cuModuleGetTexRef(&s->cu_tex_ushort2, s->cu_module, "ushort2_tex")); + + CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_uchar2, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort, CU_TRSF_READ_AS_INTEGER)); + CHECK_CU(cuTexRefSetFlags(s->cu_tex_ushort2, CU_TRSF_READ_AS_INTEGER)); + + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_uchar2, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort, CU_TR_FILTER_MODE_LINEAR)); + CHECK_CU(cuTexRefSetFilterMode(s->cu_tex_ushort2, CU_TR_FILTER_MODE_LINEAR)); + + ret = CHECK_CU(cuMemAlloc(&s->data, HIST_SIZE * sizeof(int))); + if (ret < 0) + return ret; - cuCtxPopCurrent(&dummy); + CHECK_CU(cuCtxPopCurrent(&dummy)); s->hw_frames_ctx = ctx->inputs[0]->hw_frames_ctx; diff --git a/libavfilter/vf_transpose_npp.c b/libavfilter/vf_transpose_npp.c index 1b3a5c0c69..3ea031667c 100644 --- a/libavfilter/vf_transpose_npp.c +++ b/libavfilter/vf_transpose_npp.c @@ -23,6 +23,7 @@ #include "libavutil/common.h" #include "libavutil/hwcontext.h" #include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/cuda_check.h" #include "libavutil/internal.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" @@ -32,6 +33,8 @@ #include "internal.h" #include "video.h" +#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, device_hwctx->internal->cuda_dl, x) + static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV444P @@ -397,7 +400,6 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in) AVHWFramesContext *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data; AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx; AVFrame *out = NULL; - CUresult err; CUcontext dummy; int ret = 0; @@ -410,15 +412,13 @@ static int npptranspose_filter_frame(AVFilterLink *link, AVFrame *in) goto fail; } - err = device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx); - if (err != CUDA_SUCCESS) { - ret = AVERROR_UNKNOWN; + ret = CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPushCurrent(device_hwctx->cuda_ctx)); + if (ret < 0) goto fail; - } ret = npptranspose_filter(ctx, out, in); - device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy); + CHECK_CU(device_hwctx->internal->cuda_dl->cuCtxPopCurrent(&dummy)); if (ret < 0) goto fail; diff --git a/libavfilter/vf_yadif_cuda.c b/libavfilter/vf_yadif_cuda.c index be22344d9d..85e1aac5eb 100644 --- a/libavfilter/vf_yadif_cuda.c +++ b/libavfilter/vf_yadif_cuda.c @@ -21,6 +21,7 @@ #include <cuda.h> #include "libavutil/avassert.h" #include "libavutil/hwcontext_cuda.h" +#include "libavutil/cuda_check.h" #include "internal.h" #include "yadif.h" @@ -48,28 +49,7 @@ typedef struct DeintCUDAContext { #define BLOCKX 32 #define BLOCKY 16 -static int check_cu(AVFilterContext *avctx, CUresult err, const char *func) -{ - const char *err_name; - const char *err_string; - - av_log(avctx, AV_LOG_TRACE, "Calling %s\n", func); - - if (err == CUDA_SUCCESS) - return 0; - - cuGetErrorName(err, &err_name); - cuGetErrorString(err, &err_string); - - av_log(avctx, AV_LOG_ERROR, "%s failed", func); - if (err_name && err_string) - av_log(avctx, AV_LOG_ERROR, " -> %s: %s", err_name, err_string); - av_log(avctx, AV_LOG_ERROR, "\n"); - - return AVERROR_EXTERNAL; -} - -#define CHECK_CU(x) check_cu(ctx, (x), #x) +#define CHECK_CU(x) FF_CUDA_CHECK(ctx, x) static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, CUdeviceptr prev, CUdeviceptr cur, CUdeviceptr next, @@ -85,7 +65,7 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, { DeintCUDAContext *s = ctx->priv; CUtexObject tex_prev = 0, tex_cur = 0, tex_next = 0; - CUresult err; + int ret; int skip_spatial_check = s->yadif.mode&2; void *args[] = { &dst, &tex_prev, &tex_cur, &tex_next, @@ -108,24 +88,21 @@ static CUresult call_kernel(AVFilterContext *ctx, CUfunction func, }; res_desc.res.pitch2D.devPtr = (CUdeviceptr)prev; - err = CHECK_CU(cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL)); - if (err != CUDA_SUCCESS) { + ret = CHECK_CU(cuTexObjectCreate(&tex_prev, &res_desc, &tex_desc, NULL)); + if (ret < 0) goto exit; - } res_desc.res.pitch2D.devPtr = (CUdeviceptr)cur; - err = CHECK_CU(cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL)); - if (err != CUDA_SUCCESS) { + ret = CHECK_CU(cuTexObjectCreate(&tex_cur, &res_desc, &tex_desc, NULL)); + if (ret < 0) goto exit; - } res_desc.res.pitch2D.devPtr = (CUdeviceptr)next; - err = CHECK_CU(cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL)); - if (err != CUDA_SUCCESS) { + ret = CHECK_CU(cuTexObjectCreate(&tex_next, &res_desc, &tex_desc, NULL)); + if (ret < 0) goto exit; - } - err = CHECK_CU(cuLaunchKernel(func, + ret = CHECK_CU(cuLaunchKernel(func, DIV_UP(dst_width, BLOCKX), DIV_UP(dst_height, BLOCKY), 1, BLOCKX, BLOCKY, 1, 0, s->stream, args, NULL)); @@ -138,7 +115,7 @@ exit: if (tex_next) CHECK_CU(cuTexObjectDestroy(tex_next)); - return err; + return ret; } static void filter(AVFilterContext *ctx, AVFrame *dst, @@ -147,13 +124,11 @@ static void filter(AVFilterContext *ctx, AVFrame *dst, DeintCUDAContext *s = ctx->priv; YADIFContext *y = &s->yadif; CUcontext dummy; - CUresult err; - int i; + int i, ret; - err = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); - if (err != CUDA_SUCCESS) { - goto exit; - } + ret = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); + if (ret < 0) + return; for (i = 0; i < y->csp->nb_components; i++) { CUfunction func; @@ -204,10 +179,7 @@ static void filter(AVFilterContext *ctx, AVFrame *dst, parity, tff); } - err = CHECK_CU(cuStreamSynchronize(s->stream)); - if (err != CUDA_SUCCESS) { - goto exit; - } + CHECK_CU(cuStreamSynchronize(s->stream)); exit: CHECK_CU(cuCtxPopCurrent(&dummy)); @@ -283,7 +255,6 @@ static int config_output(AVFilterLink *link) YADIFContext *y = &s->yadif; int ret = 0; CUcontext dummy; - CUresult err; av_assert0(s->input_frames); s->device_ref = av_buffer_ref(s->input_frames->device_ref); @@ -342,41 +313,29 @@ static int config_output(AVFilterLink *link) y->csp = av_pix_fmt_desc_get(output_frames->sw_format); y->filter = filter; - err = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); - if (err != CUDA_SUCCESS) { - ret = AVERROR_EXTERNAL; + ret = CHECK_CU(cuCtxPushCurrent(s->cu_ctx)); + if (ret < 0) goto exit; - } - err = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx)); - if (err != CUDA_SUCCESS) { - ret = AVERROR_INVALIDDATA; + ret = CHECK_CU(cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx)); + if (ret < 0) goto exit; - } - err = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar")); - if (err != CUDA_SUCCESS) { - ret = AVERROR_INVALIDDATA; + ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar, s->cu_module, "yadif_uchar")); + if (ret < 0) goto exit; - } - err = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2")); - if (err != CUDA_SUCCESS) { - ret = AVERROR_INVALIDDATA; + ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_uchar2, s->cu_module, "yadif_uchar2")); + if (ret < 0) goto exit; - } - err= CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort")); - if (err != CUDA_SUCCESS) { - ret = AVERROR_INVALIDDATA; + ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort, s->cu_module, "yadif_ushort")); + if (ret < 0) goto exit; - } - err = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2")); - if (err != CUDA_SUCCESS) { - ret = AVERROR_INVALIDDATA; + ret = CHECK_CU(cuModuleGetFunction(&s->cu_func_ushort2, s->cu_module, "yadif_ushort2")); + if (ret < 0) goto exit; - } exit: CHECK_CU(cuCtxPopCurrent(&dummy)); |