diff options
author | Timo Rothenpieler <timo@rothenpieler.org> | 2021-06-11 23:54:34 +0200 |
---|---|---|
committer | Timo Rothenpieler <timo@rothenpieler.org> | 2021-06-22 14:05:44 +0200 |
commit | 072788c46e36a21ca9e8f1e3cc19a1944db5b89c (patch) | |
tree | 105335f7b8f9ccc16e13000934aee86e8da1fc5d /libavfilter | |
parent | abe150c9de6a096b14b6d623c5be49b19afe92b2 (diff) | |
download | ffmpeg-072788c46e36a21ca9e8f1e3cc19a1944db5b89c.tar.gz |
avfilter: compress CUDA PTX code if possible
Diffstat (limited to 'libavfilter')
-rw-r--r-- | libavfilter/Makefile | 11 | ||||
-rw-r--r-- | libavfilter/cuda/load_helper.c | 96 | ||||
-rw-r--r-- | libavfilter/cuda/load_helper.h | 28 | ||||
-rw-r--r-- | libavfilter/vf_overlay_cuda.c | 8 | ||||
-rw-r--r-- | libavfilter/vf_scale_cuda.c | 24 | ||||
-rw-r--r-- | libavfilter/vf_thumbnail_cuda.c | 7 | ||||
-rw-r--r-- | libavfilter/vf_yadif_cuda.c | 7 |
7 files changed, 162 insertions, 19 deletions
diff --git a/libavfilter/Makefile b/libavfilter/Makefile index bc81033e3f..2d963e419d 100644 --- a/libavfilter/Makefile +++ b/libavfilter/Makefile @@ -349,7 +349,8 @@ OBJS-$(CONFIG_OCR_FILTER) += vf_ocr.o OBJS-$(CONFIG_OCV_FILTER) += vf_libopencv.o OBJS-$(CONFIG_OSCILLOSCOPE_FILTER) += vf_datascope.o OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o framesync.o -OBJS-$(CONFIG_OVERLAY_CUDA_FILTER) += vf_overlay_cuda.o framesync.o vf_overlay_cuda.ptx.o +OBJS-$(CONFIG_OVERLAY_CUDA_FILTER) += vf_overlay_cuda.o framesync.o vf_overlay_cuda.ptx.o \ + cuda/load_helper.o OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER) += vf_overlay_opencl.o opencl.o \ opencl/overlay.o framesync.o OBJS-$(CONFIG_OVERLAY_QSV_FILTER) += vf_overlay_qsv.o framesync.o @@ -394,7 +395,8 @@ OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o scale_eval.o OBJS-$(CONFIG_SCALE_CUDA_FILTER) += vf_scale_cuda.o scale_eval.o \ - vf_scale_cuda.ptx.o vf_scale_cuda_bicubic.ptx.o + vf_scale_cuda.ptx.o vf_scale_cuda_bicubic.ptx.o \ + cuda/load_helper.o OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o scale_eval.o OBJS-$(CONFIG_SCALE_QSV_FILTER) += vf_scale_qsv.o OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o scale_eval.o vaapi_vpp.o @@ -442,7 +444,8 @@ OBJS-$(CONFIG_TELECINE_FILTER) += vf_telecine.o OBJS-$(CONFIG_THISTOGRAM_FILTER) += vf_histogram.o OBJS-$(CONFIG_THRESHOLD_FILTER) += vf_threshold.o framesync.o OBJS-$(CONFIG_THUMBNAIL_FILTER) += vf_thumbnail.o -OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER) += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o +OBJS-$(CONFIG_THUMBNAIL_CUDA_FILTER) += vf_thumbnail_cuda.o vf_thumbnail_cuda.ptx.o \ + cuda/load_helper.o OBJS-$(CONFIG_TILE_FILTER) += vf_tile.o OBJS-$(CONFIG_TINTERLACE_FILTER) += vf_tinterlace.o OBJS-$(CONFIG_TLUT2_FILTER) += vf_lut2.o framesync.o @@ -488,7 +491,7 @@ OBJS-$(CONFIG_XMEDIAN_FILTER) += vf_xmedian.o framesync.o OBJS-$(CONFIG_XSTACK_FILTER) += vf_stack.o framesync.o OBJS-$(CONFIG_YADIF_FILTER) 
+= vf_yadif.o yadif_common.o OBJS-$(CONFIG_YADIF_CUDA_FILTER) += vf_yadif_cuda.o vf_yadif_cuda.ptx.o \ - yadif_common.o + yadif_common.o cuda/load_helper.o OBJS-$(CONFIG_YAEPBLUR_FILTER) += vf_yaepblur.o OBJS-$(CONFIG_ZMQ_FILTER) += f_zmq.o OBJS-$(CONFIG_ZOOMPAN_FILTER) += vf_zoompan.o diff --git a/libavfilter/cuda/load_helper.c b/libavfilter/cuda/load_helper.c new file mode 100644 index 0000000000..62d644c29a --- /dev/null +++ b/libavfilter/cuda/load_helper.c @@ -0,0 +1,96 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "config.h" + +#include "libavutil/hwcontext.h" +#include "libavutil/hwcontext_cuda_internal.h" +#include "libavutil/cuda_check.h" + +#if CONFIG_PTX_COMPRESSION +#include <zlib.h> +#define CHUNK_SIZE 1024 * 64 +#endif + +#include "load_helper.h" + +#define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, cu, x) + +int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx, CUmodule *cu_module, + const unsigned char *data, const unsigned int length) +{ + CudaFunctions *cu = hwctx->internal->cuda_dl; + +#if CONFIG_PTX_COMPRESSION + z_stream stream = { 0 }; + uint8_t *buf, *tmp; + uint64_t buf_size; + int ret; + + if (inflateInit2(&stream, 32 + 15) != Z_OK) { + av_log(avctx, AV_LOG_ERROR, "Error during zlib initialisation: %s\n", stream.msg); + return AVERROR(ENOSYS); + } + + buf_size = CHUNK_SIZE * 4; + buf = av_realloc(NULL, buf_size); + if (!buf) { + inflateEnd(&stream); + return AVERROR(ENOMEM); + } + + stream.next_in = data; + stream.avail_in = length; + + do { + stream.avail_out = buf_size - stream.total_out; + stream.next_out = buf + stream.total_out; + + ret = inflate(&stream, Z_FINISH); + if (ret != Z_OK && ret != Z_STREAM_END) { + av_log(avctx, AV_LOG_ERROR, "zlib inflate error: %s\n", stream.msg); + inflateEnd(&stream); + av_free(buf); + return AVERROR(EINVAL); + } + + if (stream.avail_out == 0) { + buf_size += CHUNK_SIZE; + tmp = av_realloc(buf, buf_size); + if (!tmp) { + inflateEnd(&stream); + av_free(buf); + return AVERROR(ENOMEM); + } + buf = tmp; + } + } while (ret != Z_STREAM_END); + + // NULL-terminate string + // there is guaranteed to be space for this, due to condition in loop + buf[stream.total_out] = 0; + + inflateEnd(&stream); + + ret = CHECK_CU(cu->cuModuleLoadData(cu_module, buf)); + av_free(buf); + return ret; +#else 
+ return CHECK_CU(cu->cuModuleLoadData(cu_module, data)); +#endif +} diff --git a/libavfilter/cuda/load_helper.h b/libavfilter/cuda/load_helper.h new file mode 100644 index 0000000000..31507d6d3e --- /dev/null +++ b/libavfilter/cuda/load_helper.h @@ -0,0 +1,28 @@ +/* + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVFILTER_CUDA_DECOMPRESS_H +#define AVFILTER_CUDA_DECOMPRESS_H + +/** + * Loads a CUDA module and applies any decompression, if necessary. 
+ */ +int ff_cuda_load_module(void *avctx, AVCUDADeviceContext *hwctx, CUmodule *cu_module, + const unsigned char *data, const unsigned int length); + +#endif diff --git a/libavfilter/vf_overlay_cuda.c b/libavfilter/vf_overlay_cuda.c index 260b5c8fa2..a199580869 100644 --- a/libavfilter/vf_overlay_cuda.c +++ b/libavfilter/vf_overlay_cuda.c @@ -36,6 +36,8 @@ #include "framesync.h" #include "internal.h" +#include "cuda/load_helper.h" + #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, ctx->hwctx->internal->cuda_dl, x) #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) @@ -432,8 +434,8 @@ static int overlay_cuda_query_formats(AVFilterContext *avctx) */ static int overlay_cuda_config_output(AVFilterLink *outlink) { - - extern char vf_overlay_cuda_ptx[]; + extern const unsigned char ff_vf_overlay_cuda_ptx_data[]; + extern const unsigned int ff_vf_overlay_cuda_ptx_len; int err; AVFilterContext* avctx = outlink->src; @@ -509,7 +511,7 @@ static int overlay_cuda_config_output(AVFilterLink *outlink) return err; } - err = CHECK_CU(cu->cuModuleLoadData(&ctx->cu_module, vf_overlay_cuda_ptx)); + err = ff_cuda_load_module(ctx, ctx->hwctx, &ctx->cu_module, ff_vf_overlay_cuda_ptx_data, ff_vf_overlay_cuda_ptx_len); if (err < 0) { CHECK_CU(cu->cuCtxPopCurrent(&dummy)); return err; diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c index d97c7df273..c10938e96b 100644 --- a/libavfilter/vf_scale_cuda.c +++ b/libavfilter/vf_scale_cuda.c @@ -39,6 +39,7 @@ #include "scale_eval.h" #include "video.h" +#include "cuda/load_helper.h" #include "vf_scale_cuda.h" static const enum AVPixelFormat supported_formats[] = { @@ -275,34 +276,41 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink) int w, h; int ret; - char *scaler_ptx; + const unsigned char *scaler_ptx; + unsigned int scaler_ptx_len; const char *function_infix = ""; - extern char vf_scale_cuda_ptx[]; - extern char vf_scale_cuda_bicubic_ptx[]; + extern const unsigned char ff_vf_scale_cuda_ptx_data[]; + extern const 
unsigned int ff_vf_scale_cuda_ptx_len; + extern const unsigned char ff_vf_scale_cuda_bicubic_ptx_data[]; + extern const unsigned int ff_vf_scale_cuda_bicubic_ptx_len; switch(s->interp_algo) { case INTERP_ALGO_NEAREST: - scaler_ptx = vf_scale_cuda_ptx; + scaler_ptx = ff_vf_scale_cuda_ptx_data; + scaler_ptx_len = ff_vf_scale_cuda_ptx_len; function_infix = "_Nearest"; s->interp_use_linear = 0; s->interp_as_integer = 1; break; case INTERP_ALGO_BILINEAR: - scaler_ptx = vf_scale_cuda_ptx; + scaler_ptx = ff_vf_scale_cuda_ptx_data; + scaler_ptx_len = ff_vf_scale_cuda_ptx_len; function_infix = "_Bilinear"; s->interp_use_linear = 1; s->interp_as_integer = 1; break; case INTERP_ALGO_DEFAULT: case INTERP_ALGO_BICUBIC: - scaler_ptx = vf_scale_cuda_bicubic_ptx; + scaler_ptx = ff_vf_scale_cuda_bicubic_ptx_data; + scaler_ptx_len = ff_vf_scale_cuda_bicubic_ptx_len; function_infix = "_Bicubic"; s->interp_use_linear = 0; s->interp_as_integer = 0; break; case INTERP_ALGO_LANCZOS: - scaler_ptx = vf_scale_cuda_bicubic_ptx; + scaler_ptx = ff_vf_scale_cuda_bicubic_ptx_data; + scaler_ptx_len = ff_vf_scale_cuda_bicubic_ptx_len; function_infix = "_Lanczos"; s->interp_use_linear = 0; s->interp_as_integer = 0; @@ -319,7 +327,7 @@ static av_cold int cudascale_config_props(AVFilterLink *outlink) if (ret < 0) goto fail; - ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, scaler_ptx)); + ret = ff_cuda_load_module(ctx, device_hwctx, &s->cu_module, scaler_ptx, scaler_ptx_len); if (ret < 0) goto fail; diff --git a/libavfilter/vf_thumbnail_cuda.c b/libavfilter/vf_thumbnail_cuda.c index aab3ea8cc7..ceac10f72f 100644 --- a/libavfilter/vf_thumbnail_cuda.c +++ b/libavfilter/vf_thumbnail_cuda.c @@ -29,6 +29,8 @@ #include "avfilter.h" #include "internal.h" +#include "cuda/load_helper.h" + #define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, s->hwctx->internal->cuda_dl, x) #define HIST_SIZE (3*256) @@ -358,7 +360,8 @@ static int config_props(AVFilterLink *inlink) CudaFunctions *cu = device_hwctx->internal->cuda_dl; 
int ret; - extern char vf_thumbnail_cuda_ptx[]; + extern const unsigned char ff_vf_thumbnail_cuda_ptx_data[]; + extern const unsigned int ff_vf_thumbnail_cuda_ptx_len; s->hwctx = device_hwctx; s->cu_stream = s->hwctx->stream; @@ -367,7 +370,7 @@ static int config_props(AVFilterLink *inlink) if (ret < 0) return ret; - ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_thumbnail_cuda_ptx)); + ret = ff_cuda_load_module(ctx, device_hwctx, &s->cu_module, ff_vf_thumbnail_cuda_ptx_data, ff_vf_thumbnail_cuda_ptx_len); if (ret < 0) return ret; diff --git a/libavfilter/vf_yadif_cuda.c b/libavfilter/vf_yadif_cuda.c index bbdbfc1adc..5099f0a806 100644 --- a/libavfilter/vf_yadif_cuda.c +++ b/libavfilter/vf_yadif_cuda.c @@ -24,7 +24,10 @@ #include "internal.h" #include "yadif.h" -extern char vf_yadif_cuda_ptx[]; +#include "cuda/load_helper.h" + +extern const unsigned char ff_vf_yadif_cuda_ptx_data[]; +extern const unsigned int ff_vf_yadif_cuda_ptx_len; typedef struct DeintCUDAContext { YADIFContext yadif; @@ -318,7 +321,7 @@ static int config_output(AVFilterLink *link) if (ret < 0) goto exit; - ret = CHECK_CU(cu->cuModuleLoadData(&s->cu_module, vf_yadif_cuda_ptx)); + ret = ff_cuda_load_module(ctx, s->hwctx, &s->cu_module, ff_vf_yadif_cuda_ptx_data, ff_vf_yadif_cuda_ptx_len); if (ret < 0) goto exit; |