diff options
author | Koushik Dutta <koushd@gmail.com> | 2024-09-10 11:10:56 -0700 |
---|---|---|
committer | Timo Rothenpieler <timo@rothenpieler.org> | 2024-11-01 17:10:34 +0100 |
commit | 0cdcbab9e9184dc63b9c00e418ff10f88df0f060 (patch) | |
tree | 47c4c9bce4a1278a5140e2f97e39de052da7470a | |
parent | 1864025458021a2d2c542f56e268ee1106f84460 (diff) | |
download | ffmpeg-0cdcbab9e9184dc63b9c00e418ff10f88df0f060.tar.gz |
avfilter/scale_cuda: frame crop support
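The crop filter has no effect when its output is fed to scale_cuda, for example:
-vf crop=100:100,scale_cuda=100x100
Hardware frames (AV_PIX_FMT_FLAG_HWACCEL) are expected to carry cropping in the crop_* frame properties,
as seen in the implementation in vf_crop.c, so scale_cuda has to read those properties itself when sampling the source.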
Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
-rw-r--r-- | libavfilter/version.h | 2 | ||||
-rw-r--r-- | libavfilter/vf_scale_cuda.c | 15 | ||||
-rw-r--r-- | libavfilter/vf_scale_cuda.cu | 22 |
3 files changed, 25 insertions, 14 deletions
diff --git a/libavfilter/version.h b/libavfilter/version.h index f191d98883..f84dec4805 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -32,7 +32,7 @@ #include "version_major.h" #define LIBAVFILTER_VERSION_MINOR 6 -#define LIBAVFILTER_VERSION_MICRO 100 +#define LIBAVFILTER_VERSION_MICRO 101 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c index 54a340949d..eb8beee771 100644 --- a/libavfilter/vf_scale_cuda.c +++ b/libavfilter/vf_scale_cuda.c @@ -407,7 +407,7 @@ fail: } static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, - CUtexObject src_tex[4], int src_width, int src_height, + CUtexObject src_tex[4], int src_left, int src_top, int src_width, int src_height, AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch) { CUDAScaleContext *s = ctx->priv; @@ -422,7 +422,7 @@ static int call_resize_kernel(AVFilterContext *ctx, CUfunction func, &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3], &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3], &dst_width, &dst_height, &dst_pitch, - &src_width, &src_height, &s->param + &src_left, &src_top, &src_width, &src_height, &s->param }; return CHECK_CU(cu->cuLaunchKernel(func, @@ -440,6 +440,9 @@ static int scalecuda_resize(AVFilterContext *ctx, CUtexObject tex[4] = { 0, 0, 0, 0 }; + int crop_width = (in->width - in->crop_right) - in->crop_left; + int crop_height = (in->height - in->crop_bottom) - in->crop_top; + ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx)); if (ret < 0) return ret; @@ -477,7 +480,7 @@ static int scalecuda_resize(AVFilterContext *ctx, // scale primary plane(s). Usually Y (and A), or single plane of RGB frames. 
ret = call_resize_kernel(ctx, s->cu_func, - tex, in->width, in->height, + tex, in->crop_left, in->crop_top, crop_width, crop_height, out, out->width, out->height, out->linesize[0]); if (ret < 0) goto exit; @@ -485,8 +488,10 @@ static int scalecuda_resize(AVFilterContext *ctx, if (s->out_planes > 1) { // scale UV plane. Scale function sets both U and V plane, or singular interleaved plane. ret = call_resize_kernel(ctx, s->cu_func_uv, tex, - AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w), - AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h), + AV_CEIL_RSHIFT(in->crop_left, s->in_desc->log2_chroma_w), + AV_CEIL_RSHIFT(in->crop_top, s->in_desc->log2_chroma_h), + AV_CEIL_RSHIFT(crop_width, s->in_desc->log2_chroma_w), + AV_CEIL_RSHIFT(crop_height, s->in_desc->log2_chroma_h), out, AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w), AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h), diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu index de06ba9433..271b55cd5d 100644 --- a/libavfilter/vf_scale_cuda.cu +++ b/libavfilter/vf_scale_cuda.cu @@ -26,6 +26,7 @@ template<typename T> using subsample_function_t = T (*)(cudaTextureObject_t tex, int xo, int yo, int dst_width, int dst_height, + int src_left, int src_top, int src_width, int src_height, int bit_depth, float param); @@ -64,11 +65,12 @@ static inline __device__ ushort conv_16to10(ushort in) subsample_function_t<in_T_uv> subsample_func_uv> \ __device__ static inline void N(cudaTextureObject_t src_tex[4], T *dst[4], int xo, int yo, \ int dst_width, int dst_height, int dst_pitch, \ - int src_width, int src_height, float param) + int src_left, int src_top, int src_width, int src_height, float param) #define SUB_F(m, plane) \ subsample_func_##m(src_tex[plane], xo, yo, \ dst_width, dst_height, \ + src_left, src_top, \ src_width, src_height, \ in_bit_depth, param) @@ -1063,13 +1065,14 @@ template<typename T> __device__ static inline T Subsample_Nearest(cudaTextureObject_t tex, int xo, int 
yo, int dst_width, int dst_height, + int src_left, int src_top, int src_width, int src_height, int bit_depth, float param) { float hscale = (float)src_width / (float)dst_width; float vscale = (float)src_height / (float)dst_height; - float xi = (xo + 0.5f) * hscale; - float yi = (yo + 0.5f) * vscale; + float xi = (xo + 0.5f) * hscale + src_left; + float yi = (yo + 0.5f) * vscale + src_top; return tex2D<T>(tex, xi, yi); } @@ -1078,13 +1081,14 @@ template<typename T> __device__ static inline T Subsample_Bilinear(cudaTextureObject_t tex, int xo, int yo, int dst_width, int dst_height, + int src_left, int src_top, int src_width, int src_height, int bit_depth, float param) { float hscale = (float)src_width / (float)dst_width; float vscale = (float)src_height / (float)dst_height; - float xi = (xo + 0.5f) * hscale; - float yi = (yo + 0.5f) * vscale; + float xi = (xo + 0.5f) * hscale + src_left; + float yi = (yo + 0.5f) * vscale + src_top; // 3-tap filter weights are {wh,1.0,wh} and {wv,1.0,wv} float wh = min(max(0.5f * (hscale - 1.0f), 0.0f), 1.0f); float wv = min(max(0.5f * (vscale - 1.0f), 0.0f), 1.0f); @@ -1109,13 +1113,14 @@ template<typename T, coeffs_function_t coeffs_function> __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex, int xo, int yo, int dst_width, int dst_height, + int src_left, int src_top, int src_width, int src_height, int bit_depth, float param) { float hscale = (float)src_width / (float)dst_width; float vscale = (float)src_height / (float)dst_height; - float xi = (xo + 0.5f) * hscale - 0.5f; - float yi = (yo + 0.5f) * vscale - 0.5f; + float xi = (xo + 0.5f) * hscale - 0.5f + src_left; + float yi = (yo + 0.5f) * vscale - 0.5f + src_top; float px = floor(xi); float py = floor(yi); float fx = xi - px; @@ -1147,7 +1152,7 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex, cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3, \ T *dst_0, T *dst_1, T *dst_2, T *dst_3, \ int dst_width, int dst_height, int 
dst_pitch, \ - int src_width, int src_height, float param + int src_left, int src_top, int src_width, int src_height, float param #define SUBSAMPLE(Convert, T) \ cudaTextureObject_t src_tex[4] = \ @@ -1159,6 +1164,7 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex, Convert( \ src_tex, dst, xo, yo, \ dst_width, dst_height, dst_pitch, \ + src_left, src_top, \ src_width, src_height, param); extern "C" { |