avfilter/scale_cuda: frame crop support

The crop filter has no effect on scale_cuda, for example with: -vf crop=100:100,scale_cuda=100x100

Hardware frames (AV_PIX_FMT_FLAG_HWACCEL) are expected to use the crop_* properties, as seen in the implementation of vf_crop.c. scale_cuda now honors those properties by sampling only the cropped region of the input frame.

Signed-off-by: Timo Rothenpieler <timo@rothenpieler.org>
author: Koushik Dutta <koushd@gmail.com> 2024-09-10 11:10:56 -0700
committer: Timo Rothenpieler <timo@rothenpieler.org> 2024-11-01 17:10:34 +0100
commit: 0cdcbab9e9184dc63b9c00e418ff10f88df0f060 (patch)
tree: 47c4c9bce4a1278a5140e2f97e39de052da7470a
parent: 1864025458021a2d2c542f56e268ee1106f84460 (diff)
download: ffmpeg-0cdcbab9e9184dc63b9c00e418ff10f88df0f060.tar.gz
3 files changed, 25 insertions, 14 deletions
diff --git a/libavfilter/version.h b/libavfilter/version.h
index f191d98883..f84dec4805 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -32,7 +32,7 @@
 #include "version_major.h"
 
 #define LIBAVFILTER_VERSION_MINOR   6
-#define LIBAVFILTER_VERSION_MICRO 100
+#define LIBAVFILTER_VERSION_MICRO 101
 
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c
index 54a340949d..eb8beee771 100644
--- a/libavfilter/vf_scale_cuda.c
+++ b/libavfilter/vf_scale_cuda.c
@@ -407,7 +407,7 @@ fail:
 }
 
 static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
-                              CUtexObject src_tex[4], int src_width, int src_height,
+                              CUtexObject src_tex[4], int src_left, int src_top, int src_width, int src_height,
                               AVFrame *out_frame, int dst_width, int dst_height, int dst_pitch)
 {
     CUDAScaleContext *s = ctx->priv;
@@ -422,7 +422,7 @@ static int call_resize_kernel(AVFilterContext *ctx, CUfunction func,
         &src_tex[0], &src_tex[1], &src_tex[2], &src_tex[3],
         &dst_devptr[0], &dst_devptr[1], &dst_devptr[2], &dst_devptr[3],
         &dst_width, &dst_height, &dst_pitch,
-        &src_width, &src_height, &s->param
+        &src_left, &src_top, &src_width, &src_height, &s->param
     };
 
     return CHECK_CU(cu->cuLaunchKernel(func,
@@ -440,6 +440,9 @@ static int scalecuda_resize(AVFilterContext *ctx,
 
     CUtexObject tex[4] = { 0, 0, 0, 0 };
 
+    int crop_width = (in->width - in->crop_right) - in->crop_left;
+    int crop_height = (in->height - in->crop_bottom) - in->crop_top;
+
     ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
     if (ret < 0)
         return ret;
@@ -477,7 +480,7 @@ static int scalecuda_resize(AVFilterContext *ctx,
 
     // scale primary plane(s). Usually Y (and A), or single plane of RGB frames.
     ret = call_resize_kernel(ctx, s->cu_func,
-                             tex, in->width, in->height,
+                             tex, in->crop_left, in->crop_top, crop_width, crop_height,
                              out, out->width, out->height, out->linesize[0]);
     if (ret < 0)
         goto exit;
@@ -485,8 +488,10 @@ static int scalecuda_resize(AVFilterContext *ctx,
     if (s->out_planes > 1) {
         // scale UV plane. Scale function sets both U and V plane, or singular interleaved plane.
         ret = call_resize_kernel(ctx, s->cu_func_uv, tex,
-                                 AV_CEIL_RSHIFT(in->width, s->in_desc->log2_chroma_w),
-                                 AV_CEIL_RSHIFT(in->height, s->in_desc->log2_chroma_h),
+                                 AV_CEIL_RSHIFT(in->crop_left, s->in_desc->log2_chroma_w),
+                                 AV_CEIL_RSHIFT(in->crop_top, s->in_desc->log2_chroma_h),
+                                 AV_CEIL_RSHIFT(crop_width, s->in_desc->log2_chroma_w),
+                                 AV_CEIL_RSHIFT(crop_height, s->in_desc->log2_chroma_h),
                                  out,
                                  AV_CEIL_RSHIFT(out->width, s->out_desc->log2_chroma_w),
                                  AV_CEIL_RSHIFT(out->height, s->out_desc->log2_chroma_h),
diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu
index de06ba9433..271b55cd5d 100644
--- a/libavfilter/vf_scale_cuda.cu
+++ b/libavfilter/vf_scale_cuda.cu
@@ -26,6 +26,7 @@
 template<typename T>
 using subsample_function_t = T (*)(cudaTextureObject_t tex, int xo, int yo,
                                    int dst_width, int dst_height,
+                                   int src_left, int src_top,
                                    int src_width, int src_height,
                                    int bit_depth, float param);
 
@@ -64,11 +65,12 @@ static inline __device__ ushort conv_16to10(ushort in)
              subsample_function_t<in_T_uv> subsample_func_uv>                                  \
     __device__ static inline void N(cudaTextureObject_t src_tex[4], T *dst[4], int xo, int yo, \
                                     int dst_width, int dst_height, int dst_pitch,              \
-                                    int src_width, int src_height, float param)
+                                    int src_left, int src_top, int src_width, int src_height, float param)
 
 #define SUB_F(m, plane) \
     subsample_func_##m(src_tex[plane], xo, yo, \
                        dst_width, dst_height,  \
+                       src_left, src_top,      \
                        src_width, src_height,  \
                        in_bit_depth, param)
 
@@ -1063,13 +1065,14 @@ template<typename T>
 __device__ static inline T Subsample_Nearest(cudaTextureObject_t tex,
                                              int xo, int yo,
                                              int dst_width, int dst_height,
+                                             int src_left, int src_top,
                                              int src_width, int src_height,
                                              int bit_depth, float param)
 {
     float hscale = (float)src_width / (float)dst_width;
     float vscale = (float)src_height / (float)dst_height;
-    float xi = (xo + 0.5f) * hscale;
-    float yi = (yo + 0.5f) * vscale;
+    float xi = (xo + 0.5f) * hscale + src_left;
+    float yi = (yo + 0.5f) * vscale + src_top;
 
     return tex2D<T>(tex, xi, yi);
 }
@@ -1078,13 +1081,14 @@ template<typename T>
 __device__ static inline T Subsample_Bilinear(cudaTextureObject_t tex,
                                               int xo, int yo,
                                               int dst_width, int dst_height,
+                                              int src_left, int src_top,
                                               int src_width, int src_height,
                                               int bit_depth, float param)
 {
     float hscale = (float)src_width / (float)dst_width;
     float vscale = (float)src_height / (float)dst_height;
-    float xi = (xo + 0.5f) * hscale;
-    float yi = (yo + 0.5f) * vscale;
+    float xi = (xo + 0.5f) * hscale + src_left;
+    float yi = (yo + 0.5f) * vscale + src_top;
     // 3-tap filter weights are {wh,1.0,wh} and {wv,1.0,wv}
     float wh = min(max(0.5f * (hscale - 1.0f), 0.0f), 1.0f);
     float wv = min(max(0.5f * (vscale - 1.0f), 0.0f), 1.0f);
@@ -1109,13 +1113,14 @@ template<typename T, coeffs_function_t coeffs_function>
 __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
                                              int xo, int yo,
                                              int dst_width, int dst_height,
+                                             int src_left, int src_top,
                                              int src_width, int src_height,
                                              int bit_depth, float param)
 {
     float hscale = (float)src_width / (float)dst_width;
     float vscale = (float)src_height / (float)dst_height;
-    float xi = (xo + 0.5f) * hscale - 0.5f;
-    float yi = (yo + 0.5f) * vscale - 0.5f;
+    float xi = (xo + 0.5f) * hscale - 0.5f + src_left;
+    float yi = (yo + 0.5f) * vscale - 0.5f + src_top;
     float px = floor(xi);
     float py = floor(yi);
     float fx = xi - px;
@@ -1147,7 +1152,7 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
     cudaTextureObject_t src_tex_2, cudaTextureObject_t src_tex_3, \
     T *dst_0, T *dst_1, T *dst_2, T *dst_3,                       \
     int dst_width, int dst_height, int dst_pitch,                 \
-    int src_width, int src_height, float param
+    int src_left, int src_top, int src_width, int src_height, float param
 
 #define SUBSAMPLE(Convert, T) \
     cudaTextureObject_t src_tex[4] =                    \
@@ -1159,6 +1164,7 @@ __device__ static inline T Subsample_Bicubic(cudaTextureObject_t tex,
     Convert(                                            \
         src_tex, dst, xo, yo,                           \
         dst_width, dst_height, dst_pitch,               \
+        src_left, src_top,                              \
         src_width, src_height, param);
 
 extern "C" {
author	Koushik Dutta <koushd@gmail.com>	2024-09-10 11:10:56 -0700
committer	Timo Rothenpieler <timo@rothenpieler.org>	2024-11-01 17:10:34 +0100
commit	0cdcbab9e9184dc63b9c00e418ff10f88df0f060 (patch)
tree	47c4c9bce4a1278a5140e2f97e39de052da7470a
parent	1864025458021a2d2c542f56e268ee1106f84460 (diff)
download	ffmpeg-0cdcbab9e9184dc63b9c00e418ff10f88df0f060.tar.gz