diff options
author | Philip Langdale <philipl@overt.org> | 2023-06-15 22:10:37 -0700 |
---|---|---|
committer | Philip Langdale <philipl@overt.org> | 2023-06-16 10:40:21 -0700 |
commit | 3c07c2489d94ea0da0b575a4b2bb59f13de0bf6f (patch) | |
tree | a1b0774ee01d56eceafedce5dcc348ea417e92d2 | |
parent | f42df8384aedeb435b6d427e1b704fda2bd87d5b (diff) | |
download | ffmpeg-3c07c2489d94ea0da0b575a4b2bb59f13de0bf6f.tar.gz |
avfilter/scale_cuda: add support for rgb32/bgr32 conversions
As we are introducing two new formats and supporting conversions
between them, and also with the existing 0rgb32/0bgr32 formats, we get
a combinatorial explosion of kernels. I introduced a few new macros to
keep the things mostly managable.
The conversions are all simple, following existing patterns, with four
specific exceptions. When converting from 0rgb32/0bgr32 to rgb32/bgr32,
we need to ensure the alpha value is set to 1. In all other cases, it
can just be passed through, either to be used or ignored.
-rw-r--r-- | libavfilter/version.h | 2 | ||||
-rw-r--r-- | libavfilter/vf_scale_cuda.c | 2 | ||||
-rw-r--r-- | libavfilter/vf_scale_cuda.cu | 151 |
3 files changed, 123 insertions, 32 deletions
diff --git a/libavfilter/version.h b/libavfilter/version.h index 08779130f5..c001693e3c 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -32,7 +32,7 @@ #include "version_major.h" #define LIBAVFILTER_VERSION_MINOR 8 -#define LIBAVFILTER_VERSION_MICRO 101 +#define LIBAVFILTER_VERSION_MICRO 102 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ diff --git a/libavfilter/vf_scale_cuda.c b/libavfilter/vf_scale_cuda.c index 1c99befec8..370cb1d9cd 100644 --- a/libavfilter/vf_scale_cuda.c +++ b/libavfilter/vf_scale_cuda.c @@ -51,6 +51,8 @@ static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_YUV444P16, AV_PIX_FMT_0RGB32, AV_PIX_FMT_0BGR32, + AV_PIX_FMT_RGB32, + AV_PIX_FMT_BGR32, }; #define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) ) diff --git a/libavfilter/vf_scale_cuda.cu b/libavfilter/vf_scale_cuda.cu index c9c6cafdb6..de06ba9433 100644 --- a/libavfilter/vf_scale_cuda.cu +++ b/libavfilter/vf_scale_cuda.cu @@ -853,9 +853,67 @@ struct Convert_yuv444p16le_yuv444p16le } }; -// bgr0->X - -struct Convert_bgr0_bgr0 +#define DEF_CONVERT_IDENTITY(fmt1, fmt2)\ + \ +struct Convert_##fmt1##_##fmt2 \ +{ \ + static const int in_bit_depth = 8; \ + typedef uchar4 in_T; \ + typedef uchar in_T_uv; \ + typedef uchar4 out_T; \ + typedef uchar out_T_uv; \ + \ + DEF_F(Convert, out_T) \ + { \ + DEFAULT_DST(0) = SUB_F(y, 0); \ + } \ + \ + DEF_F(Convert_uv, out_T_uv) \ + { \ + } \ +}; \ + +#define DEF_CONVERT_REORDER(fmt1, fmt2) \ + \ +struct Convert_##fmt1##_##fmt2 \ +{ \ + static const int in_bit_depth = 8; \ + typedef uchar4 in_T; \ + typedef uchar in_T_uv; \ + typedef uchar4 out_T; \ + typedef uchar out_T_uv; \ + \ + DEF_F(Convert, out_T) \ + { \ + uchar4 res = SUB_F(y, 0); \ + DEFAULT_DST(0) = make_uchar4( \ + res.z, \ + res.y, \ + res.x, \ + res.w \ + ); \ + } \ + \ + DEF_F(Convert_uv, out_T_uv) \ + { \ + } \ +}; \ + +#define DEF_CONVERT_RGB(fmt1, fmt2) \ + \ +DEF_CONVERT_IDENTITY(fmt1, fmt1) \ +DEF_CONVERT_REORDER (fmt1, fmt2) \ +DEF_CONVERT_REORDER (fmt2, fmt1) \ +DEF_CONVERT_IDENTITY(fmt2, fmt2) + +DEF_CONVERT_RGB(rgb0, bgr0) +DEF_CONVERT_RGB(rgba, bgra) +DEF_CONVERT_IDENTITY(rgba, rgb0) +DEF_CONVERT_IDENTITY(bgra, bgr0) +DEF_CONVERT_REORDER(rgba, bgr0) +DEF_CONVERT_REORDER(bgra, rgb0) + +struct Convert_bgr0_bgra { static const int in_bit_depth = 8; typedef uchar4 in_T; @@ -865,7 +923,13 @@ struct Convert_bgr0_bgr0 DEF_F(Convert, out_T) { - DEFAULT_DST(0) = SUB_F(y, 0); + uchar4 res = SUB_F(y, 0); + DEFAULT_DST(0) = make_uchar4( + res.x, + res.y, + res.z, + 1 + ); } DEF_F(Convert_uv, out_T_uv) @@ -873,7 +937,7 @@ struct Convert_bgr0_bgr0 } }; -struct Convert_bgr0_rgb0 +struct Convert_bgr0_rgba { static const int in_bit_depth = 8; typedef uchar4 in_T; @@ -888,7 +952,7 @@ struct Convert_bgr0_rgb0 res.z, res.y, res.x, - res.w + 1 ); } @@ -897,9 +961,7 @@ struct Convert_bgr0_rgb0 } }; -// rgb0->X - -struct Convert_rgb0_bgr0 +struct Convert_rgb0_bgra { static const int in_bit_depth = 8; typedef uchar4 in_T; @@ -914,7 +976,7 @@ struct Convert_rgb0_bgr0 res.z, res.y, res.x, - res.w + 1 ); } @@ -923,7 +985,7 @@ struct Convert_rgb0_bgr0 } }; -struct Convert_rgb0_rgb0 +struct Convert_rgb0_rgba { static const int in_bit_depth = 8; typedef uchar4 in_T; @@ -933,7 +995,13 @@ struct Convert_rgb0_rgb0 DEF_F(Convert, out_T) { - DEFAULT_DST(0) = SUB_F(y, 0); + uchar4 res = SUB_F(y, 0); + DEFAULT_DST(0) = make_uchar4( + res.x, + res.y, + res.z, + 1 + ); } DEF_F(Convert_uv, out_T_uv) @@ -1117,6 +1185,12 @@ extern "C" { NEAREST_KERNEL_RAW(p016le_ ## C) \ NEAREST_KERNEL_RAW(yuv444p16le_ ## C) +#define NEAREST_KERNELS_RGB(C) \ + NEAREST_KERNEL_RAW(rgb0_ ## C) \ + NEAREST_KERNEL_RAW(bgr0_ ## C) \ + NEAREST_KERNEL_RAW(rgba_ ## C) \ + NEAREST_KERNEL_RAW(bgra_ ## C) \ + NEAREST_KERNELS(yuv420p) NEAREST_KERNELS(nv12) NEAREST_KERNELS(yuv444p) @@ -1124,11 +1198,10 @@ NEAREST_KERNELS(p010le) NEAREST_KERNELS(p016le) NEAREST_KERNELS(yuv444p16le) -NEAREST_KERNEL_RAW(bgr0_bgr0) -NEAREST_KERNEL_RAW(rgb0_rgb0) -NEAREST_KERNEL_RAW(bgr0_rgb0) -NEAREST_KERNEL_RAW(rgb0_bgr0) - +NEAREST_KERNELS_RGB(rgb0) +NEAREST_KERNELS_RGB(bgr0) +NEAREST_KERNELS_RGB(rgba) +NEAREST_KERNELS_RGB(bgra) #define BILINEAR_KERNEL(C, S) \ __global__ void Subsample_Bilinear_##C##S( \ @@ -1152,6 +1225,12 @@ NEAREST_KERNEL_RAW(rgb0_bgr0) BILINEAR_KERNEL_RAW(p016le_ ## C) \ BILINEAR_KERNEL_RAW(yuv444p16le_ ## C) +#define BILINEAR_KERNELS_RGB(C) \ + BILINEAR_KERNEL_RAW(rgb0_ ## C) \ + BILINEAR_KERNEL_RAW(bgr0_ ## C) \ + BILINEAR_KERNEL_RAW(rgba_ ## C) \ + BILINEAR_KERNEL_RAW(bgra_ ## C) + BILINEAR_KERNELS(yuv420p) BILINEAR_KERNELS(nv12) BILINEAR_KERNELS(yuv444p) @@ -1159,10 +1238,10 @@ BILINEAR_KERNELS(p010le) BILINEAR_KERNELS(p016le) BILINEAR_KERNELS(yuv444p16le) -BILINEAR_KERNEL_RAW(bgr0_bgr0) -BILINEAR_KERNEL_RAW(rgb0_rgb0) -BILINEAR_KERNEL_RAW(bgr0_rgb0) -BILINEAR_KERNEL_RAW(rgb0_bgr0) +BILINEAR_KERNELS_RGB(rgb0) +BILINEAR_KERNELS_RGB(bgr0) +BILINEAR_KERNELS_RGB(rgba) +BILINEAR_KERNELS_RGB(bgra) #define BICUBIC_KERNEL(C, S) \ __global__ void Subsample_Bicubic_##C##S( \ @@ -1186,6 +1265,12 @@ BILINEAR_KERNEL_RAW(rgb0_bgr0) BICUBIC_KERNEL_RAW(p016le_ ## C) \ BICUBIC_KERNEL_RAW(yuv444p16le_ ## C) +#define BICUBIC_KERNELS_RGB(C) \ + BICUBIC_KERNEL_RAW(rgb0_ ## C) \ + BICUBIC_KERNEL_RAW(bgr0_ ## C) \ + BICUBIC_KERNEL_RAW(rgba_ ## C) \ + BICUBIC_KERNEL_RAW(bgra_ ## C) + BICUBIC_KERNELS(yuv420p) BICUBIC_KERNELS(nv12) BICUBIC_KERNELS(yuv444p) @@ -1193,11 +1278,10 @@ BICUBIC_KERNELS(p010le) BICUBIC_KERNELS(p016le) BICUBIC_KERNELS(yuv444p16le) -BICUBIC_KERNEL_RAW(bgr0_bgr0) -BICUBIC_KERNEL_RAW(rgb0_rgb0) -BICUBIC_KERNEL_RAW(bgr0_rgb0) -BICUBIC_KERNEL_RAW(rgb0_bgr0) - +BICUBIC_KERNELS_RGB(rgb0) +BICUBIC_KERNELS_RGB(bgr0) +BICUBIC_KERNELS_RGB(rgba) +BICUBIC_KERNELS_RGB(bgra) #define LANCZOS_KERNEL(C, S) \ __global__ void Subsample_Lanczos_##C##S( \ @@ -1221,6 +1305,12 @@ BICUBIC_KERNEL_RAW(rgb0_bgr0) LANCZOS_KERNEL_RAW(p016le_ ## C) \ LANCZOS_KERNEL_RAW(yuv444p16le_ ## C) +#define LANCZOS_KERNELS_RGB(C) \ + LANCZOS_KERNEL_RAW(rgb0_ ## C) \ + LANCZOS_KERNEL_RAW(bgr0_ ## C) \ + LANCZOS_KERNEL_RAW(rgba_ ## C) \ + LANCZOS_KERNEL_RAW(bgra_ ## C) + LANCZOS_KERNELS(yuv420p) LANCZOS_KERNELS(nv12) LANCZOS_KERNELS(yuv444p) @@ -1228,9 +1318,8 @@ LANCZOS_KERNELS(p010le) LANCZOS_KERNELS(p016le) LANCZOS_KERNELS(yuv444p16le) -LANCZOS_KERNEL_RAW(bgr0_bgr0) -LANCZOS_KERNEL_RAW(rgb0_rgb0) -LANCZOS_KERNEL_RAW(bgr0_rgb0) -LANCZOS_KERNEL_RAW(rgb0_bgr0) - +LANCZOS_KERNELS_RGB(rgb0) +LANCZOS_KERNELS_RGB(bgr0) +LANCZOS_KERNELS_RGB(rgba) +LANCZOS_KERNELS_RGB(bgra) } |