diff options
author | Mark Reid <mindmark@gmail.com> | 2021-11-24 13:15:21 -0800 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2022-01-11 16:34:33 -0300 |
commit | 52f70261642725f2b1af6e1a5a0b2c9d868997fa (patch) | |
tree | da784581434b55fca80101e38325a45c38446577 /libswscale/x86/swscale.c | |
parent | 9e445a5be2dca30a1f1103c73440648ccf5af9b1 (diff) | |
download | ffmpeg-52f70261642725f2b1af6e1a5a0b2c9d868997fa.tar.gz |
swscale/x86/input.asm: add x86-optimized planer rgb2yuv functions
sse2 only operates on 2 lanes per loop for to_y and to_uv functions, due
to the lack of pmulld instruction. Emulating pmulld with 2 pmuludq and shuffles
proved too costly and made to_uv functions slower then the c implementation.
For to_y on sse2 only float functions are generated,
I was are not able outperform the c implementation on the integer pixel formats.
For to_a on see4 only the float functions are generated.
sse2 and sse4 generated nearly identical performing code on integer pixel formats,
so only sse2/avx2 versions are generated.
planar_gbrp_to_y_512_c: 1197.5
planar_gbrp_to_y_512_sse4: 444.5
planar_gbrp_to_y_512_avx2: 287.5
planar_gbrap_to_y_512_c: 1204.5
planar_gbrap_to_y_512_sse4: 447.5
planar_gbrap_to_y_512_avx2: 289.5
planar_gbrp9be_to_y_512_c: 1380.0
planar_gbrp9be_to_y_512_sse4: 543.5
planar_gbrp9be_to_y_512_avx2: 340.0
planar_gbrp9le_to_y_512_c: 1200.5
planar_gbrp9le_to_y_512_sse4: 442.0
planar_gbrp9le_to_y_512_avx2: 282.0
planar_gbrp10be_to_y_512_c: 1378.5
planar_gbrp10be_to_y_512_sse4: 544.0
planar_gbrp10be_to_y_512_avx2: 337.5
planar_gbrp10le_to_y_512_c: 1200.0
planar_gbrp10le_to_y_512_sse4: 448.0
planar_gbrp10le_to_y_512_avx2: 285.5
planar_gbrap10be_to_y_512_c: 1380.0
planar_gbrap10be_to_y_512_sse4: 542.0
planar_gbrap10be_to_y_512_avx2: 340.5
planar_gbrap10le_to_y_512_c: 1199.0
planar_gbrap10le_to_y_512_sse4: 446.0
planar_gbrap10le_to_y_512_avx2: 289.5
planar_gbrp12be_to_y_512_c: 10563.0
planar_gbrp12be_to_y_512_sse4: 542.5
planar_gbrp12be_to_y_512_avx2: 339.0
planar_gbrp12le_to_y_512_c: 1201.0
planar_gbrp12le_to_y_512_sse4: 440.5
planar_gbrp12le_to_y_512_avx2: 286.0
planar_gbrap12be_to_y_512_c: 1701.5
planar_gbrap12be_to_y_512_sse4: 917.0
planar_gbrap12be_to_y_512_avx2: 338.5
planar_gbrap12le_to_y_512_c: 1201.0
planar_gbrap12le_to_y_512_sse4: 444.5
planar_gbrap12le_to_y_512_avx2: 288.0
planar_gbrp14be_to_y_512_c: 1370.5
planar_gbrp14be_to_y_512_sse4: 545.0
planar_gbrp14be_to_y_512_avx2: 338.5
planar_gbrp14le_to_y_512_c: 1199.0
planar_gbrp14le_to_y_512_sse4: 444.0
planar_gbrp14le_to_y_512_avx2: 279.5
planar_gbrp16be_to_y_512_c: 1364.0
planar_gbrp16be_to_y_512_sse4: 544.5
planar_gbrp16be_to_y_512_avx2: 339.5
planar_gbrp16le_to_y_512_c: 1201.0
planar_gbrp16le_to_y_512_sse4: 445.5
planar_gbrp16le_to_y_512_avx2: 280.5
planar_gbrap16be_to_y_512_c: 1377.0
planar_gbrap16be_to_y_512_sse4: 545.0
planar_gbrap16be_to_y_512_avx2: 338.5
planar_gbrap16le_to_y_512_c: 1201.0
planar_gbrap16le_to_y_512_sse4: 442.0
planar_gbrap16le_to_y_512_avx2: 279.0
planar_gbrpf32be_to_y_512_c: 4113.0
planar_gbrpf32be_to_y_512_sse2: 2438.0
planar_gbrpf32be_to_y_512_sse4: 1068.0
planar_gbrpf32be_to_y_512_avx2: 904.5
planar_gbrpf32le_to_y_512_c: 3818.5
planar_gbrpf32le_to_y_512_sse2: 2024.5
planar_gbrpf32le_to_y_512_sse4: 1241.5
planar_gbrpf32le_to_y_512_avx2: 657.0
planar_gbrapf32be_to_y_512_c: 3707.0
planar_gbrapf32be_to_y_512_sse2: 2444.0
planar_gbrapf32be_to_y_512_sse4: 1077.0
planar_gbrapf32be_to_y_512_avx2: 909.0
planar_gbrapf32le_to_y_512_c: 3822.0
planar_gbrapf32le_to_y_512_sse2: 2024.5
planar_gbrapf32le_to_y_512_sse4: 1176.0
planar_gbrapf32le_to_y_512_avx2: 658.5
planar_gbrp_to_uv_512_c: 2325.8
planar_gbrp_to_uv_512_sse2: 1726.8
planar_gbrp_to_uv_512_sse4: 771.8
planar_gbrp_to_uv_512_avx2: 506.8
planar_gbrap_to_uv_512_c: 2281.8
planar_gbrap_to_uv_512_sse2: 1726.3
planar_gbrap_to_uv_512_sse4: 768.3
planar_gbrap_to_uv_512_avx2: 496.3
planar_gbrp9be_to_uv_512_c: 2336.8
planar_gbrp9be_to_uv_512_sse2: 1924.8
planar_gbrp9be_to_uv_512_sse4: 852.3
planar_gbrp9be_to_uv_512_avx2: 552.8
planar_gbrp9le_to_uv_512_c: 2270.3
planar_gbrp9le_to_uv_512_sse2: 1512.3
planar_gbrp9le_to_uv_512_sse4: 764.3
planar_gbrp9le_to_uv_512_avx2: 491.3
planar_gbrp10be_to_uv_512_c: 2281.8
planar_gbrp10be_to_uv_512_sse2: 1917.8
planar_gbrp10be_to_uv_512_sse4: 855.3
planar_gbrp10be_to_uv_512_avx2: 541.3
planar_gbrp10le_to_uv_512_c: 2269.8
planar_gbrp10le_to_uv_512_sse2: 1515.3
planar_gbrp10le_to_uv_512_sse4: 759.8
planar_gbrp10le_to_uv_512_avx2: 487.8
planar_gbrap10be_to_uv_512_c: 2382.3
planar_gbrap10be_to_uv_512_sse2: 1924.8
planar_gbrap10be_to_uv_512_sse4: 855.3
planar_gbrap10be_to_uv_512_avx2: 540.8
planar_gbrap10le_to_uv_512_c: 2382.3
planar_gbrap10le_to_uv_512_sse2: 1512.3
planar_gbrap10le_to_uv_512_sse4: 759.3
planar_gbrap10le_to_uv_512_avx2: 484.8
planar_gbrp12be_to_uv_512_c: 2283.8
planar_gbrp12be_to_uv_512_sse2: 1936.8
planar_gbrp12be_to_uv_512_sse4: 858.3
planar_gbrp12be_to_uv_512_avx2: 541.3
planar_gbrp12le_to_uv_512_c: 2278.8
planar_gbrp12le_to_uv_512_sse2: 1507.3
planar_gbrp12le_to_uv_512_sse4: 760.3
planar_gbrp12le_to_uv_512_avx2: 485.8
planar_gbrap12be_to_uv_512_c: 2385.3
planar_gbrap12be_to_uv_512_sse2: 1927.8
planar_gbrap12be_to_uv_512_sse4: 855.3
planar_gbrap12be_to_uv_512_avx2: 539.8
planar_gbrap12le_to_uv_512_c: 2377.3
planar_gbrap12le_to_uv_512_sse2: 1516.3
planar_gbrap12le_to_uv_512_sse4: 759.3
planar_gbrap12le_to_uv_512_avx2: 484.8
planar_gbrp14be_to_uv_512_c: 2283.8
planar_gbrp14be_to_uv_512_sse2: 1935.3
planar_gbrp14be_to_uv_512_sse4: 852.3
planar_gbrp14be_to_uv_512_avx2: 540.3
planar_gbrp14le_to_uv_512_c: 2276.8
planar_gbrp14le_to_uv_512_sse2: 1514.8
planar_gbrp14le_to_uv_512_sse4: 762.3
planar_gbrp14le_to_uv_512_avx2: 484.8
planar_gbrp16be_to_uv_512_c: 2383.3
planar_gbrp16be_to_uv_512_sse2: 1881.8
planar_gbrp16be_to_uv_512_sse4: 852.3
planar_gbrp16be_to_uv_512_avx2: 541.8
planar_gbrp16le_to_uv_512_c: 2378.3
planar_gbrp16le_to_uv_512_sse2: 1476.8
planar_gbrp16le_to_uv_512_sse4: 765.3
planar_gbrp16le_to_uv_512_avx2: 485.8
planar_gbrap16be_to_uv_512_c: 2382.3
planar_gbrap16be_to_uv_512_sse2: 1886.3
planar_gbrap16be_to_uv_512_sse4: 853.8
planar_gbrap16be_to_uv_512_avx2: 550.8
planar_gbrap16le_to_uv_512_c: 2381.8
planar_gbrap16le_to_uv_512_sse2: 1488.3
planar_gbrap16le_to_uv_512_sse4: 765.3
planar_gbrap16le_to_uv_512_avx2: 491.8
planar_gbrpf32be_to_uv_512_c: 4863.0
planar_gbrpf32be_to_uv_512_sse2: 3347.5
planar_gbrpf32be_to_uv_512_sse4: 1800.0
planar_gbrpf32be_to_uv_512_avx2: 1199.0
planar_gbrpf32le_to_uv_512_c: 4725.0
planar_gbrpf32le_to_uv_512_sse2: 2753.0
planar_gbrpf32le_to_uv_512_sse4: 1474.5
planar_gbrpf32le_to_uv_512_avx2: 927.5
planar_gbrapf32be_to_uv_512_c: 4859.0
planar_gbrapf32be_to_uv_512_sse2: 3269.0
planar_gbrapf32be_to_uv_512_sse4: 1802.0
planar_gbrapf32be_to_uv_512_avx2: 1201.5
planar_gbrapf32le_to_uv_512_c: 6338.0
planar_gbrapf32le_to_uv_512_sse2: 2756.5
planar_gbrapf32le_to_uv_512_sse4: 1476.0
planar_gbrapf32le_to_uv_512_avx2: 908.5
planar_gbrap_to_a_512_c: 383.3
planar_gbrap_to_a_512_sse2: 66.8
planar_gbrap_to_a_512_avx2: 43.8
planar_gbrap10be_to_a_512_c: 601.8
planar_gbrap10be_to_a_512_sse2: 86.3
planar_gbrap10be_to_a_512_avx2: 34.8
planar_gbrap10le_to_a_512_c: 602.3
planar_gbrap10le_to_a_512_sse2: 48.8
planar_gbrap10le_to_a_512_avx2: 31.3
planar_gbrap12be_to_a_512_c: 601.8
planar_gbrap12be_to_a_512_sse2: 111.8
planar_gbrap12be_to_a_512_avx2: 41.3
planar_gbrap12le_to_a_512_c: 385.8
planar_gbrap12le_to_a_512_sse2: 75.3
planar_gbrap12le_to_a_512_avx2: 39.8
planar_gbrap16be_to_a_512_c: 386.8
planar_gbrap16be_to_a_512_sse2: 79.8
planar_gbrap16be_to_a_512_avx2: 31.3
planar_gbrap16le_to_a_512_c: 600.3
planar_gbrap16le_to_a_512_sse2: 40.3
planar_gbrap16le_to_a_512_avx2: 30.3
planar_gbrapf32be_to_a_512_c: 1148.8
planar_gbrapf32be_to_a_512_sse2: 611.3
planar_gbrapf32be_to_a_512_sse4: 234.8
planar_gbrapf32be_to_a_512_avx2: 183.3
planar_gbrapf32le_to_a_512_c: 851.3
planar_gbrapf32le_to_a_512_sse2: 263.3
planar_gbrapf32le_to_a_512_sse4: 199.3
planar_gbrapf32le_to_a_512_avx2: 156.8
Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libswscale/x86/swscale.c')
-rw-r--r-- | libswscale/x86/swscale.c | 170 |
1 files changed, 170 insertions, 0 deletions
diff --git a/libswscale/x86/swscale.c b/libswscale/x86/swscale.c index d5a467da0e..3cf0c419fd 100644 --- a/libswscale/x86/swscale.c +++ b/libswscale/x86/swscale.c @@ -390,6 +390,80 @@ YUV2GBRP_DECL(sse2); YUV2GBRP_DECL(sse4); YUV2GBRP_DECL(avx2); +#define INPUT_PLANAR_RGB_Y_FN_DECL(fmt, opt) \ +void ff_planar_##fmt##_to_y_##opt(uint8_t *dst, \ + const uint8_t *src[4], int w, int32_t *rgb2yuv) + +#define INPUT_PLANAR_RGB_UV_FN_DECL(fmt, opt) \ +void ff_planar_##fmt##_to_uv_##opt(uint8_t *dstU, uint8_t *dstV, \ + const uint8_t *src[4], int w, int32_t *rgb2yuv) + +#define INPUT_PLANAR_RGB_A_FN_DECL(fmt, opt) \ +void ff_planar_##fmt##_to_a_##opt(uint8_t *dst, \ + const uint8_t *src[4], int w, int32_t *rgb2yuv) + + +#define INPUT_PLANAR_RGBXX_A_DECL(fmt, opt) \ +INPUT_PLANAR_RGB_A_FN_DECL(fmt##le, opt); \ +INPUT_PLANAR_RGB_A_FN_DECL(fmt##be, opt); + +#define INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt) \ +INPUT_PLANAR_RGB_Y_FN_DECL(fmt##le, opt); \ +INPUT_PLANAR_RGB_Y_FN_DECL(fmt##be, opt); + +#define INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt) \ +INPUT_PLANAR_RGB_UV_FN_DECL(fmt##le, opt); \ +INPUT_PLANAR_RGB_UV_FN_DECL(fmt##be, opt); + +#define INPUT_PLANAR_RGBXX_YUVA_DECL(fmt, opt) \ +INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \ +INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \ +INPUT_PLANAR_RGBXX_A_DECL(fmt, opt); + +#define INPUT_PLANAR_RGBXX_YUV_DECL(fmt, opt) \ +INPUT_PLANAR_RGBXX_Y_DECL(fmt, opt); \ +INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); + +#define INPUT_PLANAR_RGBXX_UVA_DECL(fmt, opt) \ +INPUT_PLANAR_RGBXX_UV_DECL(fmt, opt); \ +INPUT_PLANAR_RGBXX_A_DECL(fmt, opt); + +#define INPUT_PLANAR_RGB_A_ALL_DECL(opt) \ +INPUT_PLANAR_RGB_A_FN_DECL(rgb, opt); \ +INPUT_PLANAR_RGBXX_A_DECL(rgb10, opt); \ +INPUT_PLANAR_RGBXX_A_DECL(rgb12, opt); \ +INPUT_PLANAR_RGBXX_A_DECL(rgb16, opt); \ +INPUT_PLANAR_RGBXX_A_DECL(rgbf32, opt); + +#define INPUT_PLANAR_RGB_Y_ALL_DECL(opt) \ +INPUT_PLANAR_RGB_Y_FN_DECL(rgb, opt); \ +INPUT_PLANAR_RGBXX_Y_DECL(rgb9, opt); \ +INPUT_PLANAR_RGBXX_Y_DECL(rgb10, opt); \ +INPUT_PLANAR_RGBXX_Y_DECL(rgb12, opt); \ +INPUT_PLANAR_RGBXX_Y_DECL(rgb14, opt); \ +INPUT_PLANAR_RGBXX_Y_DECL(rgb16, opt); \ +INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, opt); + +#define INPUT_PLANAR_RGB_UV_ALL_DECL(opt) \ +INPUT_PLANAR_RGB_UV_FN_DECL(rgb, opt); \ +INPUT_PLANAR_RGBXX_UV_DECL(rgb9, opt); \ +INPUT_PLANAR_RGBXX_UV_DECL(rgb10, opt); \ +INPUT_PLANAR_RGBXX_UV_DECL(rgb12, opt); \ +INPUT_PLANAR_RGBXX_UV_DECL(rgb14, opt); \ +INPUT_PLANAR_RGBXX_UV_DECL(rgb16, opt); \ +INPUT_PLANAR_RGBXX_UV_DECL(rgbf32, opt); + +INPUT_PLANAR_RGBXX_Y_DECL(rgbf32, sse2); +INPUT_PLANAR_RGB_UV_ALL_DECL(sse2); +INPUT_PLANAR_RGB_A_ALL_DECL(sse2); + +INPUT_PLANAR_RGB_Y_ALL_DECL(sse4); +INPUT_PLANAR_RGB_UV_ALL_DECL(sse4); +INPUT_PLANAR_RGBXX_A_DECL(rgbf32, sse4); + +INPUT_PLANAR_RGB_Y_ALL_DECL(avx2); +INPUT_PLANAR_RGB_UV_ALL_DECL(avx2); +INPUT_PLANAR_RGB_A_ALL_DECL(avx2); #endif av_cold void ff_sws_init_swscale_x86(SwsContext *c) @@ -639,6 +713,102 @@ switch(c->dstBpc){ \ } } + +#define INPUT_PLANER_RGB_A_FUNC_CASE(fmt, name, opt) \ + case fmt: \ + c->readAlpPlanar = ff_planar_##name##_to_a_##opt; + +#define INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \ + case rgba_fmt: \ + case rgb_fmt: \ + c->readLumPlanar = ff_planar_##name##_to_y_##opt; \ + c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \ + break; + +#define INPUT_PLANER_RGB_YUV_FUNC_CASE(fmt, name, opt) \ + case fmt: \ + c->readLumPlanar = ff_planar_##name##_to_y_##opt; \ + c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \ + break; + +#define INPUT_PLANER_RGB_UV_FUNC_CASE(fmt, name, opt) \ + case fmt: \ + c->readChrPlanar = ff_planar_##name##_to_uv_##opt; \ + break; + +#define INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \ + INPUT_PLANER_RGB_A_FUNC_CASE(rgba_fmt##LE, name##le, opt) \ + INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \ + INPUT_PLANER_RGB_A_FUNC_CASE(rgba_fmt##BE, name##be, opt) \ + INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt) + +#define INPUT_PLANER_RGBAXX_UVA_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \ + INPUT_PLANER_RGB_A_FUNC_CASE(rgba_fmt##LE, name##le, opt) \ + INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \ + INPUT_PLANER_RGB_A_FUNC_CASE(rgba_fmt##BE, name##be, opt) \ + INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt) + +#define INPUT_PLANER_RGBAXX_YUV_FUNC_CASE(rgb_fmt, rgba_fmt, name, opt) \ + INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##LE, rgba_fmt##LE, name##le, opt) \ + INPUT_PLANER_RGBA_YUV_FUNC_CASE(rgb_fmt##BE, rgba_fmt##BE, name##be, opt) + +#define INPUT_PLANER_RGBXX_YUV_FUNC_CASE(rgb_fmt, name, opt) \ + INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \ + INPUT_PLANER_RGB_YUV_FUNC_CASE(rgb_fmt##BE, name##be, opt) + +#define INPUT_PLANER_RGBXX_UV_FUNC_CASE(rgb_fmt, name, opt) \ + INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##LE, name##le, opt) \ + INPUT_PLANER_RGB_UV_FUNC_CASE(rgb_fmt##BE, name##be, opt) + +#define INPUT_PLANER_RGB_YUVA_ALL_CASES(opt) \ + INPUT_PLANER_RGB_A_FUNC_CASE( AV_PIX_FMT_GBRAP, rgb, opt) \ + INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, opt) \ + INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, opt) \ + INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, opt) \ + INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, opt) \ + INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, opt) \ + INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, opt) \ + INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, opt) + + + if (EXTERNAL_SSE2(cpu_flags)) { + switch (c->srcFormat) { + INPUT_PLANER_RGB_A_FUNC_CASE( AV_PIX_FMT_GBRAP, rgb, sse2); + INPUT_PLANER_RGB_UV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse2); + INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse2); + INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse2); + INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse2); + INPUT_PLANER_RGBXX_UV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse2); + INPUT_PLANER_RGBAXX_UVA_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse2); + INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse2); + default: + break; + } + } + + if (EXTERNAL_SSE4(cpu_flags)) { + switch (c->srcFormat) { + case AV_PIX_FMT_GBRAP: + INPUT_PLANER_RGB_YUV_FUNC_CASE( AV_PIX_FMT_GBRP, rgb, sse4); + INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP9, rgb9, sse4); + INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRAP10, rgb10, sse4); + INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRAP12, rgb12, sse4); + INPUT_PLANER_RGBXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP14, rgb14, sse4); + INPUT_PLANER_RGBAXX_YUV_FUNC_CASE( AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRAP16, rgb16, sse4); + INPUT_PLANER_RGBAXX_YUVA_FUNC_CASE(AV_PIX_FMT_GBRPF32, AV_PIX_FMT_GBRAPF32, rgbf32, sse4); + default: + break; + } + } + + if (EXTERNAL_AVX2_FAST(cpu_flags)) { + switch (c->srcFormat) { + INPUT_PLANER_RGB_YUVA_ALL_CASES(avx2) + default: + break; + } + } + if(c->flags & SWS_FULL_CHR_H_INT) { /* yuv2gbrp uses the SwsContext for yuv coefficients |