diff options
author | Zhao Zhili <zhilizhao@tencent.com> | 2024-06-07 21:44:52 +0800 |
---|---|---|
committer | Zhao Zhili <zhilizhao@tencent.com> | 2024-06-11 01:12:09 +0800 |
commit | 9dac8495b0020f7326bbe79efa070a0150109afc (patch) | |
tree | 6fe99525d67c202e5c334a8cb464031fe8974827 /libswscale/aarch64/swscale.c | |
parent | b1240c983f96271990c4262978a08edc8eb4df1f (diff) | |
download | ffmpeg-9dac8495b0020f7326bbe79efa070a0150109afc.tar.gz |
swscale/aarch64: Add rgb24 to yuv implementation
Tested on Apple M1:
rgb24_to_uv_8_c: 0.0
rgb24_to_uv_8_neon: 0.2
rgb24_to_uv_128_c: 1.0
rgb24_to_uv_128_neon: 0.5
rgb24_to_uv_1080_c: 7.0
rgb24_to_uv_1080_neon: 5.7
rgb24_to_uv_1920_c: 12.5
rgb24_to_uv_1920_neon: 9.5
rgb24_to_uv_half_8_c: 0.2
rgb24_to_uv_half_8_neon: 0.2
rgb24_to_uv_half_128_c: 1.0
rgb24_to_uv_half_128_neon: 0.5
rgb24_to_uv_half_1080_c: 6.2
rgb24_to_uv_half_1080_neon: 3.0
rgb24_to_uv_half_1920_c: 11.2
rgb24_to_uv_half_1920_neon: 5.2
rgb24_to_y_8_c: 0.2
rgb24_to_y_8_neon: 0.0
rgb24_to_y_128_c: 0.5
rgb24_to_y_128_neon: 0.5
rgb24_to_y_1080_c: 4.7
rgb24_to_y_1080_neon: 3.2
rgb24_to_y_1920_c: 8.0
rgb24_to_y_1920_neon: 5.7
On Pixel 6:
rgb24_to_uv_8_c: 30.7
rgb24_to_uv_8_neon: 56.9
rgb24_to_uv_128_c: 213.9
rgb24_to_uv_128_neon: 173.2
rgb24_to_uv_1080_c: 1649.9
rgb24_to_uv_1080_neon: 1424.4
rgb24_to_uv_1920_c: 2907.9
rgb24_to_uv_1920_neon: 2480.7
rgb24_to_uv_half_8_c: 36.2
rgb24_to_uv_half_8_neon: 33.4
rgb24_to_uv_half_128_c: 167.9
rgb24_to_uv_half_128_neon: 99.4
rgb24_to_uv_half_1080_c: 1293.9
rgb24_to_uv_half_1080_neon: 778.7
rgb24_to_uv_half_1920_c: 2292.7
rgb24_to_uv_half_1920_neon: 1328.7
rgb24_to_y_8_c: 19.7
rgb24_to_y_8_neon: 27.7
rgb24_to_y_128_c: 129.9
rgb24_to_y_128_neon: 96.7
rgb24_to_y_1080_c: 995.4
rgb24_to_y_1080_neon: 767.7
rgb24_to_y_1920_c: 1747.4
rgb24_to_y_1920_neon: 1337.2
Note that both tests were compiled with clang, which has auto-vectorization
enabled by default at -O3.
Reviewed-by: Rémi Denis-Courmont <remi@remlab.net>
Reviewed-by: Martin Storsjö <martin@martin.st>
Signed-off-by: Zhao Zhili <zhilizhao@tencent.com>
Diffstat (limited to 'libswscale/aarch64/swscale.c')
-rw-r--r-- | libswscale/aarch64/swscale.c | 25 |
1 files changed, 25 insertions, 0 deletions
diff --git a/libswscale/aarch64/swscale.c b/libswscale/aarch64/swscale.c
index bbd9719a44..4c4ea39dc1 100644
--- a/libswscale/aarch64/swscale.c
+++ b/libswscale/aarch64/swscale.c
@@ -201,6 +201,20 @@ void ff_yuv2plane1_8_neon(
     default: break; \
     }
 
+void ff_rgb24ToY_neon(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+                      const uint8_t *unused2, int width,
+                      uint32_t *rgb2yuv, void *opq);
+
+void ff_rgb24ToUV_neon(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0,
+                       const uint8_t *src1,
+                       const uint8_t *src2, int width, uint32_t *rgb2yuv,
+                       void *opq);
+
+void ff_rgb24ToUV_half_neon(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0,
+                            const uint8_t *src1,
+                            const uint8_t *src2, int width, uint32_t *rgb2yuv,
+                            void *opq);
+
 av_cold void ff_sws_init_swscale_aarch64(SwsContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
@@ -212,5 +226,16 @@ av_cold void ff_sws_init_swscale_aarch64(SwsContext *c)
         if (c->dstBpc == 8) {
             c->yuv2planeX = ff_yuv2planeX_8_neon;
         }
+        switch (c->srcFormat) {
+        case AV_PIX_FMT_RGB24:
+            c->lumToYV12 = ff_rgb24ToY_neon;
+            if (c->chrSrcHSubSample)
+                c->chrToYV12 = ff_rgb24ToUV_half_neon;
+            else
+                c->chrToYV12 = ff_rgb24ToUV_neon;
+            break;
+        default:
+            break;
+        }
     }
 }