about | summary | refs | log | tree | commit | diff | stats
path: root/libswscale/aarch64/swscale.c
diff options
context:
space:
mode:
author: Zhao Zhili <zhilizhao@tencent.com> 2024-06-07 21:44:52 +0800
committer: Zhao Zhili <zhilizhao@tencent.com> 2024-06-11 01:12:09 +0800
commit: 9dac8495b0020f7326bbe79efa070a0150109afc (patch)
tree: 6fe99525d67c202e5c334a8cb464031fe8974827 /libswscale/aarch64/swscale.c
parent: b1240c983f96271990c4262978a08edc8eb4df1f (diff)
download: ffmpeg-9dac8495b0020f7326bbe79efa070a0150109afc.tar.gz
swscale/aarch64: Add rgb24 to yuv implementation
Test on Apple M1:

rgb24_to_uv_8_c: 0.0
rgb24_to_uv_8_neon: 0.2
rgb24_to_uv_128_c: 1.0
rgb24_to_uv_128_neon: 0.5
rgb24_to_uv_1080_c: 7.0
rgb24_to_uv_1080_neon: 5.7
rgb24_to_uv_1920_c: 12.5
rgb24_to_uv_1920_neon: 9.5
rgb24_to_uv_half_8_c: 0.2
rgb24_to_uv_half_8_neon: 0.2
rgb24_to_uv_half_128_c: 1.0
rgb24_to_uv_half_128_neon: 0.5
rgb24_to_uv_half_1080_c: 6.2
rgb24_to_uv_half_1080_neon: 3.0
rgb24_to_uv_half_1920_c: 11.2
rgb24_to_uv_half_1920_neon: 5.2
rgb24_to_y_8_c: 0.2
rgb24_to_y_8_neon: 0.0
rgb24_to_y_128_c: 0.5
rgb24_to_y_128_neon: 0.5
rgb24_to_y_1080_c: 4.7
rgb24_to_y_1080_neon: 3.2
rgb24_to_y_1920_c: 8.0
rgb24_to_y_1920_neon: 5.7

On Pixel 6:

rgb24_to_uv_8_c: 30.7
rgb24_to_uv_8_neon: 56.9
rgb24_to_uv_128_c: 213.9
rgb24_to_uv_128_neon: 173.2
rgb24_to_uv_1080_c: 1649.9
rgb24_to_uv_1080_neon: 1424.4
rgb24_to_uv_1920_c: 2907.9
rgb24_to_uv_1920_neon: 2480.7
rgb24_to_uv_half_8_c: 36.2
rgb24_to_uv_half_8_neon: 33.4
rgb24_to_uv_half_128_c: 167.9
rgb24_to_uv_half_128_neon: 99.4
rgb24_to_uv_half_1080_c: 1293.9
rgb24_to_uv_half_1080_neon: 778.7
rgb24_to_uv_half_1920_c: 2292.7
rgb24_to_uv_half_1920_neon: 1328.7
rgb24_to_y_8_c: 19.7
rgb24_to_y_8_neon: 27.7
rgb24_to_y_128_c: 129.9
rgb24_to_y_128_neon: 96.7
rgb24_to_y_1080_c: 995.4
rgb24_to_y_1080_neon: 767.7
rgb24_to_y_1920_c: 1747.4
rgb24_to_y_1920_neon: 1337.2

Note both tests use clang as compiler, which has vectorization
enabled by default with -O3.

Reviewed-by: Rémi Denis-Courmont <remi@remlab.net>
Reviewed-by: Martin Storsjö <martin@martin.st>
Signed-off-by: Zhao Zhili <zhilizhao@tencent.com>
Diffstat (limited to 'libswscale/aarch64/swscale.c')
-rw-r--r-- libswscale/aarch64/swscale.c | 25
1 files changed, 25 insertions, 0 deletions
diff --git a/libswscale/aarch64/swscale.c b/libswscale/aarch64/swscale.c
index bbd9719a44..4c4ea39dc1 100644
--- a/libswscale/aarch64/swscale.c
+++ b/libswscale/aarch64/swscale.c
@@ -201,6 +201,20 @@ void ff_yuv2plane1_8_neon(
default: break; \
}
+void ff_rgb24ToY_neon(uint8_t *_dst, const uint8_t *src, const uint8_t *unused1,
+ const uint8_t *unused2, int width,
+ uint32_t *rgb2yuv, void *opq);
+
+void ff_rgb24ToUV_neon(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0,
+ const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *rgb2yuv,
+ void *opq);
+
+void ff_rgb24ToUV_half_neon(uint8_t *_dstU, uint8_t *_dstV, const uint8_t *unused0,
+ const uint8_t *src1,
+ const uint8_t *src2, int width, uint32_t *rgb2yuv,
+ void *opq);
+
av_cold void ff_sws_init_swscale_aarch64(SwsContext *c)
{
int cpu_flags = av_get_cpu_flags();
@@ -212,5 +226,16 @@ av_cold void ff_sws_init_swscale_aarch64(SwsContext *c)
if (c->dstBpc == 8) {
c->yuv2planeX = ff_yuv2planeX_8_neon;
}
+ switch (c->srcFormat) {
+ case AV_PIX_FMT_RGB24:
+ c->lumToYV12 = ff_rgb24ToY_neon;
+ if (c->chrSrcHSubSample)
+ c->chrToYV12 = ff_rgb24ToUV_half_neon;
+ else
+ c->chrToYV12 = ff_rgb24ToUV_neon;
+ break;
+ default:
+ break;
+ }
}
}