diff options
author | Swinney, Jonathan <jswinney@amazon.com> | 2022-08-13 20:48:30 +0000 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2022-08-16 12:08:38 +0300 |
commit | 75ffca7eef557bcc714d924048a6e184b39fa470 (patch) | |
tree | e35e8bb8fe1395add7f15cc4e10e5246d4a6f782 /libswscale/aarch64/swscale.c | |
parent | 1af7797d212eee9aa86a54f558bcfe6abb22949b (diff) | |
download | ffmpeg-75ffca7eef557bcc714d924048a6e184b39fa470.tar.gz |
libswscale/aarch64: add another hscale specialization
This specialization handles the case where filtersize is 4 mod 8, e.g.
12, 20, etc. AArch64 was previously using the C function for this case.
This implementation speeds up that case significantly.
hscale_8_to_15__fs_12_dstW_512_c: 6234.1
hscale_8_to_15__fs_12_dstW_512_neon: 1505.6
Signed-off-by: Jonathan Swinney <jswinney@amazon.com>
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libswscale/aarch64/swscale.c')
-rw-r--r-- | libswscale/aarch64/swscale.c | 18 |
1 files changed, 10 insertions, 8 deletions
diff --git a/libswscale/aarch64/swscale.c b/libswscale/aarch64/swscale.c
index ab28be4da6..a19ee64159 100644
--- a/libswscale/aarch64/swscale.c
+++ b/libswscale/aarch64/swscale.c
@@ -32,7 +32,8 @@ void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
     SCALE_FUNC(filter_n, 8, 15, opt);
 #define ALL_SCALE_FUNCS(opt) \
     SCALE_FUNCS(4, opt); \
-    SCALE_FUNCS(X8, opt)
+    SCALE_FUNCS(X8, opt); \
+    SCALE_FUNCS(X4, opt)
 
 ALL_SCALE_FUNCS(neon);
 
@@ -47,13 +48,14 @@ void ff_yuv2planeX_8_neon(const int16_t *filter, int filterSize,
     } \
 } while (0)
 
-#define ASSIGN_SCALE_FUNC(hscalefn, filtersize, opt) \
-    switch (filtersize) { \
-    case 4: ASSIGN_SCALE_FUNC2(hscalefn, 4, opt); break; \
-    default: if (filtersize % 8 == 0) \
-        ASSIGN_SCALE_FUNC2(hscalefn, X8, opt); \
-        break; \
-    }
+#define ASSIGN_SCALE_FUNC(hscalefn, filtersize, opt) do { \
+    if (filtersize == 4) \
+        ASSIGN_SCALE_FUNC2(hscalefn, 4, opt); \
+    else if (filtersize % 8 == 0) \
+        ASSIGN_SCALE_FUNC2(hscalefn, X8, opt); \
+    else if (filtersize % 4 == 0 && filtersize % 8 != 0) \
+        ASSIGN_SCALE_FUNC2(hscalefn, X4, opt); \
+} while (0)
 
 av_cold void ff_sws_init_swscale_aarch64(SwsContext *c)
 {