aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/v210enc_init.c
diff options
context:
space:
mode:
author: James Darnley <jdarnley@obe.tv> 2022-10-25 16:27:38 +0200
committer: James Darnley <jdarnley@obe.tv> 2022-11-04 19:37:46 +0100
commit: c3d36e1b3de1e2b8a84f47d0369dcea41c8cd351 (patch)
tree: e7ed526c1b64a42a85a632fab8c404b3ba8c6d17 /libavcodec/x86/v210enc_init.c
parent: 1936c06f029bd0e793aea47767b06e72a497c268 (diff)
download: ffmpeg-c3d36e1b3de1e2b8a84f47d0369dcea41c8cd351.tar.gz
avcodec/v210enc: add new function for avx2 avx512 avx512icl
Negligible speed difference for avx2 on Zen 2 (Ryzen 5700X) and Broadwell (Xeon E5-2620 v4):
    1690±4.3 decicycles vs. 1693±78.4
    1439±31.1 decicycles vs. 1429±16.7

Moderate speedup with avx512 on Skylake-X (Xeon D-2123IT):
    1.22x faster (793±0.8 vs. 649±5.5 decicycles) compared with avx2

Better speedup with avx512icl on Ice Lake (Xeon Silver 4316):
    1.77x faster (784±1.8 vs. 442±11.6 decicycles) compared with avx2

Co-authors:
    Henrik Gramner <henrik@gramner.com>
    Kieran Kunhya <kierank@obe.tv>
Diffstat (limited to 'libavcodec/x86/v210enc_init.c')
-rw-r--r-- libavcodec/x86/v210enc_init.c 14
1 files changed, 14 insertions, 0 deletions
diff --git a/libavcodec/x86/v210enc_init.c b/libavcodec/x86/v210enc_init.c
index 13a351dd1d..6e9f8c6e61 100644
--- a/libavcodec/x86/v210enc_init.c
+++ b/libavcodec/x86/v210enc_init.c
@@ -27,6 +27,10 @@ void ff_v210_planar_pack_8_avx(const uint8_t *y, const uint8_t *u,
const uint8_t *v, uint8_t *dst, ptrdiff_t width);
void ff_v210_planar_pack_8_avx2(const uint8_t *y, const uint8_t *u,
const uint8_t *v, uint8_t *dst, ptrdiff_t width);
+void ff_v210_planar_pack_8_avx512(const uint8_t *y, const uint8_t *u,
+ const uint8_t *v, uint8_t *dst, ptrdiff_t width);
+void ff_v210_planar_pack_8_avx512icl(const uint8_t *y, const uint8_t *u,
+ const uint8_t *v, uint8_t *dst, ptrdiff_t width);
void ff_v210_planar_pack_10_ssse3(const uint16_t *y, const uint16_t *u,
const uint16_t *v, uint8_t *dst,
ptrdiff_t width);
@@ -52,4 +56,14 @@ av_cold void ff_v210enc_init_x86(V210EncContext *s)
s->sample_factor_10 = 2;
s->pack_line_10 = ff_v210_planar_pack_10_avx2;
}
+
+ if (EXTERNAL_AVX512(cpu_flags)) {
+ s->sample_factor_8 = 2;
+ s->pack_line_8 = ff_v210_planar_pack_8_avx512;
+ }
+
+ if (EXTERNAL_AVX512ICL(cpu_flags)) {
+ s->sample_factor_8 = 4;
+ s->pack_line_8 = ff_v210_planar_pack_8_avx512icl;
+ }
}