diff options
author | Martin Vignali <martin.vignali@gmail.com> | 2017-11-21 09:14:35 +0100 |
---|---|---|
committer | Martin Vignali <martin.vignali@gmail.com> | 2017-11-21 09:41:58 +0100 |
commit | e641c94190b6bc8a3278dd727cdfc8c6d9aca112 (patch) | |
tree | d0eba55e2eeeed4bdb7181a6902675a57bf11e2b /libavcodec | |
parent | 6955e8842e248bec5adebd8527d2802fcc762493 (diff) | |
download | ffmpeg-e641c94190b6bc8a3278dd727cdfc8c6d9aca112.tar.gz |
avcodec/huffyuvdsp : add add_int16 AVX2 func
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/huffyuvdsp.asm | 5 | ||||
-rw-r--r-- | libavcodec/x86/huffyuvdsp_init.c | 5 | ||||
-rw-r--r-- | libavcodec/x86/huffyuvdsp_template.asm | 4 |
3 files changed, 12 insertions, 2 deletions
```diff
diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm
index 6f038e17da..a1231f1b22 100644
--- a/libavcodec/x86/huffyuvdsp.asm
+++ b/libavcodec/x86/huffyuvdsp.asm
@@ -53,6 +53,11 @@ ADD_INT16
 INIT_XMM sse2
 ADD_INT16
 
+%if HAVE_AVX2_EXTERNAL
+INIT_YMM avx2
+ADD_INT16
+%endif
+
 ; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
 ;                               intptr_t w, uint8_t *left)
 %macro LEFT_BGR32 0
diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c
index a522074565..eb10de383d 100644
--- a/libavcodec/x86/huffyuvdsp_init.c
+++ b/libavcodec/x86/huffyuvdsp_init.c
@@ -28,6 +28,7 @@
 
 void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+void ff_add_int16_avx2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src,
                                      intptr_t w, uint8_t *left);
 
@@ -53,4 +54,8 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c, enum AVPixelFormat pix
         c->add_int16 = ff_add_int16_sse2;
         c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_sse2;
     }
+
+    if (EXTERNAL_AVX2_FAST(cpu_flags)) {
+        c->add_int16 = ff_add_int16_avx2;
+    }
 }
diff --git a/libavcodec/x86/huffyuvdsp_template.asm b/libavcodec/x86/huffyuvdsp_template.asm
index 7e14542671..89721f4ec3 100644
--- a/libavcodec/x86/huffyuvdsp_template.asm
+++ b/libavcodec/x86/huffyuvdsp_template.asm
@@ -21,8 +21,8 @@
 ;******************************************************************************
 
 %macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
-    movd    m4, maskd
-    SPLATW  m4, m4
+    movd   xm4, maskd
+    SPLATW  m4, xm4
     add     wd, wd
     test    wq, 2*mmsize - 1
     jz %%.tomainloop
```