diff options
author | James Almer <jamrial@gmail.com> | 2019-01-02 21:09:25 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2019-01-03 10:12:19 -0300 |
commit | 5402c1886b97a0c46e843b5ae0c08de74d2e6091 (patch) | |
tree | 3d59f72b9849f493f613a6847c5316fce8add9cb /libavfilter | |
parent | ba89dc27b50cf6e1bcafe473ce8f2e4363be18ee (diff) | |
download | ffmpeg-5402c1886b97a0c46e843b5ae0c08de74d2e6091.tar.gz |
x86/af_afir: add ff_fcmul_add_avx()
fcmul_add_c: 1228.8
fcmul_add_sse3: 334.3
fcmul_add_avx: 186.3
Tested on a Core i5 4460 @ 3.2GHz
Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavfilter')
-rw-r--r-- | libavfilter/x86/af_afir.asm | 8 | ||||
-rw-r--r-- | libavfilter/x86/af_afir_init.c | 5 |
2 files changed, 12 insertions, 1 deletion
diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
index fcc1f426db..8054ac5f10 100644
--- a/libavfilter/x86/af_afir.asm
+++ b/libavfilter/x86/af_afir.asm
@@ -27,7 +27,7 @@ SECTION .text
 ; void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
 ;------------------------------------------------------------------------------
 
-INIT_XMM sse3
+%macro FCMUL_ADD 0
 cglobal fcmul_add, 4,4,6, sum, t, c, len
     shl lend, 3
     add tq, lenq
@@ -61,3 +61,9 @@ ALIGN 16
     addss xm0, [sumq + lenq]
     movss [sumq + lenq], xm0
     RET
+%endmacro
+
+INIT_XMM sse3
+FCMUL_ADD
+INIT_YMM avx
+FCMUL_ADD
diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c
index 29e6f976b2..c37212c381 100644
--- a/libavfilter/x86/af_afir_init.c
+++ b/libavfilter/x86/af_afir_init.c
@@ -24,6 +24,8 @@
 
 void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
                        ptrdiff_t len);
+void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
+                      ptrdiff_t len);
 
 av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
 {
@@ -32,4 +34,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
     if (EXTERNAL_SSE3(cpu_flags)) {
         s->fcmul_add = ff_fcmul_add_sse3;
     }
+    if (EXTERNAL_AVX_FAST(cpu_flags)) {
+        s->fcmul_add = ff_fcmul_add_avx;
+    }
 }