aboutsummaryrefslogtreecommitdiffstats
path: root/libavfilter
diff options
context:
space:
mode:
author James Almer <jamrial@gmail.com> 2019-01-02 21:09:25 -0300
committer James Almer <jamrial@gmail.com> 2019-01-03 10:12:19 -0300
commit 5402c1886b97a0c46e843b5ae0c08de74d2e6091 (patch)
tree 3d59f72b9849f493f613a6847c5316fce8add9cb /libavfilter
parent ba89dc27b50cf6e1bcafe473ce8f2e4363be18ee (diff)
download ffmpeg-5402c1886b97a0c46e843b5ae0c08de74d2e6091.tar.gz
x86/af_afir: add ff_fcmul_add_avx()
fcmul_add_c: 1228.8
fcmul_add_sse3: 334.3
fcmul_add_avx: 186.3

Tested on a Core i5 4460 @ 3.2GHz

Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavfilter')
-rw-r--r-- libavfilter/x86/af_afir.asm 8
-rw-r--r-- libavfilter/x86/af_afir_init.c 5
2 files changed, 12 insertions, 1 deletion
diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
index fcc1f426db..8054ac5f10 100644
--- a/libavfilter/x86/af_afir.asm
+++ b/libavfilter/x86/af_afir.asm
@@ -27,7 +27,7 @@ SECTION .text
; void ff_fcmul_add(float *sum, const float *t, const float *c, int len)
;------------------------------------------------------------------------------
-INIT_XMM sse3
+%macro FCMUL_ADD 0
cglobal fcmul_add, 4,4,6, sum, t, c, len
shl lend, 3
add tq, lenq
@@ -61,3 +61,9 @@ ALIGN 16
addss xm0, [sumq + lenq]
movss [sumq + lenq], xm0
RET
+%endmacro
+
+INIT_XMM sse3
+FCMUL_ADD
+INIT_YMM avx
+FCMUL_ADD
diff --git a/libavfilter/x86/af_afir_init.c b/libavfilter/x86/af_afir_init.c
index 29e6f976b2..c37212c381 100644
--- a/libavfilter/x86/af_afir_init.c
+++ b/libavfilter/x86/af_afir_init.c
@@ -24,6 +24,8 @@
void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
ptrdiff_t len);
+void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
+ ptrdiff_t len);
av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
{
@@ -32,4 +34,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
if (EXTERNAL_SSE3(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_sse3;
}
+ if (EXTERNAL_AVX_FAST(cpu_flags)) {
+ s->fcmul_add = ff_fcmul_add_avx;
+ }
}