about | summary | refs | log | tree | commit | diff | stats
path: root/libswresample/x86
diff options
context:
space:
mode:
author: Muhammad Faiz <mfcc64@gmail.com> 2017-03-16 11:33:16 +0700
committer: Muhammad Faiz <mfcc64@gmail.com> 2017-03-19 12:24:41 +0700
commit: de1308429ae649c899b74365f0dc72847676ba75 (patch)
tree: f3072f9c6af39f6fbaa110452cfa8afba1a20887 /libswresample/x86
parent: 3d5c2169e44e98de1589c13d593f62c1b73cf94e (diff)
download: ffmpeg-de1308429ae649c899b74365f0dc72847676ba75.tar.gz
swresample/x86/resample: extend resample_double to support avx and fma3
benchmark: sse2 10.670s, avx 8.763s, fma3 8.380s

Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
Diffstat (limited to 'libswresample/x86')
-rw-r--r-- libswresample/x86/resample.asm | 15
-rw-r--r-- libswresample/x86/resample_init.c | 10
2 files changed, 22 insertions, 3 deletions
diff --git a/libswresample/x86/resample.asm b/libswresample/x86/resample.asm
index 4163df1aa1..7107cf9d42 100644
--- a/libswresample/x86/resample.asm
+++ b/libswresample/x86/resample.asm
@@ -203,7 +203,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_count, dst, frac, \
; horizontal sum & store
%if mmsize == 32
vextractf128 xm1, m0, 0x1
- addps xm0, xm1
+ addp%4 xm0, xm1
%endif
movhlps xm1, xm0
%ifidn %1, float
@@ -489,8 +489,8 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
%if mmsize == 32
vextractf128 xm1, m0, 0x1
vextractf128 xm3, m2, 0x1
- addps xm0, xm1
- addps xm2, xm3
+ addp%4 xm0, xm1
+ addp%4 xm2, xm3
%endif
cvtsi2s%4 xm1, fracd
subp%4 xm2, xm0
@@ -608,3 +608,12 @@ RESAMPLE_FNS int16, 2, 1
INIT_XMM sse2
RESAMPLE_FNS double, 8, 3, d, pdbl_1
+
+%if HAVE_AVX_EXTERNAL
+INIT_YMM avx
+RESAMPLE_FNS double, 8, 3, d, pdbl_1
+%endif
+%if HAVE_FMA3_EXTERNAL
+INIT_YMM fma3
+RESAMPLE_FNS double, 8, 3, d, pdbl_1
+%endif
diff --git a/libswresample/x86/resample_init.c b/libswresample/x86/resample_init.c
index e515762b98..c6b2a36060 100644
--- a/libswresample/x86/resample_init.c
+++ b/libswresample/x86/resample_init.c
@@ -42,6 +42,8 @@ RESAMPLE_FUNCS(float, avx);
RESAMPLE_FUNCS(float, fma3);
RESAMPLE_FUNCS(float, fma4);
RESAMPLE_FUNCS(double, sse2);
+RESAMPLE_FUNCS(double, avx);
+RESAMPLE_FUNCS(double, fma3);
av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
{
@@ -85,6 +87,14 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
c->dsp.resample_linear = ff_resample_linear_double_sse2;
c->dsp.resample_common = ff_resample_common_double_sse2;
}
+ if (EXTERNAL_AVX_FAST(mm_flags)) {
+ c->dsp.resample_linear = ff_resample_linear_double_avx;
+ c->dsp.resample_common = ff_resample_common_double_avx;
+ }
+ if (EXTERNAL_FMA3_FAST(mm_flags)) {
+ c->dsp.resample_linear = ff_resample_linear_double_fma3;
+ c->dsp.resample_common = ff_resample_common_double_fma3;
+ }
break;
}
}