aboutsummaryrefslogtreecommitdiffstats
path: root/libswresample
diff options
context:
space:
mode:
author James Almer <jamrial@gmail.com> 2014-06-30 13:06:02 -0300
committer Michael Niedermayer <michaelni@gmx.at> 2014-07-02 01:11:20 +0200
commit c45b7f0d806ed4120b27c02bad0242958e8414fb (patch)
tree e40ec24928ee9f794866eab650c3f31e08ff77f9 /libswresample
parent 1a69224f44853f0a54c17a81449b63b61c3f2503 (diff)
download ffmpeg-c45b7f0d806ed4120b27c02bad0242958e8414fb.tar.gz
x86/swr: add ff_resample_{common, linear}_int16_xop
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswresample')
-rw-r--r-- libswresample/x86/resample.asm 36
-rw-r--r-- libswresample/x86/resample_x86_dsp.c 5
2 files changed, 26 insertions, 15 deletions
diff --git a/libswresample/x86/resample.asm b/libswresample/x86/resample.asm
index 17f6169d71..a57ff37bb9 100644
--- a/libswresample/x86/resample.asm
+++ b/libswresample/x86/resample.asm
@@ -176,8 +176,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
.inner_loop:
movu m1, [srcq+min_filter_count_x4q*1]
%ifidn %1, int16
- pmaddwd m1, [filterq+min_filter_count_x4q*1]
- paddd m0, m1
+ PMADCSWD m0, m1, [filterq+min_filter_count_x4q*1], m0, m1
%else ; float/double
%if cpuflag(fma4) || cpuflag(fma3)
fmaddp%4 m0, m1, [filterq+min_filter_count_x4q*1], m0
@@ -190,14 +189,7 @@ cglobal resample_common_%1, 1, 7, 2, ctx, phase_shift, dst, frac, \
js .inner_loop
%ifidn %1, int16
-%if mmsize == 16
- pshufd m1, m0, q0032
- paddd m0, m1
- pshufd m1, m0, q0001
-%else ; mmsize == 8
- pshufw m1, m0, q0032
-%endif
- paddd m0, m1
+ HADDD m0, m1
psrad m0, 15
add fracd, dst_incr_modd
packssdw m0, m0
@@ -427,10 +419,15 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
.inner_loop:
movu m1, [srcq+min_filter_count_x4q*1]
%ifidn %1, int16
+%if cpuflag(xop)
+ vpmadcswd m2, m1, [filter2q+min_filter_count_x4q*1], m2
+ vpmadcswd m0, m1, [filter1q+min_filter_count_x4q*1], m0
+%else
pmaddwd m3, m1, [filter2q+min_filter_count_x4q*1]
pmaddwd m1, [filter1q+min_filter_count_x4q*1]
paddd m2, m3
paddd m0, m1
+%endif ; cpuflag
%else ; float/double
%if cpuflag(fma4) || cpuflag(fma3)
fmaddp%4 m2, m1, [filter2q+min_filter_count_x4q*1], m2
@@ -447,18 +444,21 @@ cglobal resample_linear_%1, 1, 7, 5, ctx, min_filter_length_x4, filter2, \
%ifidn %1, int16
%if mmsize == 16
+%if cpuflag(xop)
+ vphadddq m2, m2
+ vphadddq m0, m0
+%endif
pshufd m3, m2, q0032
pshufd m1, m0, q0032
paddd m2, m3
paddd m0, m1
- pshufd m3, m2, q0001
- pshufd m1, m0, q0001
-%else ; mmsize == 8
- pshufw m3, m2, q0032
- pshufw m1, m0, q0032
%endif
+%if notcpuflag(xop)
+ PSHUFLW m3, m2, q0032
+ PSHUFLW m1, m0, q0032
paddd m2, m3
paddd m0, m1
+%endif
psubd m2, m0
; This is probably a really bad idea on atom and other machines with a
; long transfer latency between GPRs and XMMs (atom). However, it does
@@ -591,4 +591,10 @@ RESAMPLE_FNS int16, 2, 1
INIT_XMM sse2
RESAMPLE_FNS int16, 2, 1
+%if HAVE_XOP_EXTERNAL
+INIT_XMM xop
+RESAMPLE_FNS int16, 2, 1
+%endif
+
+INIT_XMM sse2
RESAMPLE_FNS double, 8, 3, d, pdbl_1
diff --git a/libswresample/x86/resample_x86_dsp.c b/libswresample/x86/resample_x86_dsp.c
index ff9f1ec83e..c030b8c825 100644
--- a/libswresample/x86/resample_x86_dsp.c
+++ b/libswresample/x86/resample_x86_dsp.c
@@ -35,6 +35,7 @@ int ff_resample_linear_##type##_##opt(ResampleContext *c, uint8_t *dst, \
RESAMPLE_FUNCS(int16, mmxext);
RESAMPLE_FUNCS(int16, sse2);
+RESAMPLE_FUNCS(int16, xop);
RESAMPLE_FUNCS(float, sse);
RESAMPLE_FUNCS(float, avx);
RESAMPLE_FUNCS(float, fma3);
@@ -73,4 +74,8 @@ void swresample_dsp_x86_init(ResampleContext *c)
c->dsp.resample_common[FNIDX(FLTP)] = ff_resample_common_float_fma4;
c->dsp.resample_linear[FNIDX(FLTP)] = ff_resample_linear_float_fma4;
}
+ if (HAVE_XOP_EXTERNAL && mm_flags & AV_CPU_FLAG_XOP) {
+ c->dsp.resample_common[FNIDX(S16P)] = ff_resample_common_int16_xop;
+ c->dsp.resample_linear[FNIDX(S16P)] = ff_resample_linear_int16_xop;
+ }
}