about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/x86
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2014-04-04 23:38:00 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2014-04-04 23:40:08 +0200
commit: 51fd962c0bbfab9a4e4d91364a33576e942dcf54 (patch)
tree: a7d0eec09c0a84bca254da28d1747fc64cca1c0b /libavcodec/x86
parent: b00f6bb90a675f4e71f1b1ff4edabc74ff6b692d (diff)
parent: c74b86699c86bdf62e8570f41d8a38be5710baa3 (diff)
download: ffmpeg-51fd962c0bbfab9a4e4d91364a33576e942dcf54.tar.gz
Merge commit 'c74b86699c86bdf62e8570f41d8a38be5710baa3'

* commit 'c74b86699c86bdf62e8570f41d8a38be5710baa3':
  x86/synth_filter: add synth_filter_fma3
  x86/synth_filter: add synth_filter_avx
  x86/synth_filter: add synth_filter_sse

Conflicts:
	libavcodec/x86/dcadsp.asm
	libavcodec/x86/dcadsp_init.c

See: 64672098361361cd15d37e36f747ab44de5b80ca
See: 68c3ed936a76c3ff7738f602fa90237ac7e3ce08
See: 7fd64e3e36f79204c0eda7cacce6884c14ddc1fb
See: aa1f38015cb0d04a5c50a8957dd7aba79f0d8882
See: dfd865e51b890d9be394804bccddf55198f4a251
Merged-by: Michael Niedermayer <michaelni@gmx.at>

Diffstat (limited to 'libavcodec/x86')
-rw-r--r--  libavcodec/x86/dcadsp.asm     10
-rw-r--r--  libavcodec/x86/dcadsp_init.c   8
2 files changed, 10 insertions, 8 deletions
diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 10cb2983aa..1a725978a3 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -292,7 +292,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
%define scale m0
%if ARCH_X86_32 || WIN64
%if cpuflag(sse2) && notcpuflag(avx)
- movd m0, scalem
+ movd scale, scalem
SPLATD m0
%else
VBROADCASTSS m0, scalem
@@ -311,7 +311,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
sub r5q, offmp
and r5q, -64
shl r5q, 2
-%if ARCH_X86_32 || mmsize < 32
+%if ARCH_X86_32 || notcpuflag(avx)
mov OFFQ, r5q
%define i r5q
mov i, 16 * 4 - (ARCH_X86_64 + 1) * mmsize ; main loop counter
@@ -337,7 +337,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
%define j r3q
mov win, windowm
mov ptr1, synth_bufm
-%if ARCH_X86_32 || mmsize < 32
+%if ARCH_X86_32 || notcpuflag(avx)
add win, i
add ptr1, i
%endif
@@ -356,7 +356,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
mov ptr2, synth_bufmp
; prepare the inner loop counter
mov j, OFFQ
-%if ARCH_X86_32 || mmsize < 32
+%if ARCH_X86_32 || notcpuflag(avx)
sub ptr2, i
%endif
.loop1:
@@ -403,7 +403,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
mova [outq + i + 0 * 4 + mmsize], m7
mova [outq + i + 16 * 4 + mmsize], m8
%endif
-%if ARCH_X86_32 || mmsize < 32
+%if ARCH_X86_32 || notcpuflag(avx)
sub i, (ARCH_X86_64 + 1) * mmsize
jge .mainloop
%endif
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index 48880d628a..bc94b44376 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -62,9 +62,9 @@ void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32], \
const float window[512], \
float out[32], intptr_t offset, float scale); \
static void synth_filter_##opt(FFTContext *imdct, \
- float *synth_buf_ptr, int *synth_buf_offset, \
- float synth_buf2[32], const float window[512], \
- float out[32], const float in[32], float scale) \
+ float *synth_buf_ptr, int *synth_buf_offset, \
+ float synth_buf2[32], const float window[512], \
+ float out[32], const float in[32], float scale) \
{ \
float *synth_buf= synth_buf_ptr + *synth_buf_offset; \
\
@@ -82,7 +82,9 @@ SYNTH_FILTER_FUNC(sse)
#endif
SYNTH_FILTER_FUNC(sse2)
SYNTH_FILTER_FUNC(avx)
+#if HAVE_FMA3_EXTERNAL
SYNTH_FILTER_FUNC(fma3)
+#endif
#endif /* HAVE_YASM */
av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)