diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2014-04-04 23:38:00 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-04-04 23:40:08 +0200 |
commit | 51fd962c0bbfab9a4e4d91364a33576e942dcf54 (patch) | |
tree | a7d0eec09c0a84bca254da28d1747fc64cca1c0b /libavcodec | |
parent | b00f6bb90a675f4e71f1b1ff4edabc74ff6b692d (diff) | |
parent | c74b86699c86bdf62e8570f41d8a38be5710baa3 (diff) | |
download | ffmpeg-51fd962c0bbfab9a4e4d91364a33576e942dcf54.tar.gz |
Merge commit 'c74b86699c86bdf62e8570f41d8a38be5710baa3'
* commit 'c74b86699c86bdf62e8570f41d8a38be5710baa3':
x86/synth_filter: add synth_filter_fma3
x86/synth_filter: add synth_filter_avx
x86/synth_filter: add synth_filter_sse
Conflicts:
libavcodec/x86/dcadsp.asm
libavcodec/x86/dcadsp_init.c
See: 64672098361361cd15d37e36f747ab44de5b80ca
See: 68c3ed936a76c3ff7738f602fa90237ac7e3ce08
See: 7fd64e3e36f79204c0eda7cacce6884c14ddc1fb
See: aa1f38015cb0d04a5c50a8957dd7aba79f0d8882
See: dfd865e51b890d9be394804bccddf55198f4a251
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/dcadsp.asm | 10 | ||||
-rw-r--r-- | libavcodec/x86/dcadsp_init.c | 8 |
2 files changed, 10 insertions, 8 deletions
```diff
diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 10cb2983aa..1a725978a3 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -292,7 +292,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
 %define scale m0
 %if ARCH_X86_32 || WIN64
 %if cpuflag(sse2) && notcpuflag(avx)
-    movd m0, scalem
+    movd scale, scalem
     SPLATD m0
 %else
     VBROADCASTSS m0, scalem
@@ -311,7 +311,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
     sub r5q, offmp
     and r5q, -64
     shl r5q, 2
-%if ARCH_X86_32 || mmsize < 32
+%if ARCH_X86_32 || notcpuflag(avx)
     mov OFFQ, r5q
 %define i r5q
     mov i, 16 * 4 - (ARCH_X86_64 + 1) * mmsize ; main loop counter
@@ -337,7 +337,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
 %define j r3q
     mov win, windowm
     mov ptr1, synth_bufm
-%if ARCH_X86_32 || mmsize < 32
+%if ARCH_X86_32 || notcpuflag(avx)
     add win, i
     add ptr1, i
 %endif
@@ -356,7 +356,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
     mov ptr2, synth_bufmp
     ; prepare the inner loop counter
     mov j, OFFQ
-%if ARCH_X86_32 || mmsize < 32
+%if ARCH_X86_32 || notcpuflag(avx)
     sub ptr2, i
 %endif
 .loop1:
@@ -403,7 +403,7 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
     mova [outq + i + 0 * 4 + mmsize], m7
     mova [outq + i + 16 * 4 + mmsize], m8
 %endif
-%if ARCH_X86_32 || mmsize < 32
+%if ARCH_X86_32 || notcpuflag(avx)
     sub i, (ARCH_X86_64 + 1) * mmsize
     jge .mainloop
 %endif
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index 48880d628a..bc94b44376 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -62,9 +62,9 @@ void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32], \
                                   const float window[512], \
                                   float out[32], intptr_t offset, float scale); \
 static void synth_filter_##opt(FFTContext *imdct, \
-                          float *synth_buf_ptr, int *synth_buf_offset, \
-                          float synth_buf2[32], const float window[512], \
-                          float out[32], const float in[32], float scale) \
+                               float *synth_buf_ptr, int *synth_buf_offset, \
+                               float synth_buf2[32], const float window[512], \
+                               float out[32], const float in[32], float scale) \
 { \
     float *synth_buf= synth_buf_ptr + *synth_buf_offset; \
     \
@@ -82,7 +82,9 @@ SYNTH_FILTER_FUNC(sse)
 #endif
 SYNTH_FILTER_FUNC(sse2)
 SYNTH_FILTER_FUNC(avx)
+#if HAVE_FMA3_EXTERNAL
 SYNTH_FILTER_FUNC(fma3)
+#endif
 #endif /* HAVE_YASM */
 
 av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
```