diff options
author | Diego Biurrun <diego@biurrun.de> | 2012-07-15 20:11:55 +0200 |
---|---|---|
committer | Diego Biurrun <diego@biurrun.de> | 2017-03-14 17:23:32 +0100 |
commit | 681a86aba6cb09b98ad716d986182060c7795d20 (patch) | |
tree | 5f2fd54c9772deee816a7b57d04a7dea8da6da41 | |
parent | e9bb77fb1012cba1951a82136df7071f71bce8fb (diff) | |
download | ffmpeg-681a86aba6cb09b98ad716d986182060c7795d20.tar.gz |
x86: fft: Port to cpuflags
-rw-r--r-- | libavcodec/x86/fft.asm | 94 |
1 files changed, 43 insertions, 51 deletions
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm index 2c7438134c..63e92f7159 100644 --- a/libavcodec/x86/fft.asm +++ b/libavcodec/x86/fft.asm @@ -190,6 +190,23 @@ SECTION .text addps %2, %2, %5 ; {i0,i1,i2,i3} %endmacro +%macro INTERL 5 +%if cpuflag(avx) + vunpckhps %3, %2, %1 + vunpcklps %2, %2, %1 + vextractf128 %4(%5), %2, 0 + vextractf128 %4 %+ H(%5), %3, 0 + vextractf128 %4(%5 + 1), %2, 1 + vextractf128 %4 %+ H(%5 + 1), %3, 1 +%elif cpuflag(sse) + mova %3, %2 + unpcklps %2, %1 + unpckhps %3, %1 + mova %4(%5), %2 + mova %4(%5+1), %3 +%endif +%endmacro + ; scheduled for cpu-bound sizes %macro PASS_SMALL 3 ; (to load m4-m7), wre, wim IF%1 mova m4, Z(4) @@ -536,17 +553,6 @@ DEFINE_ARGS zc, w, n, o1, o3 INIT_YMM avx -%macro INTERL_AVX 5 - vunpckhps %3, %2, %1 - vunpcklps %2, %2, %1 - vextractf128 %4(%5), %2, 0 - vextractf128 %4 %+ H(%5), %3, 0 - vextractf128 %4(%5 + 1), %2, 1 - vextractf128 %4 %+ H(%5 + 1), %3, 1 -%endmacro - -%define INTERL INTERL_AVX - DECL_PASS pass_avx, PASS_BIG 1 DECL_PASS pass_interleave_avx, PASS_BIG 0 @@ -560,16 +566,6 @@ cglobal fft_calc, 2,5,8 INIT_XMM sse -%macro INTERL_SSE 5 - mova %3, %2 - unpcklps %2, %1 - unpckhps %3, %1 - mova %4(%5), %2 - mova %4(%5+1), %3 -%endmacro - -%define INTERL INTERL_SSE - DECL_PASS pass_sse, PASS_BIG 1 DECL_PASS pass_interleave_sse, PASS_BIG 0 @@ -855,16 +851,30 @@ INIT_XMM sse %endmacro %macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5 +%if cpuflag(sse) mulps m6, %3, [%5+%1] mulps m7, %2, [%5+%1] mulps %2, %2, [%6+%1] mulps %3, %3, [%6+%1] subps %2, %2, m6 addps %3, %3, m7 +%elif cpuflag(3dnow) + mova m6, [%1+%2*2] + mova %3, [%1+%2*2+8] + mova %4, m6 + mova m7, %3 + pfmul m6, [%5+%2] + pfmul %3, [%6+%2] + pfmul %4, [%6+%2] + pfmul m7, [%5+%2] + pfsub %3, m6 + pfadd %4, m7 +%endif %endmacro -%macro POSROTATESHUF_AVX 5 ;j, k, z+n8, tcos+n8, tsin+n8 +%macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8 .post: +%if cpuflag(avx) vmovaps ymm1, [%3+%1*2] vmovaps ymm0, [%3+%1*2+0x20] vmovaps ymm3, [%3+%2*2] @@ -893,10 +903,7 @@ INIT_XMM sse sub %2, 0x20 add %1, 0x20 jl .post -%endmacro - -%macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8 -.post: +%elif cpuflag(sse) movaps xmm1, [%3+%1*2] movaps xmm0, [%3+%1*2+0x10] CMUL %1, xmm0, xmm1, %3, %4, %5 @@ -918,25 +925,9 @@ INIT_XMM sse sub %2, 0x10 add %1, 0x10 jl .post -%endmacro - -%macro CMUL_3DNOW 6 - mova m6, [%1+%2*2] - mova %3, [%1+%2*2+8] - mova %4, m6 - mova m7, %3 - pfmul m6, [%5+%2] - pfmul %3, [%6+%2] - pfmul %4, [%6+%2] - pfmul m7, [%5+%2] - pfsub %3, m6 - pfadd %4, m7 -%endmacro - -%macro POSROTATESHUF_3DNOW 5 ;j, k, z+n8, tcos+n8, tsin+n8 -.post: - CMUL_3DNOW %3, %1, m0, m1, %4, %5 - CMUL_3DNOW %3, %2, m2, m3, %4, %5 +%elif cpuflag(3dnow) + CMUL %3, %1, m0, m1, %4, %5 + CMUL %3, %2, m2, m3, %4, %5 movd [%3+%1*2+ 0], m0 movd [%3+%2*2+12], m1 movd [%3+%2*2+ 0], m2 @@ -952,9 +943,10 @@ INIT_XMM sse sub %2, 8 add %1, 8 jl .post +%endif %endmacro -%macro DECL_IMDCT 1 +%macro DECL_IMDCT 0 cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *input %if ARCH_X86_64 %define rrevtab r7 @@ -1060,7 +1052,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i neg r0 mov r1, -mmsize sub r1, r0 - %1 r0, r1, r6, rtcos, rtsin + POSROTATESHUF r0, r1, r6, rtcos, rtsin %if ARCH_X86_64 == 0 add esp, 12 %endif @@ -1070,17 +1062,17 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i RET %endmacro -DECL_IMDCT POSROTATESHUF +DECL_IMDCT %if ARCH_X86_32 INIT_MMX 3dnow -DECL_IMDCT POSROTATESHUF_3DNOW +DECL_IMDCT INIT_MMX 3dnowext -DECL_IMDCT POSROTATESHUF_3DNOW +DECL_IMDCT %endif INIT_YMM avx -DECL_IMDCT POSROTATESHUF_AVX +DECL_IMDCT %endif ; CONFIG_MDCT |