diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2010-08-28 21:03:13 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2010-08-28 21:03:13 +0000 |
commit | 19d929f9a38cbb5eedbb68684905b5dca2c13190 (patch) | |
tree | 482cdc18eb4b5236d2b3a22b36d9270e0afb0fa0 | |
parent | 2d0cdf3cc0190e3b2ae08d4f74c0b3fe7e193e19 (diff) | |
download | ffmpeg-19d929f9a38cbb5eedbb68684905b5dca2c13190.tar.gz |
cosmetics in imdct_sse
Originally committed as revision 24958 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/x86/fft_mmx.asm | 45 |
1 files changed, 20 insertions, 25 deletions
```diff
diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
index 31176d6c9a..b75ec0cc51 100644
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -532,20 +532,15 @@ INIT_XMM
     unpckhps xmm0, xmm2
 %endmacro
 
-%macro PREROTATEW 3 ;addr1, addr2, xmm
-    movlps   %1, %3
-    movhps   %2, %3
-%endmacro
-
 %macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5
     movaps   xmm6, [%4+%1*2]
     movaps   %2,   [%4+%1*2+0x10]
     movaps   %3,   xmm6
     movaps   xmm7, %2
-    mulps    xmm6, [%5+%1*1]
-    mulps    %2,   [%6+%1*1]
-    mulps    %3,   [%6+%1*1]
-    mulps    xmm7, [%5+%1*1]
+    mulps    xmm6, [%5+%1]
+    mulps    %2,   [%6+%1]
+    mulps    %3,   [%6+%1]
+    mulps    xmm7, [%5+%1]
     subps    %2,   xmm6
     addps    %3,   xmm7
 %endmacro
@@ -576,8 +571,6 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
 %define rrevtab r10
 %define rtcos   r11
 %define rtsin   r12
-    push  r10
-    push  r11
     push  r12
     push  r13
     push  r14
@@ -620,21 +613,25 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
 
     PREROTATER r4, r3, r2, rtcos, rtsin
 %ifdef ARCH_X86_64
-    movzx  r5,  word [rrevtab+r4*1-4]
-    movzx  r6,  word [rrevtab+r4*1-2]
-    movzx  r13, word [rrevtab+r3*1]
-    movzx  r14, word [rrevtab+r3*1+2]
-    PREROTATEW [r1+r5 *8], [r1+r6 *8], xmm0
-    PREROTATEW [r1+r13*8], [r1+r14*8], xmm1
+    movzx  r5,  word [rrevtab+r4-4]
+    movzx  r6,  word [rrevtab+r4-2]
+    movzx  r13, word [rrevtab+r3]
+    movzx  r14, word [rrevtab+r3+2]
+    movlps [r1+r5 *8], xmm0
+    movhps [r1+r6 *8], xmm0
+    movlps [r1+r13*8], xmm1
+    movhps [r1+r14*8], xmm1
     add    r4, 4
 %else
     mov    r6, [esp]
-    movzx  r5, word [r6+r4*1-4]
-    movzx  r4, word [r6+r4*1-2]
-    PREROTATEW [r1+r5*8], [r1+r4*8], xmm0
-    movzx  r5, word [r6+r3*1]
-    movzx  r4, word [r6+r3*1+2]
-    PREROTATEW [r1+r5*8], [r1+r4*8], xmm1
+    movzx  r5, word [r6+r4-4]
+    movzx  r4, word [r6+r4-2]
+    movlps [r1+r5*8], xmm0
+    movhps [r1+r4*8], xmm0
+    movzx  r5, word [r6+r3]
+    movzx  r4, word [r6+r3+2]
+    movlps [r1+r5*8], xmm1
+    movhps [r1+r4*8], xmm1
 %endif
     sub    r3, 4
     jns    .pre
@@ -663,8 +660,6 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
     pop    r14
     pop    r13
     pop    r12
-    pop    r11
-    pop    r10
 %else
     add    esp, 12
 %endif
```