about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/x86/fft_mmx.asm
diff options
context:
space:
mode:
author: Loren Merritt <lorenm@u.washington.edu> 2010-08-28 21:03:13 +0000
committer: Loren Merritt <lorenm@u.washington.edu> 2010-08-28 21:03:13 +0000
commit: 19d929f9a38cbb5eedbb68684905b5dca2c13190 (patch)
tree: 482cdc18eb4b5236d2b3a22b36d9270e0afb0fa0 /libavcodec/x86/fft_mmx.asm
parent: 2d0cdf3cc0190e3b2ae08d4f74c0b3fe7e193e19 (diff)
download: ffmpeg-19d929f9a38cbb5eedbb68684905b5dca2c13190.tar.gz
cosmetics in imdct_sse
Originally committed as revision 24958 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/x86/fft_mmx.asm')
-rw-r--r-- libavcodec/x86/fft_mmx.asm 45
1 files changed, 20 insertions, 25 deletions
diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
index 31176d6c9a..b75ec0cc51 100644
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -532,20 +532,15 @@ INIT_XMM
unpckhps xmm0, xmm2
%endmacro
-%macro PREROTATEW 3 ;addr1, addr2, xmm
- movlps %1, %3
- movhps %2, %3
-%endmacro
-
%macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5
movaps xmm6, [%4+%1*2]
movaps %2, [%4+%1*2+0x10]
movaps %3, xmm6
movaps xmm7, %2
- mulps xmm6, [%5+%1*1]
- mulps %2, [%6+%1*1]
- mulps %3, [%6+%1*1]
- mulps xmm7, [%5+%1*1]
+ mulps xmm6, [%5+%1]
+ mulps %2, [%6+%1]
+ mulps %3, [%6+%1]
+ mulps xmm7, [%5+%1]
subps %2, xmm6
addps %3, xmm7
%endmacro
@@ -576,8 +571,6 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
%define rrevtab r10
%define rtcos r11
%define rtsin r12
- push r10
- push r11
push r12
push r13
push r14
@@ -620,21 +613,25 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
PREROTATER r4, r3, r2, rtcos, rtsin
%ifdef ARCH_X86_64
- movzx r5, word [rrevtab+r4*1-4]
- movzx r6, word [rrevtab+r4*1-2]
- movzx r13, word [rrevtab+r3*1]
- movzx r14, word [rrevtab+r3*1+2]
- PREROTATEW [r1+r5 *8], [r1+r6 *8], xmm0
- PREROTATEW [r1+r13*8], [r1+r14*8], xmm1
+ movzx r5, word [rrevtab+r4-4]
+ movzx r6, word [rrevtab+r4-2]
+ movzx r13, word [rrevtab+r3]
+ movzx r14, word [rrevtab+r3+2]
+ movlps [r1+r5 *8], xmm0
+ movhps [r1+r6 *8], xmm0
+ movlps [r1+r13*8], xmm1
+ movhps [r1+r14*8], xmm1
add r4, 4
%else
mov r6, [esp]
- movzx r5, word [r6+r4*1-4]
- movzx r4, word [r6+r4*1-2]
- PREROTATEW [r1+r5*8], [r1+r4*8], xmm0
- movzx r5, word [r6+r3*1]
- movzx r4, word [r6+r3*1+2]
- PREROTATEW [r1+r5*8], [r1+r4*8], xmm1
+ movzx r5, word [r6+r4-4]
+ movzx r4, word [r6+r4-2]
+ movlps [r1+r5*8], xmm0
+ movhps [r1+r4*8], xmm0
+ movzx r5, word [r6+r3]
+ movzx r4, word [r6+r3+2]
+ movlps [r1+r5*8], xmm1
+ movhps [r1+r4*8], xmm1
%endif
sub r3, 4
jns .pre
@@ -663,8 +660,6 @@ cglobal imdct_half_sse, 3,7,8; FFTContext *s, FFTSample *output, const FFTSample
pop r14
pop r13
pop r12
- pop r11
- pop r10
%else
add esp, 12
%endif