about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Christophe Gisquet <christophe.gisquet@gmail.com>, 2012-06-26 16:10:33 +0200
committer: Mans Rullgard <mans@mansr.com>, 2012-06-27 12:49:33 +0100
commit: a5bfa66df516b7be55fd08fc62c2b012fc18e340 (patch)
tree: ae39590342cc74781342dcb819164037b3b0bc7b
parent: 75d339e044f9b87dd9aa4bdaee73b1a8323d4a15 (diff)
download: ffmpeg-a5bfa66df516b7be55fd08fc62c2b012fc18e340.tar.gz
x86: fft: replace call to memcpy by a loop
The function call was a mess to handle, and memcpy cannot make the assumptions we do in the new code. Tested on an IMC sample: 430c -> 370c. Signed-off-by: Mans Rullgard <mans@mansr.com>
-rw-r--r-- libavcodec/x86/fft_mmx.asm | 37
1 files changed, 12 insertions, 25 deletions
diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
index 007f5caf77..1a430b9c2c 100644
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -615,8 +615,6 @@ cglobal fft_calc, 2,5,8
.end:
REP_RET
-cextern_naked memcpy
-
cglobal fft_permute, 2,7,1
mov r4, [r0 + FFTContext.revtab]
mov r5, [r0 + FFTContext.tmpbuf]
@@ -637,29 +635,18 @@ cglobal fft_permute, 2,7,1
cmp r0, r2
jl .loop
shl r2, 3
-%if ARCH_X86_64
- mov r0, r1
- mov r1, r5
-%endif
-%if WIN64
- sub rsp, 8
- call memcpy
- add rsp, 8
- RET
-%elif ARCH_X86_64
-%ifdef PIC
- jmp memcpy wrt ..plt
-%else
- jmp memcpy
-%endif
-%else
- push r2
- push r5
- push r1
- call memcpy
- add esp, 12
- RET
-%endif
+ add r1, r2
+ add r5, r2
+ neg r2
+; nbits >= 2 (FFT4) and sizeof(FFTComplex)=8 => at least 32B
+.loopcopy:
+ movaps xmm0, [r5 + r2]
+ movaps xmm1, [r5 + r2 + 16]
+ movaps [r1 + r2], xmm0
+ movaps [r1 + r2 + 16], xmm1
+ add r2, 32
+ jl .loopcopy
+ REP_RET
cglobal imdct_calc, 3,5,3
mov r3d, [r0 + FFTContext.mdctsize]