diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2013-10-08 11:22:54 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-10-08 11:23:00 +0200 |
commit | 1f17619fe4800dce32a81fda0cec9afad91ff095 (patch) | |
tree | d8da9d339ec07d6a0910dbe6563ad96d47cfa2b9 /libavcodec/x86 | |
parent | 17d9c7c208915c6c090e10508056f68b93440839 (diff) | |
parent | bbe4a6db44f0b55b424a5cc9d3e89cd88e250450 (diff) | |
download | ffmpeg-1f17619fe4800dce32a81fda0cec9afad91ff095.tar.gz |
Merge commit 'bbe4a6db44f0b55b424a5cc9d3e89cd88e250450'
* commit 'bbe4a6db44f0b55b424a5cc9d3e89cd88e250450':
x86inc: Utilize the shadow space on 64-bit Windows
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/fft.asm | 4 | ||||
-rw-r--r-- | libavcodec/x86/h264_deblock.asm | 19 |
2 files changed, 9 insertions, 14 deletions
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index 5071741d63..879b84e5fd 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -672,13 +672,13 @@ cglobal imdct_calc, 3,5,3
     push r1
     push r0
 %else
-    sub rsp, 8
+    sub rsp, 8+32*WIN64 ; allocate win64 shadow space
 %endif
     call r4
 %if ARCH_X86_32
     add esp, 12
 %else
-    add rsp, 8
+    add rsp, 8+32*WIN64
 %endif
     POP r1
     POP r3
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index d58e16ca89..1317783bec 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -331,16 +331,14 @@ cglobal deblock_v_luma_8, 5,5,10
 ; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 ;-----------------------------------------------------------------------------
 INIT_MMX cpuname
-cglobal deblock_h_luma_8, 5,9
+cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
     movsxd r7, r1d
     lea r8, [r7+r7*2]
     lea r6, [r0-4]
     lea r5, [r0-4+r8]
 %if WIN64
-    sub rsp, 0x98
-    %define pix_tmp rsp+0x30
+    %define pix_tmp rsp+0x30 ; shadow space + r4
 %else
-    sub rsp, 0x68
     %define pix_tmp rsp
 %endif
 
@@ -379,11 +377,6 @@ cglobal deblock_h_luma_8, 5,9
     movq m3, [pix_tmp+0x40]
     TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r7, r8)
 
-%if WIN64
-    add rsp, 0x98
-%else
-    add rsp, 0x68
-%endif
     RET
 %endmacro
 
@@ -708,13 +701,16 @@ INIT_MMX cpuname
 ;-----------------------------------------------------------------------------
 ; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
 ;-----------------------------------------------------------------------------
-cglobal deblock_h_luma_intra_8, 4,9
+cglobal deblock_h_luma_intra_8, 4,9,0,0x80
     movsxd r7, r1d
     lea r8, [r7*3]
     lea r6, [r0-4]
     lea r5, [r0-4+r8]
-    sub rsp, 0x88
+%if WIN64
+    %define pix_tmp rsp+0x20 ; shadow space
+%else
     %define pix_tmp rsp
+%endif
 
     ; transpose 8x16 -> tmp space
     TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r7, r8), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30)
@@ -734,7 +730,6 @@ cglobal deblock_h_luma_intra_8, 4,9
     sub r5, r7
     shr r7, 3
     TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30), PASS8ROWS(r6, r5, r7, r8)
-    add rsp, 0x88
     RET
 %else
 cglobal deblock_h_luma_intra_8, 2,4,8,0x80