diff options
author | Henrik Gramner <henrik@gramner.com> | 2013-09-11 17:49:26 +0200 |
---|---|---|
committer | Derek Buitenhuis <derek.buitenhuis@gmail.com> | 2013-10-07 06:25:35 -0400 |
commit | bbe4a6db44f0b55b424a5cc9d3e89cd88e250450 (patch) | |
tree | 868ade1083707ea830a8514c093640be5147f5f1 /libavcodec/x86/h264_deblock.asm | |
parent | 3fb78e99a04d0ed8db834d813d933eb86c37142a (diff) | |
download | ffmpeg-bbe4a6db44f0b55b424a5cc9d3e89cd88e250450.tar.gz |
x86inc: Utilize the shadow space on 64-bit Windows
Store XMM6 and XMM7 in the shadow space in functions that
clobber them. This way we don't have to adjust the stack
pointer as often, reducing the number of instructions as
well as code size.
Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
Diffstat (limited to 'libavcodec/x86/h264_deblock.asm')
-rw-r--r-- | libavcodec/x86/h264_deblock.asm | 19 |
1 files changed, 7 insertions, 12 deletions
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm index fc6c983052..6e29ce7373 100644 --- a/libavcodec/x86/h264_deblock.asm +++ b/libavcodec/x86/h264_deblock.asm @@ -331,16 +331,14 @@ cglobal deblock_v_luma_8, 5,5,10 ; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 ) ;----------------------------------------------------------------------------- INIT_MMX cpuname -cglobal deblock_h_luma_8, 5,9 +cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64 movsxd r7, r1d lea r8, [r7+r7*2] lea r6, [r0-4] lea r5, [r0-4+r8] %if WIN64 - sub rsp, 0x98 - %define pix_tmp rsp+0x30 + %define pix_tmp rsp+0x30 ; shadow space + r4 %else - sub rsp, 0x68 %define pix_tmp rsp %endif @@ -379,11 +377,6 @@ cglobal deblock_h_luma_8, 5,9 movq m3, [pix_tmp+0x40] TRANSPOSE8x4B_STORE PASS8ROWS(r6, r5, r7, r8) -%if WIN64 - add rsp, 0x98 -%else - add rsp, 0x68 -%endif RET %endmacro @@ -704,13 +697,16 @@ INIT_MMX cpuname ;----------------------------------------------------------------------------- ; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta ) ;----------------------------------------------------------------------------- -cglobal deblock_h_luma_intra_8, 4,9 +cglobal deblock_h_luma_intra_8, 4,9,0,0x80 movsxd r7, r1d lea r8, [r7*3] lea r6, [r0-4] lea r5, [r0-4+r8] - sub rsp, 0x88 +%if WIN64 + %define pix_tmp rsp+0x20 ; shadow space +%else %define pix_tmp rsp +%endif ; transpose 8x16 -> tmp space TRANSPOSE8x8_MEM PASS8ROWS(r6, r5, r7, r8), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30) @@ -730,7 +726,6 @@ cglobal deblock_h_luma_intra_8, 4,9 sub r5, r7 shr r7, 3 TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30), PASS8ROWS(r6, r5, r7, r8) - add rsp, 0x88 RET %else cglobal deblock_h_luma_intra_8, 2,4,8,0x80 |