diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2011-02-08 15:56:32 -0500 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-02-09 03:33:55 +0100 |
commit | a239d534d73428d9118bfac0688de1baab78f662 (patch) | |
tree | ae99c76bdf3c714f314ca4dbaf249dab099feca8 | |
parent | 070e5ba560b86d8b0de72536d9e861b215d7ee20 (diff) | |
download | ffmpeg-a239d534d73428d9118bfac0688de1baab78f662.tar.gz |
Fix ff_emu_edge_core_sse() on Win64.
Keep emu_edge_v_extend_15 under 128 bytes on Win64 by being stricter about
register sizes and about which register is used for operations where
several are available. This fixes segfaults in calls to emulated_edge()
on Win64.
(cherry picked from commit 17cf7c68ed26a4cb3c7adf7488a38c2e19118918)
-rw-r--r-- | libavcodec/x86/dsputil_yasm.asm | 20 |
1 files changed, 15 insertions, 5 deletions
```diff
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index b1b37e1fb9..53884a6b40 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -468,6 +468,11 @@ cglobal emu_edge_core_%1, 2, 7, 0
 %define valw ax
 %define valw2 r10w
 %define valw3 r3w
+%ifdef WIN64
+%define valw4 r4w
+%else ; unix64
+%define valw4 r3w
+%endif
 %define vald eax
 %else
 %define vall bl
@@ -475,6 +480,7 @@ cglobal emu_edge_core_%1, 2, 7, 0
 %define valw bx
 %define valw2 r6w
 %define valw3 valw2
+%define valw4 valw3
 %define vald ebx
 %define stack_offset 0x14
 %endif
@@ -537,8 +543,10 @@ cglobal emu_edge_core_%1, 2, 7, 0
 %elif (%2-%%src_off) == 3
 %ifidn %1, top
  mov valw2, [r1+%%src_off]
-%else ; %1 != top
+%elifidn %1, body
  mov valw3, [r1+%%src_off]
+%elifidn %1, bottom
+ mov valw4, [r1+%%src_off]
 %endif ; %1 ==/!= top
  mov vall, [r1+%%src_off+2]
 %endif ; (%2-%%src_off) == 1/2/3
@@ -584,8 +592,10 @@ cglobal emu_edge_core_%1, 2, 7, 0
 %elif (%2-%%dst_off) == 3
 %ifidn %1, top
  mov [r0+%%dst_off], valw2
-%else ; %1 != top
+%elifidn %1, body
  mov [r0+%%dst_off], valw3
+%elifidn %1, bottom
+ mov [r0+%%dst_off], valw4
 %endif ; %1 ==/!= top
  mov [r0+%%dst_off+2], vall
 %endif ; (%2-%%dst_off) == 1/2/3
@@ -615,7 +625,7 @@ ALIGN 128
  WRITE_NUM_BYTES top, %%n, %1 ; write bytes
  add r0 , r2 ; dst += linesize
 %ifdef ARCH_X86_64
- dec r3
+ dec r3d
 %else ; ARCH_X86_32
  dec dword r3m
 %endif ; ARCH_X86_64/32
@@ -627,7 +637,7 @@ ALIGN 128
  WRITE_NUM_BYTES body, %%n, %1 ; write bytes
  add r0 , r2 ; dst += linesize
  add r1 , r2 ; src += linesize
- dec r4
+ dec r4d
  jnz .emuedge_copy_body_ %+ %%n %+ _loop ; } while (--end_y)
 
  ; copy bottom pixels
@@ -638,7 +648,7 @@ ALIGN 128
 .emuedge_extend_bottom_ %+ %%n %+ _loop: ; do {
  WRITE_NUM_BYTES bottom, %%n, %1 ; write bytes
  add r0 , r2 ; dst += linesize
- dec r5
+ dec r5d
  jnz .emuedge_extend_bottom_ %+ %%n %+ _loop ; } while (--block_h)
 
 .emuedge_v_extend_end_ %+ %%n:
```