diff options
author | Henrik Gramner <hengar-6@student.ltu.se> | 2012-04-04 20:03:15 +0000 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2012-04-11 15:47:00 -0400 |
commit | 729f90e26802057f06905ab15a34612168eeac80 (patch) | |
tree | 41f8c4cedf10851b5b437aeeb558ce3d0f8db1dc /libavutil/x86/x86inc.asm | |
parent | e1ce756844e684876318570dcebc74bc66c084f0 (diff) | |
download | ffmpeg-729f90e26802057f06905ab15a34612168eeac80.tar.gz |
x86inc improvements for 64-bit
Add support for all x86-64 registers
Prefer caller-saved register over callee-saved on WIN64
Support up to 15 function arguments
Also (by Ronald S. Bultje)
Fix up our asm to work with new x86inc.asm.
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
Diffstat (limited to 'libavutil/x86/x86inc.asm')
-rw-r--r-- | libavutil/x86/x86inc.asm | 218 |
1 files changed, 114 insertions, 104 deletions
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index 7db1e9c311..ea9f9a1550 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -1,11 +1,12 @@ ;***************************************************************************** ;* x86inc.asm: x264asm abstraction layer ;***************************************************************************** -;* Copyright (C) 2005-2011 x264 project +;* Copyright (C) 2005-2012 x264 project ;* ;* Authors: Loren Merritt <lorenm@u.washington.edu> ;* Anton Mitrofanov <BugMaster@narod.ru> ;* Jason Garrett-Glaser <darkshikari@gmail.com> +;* Henrik Gramner <hengar-6@student.ltu.se> ;* ;* Permission to use, copy, modify, and/or distribute this software for any ;* purpose with or without fee is hereby granted, provided that the above @@ -95,6 +96,9 @@ default rel %endif +; Always use long nops (reduces 0x90 spam in disassembly on x86_32) +CPU amdnop + ; Macros to eliminate most code duplication between x86_32 and x86_64: ; Currently this works only for leaf functions which load all their arguments ; into registers at the start, and make no other use of the stack. Luckily that @@ -128,18 +132,20 @@ ; rNm is the original location of arg N (a register or on the stack), dword ; rNmp is native size -%macro DECLARE_REG 6 +%macro DECLARE_REG 5-6 %define r%1q %2 %define r%1d %3 %define r%1w %4 %define r%1b %5 - %define r%1m %6 - %ifid %6 ; i.e. it's a register + %if %0 == 5 + %define r%1m %3 %define r%1mp %2 %elif ARCH_X86_64 ; memory - %define r%1mp qword %6 + %define r%1m [rsp + stack_offset + %6] + %define r%1mp qword r %+ %1m %else - %define r%1mp dword %6 + %define r%1m [esp + stack_offset + %6] + %define r%1mp dword r %+ %1m %endif %define r%1 %2 %endmacro @@ -187,7 +193,7 @@ DECLARE_REG_SIZE bp, bpl %endrep %endmacro -DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9 +DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %if ARCH_X86_64 %define gprsize 8 @@ -205,6 +211,33 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9 %assign stack_offset stack_offset-gprsize %endmacro +%macro PUSH_IF_USED 1-* + %rep %0 + %if %1 < regs_used + PUSH r%1 + %endif + %rotate 1 + %endrep +%endmacro + +%macro POP_IF_USED 1-* + %rep %0 + %if %1 < regs_used + pop r%1 + %endif + %rotate 1 + %endrep +%endmacro + +%macro LOAD_IF_USED 1-* + %rep %0 + %if %1 < num_args + mov r%1, r %+ %1 %+ mp + %endif + %rotate 1 + %endrep +%endmacro + %macro SUB 2 sub %1, %2 %ifidn %1, rsp @@ -272,39 +305,34 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9 %if WIN64 ; Windows x64 ;================================================= -DECLARE_REG 0, rcx, ecx, cx, cl, ecx -DECLARE_REG 1, rdx, edx, dx, dl, edx -DECLARE_REG 2, r8, r8d, r8w, r8b, r8d -DECLARE_REG 3, r9, r9d, r9w, r9b, r9d -DECLARE_REG 4, rdi, edi, di, dil, [rsp + stack_offset + 40] -DECLARE_REG 5, rsi, esi, si, sil, [rsp + stack_offset + 48] -DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56] -%define r7m [rsp + stack_offset + 64] -%define r8m [rsp + stack_offset + 72] - -%macro LOAD_IF_USED 2 ; reg_id, number_of_args - %if %1 < %2 - mov r%1, [rsp + stack_offset + 8 + %1*8] - %endif -%endmacro +DECLARE_REG 0, rcx, ecx, cx, cl +DECLARE_REG 1, rdx, edx, dx, dl +DECLARE_REG 2, R8, R8D, R8W, R8B +DECLARE_REG 3, R9, R9D, R9W, R9B +DECLARE_REG 4, R10, R10D, R10W, R10B, 40 +DECLARE_REG 5, R11, R11D, R11W, R11B, 48 +DECLARE_REG 6, rax, eax, ax, al, 56 +DECLARE_REG 7, rdi, edi, di, dil, 64 +DECLARE_REG 8, rsi, esi, si, sil, 72 +DECLARE_REG 9, rbx, ebx, bx, bl, 80 +DECLARE_REG 10, rbp, ebp, bp, bpl, 88 +DECLARE_REG 11, R12, R12D, R12W, R12B, 96 +DECLARE_REG 12, R13, R13D, R13W, R13B, 104 +DECLARE_REG 13, R14, R14D, R14W, R14B, 112 +DECLARE_REG 14, R15, R15D, R15W, R15B, 120 %macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names... - ASSERT %2 >= %1 + %assign num_args %1 %assign regs_used %2 - ASSERT regs_used <= 7 - %if regs_used > 4 - push r4 - push r5 - %assign stack_offset stack_offset+16 - %endif + ASSERT regs_used >= num_args + ASSERT regs_used <= 15 + PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14 %if mmsize == 8 %assign xmm_regs_used 0 %else WIN64_SPILL_XMM %3 %endif - LOAD_IF_USED 4, %1 - LOAD_IF_USED 5, %1 - LOAD_IF_USED 6, %1 + LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 DEFINE_ARGS %4 %endmacro @@ -312,12 +340,11 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56] %assign xmm_regs_used %1 ASSERT xmm_regs_used <= 16 %if xmm_regs_used > 6 - sub rsp, (xmm_regs_used-6)*16+16 - %assign stack_offset stack_offset+(xmm_regs_used-6)*16+16 + SUB rsp, (xmm_regs_used-6)*16+16 %assign %%i xmm_regs_used %rep (xmm_regs_used-6) %assign %%i %%i-1 - movdqa [rsp + (%%i-6)*16+8], xmm %+ %%i + movdqa [rsp + (%%i-6)*16+(~stack_offset&8)], xmm %+ %%i %endrep %endif %endmacro @@ -327,7 +354,7 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56] %assign %%i xmm_regs_used %rep (xmm_regs_used-6) %assign %%i %%i-1 - movdqa xmm %+ %%i, [%1 + (%%i-6)*16+8] + movdqa xmm %+ %%i, [%1 + (%%i-6)*16+(~stack_offset&8)] %endrep add %1, (xmm_regs_used-6)*16+16 %endif @@ -341,15 +368,12 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56] %macro RET 0 WIN64_RESTORE_XMM_INTERNAL rsp - %if regs_used > 4 - pop r5 - pop r4 - %endif + POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7 ret %endmacro %macro REP_RET 0 - %if regs_used > 4 || xmm_regs_used > 6 + %if regs_used > 7 || xmm_regs_used > 6 RET %else rep ret @@ -358,92 +382,80 @@ DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56] %elif ARCH_X86_64 ; *nix x64 ;============================================= -DECLARE_REG 0, rdi, edi, di, dil, edi -DECLARE_REG 1, rsi, esi, si, sil, esi -DECLARE_REG 2, rdx, edx, dx, dl, edx -DECLARE_REG 3, rcx, ecx, cx, cl, ecx -DECLARE_REG 4, r8, r8d, r8w, r8b, r8d -DECLARE_REG 5, r9, r9d, r9w, r9b, r9d -DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 8] -%define r7m [rsp + stack_offset + 16] -%define r8m [rsp + stack_offset + 24] - -%macro LOAD_IF_USED 2 ; reg_id, number_of_args - %if %1 < %2 - mov r%1, [rsp - 40 + %1*8] - %endif -%endmacro +DECLARE_REG 0, rdi, edi, di, dil +DECLARE_REG 1, rsi, esi, si, sil +DECLARE_REG 2, rdx, edx, dx, dl +DECLARE_REG 3, rcx, ecx, cx, cl +DECLARE_REG 4, R8, R8D, R8W, R8B +DECLARE_REG 5, R9, R9D, R9W, R9B +DECLARE_REG 6, rax, eax, ax, al, 8 +DECLARE_REG 7, R10, R10D, R10W, R10B, 16 +DECLARE_REG 8, R11, R11D, R11W, R11B, 24 +DECLARE_REG 9, rbx, ebx, bx, bl, 32 +DECLARE_REG 10, rbp, ebp, bp, bpl, 40 +DECLARE_REG 11, R12, R12D, R12W, R12B, 48 +DECLARE_REG 12, R13, R13D, R13W, R13B, 56 +DECLARE_REG 13, R14, R14D, R14W, R14B, 64 +DECLARE_REG 14, R15, R15D, R15W, R15B, 72 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names... - ASSERT %2 >= %1 - ASSERT %2 <= 7 - LOAD_IF_USED 6, %1 + %assign num_args %1 + %assign regs_used %2 + ASSERT regs_used >= num_args + ASSERT regs_used <= 15 + PUSH_IF_USED 9, 10, 11, 12, 13, 14 + LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14 DEFINE_ARGS %4 %endmacro %macro RET 0 + POP_IF_USED 14, 13, 12, 11, 10, 9 ret %endmacro %macro REP_RET 0 - rep ret + %if regs_used > 9 + RET + %else + rep ret + %endif %endmacro %else ; X86_32 ;============================================================== -DECLARE_REG 0, eax, eax, ax, al, [esp + stack_offset + 4] -DECLARE_REG 1, ecx, ecx, cx, cl, [esp + stack_offset + 8] -DECLARE_REG 2, edx, edx, dx, dl, [esp + stack_offset + 12] -DECLARE_REG 3, ebx, ebx, bx, bl, [esp + stack_offset + 16] -DECLARE_REG 4, esi, esi, si, null, [esp + stack_offset + 20] -DECLARE_REG 5, edi, edi, di, null, [esp + stack_offset + 24] -DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28] -%define r7m [esp + stack_offset + 32] -%define r8m [esp + stack_offset + 36] +DECLARE_REG 0, eax, eax, ax, al, 4 +DECLARE_REG 1, ecx, ecx, cx, cl, 8 +DECLARE_REG 2, edx, edx, dx, dl, 12 +DECLARE_REG 3, ebx, ebx, bx, bl, 16 +DECLARE_REG 4, esi, esi, si, null, 20 +DECLARE_REG 5, edi, edi, di, null, 24 +DECLARE_REG 6, ebp, ebp, bp, null, 28 %define rsp esp -%macro PUSH_IF_USED 1 ; reg_id - %if %1 < regs_used - push r%1 - %assign stack_offset stack_offset+4 - %endif -%endmacro - -%macro POP_IF_USED 1 ; reg_id - %if %1 < regs_used - pop r%1 - %endif +%macro DECLARE_ARG 1-* + %rep %0 + %define r%1m [esp + stack_offset + 4*%1 + 4] + %define r%1mp dword r%1m + %rotate 1 + %endrep %endmacro -%macro LOAD_IF_USED 2 ; reg_id, number_of_args - %if %1 < %2 - mov r%1, [esp + stack_offset + 4 + %1*4] - %endif -%endmacro +DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names... - ASSERT %2 >= %1 + %assign num_args %1 %assign regs_used %2 - ASSERT regs_used <= 7 - PUSH_IF_USED 3 - PUSH_IF_USED 4 - PUSH_IF_USED 5 - PUSH_IF_USED 6 - LOAD_IF_USED 0, %1 - LOAD_IF_USED 1, %1 - LOAD_IF_USED 2, %1 - LOAD_IF_USED 3, %1 - LOAD_IF_USED 4, %1 - LOAD_IF_USED 5, %1 - LOAD_IF_USED 6, %1 + %if regs_used > 7 + %assign regs_used 7 + %endif + ASSERT regs_used >= num_args + PUSH_IF_USED 3, 4, 5, 6 + LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6 DEFINE_ARGS %4 %endmacro %macro RET 0 - POP_IF_USED 6 - POP_IF_USED 5 - POP_IF_USED 4 - POP_IF_USED 3 + POP_IF_USED 6, 5, 4, 3 ret %endmacro @@ -464,8 +476,6 @@ DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28] %endmacro %endif - - ;============================================================================= ; arch-independent part ;============================================================================= |