diff options
author | Henrik Gramner <hengar-6@student.ltu.se> | 2012-04-04 20:03:15 +0000 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2012-04-11 15:47:00 -0400 |
commit | 729f90e26802057f06905ab15a34612168eeac80 (patch) | |
tree | 41f8c4cedf10851b5b437aeeb558ce3d0f8db1dc /libswscale | |
parent | e1ce756844e684876318570dcebc74bc66c084f0 (diff) | |
download | ffmpeg-729f90e26802057f06905ab15a34612168eeac80.tar.gz |
x86inc improvements for 64-bit
Add support for all x86-64 registers
Prefer caller-saved register over callee-saved on WIN64
Support up to 15 function arguments
Also (by Ronald S. Bultje)
Fix up our asm to work with new x86inc.asm.
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
Diffstat (limited to 'libswscale')
-rw-r--r-- | libswscale/x86/output.asm | 4 | ||||
-rw-r--r-- | libswscale/x86/scale.asm | 18 |
2 files changed, 6 insertions, 16 deletions
diff --git a/libswscale/x86/output.asm b/libswscale/x86/output.asm
index 68dbf51b02..9b0b01253a 100644
--- a/libswscale/x86/output.asm
+++ b/libswscale/x86/output.asm
@@ -62,11 +62,11 @@ SECTION .text
 %define cntr_reg fltsizeq
 %define movsx mov
 %else
-%define cntr_reg r11
+%define cntr_reg r7
 %define movsx movsxd
 %endif
 
-cglobal yuv2planeX_%1, %3, 7, %2, filter, fltsize, src, dst, w, dither, offset
+cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
 %if %1 == 8 || %1 == 9 || %1 == 10
     pxor m6, m6
 %endif ; %1 == 8/9/10
diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm
index f7ed45fcf3..d56e253afa 100644
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -53,7 +53,7 @@ SECTION .text
 %ifnidn %3, X
 cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, src, filter, fltpos, pos1
 %else
-cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, srcmem, filter, fltpos, fltsize
+cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsize
 %endif
 %if ARCH_X86_64
     movsxd wq, wd
@@ -245,10 +245,9 @@ cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, srcmem, filter, fltpos, fltsiz
 %define dlt 0
 %endif ; %4 ==/!= X4
 %if ARCH_X86_64
-    push r12
-%define srcq r11
-%define pos1q r10
-%define srcendq r12
+%define srcq r8
+%define pos1q r7
+%define srcendq r9
     movsxd fltsizeq, fltsized ; filterSize
     lea srcendq, [srcmemq+(fltsizeq-dlt)*srcmul] ; &src[filterSize&~4]
 %else ; x86-32
@@ -388,16 +387,7 @@ cglobal hscale%1to%2_%4, %5, 7, %6, pos0, dst, w, srcmem, filter, fltpos, fltsiz
     add wq, 2
 %endif ; %3 ==/!= X
     jl .loop
-%ifnidn %3, X
     REP_RET
-%else ; %3 == X
-%if ARCH_X86_64
-    pop r12
-    RET
-%else ; x86-32
-    REP_RET
-%endif ; x86-32/64
-%endif ; %3 ==/!= X
 %endmacro
 
 ; SCALE_FUNCS source_width, intermediate_nbits, n_xmm