diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2012-12-08 16:12:38 -0800 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-12-12 10:37:52 +0100 |
commit | ce58642ed0d6dade63ff1b2467aa573e97c0cc91 (patch) | |
tree | 5911ccf44159f032015e875a4bc7d11c1dd80381 /libavcodec/x86/vp8dsp.asm | |
parent | 82c0211213430a9aef0d6ef5b29de35043f5bcba (diff) | |
download | ffmpeg-ce58642ed0d6dade63ff1b2467aa573e97c0cc91.tar.gz |
x86inc: support stack mem allocation and re-alignment in PROLOGUE.
Use this in VP8/H264-8bit loopfilter functions so they can be used if
there is no aligned stack (e.g. MSVC 32bit or ICC 10.x).
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/vp8dsp.asm')
-rw-r--r-- | libavcodec/x86/vp8dsp.asm | 68 |
1 file changed, 35 insertions(+), 33 deletions(-)
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index 37ae336123..84805fdc66 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -1631,28 +1631,31 @@ SIMPLE_LOOPFILTER h, 5 ;----------------------------------------------------------------------------- %macro INNER_LOOPFILTER 2 +%define stack_size 0 +%ifndef m8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr +%ifidn %1, v ; [3]=hev() result +%define stack_size mmsize * -4 +%else ; h ; extra storage space for transposes +%define stack_size mmsize * -5 +%endif +%endif + %if %2 == 8 ; chroma -cglobal vp8_%1_loop_filter8uv_inner, 6, 6, 13, dst, dst8, stride, flimE, flimI, hevthr +cglobal vp8_%1_loop_filter8uv_inner, 6, 6, 13, stack_size, dst, dst8, stride, flimE, flimI, hevthr %else ; luma -cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, dst, stride, flimE, flimI, hevthr +cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, stack_size, dst, stride, flimE, flimI, hevthr %endif %if cpuflag(ssse3) pxor m7, m7 %endif -%ifndef m8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr -%ifidn %1, v ; [3]=hev() result -%assign pad 16 + mmsize * 4 - gprsize - (stack_offset & 15) -%else ; h ; extra storage space for transposes -%assign pad 16 + mmsize * 5 - gprsize - (stack_offset & 15) -%endif + +%ifndef m8 ; splat function arguments SPLATB_REG m0, flimEq, m7 ; E SPLATB_REG m1, flimIq, m7 ; I SPLATB_REG m2, hevthrq, m7 ; hev_thresh - SUB rsp, pad - %define m_flimE [rsp] %define m_flimI [rsp+mmsize] %define m_hevthr [rsp+mmsize*2] @@ -2082,12 +2085,10 @@ cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, dst, stride, flimE, flimI, hevthr dec cntrq jg .next8px %endif -%endif - -%ifndef m8 ; sse2 on x86-32 or mmx/mmxext - ADD rsp, pad -%endif + REP_RET +%else ; mmsize == 16 RET +%endif %endmacro %if ARCH_X86_32 @@ -2122,31 +2123,34 @@ INNER_LOOPFILTER h, 8 ;----------------------------------------------------------------------------- %macro MBEDGE_LOOPFILTER 2 -%if %2 == 8 ; chroma -cglobal vp8_%1_loop_filter8uv_mbedge, 
6, 6, 15, dst1, dst8, stride, flimE, flimI, hevthr -%else ; luma -cglobal vp8_%1_loop_filter16y_mbedge, 5, 5, 15, dst1, stride, flimE, flimI, hevthr -%endif - -%if cpuflag(ssse3) - pxor m7, m7 -%endif +%define stack_size 0 %ifndef m8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr %if mmsize == 16 ; [3]=hev() result ; [4]=filter tmp result ; [5]/[6] = p2/q2 backup ; [7]=lim_res sign result -%assign pad 16 + mmsize * 7 - gprsize - (stack_offset & 15) +%define stack_size mmsize * -7 %else ; 8 ; extra storage space for transposes -%assign pad 16 + mmsize * 8 - gprsize - (stack_offset & 15) +%define stack_size mmsize * -8 +%endif %endif + +%if %2 == 8 ; chroma +cglobal vp8_%1_loop_filter8uv_mbedge, 6, 6, 15, stack_size, dst1, dst8, stride, flimE, flimI, hevthr +%else ; luma +cglobal vp8_%1_loop_filter16y_mbedge, 5, 5, 15, stack_size, dst1, stride, flimE, flimI, hevthr +%endif + +%if cpuflag(ssse3) + pxor m7, m7 +%endif + +%ifndef m8 ; splat function arguments SPLATB_REG m0, flimEq, m7 ; E SPLATB_REG m1, flimIq, m7 ; I SPLATB_REG m2, hevthrq, m7 ; hev_thresh - SUB rsp, pad - %define m_flimE [rsp] %define m_flimI [rsp+mmsize] %define m_hevthr [rsp+mmsize*2] @@ -2740,12 +2744,10 @@ cglobal vp8_%1_loop_filter16y_mbedge, 5, 5, 15, dst1, stride, flimE, flimI, hevt dec cntrq jg .next8px %endif -%endif - -%ifndef m8 ; sse2 on x86-32 or mmx/mmxext - ADD rsp, pad -%endif + REP_RET +%else ; mmsize == 16 RET +%endif %endmacro %if ARCH_X86_32 |