diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2010-07-26 14:00:15 +0000 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2010-07-26 14:00:15 +0000 |
commit | 2a180c69eacdc6854957aabae3b0e3ee4d4fd774 (patch) | |
tree | d2e11a41b1130f1dde5a4ff5e8d371fdf0ebe0c8 /libavcodec | |
parent | bcd4aa6498d83f81e18eb70efa52969c110ac7c6 (diff) | |
download | ffmpeg-2a180c69eacdc6854957aabae3b0e3ee4d4fd774.tar.gz |
Save a register (or one register-sized slot of stack space on x86-32) in
the no-loop mbedge loopfilter functions by re-using the space that holds
a variable that is no longer needed.
Originally committed as revision 24510 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/vp8dsp.asm | 40 |
1 files changed, 24 insertions, 16 deletions
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index a67c5bcc79..b3070825f1 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -2200,11 +2200,15 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
     ; align stack
     mov stack_reg, rsp ; backup stack pointer
     and rsp, ~(mmsize-1) ; align stack
+%ifidn %2, sse2
+    sub rsp, mmsize * 7
+%else
     sub rsp, mmsize * 8 ; stack layout: [0]=E, [1]=I, [2]=hev_thr
                         ;               [3]=hev() result
                         ;               [4]=filter tmp result
                         ;               [5]/[6] = p2/q2 backup
                         ;               [7]=lim_res sign result
+%endif
 
 %define flim_E [rsp]
 %define flim_I [rsp+mmsize]
@@ -2215,7 +2219,11 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
 %define q0backup [rsp+mmsize*4]
 %define p2backup [rsp+mmsize*5]
 %define q2backup [rsp+mmsize*6]
+%ifidn %2, sse2
+%define lim_sign [rsp]
+%else
 %define lim_sign [rsp+mmsize*7]
+%endif
 
     mova flim_E, m0
     mova flim_I, m1
@@ -2232,7 +2240,7 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
 %define q0backup m8
 %define p2backup m13
 %define q2backup m14
-%define lim_sign m15
+%define lim_sign m9
 
     ; splat function arguments
     SPLATB_REG flim_E, E_reg, m7 ; E
@@ -2638,8 +2646,8 @@ cglobal vp8_%2_loop_filter16y_mbedge_%1, 5, %3, %5
     pmullw m1, [pw_9]
     paddw m6, m7
     paddw m1, m7
-%ifdef m15
-    SWAP 7, 15
+%ifdef m9
+    SWAP 7, 9
 %else
     mova m7, lim_sign
 %endif
@@ -2749,29 +2757,29 @@ MBEDGE_LOOPFILTER mmxext, h, 6, 8, 0
 INIT_XMM
 %define SPLATB_REG SPLATB_REG_SSE2
 %define WRITE_8W WRITE_8W_SSE2
-MBEDGE_LOOPFILTER sse2, v, 5, 16, 16
+MBEDGE_LOOPFILTER sse2, v, 5, 16, 15
 %ifdef m8
-MBEDGE_LOOPFILTER sse2, h, 5, 16, 16
+MBEDGE_LOOPFILTER sse2, h, 5, 16, 15
 %else
-MBEDGE_LOOPFILTER sse2, h, 6, 16, 16
+MBEDGE_LOOPFILTER sse2, h, 6, 16, 15
 %endif
-MBEDGE_LOOPFILTER sse2, v, 6, 8, 16
-MBEDGE_LOOPFILTER sse2, h, 6, 8, 16
+MBEDGE_LOOPFILTER sse2, v, 6, 8, 15
+MBEDGE_LOOPFILTER sse2, h, 6, 8, 15
 
 %define SPLATB_REG SPLATB_REG_SSSE3
-MBEDGE_LOOPFILTER ssse3, v, 5, 16, 16
+MBEDGE_LOOPFILTER ssse3, v, 5, 16, 15
 %ifdef m8
-MBEDGE_LOOPFILTER ssse3, h, 5, 16, 16
+MBEDGE_LOOPFILTER ssse3, h, 5, 16, 15
 %else
-MBEDGE_LOOPFILTER ssse3, h, 6, 16, 16
+MBEDGE_LOOPFILTER ssse3, h, 6, 16, 15
 %endif
-MBEDGE_LOOPFILTER ssse3, v, 6, 8, 16
-MBEDGE_LOOPFILTER ssse3, h, 6, 8, 16
+MBEDGE_LOOPFILTER ssse3, v, 6, 8, 15
+MBEDGE_LOOPFILTER ssse3, h, 6, 8, 15
 
 %define WRITE_8W WRITE_8W_SSE4
 %ifdef m8
-MBEDGE_LOOPFILTER sse4, h, 5, 16, 16
+MBEDGE_LOOPFILTER sse4, h, 5, 16, 15
 %else
-MBEDGE_LOOPFILTER sse4, h, 6, 16, 16
+MBEDGE_LOOPFILTER sse4, h, 6, 16, 15
 %endif
-MBEDGE_LOOPFILTER sse4, h, 6, 8, 16
+MBEDGE_LOOPFILTER sse4, h, 6, 8, 15