diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2010-07-19 21:45:36 +0000 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2010-07-19 21:45:36 +0000 |
commit | fb9bdf048c5115cd5dda8edeb9250593e9bb1a88 (patch) | |
tree | a55076b43dcfffe9986d16c1114678e1b44577de /libavcodec/x86 | |
parent | 3facfc99daecd10c2b87761d111d4dee1e3736b7 (diff) | |
download | ffmpeg-fb9bdf048c5115cd5dda8edeb9250593e9bb1a88.tar.gz |
Be more efficient with registers or stack memory. Saves 8/16 bytes stack
for x86-32, or 2 MM registers on x86-64.
Originally committed as revision 24338 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/vp8dsp.asm | 32 |
1 file changed, 16 insertions, 16 deletions
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index 116064c42f..02b6f8dff8 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -1411,7 +1411,7 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %4 sub rsp, mmsize * 4 ; stack layout: [0]=E, [1]=I, [2]=hev_thr ; [3]=hev() result %else ; h - sub rsp, mmsize * 6 ; extra storage space for transposes + sub rsp, mmsize * 5 ; extra storage space for transposes %endif %define flim_E [rsp] @@ -1470,7 +1470,7 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %4 ; 8x8 transpose TRANSPOSE4x4B 0, 1, 2, 3, 7 %ifdef m13 - SWAP 1, 13 + SWAP 1, 8 %else mova [rsp+mmsize*4], m1 %endif @@ -1480,17 +1480,17 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %4 SBUTTERFLY dq, 2, 6, 1 ; q0/q1 SBUTTERFLY dq, 3, 7, 1 ; q2/q3 %ifdef m13 - SWAP 1, 13 - SWAP 2, 13 + SWAP 1, 8 + SWAP 2, 8 %else mova m1, [rsp+mmsize*4] mova [rsp+mmsize*4], m2 ; store q0 %endif SBUTTERFLY dq, 1, 5, 2 ; p1/p0 %ifdef m14 - SWAP 5, 14 + SWAP 5, 12 %else - mova [rsp+mmsize*5], m5 ; store p0 + mova [rsp+mmsize*3], m5 ; store p0 %endif SWAP 1, 4 SWAP 2, 4 @@ -1527,7 +1527,7 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %4 ; 8x16 transpose TRANSPOSE4x4B 0, 1, 2, 3, 7 %ifdef m13 - SWAP 1, 13 + SWAP 1, 8 %else mova [rsp+mmsize*4], m1 %endif @@ -1539,17 +1539,17 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %4 SBUTTERFLY dq, 2, 6, 1 ; q0/q1 SBUTTERFLY dq, 3, 7, 1 ; q2/q3 %ifdef m13 - SWAP 1, 13 - SWAP 2, 13 + SWAP 1, 8 + SWAP 2, 8 %else mova m1, [rsp+mmsize*4] mova [rsp+mmsize*4], m2 ; store q0 %endif SBUTTERFLY dq, 1, 5, 2 ; p1/p0 %ifdef m14 - SWAP 5, 14 + SWAP 5, 12 %else - mova [rsp+mmsize*5], m5 ; store p0 + mova [rsp+mmsize*3], m5 ; store p0 %endif SWAP 1, 4 SWAP 2, 4 @@ -1611,9 +1611,9 @@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %4 %ifidn %2, v mova m3, [dst_reg +mstride_reg] ; p0 %elifdef m14 - SWAP 3, 14 + SWAP 3, 12 %else - mova m3, [rsp+mmsize*5] + mova m3, [rsp+mmsize*3] %endif mova m1, m2 @@ -1644,7 +1644,7 
@@ cglobal vp8_%2_loop_filter16y_inner_%1, 5, %3, %4 %ifidn %2, v mova m4, [dst_reg] ; q0 %elifdef m13 - SWAP 4, 13 + SWAP 4, 8 %else mova m4, [rsp+mmsize*4] %endif @@ -1836,7 +1836,7 @@ INNER_LOOPFILTER mmxext, h, 6, 8 INIT_XMM INNER_LOOPFILTER sse2, v, 5, 13 %ifdef m8 -INNER_LOOPFILTER sse2, h, 5, 15 +INNER_LOOPFILTER sse2, h, 5, 13 %else -INNER_LOOPFILTER sse2, h, 6, 15 +INNER_LOOPFILTER sse2, h, 6, 13 %endif |