diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2014-12-19 21:44:57 -0500 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2014-12-27 16:55:11 -0500 |
commit | c013ca58c5349e34610e94336d0fae6ef7a403a0 (patch) | |
tree | c8b44c4532270c7a7bf3fa4c03b694f0e46b526b /libavcodec/x86/vp9lpf.asm | |
parent | 56e432b27b8f65872a8b1381bde86b7e54f6a7c7 (diff) | |
download | ffmpeg-c013ca58c5349e34610e94336d0fae6ef7a403a0.tar.gz |
vp9/x86: save one register in loopfilter surface coverage.
Diffstat (limited to 'libavcodec/x86/vp9lpf.asm')
-rw-r--r-- | libavcodec/x86/vp9lpf.asm | 56 |
1 files changed, 28 insertions, 28 deletions
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm index 416f08f090..e0f7386a08 100644 --- a/libavcodec/x86/vp9lpf.asm +++ b/libavcodec/x86/vp9lpf.asm @@ -278,22 +278,22 @@ SECTION .text %endmacro %macro DEFINE_REAL_P7_TO_Q7 0-1 0 -%define P7 dst1q + 2*mstrideq + %1 -%define P6 dst1q + mstrideq + %1 -%define P5 dst1q + %1 -%define P4 dst1q + strideq + %1 -%define P3 dstq + 4*mstrideq + %1 -%define P2 dstq + mstride3q + %1 -%define P1 dstq + 2*mstrideq + %1 -%define P0 dstq + mstrideq + %1 -%define Q0 dstq + %1 -%define Q1 dstq + strideq + %1 -%define Q2 dstq + 2*strideq + %1 -%define Q3 dstq + stride3q + %1 -%define Q4 dstq + 4*strideq + %1 -%define Q5 dst2q + mstrideq + %1 -%define Q6 dst2q + %1 -%define Q7 dst2q + strideq + %1 +%define P7 dstq + 4*mstrideq + %1 +%define P6 dstq + mstride3q + %1 +%define P5 dstq + 2*mstrideq + %1 +%define P4 dstq + mstrideq + %1 +%define P3 dstq + %1 +%define P2 dstq + strideq + %1 +%define P1 dstq + 2* strideq + %1 +%define P0 dstq + stride3q + %1 +%define Q0 dstq + 4* strideq + %1 +%define Q1 dst2q + mstride3q + %1 +%define Q2 dst2q + 2*mstrideq + %1 +%define Q3 dst2q + mstrideq + %1 +%define Q4 dst2q + %1 +%define Q5 dst2q + strideq + %1 +%define Q6 dst2q + 2* strideq + %1 +%define Q7 dst2q + stride3q + %1 %endmacro ; ..............AB -> AAAAAAAABBBBBBBB @@ -308,26 +308,26 @@ SECTION .text %endmacro %macro LOOPFILTER 2 ; %1=v/h %2=size1 - lea mstrideq, [strideq] - neg mstrideq + mov mstrideq, strideq + neg mstrideq - lea stride3q, [strideq+2*strideq] - mov mstride3q, stride3q - neg mstride3q + lea stride3q, [strideq*3] + lea mstride3q, [mstrideq*3] %ifidn %1, h %if %2 > 16 %define movx movh - lea dstq, [dstq + 8*strideq - 4] + lea dstq, [dstq + 4*strideq - 4] %else %define movx movu - lea dstq, [dstq + 8*strideq - 8] ; go from top center (h pos) to center left (v pos) + lea dstq, [dstq + 4*strideq - 8] ; go from top center (h pos) to center left (v pos) %endif + lea dst2q, [dstq + 8*strideq] +%else + lea dstq, [dstq + 4*mstrideq] + lea dst2q, [dstq + 8*strideq] %endif - lea dst1q, [dstq + 2*mstride3q] ; dst1q = &dst[stride * -6] - lea dst2q, [dstq + 2* stride3q] ; dst2q = &dst[stride * +6] - DEFINE_REAL_P7_TO_Q7 %ifidn %1, h @@ -795,9 +795,9 @@ SECTION .text %macro LPF_16_VH 2 INIT_XMM %2 -cglobal vp9_loop_filter_v_%1_16, 5,10,16, dst, stride, E, I, H, mstride, dst1, dst2, stride3, mstride3 +cglobal vp9_loop_filter_v_%1_16, 5,10,16, dst, stride, E, I, H, mstride, dst2, stride3, mstride3 LOOPFILTER v, %1 -cglobal vp9_loop_filter_h_%1_16, 5,10,16, 256, dst, stride, E, I, H, mstride, dst1, dst2, stride3, mstride3 +cglobal vp9_loop_filter_h_%1_16, 5,10,16, 256, dst, stride, E, I, H, mstride, dst2, stride3, mstride3 LOOPFILTER h, %1 %endmacro |