diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2014-12-19 22:18:42 -0500 |
---|---|---|
committer | Anton Khirnov <anton@khirnov.net> | 2016-10-04 10:54:08 +0200 |
commit | 6e74e9636b1752e777146421ffa2b2498071e28d (patch) | |
tree | 28cae7a3ec7e14cbd4acaa1abf87b56cf7a14dd1 /libavcodec/x86 | |
parent | 6411c328a233b80faa5aa3ef4266f9a16e499699 (diff) | |
download | ffmpeg-6e74e9636b1752e777146421ffa2b2498071e28d.tar.gz |
vp9lpf/x86: slightly simplify 44/48/84/88 h stores.
Signed-off-by: Anton Khirnov <anton@khirnov.net>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/vp9lpf.asm | 88 |
1 files changed, 40 insertions, 48 deletions
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm index 878bc54a28..d5b3fca990 100644 --- a/libavcodec/x86/vp9lpf.asm +++ b/libavcodec/x86/vp9lpf.asm @@ -725,34 +725,34 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, stride, mstride, dst2, stri SBUTTERFLY bw, 2, 3, 8 SBUTTERFLY wd, 0, 2, 8 SBUTTERFLY wd, 1, 3, 8 - SBUTTERFLY dq, 0, 4, 8 - SBUTTERFLY dq, 1, 5, 8 - SBUTTERFLY dq, 2, 6, 8 - SBUTTERFLY dq, 3, 7, 8 movd [P7], m0 - punpckhqdq m0, m8 - movd [P6], m0 - movd [Q0], m1 - punpckhqdq m1, m9 - movd [Q1], m1 movd [P3], m2 - punpckhqdq m2, m10 - movd [P2], m2 + movd [Q0], m1 movd [Q4], m3 - punpckhqdq m3, m11 + psrldq m0, 4 + psrldq m1, 4 + psrldq m2, 4 + psrldq m3, 4 + movd [P6], m0 + movd [P2], m2 + movd [Q1], m1 movd [Q5], m3 - movd [P5], m4 - punpckhqdq m4, m12 - movd [P4], m4 - movd [Q2], m5 - punpckhqdq m5, m13 - movd [Q3], m5 - movd [P1], m6 - punpckhqdq m6, m14 - movd [P0], m6 - movd [Q6], m7 - punpckhqdq m7, m8 - movd [Q7], m7 + psrldq m0, 4 + psrldq m1, 4 + psrldq m2, 4 + psrldq m3, 4 + movd [P5], m0 + movd [P1], m2 + movd [Q2], m1 + movd [Q6], m3 + psrldq m0, 4 + psrldq m1, 4 + psrldq m2, 4 + psrldq m3, 4 + movd [P4], m0 + movd [P0], m2 + movd [Q3], m1 + movd [Q7], m3 %else ; the following code do a transpose of 8 full lines to 16 half ; lines (high part). It is inlined to avoid the need of a staging area @@ -777,30 +777,22 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, stride, mstride, dst2, stri SBUTTERFLY dq, 1, 5, 8 SBUTTERFLY dq, 2, 6, 8 SBUTTERFLY dq, 3, 7, 8 - movh [P7], m0 - punpckhqdq m0, m8 - movh [P6], m0 - movh [Q0], m1 - punpckhqdq m1, m9 - movh [Q1], m1 - movh [P3], m2 - punpckhqdq m2, m10 - movh [P2], m2 - movh [Q4], m3 - punpckhqdq m3, m11 - movh [Q5], m3 - movh [P5], m4 - punpckhqdq m4, m12 - movh [P4], m4 - movh [Q2], m5 - punpckhqdq m5, m13 - movh [Q3], m5 - movh [P1], m6 - punpckhqdq m6, m14 - movh [P0], m6 - movh [Q6], m7 - punpckhqdq m7, m8 - movh [Q7], m7 + movh [P7], m0 + movhps [P6], m0 + movh [Q0], m1 + movhps [Q1], m1 + movh [P3], m2 + movhps [P2], m2 + movh [Q4], m3 + movhps [Q5], m3 + movh [P5], m4 + movhps [P4], m4 + movh [Q2], m5 + movhps [Q3], m5 + movh [P1], m6 + movhps [P0], m6 + movh [Q6], m7 + movhps [Q7], m7 %endif %endif |