diff options
author | Clément Bœsch <u@pkh.me> | 2014-02-05 07:21:13 +0100 |
---|---|---|
committer | Clément Bœsch <u@pkh.me> | 2014-02-05 07:21:13 +0100 |
commit | d92a725329e5fe315fd42ab1ef38a97aa8f94aba (patch) | |
tree | 6a963f9f0316fef4108aca6676377fcd5b20dea0 | |
parent | 97dde561dec0d5d8ba5c3da2264298191d500ead (diff) | |
download | ffmpeg-d92a725329e5fe315fd42ab1ef38a97aa8f94aba.tar.gz |
x86/vp9lpf: remove 8 SWAPs in 84/48 transpose.
-rw-r--r-- | libavcodec/x86/vp9lpf.asm | 33 |
1 files changed, 10 insertions, 23 deletions
```diff
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index 230583237c..58a6854203 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -759,30 +759,17 @@ SECTION .text
     movd [Q6], m14
     movd [Q7], m15
 %else
-    mova m4, [P3]
-    mova m5, [P2]
-    mova m6, [P1]
-    mova m7, [P0]
-    mova m8, [Q0]
-    mova m9, [Q1]
-    mova m10, [Q2]
-    mova m11, [Q3]
-
-    DEFINE_REAL_P7_TO_Q7
-
-    ; the following code do a transpose of 8 full centered lines to 16 half
+    ; the following code do a transpose of 8 full lines to 16 half
     ; lines (high part). It is inlined to avoid the need of a staging area
-
-    ; move from [-4;4] to [-8;0]
-    SWAP 0, 4
-    SWAP 1, 5
-    SWAP 2, 6
-    SWAP 3, 7
-    SWAP 4, 8
-    SWAP 5, 9
-    SWAP 6, 10
-    SWAP 7, 11
-
+    mova m0, [P3]
+    mova m1, [P2]
+    mova m2, [P1]
+    mova m3, [P0]
+    mova m4, [Q0]
+    mova m5, [Q1]
+    mova m6, [Q2]
+    mova m7, [Q3]
+    DEFINE_REAL_P7_TO_Q7
     SBUTTERFLY bw, 0, 1, 8
     SBUTTERFLY bw, 2, 3, 8
     SBUTTERFLY bw, 4, 5, 8
```