diff options
author | James Darnley <jdarnley@obe.tv> | 2017-06-15 00:35:44 +0200 |
---|---|---|
committer | James Darnley <jdarnley@obe.tv> | 2017-06-28 17:27:35 +0200 |
commit | 8b19467d07d5782b4140f61363f24361efb87ff6 (patch) | |
tree | 6682dcd830bd33cf1585ba767959922e1c4bdc13 /libavcodec/x86/simple_idct10_template.asm | |
parent | c1d1274bfc5672e37e2192f15be6a318ee7bb955 (diff) | |
download | ffmpeg-8b19467d07d5782b4140f61363f24361efb87ff6.tar.gz |
avcodec/x86: allow future 8-bit simple idct to have "DC only hack"
Created by Ronald S. Bultje
Diffstat (limited to 'libavcodec/x86/simple_idct10_template.asm')
-rw-r--r-- | libavcodec/x86/simple_idct10_template.asm | 40 |
1 file changed, 40 insertions, 0 deletions
diff --git a/libavcodec/x86/simple_idct10_template.asm b/libavcodec/x86/simple_idct10_template.asm
index d8ea0bcc6b..0d04a9818a 100644
--- a/libavcodec/x86/simple_idct10_template.asm
+++ b/libavcodec/x86/simple_idct10_template.asm
@@ -257,6 +257,46 @@
     pmullw m12,[%8+96]

     IDCT_1D %1, %2, %8
+%elif %2 == 11
+    ; This copies the DC-only shortcut. When there is only a DC coefficient the
+    ; C shifts the value and splats it to all coeffs rather than multiplying and
+    ; doing the full IDCT. This causes a difference on 8-bit because the
+    ; coefficient is 16383 rather than 16384 (which you can get with shifting).
+    por m1, m8, m13
+    por m1, m12
+    por m1, [blockq+ 16] ; { row[1] }[0-7]
+    por m1, [blockq+ 48] ; { row[3] }[0-7]
+    por m1, [blockq+ 80] ; { row[5] }[0-7]
+    por m1, [blockq+112] ; { row[7] }[0-7]
+    pxor m2, m2
+    pcmpeqw m1, m2
+    psllw m2, m10, 3
+    pand m2, m1
+    pcmpeqb m3, m3
+    pxor m1, m3
+    mova [rsp], m1
+    mova [rsp+16], m2
+
+    IDCT_1D %1, %2
+
+    mova m5, [rsp]
+    mova m6, [rsp+16]
+    pand m8, m5
+    por m8, m6
+    pand m0, m5
+    por m0, m6
+    pand m1, m5
+    por m1, m6
+    pand m2, m5
+    por m2, m6
+    pand m4, m5
+    por m4, m6
+    pand m11, m5
+    por m11, m6
+    pand m9, m5
+    por m9, m6
+    pand m10, m5
+    por m10, m6
 %else
     IDCT_1D %1, %2
 %endif