diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2024-02-25 10:49:35 -0500 |
---|---|---|
committer | J. Dekker <jdek@itanimul.li> | 2024-02-26 12:29:58 +0100 |
commit | d6083f503d5bd7f9a2540c3e30d95e7add765d1e (patch) | |
tree | b7be21aa49b5438b3fb2591ea9ba52afd74f587a /libavcodec/x86 | |
parent | 07cc8f6b3cd463b714aba1f0612c04d21bf8af16 (diff) | |
download | ffmpeg-d6083f503d5bd7f9a2540c3e30d95e7add765d1e.tar.gz |
avcodec/x86/hevc: fix luma 12b overflow
Signed-off-by: J. Dekker <jdek@itanimul.li>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/hevc_deblock.asm | 40 |
1 file changed, 31 insertions, 9 deletions
diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index 85ee4800bb..61b79f8079 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -541,19 +541,41 @@ ALIGN 16
     add betaq, r13
     shr betaq, 3; ((beta + (beta >> 1)) >> 3))
 
-    mova m13, [pw_8]
     psubw m12, m4, m3 ; q0 - p0
-    psllw m10, m12, 3; 8 * (q0 - p0)
-    paddw m12, m10 ; 9 * (q0 - p0)
-
+    paddw m10, m12, m12
+    paddw m12, m10 ; 3 * (q0 - p0)
     psubw m10, m5, m2 ; q1 - p1
-    psllw m8, m10, 1; 2 * ( q1 - p1 )
-    paddw m10, m8; 3 * ( q1 - p1 )
-    psubw m12, m10; 9 * (q0 - p0) - 3 * ( q1 - p1 )
-    paddw m12, m13; + 8
+    psubw m12, m10 ; 3 * (q0 - p0) - (q1 - p1)
+%if %1 < 12
+    paddw m10, m12, m12
+    paddw m12, [pw_8]; + 8
+    paddw m12, m10 ; 9 * (q0 - p0) - 3 * ( q1 - p1 )
     psraw m12, 4; >> 4 , delta0
     PABSW m13, m12; abs(delta0)
-
+%elif cpuflag(ssse3)
+    pabsw m13, m12
+    paddw m10, m13, m13
+    paddw m13, [pw_8]
+    paddw m13, m10 ; abs(9 * (q0 - p0) - 3 * ( q1 - p1 ))
+    pxor m10, m10
+    pcmpgtw m10, m12
+    paddw m13, m10
+    psrlw m13, 4; >> 4, abs(delta0)
+    psignw m10, m13, m12
+    SWAP 10, 12
+%else
+    pxor m10, m10
+    pcmpgtw m10, m12
+    pxor m12, m10
+    psubw m12, m10 ; abs()
+    paddw m13, m12, m12
+    paddw m12, [pw_8]
+    paddw m13, m12 ; 3*abs(m12)
+    paddw m13, m10
+    psrlw m13, 4
+    pxor m12, m13, m10
+    psubw m12, m10
+%endif
 
     psllw m10, m9, 2; 8 * tc
     paddw m10, m9; 10 * tc