about summary refs log tree commit diff stats
path: root/libavcodec/x86
diff options
context:
space:
mode:
author Ronald S. Bultje <rsbultje@gmail.com> 2024-02-25 10:49:35 -0500
committer J. Dekker <jdek@itanimul.li> 2024-02-26 12:29:58 +0100
commit d6083f503d5bd7f9a2540c3e30d95e7add765d1e (patch)
tree b7be21aa49b5438b3fb2591ea9ba52afd74f587a /libavcodec/x86
parent 07cc8f6b3cd463b714aba1f0612c04d21bf8af16 (diff)
download ffmpeg-d6083f503d5bd7f9a2540c3e30d95e7add765d1e.tar.gz
avcodec/x86/hevc: fix luma 12b overflow
Signed-off-by: J. Dekker <jdek@itanimul.li>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/hevc_deblock.asm 40
1 files changed, 31 insertions, 9 deletions
diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index 85ee4800bb..61b79f8079 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -541,19 +541,41 @@ ALIGN 16
add betaq, r13
shr betaq, 3; ((beta + (beta >> 1)) >> 3))
- mova m13, [pw_8]
psubw m12, m4, m3 ; q0 - p0
- psllw m10, m12, 3; 8 * (q0 - p0)
- paddw m12, m10 ; 9 * (q0 - p0)
-
+ paddw m10, m12, m12
+ paddw m12, m10 ; 3 * (q0 - p0)
psubw m10, m5, m2 ; q1 - p1
- psllw m8, m10, 1; 2 * ( q1 - p1 )
- paddw m10, m8; 3 * ( q1 - p1 )
- psubw m12, m10; 9 * (q0 - p0) - 3 * ( q1 - p1 )
- paddw m12, m13; + 8
+ psubw m12, m10 ; 3 * (q0 - p0) - (q1 - p1)
+%if %1 < 12
+ paddw m10, m12, m12
+ paddw m12, [pw_8]; + 8
+ paddw m12, m10 ; 9 * (q0 - p0) - 3 * ( q1 - p1 )
psraw m12, 4; >> 4 , delta0
PABSW m13, m12; abs(delta0)
-
+%elif cpuflag(ssse3)
+ pabsw m13, m12
+ paddw m10, m13, m13
+ paddw m13, [pw_8]
+ paddw m13, m10 ; abs(9 * (q0 - p0) - 3 * ( q1 - p1 ))
+ pxor m10, m10
+ pcmpgtw m10, m12
+ paddw m13, m10
+ psrlw m13, 4; >> 4, abs(delta0)
+ psignw m10, m13, m12
+ SWAP 10, 12
+%else
+ pxor m10, m10
+ pcmpgtw m10, m12
+ pxor m12, m10
+ psubw m12, m10 ; abs()
+ paddw m13, m12, m12
+ paddw m12, [pw_8]
+ paddw m13, m12 ; 3*abs(m12)
+ paddw m13, m10
+ psrlw m13, 4
+ pxor m12, m13, m10
+ psubw m12, m10
+%endif
psllw m10, m9, 2; 8 * tc
paddw m10, m9; 10 * tc