diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2015-05-14 13:39:37 -0400 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2015-05-14 13:39:37 -0400 |
commit | 96d30c34951b42479f4d1a4210e8a36347c4d653 (patch) | |
tree | 5449add2944c0ecbbca2bc58cc707bef550663b8 | |
parent | cf31e2df08e39082241c8e2e10eaacb115c69a6c (diff) | |
download | ffmpeg-96d30c34951b42479f4d1a4210e8a36347c4d653.tar.gz |
vp9: disable all pmulhrsw in 8/16 iadst x86 optimizations.
They all overflow in various samples that are considered valid input.
-rw-r--r-- | libavcodec/x86/vp9itxfm.asm | 6 |
1 file changed, 4 insertions, 2 deletions
diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
index 9cf0d78fab..a08e1ff313 100644
--- a/libavcodec/x86/vp9itxfm.asm
+++ b/libavcodec/x86/vp9itxfm.asm
@@ -868,7 +868,8 @@ VP9_IDCT_IDCT_8x8_ADD_XMM avx, 13

     ; m6=out0, m5=out1, m4=t2, m3=t3, m7=t6, m0=t7, m2=out6, m1=out7

-%if cpuflag(ssse3)
+    ; unfortunately, the code below overflows in some cases
+%if 0; cpuflag(ssse3)
     SUMSUB_BA w, 3, 4, 2
     SUMSUB_BA w, 0, 7, 2
     pmulhrsw m3, W_11585x2_REG
@@ -1647,7 +1648,8 @@ VP9_IDCT_IDCT_16x16_ADD_XMM avx
     VP9_RND_SH_SUMSUB_BA 4, 7, 0, 2, 1, [pd_8192]
     PSIGNW m4, [pw_m1] ; m4=out13[w], m7=t15[w]

-%if cpuflag(ssse3)
+    ; unfortunately, the code below overflows in some cases
+%if 0; cpuflag(ssse3)
     SUMSUB_BA w, 7, 6, 1
     pmulhrsw m7, [pw_m11585x2] ; m7=out5[w]
     pmulhrsw m6, [pw_11585x2] ; m6=out10[w]