diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2012-02-09 13:00:30 -0500 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2012-02-09 21:04:44 -0500 |
commit | d483bb58c318b0a6152709cf28263d72200b98f9 (patch) | |
tree | 857e751358a76b263dc2f5aa3f4b38fec3715040 | |
parent | e6d9fa66f12cf5a3024c9bc7c4c608f7fc59207e (diff) | |
download | ffmpeg-d483bb58c318b0a6152709cf28263d72200b98f9.tar.gz |
ac3dsp: do not use pshufb in ac3_extract_exponents_ssse3()
We need to do unsigned saturation in order to cover the corner case when the
absolute coefficient value is 16777215 (the maximum value).
Fixes Bug #216
-rw-r--r-- | libavcodec/x86/ac3dsp.asm | 13 |
1 files changed, 4 insertions, 9 deletions
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index e1761fa806..746fd83a67 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -35,7 +35,6 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7 ; used in ff_ac3_extract_exponents() pd_1: times 4 dd 1 pd_151: times 4 dd 151 -pb_shuf_4dwb: db 0, 4, 8, 12 SECTION .text @@ -404,15 +403,12 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len %endif %macro AC3_EXTRACT_EXPONENTS 1 -cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len +cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len add expq, lenq lea coefq, [coefq+4*lenq] neg lenq mova m2, [pd_1] mova m3, [pd_151] -%ifidn %1, ssse3 ; - movd m4, [pb_shuf_4dwb] -%endif .loop: ; move 4 32-bit coefs to xmm0 mova m0, [coefq+4*lenq] @@ -426,12 +422,11 @@ cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len mova m0, m3 psubd m0, m1 ; move the lowest byte in each of 4 dwords to the low dword -%ifidn %1, ssse3 - pshufb m0, m4 -%else + ; NOTE: We cannot just extract the low bytes with pshufb because the dword + ; result for 16777215 is -1 due to float inaccuracy. Using packuswb + ; clips this to 0, which is the correct exponent. packssdw m0, m0 packuswb m0, m0 -%endif movd [expq+lenq], m0 add lenq, 4 |