about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/x86/ac3dsp.asm
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2012-03-16 07:47:27 +0100
committer Michael Niedermayer <michaelni@gmx.at> 2012-03-16 09:01:08 +0100
commit 568e9062bd29e13e0bfa42f2ac8411d01608634d (patch)
tree a78351d75b3dee8257909ccffde46880099c91bb /libavcodec/x86/ac3dsp.asm
parent 5dbc75870f486fb9c0237870eafa834a8a2066c8 (diff)
parent 5effcfa76792470677a1f6bc9aa73347a87ef720 (diff)
download ffmpeg-568e9062bd29e13e0bfa42f2ac8411d01608634d.tar.gz
Merge remote-tracking branch 'qatar/release/0.8' into release/0.10

* qatar/release/0.8: (154 commits)
  Update Changelog for the 0.8.1 Release
  dca: include libavutil/mathematics.h for possibly missing M_SQRT1_2
  dca: don't use av_clip_uintp2().
  snow: check reference frame indices.
  snow: reject unsupported chroma shifts.
  xa_adpcm: limit filter to prevent xa_adpcm_table[] array bounds overruns.
  h264: increase reference poc list from 16 to 32.
  h264: stricter reference limit enforcement.
  h264: improve parsing of broken AVC SPS
  Replace computations of remaining bits with calls to get_bits_left().
  png: convert to bytestream2 API.
  roqvideo: convert to bytestream2 API.
  smc: port to bytestream2 API.
  tgq: convert to bytestream2 API.
  algmm: convert to bytestream2 API.
  jvdec: unbreak video decoding
  h264: Fix invalid interlaced/progressive MB combinations for direct mode prediction.
  libx264: add 'stats' private option for setting 2pass stats filename.
  libx264: fix help text for slice-max-size option.
  avconv: reindent
  ...

Conflicts:
  Changelog
  RELEASE
  avconv.c
  doc/APIchanges
  ffplay.c
  libavcodec/Makefile
  libavcodec/aacdec.c
  libavcodec/alsdec.c
  libavcodec/atrac3.c
  libavcodec/avcodec.h
  libavcodec/dvdata.c
  libavcodec/fraps.c
  libavcodec/golomb.h
  libavcodec/h264.c
  libavcodec/h264.h
  libavcodec/h264_cabac.c
  libavcodec/h264_cavlc.c
  libavcodec/h264_direct.c
  libavcodec/h264_parser.c
  libavcodec/h264_ps.c
  libavcodec/h264idct_template.c
  libavcodec/indeo3.c
  libavcodec/kgv1dec.c
  libavcodec/kmvc.c
  libavcodec/mjpegbdec.c
  libavcodec/mmvideo.c
  libavcodec/mpegaudiodec.c
  libavcodec/mpegvideo.h
  libavcodec/options.c
  libavcodec/pngdec.c
  libavcodec/roqvideodec.c
  libavcodec/shorten.c
  libavcodec/svq3.c
  libavcodec/utils.c
  libavcodec/version.h
  libavcodec/wmadec.c
  libavcodec/xxan.c
  libavformat/Makefile
  libavformat/asfdec.c
  libavformat/dv.c
  libavformat/mov.c
  libavformat/nsvdec.c
  libavformat/utils.c
  libavformat/version.h
  libavutil/avutil.h
  libavutil/error.c
  libavutil/error.h
  libswscale/swscale.c
  libswscale/utils.c
  libswscale/x86/swscale_template.c
  tests/ref/acodec/g722

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/ac3dsp.asm')
-rw-r--r-- libavcodec/x86/ac3dsp.asm 13
1 files changed, 4 insertions, 9 deletions
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 59157b7219..300660dc5d 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -35,7 +35,6 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
; used in ff_ac3_extract_exponents()
pd_1: times 4 dd 1
pd_151: times 4 dd 151
-pb_shuf_4dwb: db 0, 4, 8, 12
SECTION .text
@@ -404,15 +403,12 @@ cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
%endif
%macro AC3_EXTRACT_EXPONENTS 1
-cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
+cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len
add expq, lenq
lea coefq, [coefq+4*lenq]
neg lenq
mova m2, [pd_1]
mova m3, [pd_151]
-%ifidn %1, ssse3 ;
- movd m4, [pb_shuf_4dwb]
-%endif
.loop:
; move 4 32-bit coefs to xmm0
mova m0, [coefq+4*lenq]
@@ -426,12 +422,11 @@ cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
mova m0, m3
psubd m0, m1
; move the lowest byte in each of 4 dwords to the low dword
-%ifidn %1, ssse3
- pshufb m0, m4
-%else
+ ; NOTE: We cannot just extract the low bytes with pshufb because the dword
+ ; result for 16777215 is -1 due to float inaccuracy. Using packuswb
+ ; clips this to 0, which is the correct exponent.
packssdw m0, m0
packuswb m0, m0
-%endif
movd [expq+lenq], m0
add lenq, 4