diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-05-29 02:55:19 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-05-29 03:34:35 +0200 |
commit | b8a43bc1b50f409414493a05f6c4b7895ca4ddf9 (patch) | |
tree | 95dda1b7289aac9bdb1f457417baf9515aa4383a /libavcodec/x86/ac3dsp.asm | |
parent | 39d607e5bbc25ad9629683702b510e865434ef21 (diff) | |
parent | 90da52f01f8b6c22af22a002eb226989b1cf7ef8 (diff) | |
download | ffmpeg-b8a43bc1b50f409414493a05f6c4b7895ca4ddf9.tar.gz |
Merge remote-tracking branch 'qatar/master' into master
* qatar/master: (27 commits)
ac3enc: fix LOCAL_ALIGNED usage in count_mantissa_bits()
ac3dsp: do not use the ff_* prefix when referencing ff_ac3_bap_bits.
ac3dsp: fix loop condition in ac3_update_bap_counts_c()
ARM: unbreak build
ac3enc: modify mantissa bit counting to keep bap counts for all values of bap instead of just 0 to 4.
ac3enc: split mantissa bit counting into a separate function.
ac3enc: store per-block/channel bap pointers by reference block in a 2D array rather than in the AC3Block struct.
get_bits: add av_unused tag to cache variable
sws: replace all long with int.
ARM: aacdec: fix constraints on inline asm
ARM: remove unnecessary volatile from inline asm
ARM: add "cc" clobbers to inline asm where needed
ARM: improve FASTDIV asm
ac3enc: use LOCAL_ALIGNED macro
APIchanges: fill in git hash for av_get_pix_fmt_name (0420bd7).
lavu: add av_get_pix_fmt_name() convenience function
cmdutils: remove OPT_FUNC2
swscale: fix crash in bilinear scaling.
vpxenc: add VP8E_SET_STATIC_THRESHOLD mapping
webm: support stereo videos in matroska/webm muxer
...
Conflicts:
Changelog
cmdutils.c
cmdutils.h
doc/APIchanges
doc/muxers.texi
ffmpeg.c
ffplay.c
libavcodec/ac3enc.c
libavcodec/ac3enc_float.c
libavcodec/avcodec.h
libavcodec/get_bits.h
libavcodec/libvpxenc.c
libavcodec/version.h
libavdevice/libdc1394.c
libavformat/matroskaenc.c
libavutil/avutil.h
libswscale/rgb2rgb.c
libswscale/swscale.c
libswscale/swscale_template.c
libswscale/x86/swscale_template.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/ac3dsp.asm')
-rw-r--r-- | libavcodec/x86/ac3dsp.asm | 53 |
1 files changed, 53 insertions, 0 deletions
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index b67f893f22..99c5df340e 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -27,6 +27,11 @@ SECTION_RODATA ; 16777216.0f - used in ff_float_to_fixed24() pf_1_24: times 4 dd 0x4B800000 +; used in ff_ac3_compute_mantissa_size() +cextern ac3_bap_bits +pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768 +pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7 + SECTION .text ;----------------------------------------------------------------------------- @@ -293,3 +298,51 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len %endif ja .loop REP_RET + +;------------------------------------------------------------------------------ +; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16]) +;------------------------------------------------------------------------------ + +%macro PHADDD4 2 ; xmm src, xmm tmp + movhlps %2, %1 + paddd %1, %2 + pshufd %2, %1, 0x1 + paddd %1, %2 +%endmacro + +INIT_XMM +cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum + movdqa m0, [mant_cntq ] + movdqa m1, [mant_cntq+ 1*16] + paddw m0, [mant_cntq+ 2*16] + paddw m1, [mant_cntq+ 3*16] + paddw m0, [mant_cntq+ 4*16] + paddw m1, [mant_cntq+ 5*16] + paddw m0, [mant_cntq+ 6*16] + paddw m1, [mant_cntq+ 7*16] + paddw m0, [mant_cntq+ 8*16] + paddw m1, [mant_cntq+ 9*16] + paddw m0, [mant_cntq+10*16] + paddw m1, [mant_cntq+11*16] + pmaddwd m0, [ac3_bap_bits ] + pmaddwd m1, [ac3_bap_bits+16] + paddd m0, m1 + PHADDD4 m0, m1 + movd sumd, m0 + movdqa m3, [pw_bap_mul1] + movhpd m0, [mant_cntq +2] + movlpd m0, [mant_cntq+1*32+2] + movhpd m1, [mant_cntq+2*32+2] + movlpd m1, [mant_cntq+3*32+2] + movhpd m2, [mant_cntq+4*32+2] + movlpd m2, [mant_cntq+5*32+2] + pmulhuw m0, m3 + pmulhuw m1, m3 + pmulhuw m2, m3 + paddusw m0, m1 + paddusw m0, m2 + pmaddwd m0, [pw_bap_mul2] + PHADDD4 m0, m1 + movd eax, m0 + add eax, sumd + RET |