diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-07-02 03:07:06 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-07-02 03:24:32 +0200 |
commit | 3074f03a074de3aab79639d261cbd0ccc265b5b4 (patch) | |
tree | 9710041e852ee69f6de6ef6e6333af82f6ca5931 /libavcodec/x86/ac3dsp.asm | |
parent | 392acaedcb052fa64386d5d0aea4931386f72d64 (diff) | |
parent | 23ce6e72123a40895baaeefeb27c7c18748bd67e (diff) | |
download | ffmpeg-3074f03a074de3aab79639d261cbd0ccc265b5b4.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master:
get_bits: remove x86 inline asm in A32 bitstream reader
doc: Remove outdated information about our issue tracker
avidec: Factor out the sync functionality.
fate-aac: Expand coverage.
ac3dsp: add x86-optimized versions of ac3dsp.extract_exponents().
ac3dsp: simplify extract_exponents() now that it does not need to do clipping.
ac3enc: clip coefficients after MDCT.
ac3enc: add int32_t array clipping function to DSPUtil, including x86 versions.
swscale: for >8bit scaling, read in native bit-depth.
matroskadec: matroska_read_seek after EBML_STOP leads to failure.
doxygen: fix usage of @file directive in libavutil/{dict,file}.h
doxygen: Help doxygen parser to understand the DECLARE_ALIGNED and offsetof macros
Conflicts:
doc/issue_tracker.txt
libavformat/avidec.c
libavutil/dict.h
libswscale/swscale.c
libswscale/utils.c
tests/ref/lavfi/pixfmts_scale
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/ac3dsp.asm')
-rw-r--r-- | libavcodec/x86/ac3dsp.asm | 102 |
1 files changed, 102 insertions, 0 deletions
```diff
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 99c5df340e..8c958a17ee 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -32,6 +32,11 @@ cextern ac3_bap_bits
 pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
 pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
 
+; used in ff_ac3_extract_exponents()
+pd_1: times 4 dd 1
+pd_151: times 4 dd 151
+pb_shuf_4dwb: db 0, 4, 8, 12
+
 SECTION .text
 
 ;-----------------------------------------------------------------------------
@@ -346,3 +351,100 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
     movd     eax, m0
     add      eax, sumd
     RET
+
+;------------------------------------------------------------------------------
+; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs)
+;------------------------------------------------------------------------------
+
+%macro PABSD_MMX 2 ; src/dst, tmp
+    pxor     %2, %2
+    pcmpgtd  %2, %1
+    pxor     %1, %2
+    psubd    %1, %2
+%endmacro
+
+%macro PABSD_SSSE3 1-2 ; src/dst, unused
+    pabsd    %1, %1
+%endmacro
+
+%ifdef HAVE_AMD3DNOW
+INIT_MMX
+cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
+    add      expq, lenq
+    lea      coefq, [coefq+4*lenq]
+    neg      lenq
+    movq     m3, [pd_1]
+    movq     m4, [pd_151]
+.loop:
+    movq     m0, [coefq+4*lenq  ]
+    movq     m1, [coefq+4*lenq+8]
+    PABSD_MMX m0, m2
+    PABSD_MMX m1, m2
+    pslld    m0, 1
+    por      m0, m3
+    pi2fd    m2, m0
+    psrld    m2, 23
+    movq     m0, m4
+    psubd    m0, m2
+    pslld    m1, 1
+    por      m1, m3
+    pi2fd    m2, m1
+    psrld    m2, 23
+    movq     m1, m4
+    psubd    m1, m2
+    packssdw  m0, m0
+    packuswb  m0, m0
+    packssdw  m1, m1
+    packuswb  m1, m1
+    punpcklwd m0, m1
+    movd     [expq+lenq], m0
+    add      lenq, 4
+    jl .loop
+    REP_RET
+%endif
+
+%macro AC3_EXTRACT_EXPONENTS 1
+cglobal ac3_extract_exponents_%1, 3,3,5, exp, coef, len
+    add      expq, lenq
+    lea      coefq, [coefq+4*lenq]
+    neg      lenq
+    mova     m2, [pd_1]
+    mova     m3, [pd_151]
+%ifidn %1, ssse3 ;
+    movd     m4, [pb_shuf_4dwb]
+%endif
+.loop:
+    ; move 4 32-bit coefs to xmm0
+    mova     m0, [coefq+4*lenq]
+    ; absolute value
+    PABSD    m0, m1
+    ; convert to float and extract exponents
+    pslld    m0, 1
+    por      m0, m2
+    cvtdq2ps m1, m0
+    psrld    m1, 23
+    mova     m0, m3
+    psubd    m0, m1
+    ; move the lowest byte in each of 4 dwords to the low dword
+%ifidn %1, ssse3
+    pshufb   m0, m4
+%else
+    packssdw m0, m0
+    packuswb m0, m0
+%endif
+    movd     [expq+lenq], m0
+
+    add      lenq, 4
+    jl .loop
+    REP_RET
+%endmacro
+
+%ifdef HAVE_SSE
+INIT_XMM
+%define PABSD PABSD_MMX
+AC3_EXTRACT_EXPONENTS sse2
+%ifdef HAVE_SSSE3
+%define PABSD PABSD_SSSE3
+AC3_EXTRACT_EXPONENTS ssse3
+%endif
+%endif
```