diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2011-05-26 15:53:25 -0400 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2011-05-28 12:39:28 -0400 |
commit | 6ca23db9cccac05bef9bf9c665821b396af12a0b (patch) | |
tree | a27bf20b461377d0f7e5566a205172768152a4a6 /libavcodec/x86 | |
parent | 1323828a0fbfa428d2e39a9f094039637b7fef5b (diff) | |
download | ffmpeg-6ca23db9cccac05bef9bf9c665821b396af12a0b.tar.gz |
ac3enc: modify mantissa bit counting to keep bap counts for all values of bap
instead of just 0 to 4.
This does all the actual bit counting as a final step.
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/ac3dsp.asm | 53 | ||||
-rw-r--r-- | libavcodec/x86/ac3dsp_mmx.c | 3 |
2 files changed, 56 insertions, 0 deletions
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm index 18f9dc3894..0d8f4b78eb 100644 --- a/libavcodec/x86/ac3dsp.asm +++ b/libavcodec/x86/ac3dsp.asm @@ -27,6 +27,11 @@ SECTION_RODATA ; 16777216.0f - used in ff_float_to_fixed24() pf_1_24: times 4 dd 0x4B800000 +; used in ff_ac3_compute_mantissa_size() +cextern ac3_bap_bits +pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768 +pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7 + SECTION .text ;----------------------------------------------------------------------------- @@ -293,3 +298,51 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len %endif ja .loop REP_RET + +;------------------------------------------------------------------------------ +; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16]) +;------------------------------------------------------------------------------ + +%macro PHADDD4 2 ; xmm src, xmm tmp + movhlps %2, %1 + paddd %1, %2 + pshufd %2, %1, 0x1 + paddd %1, %2 +%endmacro + +INIT_XMM +cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum + movdqa m0, [mant_cntq ] + movdqa m1, [mant_cntq+ 1*16] + paddw m0, [mant_cntq+ 2*16] + paddw m1, [mant_cntq+ 3*16] + paddw m0, [mant_cntq+ 4*16] + paddw m1, [mant_cntq+ 5*16] + paddw m0, [mant_cntq+ 6*16] + paddw m1, [mant_cntq+ 7*16] + paddw m0, [mant_cntq+ 8*16] + paddw m1, [mant_cntq+ 9*16] + paddw m0, [mant_cntq+10*16] + paddw m1, [mant_cntq+11*16] + pmaddwd m0, [ff_ac3_bap_bits ] + pmaddwd m1, [ff_ac3_bap_bits+16] + paddd m0, m1 + PHADDD4 m0, m1 + movd sumd, m0 + movdqa m3, [pw_bap_mul1] + movhpd m0, [mant_cntq +2] + movlpd m0, [mant_cntq+1*32+2] + movhpd m1, [mant_cntq+2*32+2] + movlpd m1, [mant_cntq+3*32+2] + movhpd m2, [mant_cntq+4*32+2] + movlpd m2, [mant_cntq+5*32+2] + pmulhuw m0, m3 + pmulhuw m1, m3 + pmulhuw m2, m3 + paddusw m0, m1 + paddusw m0, m2 + pmaddwd m0, [pw_bap_mul2] + PHADDD4 m0, m1 + movd eax, m0 + add eax, sumd + RET diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c index 475042395c..2664736bb6 100644 --- a/libavcodec/x86/ac3dsp_mmx.c +++ b/libavcodec/x86/ac3dsp_mmx.c @@ -42,6 +42,8 @@ extern void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned i extern void ff_float_to_fixed24_sse (int32_t *dst, const float *src, unsigned int len); extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len); +extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]); + av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) { int mm_flags = av_get_cpu_flags(); @@ -69,6 +71,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact) c->ac3_exponent_min = ff_ac3_exponent_min_sse2; c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2; c->float_to_fixed24 = ff_float_to_fixed24_sse2; + c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2; if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) { c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2; c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2; |