aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
authorJustin Ruggles <justin.ruggles@gmail.com>2011-05-26 15:53:25 -0400
committerJustin Ruggles <justin.ruggles@gmail.com>2011-05-28 12:39:28 -0400
commit6ca23db9cccac05bef9bf9c665821b396af12a0b (patch)
treea27bf20b461377d0f7e5566a205172768152a4a6 /libavcodec
parent1323828a0fbfa428d2e39a9f094039637b7fef5b (diff)
downloadffmpeg-6ca23db9cccac05bef9bf9c665821b396af12a0b.tar.gz
ac3enc: modify mantissa bit counting to keep bap counts for all values of bap
instead of just 0 to 4. This does all the actual bit counting as a final step.
Diffstat (limited to 'libavcodec')
-rw-r--r--libavcodec/ac3dsp.c42
-rw-r--r--libavcodec/ac3dsp.h20
-rw-r--r--libavcodec/ac3enc.c92
-rw-r--r--libavcodec/arm/ac3dsp_arm.S52
-rw-r--r--libavcodec/arm/ac3dsp_init_arm.c2
-rw-r--r--libavcodec/x86/ac3dsp.asm53
-rw-r--r--libavcodec/x86/ac3dsp_mmx.c3
7 files changed, 150 insertions, 114 deletions
diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index e3ca37ebdd..de58f3ab26 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -128,24 +128,33 @@ static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd,
} while (end > ff_ac3_band_start_tab[band++]);
}
-static int ac3_compute_mantissa_size_c(int mant_cnt[5], uint8_t *bap,
- int nb_coefs)
+static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], uint8_t *bap,
+ int len)
{
- int bits, b, i;
+ while (len-- > 0) /* visits bap[len-1] down to bap[0]; never reads bap[-1], no-op for len <= 0 */
+ mant_cnt[bap[len]]++;
+}
- bits = 0;
- for (i = 0; i < nb_coefs; i++) {
- b = bap[i];
- if (b <= 4) {
- // bap=1 to bap=4 will be counted in compute_mantissa_size_final
- mant_cnt[b]++;
- } else if (b <= 13) {
- // bap=5 to bap=13 use (bap-1) bits
- bits += b - 1;
- } else {
- // bap=14 uses 14 bits and bap=15 uses 16 bits
- bits += (b == 14) ? 14 : 16;
- }
+DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
+ 0, 0, 0, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16
+};
+
+static int ac3_compute_mantissa_size_c(uint16_t mant_cnt[6][16])
+{
+ int blk, bap;
+ int bits = 0;
+
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ // bap=1 : 3 mantissas in 5 bits
+ bits += (mant_cnt[blk][1] / 3) * 5;
+ // bap=2 : 3 mantissas in 7 bits
+ // bap=4 : 2 mantissas in 7 bits
+ bits += ((mant_cnt[blk][2] / 3) + (mant_cnt[blk][4] >> 1)) * 7;
+ // bap=3 : 1 mantissa in 3 bits
+ bits += mant_cnt[blk][3] * 3;
+ // bap=5 to 15 : get bits per mantissa from table
+ for (bap = 5; bap < 16; bap++)
+ bits += mant_cnt[blk][bap] * ff_ac3_bap_bits[bap];
}
return bits;
}
@@ -181,6 +190,7 @@ av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
c->ac3_rshift_int32 = ac3_rshift_int32_c;
c->float_to_fixed24 = float_to_fixed24_c;
c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c;
+ c->update_bap_counts = ac3_update_bap_counts_c;
c->compute_mantissa_size = ac3_compute_mantissa_size_c;
c->extract_exponents = ac3_extract_exponents_c;
diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index b750767e81..8eeafd68ac 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -24,6 +24,12 @@
#include <stdint.h>
+/**
+ * Number of mantissa bits written for each bap value.
+ * bap values with fractional bits are set to 0 and are calculated separately.
+ */
+extern const uint16_t ff_ac3_bap_bits[16];
+
typedef struct AC3DSPContext {
/**
* Set each encoded exponent in a block to the minimum of itself and the
@@ -102,9 +108,21 @@ typedef struct AC3DSPContext {
const uint8_t *bap_tab, uint8_t *bap);
/**
+ * Update bap counts using the supplied array of bap.
+ *
+ * @param[out] mant_cnt bap counts for 1 block
+ * @param[in] bap array of bap, pointing to start coef bin
+ * @param[in] len number of elements to process
+ */
+ void (*update_bap_counts)(uint16_t mant_cnt[16], uint8_t *bap, int len);
+
+ /**
* Calculate the number of bits needed to encode a set of mantissas.
+ *
+ * @param[in] mant_cnt bap counts for all blocks
+ * @return mantissa bit count
*/
- int (*compute_mantissa_size)(int mant_cnt[5], uint8_t *bap, int nb_coefs);
+ int (*compute_mantissa_size)(uint16_t mant_cnt[6][16]);
void (*extract_exponents)(uint8_t *exp, int32_t *coef, int nb_coefs);
} AC3DSPContext;
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index 6b9bd87853..66dfc29217 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -1424,22 +1424,6 @@ static void count_frame_bits(AC3EncodeContext *s)
/**
- * Finalize the mantissa bit count by adding in the grouped mantissas.
- */
-static int compute_mantissa_size_final(int mant_cnt[5])
-{
- // bap=1 : 3 mantissas in 5 bits
- int bits = (mant_cnt[1] / 3) * 5;
- // bap=2 : 3 mantissas in 7 bits
- // bap=4 : 2 mantissas in 7 bits
- bits += ((mant_cnt[2] / 3) + (mant_cnt[4] >> 1)) * 7;
- // bap=3 : each mantissa is 3 bits
- bits += mant_cnt[3] * 3;
- return bits;
-}
-
-
-/**
* Calculate masking curve based on the final exponents.
* Also calculate the power spectral densities to use in future calculations.
*/
@@ -1491,38 +1475,60 @@ static void reset_block_bap(AC3EncodeContext *s)
}
-static int count_mantissa_bits(AC3EncodeContext *s)
+/**
+ * Initialize mantissa counts.
+ * These are set so that they are padded to the next whole group size when bits
+ * are counted in compute_mantissa_size.
+ */
+static void count_mantissa_bits_init(uint16_t mant_cnt[AC3_MAX_BLOCKS][16])
{
- int blk, ch;
- int mantissa_bits;
- int mant_cnt[5];
+ int blk;
+
+ for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+ memset(mant_cnt[blk], 0, sizeof(mant_cnt[blk]));
+ mant_cnt[blk][1] = mant_cnt[blk][2] = 2;
+ mant_cnt[blk][4] = 1;
+ }
+}
+
+
+/**
+ * Update mantissa bit counts for all blocks in 1 channel in a given bandwidth
+ * range.
+ */
+static void count_mantissa_bits_update_ch(AC3EncodeContext *s, int ch,
+ uint16_t mant_cnt[AC3_MAX_BLOCKS][16],
+ int start, int end)
+{
+ int blk;
- mantissa_bits = 0;
for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
AC3Block *block = &s->blocks[blk];
- int av_uninit(ch0);
- int got_cpl = !block->cpl_in_use;
- // initialize grouped mantissa counts. these are set so that they are
- // padded to the next whole group size when bits are counted in
- // compute_mantissa_size_final
- mant_cnt[0] = mant_cnt[3] = 0;
- mant_cnt[1] = mant_cnt[2] = 2;
- mant_cnt[4] = 1;
- for (ch = 1; ch <= s->channels; ch++) {
- if (!got_cpl && ch > 1 && block->channel_in_cpl[ch-1]) {
- ch0 = ch - 1;
- ch = CPL_CH;
- got_cpl = 1;
- }
- mantissa_bits += s->ac3dsp.compute_mantissa_size(mant_cnt,
- s->ref_bap[ch][blk]+s->start_freq[ch],
- block->end_freq[ch]-s->start_freq[ch]);
- if (ch == CPL_CH)
- ch = ch0;
- }
- mantissa_bits += compute_mantissa_size_final(mant_cnt);
+ if (ch == CPL_CH && !block->cpl_in_use)
+ continue;
+ s->ac3dsp.update_bap_counts(mant_cnt[blk],
+ s->ref_bap[ch][blk] + start,
+ FFMIN(end, block->end_freq[ch]) - start);
}
- return mantissa_bits;
+}
+
+
+/**
+ * Count the number of mantissa bits in the frame based on the bap values.
+ */
+static int count_mantissa_bits(AC3EncodeContext *s)
+{
+ int ch, max_end_freq;
+ LOCAL_ALIGNED_16(uint16_t, mant_cnt,[AC3_MAX_BLOCKS][16]);
+
+ count_mantissa_bits_init(mant_cnt);
+
+ max_end_freq = s->bandwidth_code * 3 + 73;
+ for (ch = !s->cpl_enabled; ch <= s->channels; ch++)
+ count_mantissa_bits_update_ch(s, ch, mant_cnt, s->start_freq[ch],
+ max_end_freq);
+
+ return s->ac3dsp.compute_mantissa_size(mant_cnt);
}
diff --git a/libavcodec/arm/ac3dsp_arm.S b/libavcodec/arm/ac3dsp_arm.S
deleted file mode 100644
index d7d498e41f..0000000000
--- a/libavcodec/arm/ac3dsp_arm.S
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2011 Mans Rullgard <mans@mansr.com>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "asm.S"
-
-function ff_ac3_compute_mantissa_size_arm, export=1
- push {r4-r8,lr}
- ldm r0, {r4-r8}
- mov r3, r0
- mov r0, #0
-1:
- ldrb lr, [r1], #1
- subs r2, r2, #1
- blt 2f
- cmp lr, #4
- bgt 3f
- subs lr, lr, #1
- addlt r4, r4, #1
- addeq r5, r5, #1
- ble 1b
- subs lr, lr, #2
- addlt r6, r6, #1
- addeq r7, r7, #1
- addgt r8, r8, #1
- b 1b
-3:
- cmp lr, #14
- sublt lr, lr, #1
- addgt r0, r0, #16
- addle r0, r0, lr
- b 1b
-2:
- stm r3, {r4-r8}
- pop {r4-r8,pc}
-endfunc
diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c
index fd78e1e6a4..4414dc8170 100644
--- a/libavcodec/arm/ac3dsp_init_arm.c
+++ b/libavcodec/arm/ac3dsp_init_arm.c
@@ -39,8 +39,6 @@ int ff_ac3_compute_mantissa_size_arm(int cnt[5], uint8_t *bap, int nb_coefs);
av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)
{
- c->compute_mantissa_size = ff_ac3_compute_mantissa_size_arm;
-
if (HAVE_ARMV6) {
c->bit_alloc_calc_bap = ff_ac3_bit_alloc_calc_bap_armv6;
}
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 18f9dc3894..0d8f4b78eb 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -27,6 +27,11 @@ SECTION_RODATA
; 16777216.0f - used in ff_float_to_fixed24()
pf_1_24: times 4 dd 0x4B800000
+; used in ff_ac3_compute_mantissa_size()
+cextern ac3_bap_bits
+pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
+pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
+
SECTION .text
;-----------------------------------------------------------------------------
@@ -293,3 +298,51 @@ cglobal float_to_fixed24_sse2, 3,3,9, dst, src, len
%endif
ja .loop
REP_RET
+
+;------------------------------------------------------------------------------
+; int ff_ac3_compute_mantissa_size(uint16_t mant_cnt[6][16])
+;------------------------------------------------------------------------------
+
+%macro PHADDD4 2 ; xmm src, xmm tmp
+ movhlps %2, %1
+ paddd %1, %2
+ pshufd %2, %1, 0x1
+ paddd %1, %2
+%endmacro
+
+INIT_XMM
+cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
+ movdqa m0, [mant_cntq ]
+ movdqa m1, [mant_cntq+ 1*16]
+ paddw m0, [mant_cntq+ 2*16]
+ paddw m1, [mant_cntq+ 3*16]
+ paddw m0, [mant_cntq+ 4*16]
+ paddw m1, [mant_cntq+ 5*16]
+ paddw m0, [mant_cntq+ 6*16]
+ paddw m1, [mant_cntq+ 7*16]
+ paddw m0, [mant_cntq+ 8*16]
+ paddw m1, [mant_cntq+ 9*16]
+ paddw m0, [mant_cntq+10*16]
+ paddw m1, [mant_cntq+11*16]
+ pmaddwd m0, [ff_ac3_bap_bits ]
+ pmaddwd m1, [ff_ac3_bap_bits+16]
+ paddd m0, m1
+ PHADDD4 m0, m1
+ movd sumd, m0
+ movdqa m3, [pw_bap_mul1]
+ movhpd m0, [mant_cntq +2]
+ movlpd m0, [mant_cntq+1*32+2]
+ movhpd m1, [mant_cntq+2*32+2]
+ movlpd m1, [mant_cntq+3*32+2]
+ movhpd m2, [mant_cntq+4*32+2]
+ movlpd m2, [mant_cntq+5*32+2]
+ pmulhuw m0, m3
+ pmulhuw m1, m3
+ pmulhuw m2, m3
+ paddusw m0, m1
+ paddusw m0, m2
+ pmaddwd m0, [pw_bap_mul2]
+ PHADDD4 m0, m1
+ movd eax, m0
+ add eax, sumd
+ RET
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
index 475042395c..2664736bb6 100644
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -42,6 +42,8 @@ extern void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned i
extern void ff_float_to_fixed24_sse (int32_t *dst, const float *src, unsigned int len);
extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);
+extern int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
+
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
{
int mm_flags = av_get_cpu_flags();
@@ -69,6 +71,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
c->float_to_fixed24 = ff_float_to_fixed24_sse2;
+ c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;