diff options
author | Claudio Freire <klaussfreire@gmail.com> | 2015-09-15 03:59:45 -0300 |
---|---|---|
committer | Claudio Freire <klaussfreire@gmail.com> | 2015-09-16 23:14:26 -0300 |
commit | 8df9bf8e39b5f38c75d63c0ef17965382e634b1c (patch) | |
tree | d9350e8cc6d1c1543facb7c58e477e669cc411f9 /libavcodec/aaccoder.c | |
parent | 344d519040f03515bb915bf418f3ab9e38ada33b (diff) | |
download | ffmpeg-8df9bf8e39b5f38c75d63c0ef17965382e634b1c.tar.gz |
AAC encoder: refactor to resynchronize MIPS port
This patch refactors the AAC coders to reuse code
between the MIPS port and the regular, portable C code.
There were two main functions that had to use
hand-optimized versions of quantization code:
- search_for_quantizers_twoloop
- codebook_trellis_rate
Those two were split into their own template header
files so they can be inlined inside both the MIPS port
and the generic code. In each context, they'll link
to their specialized implementations, and thus be
optimized by the compiler.
I believe this approach is better than maintaining
several copies of each function. As past experience has
proven, keeping those copies in sync was error-prone.
This way, they will remain in sync by default.
Also, an implementation of the dequantized output
argument for the optimized quantize_and_encode
functions is included in the patch. While the current
implementation of search_for_pred still isn't using
it, future iterations of main prediction probably will.
It should not incur any measurable performance hit while
it is not being used.
Diffstat (limited to 'libavcodec/aaccoder.c')
-rw-r--r-- | libavcodec/aaccoder.c | 284 |
1 files changed, 4 insertions, 280 deletions
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index 524987df0b..8d5ea77e81 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -48,6 +48,8 @@ #include "aacenc_tns.h" #include "aacenc_pred.h" +#include "libavcodec/aaccoder_twoloop.h" + /** Frequency in Hz for lower limit of noise substitution **/ #define NOISE_LOW_LIMIT 4000 @@ -59,6 +61,8 @@ * replace low energy non zero bands */ #define NOISE_LAMBDA_REPLACE 1.948f +#include "libavcodec/aaccoder_trellis.h" + /** * structure used in optimal codebook search */ @@ -181,137 +185,6 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce } } -static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, - int win, int group_len, const float lambda) -{ - BandCodingPath path[120][CB_TOT_ALL]; - int w, swb, cb, start, size; - int i, j; - const int max_sfb = sce->ics.max_sfb; - const int run_bits = sce->ics.num_windows == 1 ? 5 : 3; - const int run_esc = (1 << run_bits) - 1; - int idx, ppos, count; - int stackrun[120], stackcb[120], stack_len; - float next_minbits = INFINITY; - int next_mincb = 0; - - abs_pow34_v(s->scoefs, sce->coeffs, 1024); - start = win*128; - for (cb = 0; cb < CB_TOT_ALL; cb++) { - path[0][cb].cost = run_bits+4; - path[0][cb].prev_idx = -1; - path[0][cb].run = 0; - } - for (swb = 0; swb < max_sfb; swb++) { - size = sce->ics.swb_sizes[swb]; - if (sce->zeroes[win*16 + swb]) { - float cost_stay_here = path[swb][0].cost; - float cost_get_here = next_minbits + run_bits + 4; - if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run] - != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1]) - cost_stay_here += run_bits; - if (cost_get_here < cost_stay_here) { - path[swb+1][0].prev_idx = next_mincb; - path[swb+1][0].cost = cost_get_here; - path[swb+1][0].run = 1; - } else { - path[swb+1][0].prev_idx = 0; - path[swb+1][0].cost = cost_stay_here; - path[swb+1][0].run = path[swb][0].run + 1; - } - next_minbits = 
path[swb+1][0].cost; - next_mincb = 0; - for (cb = 1; cb < CB_TOT_ALL; cb++) { - path[swb+1][cb].cost = 61450; - path[swb+1][cb].prev_idx = -1; - path[swb+1][cb].run = 0; - } - } else { - float minbits = next_minbits; - int mincb = next_mincb; - int startcb = sce->band_type[win*16+swb]; - startcb = aac_cb_in_map[startcb]; - next_minbits = INFINITY; - next_mincb = 0; - for (cb = 0; cb < startcb; cb++) { - path[swb+1][cb].cost = 61450; - path[swb+1][cb].prev_idx = -1; - path[swb+1][cb].run = 0; - } - for (cb = startcb; cb < CB_TOT_ALL; cb++) { - float cost_stay_here, cost_get_here; - float bits = 0.0f; - if (cb >= 12 && sce->band_type[win*16+swb] != aac_cb_out_map[cb]) { - path[swb+1][cb].cost = 61450; - path[swb+1][cb].prev_idx = -1; - path[swb+1][cb].run = 0; - continue; - } - for (w = 0; w < group_len; w++) { - bits += quantize_band_cost(s, &sce->coeffs[start + w*128], - &s->scoefs[start + w*128], size, - sce->sf_idx[win*16+swb], - aac_cb_out_map[cb], - 0, INFINITY, NULL, 0); - } - cost_stay_here = path[swb][cb].cost + bits; - cost_get_here = minbits + bits + run_bits + 4; - if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run] - != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1]) - cost_stay_here += run_bits; - if (cost_get_here < cost_stay_here) { - path[swb+1][cb].prev_idx = mincb; - path[swb+1][cb].cost = cost_get_here; - path[swb+1][cb].run = 1; - } else { - path[swb+1][cb].prev_idx = cb; - path[swb+1][cb].cost = cost_stay_here; - path[swb+1][cb].run = path[swb][cb].run + 1; - } - if (path[swb+1][cb].cost < next_minbits) { - next_minbits = path[swb+1][cb].cost; - next_mincb = cb; - } - } - } - start += sce->ics.swb_sizes[swb]; - } - - //convert resulting path from backward-linked list - stack_len = 0; - idx = 0; - for (cb = 1; cb < CB_TOT_ALL; cb++) - if (path[max_sfb][cb].cost < path[max_sfb][idx].cost) - idx = cb; - ppos = max_sfb; - while (ppos > 0) { - av_assert1(idx >= 0); - cb = idx; - stackrun[stack_len] = path[ppos][cb].run; 
- stackcb [stack_len] = cb; - idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx; - ppos -= path[ppos][cb].run; - stack_len++; - } - //perform actual band info encoding - start = 0; - for (i = stack_len - 1; i >= 0; i--) { - cb = aac_cb_out_map[stackcb[i]]; - put_bits(&s->pb, 4, cb); - count = stackrun[i]; - memset(sce->zeroes + win*16 + start, !cb, count); - //XXX: memset when band_type is also uint8_t - for (j = 0; j < count; j++) { - sce->band_type[win*16 + start] = cb; - start++; - } - while (count >= run_esc) { - put_bits(&s->pb, run_bits, run_esc); - count -= run_esc; - } - put_bits(&s->pb, run_bits, count); - } -} typedef struct TrellisPath { float cost; @@ -508,155 +381,6 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g]; } -/** - * two-loop quantizers search taken from ISO 13818-7 Appendix C - */ -static void search_for_quantizers_twoloop(AVCodecContext *avctx, - AACEncContext *s, - SingleChannelElement *sce, - const float lambda) -{ - int start = 0, i, w, w2, g; - int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f); - float dists[128] = { 0 }, uplims[128] = { 0 }; - float maxvals[128]; - int fflag, minscaler; - int its = 0; - int allz = 0; - float minthr = INFINITY; - - // for values above this the decoder might end up in an endless loop - // due to always having more bits than what can be encoded. 
- destbits = FFMIN(destbits, 5800); - //XXX: some heuristic to determine initial quantizers will reduce search time - //determine zero bands and upper limits - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { - for (g = 0; g < sce->ics.num_swb; g++) { - int nz = 0; - float uplim = 0.0f, energy = 0.0f; - for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { - FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; - uplim += band->threshold; - energy += band->energy; - if (band->energy <= band->threshold || band->threshold == 0.0f) { - sce->zeroes[(w+w2)*16+g] = 1; - continue; - } - nz = 1; - } - uplims[w*16+g] = uplim *512; - sce->zeroes[w*16+g] = !nz; - if (nz) - minthr = FFMIN(minthr, uplim); - allz |= nz; - } - } - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { - for (g = 0; g < sce->ics.num_swb; g++) { - if (sce->zeroes[w*16+g]) { - sce->sf_idx[w*16+g] = SCALE_ONE_POS; - continue; - } - sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59); - } - } - - if (!allz) - return; - abs_pow34_v(s->scoefs, sce->coeffs, 1024); - - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { - start = w*128; - for (g = 0; g < sce->ics.num_swb; g++) { - const float *scaled = s->scoefs + start; - maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled); - start += sce->ics.swb_sizes[g]; - } - } - - //perform two-loop search - //outer loop - improve quality - do { - int tbits, qstep; - minscaler = sce->sf_idx[0]; - //inner loop - quantize spectrum to fit into given number of bits - qstep = its ? 
1 : 32; - do { - int prev = -1; - tbits = 0; - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { - start = w*128; - for (g = 0; g < sce->ics.num_swb; g++) { - const float *coefs = &sce->coeffs[start]; - const float *scaled = &s->scoefs[start]; - int bits = 0; - int cb; - float dist = 0.0f; - - if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) { - start += sce->ics.swb_sizes[g]; - continue; - } - minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]); - cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); - for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { - int b; - dist += quantize_band_cost(s, coefs + w2*128, - scaled + w2*128, - sce->ics.swb_sizes[g], - sce->sf_idx[w*16+g], - cb, - 1.0f, - INFINITY, - &b, - 0); - bits += b; - } - dists[w*16+g] = dist - bits; - if (prev != -1) { - bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO]; - } - tbits += bits; - start += sce->ics.swb_sizes[g]; - prev = sce->sf_idx[w*16+g]; - } - } - if (tbits > destbits) { - for (i = 0; i < 128; i++) - if (sce->sf_idx[i] < 218 - qstep) - sce->sf_idx[i] += qstep; - } else { - for (i = 0; i < 128; i++) - if (sce->sf_idx[i] > 60 - qstep) - sce->sf_idx[i] -= qstep; - } - qstep >>= 1; - if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217) - qstep = 1; - } while (qstep); - - fflag = 0; - minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF); - - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { - for (g = 0; g < sce->ics.num_swb; g++) { - int prevsc = sce->sf_idx[w*16+g]; - if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) { - if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1)) - sce->sf_idx[w*16+g]--; - else //Try to make sure there is some energy in every band - sce->sf_idx[w*16+g]-=2; - } - sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF); - sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219); - if (sce->sf_idx[w*16+g] != prevsc) - fflag = 1; - 
sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); - } - } - its++; - } while (fflag && its < 10); -} static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s, SingleChannelElement *sce, |