diff options
author | Måns Rullgård <mans@mansr.com> | 2010-01-15 14:02:52 +0000 |
---|---|---|
committer | Måns Rullgård <mans@mansr.com> | 2010-01-15 14:02:52 +0000 |
commit | d356a53f442f3df63370a4cbe7fbdb6aa29666ae (patch) | |
tree | e12b75b1e85a7eb970429f432c3cd5d42d6b6cf0 | |
parent | 59ea69ef54c658e33e5bce911a218e78bec36c02 (diff) | |
download | ffmpeg-d356a53f442f3df63370a4cbe7fbdb6aa29666ae.tar.gz |
AAC: optimise bitstream reading in decode_spectrum_and_dequant()
Using the low-level macros directly avoids redundant open/update/close
cycles.
2-3% faster on ARM, PPC, and Core i7.
Originally committed as revision 21224 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/aac.c | 93 |
1 file changed, 65 insertions, 28 deletions
diff --git a/libavcodec/aac.c b/libavcodec/aac.c
index 393f2e6594..0fbab77367 100644
--- a/libavcodec/aac.c
+++ b/libavcodec/aac.c
@@ -993,6 +993,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
                 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
                 const int cb_size = ff_aac_spectral_sizes[cbt_m1];
+                OPEN_READER(re, gb);
 
                 switch (cbt_m1 >> 1) {
                 case 0:
@@ -1001,15 +1002,18 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                         int len = off_len;
 
                         do {
-                            const int index = get_vlc2(gb, vlc_tab, 8, 2);
+                            int code;
                             unsigned cb_idx;
 
-                            if (index >= cb_size) {
-                                err_idx = index;
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+
+                            if (code >= cb_size) {
+                                err_idx = code;
                                 goto err_cb_overflow;
                             }
 
-                            cb_idx = cb_vector_idx[index];
+                            cb_idx = cb_vector_idx[code];
                             cf = VMUL4(cf, vq, cb_idx, sf + idx);
                         } while (len -= 4);
                     }
@@ -1021,19 +1025,26 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                         int len = off_len;
 
                         do {
-                            const int index = get_vlc2(gb, vlc_tab, 8, 2);
+                            int code;
                             unsigned nnz;
                             unsigned cb_idx;
                             uint32_t bits;
 
-                            if (index >= cb_size) {
-                                err_idx = index;
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+
+                            if (code >= cb_size) {
+                                err_idx = code;
                                 goto err_cb_overflow;
                             }
 
-                            cb_idx = cb_vector_idx[index];
+#if MIN_CACHE_BITS < 20
+                            UPDATE_CACHE(re, gb);
+#endif
+                            cb_idx = cb_vector_idx[code];
                             nnz = cb_idx >> 8 & 15;
-                            bits = get_bits(gb, nnz) << (32-nnz);
+                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
+                            LAST_SKIP_BITS(re, gb, nnz);
                             cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
                         } while (len -= 4);
                     }
@@ -1045,15 +1056,18 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                         int len = off_len;
 
                         do {
-                            const int index = get_vlc2(gb, vlc_tab, 8, 2);
+                            int code;
                             unsigned cb_idx;
 
-                            if (index >= cb_size) {
-                                err_idx = index;
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+
+                            if (code >= cb_size) {
+                                err_idx = code;
                                 goto err_cb_overflow;
                             }
 
-                            cb_idx = cb_vector_idx[index];
+                            cb_idx = cb_vector_idx[code];
                             cf = VMUL2(cf, vq, cb_idx, sf + idx);
                         } while (len -= 2);
                     }
@@ -1066,19 +1080,23 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                         int len = off_len;
 
                         do {
-                            const int index = get_vlc2(gb, vlc_tab, 8, 2);
+                            int code;
                             unsigned nnz;
                             unsigned cb_idx;
                             unsigned sign;
 
-                            if (index >= cb_size) {
-                                err_idx = index;
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+
+                            if (code >= cb_size) {
+                                err_idx = code;
                                 goto err_cb_overflow;
                             }
 
-                            cb_idx = cb_vector_idx[index];
+                            cb_idx = cb_vector_idx[code];
                             nnz = cb_idx >> 8 & 15;
-                            sign = get_bits(gb, nnz) << (cb_idx >> 12);
+                            sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
+                            LAST_SKIP_BITS(re, gb, nnz);
                             cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
                         } while (len -= 2);
                     }
@@ -1091,39 +1109,56 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                         int len = off_len;
 
                         do {
-                            const int index = get_vlc2(gb, vlc_tab, 8, 2);
+                            int code;
                             unsigned nzt, nnz;
                             unsigned cb_idx;
                             uint32_t bits;
                             int j;
 
-                            if (!index) {
+                            UPDATE_CACHE(re, gb);
+                            GET_VLC(code, re, gb, vlc_tab, 8, 2);
+
+                            if (!code) {
                                 *icf++ = 0;
                                 *icf++ = 0;
                                 continue;
                             }
 
-                            if (index >= cb_size) {
-                                err_idx = index;
+                            if (code >= cb_size) {
+                                err_idx = code;
                                 goto err_cb_overflow;
                             }
 
-                            cb_idx = cb_vector_idx[index];
+                            cb_idx = cb_vector_idx[code];
                             nnz = cb_idx >> 12;
                             nzt = cb_idx >> 8;
-                            bits = get_bits(gb, nnz) << (32-nnz);
+                            bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
+                            LAST_SKIP_BITS(re, gb, nnz);
 
                             for (j = 0; j < 2; j++) {
                                 if (nzt & 1<<j) {
-                                    int n = 4;
+                                    uint32_t b;
+                                    int n;
                                     /* The total length of escape_sequence must be < 22 bits according
                                        to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
-                                    while (get_bits1(gb) && n < 13) n++;
-                                    if (n == 13) {
+                                    UPDATE_CACHE(re, gb);
+                                    b = GET_CACHE(re, gb);
+                                    b = 31 - av_log2(~b);
+
+                                    if (b > 8) {
                                         av_log(ac->avccontext, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
                                         return -1;
                                     }
-                                    n = (1 << n) + get_bits(gb, n);
+
+#if MIN_CACHE_BITS < 21
+                                    LAST_SKIP_BITS(re, gb, b + 1);
+                                    UPDATE_CACHE(re, gb);
+#else
+                                    SKIP_BITS(re, gb, b + 1);
+#endif
+                                    b += 4;
+                                    n = (1 << b) + SHOW_UBITS(re, gb, b);
+                                    LAST_SKIP_BITS(re, gb, b);
                                     *icf++ = cbrt_tab[n] | (bits & 1<<31);
                                     bits <<= 1;
                                 } else {
@@ -1138,6 +1173,8 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024],
                         ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
                     }
                 }
+
+                CLOSE_READER(re, gb);
             }
         }
         coef += g_len << 7;