diff options
author | David Conrad <lessen42@gmail.com> | 2010-07-23 21:46:17 +0000 |
---|---|---|
committer | David Conrad <lessen42@gmail.com> | 2010-07-23 21:46:17 +0000 |
commit | fe1b5d974acf7736151e2e13f2498f4fbd6af765 (patch) | |
tree | a5135bb741533094e8824b8b9ff63659aa43a811 | |
parent | 5474ec2ac8a4964c4d6a0b51cd00a0ec2e7bb9a6 (diff) | |
download | ffmpeg-fe1b5d974acf7736151e2e13f2498f4fbd6af765.tar.gz |
Decode DCT tokens by branching to a different code path for each branch
on the Huffman tree, instead of traversing the tree in a while loop.
Based on the similar optimization in libvpx's detokenize.c.
10% faster at normal bitrates, and 30% faster for high-bitrate intra-only video.
Originally committed as revision 24468 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/vp56.h | 18 | ||||
-rw-r--r-- | libavcodec/vp8.c | 69 | ||||
-rw-r--r-- | libavcodec/vp8data.h | 20 |
3 files changed, 67 insertions, 40 deletions
```diff
diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h
index b7c1887596..ad07a49e9b 100644
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -226,6 +226,24 @@ static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
     return bit;
 }
 
+// branchy variant, to be used where there's a branch based on the bit decoded
+static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob)
+{
+    unsigned long code_word = vp56_rac_renorm(c);
+    unsigned low = 1 + (((c->high - 1) * prob) >> 8);
+    unsigned low_shift = low << 8;
+
+    if (code_word >= low_shift) {
+        c->high -= low;
+        c->code_word = code_word - low_shift;
+        return 1;
+    }
+
+    c->high = low;
+    c->code_word = code_word;
+    return 0;
+}
+
 static inline int vp56_rac_get(VP56RangeCoder *c)
 {
     unsigned int code_word = vp56_rac_renorm(c);
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 81447c4fd8..acdaf56b03 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -800,36 +800,61 @@ static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
                                uint8_t probs[8][3][NUM_DCT_TOKENS-1],
                                int i, int zero_nhood, int16_t qmul[2])
 {
-    int token, nonzero = 0;
-    int offset = 0;
+    uint8_t *token_prob;
+    int nonzero = 0;
+    int coeff;
 
-    for (; i < 16; i++) {
-        token = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, probs[vp8_coeff_band[i]][zero_nhood], offset);
+    do {
+        token_prob = probs[vp8_coeff_band[i]][zero_nhood];
 
-        if (token == DCT_EOB)
-            break;
-        else if (token >= DCT_CAT1) {
-            int cat = token-DCT_CAT1;
-            token = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]);
-            token += 3 + (2<<cat);
-        }
+        if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
+            return nonzero;
 
-        // after the first token, the non-zero prediction context becomes
-        // based on the last decoded coeff
-        if (!token) {
+skip_eob:
+        if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
             zero_nhood = 0;
-            offset = 1;
-            continue;
-        } else if (token == 1)
+            token_prob = probs[vp8_coeff_band[++i]][0];
+            if (i < 16)
+                goto skip_eob;
+            return nonzero; // invalid input; blocks should end with EOB
+        }
+
+        if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
+            coeff = 1;
             zero_nhood = 1;
-        else
+        } else {
             zero_nhood = 2;
 
+            if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
+                coeff = vp56_rac_get_prob(c, token_prob[4]);
+                if (coeff)
+                    coeff += vp56_rac_get_prob(c, token_prob[5]);
+                coeff += 2;
+            } else {
+                // DCT_CAT*
+                if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
+                    if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
+                        coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
+                    } else { // DCT_CAT2
+                        coeff = 7;
+                        coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
+                        coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
+                    }
+                } else { // DCT_CAT3 and up
+                    int a = vp56_rac_get_prob(c, token_prob[8]);
+                    int b = vp56_rac_get_prob(c, token_prob[9+a]);
+                    int cat = (a<<1) + b;
+                    coeff = 3 + (8<<cat);
+                    coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]);
+                }
+            }
+        }
+
         // todo: full [16] qmat? load into register?
-        block[zigzag_scan[i]] = (vp8_rac_get(c) ? -token : token) * qmul[!!i];
-        nonzero = i+1;
-        offset = 0;
-    }
+        block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
+        nonzero = ++i;
+    } while (i < 16);
+
     return nonzero;
 }
 
diff --git a/libavcodec/vp8data.h b/libavcodec/vp8data.h
index 80fa808484..a72ad90399 100644
--- a/libavcodec/vp8data.h
+++ b/libavcodec/vp8data.h
@@ -329,21 +329,6 @@ static const uint8_t vp8_coeff_band[16] =
     0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7
 };
 
-static const int8_t vp8_coeff_tree[NUM_DCT_TOKENS-1][2] =
-{
-    { -DCT_EOB, 1 }, // '0'
-    { -DCT_0, 2 }, // '10'
-    { -DCT_1, 3 }, // '110'
-    { 4, 6 },
-    { -DCT_2, 5 }, // '11100'
-    { -DCT_3, -DCT_4 }, // '111010', '111011'
-    { 7, 8 },
-    { -DCT_CAT1, -DCT_CAT2 }, // '111100', '111101'
-    { 9, 10 },
-    { -DCT_CAT3, -DCT_CAT4 }, // '1111100', '1111101'
-    { -DCT_CAT5, -DCT_CAT6 }, // '1111110', '1111111'
-};
-
 static const uint8_t vp8_dct_cat1_prob[] = { 159, 0 };
 static const uint8_t vp8_dct_cat2_prob[] = { 165, 145, 0 };
 static const uint8_t vp8_dct_cat3_prob[] = { 173, 148, 140, 0 };
@@ -351,10 +336,9 @@ static const uint8_t vp8_dct_cat4_prob[] = { 176, 155, 140, 135, 0 };
 static const uint8_t vp8_dct_cat5_prob[] = { 180, 157, 141, 134, 130, 0 };
 static const uint8_t vp8_dct_cat6_prob[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
 
-static const uint8_t * const vp8_dct_cat_prob[6] =
+// only used for cat3 and above; cat 1 and 2 are referenced directly
+static const uint8_t * const vp8_dct_cat_prob[] =
 {
-    vp8_dct_cat1_prob,
-    vp8_dct_cat2_prob,
     vp8_dct_cat3_prob,
     vp8_dct_cat4_prob,
     vp8_dct_cat5_prob,
```