aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRoman Shaposhnik <roman@shaposhnik.org>2004-03-25 01:46:40 +0000
committerRoman Shaposhnik <roman@shaposhnik.org>2004-03-25 01:46:40 +0000
commit901ee2bea3e51ec4b0755f434c11ef1554acb189 (patch)
tree189e018a56ffca9980a83c9d656b0317c1b25351
parent7906085fcc33feb5a0c617f5e01065bb9d0caa86 (diff)
downloadffmpeg-901ee2bea3e51ec4b0755f434c11ef1554acb189.tar.gz
* Some significant clean-up of dv_decode_ac (it looks really simple now, take a look for yourself).
* Additional optimizations of the decoder. It now runs at 55fps on my desktop; it used to be ~45fps.
Originally committed as revision 2926 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r--libavcodec/dv.c162
1 files changed, 67 insertions, 95 deletions
diff --git a/libavcodec/dv.c b/libavcodec/dv.c
index 22d966dd8d..5f1eaaa3b4 100644
--- a/libavcodec/dv.c
+++ b/libavcodec/dv.c
@@ -1,6 +1,7 @@
/*
* DV decoder
* Copyright (c) 2002 Fabrice Bellard.
+ * Copyright (c) 2004 Roman Shaposhnik.
*
* DV encoder
* Copyright (c) 2003 Roman Shaposhnik.
@@ -39,7 +40,7 @@ typedef struct DVVideoContext {
uint8_t *buf;
uint8_t dv_zigzag[2][64];
- uint8_t dv_idct_shift[2][22][64];
+ uint8_t dv_idct_shift[2][2][22][64];
void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
void (*fdct[2])(DCTELEM *block);
@@ -77,15 +78,17 @@ static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
for(i = 1; i < 64; i++) {
/* 88 table */
j = perm[i];
- s->dv_idct_shift[0][q][j] =
+ s->dv_idct_shift[0][0][q][j] =
dv_quant_shifts[q][dv_88_areas[i]] + 1;
+ s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1;
}
/* 248DCT */
for(i = 1; i < 64; i++) {
/* 248 table */
- s->dv_idct_shift[1][q][i] =
+ s->dv_idct_shift[0][1][q][i] =
dv_quant_shifts[q][dv_248_areas[i]] + 1;
+ s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1;
}
}
}
@@ -98,8 +101,11 @@ static int dvvideo_init(AVCodecContext *avctx)
int i, j;
if (!done) {
- int i;
VLC dv_vlc;
+ uint16_t new_dv_vlc_bits[NB_DV_VLC*2];
+ uint8_t new_dv_vlc_len[NB_DV_VLC*2];
+ uint8_t new_dv_vlc_run[NB_DV_VLC*2];
+ int16_t new_dv_vlc_level[NB_DV_VLC*2];
done = 1;
@@ -107,6 +113,7 @@ static int dvvideo_init(AVCodecContext *avctx)
if (!dv_vlc_map)
return -ENOMEM;
+ /* dv_anchor lets each thread know its Id */
dv_anchor = av_malloc(12*27*sizeof(void*));
if (!dv_anchor) {
av_free(dv_vlc_map);
@@ -114,11 +121,30 @@ static int dvvideo_init(AVCodecContext *avctx)
}
for (i=0; i<12*27; i++)
dv_anchor[i] = (void*)(size_t)i;
-
+
+ /* it's faster to include sign bit in a generic VLC parsing scheme */
+ for (i=0, j=0; i<NB_DV_VLC; i++, j++) {
+ new_dv_vlc_bits[j] = dv_vlc_bits[i];
+ new_dv_vlc_len[j] = dv_vlc_len[i];
+ new_dv_vlc_run[j] = dv_vlc_run[i];
+ new_dv_vlc_level[j] = dv_vlc_level[i];
+
+ if (dv_vlc_level[i]) {
+ new_dv_vlc_bits[j] <<= 1;
+ new_dv_vlc_len[j]++;
+
+ j++;
+ new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1;
+ new_dv_vlc_len[j] = dv_vlc_len[i] + 1;
+ new_dv_vlc_run[j] = dv_vlc_run[i];
+ new_dv_vlc_level[j] = -dv_vlc_level[i];
+ }
+ }
+
/* NOTE: as a trick, we use the fact the no codes are unused
to accelerate the parsing of partial codes */
- init_vlc(&dv_vlc, TEX_VLC_BITS, NB_DV_VLC,
- dv_vlc_len, 1, 1, dv_vlc_bits, 2, 2);
+ init_vlc(&dv_vlc, TEX_VLC_BITS, j,
+ new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2);
dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM));
if (!dv_rl_vlc) {
@@ -135,8 +161,8 @@ static int dvvideo_init(AVCodecContext *avctx)
run= 0;
level= code;
} else {
- run= dv_vlc_run[code] + 1;
- level= dv_vlc_level[code];
+ run= new_dv_vlc_run[code] + 1;
+ level= new_dv_vlc_level[code];
}
dv_rl_vlc[i].len = len;
dv_rl_vlc[i].level = level;
@@ -216,6 +242,7 @@ static int dvvideo_end(AVCodecContext *avctx)
}
// #define VLC_DEBUG
+// #define printf(...) av_log(NULL, AV_LOG_ERROR, __VA_ARGS__)
typedef struct BlockInfo {
const uint8_t *shift_table;
@@ -259,115 +286,61 @@ static inline int put_bits_left(PutBitContext* s)
static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
{
int last_index = get_bits_size(gb);
- int last_re_index;
- int shift_offset = mb->shift_offset;
const uint8_t *scan_table = mb->scan_table;
const uint8_t *shift_table = mb->shift_table;
int pos = mb->pos;
- int level, pos1, run;
- int partial_bit_count;
- int sign = 0;
-#ifndef ALT_BITSTREAM_READER //FIXME
- int re_index=0;
- int re1_index=0;
-#endif
+ int partial_bit_count = mb->partial_bit_count;
+ int level, pos1, run, vlc_len, index;
+
OPEN_READER(re, gb);
+ UPDATE_CACHE(re, gb);
-#ifdef VLC_DEBUG
- printf("start\n");
-#endif
-
/* if we must parse a partial vlc, we do it here */
- partial_bit_count = mb->partial_bit_count;
if (partial_bit_count > 0) {
- uint8_t buf[4];
- uint32_t v;
- int l;
- GetBitContext gb1;
-
- /* build the dummy bit buffer */
- l = 16 - partial_bit_count;
- UPDATE_CACHE(re, gb);
-#ifdef VLC_DEBUG
- printf("show=%04x\n", SHOW_UBITS(re, gb, 16));
-#endif
- v = (mb->partial_bit_buffer << l) | SHOW_UBITS(re, gb, l);
- buf[0] = v >> 8;
- buf[1] = v;
-#ifdef VLC_DEBUG
- printf("v=%04x cnt=%d %04x\n",
- v, partial_bit_count, (mb->partial_bit_buffer << l));
-#endif
- /* try to read the codeword */
- init_get_bits(&gb1, buf, 4*8);
- {
- OPEN_READER(re1, &gb1);
- UPDATE_CACHE(re1, &gb1);
- GET_RL_VLC(level, run, re1, &gb1, dv_rl_vlc,
- TEX_VLC_BITS, 2);
- l = re1_index;
- CLOSE_READER(re1, &gb1);
- }
-#ifdef VLC_DEBUG
- printf("****run=%d level=%d size=%d\n", run, level, l);
-#endif
- /* compute codeword length -- if too long, we cannot parse */
- l -= partial_bit_count;
- if ((re_index + l + (level != 0)) > last_index) {
- mb->partial_bit_count += (last_index - re_index);
- mb->partial_bit_buffer = v >> (16 - mb->partial_bit_count);
- return;
- }
-
- /* skip read bits */
- last_re_index = 0; /* avoid warning */
- re_index += l;
- /* by definition, if we can read the vlc, all partial bits
- will be read (otherwise we could have read the vlc before) */
- mb->partial_bit_count = 0;
- UPDATE_CACHE(re, gb);
- goto handle_vlc;
+ re_cache = ((unsigned)re_cache >> partial_bit_count) |
+ (mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count));
+ re_index -= partial_bit_count;
+ mb->partial_bit_count = 0;
}
/* get the AC coefficients until last_index is reached */
for(;;) {
- UPDATE_CACHE(re, gb);
#ifdef VLC_DEBUG
- printf("%2d: bits=%04x index=%d\n",
- pos, SHOW_UBITS(re, gb, 16), re_index);
+ printf("%2d: bits=%04x index=%d\n", pos, SHOW_UBITS(re, gb, 16), re_index);
#endif
- last_re_index = re_index;
- GET_RL_VLC(level, run, re, gb, dv_rl_vlc,
- TEX_VLC_BITS, 2);
- handle_vlc:
-#ifdef VLC_DEBUG
- printf("run=%d level=%d\n", run, level);
-#endif
- if (level) {
- sign = SHOW_SBITS(re, gb, 1);
- LAST_SKIP_BITS(re, gb, 1);
- }
- if (re_index > last_index) {
+ /* our own optimized GET_RL_VLC */
+ index = NEG_USR32(re_cache, TEX_VLC_BITS);
+ vlc_len = dv_rl_vlc[index].len;
+ if (vlc_len < 0) {
+ index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level;
+ vlc_len = TEX_VLC_BITS - vlc_len;
+ }
+ level = dv_rl_vlc[index].level;
+ run = dv_rl_vlc[index].run;
+
+ /* gotta check if we're still within gb boundaries */
+ if (re_index + vlc_len > last_index) {
/* should be < 16 bits otherwise a codeword could have been parsed */
- re_index = last_re_index;
- UPDATE_CACHE(re, gb);
mb->partial_bit_count = last_index - re_index;
- mb->partial_bit_buffer = SHOW_UBITS(re, gb, mb->partial_bit_count);
+ mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
re_index = last_index;
break;
}
-
+ re_index += vlc_len;
+
+#ifdef VLC_DEBUG
+ printf("run=%d level=%d\n", run, level);
+#endif
pos += run;
if (pos >= 64)
break;
if (level) {
- level = (level ^ sign) - sign;
pos1 = scan_table[pos];
- level = level << (shift_table[pos1] + shift_offset);
- block[pos1] = level;
- // printf("run=%d level=%d shift=%d\n", run, level, shift_table[pos1]);
+ block[pos1] = level << shift_table[pos1];
}
+
+ UPDATE_CACHE(re, gb);
}
CLOSE_READER(re, gb);
mb->pos = pos;
@@ -429,8 +402,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
mb->dct_mode = dct_mode;
mb->scan_table = s->dv_zigzag[dct_mode];
class1 = get_bits(&gb, 2);
- mb->shift_offset = (class1 == 3);
- mb->shift_table = s->dv_idct_shift[dct_mode]
+ mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode]
[quant + dv_quant_offset[class1]];
dc = dc << 2;
/* convert to unsigned because 128 is not added in the