diff options
author | Andreas Rheinhardt <andreas.rheinhardt@outlook.com> | 2023-09-20 16:46:23 +0200 |
---|---|---|
committer | Andreas Rheinhardt <andreas.rheinhardt@outlook.com> | 2023-10-31 20:46:59 +0100 |
commit | 1fee3a3dceb323bc9996473b10a6f9b230720252 (patch) | |
tree | bf33fb79b953313e4ea2e3a8a4869b4bcd5f63f1 | |
parent | edc50658d967f893bb1e90666e7d400b1932fa45 (diff) | |
download | ffmpeg-1fee3a3dceb323bc9996473b10a6f9b230720252.tar.gz |
avcodec/vp3: Make VLC tables static where possible
This is especially important for frame-threaded decoders like
this one, because up until now each thread had an identical
copy of all these VLC tables.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
-rw-r--r-- | libavcodec/vp3.c | 162 |
1 files changed, 81 insertions, 81 deletions
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c index 25bfa9b094..d4cdfd9e53 100644 --- a/libavcodec/vp3.c +++ b/libavcodec/vp3.c @@ -38,6 +38,7 @@ #include "libavutil/emms.h" #include "libavutil/imgutils.h" #include "libavutil/mem_internal.h" +#include "libavutil/thread.h" #include "avcodec.h" #include "codec_internal.h" @@ -158,6 +159,18 @@ static const uint8_t vp4_pred_block_type_map[8] = { [MODE_INTER_FOURMV] = VP4_DC_INTER, }; +static VLCElem superblock_run_length_vlc[88]; /* version < 2 */ +static VLCElem fragment_run_length_vlc[56]; /* version < 2 */ +static VLCElem motion_vector_vlc[112]; /* version < 2 */ + +// The VP4 tables reuse this vlc. +static VLCElem mode_code_vlc[18 + 2084 * CONFIG_VP4_DECODER]; + +#if CONFIG_VP4_DECODER +static const VLCElem *vp4_mv_vlc_table[2][7]; /* version >= 2 */ +static const VLCElem *block_pattern_vlc[2]; /* version >= 2 */ +#endif + typedef struct { int dc; int type; @@ -280,13 +293,6 @@ typedef struct Vp3DecodeContext { the others are four groups of 16 VLCs each for ac coefficients. */ VLC coeff_vlc[5 * 16]; - VLC superblock_run_length_vlc; /* version < 2 */ - VLC fragment_run_length_vlc; /* version < 2 */ - VLC block_pattern_vlc[2]; /* version >= 2*/ - VLC mode_code_vlc; - VLC motion_vector_vlc; /* version < 2 */ - VLC vp4_mv_vlc[2][7]; /* version >=2 */ - /* these arrays need to be on 16-byte boundaries since SSE2 operations * index into them */ DECLARE_ALIGNED(16, int16_t, qmat)[3][2][3][64]; ///< qmat[qpi][is_inter][plane] @@ -362,17 +368,6 @@ static av_cold int vp3_decode_end(AVCodecContext *avctx) for (int i = 0; i < FF_ARRAY_ELEMS(s->coeff_vlc); i++) ff_vlc_free(&s->coeff_vlc[i]); - ff_vlc_free(&s->superblock_run_length_vlc); - ff_vlc_free(&s->fragment_run_length_vlc); - ff_vlc_free(&s->mode_code_vlc); - ff_vlc_free(&s->motion_vector_vlc); - - for (int j = 0; j < 2; j++) - for (int i = 0; i < 7; i++) - ff_vlc_free(&s->vp4_mv_vlc[j][i]); - - for (int i = 0; i < 2; i++) - ff_vlc_free(&s->block_pattern_vlc[i]); return 0; } @@ -493,7 +488,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) else bit ^= 1; - current_run = get_vlc2(gb, s->superblock_run_length_vlc.table, + current_run = get_vlc2(gb, superblock_run_length_vlc, SUPERBLOCK_VLC_BITS, 2); if (current_run == 34) current_run += get_bits(gb, 12); @@ -527,7 +522,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) else bit ^= 1; - current_run = get_vlc2(gb, s->superblock_run_length_vlc.table, + current_run = get_vlc2(gb, superblock_run_length_vlc, SUPERBLOCK_VLC_BITS, 2); if (current_run == 34) current_run += get_bits(gb, 12); @@ -607,7 +602,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) * that cares about the fragment coding runs */ if (current_run-- == 0) { bit ^= 1; - current_run = get_vlc2(gb, s->fragment_run_length_vlc.table, 5, 2); + current_run = get_vlc2(gb, fragment_run_length_vlc, 5, 2); } coded = bit; } @@ -684,9 +679,9 @@ static int vp4_get_mb_count(Vp3DecodeContext *s, GetBitContext *gb) return v; } -static int vp4_get_block_pattern(Vp3DecodeContext *s, GetBitContext *gb, int *next_block_pattern_table) +static int vp4_get_block_pattern(GetBitContext *gb, int *next_block_pattern_table) { - int v = get_vlc2(gb, s->block_pattern_vlc[*next_block_pattern_table].table, 3, 2); + int v = get_vlc2(gb, block_pattern_vlc[*next_block_pattern_table], 3, 2); *next_block_pattern_table = vp4_block_pattern_table_selector[v]; return v + 1; } @@ -758,7 +753,7 @@ static int vp4_unpack_macroblocks(Vp3DecodeContext *s, GetBitContext *gb) if (mb_coded == SB_FULLY_CODED) pattern = 0xF; else if (mb_coded == SB_PARTIALLY_CODED) - pattern = vp4_get_block_pattern(s, gb, &next_block_pattern_table); + pattern = vp4_get_block_pattern(gb, &next_block_pattern_table); else pattern = 0; @@ -845,7 +840,7 @@ static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb) if (scheme == 7) coding_mode = get_bits(gb, 3); else - coding_mode = alphabet[get_vlc2(gb, s->mode_code_vlc.table, 3, 3)]; + coding_mode = alphabet[get_vlc2(gb, mode_code_vlc, 3, 3)]; s->macroblock_coding[current_macroblock] = coding_mode; for (k = 0; k < 4; k++) { @@ -886,11 +881,15 @@ static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb) return 0; } -static int vp4_get_mv(Vp3DecodeContext *s, GetBitContext *gb, int axis, int last_motion) +static int vp4_get_mv(GetBitContext *gb, int axis, int last_motion) { - int v = get_vlc2(gb, s->vp4_mv_vlc[axis][vp4_mv_table_selector[FFABS(last_motion)]].table, +#if CONFIG_VP4_DECODER + int v = get_vlc2(gb, vp4_mv_vlc_table[axis][vp4_mv_table_selector[FFABS(last_motion)]], VP4_MV_VLC_BITS, 2); return last_motion < 0 ? -v : v; +#else + return 0; +#endif } /* @@ -938,23 +937,23 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb) switch (s->macroblock_coding[current_macroblock]) { case MODE_GOLDEN_MV: if (coding_mode == 2) { /* VP4 */ - last_gold_motion_x = motion_x[0] = vp4_get_mv(s, gb, 0, last_gold_motion_x); - last_gold_motion_y = motion_y[0] = vp4_get_mv(s, gb, 1, last_gold_motion_y); + last_gold_motion_x = motion_x[0] = vp4_get_mv(gb, 0, last_gold_motion_x); + last_gold_motion_y = motion_y[0] = vp4_get_mv(gb, 1, last_gold_motion_y); break; } /* otherwise fall through */ case MODE_INTER_PLUS_MV: /* all 6 fragments use the same motion vector */ if (coding_mode == 0) { - motion_x[0] = get_vlc2(gb, s->motion_vector_vlc.table, + motion_x[0] = get_vlc2(gb, motion_vector_vlc, VP3_MV_VLC_BITS, 2); - motion_y[0] = get_vlc2(gb, s->motion_vector_vlc.table, + motion_y[0] = get_vlc2(gb, motion_vector_vlc, VP3_MV_VLC_BITS, 2); } else if (coding_mode == 1) { motion_x[0] = fixed_motion_vector_table[get_bits(gb, 6)]; motion_y[0] = fixed_motion_vector_table[get_bits(gb, 6)]; } else { /* VP4 */ - motion_x[0] = vp4_get_mv(s, gb, 0, last_motion_x); - motion_y[0] = vp4_get_mv(s, gb, 1, last_motion_y); + motion_x[0] = vp4_get_mv(gb, 0, last_motion_x); + motion_y[0] = vp4_get_mv(gb, 1, last_motion_y); } /* vector maintenance, only on MODE_INTER_PLUS_MV */ @@ -977,16 +976,16 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb) current_fragment = BLOCK_Y * s->fragment_width[0] + BLOCK_X; if (s->all_fragments[current_fragment].coding_method != MODE_COPY) { if (coding_mode == 0) { - motion_x[k] = get_vlc2(gb, s->motion_vector_vlc.table, + motion_x[k] = get_vlc2(gb, motion_vector_vlc, VP3_MV_VLC_BITS, 2); - motion_y[k] = get_vlc2(gb, s->motion_vector_vlc.table, + motion_y[k] = get_vlc2(gb, motion_vector_vlc, VP3_MV_VLC_BITS, 2); } else if (coding_mode == 1) { motion_x[k] = fixed_motion_vector_table[get_bits(gb, 6)]; motion_y[k] = fixed_motion_vector_table[get_bits(gb, 6)]; } else { /* VP4 */ - motion_x[k] = vp4_get_mv(s, gb, 0, prior_last_motion_x); - motion_y[k] = vp4_get_mv(s, gb, 1, prior_last_motion_y); + motion_x[k] = vp4_get_mv(gb, 0, prior_last_motion_x); + motion_y[k] = vp4_get_mv(gb, 1, prior_last_motion_y); } last_motion_x = motion_x[k]; last_motion_y = motion_y[k]; @@ -1111,7 +1110,7 @@ static int unpack_block_qpis(Vp3DecodeContext *s, GetBitContext *gb) else bit ^= 1; - run_length = get_vlc2(gb, s->superblock_run_length_vlc.table, + run_length = get_vlc2(gb, superblock_run_length_vlc, SUPERBLOCK_VLC_BITS, 2); if (run_length == 34) run_length += get_bits(gb, 12); @@ -2258,6 +2257,48 @@ static void render_slice(Vp3DecodeContext *s, int slice) s->height - 16)); } +static av_cold void init_tables_once(void) +{ + VLCInitState state = VLC_INIT_STATE(mode_code_vlc); + + VLC_INIT_STATIC_TABLE_FROM_LENGTHS(superblock_run_length_vlc, + SUPERBLOCK_VLC_BITS, 34, + superblock_run_length_vlc_lens, 1, + NULL, 0, 0, 1, 0); + + VLC_INIT_STATIC_TABLE_FROM_LENGTHS(fragment_run_length_vlc, 5, 30, + fragment_run_length_vlc_len, 1, + NULL, 0, 0, 0, 0); + + VLC_INIT_STATIC_TABLE_FROM_LENGTHS(motion_vector_vlc, VP3_MV_VLC_BITS, 63, + &motion_vector_vlc_table[0][1], 2, + &motion_vector_vlc_table[0][0], 2, 1, + -31, 0); + + ff_vlc_init_tables_from_lengths(&state, 3, 8, + mode_code_vlc_len, 1, + NULL, 0, 0, 0, 0); + +#if CONFIG_VP4_DECODER + for (int j = 0; j < 2; j++) + for (int i = 0; i < 7; i++) { + vp4_mv_vlc_table[j][i] = + ff_vlc_init_tables_from_lengths(&state, VP4_MV_VLC_BITS, 63, + &vp4_mv_vlc[j][i][0][1], 2, + &vp4_mv_vlc[j][i][0][0], 2, 1, + -31, 0); + } + + /* version >= 2 */ + for (int i = 0; i < 2; i++) { + block_pattern_vlc[i] = + ff_vlc_init_tables(&state, 3, 14, + &vp4_block_pattern_vlc[i][0][1], 2, 1, + &vp4_block_pattern_vlc[i][0][0], 2, 1, 0); + } +#endif +} + /// Allocate tables for per-frame data in Vp3DecodeContext static av_cold int allocate_tables(AVCodecContext *avctx) { @@ -2316,6 +2357,7 @@ static av_cold int init_frames(Vp3DecodeContext *s) static av_cold int vp3_decode_init(AVCodecContext *avctx) { + static AVOnce init_static_once = AV_ONCE_INIT; Vp3DecodeContext *s = avctx->priv_data; int ret; int c_width; @@ -2445,49 +2487,7 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx) } } - ret = ff_vlc_init_from_lengths(&s->superblock_run_length_vlc, SUPERBLOCK_VLC_BITS, 34, - superblock_run_length_vlc_lens, 1, - NULL, 0, 0, 1, 0, avctx); - if (ret < 0) - return ret; - - ret = ff_vlc_init_from_lengths(&s->fragment_run_length_vlc, 5, 30, - fragment_run_length_vlc_len, 1, - NULL, 0, 0, 0, 0, avctx); - if (ret < 0) - return ret; - - ret = ff_vlc_init_from_lengths(&s->mode_code_vlc, 3, 8, - mode_code_vlc_len, 1, - NULL, 0, 0, 0, 0, avctx); - if (ret < 0) - return ret; - - ret = ff_vlc_init_from_lengths(&s->motion_vector_vlc, VP3_MV_VLC_BITS, 63, - &motion_vector_vlc_table[0][1], 2, - &motion_vector_vlc_table[0][0], 2, 1, - -31, 0, avctx); - if (ret < 0) - return ret; - -#if CONFIG_VP4_DECODER - for (int j = 0; j < 2; j++) - for (int i = 0; i < 7; i++) { - ret = ff_vlc_init_from_lengths(&s->vp4_mv_vlc[j][i], VP4_MV_VLC_BITS, 63, - &vp4_mv_vlc[j][i][0][1], 2, - &vp4_mv_vlc[j][i][0][0], 2, 1, -31, - 0, avctx); - if (ret < 0) - return ret; - } - - /* version >= 2 */ - for (int i = 0; i < 2; i++) - if ((ret = vlc_init(&s->block_pattern_vlc[i], 3, 14, - &vp4_block_pattern_vlc[i][0][1], 2, 1, - &vp4_block_pattern_vlc[i][0][0], 2, 1, 0)) < 0) - return ret; -#endif + ff_thread_once(&init_static_once, init_tables_once); return allocate_tables(avctx); } |