diff options
author | Vittorio Giovara <vittorio.giovara@gmail.com> | 2014-03-28 01:13:54 +0100 |
---|---|---|
committer | Diego Biurrun <diego@biurrun.de> | 2014-03-29 16:11:09 +0100 |
commit | 53c20f17c78d1d8a0fc2505868f201e69ff59cc5 (patch) | |
tree | 7d543871ede0f0adf63d3c5fcaef907275e42758 | |
parent | 6adf3bc42e36242d487636786e995149bbb849fe (diff) | |
download | ffmpeg-53c20f17c78d1d8a0fc2505868f201e69ff59cc5.tar.gz |
vp8: K&R formatting cosmetics
Signed-off-by: Diego Biurrun <diego@biurrun.de>
-rw-r--r-- | libavcodec/vp8.c | 949 | ||||
-rw-r--r-- | libavcodec/vp8.h | 19 | ||||
-rw-r--r-- | libavcodec/vp8_parser.c | 15 | ||||
-rw-r--r-- | libavcodec/vp8data.h | 188 | ||||
-rw-r--r-- | libavcodec/vp8dsp.c | 566 | ||||
-rw-r--r-- | libavcodec/vp8dsp.h | 4 |
6 files changed, 934 insertions, 807 deletions
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index 30e24cc893..a1d24cdc1f 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -24,12 +24,13 @@ */ #include "libavutil/imgutils.h" + #include "avcodec.h" #include "internal.h" -#include "vp8.h" -#include "vp8data.h" #include "rectangle.h" #include "thread.h" +#include "vp8.h" +#include "vp8data.h" #if ARCH_ARM # include "arm/vp8.h" @@ -91,7 +92,6 @@ static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src) return 0; } - static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem) { VP8Context *s = avctx->priv_data; @@ -124,22 +124,25 @@ static int update_dimensions(VP8Context *s, int width, int height) return ret; } - s->mb_width = (s->avctx->coded_width +15) / 16; - s->mb_height = (s->avctx->coded_height+15) / 16; + s->mb_width = (s->avctx->coded_width + 15) / 16; + s->mb_height = (s->avctx->coded_height + 15) / 16; - s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1); + s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && + (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1); if (!s->mb_layout) { // Frame threading and one thread - s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks)); - s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4); - } - else // Sliced threading - s->macroblocks_base = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks)); - s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); - s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); - s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData)); + s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) * + sizeof(*s->macroblocks)); + s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4); + } else // Sliced threading + s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) * + sizeof(*s->macroblocks)); + s->top_nnz = 
av_mallocz(s->mb_width * sizeof(*s->top_nnz)); + s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border)); + s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData)); for (i = 0; i < MAX_THREADS; i++) { - s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength)); + s->thread_data[i].filter_strength = + av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength)); #if HAVE_THREADS pthread_mutex_init(&s->thread_data[i].lock, NULL); pthread_cond_init(&s->thread_data[i].cond, NULL); @@ -150,7 +153,7 @@ static int update_dimensions(VP8Context *s, int width, int height) (!s->intra4x4_pred_mode_top && !s->mb_layout)) return AVERROR(ENOMEM); - s->macroblocks = s->macroblocks_base + 1; + s->macroblocks = s->macroblocks_base + 1; return 0; } @@ -207,13 +210,13 @@ static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2); - buf += 3*(s->num_coeff_partitions-1); - buf_size -= 3*(s->num_coeff_partitions-1); + buf += 3 * (s->num_coeff_partitions - 1); + buf_size -= 3 * (s->num_coeff_partitions - 1); if (buf_size < 0) return -1; - for (i = 0; i < s->num_coeff_partitions-1; i++) { - int size = AV_RL24(sizes + 3*i); + for (i = 0; i < s->num_coeff_partitions - 1; i++) { + int size = AV_RL24(sizes + 3 * i); if (buf_size - size < 0) return -1; @@ -246,13 +249,13 @@ static void get_quants(VP8Context *s) } else base_qi = yac_qi; - s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)]; - s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi , 7)]; - s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)]; + s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)]; + s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)]; + s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2; /* 101581>>16 is 
equivalent to 155/100 */ - s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16; - s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)]; - s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)]; + s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16; + s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)]; + s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)]; s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); @@ -317,24 +320,27 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile); if (!s->profile) - memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab)); + memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, + sizeof(s->put_pixels_tab)); else // profile 1-3 use bilinear, 4+ aren't defined so whatever - memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab)); + memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, + sizeof(s->put_pixels_tab)); - if (header_size > buf_size - 7*s->keyframe) { + if (header_size > buf_size - 7 * s->keyframe) { av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n"); return AVERROR_INVALIDDATA; } if (s->keyframe) { if (AV_RL24(buf) != 0x2a019d) { - av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf)); + av_log(s->avctx, AV_LOG_ERROR, + "Invalid start code 0x%x\n", AV_RL24(buf)); return AVERROR_INVALIDDATA; } - width = AV_RL16(buf+3) & 0x3fff; - height = AV_RL16(buf+5) & 0x3fff; - hscale = buf[4] >> 6; - vscale = buf[6] >> 6; + width = AV_RL16(buf + 3) & 0x3fff; + height = AV_RL16(buf + 5) & 0x3fff; + hscale = buf[4] >> 
6; + vscale = buf[6] >> 6; buf += 7; buf_size -= 7; @@ -344,11 +350,15 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) s->update_golden = s->update_altref = VP56_FRAME_CURRENT; for (i = 0; i < 4; i++) for (j = 0; j < 16; j++) - memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]], + memcpy(s->prob->token[i][j], + vp8_token_default_probs[i][vp8_coeff_band[j]], sizeof(s->prob->token[i][j])); - memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16)); - memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c)); - memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc)); + memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, + sizeof(s->prob->pred16x16)); + memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter, + sizeof(s->prob->pred8x8c)); + memcpy(s->prob->mvc, vp8_mv_default_prob, + sizeof(s->prob->mvc)); memset(&s->segmentation, 0, sizeof(s->segmentation)); memset(&s->lf_delta, 0, sizeof(s->lf_delta)); } @@ -382,10 +392,9 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) } if (!s->macroblocks_base || /* first frame */ - width != s->avctx->width || height != s->avctx->height) { + width != s->avctx->width || height != s->avctx->height) if ((ret = update_dimensions(s, width, height)) < 0) return ret; - } get_quants(s); @@ -405,7 +414,7 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) for (i = 0; i < 4; i++) for (j = 0; j < 8; j++) for (k = 0; k < 3; k++) - for (l = 0; l < NUM_DCT_TOKENS-1; l++) + for (l = 0; l < NUM_DCT_TOKENS - 1; l++) if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) { int prob = vp8_rac_get_uint(c, 8); for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++) @@ -437,7 +446,8 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) return 0; } -static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src) +static 
av_always_inline +void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src) { dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x); dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y); @@ -461,13 +471,13 @@ static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) x += 8; } else { // small_mvtree - const uint8_t *ps = p+2; + const uint8_t *ps = p + 2; bit = vp56_rac_get_prob(c, *ps); - ps += 1 + 3*bit; - x += 4*bit; + ps += 1 + 3 * bit; + x += 4 * bit; bit = vp56_rac_get_prob(c, *ps); ps += 1 + bit; - x += 2*bit; + x += 2 * bit; x += vp56_rac_get_prob(c, *ps); } @@ -478,10 +488,10 @@ static av_always_inline const uint8_t *get_submv_prob(uint32_t left, uint32_t top) { if (left == top) - return vp8_submv_prob[4-!!left]; + return vp8_submv_prob[4 - !!left]; if (!top) return vp8_submv_prob[2]; - return vp8_submv_prob[1-!!left]; + return vp8_submv_prob[1 - !!left]; } /** @@ -495,9 +505,8 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int lay int n, num; VP8Macroblock *top_mb; VP8Macroblock *left_mb = &mb[-1]; - const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning], - *mbsplits_top, - *mbsplits_cur, *firstidx; + const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning]; + const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx; VP56mv *top_mv; VP56mv *left_mv = left_mb->bmv; VP56mv *cur_mv = mb->bmv; @@ -505,23 +514,22 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int lay if (!layout) // layout is inlined, s->mb_layout is not top_mb = &mb[2]; else - top_mb = &mb[-s->mb_width-1]; + top_mb = &mb[-s->mb_width - 1]; mbsplits_top = vp8_mbsplits[top_mb->partitioning]; - top_mv = top_mb->bmv; + top_mv = top_mb->bmv; if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) { - if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) { + if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]); - } else { + else part_idx = 
VP8_SPLITMVMODE_8x8; - } } else { part_idx = VP8_SPLITMVMODE_4x4; } - num = vp8_mbsplit_count[part_idx]; - mbsplits_cur = vp8_mbsplits[part_idx], - firstidx = vp8_mbfirstidx[part_idx]; + num = vp8_mbsplit_count[part_idx]; + mbsplits_cur = vp8_mbsplits[part_idx], + firstidx = vp8_mbfirstidx[part_idx]; mb->partitioning = part_idx; for (n = 0; n < num; n++) { @@ -532,7 +540,7 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int lay if (!(k & 3)) left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]); else - left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]); + left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]); if (k <= 3) above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]); else @@ -560,11 +568,12 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int lay } static av_always_inline -void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout) +void decode_mvs(VP8Context *s, VP8Macroblock *mb, + int mb_x, int mb_y, int layout) { - VP8Macroblock *mb_edge[3] = { 0 /* top */, + VP8Macroblock *mb_edge[3] = { 0 /* top */, mb - 1 /* left */, - 0 /* top-left */ }; + 0 /* top-left */ }; enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT }; int idx = CNT_ZERO; @@ -577,10 +586,9 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout if (!layout) { // layout is inlined (s->mb_layout is not) mb_edge[0] = mb + 2; mb_edge[2] = mb + 1; - } - else { - mb_edge[0] = mb - s->mb_width-1; - mb_edge[2] = mb - s->mb_width-2; + } else { + mb_edge[0] = mb - s->mb_width - 1; + mb_edge[2] = mb - s->mb_width - 2; } AV_ZERO32(&near_mv[0]); @@ -588,24 +596,25 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout AV_ZERO32(&near_mv[2]); /* Process MB on top, left and top-left */ - #define MV_EDGE_CHECK(n)\ - {\ - VP8Macroblock *edge = mb_edge[n];\ - int edge_ref = edge->ref_frame;\ - if (edge_ref != VP56_FRAME_CURRENT) {\ - uint32_t 
mv = AV_RN32A(&edge->mv);\ - if (mv) {\ - if (cur_sign_bias != sign_bias[edge_ref]) {\ - /* SWAR negate of the values in mv. */\ - mv = ~mv;\ - mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\ - }\ - if (!n || mv != AV_RN32A(&near_mv[idx]))\ - AV_WN32A(&near_mv[++idx], mv);\ - cnt[idx] += 1 + (n != 2);\ - } else\ - cnt[CNT_ZERO] += 1 + (n != 2);\ - }\ +#define MV_EDGE_CHECK(n) \ + { \ + VP8Macroblock *edge = mb_edge[n]; \ + int edge_ref = edge->ref_frame; \ + if (edge_ref != VP56_FRAME_CURRENT) { \ + uint32_t mv = AV_RN32A(&edge->mv); \ + if (mv) { \ + if (cur_sign_bias != sign_bias[edge_ref]) { \ + /* SWAR negate of the values in mv. */ \ + mv = ~mv; \ + mv = ((mv & 0x7fff7fff) + \ + 0x00010001) ^ (mv & 0x80008000); \ + } \ + if (!n || mv != AV_RN32A(&near_mv[idx])) \ + AV_WN32A(&near_mv[++idx], mv); \ + cnt[idx] += 1 + (n != 2); \ + } else \ + cnt[CNT_ZERO] += 1 + (n != 2); \ + } \ } MV_EDGE_CHECK(0) @@ -617,7 +626,8 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout mb->mode = VP8_MVMODE_MV; /* If we have three distinct MVs, merge first and last if they're the same */ - if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT])) + if (cnt[CNT_SPLITMV] && + AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT])) cnt[CNT_NEAREST] += 1; /* Swap near and nearest if necessary */ @@ -628,7 +638,6 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) { if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) { - /* Choose the best mv out of 0,0 and the nearest mv */ clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]); cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + @@ -637,10 +646,10 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout if 
(vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) { mb->mode = VP8_MVMODE_SPLIT; - mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1]; + mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1]; } else { - mb->mv.y += read_mv_component(c, s->prob->mvc[0]); - mb->mv.x += read_mv_component(c, s->prob->mvc[1]); + mb->mv.y += read_mv_component(c, s->prob->mvc[0]); + mb->mv.x += read_mv_component(c, s->prob->mvc[1]); mb->bmv[0] = mb->mv; } } else { @@ -670,8 +679,8 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, } if (keyframe) { int x, y; - uint8_t* top; - uint8_t* const left = s->intra4x4_pred_mode_left; + uint8_t *top; + uint8_t *const left = s->intra4x4_pred_mode_left; if (layout == 1) top = mb->intra4x4_pred_mode_top; else @@ -679,16 +688,17 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, for (y = 0; y < 4; y++) { for (x = 0; x < 4; x++) { const uint8_t *ctx; - ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; + ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); - left[y] = top[x] = *intra4x4; + left[y] = top[x] = *intra4x4; intra4x4++; } } } else { int i; for (i = 0; i < 16; i++) - intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); + intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, + vp8_pred4x4_prob_inter); } } @@ -707,7 +717,8 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, mb->skip = s->mbskip_enabled ? 
vp56_rac_get_prob(c, s->prob->mbskip) : 0; if (s->keyframe) { - mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); + mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, + vp8_pred16x16_prob_intra); if (mb->mode == MODE_I4x4) { decode_intra4x4_modes(s, c, mb, mb_x, 1, layout); @@ -717,19 +728,21 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, AV_WN32A(mb->intra4x4_pred_mode_top, modes); else AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes); - AV_WN32A( s->intra4x4_pred_mode_left, modes); + AV_WN32A(s->intra4x4_pred_mode_left, modes); } - mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); - mb->ref_frame = VP56_FRAME_CURRENT; + mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, + vp8_pred8x8c_prob_intra); + mb->ref_frame = VP56_FRAME_CURRENT; } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { // inter MB, 16.2 if (vp56_rac_get_prob_branchy(c, s->prob->last)) - mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ? - VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN; + mb->ref_frame = + vp56_rac_get_prob(c, s->prob->golden) ? 
VP56_FRAME_GOLDEN2 /* altref */ + : VP56_FRAME_GOLDEN; else mb->ref_frame = VP56_FRAME_PREVIOUS; - s->ref_count[mb->ref_frame-1]++; + s->ref_count[mb->ref_frame - 1]++; // motion vectors, 16.3 decode_mvs(s, mb, mb_x, mb_y, layout); @@ -740,26 +753,29 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, if (mb->mode == MODE_I4x4) decode_intra4x4_modes(s, c, mb, mb_x, 0, layout); - mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); - mb->ref_frame = VP56_FRAME_CURRENT; - mb->partitioning = VP8_SPLITMVMODE_NONE; + mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, + s->prob->pred8x8c); + mb->ref_frame = VP56_FRAME_CURRENT; + mb->partitioning = VP8_SPLITMVMODE_NONE; AV_ZERO32(&mb->bmv[0]); } } #ifndef decode_block_coeffs_internal /** - * @param r arithmetic bitstream reader context + * @param r arithmetic bitstream reader context * @param block destination for block coefficients * @param probs probabilities to use when reading trees from the bitstream - * @param i initial coeff index, 0 unless a separate DC block is coded - * @param qmul array holding the dc/ac dequant factor at position 0/1 + * @param i initial coeff index, 0 unless a separate DC block is coded + * @param qmul array holding the dc/ac dequant factor at position 0/1 + * * @return 0 if no coeffs were decoded * otherwise, the index of the last coeff decoded plus one */ static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16], - uint8_t probs[16][3][NUM_DCT_TOKENS-1], - int i, uint8_t *token_prob, int16_t qmul[2]) + uint8_t probs[16][3][NUM_DCT_TOKENS - 1], + int i, uint8_t *token_prob, + int16_t qmul[2]) { VP56RangeCoder c = *r; goto skip_eob; @@ -778,7 +794,7 @@ skip_eob: if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1 coeff = 1; - token_prob = probs[i+1][1]; + token_prob = probs[i + 1][1]; } else { if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4 coeff = vp56_rac_get_prob_branchy(&c, 
token_prob[4]); @@ -789,21 +805,21 @@ skip_eob: // DCT_CAT* if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) { if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1 - coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]); + coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]); } else { // DCT_CAT2 coeff = 7; coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1; coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]); } } else { // DCT_CAT3 and up - int a = vp56_rac_get_prob(&c, token_prob[8]); - int b = vp56_rac_get_prob(&c, token_prob[9+a]); - int cat = (a<<1) + b; - coeff = 3 + (8<<cat); + int a = vp56_rac_get_prob(&c, token_prob[8]); + int b = vp56_rac_get_prob(&c, token_prob[9 + a]); + int cat = (a << 1) + b; + coeff = 3 + (8 << cat); coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]); } } - token_prob = probs[i+1][2]; + token_prob = probs[i + 1][2]; } block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i]; } while (++i < 16); @@ -814,19 +830,20 @@ skip_eob: #endif /** - * @param c arithmetic bitstream reader context - * @param block destination for block coefficients - * @param probs probabilities to use when reading trees from the bitstream - * @param i initial coeff index, 0 unless a separate DC block is coded + * @param c arithmetic bitstream reader context + * @param block destination for block coefficients + * @param probs probabilities to use when reading trees from the bitstream + * @param i initial coeff index, 0 unless a separate DC block is coded * @param zero_nhood the initial prediction context for number of surrounding * all-zero blocks (only left/top, so 0-2) - * @param qmul array holding the dc/ac dequant factor at position 0/1 + * @param qmul array holding the dc/ac dequant factor at position 0/1 + * * @return 0 if no coeffs were decoded * otherwise, the index of the last coeff decoded plus one */ static av_always_inline int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16], - uint8_t 
probs[16][3][NUM_DCT_TOKENS-1], + uint8_t probs[16][3][NUM_DCT_TOKENS - 1], int i, int zero_nhood, int16_t qmul[2]) { uint8_t *token_prob = probs[i][zero_nhood]; @@ -836,8 +853,8 @@ int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16], } static av_always_inline -void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb, - uint8_t t_nnz[9], uint8_t l_nnz[9]) +void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, + VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9]) { int i, x, y, luma_start = 0, luma_ctx = 3; int nnz_pred, nnz, nnz_total = 0; @@ -848,28 +865,31 @@ void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Ma nnz_pred = t_nnz[8] + l_nnz[8]; // decode DC values and do hadamard - nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred, - s->qmat[segment].luma_dc_qmul); + nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, + nnz_pred, s->qmat[segment].luma_dc_qmul); l_nnz[8] = t_nnz[8] = !!nnz; if (nnz) { nnz_total += nnz; - block_dc = 1; + block_dc = 1; if (nnz == 1) s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc); else s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc); } luma_start = 1; - luma_ctx = 0; + luma_ctx = 0; } // luma blocks for (y = 0; y < 4; y++) for (x = 0; x < 4; x++) { nnz_pred = l_nnz[y] + t_nnz[x]; - nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start, - nnz_pred, s->qmat[segment].luma_qmul); - // nnz+block_dc may be one more than the actual last index, but we don't care + nnz = decode_block_coeffs(c, td->block[y][x], + s->prob->token[luma_ctx], + luma_start, nnz_pred, + s->qmat[segment].luma_qmul); + /* nnz+block_dc may be one more than the actual last index, + * but we don't care */ td->non_zero_count_cache[y][x] = nnz + block_dc; t_nnz[x] = l_nnz[y] = !!nnz; nnz_total += nnz; @@ -881,12 +901,14 @@ void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Ma 
for (i = 4; i < 6; i++) for (y = 0; y < 2; y++) for (x = 0; x < 2; x++) { - nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x]; - nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0, - nnz_pred, s->qmat[segment].chroma_qmul); - td->non_zero_count_cache[i][(y<<1)+x] = nnz; - t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz; - nnz_total += nnz; + nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x]; + nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x], + s->prob->token[2], + 0, nnz_pred, + s->qmat[segment].chroma_qmul); + td->non_zero_count_cache[i][(y << 1) + x] = nnz; + t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz; + nnz_total += nnz; } // if there were no coded coeffs despite the macroblock not being marked skip, @@ -897,65 +919,67 @@ void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Ma } static av_always_inline -void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, +void backup_mb_border(uint8_t *top_border, uint8_t *src_y, + uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple) { - AV_COPY128(top_border, src_y + 15*linesize); + AV_COPY128(top_border, src_y + 15 * linesize); if (!simple) { - AV_COPY64(top_border+16, src_cb + 7*uvlinesize); - AV_COPY64(top_border+24, src_cr + 7*uvlinesize); + AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize); + AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize); } } static av_always_inline -void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, - int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width, - int simple, int xchg) +void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, + uint8_t *src_cr, int linesize, int uvlinesize, int mb_x, + int mb_y, int mb_width, int simple, int xchg) { - uint8_t *top_border_m1 = top_border-32; // for TL prediction - src_y -= linesize; + uint8_t *top_border_m1 = top_border - 32; // for TL prediction + src_y -= linesize; src_cb -= uvlinesize; src_cr 
-= uvlinesize; -#define XCHG(a,b,xchg) do { \ - if (xchg) AV_SWAP64(b,a); \ - else AV_COPY64(b,a); \ +#define XCHG(a, b, xchg) \ + do { \ + if (xchg) \ + AV_SWAP64(b, a); \ + else \ + AV_COPY64(b, a); \ } while (0) - XCHG(top_border_m1+8, src_y-8, xchg); - XCHG(top_border, src_y, xchg); - XCHG(top_border+8, src_y+8, 1); - if (mb_x < mb_width-1) - XCHG(top_border+32, src_y+16, 1); + XCHG(top_border_m1 + 8, src_y - 8, xchg); + XCHG(top_border, src_y, xchg); + XCHG(top_border + 8, src_y + 8, 1); + if (mb_x < mb_width - 1) + XCHG(top_border + 32, src_y + 16, 1); // only copy chroma for normal loop filter // or to initialize the top row to 127 if (!simple || !mb_y) { - XCHG(top_border_m1+16, src_cb-8, xchg); - XCHG(top_border_m1+24, src_cr-8, xchg); - XCHG(top_border+16, src_cb, 1); - XCHG(top_border+24, src_cr, 1); + XCHG(top_border_m1 + 16, src_cb - 8, xchg); + XCHG(top_border_m1 + 24, src_cr - 8, xchg); + XCHG(top_border + 16, src_cb, 1); + XCHG(top_border + 24, src_cr, 1); } } static av_always_inline int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y) { - if (!mb_x) { + if (!mb_x) return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; - } else { + else return mb_y ? mode : LEFT_DC_PRED8x8; - } } static av_always_inline int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y) { - if (!mb_x) { + if (!mb_x) return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8; - } else { + else return mb_y ? mode : HOR_PRED8x8; - } } static av_always_inline @@ -968,7 +992,7 @@ int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y) return !mb_y ? DC_127_PRED8x8 : mode; case HOR_PRED8x8: return !mb_x ? DC_129_PRED8x8 : mode; - case PLANE_PRED8x8 /*TM*/: + case PLANE_PRED8x8: /* TM */ return check_tm_pred8x8_mode(mode, mb_x, mb_y); } return mode; @@ -1007,7 +1031,8 @@ int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf return !mb_x ? 
DC_129_PRED : mode; case TM_VP8_PRED: return check_tm_pred4x4_mode(mode, mb_x, mb_y); - case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC + case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions + * as 16x16/8x8 DC */ case DIAG_DOWN_RIGHT_PRED: case VERT_RIGHT_PRED: case HOR_DOWN_PRED: @@ -1025,10 +1050,10 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], int x, y, mode, nnz; uint32_t tr; - // for the first row, we need to run xchg_mb_border to init the top edge to 127 - // otherwise, skip it if we aren't going to deblock + /* for the first row, we need to run xchg_mb_border to init the top edge + * to 127 otherwise, skip it if we aren't going to deblock */ if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0) - xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], + xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, s->filter.simple, 1); @@ -1046,10 +1071,9 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], // if we're on the right edge of the frame, said edge is extended // from the top macroblock - if (mb_y && - mb_x == s->mb_width-1) { - tr = tr_right[-1]*0x01010101u; - tr_right = (uint8_t *)&tr; + if (mb_y && mb_x == s->mb_width - 1) { + tr = tr_right[-1] * 0x01010101u; + tr_right = (uint8_t *) &tr; } if (mb->skip) @@ -1059,27 +1083,29 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], uint8_t *topright = ptr + 4 - s->linesize; for (x = 0; x < 4; x++) { int copy = 0, linesize = s->linesize; - uint8_t *dst = ptr+4*x; - DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8]; + uint8_t *dst = ptr + 4 * x; + DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8]; if ((y == 0 || x == 3) && mb_y == 0) { topright = tr_top; } else if (x == 3) topright = tr_right; - mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy); + mode = 
check_intra_pred4x4_mode_emuedge(intra4x4[x], + mb_x + x, mb_y + y, + &copy); if (copy) { - dst = copy_dst + 12; + dst = copy_dst + 12; linesize = 8; if (!(mb_y + y)) { copy_dst[3] = 127U; - AV_WN32A(copy_dst+4, 127U * 0x01010101U); + AV_WN32A(copy_dst + 4, 127U * 0x01010101U); } else { - AV_COPY32(copy_dst+4, ptr+4*x-s->linesize); + AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize); if (!(mb_x + x)) { copy_dst[3] = 129U; } else { - copy_dst[3] = ptr[4*x-s->linesize-1]; + copy_dst[3] = ptr[4 * x - s->linesize - 1]; } } if (!(mb_x + x)) { @@ -1088,31 +1114,33 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], copy_dst[27] = copy_dst[35] = 129U; } else { - copy_dst[11] = ptr[4*x -1]; - copy_dst[19] = ptr[4*x+s->linesize -1]; - copy_dst[27] = ptr[4*x+s->linesize*2-1]; - copy_dst[35] = ptr[4*x+s->linesize*3-1]; + copy_dst[11] = ptr[4 * x - 1]; + copy_dst[19] = ptr[4 * x + s->linesize - 1]; + copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1]; + copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1]; } } s->hpc.pred4x4[mode](dst, topright, linesize); if (copy) { - AV_COPY32(ptr+4*x , copy_dst+12); - AV_COPY32(ptr+4*x+s->linesize , copy_dst+20); - AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28); - AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36); + AV_COPY32(ptr + 4 * x, copy_dst + 12); + AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20); + AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28); + AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36); } nnz = td->non_zero_count_cache[y][x]; if (nnz) { if (nnz == 1) - s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize); + s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x, + td->block[y][x], s->linesize); else - s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize); + s->vp8dsp.vp8_idct_add(ptr + 4 * x, + td->block[y][x], s->linesize); } topright += 4; } - ptr += 4*s->linesize; + ptr += 4 * s->linesize; intra4x4 += 4; } } @@ -1122,7 +1150,7 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t 
*dst[3], s->hpc.pred8x8[mode](dst[2], s->uvlinesize); if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0) - xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], + xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, s->filter.simple, 0); } @@ -1137,18 +1165,18 @@ static const uint8_t subpel_idx[3][8] = { /** * luma MC function * - * @param s VP8 decoding context - * @param dst target buffer for block data at block position - * @param ref reference picture buffer at origin (0, 0) - * @param mv motion vector (relative to block position) to get pixel data from - * @param x_off horizontal position of block from origin (0, 0) - * @param y_off vertical position of block from origin (0, 0) - * @param block_w width of block (16, 8 or 4) - * @param block_h height of block (always same as block_w) - * @param width width of src/dst plane data - * @param height height of src/dst plane data + * @param s VP8 decoding context + * @param dst target buffer for block data at block position + * @param ref reference picture buffer at origin (0, 0) + * @param mv motion vector (relative to block position) to get pixel data from + * @param x_off horizontal position of block from origin (0, 0) + * @param y_off vertical position of block from origin (0, 0) + * @param block_w width of block (16, 8 or 4) + * @param block_h height of block (always same as block_w) + * @param width width of src/dst plane data + * @param height height of src/dst plane data * @param linesize size of a single line of plane data, including padding - * @param mc_func motion compensation function pointers (bilinear or sixtap MC) + * @param mc_func motion compensation function pointers (bilinear or sixtap MC) */ static av_always_inline void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst, @@ -1162,8 +1190,8 @@ void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst, if (AV_RN32A(mv)) { int src_linesize = linesize; 
- int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx]; - int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my]; + int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx]; + int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my]; x_off += mv->x >> 2; y_off += mv->y >> 2; @@ -1176,46 +1204,50 @@ void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst, s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, EDGE_EMU_LINESIZE, linesize, - block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], - x_off - mx_idx, y_off - my_idx, width, height); + block_w + subpel_idx[1][mx], + block_h + subpel_idx[1][my], + x_off - mx_idx, y_off - my_idx, + width, height); src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; src_linesize = EDGE_EMU_LINESIZE; } mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my); } else { ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0); - mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); + mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, + linesize, block_h, 0, 0); } } /** * chroma MC function * - * @param s VP8 decoding context - * @param dst1 target buffer for block data at block position (U plane) - * @param dst2 target buffer for block data at block position (V plane) - * @param ref reference picture buffer at origin (0, 0) - * @param mv motion vector (relative to block position) to get pixel data from - * @param x_off horizontal position of block from origin (0, 0) - * @param y_off vertical position of block from origin (0, 0) - * @param block_w width of block (16, 8 or 4) - * @param block_h height of block (always same as block_w) - * @param width width of src/dst plane data - * @param height height of src/dst plane data + * @param s VP8 decoding context + * @param dst1 target buffer for block data at block position (U plane) + * @param dst2 target buffer for block data at block position (V plane) + * @param ref reference 
picture buffer at origin (0, 0) + * @param mv motion vector (relative to block position) to get pixel data from + * @param x_off horizontal position of block from origin (0, 0) + * @param y_off vertical position of block from origin (0, 0) + * @param block_w width of block (16, 8 or 4) + * @param block_h height of block (always same as block_w) + * @param width width of src/dst plane data + * @param height height of src/dst plane data * @param linesize size of a single line of plane data, including padding - * @param mc_func motion compensation function pointers (bilinear or sixtap MC) + * @param mc_func motion compensation function pointers (bilinear or sixtap MC) */ static av_always_inline -void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2, - ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off, - int block_w, int block_h, int width, int height, ptrdiff_t linesize, +void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, + uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv, + int x_off, int y_off, int block_w, int block_h, + int width, int height, ptrdiff_t linesize, vp8_mc_func mc_func[3][3]) { uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2]; if (AV_RN32A(mv)) { - int mx = mv->x&7, mx_idx = subpel_idx[0][mx]; - int my = mv->y&7, my_idx = subpel_idx[0][my]; + int mx = mv->x & 7, mx_idx = subpel_idx[0][mx]; + int my = mv->y & 7, my_idx = subpel_idx[0][my]; x_off += mv->x >> 3; y_off += mv->y >> 3; @@ -1239,7 +1271,7 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst EDGE_EMU_LINESIZE, linesize, block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], x_off - mx_idx, y_off - my_idx, width, height); - src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE* my_idx; + src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx; mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my); } else { mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, 
block_h, mx, my); @@ -1255,8 +1287,7 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst static av_always_inline void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], ThreadFrame *ref_frame, int x_off, int y_off, - int bx_off, int by_off, - int block_w, int block_h, + int bx_off, int by_off, int block_w, int block_h, int width, int height, VP56mv *mv) { VP56mv uvmv = *mv; @@ -1272,10 +1303,14 @@ void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], uvmv.x &= ~7; uvmv.y &= ~7; } - x_off >>= 1; y_off >>= 1; - bx_off >>= 1; by_off >>= 1; - width >>= 1; height >>= 1; - block_w >>= 1; block_h >>= 1; + x_off >>= 1; + y_off >>= 1; + bx_off >>= 1; + by_off >>= 1; + width >>= 1; + height >>= 1; + block_w >>= 1; + block_h >>= 1; vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off, dst[2] + by_off * s->uvlinesize + bx_off, ref_frame, &uvmv, x_off + bx_off, y_off + by_off, @@ -1284,22 +1319,24 @@ void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], } /* Fetch pixels for estimated mv 4 macroblocks ahead. - * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ -static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) + * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */ +static av_always_inline +void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, + int mb_xy, int ref) { /* Don't prefetch refs that haven't been used very often this frame. 
*/ - if (s->ref_count[ref-1] > (mb_xy >> 5)) { + if (s->ref_count[ref - 1] > (mb_xy >> 5)) { int x_off = mb_x << 4, y_off = mb_y << 4; - int mx = (mb->mv.x>>2) + x_off + 8; - int my = (mb->mv.y>>2) + y_off; - uint8_t **src= s->framep[ref]->tf.f->data; - int off= mx + (my + (mb_x&3)*4)*s->linesize + 64; + int mx = (mb->mv.x >> 2) + x_off + 8; + int my = (mb->mv.y >> 2) + y_off; + uint8_t **src = s->framep[ref]->tf.f->data; + int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64; /* For threading, a ff_thread_await_progress here might be useful, but * it actually slows down the decoder. Since a bad prefetch doesn't * generate bad decoder output, we don't run it here. */ - s->vdsp.prefetch(src[0]+off, s->linesize, 4); - off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64; - s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2); + s->vdsp.prefetch(src[0] + off, s->linesize, 4); + off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64; + s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2); } } @@ -1311,7 +1348,7 @@ void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb, int mb_x, int mb_y) { int x_off = mb_x << 4, y_off = mb_y << 4; - int width = 16*s->mb_width, height = 16*s->mb_height; + int width = 16 * s->mb_width, height = 16 * s->mb_height; ThreadFrame *ref = &s->framep[mb->ref_frame]->tf; VP56mv *bmv = mb->bmv; @@ -1327,35 +1364,38 @@ void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], /* Y */ for (y = 0; y < 4; y++) { for (x = 0; x < 4; x++) { - vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4, - ref, &bmv[4*y + x], - 4*x + x_off, 4*y + y_off, 4, 4, + vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4, + ref, &bmv[4 * y + x], + 4 * x + x_off, 4 * y + y_off, 4, 4, width, height, s->linesize, s->put_pixels_tab[2]); } } /* U/V */ - x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; + x_off >>= 1; + y_off >>= 1; + width >>= 1; + height >>= 1; for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { - uvmv.x 
= mb->bmv[ 2*y * 4 + 2*x ].x + - mb->bmv[ 2*y * 4 + 2*x+1].x + - mb->bmv[(2*y+1) * 4 + 2*x ].x + - mb->bmv[(2*y+1) * 4 + 2*x+1].x; - uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y + - mb->bmv[ 2*y * 4 + 2*x+1].y + - mb->bmv[(2*y+1) * 4 + 2*x ].y + - mb->bmv[(2*y+1) * 4 + 2*x+1].y; - uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2; - uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2; + uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x + + mb->bmv[2 * y * 4 + 2 * x + 1].x + + mb->bmv[(2 * y + 1) * 4 + 2 * x ].x + + mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x; + uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y + + mb->bmv[2 * y * 4 + 2 * x + 1].y + + mb->bmv[(2 * y + 1) * 4 + 2 * x ].y + + mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y; + uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT - 1))) >> 2; + uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT - 1))) >> 2; if (s->profile == 3) { uvmv.x &= ~7; uvmv.y &= ~7; } - vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4, - dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv, - 4*x + x_off, 4*y + y_off, 4, 4, + vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4, + dst[2] + 4 * y * s->uvlinesize + x * 4, ref, + &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4, width, height, s->uvlinesize, s->put_pixels_tab[2]); } @@ -1387,8 +1427,8 @@ void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], } } -static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td, - uint8_t *dst[3], VP8Macroblock *mb) +static av_always_inline +void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb) { int x, y, ch; @@ -1397,12 +1437,16 @@ static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td, for (y = 0; y < 4; y++) { uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]); if (nnz4) { - if (nnz4&~0x01010101) { + if (nnz4 & ~0x01010101) { for (x = 0; x < 4; x++) { - if ((uint8_t)nnz4 == 1) - s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize); - else if((uint8_t)nnz4 > 1) - s->vp8dsp.vp8_idct_add(y_dst+4*x, 
td->block[y][x], s->linesize); + if ((uint8_t) nnz4 == 1) + s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x, + td->block[y][x], + s->linesize); + else if ((uint8_t) nnz4 > 1) + s->vp8dsp.vp8_idct_add(y_dst + 4 * x, + td->block[y][x], + s->linesize); nnz4 >>= 8; if (!nnz4) break; @@ -1411,36 +1455,42 @@ static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td, s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize); } } - y_dst += 4*s->linesize; + y_dst += 4 * s->linesize; } } for (ch = 0; ch < 2; ch++) { - uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]); + uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]); if (nnz4) { - uint8_t *ch_dst = dst[1+ch]; - if (nnz4&~0x01010101) { + uint8_t *ch_dst = dst[1 + ch]; + if (nnz4 & ~0x01010101) { for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { - if ((uint8_t)nnz4 == 1) - s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize); - else if((uint8_t)nnz4 > 1) - s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize); + if ((uint8_t) nnz4 == 1) + s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x, + td->block[4 + ch][(y << 1) + x], + s->uvlinesize); + else if ((uint8_t) nnz4 > 1) + s->vp8dsp.vp8_idct_add(ch_dst + 4 * x, + td->block[4 + ch][(y << 1) + x], + s->uvlinesize); nnz4 >>= 8; if (!nnz4) goto chroma_idct_end; } - ch_dst += 4*s->uvlinesize; + ch_dst += 4 * s->uvlinesize; } } else { - s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize); + s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize); } } -chroma_idct_end: ; +chroma_idct_end: + ; } } -static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f ) +static av_always_inline +void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f) { int interior_limit, filter_level; @@ -1467,10 +1517,13 @@ static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *m f->filter_level = filter_level; 
f->inner_limit = interior_limit; - f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT; + f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || + mb->mode == VP8_MVMODE_SPLIT; } -static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y) +static av_always_inline +void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, + int mb_x, int mb_y) { int mbedge_lim, bedge_lim, hev_thresh; int filter_level = f->filter_level; @@ -1492,82 +1545,84 @@ static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Filter if (!filter_level) return; - bedge_lim = 2*filter_level + inner_limit; + bedge_lim = 2 * filter_level + inner_limit; mbedge_lim = bedge_lim + 4; hev_thresh = hev_thresh_lut[s->keyframe][filter_level]; if (mb_x) { - s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize, + s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize, mbedge_lim, inner_limit, hev_thresh); - s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize, + s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize, mbedge_lim, inner_limit, hev_thresh); } if (inner_filter) { - s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim, + s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, bedge_lim, inner_limit, hev_thresh); - s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim, + s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, bedge_lim, inner_limit, hev_thresh); - s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim, + s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, bedge_lim, inner_limit, hev_thresh); - s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, - uvlinesize, bedge_lim, + s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, + uvlinesize, bedge_lim, inner_limit, hev_thresh); } if (mb_y) { - s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize, + s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize, mbedge_lim, 
inner_limit, hev_thresh); - s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize, + s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize, mbedge_lim, inner_limit, hev_thresh); } if (inner_filter) { - s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize, - linesize, bedge_lim, + s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize, + linesize, bedge_lim, inner_limit, hev_thresh); - s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize, - linesize, bedge_lim, + s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize, + linesize, bedge_lim, inner_limit, hev_thresh); - s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize, - linesize, bedge_lim, + s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize, + linesize, bedge_lim, inner_limit, hev_thresh); - s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize, - dst[2] + 4 * uvlinesize, - uvlinesize, bedge_lim, + s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize, + dst[2] + 4 * uvlinesize, + uvlinesize, bedge_lim, inner_limit, hev_thresh); } } -static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y) +static av_always_inline +void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, + int mb_x, int mb_y) { int mbedge_lim, bedge_lim; int filter_level = f->filter_level; - int inner_limit = f->inner_limit; + int inner_limit = f->inner_limit; int inner_filter = f->inner_filter; - int linesize = s->linesize; + int linesize = s->linesize; if (!filter_level) return; - bedge_lim = 2*filter_level + inner_limit; + bedge_lim = 2 * filter_level + inner_limit; mbedge_lim = bedge_lim + 4; if (mb_x) s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim); if (inner_filter) { - s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim); - s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim); - s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim); + 
s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim); + s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim); + s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim); } if (mb_y) s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim); if (inner_filter) { - s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim); - s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim); - s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim); + s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim); + s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim); + s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim); } } @@ -1581,16 +1636,18 @@ static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe, s->mv_min.y = -MARGIN; s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; for (mb_y = 0; mb_y < s->mb_height; mb_y++) { - VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1); - int mb_xy = mb_y*s->mb_width; + VP8Macroblock *mb = s->macroblocks_base + + ((s->mb_width + 1) * (mb_y + 1) + 1); + int mb_xy = mb_y * s->mb_width; - AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101); + AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101); s->mv_min.x = -MARGIN; s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { if (mb_y == 0) - AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101); + AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top, + DC_PRED * 0x01010101); decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy, prev_frame && prev_frame->seg_map ? 
prev_frame->seg_map->data + mb_xy : NULL, 1); @@ -1603,37 +1660,40 @@ static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe, } #if HAVE_THREADS -#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\ - do {\ - int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\ - if (otd->thread_mb_pos < tmp) {\ - pthread_mutex_lock(&otd->lock);\ - td->wait_mb_pos = tmp;\ - do {\ - if (otd->thread_mb_pos >= tmp)\ - break;\ - pthread_cond_wait(&otd->cond, &otd->lock);\ - } while (1);\ - td->wait_mb_pos = INT_MAX;\ - pthread_mutex_unlock(&otd->lock);\ - }\ - } while(0); - -#define update_pos(td, mb_y, mb_x)\ - do {\ - int pos = (mb_y << 16) | (mb_x & 0xFFFF);\ - int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\ - int is_null = (next_td == NULL) || (prev_td == NULL);\ - int pos_check = (is_null) ? 1 :\ - (next_td != td && pos >= next_td->wait_mb_pos) ||\ - (prev_td != td && pos >= prev_td->wait_mb_pos);\ - td->thread_mb_pos = pos;\ - if (sliced_threading && pos_check) {\ - pthread_mutex_lock(&td->lock);\ - pthread_cond_broadcast(&td->cond);\ - pthread_mutex_unlock(&td->lock);\ - }\ - } while(0); +#define check_thread_pos(td, otd, mb_x_check, mb_y_check) \ + do { \ + int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \ + if (otd->thread_mb_pos < tmp) { \ + pthread_mutex_lock(&otd->lock); \ + td->wait_mb_pos = tmp; \ + do { \ + if (otd->thread_mb_pos >= tmp) \ + break; \ + pthread_cond_wait(&otd->cond, &otd->lock); \ + } while (1); \ + td->wait_mb_pos = INT_MAX; \ + pthread_mutex_unlock(&otd->lock); \ + } \ + } while (0); + +#define update_pos(td, mb_y, mb_x) \ + do { \ + int pos = (mb_y << 16) | (mb_x & 0xFFFF); \ + int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \ + (num_jobs > 1); \ + int is_null = (next_td == NULL) || (prev_td == NULL); \ + int pos_check = (is_null) ? 
1 \ + : (next_td != td && \ + pos >= next_td->wait_mb_pos) || \ + (prev_td != td && \ + pos >= prev_td->wait_mb_pos); \ + td->thread_mb_pos = pos; \ + if (sliced_threading && pos_check) { \ + pthread_mutex_lock(&td->lock); \ + pthread_cond_broadcast(&td->cond); \ + pthread_mutex_unlock(&td->lock); \ + } \ + } while (0); #else #define check_thread_pos(td, otd, mb_x_check, mb_y_check) #define update_pos(td, mb_y, mb_x) @@ -1644,51 +1704,58 @@ static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, { VP8Context *s = avctx->priv_data; VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr]; - int mb_y = td->thread_mb_pos>>16; - int mb_x, mb_xy = mb_y*s->mb_width; + int mb_y = td->thread_mb_pos >> 16; + int mb_x, mb_xy = mb_y * s->mb_width; int num_jobs = s->num_jobs; VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame; - VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; + VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)]; VP8Macroblock *mb; uint8_t *dst[3] = { - curframe->tf.f->data[0] + 16*mb_y*s->linesize, - curframe->tf.f->data[1] + 8*mb_y*s->uvlinesize, - curframe->tf.f->data[2] + 8*mb_y*s->uvlinesize + curframe->tf.f->data[0] + 16 * mb_y * s->linesize, + curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize, + curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize }; - if (mb_y == 0) prev_td = td; - else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs]; - if (mb_y == s->mb_height-1) next_td = td; - else next_td = &s->thread_data[(jobnr + 1)%num_jobs]; + if (mb_y == 0) + prev_td = td; + else + prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs]; + if (mb_y == s->mb_height - 1) + next_td = td; + else + next_td = &s->thread_data[(jobnr + 1) % num_jobs]; if (s->mb_layout == 1) - mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1); + mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1); else { // Make sure the previous frame has read its 
segmentation map, // if we re-use the same map. if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map) ff_thread_await_progress(&prev_frame->tf, mb_y, 0); - mb = s->macroblocks + (s->mb_height - mb_y - 1)*2; + mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2; memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock - AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101); + AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101); } memset(td->left_nnz, 0, sizeof(td->left_nnz)); s->mv_min.x = -MARGIN; - s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; + s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { // Wait for previous thread to read mb_x+2, and reach mb_y-1. if (prev_td != td) { if (threadnr != 0) { - check_thread_pos(td, prev_td, mb_x+1, mb_y-1); + check_thread_pos(td, prev_td, mb_x + 1, mb_y - 1); } else { - check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1); + check_thread_pos(td, prev_td, + (s->mb_width + 3) + (mb_x + 1), mb_y - 1); } } - s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); - s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); + s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64, + s->linesize, 4); + s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64, + dst[2] - dst[1], 2); if (!s->mb_layout) decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy, @@ -1713,7 +1780,8 @@ static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata, AV_ZERO64(td->left_nnz); AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned - // Reset DC block predictors if they would exist if the mb had coefficients + /* Reset DC block predictors if they would exist + * if the mb had coefficients */ if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { td->left_nnz[8] = 0; s->top_nnz[mb_x][8] = 0; @@ -1723,23 +1791,25 @@ static void vp8_decode_mb_row_no_filter(AVCodecContext 
*avctx, void *tdata, if (s->deblock_filter) filter_level_for_mb(s, mb, &td->filter_strength[mb_x]); - if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) { + if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) { if (s->filter.simple) - backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1); + backup_mb_border(s->top_border[mb_x + 1], dst[0], + NULL, NULL, s->linesize, 0, 1); else - backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); + backup_mb_border(s->top_border[mb_x + 1], dst[0], + dst[1], dst[2], s->linesize, s->uvlinesize, 0); } prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); - dst[0] += 16; - dst[1] += 8; - dst[2] += 8; + dst[0] += 16; + dst[1] += 8; + dst[2] += 8; s->mv_min.x -= 64; s->mv_max.x -= 64; - if (mb_x == s->mb_width+1) { - update_pos(td, mb_y, s->mb_width+3); + if (mb_x == s->mb_width + 1) { + update_pos(td, mb_y, s->mb_width + 3); } else { update_pos(td, mb_y, mb_x); } @@ -1751,41 +1821,46 @@ static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata, { VP8Context *s = avctx->priv_data; VP8ThreadData *td = &s->thread_data[threadnr]; - int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs; + int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs; AVFrame *curframe = s->curframe->tf.f; VP8Macroblock *mb; VP8ThreadData *prev_td, *next_td; uint8_t *dst[3] = { - curframe->data[0] + 16*mb_y*s->linesize, - curframe->data[1] + 8*mb_y*s->uvlinesize, - curframe->data[2] + 8*mb_y*s->uvlinesize + curframe->data[0] + 16 * mb_y * s->linesize, + curframe->data[1] + 8 * mb_y * s->uvlinesize, + curframe->data[2] + 8 * mb_y * s->uvlinesize }; if (s->mb_layout == 1) - mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1); + mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1); else - mb = s->macroblocks + (s->mb_height - mb_y - 1)*2; + mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2; - if (mb_y == 0) prev_td 
= td; - else prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs]; - if (mb_y == s->mb_height-1) next_td = td; - else next_td = &s->thread_data[(jobnr + 1)%num_jobs]; + if (mb_y == 0) + prev_td = td; + else + prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs]; + if (mb_y == s->mb_height - 1) + next_td = td; + else + next_td = &s->thread_data[(jobnr + 1) % num_jobs]; for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) { VP8FilterStrength *f = &td->filter_strength[mb_x]; - if (prev_td != td) { - check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1); - } + if (prev_td != td) + check_thread_pos(td, prev_td, + (mb_x + 1) + (s->mb_width + 3), mb_y - 1); if (next_td != td) - if (next_td != &s->thread_data[0]) { - check_thread_pos(td, next_td, mb_x+1, mb_y+1); - } + if (next_td != &s->thread_data[0]) + check_thread_pos(td, next_td, mb_x + 1, mb_y + 1); if (num_jobs == 1) { if (s->filter.simple) - backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1); + backup_mb_border(s->top_border[mb_x + 1], dst[0], + NULL, NULL, s->linesize, 0, 1); else - backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); + backup_mb_border(s->top_border[mb_x + 1], dst[0], + dst[1], dst[2], s->linesize, s->uvlinesize, 0); } if (s->filter.simple) @@ -1796,7 +1871,7 @@ static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata, dst[1] += 8; dst[2] += 8; - update_pos(td, mb_y, (s->mb_width+3) + mb_x); + update_pos(td, mb_y, (s->mb_width + 3) + mb_x); } } @@ -1808,10 +1883,12 @@ static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, VP8ThreadData *next_td = NULL, *prev_td = NULL; VP8Frame *curframe = s->curframe; int mb_y, num_jobs = s->num_jobs; + td->thread_nr = threadnr; for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) { - if (mb_y >= s->mb_height) break; - td->thread_mb_pos = mb_y<<16; + if (mb_y >= s->mb_height) + break; + td->thread_mb_pos = mb_y << 16; 
vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr); if (s->deblock_filter) vp8_filter_mb_row(avctx, tdata, jobnr, threadnr); @@ -1840,11 +1917,12 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, prev_frame = s->framep[VP56_FRAME_CURRENT]; - referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT - || s->update_altref == VP56_FRAME_CURRENT; + referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT || + s->update_altref == VP56_FRAME_CURRENT; - skip_thresh = !referenced ? AVDISCARD_NONREF : - !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL; + skip_thresh = !referenced ? AVDISCARD_NONREF + : !s->keyframe ? AVDISCARD_NONKEY + : AVDISCARD_ALL; if (avctx->skip_frame >= skip_thresh) { s->invisible = 1; @@ -1858,7 +1936,7 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, if (s->frames[i].tf.f->data[0] && &s->frames[i] != prev_frame && &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && - &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && + &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) vp8_release_frame(s, &s->frames[i]); @@ -1866,7 +1944,7 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, for (i = 0; i < 5; i++) if (&s->frames[i] != prev_frame && &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && - &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && + &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) { curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i]; break; @@ -1878,57 +1956,61 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, if (curframe->tf.f->data[0]) vp8_release_frame(s, curframe); - // Given that arithmetic probabilities are updated every frame, it's quite likely - // that the values we have on a random interframe are complete junk if we didn't - // start decode on a keyframe. 
So just don't display anything rather than junk. + /* Given that arithmetic probabilities are updated every frame, it's quite + * likely that the values we have on a random interframe are complete + * junk if we didn't start decode on a keyframe. So just don't display + * anything rather than junk. */ if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] || - !s->framep[VP56_FRAME_GOLDEN] || + !s->framep[VP56_FRAME_GOLDEN] || !s->framep[VP56_FRAME_GOLDEN2])) { - av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n"); + av_log(avctx, AV_LOG_WARNING, + "Discarding interframe without a prior keyframe!\n"); ret = AVERROR_INVALIDDATA; goto err; } curframe->tf.f->key_frame = s->keyframe; - curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P; + curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I + : AV_PICTURE_TYPE_P; if ((ret = vp8_alloc_frame(s, curframe, referenced))) { av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n"); goto err; } // check if golden and altref are swapped - if (s->update_altref != VP56_FRAME_NONE) { - s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref]; - } else { - s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2]; - } - if (s->update_golden != VP56_FRAME_NONE) { - s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden]; - } else { - s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN]; - } - if (s->update_last) { + if (s->update_altref != VP56_FRAME_NONE) + s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref]; + else + s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2]; + + if (s->update_golden != VP56_FRAME_NONE) + s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden]; + else + s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN]; + + if (s->update_last) s->next_framep[VP56_FRAME_PREVIOUS] = curframe; - } else { + else s->next_framep[VP56_FRAME_PREVIOUS] = 
s->framep[VP56_FRAME_PREVIOUS]; - } - s->next_framep[VP56_FRAME_CURRENT] = curframe; + + s->next_framep[VP56_FRAME_CURRENT] = curframe; ff_thread_finish_setup(avctx); s->linesize = curframe->tf.f->linesize[0]; s->uvlinesize = curframe->tf.f->linesize[1]; - memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz)); - /* Zero macroblock structures for top/top-left prediction from outside the frame. */ + memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz)); + /* Zero macroblock structures for top/top-left prediction + * from outside the frame. */ if (!s->mb_layout) - memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks)); + memset(s->macroblocks + s->mb_height * 2 - 1, 0, + (s->mb_width + 1) * sizeof(*s->macroblocks)); if (!s->mb_layout && s->keyframe) - memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4); + memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4); memset(s->ref_count, 0, sizeof(s->ref_count)); - if (s->mb_layout == 1) { // Make sure the previous frame has read its segmentation map, // if we re-use the same map. 
@@ -1949,9 +2031,10 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; for (i = 0; i < MAX_THREADS; i++) { s->thread_data[i].thread_mb_pos = 0; - s->thread_data[i].wait_mb_pos = INT_MAX; + s->thread_data[i].wait_mb_pos = INT_MAX; } - avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs); + avctx->execute2(avctx, vp8_decode_mb_row_sliced, + s->thread_data, NULL, num_jobs); ff_thread_report_progress(&curframe->tf, INT_MAX, 0); memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4); @@ -1965,7 +2048,7 @@ skip_decode: if (!s->invisible) { if ((ret = av_frame_ref(data, curframe->tf.f)) < 0) return ret; - *got_frame = 1; + *got_frame = 1; } return avpkt->size; @@ -2033,10 +2116,10 @@ static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx) return 0; } -#define REBASE(pic) \ - pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL +#define REBASE(pic) pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL -static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src) +static int vp8_decode_update_thread_context(AVCodecContext *dst, + const AVCodecContext *src) { VP8Context *s = dst->priv_data, *s_src = src->priv_data; int i; @@ -2048,9 +2131,9 @@ static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo s->mb_height = s_src->mb_height; } - s->prob[0] = s_src->prob[!s_src->update_probabilities]; + s->prob[0] = s_src->prob[!s_src->update_probabilities]; s->segmentation = s_src->segmentation; - s->lf_delta = s_src->lf_delta; + s->lf_delta = s_src->lf_delta; memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias)); for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) { diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h index 6d864b9a7f..1eb0512626 100644 --- a/libavcodec/vp8.h +++ b/libavcodec/vp8.h @@ -28,14 +28,15 @@ #include "libavutil/buffer.h" -#include "vp56.h" -#include "vp8dsp.h" #include 
"h264pred.h" #include "thread.h" +#include "vp56.h" +#include "vp8dsp.h" + #if HAVE_PTHREADS -#include <pthread.h> +# include <pthread.h> #elif HAVE_W32THREADS -#include "compat/w32pthreads.h" +# include "compat/w32pthreads.h" #endif #define VP8_MAX_QUANT 127 @@ -82,7 +83,7 @@ typedef struct VP8FilterStrength { typedef struct VP8Macroblock { uint8_t skip; - // todo: make it possible to check for at least (i4x4 or split_mv) + // TODO: make it possible to check for at least (i4x4 or split_mv) // in one op. are others needed? uint8_t mode; uint8_t ref_frame; @@ -116,7 +117,7 @@ typedef struct VP8ThreadData { int thread_nr; #if HAVE_THREADS pthread_mutex_t lock; - pthread_cond_t cond; + pthread_cond_t cond; #endif int thread_mb_pos; // (mb_y << 16) | (mb_x & 0xFFFF) int wait_mb_pos; // What the current thread is waiting on. @@ -203,7 +204,7 @@ typedef struct VP8Context { * [7] - split mv * i16x16 modes never have any adjustment */ - int8_t mode[VP8_MVMODE_SPLIT+1]; + int8_t mode[VP8_MVMODE_SPLIT + 1]; /** * filter strength adjustment for macroblocks that reference: @@ -215,7 +216,7 @@ typedef struct VP8Context { int8_t ref[4]; } lf_delta; - uint8_t (*top_border)[16+8+8]; + uint8_t (*top_border)[16 + 8 + 8]; uint8_t (*top_nnz)[9]; VP56RangeCoder c; ///< header context, includes mb modes and motion vectors @@ -234,7 +235,7 @@ typedef struct VP8Context { uint8_t golden; uint8_t pred16x16[4]; uint8_t pred8x8c[3]; - uint8_t token[4][16][3][NUM_DCT_TOKENS-1]; + uint8_t token[4][16][3][NUM_DCT_TOKENS - 1]; uint8_t mvc[2][19]; } prob[2]; diff --git a/libavcodec/vp8_parser.c b/libavcodec/vp8_parser.c index 196de83935..8f6459ccec 100644 --- a/libavcodec/vp8_parser.c +++ b/libavcodec/vp8_parser.c @@ -21,18 +21,19 @@ #include "parser.h" static int parse(AVCodecParserContext *s, - AVCodecContext *avctx, - const uint8_t **poutbuf, int *poutbuf_size, - const uint8_t *buf, int buf_size) + AVCodecContext *avctx, + const uint8_t **poutbuf, int *poutbuf_size, + const uint8_t *buf, int 
buf_size) { - s->pict_type= (buf[0]&0x01) ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I; + s->pict_type = (buf[0] & 0x01) ? AV_PICTURE_TYPE_P + : AV_PICTURE_TYPE_I; - *poutbuf = buf; + *poutbuf = buf; *poutbuf_size = buf_size; return buf_size; } AVCodecParser ff_vp8_parser = { - .codec_ids = { AV_CODEC_ID_VP8 }, - .parser_parse = parse, + .codec_ids = { AV_CODEC_ID_VP8 }, + .parser_parse = parse, }; diff --git a/libavcodec/vp8data.h b/libavcodec/vp8data.h index a48b0f6f9b..2a17cc2024 100644 --- a/libavcodec/vp8data.h +++ b/libavcodec/vp8data.h @@ -30,28 +30,25 @@ #include "vp8.h" #include "h264pred.h" -static const uint8_t vp8_pred4x4_mode[] = -{ +static const uint8_t vp8_pred4x4_mode[] = { [DC_PRED8x8] = DC_PRED, [VERT_PRED8x8] = VERT_PRED, [HOR_PRED8x8] = HOR_PRED, [PLANE_PRED8x8] = TM_VP8_PRED, }; -static const int8_t vp8_pred16x16_tree_intra[4][2] = -{ - { -MODE_I4x4, 1 }, // '0' - { 2, 3 }, - { -DC_PRED8x8, -VERT_PRED8x8 }, // '100', '101' - { -HOR_PRED8x8, -PLANE_PRED8x8 }, // '110', '111' +static const int8_t vp8_pred16x16_tree_intra[4][2] = { + { -MODE_I4x4, 1 }, // '0' + { 2, 3 }, + { -DC_PRED8x8, -VERT_PRED8x8 }, // '100', '101' + { -HOR_PRED8x8, -PLANE_PRED8x8 }, // '110', '111' }; -static const int8_t vp8_pred16x16_tree_inter[4][2] = -{ - { -DC_PRED8x8, 1 }, // '0' - { 2, 3 }, - { -VERT_PRED8x8, -HOR_PRED8x8 }, // '100', '101' - { -PLANE_PRED8x8, -MODE_I4x4 }, // '110', '111' +static const int8_t vp8_pred16x16_tree_inter[4][2] = { + { -DC_PRED8x8, 1 }, // '0' + { 2, 3 }, + { -VERT_PRED8x8, -HOR_PRED8x8 }, // '100', '101' + { -PLANE_PRED8x8, -MODE_I4x4 }, // '110', '111' }; static const int vp8_mode_contexts[6][4] = { @@ -64,26 +61,26 @@ static const int vp8_mode_contexts[6][4] = { }; static const uint8_t vp8_mbsplits[5][16] = { - { 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1 }, - { 0, 0, 1, 1, 0, 0, 1, 1, - 0, 0, 1, 1, 0, 0, 1, 1 }, - { 0, 0, 1, 1, 0, 0, 1, 1, - 2, 2, 3, 3, 2, 2, 3, 3 }, - { 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15 }, - { 
0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 } + { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, + { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, + { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; static const uint8_t vp8_mbfirstidx[4][16] = { - { 0, 8 }, { 0, 2 }, { 0, 2, 8, 10 }, - { 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15 } + { 0, 8 }, + { 0, 2 }, + { 0, 2, 8, 10 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } }; -static const uint8_t vp8_mbsplit_count[4] = { 2, 2, 4, 16 }; -static const uint8_t vp8_mbsplit_prob[3] = { 110, 111, 150 }; +static const uint8_t vp8_mbsplit_count[4] = { + 2, 2, 4, 16 +}; +static const uint8_t vp8_mbsplit_prob[3] = { + 110, 111, 150 +}; static const uint8_t vp8_submv_prob[5][3] = { { 147, 136, 18 }, @@ -93,39 +90,42 @@ static const uint8_t vp8_submv_prob[5][3] = { { 208, 1, 1 } }; -static const uint8_t vp8_pred16x16_prob_intra[4] = { 145, 156, 163, 128 }; -static const uint8_t vp8_pred16x16_prob_inter[4] = { 112, 86, 140, 37 }; - -static const int8_t vp8_pred4x4_tree[9][2] = -{ - { -DC_PRED, 1 }, // '0' - { -TM_VP8_PRED, 2 }, // '10' - { -VERT_PRED, 3 }, // '110' - { 4, 6 }, - { -HOR_PRED, 5 }, // '11100' - { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED }, // '111010', '111011' - { -DIAG_DOWN_LEFT_PRED, 7 }, // '11110' - { -VERT_LEFT_PRED, 8 }, // '111110' - { -HOR_DOWN_PRED, -HOR_UP_PRED }, // '1111110', '1111111' +static const uint8_t vp8_pred16x16_prob_intra[4] = { + 145, 156, 163, 128 +}; +static const uint8_t vp8_pred16x16_prob_inter[4] = { + 112, 86, 140, 37 }; -static const int8_t vp8_pred8x8c_tree[3][2] = -{ - { -DC_PRED8x8, 1 }, // '0' - { -VERT_PRED8x8, 2 }, // '10 - { -HOR_PRED8x8, -PLANE_PRED8x8 }, // '110', '111' +static const int8_t vp8_pred4x4_tree[9][2] = { + { -DC_PRED, 1 }, // '0' + { -TM_VP8_PRED, 2 }, // '10' + { -VERT_PRED, 3 }, // '110' + { 4, 6 }, + { 
-HOR_PRED, 5 }, // '11100' + { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED }, // '111010', '111011' + { -DIAG_DOWN_LEFT_PRED, 7 }, // '11110' + { -VERT_LEFT_PRED, 8 }, // '111110' + { -HOR_DOWN_PRED, -HOR_UP_PRED }, // '1111110', '1111111' }; -static const uint8_t vp8_pred8x8c_prob_intra[3] = { 142, 114, 183 }; -static const uint8_t vp8_pred8x8c_prob_inter[3] = { 162, 101, 204 }; +static const int8_t vp8_pred8x8c_tree[3][2] = { + { -DC_PRED8x8, 1 }, // '0' + { -VERT_PRED8x8, 2 }, // '10 + { -HOR_PRED8x8, -PLANE_PRED8x8 }, // '110', '111' +}; -static const uint8_t vp8_pred4x4_prob_inter[9] = -{ +static const uint8_t vp8_pred8x8c_prob_intra[3] = { + 142, 114, 183 +}; +static const uint8_t vp8_pred8x8c_prob_inter[3] = { + 162, 101, 204 +}; +static const uint8_t vp8_pred4x4_prob_inter[9] = { 120, 90, 79, 133, 87, 85, 80, 111, 151 }; -static const uint8_t vp8_pred4x4_prob_intra[10][10][9] = -{ +static const uint8_t vp8_pred4x4_prob_intra[10][10][9] = { { { 39, 53, 200, 87, 26, 21, 43, 232, 171 }, { 56, 34, 51, 104, 114, 102, 29, 93, 77 }, @@ -248,50 +248,57 @@ static const uint8_t vp8_pred4x4_prob_intra[10][10][9] = }, }; -static const int8_t vp8_segmentid_tree[][2] = -{ - { 1, 2 }, - { -0, -1 }, // '00', '01' - { -2, -3 }, // '10', '11' +static const int8_t vp8_segmentid_tree[][2] = { + { 1, 2 }, + { -0, -1 }, // '00', '01' + { -2, -3 }, // '10', '11' }; -static const uint8_t vp8_coeff_band[16] = -{ +static const uint8_t vp8_coeff_band[16] = { 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7 }; /* Inverse of vp8_coeff_band: mappings of bands to coefficient indexes. * Each list is -1-terminated. 
*/ -static const int8_t vp8_coeff_band_indexes[8][10] = -{ - {0, -1}, - {1, -1}, - {2, -1}, - {3, -1}, - {5, -1}, - {6, -1}, - {4, 7, 8, 9, 10, 11, 12, 13, 14, -1}, - {15, -1} +static const int8_t vp8_coeff_band_indexes[8][10] = { + { 0, -1 }, + { 1, -1 }, + { 2, -1 }, + { 3, -1 }, + { 5, -1 }, + { 6, -1 }, + { 4, 7, 8, 9, 10, 11, 12, 13, 14, -1 }, + { 15, -1 } }; -static const uint8_t vp8_dct_cat1_prob[] = { 159, 0 }; -static const uint8_t vp8_dct_cat2_prob[] = { 165, 145, 0 }; -static const uint8_t vp8_dct_cat3_prob[] = { 173, 148, 140, 0 }; -static const uint8_t vp8_dct_cat4_prob[] = { 176, 155, 140, 135, 0 }; -static const uint8_t vp8_dct_cat5_prob[] = { 180, 157, 141, 134, 130, 0 }; -static const uint8_t vp8_dct_cat6_prob[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; +static const uint8_t vp8_dct_cat1_prob[] = { + 159, 0 +}; +static const uint8_t vp8_dct_cat2_prob[] = { + 165, 145, 0 +}; +static const uint8_t vp8_dct_cat3_prob[] = { + 173, 148, 140, 0 +}; +static const uint8_t vp8_dct_cat4_prob[] = { + 176, 155, 140, 135, 0 +}; +static const uint8_t vp8_dct_cat5_prob[] = { + 180, 157, 141, 134, 130, 0 +}; +static const uint8_t vp8_dct_cat6_prob[] = { + 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 +}; // only used for cat3 and above; cat 1 and 2 are referenced directly -const uint8_t * const ff_vp8_dct_cat_prob[] = -{ +const uint8_t *const ff_vp8_dct_cat_prob[] = { vp8_dct_cat3_prob, vp8_dct_cat4_prob, vp8_dct_cat5_prob, vp8_dct_cat6_prob, }; -static const uint8_t vp8_token_default_probs[4][8][3][NUM_DCT_TOKENS-1] = -{ +static const uint8_t vp8_token_default_probs[4][8][3][NUM_DCT_TOKENS - 1] = { { { { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, @@ -462,8 +469,7 @@ static const uint8_t vp8_token_default_probs[4][8][3][NUM_DCT_TOKENS-1] = }, }; -static const uint8_t vp8_token_update_probs[4][8][3][NUM_DCT_TOKENS-1] = -{ +static const uint8_t vp8_token_update_probs[4][8][3][NUM_DCT_TOKENS - 1] = { { { { 255, 255, 255, 
255, 255, 255, 255, 255, 255, 255, 255 }, @@ -635,15 +641,14 @@ static const uint8_t vp8_token_update_probs[4][8][3][NUM_DCT_TOKENS-1] = }; // fixme: copied from h264data.h -static const uint8_t zigzag_scan[16]={ - 0+0*4, 1+0*4, 0+1*4, 0+2*4, - 1+1*4, 2+0*4, 3+0*4, 2+1*4, - 1+2*4, 0+3*4, 1+3*4, 2+2*4, - 3+1*4, 3+2*4, 2+3*4, 3+3*4, +static const uint8_t zigzag_scan[16] = { + 0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4, + 1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4, + 1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4, + 3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4, }; -static const uint8_t vp8_dc_qlookup[VP8_MAX_QUANT+1] = -{ +static const uint8_t vp8_dc_qlookup[VP8_MAX_QUANT + 1] = { 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, @@ -654,8 +659,7 @@ static const uint8_t vp8_dc_qlookup[VP8_MAX_QUANT+1] = 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157, }; -static const uint16_t vp8_ac_qlookup[VP8_MAX_QUANT+1] = -{ +static const uint16_t vp8_ac_qlookup[VP8_MAX_QUANT + 1] = { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c index ad7c60309c..431c96ea77 100644 --- a/libavcodec/vp8dsp.c +++ b/libavcodec/vp8dsp.c @@ -24,9 +24,10 @@ * VP8 compatible video decoder */ +#include "libavutil/common.h" + #include "mathops.h" #include "vp8dsp.h" -#include "libavutil/common.h" // TODO: Maybe add dequant static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16]) @@ -34,26 +35,26 @@ static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16]) int i, t0, t1, t2, t3; for (i = 0; i < 4; i++) { - t0 = dc[0*4+i] + dc[3*4+i]; - t1 = dc[1*4+i] + dc[2*4+i]; - t2 = dc[1*4+i] - dc[2*4+i]; - t3 = dc[0*4+i] - dc[3*4+i]; - - 
dc[0*4+i] = t0 + t1; - dc[1*4+i] = t3 + t2; - dc[2*4+i] = t0 - t1; - dc[3*4+i] = t3 - t2; + t0 = dc[0 * 4 + i] + dc[3 * 4 + i]; + t1 = dc[1 * 4 + i] + dc[2 * 4 + i]; + t2 = dc[1 * 4 + i] - dc[2 * 4 + i]; + t3 = dc[0 * 4 + i] - dc[3 * 4 + i]; + + dc[0 * 4 + i] = t0 + t1; + dc[1 * 4 + i] = t3 + t2; + dc[2 * 4 + i] = t0 - t1; + dc[3 * 4 + i] = t3 - t2; } for (i = 0; i < 4; i++) { - t0 = dc[i*4+0] + dc[i*4+3] + 3; // rounding - t1 = dc[i*4+1] + dc[i*4+2]; - t2 = dc[i*4+1] - dc[i*4+2]; - t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding - dc[i*4+0] = 0; - dc[i*4+1] = 0; - dc[i*4+2] = 0; - dc[i*4+3] = 0; + t0 = dc[i * 4 + 0] + dc[i * 4 + 3] + 3; // rounding + t1 = dc[i * 4 + 1] + dc[i * 4 + 2]; + t2 = dc[i * 4 + 1] - dc[i * 4 + 2]; + t3 = dc[i * 4 + 0] - dc[i * 4 + 3] + 3; // rounding + dc[i * 4 + 0] = 0; + dc[i * 4 + 1] = 0; + dc[i * 4 + 2] = 0; + dc[i * 4 + 3] = 0; block[i][0][0] = (t0 + t1) >> 3; block[i][1][0] = (t3 + t2) >> 3; @@ -75,8 +76,8 @@ static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16]) } } -#define MUL_20091(a) ((((a)*20091) >> 16) + (a)) -#define MUL_35468(a) (((a)*35468) >> 16) +#define MUL_20091(a) ((((a) * 20091) >> 16) + (a)) +#define MUL_35468(a) (((a) * 35468) >> 16) static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) { @@ -84,32 +85,32 @@ static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) int16_t tmp[16]; for (i = 0; i < 4; i++) { - t0 = block[0*4+i] + block[2*4+i]; - t1 = block[0*4+i] - block[2*4+i]; - t2 = MUL_35468(block[1*4+i]) - MUL_20091(block[3*4+i]); - t3 = MUL_20091(block[1*4+i]) + MUL_35468(block[3*4+i]); - block[0*4+i] = 0; - block[1*4+i] = 0; - block[2*4+i] = 0; - block[3*4+i] = 0; - - tmp[i*4+0] = t0 + t3; - tmp[i*4+1] = t1 + t2; - tmp[i*4+2] = t1 - t2; - tmp[i*4+3] = t0 - t3; + t0 = block[0 * 4 + i] + block[2 * 4 + i]; + t1 = block[0 * 4 + i] - block[2 * 4 + i]; + t2 = MUL_35468(block[1 * 4 + i]) - MUL_20091(block[3 * 4 + i]); + t3 = MUL_20091(block[1 * 4 + i]) + 
MUL_35468(block[3 * 4 + i]); + block[0 * 4 + i] = 0; + block[1 * 4 + i] = 0; + block[2 * 4 + i] = 0; + block[3 * 4 + i] = 0; + + tmp[i * 4 + 0] = t0 + t3; + tmp[i * 4 + 1] = t1 + t2; + tmp[i * 4 + 2] = t1 - t2; + tmp[i * 4 + 3] = t0 - t3; } for (i = 0; i < 4; i++) { - t0 = tmp[0*4+i] + tmp[2*4+i]; - t1 = tmp[0*4+i] - tmp[2*4+i]; - t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]); - t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]); + t0 = tmp[0 * 4 + i] + tmp[2 * 4 + i]; + t1 = tmp[0 * 4 + i] - tmp[2 * 4 + i]; + t2 = MUL_35468(tmp[1 * 4 + i]) - MUL_20091(tmp[3 * 4 + i]); + t3 = MUL_20091(tmp[1 * 4 + i]) + MUL_35468(tmp[3 * 4 + i]); dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3)); dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3)); dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3)); dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3)); - dst += stride; + dst += stride; } } @@ -123,46 +124,49 @@ static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride) dst[1] = av_clip_uint8(dst[1] + dc); dst[2] = av_clip_uint8(dst[2] + dc); dst[3] = av_clip_uint8(dst[3] + dc); - dst += stride; + dst += stride; } } -static void vp8_idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride) +static void vp8_idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16], + ptrdiff_t stride) { - vp8_idct_dc_add_c(dst+stride*0+0, block[0], stride); - vp8_idct_dc_add_c(dst+stride*0+4, block[1], stride); - vp8_idct_dc_add_c(dst+stride*4+0, block[2], stride); - vp8_idct_dc_add_c(dst+stride*4+4, block[3], stride); + vp8_idct_dc_add_c(dst + stride * 0 + 0, block[0], stride); + vp8_idct_dc_add_c(dst + stride * 0 + 4, block[1], stride); + vp8_idct_dc_add_c(dst + stride * 4 + 0, block[2], stride); + vp8_idct_dc_add_c(dst + stride * 4 + 4, block[3], stride); } -static void vp8_idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride) +static void vp8_idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16], + ptrdiff_t stride) { - 
vp8_idct_dc_add_c(dst+ 0, block[0], stride); - vp8_idct_dc_add_c(dst+ 4, block[1], stride); - vp8_idct_dc_add_c(dst+ 8, block[2], stride); - vp8_idct_dc_add_c(dst+12, block[3], stride); + vp8_idct_dc_add_c(dst + 0, block[0], stride); + vp8_idct_dc_add_c(dst + 4, block[1], stride); + vp8_idct_dc_add_c(dst + 8, block[2], stride); + vp8_idct_dc_add_c(dst + 12, block[3], stride); } // because I like only having two parameters to pass functions... -#define LOAD_PIXELS\ - int av_unused p3 = p[-4*stride];\ - int av_unused p2 = p[-3*stride];\ - int av_unused p1 = p[-2*stride];\ - int av_unused p0 = p[-1*stride];\ - int av_unused q0 = p[ 0*stride];\ - int av_unused q1 = p[ 1*stride];\ - int av_unused q2 = p[ 2*stride];\ - int av_unused q3 = p[ 3*stride]; - -#define clip_int8(n) (cm[n+0x80]-0x80) - -static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, int is4tap) +#define LOAD_PIXELS \ + int av_unused p3 = p[-4 * stride]; \ + int av_unused p2 = p[-3 * stride]; \ + int av_unused p1 = p[-2 * stride]; \ + int av_unused p0 = p[-1 * stride]; \ + int av_unused q0 = p[ 0 * stride]; \ + int av_unused q1 = p[ 1 * stride]; \ + int av_unused q2 = p[ 2 * stride]; \ + int av_unused q3 = p[ 3 * stride]; + +#define clip_int8(n) (cm[n + 0x80] - 0x80) + +static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, + int is4tap) { LOAD_PIXELS int a, f1, f2; const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; - a = 3*(q0 - p0); + a = 3 * (q0 - p0); if (is4tap) a += clip_int8(p1 - q1); @@ -171,45 +175,50 @@ static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, int is4 // We deviate from the spec here with c(a+3) >> 3 // since that's what libvpx does. - f1 = FFMIN(a+4, 127) >> 3; - f2 = FFMIN(a+3, 127) >> 3; + f1 = FFMIN(a + 4, 127) >> 3; + f2 = FFMIN(a + 3, 127) >> 3; // Despite what the spec says, we do need to clamp here to // be bitexact with libvpx. 
- p[-1*stride] = cm[p0 + f2]; - p[ 0*stride] = cm[q0 - f1]; + p[-1 * stride] = cm[p0 + f2]; + p[ 0 * stride] = cm[q0 - f1]; // only used for _inner on blocks without high edge variance if (!is4tap) { - a = (f1+1)>>1; - p[-2*stride] = cm[p1 + a]; - p[ 1*stride] = cm[q1 - a]; + a = (f1 + 1) >> 1; + p[-2 * stride] = cm[p1 + a]; + p[ 1 * stride] = cm[q1 - a]; } } static av_always_inline int simple_limit(uint8_t *p, ptrdiff_t stride, int flim) { LOAD_PIXELS - return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim; + return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim; } /** * E - limit at the macroblock edge * I - limit for interior difference */ -static av_always_inline int normal_limit(uint8_t *p, ptrdiff_t stride, int E, int I) +static av_always_inline int normal_limit(uint8_t *p, ptrdiff_t stride, + int E, int I) { LOAD_PIXELS - return simple_limit(p, stride, E) - && FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I - && FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I; + return simple_limit(p, stride, E) && + FFABS(p3 - p2) <= I && + FFABS(p2 - p1) <= I && + FFABS(p1 - p0) <= I && + FFABS(q3 - q2) <= I && + FFABS(q2 - q1) <= I && + FFABS(q1 - q0) <= I; } // high edge variance static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh) { LOAD_PIXELS - return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh; + return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh; } static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride) @@ -219,67 +228,75 @@ static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride) LOAD_PIXELS - w = clip_int8(p1-q1); - w = clip_int8(w + 3*(q0-p0)); + w = clip_int8(p1 - q1); + w = clip_int8(w + 3 * (q0 - p0)); - a0 = (27*w + 63) >> 7; - a1 = (18*w + 63) >> 7; - a2 = ( 9*w + 63) >> 7; + a0 = (27 * w + 63) >> 7; + a1 = (18 * w + 63) >> 7; + a2 = (9 * w + 63) >> 7; - p[-3*stride] = cm[p2 + a2]; - p[-2*stride] = cm[p1 + a1]; - p[-1*stride] = cm[p0 + a0]; - p[ 0*stride] = cm[q0 - a0]; 
- p[ 1*stride] = cm[q1 - a1]; - p[ 2*stride] = cm[q2 - a2]; + p[-3 * stride] = cm[p2 + a2]; + p[-2 * stride] = cm[p1 + a1]; + p[-1 * stride] = cm[p0 + a0]; + p[ 0 * stride] = cm[q0 - a0]; + p[ 1 * stride] = cm[q1 - a1]; + p[ 2 * stride] = cm[q2 - a2]; } -#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \ -static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, ptrdiff_t stride,\ - int flim_E, int flim_I, int hev_thresh)\ -{\ - int i;\ -\ - for (i = 0; i < size; i++)\ - if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\ - if (hev(dst+i*stridea, strideb, hev_thresh))\ - filter_common(dst+i*stridea, strideb, 1);\ - else\ - filter_mbedge(dst+i*stridea, strideb);\ - }\ -}\ -\ -static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, ptrdiff_t stride,\ - int flim_E, int flim_I, int hev_thresh)\ -{\ - int i;\ -\ - for (i = 0; i < size; i++)\ - if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\ - int hv = hev(dst+i*stridea, strideb, hev_thresh);\ - if (hv) \ - filter_common(dst+i*stridea, strideb, 1);\ - else \ - filter_common(dst+i*stridea, strideb, 0);\ - }\ +#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \ +static maybe_inline \ +void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, \ + ptrdiff_t stride, \ + int flim_E, int flim_I, \ + int hev_thresh) \ +{ \ + int i; \ + for (i = 0; i < size; i++) \ + if (normal_limit(dst + i * stridea, strideb, flim_E, flim_I)) { \ + if (hev(dst + i * stridea, strideb, hev_thresh)) \ + filter_common(dst + i * stridea, strideb, 1); \ + else \ + filter_mbedge(dst + i * stridea, strideb); \ + } \ +} \ + \ +static maybe_inline \ +void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, \ + ptrdiff_t stride, \ + int flim_E, int flim_I, \ + int hev_thresh) \ +{ \ + int i; \ + for (i = 0; i < size; i++) \ + if (normal_limit(dst + i * stridea, strideb, flim_E, flim_I)) { \ + int hv = hev(dst + i * stridea, strideb, 
hev_thresh); \ + if (hv) \ + filter_common(dst + i * stridea, strideb, 1); \ + else \ + filter_common(dst + i * stridea, strideb, 0); \ + } \ } -LOOP_FILTER(v, 16, 1, stride,) -LOOP_FILTER(h, 16, stride, 1,) - -#define UV_LOOP_FILTER(dir, stridea, strideb) \ -LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \ -static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,\ - int fE, int fI, int hev_thresh)\ -{\ - vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\ - vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\ -}\ -static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,\ - int fE, int fI, int hev_thresh)\ -{\ - vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\ - vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);\ +LOOP_FILTER(v, 16, 1, stride, ) +LOOP_FILTER(h, 16, stride, 1, ) + +#define UV_LOOP_FILTER(dir, stridea, strideb) \ +LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \ +static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, \ + ptrdiff_t stride, int fE, \ + int fI, int hev_thresh) \ +{ \ + vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \ + vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \ +} \ + \ +static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, \ + uint8_t *dstV, \ + ptrdiff_t stride, int fE, \ + int fI, int hev_thresh) \ +{ \ + vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh); \ + vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh); \ } UV_LOOP_FILTER(v, 1, stride) @@ -290,8 +307,8 @@ static void vp8_v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim) int i; for (i = 0; i < 16; i++) - if (simple_limit(dst+i, stride, flim)) - filter_common(dst+i, stride, 1); + if (simple_limit(dst + i, stride, flim)) + filter_common(dst + i, stride, 1); } static void 
vp8_h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim) @@ -299,94 +316,110 @@ static void vp8_h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim) int i; for (i = 0; i < 16; i++) - if (simple_limit(dst+i*stride, 1, flim)) - filter_common(dst+i*stride, 1, 1); + if (simple_limit(dst + i * stride, 1, flim)) + filter_common(dst + i * stride, 1, 1); } static const uint8_t subpel_filters[7][6] = { - { 0, 6, 123, 12, 1, 0 }, - { 2, 11, 108, 36, 8, 1 }, - { 0, 9, 93, 50, 6, 0 }, - { 3, 16, 77, 77, 16, 3 }, - { 0, 6, 50, 93, 9, 0 }, - { 1, 8, 36, 108, 11, 2 }, - { 0, 1, 12, 123, 6, 0 }, + { 0, 6, 123, 12, 1, 0 }, + { 2, 11, 108, 36, 8, 1 }, + { 0, 9, 93, 50, 6, 0 }, + { 3, 16, 77, 77, 16, 3 }, + { 0, 6, 50, 93, 9, 0 }, + { 1, 8, 36, 108, 11, 2 }, + { 0, 1, 12, 123, 6, 0 }, }; -#define PUT_PIXELS(WIDTH) \ -static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y) { \ - int i; \ - for (i = 0; i < h; i++, dst+= dststride, src+= srcstride) { \ - memcpy(dst, src, WIDTH); \ - } \ +#define PUT_PIXELS(WIDTH) \ +static void put_vp8_pixels ## WIDTH ## _c(uint8_t *dst, ptrdiff_t dststride, \ + uint8_t *src, ptrdiff_t srcstride, \ + int h, int x, int y) \ +{ \ + int i; \ + for (i = 0; i < h; i++, dst += dststride, src += srcstride) \ + memcpy(dst, src, WIDTH); \ } PUT_PIXELS(16) PUT_PIXELS(8) PUT_PIXELS(4) -#define FILTER_6TAP(src, F, stride) \ - cm[(F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + F[0]*src[x-2*stride] + \ - F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + F[5]*src[x+3*stride] + 64) >> 7] - -#define FILTER_4TAP(src, F, stride) \ - cm[(F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + \ - F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + 64) >> 7] - -#define VP8_EPEL_H(SIZE, TAPS) \ -static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \ -{ \ - const uint8_t *filter = subpel_filters[mx-1]; \ - 
const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ - int x, y; \ -\ - for (y = 0; y < h; y++) { \ - for (x = 0; x < SIZE; x++) \ - dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ - dst += dststride; \ - src += srcstride; \ - } \ +#define FILTER_6TAP(src, F, stride) \ + cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \ + F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] - \ + F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7] + +#define FILTER_4TAP(src, F, stride) \ + cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \ + F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7] + +#define VP8_EPEL_H(SIZE, TAPS) \ +static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, \ + ptrdiff_t dststride, \ + uint8_t *src, \ + ptrdiff_t srcstride, \ + int h, int mx, int my) \ +{ \ + const uint8_t *filter = subpel_filters[mx - 1]; \ + const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ + int x, y; \ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \ + dst += dststride; \ + src += srcstride; \ + } \ } -#define VP8_EPEL_V(SIZE, TAPS) \ -static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \ -{ \ - const uint8_t *filter = subpel_filters[my-1]; \ - const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ - int x, y; \ -\ - for (y = 0; y < h; y++) { \ - for (x = 0; x < SIZE; x++) \ - dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ - dst += dststride; \ - src += srcstride; \ - } \ + +#define VP8_EPEL_V(SIZE, TAPS) \ +static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, \ + ptrdiff_t dststride, \ + uint8_t *src, \ + ptrdiff_t srcstride, \ + int h, int mx, int my) \ +{ \ + const uint8_t *filter = subpel_filters[my - 1]; \ + const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ + int x, y; \ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ 
+ dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \ + dst += dststride; \ + src += srcstride; \ + } \ } -#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS) \ -static void put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \ -{ \ - const uint8_t *filter = subpel_filters[mx-1]; \ - const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ - int x, y; \ - uint8_t tmp_array[(2*SIZE+VTAPS-1)*SIZE]; \ - uint8_t *tmp = tmp_array; \ - src -= (2-(VTAPS==4))*srcstride; \ -\ - for (y = 0; y < h+VTAPS-1; y++) { \ - for (x = 0; x < SIZE; x++) \ - tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ - tmp += SIZE; \ - src += srcstride; \ - } \ -\ - tmp = tmp_array + (2-(VTAPS==4))*SIZE; \ - filter = subpel_filters[my-1]; \ -\ - for (y = 0; y < h; y++) { \ - for (x = 0; x < SIZE; x++) \ - dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ - dst += dststride; \ - tmp += SIZE; \ - } \ + +#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS) \ +static void \ +put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, \ + ptrdiff_t dststride, \ + uint8_t *src, \ + ptrdiff_t srcstride, \ + int h, int mx, \ + int my) \ +{ \ + const uint8_t *filter = subpel_filters[mx - 1]; \ + const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \ + int x, y; \ + uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \ + uint8_t *tmp = tmp_array; \ + src -= (2 - (VTAPS == 4)) * srcstride; \ + \ + for (y = 0; y < h + VTAPS - 1; y++) { \ + for (x = 0; x < SIZE; x++) \ + tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \ + tmp += SIZE; \ + src += srcstride; \ + } \ + tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \ + filter = subpel_filters[my - 1]; \ + \ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \ + dst += dststride; \ + tmp += SIZE; \ + } \ } VP8_EPEL_H(16, 4) @@ -401,6 +434,7 @@ VP8_EPEL_V(4, 4) VP8_EPEL_V(16, 6) VP8_EPEL_V(8, 6) VP8_EPEL_V(4, 6) + 
VP8_EPEL_HV(16, 4, 4) VP8_EPEL_HV(8, 4, 4) VP8_EPEL_HV(4, 4, 4) @@ -414,73 +448,77 @@ VP8_EPEL_HV(16, 6, 6) VP8_EPEL_HV(8, 6, 6) VP8_EPEL_HV(4, 6, 6) -#define VP8_BILINEAR(SIZE) \ -static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \ -{ \ - int a = 8-mx, b = mx; \ - int x, y; \ -\ - for (y = 0; y < h; y++) { \ - for (x = 0; x < SIZE; x++) \ - dst[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \ - dst += dstride; \ - src += sstride; \ - } \ -} \ -static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \ -{ \ - int c = 8-my, d = my; \ - int x, y; \ -\ - for (y = 0; y < h; y++) { \ - for (x = 0; x < SIZE; x++) \ - dst[x] = (c*src[x] + d*src[x+sstride] + 4) >> 3; \ - dst += dstride; \ - src += sstride; \ - } \ -} \ -\ -static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \ -{ \ - int a = 8-mx, b = mx; \ - int c = 8-my, d = my; \ - int x, y; \ - uint8_t tmp_array[(2*SIZE+1)*SIZE]; \ - uint8_t *tmp = tmp_array; \ -\ - for (y = 0; y < h+1; y++) { \ - for (x = 0; x < SIZE; x++) \ - tmp[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \ - tmp += SIZE; \ - src += sstride; \ - } \ -\ - tmp = tmp_array; \ -\ - for (y = 0; y < h; y++) { \ - for (x = 0; x < SIZE; x++) \ - dst[x] = (c*tmp[x] + d*tmp[x+SIZE] + 4) >> 3; \ - dst += dstride; \ - tmp += SIZE; \ - } \ +#define VP8_BILINEAR(SIZE) \ +static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, \ + uint8_t *src, ptrdiff_t sstride, \ + int h, int mx, int my) \ +{ \ + int a = 8 - mx, b = mx; \ + int x, y; \ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ + dst += dstride; \ + src += sstride; \ + } \ +} \ + \ +static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, \ + uint8_t *src, ptrdiff_t sstride, 
\ + int h, int mx, int my) \ +{ \ + int c = 8 - my, d = my; \ + int x, y; \ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; \ + dst += dstride; \ + src += sstride; \ + } \ +} \ + \ +static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, \ + ptrdiff_t dstride, \ + uint8_t *src, \ + ptrdiff_t sstride, \ + int h, int mx, int my) \ +{ \ + int a = 8 - mx, b = mx; \ + int c = 8 - my, d = my; \ + int x, y; \ + uint8_t tmp_array[(2 * SIZE + 1) * SIZE]; \ + uint8_t *tmp = tmp_array; \ + for (y = 0; y < h + 1; y++) { \ + for (x = 0; x < SIZE; x++) \ + tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \ + tmp += SIZE; \ + src += sstride; \ + } \ + tmp = tmp_array; \ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3; \ + dst += dstride; \ + tmp += SIZE; \ + } \ } VP8_BILINEAR(16) VP8_BILINEAR(8) VP8_BILINEAR(4) -#define VP8_MC_FUNC(IDX, SIZE) \ - dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \ - dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \ - dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \ - dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \ +#define VP8_MC_FUNC(IDX, SIZE) \ + dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \ + dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \ dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \ dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \ - dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \ dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## 
SIZE ## _h4v6_c; \ dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c -#define VP8_BILINEAR_MC_FUNC(IDX, SIZE) \ +#define VP8_BILINEAR_MC_FUNC(IDX, SIZE) \ dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \ dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \ dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \ diff --git a/libavcodec/vp8dsp.h b/libavcodec/vp8dsp.h index 877e264ff9..9e313a7067 100644 --- a/libavcodec/vp8dsp.h +++ b/libavcodec/vp8dsp.h @@ -30,8 +30,8 @@ #include <stddef.h> #include <stdint.h> -typedef void (*vp8_mc_func)(uint8_t *dst/*align 8*/, ptrdiff_t dstStride, - uint8_t *src/*align 1*/, ptrdiff_t srcStride, +typedef void (*vp8_mc_func)(uint8_t *dst /* align 8 */, ptrdiff_t dstStride, + uint8_t *src /* align 1 */, ptrdiff_t srcStride, int h, int x, int y); typedef struct VP8DSPContext { |