aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
author: Ronald S. Bultje <rsbultje@gmail.com> 2014-01-24 21:53:22 -0500
committer: Clément Bœsch <u@pkh.me> 2014-02-09 18:07:14 +0100
commit: fccd7d81b694f7705d088243c52f08691e3449d6 (patch)
tree: 3778c327efa042eb655d8544f000d2c33677bffd /libavcodec
parent: 1dc8559e25b4e51fa0c0570a43fa085c0e5ac23b (diff)
download: ffmpeg-fccd7d81b694f7705d088243c52f08691e3449d6.tar.gz
vp9: decode_mode context optimizations.
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/vp9.c 103
1 files changed, 75 insertions, 28 deletions
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index b612d533fe..c899fac359 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -1287,6 +1287,7 @@ static void decode_mode(AVCodecContext *ctx)
int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
+ int vref, filter_id;
if (!s->segmentation.enabled) {
b->seg_id = 0;
@@ -1780,9 +1781,10 @@ static void decode_mode(AVCodecContext *ctx)
c = 3;
}
- b->filter = vp8_rac_get_tree(&s->c, vp9_filter_tree,
+ filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
s->prob.p.filter[c]);
- s->counts.filter[c][b->filter]++;
+ s->counts.filter[c][filter_id]++;
+ b->filter = vp9_filter_lut[filter_id];
} else {
b->filter = s->filtermode;
}
@@ -1839,27 +1841,80 @@ static void decode_mode(AVCodecContext *ctx)
AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
}
+
+ vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
+ }
+
+#if HAVE_FAST_64BIT
+#define SPLAT_CTX(var, val, n) \
+ switch (n) { \
+ case 1: var = val; break; \
+ case 2: AV_WN16A(&var, val * 0x0101); break; \
+ case 4: AV_WN32A(&var, val * 0x01010101); break; \
+ case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
+ case 16: { \
+ uint64_t v64 = val * 0x0101010101010101ULL; \
+ AV_WN64A( &var, v64); \
+ AV_WN64A(&((uint8_t *) &var)[8], v64); \
+ break; \
+ } \
+ }
+#else
+#define SPLAT_CTX(var, val, n) \
+ switch (n) { \
+ case 1: var = val; break; \
+ case 2: AV_WN16A(&var, val * 0x0101); break; \
+ case 4: AV_WN32A(&var, val * 0x01010101); break; \
+ case 8: { \
+ uint32_t v32 = val * 0x01010101; \
+ AV_WN32A( &var, v32); \
+ AV_WN32A(&((uint8_t *) &var)[4], v32); \
+ break; \
+ } \
+ case 16: { \
+ uint32_t v32 = val * 0x01010101; \
+ AV_WN32A( &var, v32); \
+ AV_WN32A(&((uint8_t *) &var)[4], v32); \
+ AV_WN32A(&((uint8_t *) &var)[8], v32); \
+ AV_WN32A(&((uint8_t *) &var)[12], v32); \
+ break; \
+ } \
+ }
+#endif
+
+ switch (bwh_tab[1][b->bs][0]) {
+#define SET_CTXS(dir, off, n) \
+ do { \
+ SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
+ SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
+ SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
+ if (!s->keyframe && !s->intraonly) { \
+ SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
+ SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
+ SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
+ if (!b->intra) { \
+ SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
+ if (s->filtermode == FILTER_SWITCHABLE) { \
+ SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
+ } \
+ } \
+ } \
+ } while (0)
+ case 1: SET_CTXS(above, col, 1); break;
+ case 2: SET_CTXS(above, col, 2); break;
+ case 4: SET_CTXS(above, col, 4); break;
+ case 8: SET_CTXS(above, col, 8); break;
}
+ switch (bwh_tab[1][b->bs][1]) {
+ case 1: SET_CTXS(left, row7, 1); break;
+ case 2: SET_CTXS(left, row7, 2); break;
+ case 4: SET_CTXS(left, row7, 4); break;
+ case 8: SET_CTXS(left, row7, 8); break;
+ }
+#undef SPLAT_CTX
+#undef SET_CTXS
- // FIXME this can probably be optimized
- memset(&s->above_skip_ctx[col], b->skip, w4);
- memset(&s->left_skip_ctx[row7], b->skip, h4);
- memset(&s->above_txfm_ctx[col], b->tx, w4);
- memset(&s->left_txfm_ctx[row7], b->tx, h4);
- memset(&s->above_partition_ctx[col], above_ctx[b->bs], w4);
- memset(&s->left_partition_ctx[row7], left_ctx[b->bs], h4);
if (!s->keyframe && !s->intraonly) {
- memset(&s->above_intra_ctx[col], b->intra, w4);
- memset(&s->left_intra_ctx[row7], b->intra, h4);
- memset(&s->above_comp_ctx[col], b->comp, w4);
- memset(&s->left_comp_ctx[row7], b->comp, h4);
- memset(&s->above_mode_ctx[col], b->mode[3], w4);
- memset(&s->left_mode_ctx[row7], b->mode[3], h4);
- if (s->filtermode == FILTER_SWITCHABLE && !b->intra ) {
- memset(&s->above_filter_ctx[col], b->filter, w4);
- memset(&s->left_filter_ctx[row7], b->filter, h4);
- b->filter = vp9_filter_lut[b->filter];
- }
if (b->bs > BS_8x8) {
int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
@@ -1883,14 +1938,6 @@ static void decode_mode(AVCodecContext *ctx)
AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
}
}
-
- if (!b->intra) { // FIXME write 0xff or -1 if intra, so we can use this
- // as a direct check in above branches
- int vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
-
- memset(&s->above_ref_ctx[col], vref, w4);
- memset(&s->left_ref_ctx[row7], vref, h4);
- }
}
// FIXME kinda ugly