aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2015-05-01 22:23:37 +0200
committer Michael Niedermayer <michaelni@gmx.at> 2015-05-01 22:23:48 +0200
commit d56245f77035992aa43d2714fb2a134d45b0e84d (patch)
tree 41fc1838d8bc3b66fa5cede886c32fd84ca7232e
parent c8de8f7e64386235388b99ff3214e90bb806c4f0 (diff)
parent b005d097646191e06c446951c33b4606b2d5b955 (diff)
download ffmpeg-d56245f77035992aa43d2714fb2a134d45b0e84d.tar.gz
Merge remote-tracking branch 'rbultje/vp9-profile1-wip'
* rbultje/vp9-profile1-wip:
  vp9: add fate test for 422.
  vp9: copy bug in libvpx for 4:2:2 chroma bs=8x4/4x4 prediction.
  vp9: add yuv440 fate test.
  vp9: fix mask_edges and filter_plane_rows/cols() for 440.
  vp9: more specifically specify mask destination to mask_edges().
  vp9: add fate test for profile 1 444.
  vp9: don't create special u/v filter masks for 444.
  vp9: merge uv loopfilter code into generic filter_plane_rows/cols().
  vp9: split out loopfilter luma rows/cols functions from loopfilter_sb().
  vp9: invert order of two conditions.
  vp9: use correct chroma subsampling for profile 1 inter block recon.
  vp9: use correct chroma subsampling for profile 1 intra block recon.
  vp9: take chroma subsampling into account when walking the block tree.
  vp9: support non-420 chroma subsampling for profile 1 token decoding.
  vp9: increase buffer sizes for non-420 chroma subsamplings.
  vp9: profile 1 header decoding.

Merged-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- libavcodec/vp9.c 736
-rw-r--r-- libavcodec/vp9_mc_template.c 322
-rw-r--r-- tests/fate/vpx.mak 45
-rw-r--r-- tests/ref/fate/vp9p1-04-yuv422 15
-rw-r--r-- tests/ref/fate/vp9p1-04-yuv440 15
-rw-r--r-- tests/ref/fate/vp9p1-04-yuv444 15
6 files changed, 712 insertions, 436 deletions
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 84e2220fb7..71ed4e64d8 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -112,8 +112,7 @@ typedef struct VP9Context {
uint8_t invisible;
uint8_t use_last_frame_mvs;
uint8_t errorres;
- uint8_t colorspace;
- uint8_t fullrange;
+ uint8_t ss_h, ss_v;
uint8_t intraonly;
uint8_t resetctx;
uint8_t refreshrefmask;
@@ -216,7 +215,7 @@ typedef struct VP9Context {
DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
- DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
+ DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
@@ -249,8 +248,8 @@ typedef struct VP9Context {
int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
struct { int x, y; } min_mv, max_mv;
- DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
- DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
+ DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
+ DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64];
uint16_t mvscale[3][2];
uint8_t mvstep[3][2];
} VP9Context;
@@ -308,39 +307,42 @@ static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
return 0;
}
-static int update_size(AVCodecContext *ctx, int w, int h)
+static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
{
VP9Context *s = ctx->priv_data;
uint8_t *p;
av_assert0(w > 0 && h > 0);
- if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
+ if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
return 0;
- ctx->width = w;
- ctx->height = h;
- s->sb_cols = (w + 63) >> 6;
- s->sb_rows = (h + 63) >> 6;
- s->cols = (w + 7) >> 3;
- s->rows = (h + 7) >> 3;
+ ctx->width = w;
+ ctx->height = h;
+ ctx->pix_fmt = fmt;
+ s->sb_cols = (w + 63) >> 6;
+ s->sb_rows = (h + 63) >> 6;
+ s->cols = (w + 7) >> 3;
+ s->rows = (h + 7) >> 3;
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
av_freep(&s->intra_pred_data[0]);
- p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
+ // FIXME we slightly over-allocate here for subsampled chroma, but a little
+ // bit of padding shouldn't affect performance...
+ p = av_malloc(s->sb_cols * (320 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
if (!p)
return AVERROR(ENOMEM);
assign(s->intra_pred_data[0], uint8_t *, 64);
- assign(s->intra_pred_data[1], uint8_t *, 32);
- assign(s->intra_pred_data[2], uint8_t *, 32);
+ assign(s->intra_pred_data[1], uint8_t *, 64);
+ assign(s->intra_pred_data[2], uint8_t *, 64);
assign(s->above_y_nnz_ctx, uint8_t *, 16);
assign(s->above_mode_ctx, uint8_t *, 16);
assign(s->above_mv_ctx, VP56mv(*)[2], 16);
+ assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
+ assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
assign(s->above_partition_ctx, uint8_t *, 8);
assign(s->above_skip_ctx, uint8_t *, 8);
assign(s->above_txfm_ctx, uint8_t *, 8);
- assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
- assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
assign(s->above_segpred_ctx, uint8_t *, 8);
assign(s->above_intra_ctx, uint8_t *, 8);
assign(s->above_comp_ctx, uint8_t *, 8);
@@ -359,34 +361,39 @@ static int update_size(AVCodecContext *ctx, int w, int h)
static int update_block_buffers(AVCodecContext *ctx)
{
VP9Context *s = ctx->priv_data;
+ int chroma_blocks, chroma_eobs;
if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
return 0;
av_free(s->b_base);
av_free(s->block_base);
+ chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
+ chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
if (s->frames[CUR_FRAME].uses_2pass) {
int sbs = s->sb_cols * s->sb_rows;
s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
- s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
+ s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
+ 16 * 16 + 2 * chroma_eobs) * sbs);
if (!s->b_base || !s->block_base)
return AVERROR(ENOMEM);
s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
- s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
- s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
- s->uveob_base[0] = s->eob_base + 256 * sbs;
- s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
+ s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks;
+ s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks);
+ s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
+ s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
} else {
s->b_base = av_malloc(sizeof(VP9Block));
- s->block_base = av_mallocz((64 * 64 + 128) * 3);
+ s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
+ 16 * 16 + 2 * chroma_eobs);
if (!s->b_base || !s->block_base)
return AVERROR(ENOMEM);
s->uvblock_base[0] = s->block_base + 64 * 64;
- s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
- s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
- s->uveob_base[0] = s->eob_base + 256;
- s->uveob_base[1] = s->uveob_base[0] + 64;
+ s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks;
+ s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks);
+ s->uveob_base[0] = s->eob_base + 16 * 16;
+ s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
}
s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
@@ -463,11 +470,56 @@ static int update_prob(VP56RangeCoder *c, int p)
255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
+/**
+ * Parse the colour details of a frame header from s->gb.
+ *
+ * Reads the 3-bit colourspace index and, depending on colourspace and
+ * profile, the colour-range bit, the two chroma subsampling bits and the
+ * trailing reserved bit. On success ctx->colorspace, ctx->color_range,
+ * s->ss_h and s->ss_v are updated.
+ *
+ * @param ctx codec context; ctx->priv_data is the VP9Context whose bit
+ *            reader is positioned at the colourspace field and whose
+ *            profile field has already been parsed
+ * @return the pixel format implied by the header, or a negative AVERROR
+ *         code (carried through the enum; callers test "< 0") when the
+ *         combination is invalid for the current profile
+ */
+static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
+{
+    static const enum AVColorSpace colorspaces[8] = {
+        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
+        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
+    };
+    VP9Context *s = ctx->priv_data;
+    enum AVPixelFormat res;
+
+    ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
+    if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
+        if (s->profile == 1) {
+            // GBRP carries three full-resolution planes, so the "chroma"
+            // planes must not be treated as subsampled by the block walker,
+            // reconstruction or loopfilter code.
+            s->ss_h = s->ss_v = 0;
+            res = AV_PIX_FMT_GBRP;
+            ctx->color_range = AVCOL_RANGE_JPEG;
+            // The reserved bit is present for RGB as well; it has to be
+            // consumed so the remaining header fields stay bit-aligned.
+            if (get_bits1(&s->gb)) {
+                av_log(ctx, AV_LOG_ERROR, "Profile 1 color details reserved bit set\n");
+                return AVERROR_INVALIDDATA;
+            }
+        } else {
+            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
+            return AVERROR_INVALIDDATA;
+        }
+    } else {
+        // indexed by vertical, then horizontal subsampling flag
+        static const enum AVPixelFormat pix_fmt_for_ss[2 /* v */][2 /* h */] = {
+            { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
+            { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P },
+        };
+        ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
+        if (s->profile == 1) {
+            s->ss_h = get_bits1(&s->gb);
+            s->ss_v = get_bits1(&s->gb);
+            if ((res = pix_fmt_for_ss[s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
+                av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile 1\n");
+                return AVERROR_INVALIDDATA;
+            } else if (get_bits1(&s->gb)) {
+                av_log(ctx, AV_LOG_ERROR, "Profile 1 color details reserved bit set\n");
+                return AVERROR_INVALIDDATA;
+            }
+        } else {
+            // profile 0 has no subsampling bits: always 4:2:0
+            s->ss_h = s->ss_v = 1;
+            res = AV_PIX_FMT_YUV420P;
+        }
+    }
+
+    return res;
+}
+
+
static int decode_frame_header(AVCodecContext *ctx,
const uint8_t *data, int size, int *ref)
{
VP9Context *s = ctx->priv_data;
int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
+ enum AVPixelFormat fmt = ctx->pix_fmt;
int last_invisible;
const uint8_t *data2;
@@ -481,8 +533,9 @@ static int decode_frame_header(AVCodecContext *ctx,
return AVERROR_INVALIDDATA;
}
s->profile = get_bits1(&s->gb);
- if (get_bits1(&s->gb)) { // reserved bit
- av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
+ s->profile |= get_bits1(&s->gb) << 1;
+ if (s->profile > 1) {
+ av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", s->profile);
return AVERROR_INVALIDDATA;
}
if (get_bits1(&s->gb)) {
@@ -500,12 +553,8 @@ static int decode_frame_header(AVCodecContext *ctx,
av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
return AVERROR_INVALIDDATA;
}
- s->colorspace = get_bits(&s->gb, 3);
- if (s->colorspace == 7) { // RGB = profile 1
- av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
- return AVERROR_INVALIDDATA;
- }
- s->fullrange = get_bits1(&s->gb);
+ if ((fmt = read_colorspace_details(ctx)) < 0)
+ return fmt;
// for profile 1, here follows the subsampling bits
s->refreshrefmask = 0xff;
w = get_bits(&s->gb, 16) + 1;
@@ -520,6 +569,15 @@ static int decode_frame_header(AVCodecContext *ctx,
av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
return AVERROR_INVALIDDATA;
}
+ if (s->profile == 1) {
+ if ((fmt = read_colorspace_details(ctx)) < 0)
+ return fmt;
+ } else {
+ s->ss_h = s->ss_v = 1;
+ fmt = AV_PIX_FMT_YUV420P;
+ ctx->colorspace = AVCOL_SPC_BT470BG;
+ ctx->color_range = AVCOL_RANGE_JPEG;
+ }
s->refreshrefmask = get_bits(&s->gb, 8);
w = get_bits(&s->gb, 16) + 1;
h = get_bits(&s->gb, 16) + 1;
@@ -722,8 +780,8 @@ static int decode_frame_header(AVCodecContext *ctx,
}
/* tiling info */
- if ((res = update_size(ctx, w, h)) < 0) {
- av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
+ if ((res = update_size(ctx, w, h, fmt)) < 0) {
+ av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
return res;
}
for (s->tiling.log2_tile_cols = 0;
@@ -2279,12 +2337,12 @@ static void decode_coeffs(AVCodecContext *ctx)
break;
}
-#define DECODE_UV_COEF_LOOP(step) \
+#define DECODE_UV_COEF_LOOP(step, decode_coeffs_fn) \
for (n = 0, y = 0; y < end_y; y += step) { \
for (x = 0; x < end_x; x += step, n += step * step) { \
- res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
- 16 * step * step, c, e, p, a[x] + l[y], \
- uvscan, uvnb, uv_band_counts, qmul[1]); \
+ res = decode_coeffs_fn(&s->c, s->uvblock[pl] + 16 * n, \
+ 16 * step * step, c, e, p, a[x] + l[y], \
+ uvscan, uvnb, uv_band_counts, qmul[1]); \
a[x] = l[y] = !!res; \
if (step >= 4) { \
AV_WN16A(&s->uveob[pl][n], res); \
@@ -2297,36 +2355,30 @@ static void decode_coeffs(AVCodecContext *ctx)
p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
- w4 >>= 1;
- h4 >>= 1;
- end_x >>= 1;
- end_y >>= 1;
+ w4 >>= s->ss_h;
+ end_x >>= s->ss_h;
+ h4 >>= s->ss_v;
+ end_y >>= s->ss_v;
for (pl = 0; pl < 2; pl++) {
- a = &s->above_uv_nnz_ctx[pl][col];
- l = &s->left_uv_nnz_ctx[pl][row & 7];
+ a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
+ l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
switch (b->uvtx) {
case TX_4X4:
- DECODE_UV_COEF_LOOP(1);
+ DECODE_UV_COEF_LOOP(1, decode_coeffs_b);
break;
case TX_8X8:
MERGE_CTX(2, AV_RN16A);
- DECODE_UV_COEF_LOOP(2);
+ DECODE_UV_COEF_LOOP(2, decode_coeffs_b);
SPLAT_CTX(2);
break;
case TX_16X16:
MERGE_CTX(4, AV_RN32A);
- DECODE_UV_COEF_LOOP(4);
+ DECODE_UV_COEF_LOOP(4, decode_coeffs_b);
SPLAT_CTX(4);
break;
case TX_32X32:
MERGE_CTX(8, AV_RN64A);
- // a 64x64 (max) uv block can ever only contain 1 tx32x32 block
- // so there is no need to loop
- res = decode_coeffs_b32(&s->c, s->uvblock[pl],
- 1024, c, e, p, a[0] + l[0],
- uvscan, uvnb, uv_band_counts, qmul[1]);
- a[0] = l[0] = !!res;
- AV_WN16A(&s->uveob[pl][0], res);
+ DECODE_UV_COEF_LOOP(8, decode_coeffs_b32);
SPLAT_CTX(8);
break;
}
@@ -2338,7 +2390,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
uint8_t *dst_inner, ptrdiff_t stride_inner,
uint8_t *l, int col, int x, int w,
int row, int y, enum TxfmMode tx,
- int p)
+ int p, int ss_h, int ss_v)
{
int have_top = row > 0 || y > 0;
int have_left = col > s->tiling.tile_col_start || x > 0;
@@ -2393,7 +2445,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
mode = mode_conv[mode][have_left][have_top];
if (edges[mode].needs_top) {
uint8_t *top, *topleft;
- int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
+ int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
int n_px_need_tr = 0;
if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
@@ -2404,11 +2456,11 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
// post-loopfilter data)
if (have_top) {
top = !(row & 7) && !y ?
- s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
+ s->intra_pred_data[p] + col * (8 >> ss_h) + x * 4 :
y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
if (have_left)
topleft = !(row & 7) && !y ?
- s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
+ s->intra_pred_data[p] + col * (8 >> ss_h) + x * 4 :
y == 0 || x == 0 ? &dst_edge[-stride_edge] :
&dst_inner[-stride_inner];
}
@@ -2449,7 +2501,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **
}
if (edges[mode].needs_left) {
if (have_left) {
- int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
+ int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
uint8_t *dst = x == 0 ? dst_edge : dst_inner;
ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
@@ -2508,7 +2560,7 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
mode = check_intra_mode(s, mode, &a, ptr_r,
s->frames[CUR_FRAME].tf.f->linesize[0],
ptr, s->y_stride, l,
- col, x, w4, row, y, b->tx, 0);
+ col, x, w4, row, y, b->tx, 0, 0, 0);
s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
if (eob)
s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
@@ -2519,9 +2571,9 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
}
// U/V
- w4 >>= 1;
- end_x >>= 1;
- end_y >>= 1;
+ w4 >>= s->ss_h;
+ end_x >>= s->ss_h;
+ end_y >>= s->ss_v;
step = 1 << (b->uvtx * 2);
for (p = 0; p < 2; p++) {
dst = s->dst[1 + p];
@@ -2536,8 +2588,8 @@ static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
mode = check_intra_mode(s, mode, &a, ptr_r,
s->frames[CUR_FRAME].tf.f->linesize[1],
- ptr, s->uv_stride, l,
- col, x, w4, row, y, b->uvtx, p + 1);
+ ptr, s->uv_stride, l, col, x, w4, row, y,
+ b->uvtx, p + 1, s->ss_h, s->ss_v);
s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
if (eob)
s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
@@ -2557,7 +2609,7 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
int bw, int bh, int w, int h,
const uint16_t *scale, const uint8_t *step)
{
-#define scale_mv(n, dim) (((int64_t)n * scale[dim]) >> 14)
+#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
// BUG libvpx seems to scale the two components separately. This introduces
// rounding errors but we have to reproduce them to be exactly compatible
// with the output from libvpx...
@@ -2601,8 +2653,8 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
const uint16_t *scale, const uint8_t *step)
{
// BUG https://code.google.com/p/webm/issues/detail?id=820
- int mx = scale_mv(mv->x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
- int my = scale_mv(mv->y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
+ int mx = scale_mv(mv->x << !s->ss_h, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
+ int my = scale_mv(mv->y << !s->ss_v, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
#undef scale_mv
int refbw_m1, refbh_m1;
int th;
@@ -2618,7 +2670,7 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
- th = (y + refbh_m1 + 4 + 7) >> 5;
+ th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
@@ -2696,7 +2748,7 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
int bw, int bh, int w, int h)
{
- int mx = mv->x, my = mv->y, th;
+ int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
y += my >> 4;
x += mx >> 4;
@@ -2707,7 +2759,7 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
// we use +7 because the last 7 pixels of each sbrow can be changed in
// the longest loopfilter of the next sbrow
- th = (y + bh + 4 * !!my + 7) >> 5;
+ th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
if (x < !!mx * 3 || y < !!my * 3 ||
x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
@@ -2781,8 +2833,8 @@ static void inter_recon(AVCodecContext *ctx)
}
// uv itxfm add
- end_x >>= 1;
- end_y >>= 1;
+ end_x >>= s->ss_h;
+ end_y >>= s->ss_v;
step = 1 << (b->uvtx * 2);
for (p = 0; p < 2; p++) {
dst = s->dst[p + 1];
@@ -2801,11 +2853,14 @@ static void inter_recon(AVCodecContext *ctx)
}
}
-static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
+static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
int row_and_7, int col_and_7,
int w, int h, int col_end, int row_end,
enum TxfmMode tx, int skip_inter)
{
+ static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
+ static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
+
// FIXME I'm pretty sure all loops can be replaced by a single LUT if
// we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
// and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
@@ -2816,14 +2871,14 @@ static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
// a time, and we only use the topleft block's mode information to set
// things like block strength. Thus, for any block size smaller than
// 16x16, ignore the odd portion of the block.
- if (tx == TX_4X4 && is_uv) {
- if (h == 1) {
+ if (tx == TX_4X4 && (ss_v | ss_h)) {
+ if (h == ss_v) {
if (row_and_7 & 1)
return;
if (!row_end)
h += 1;
}
- if (w == 1) {
+ if (w == ss_h) {
if (col_and_7 & 1)
return;
if (!col_end)
@@ -2833,103 +2888,85 @@ static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
if (tx == TX_4X4 && !skip_inter) {
int t = 1 << col_and_7, m_col = (t << w) - t, y;
- int m_col_odd = (t << (w - 1)) - t;
-
// on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
- if (is_uv) {
- int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
-
- for (y = row_and_7; y < h + row_and_7; y++) {
- int col_mask_id = 2 - !(y & 7);
-
- lflvl->mask[is_uv][0][y][1] |= m_row_8;
- lflvl->mask[is_uv][0][y][2] |= m_row_4;
- // for odd lines, if the odd col is not being filtered,
- // skip odd row also:
- // .---. <-- a
- // | |
- // |___| <-- b
- // ^ ^
- // c d
- //
- // if a/c are even row/col and b/d are odd, and d is skipped,
- // e.g. right edge of size-66x66.webm, then skip b also (bug)
- if ((col_end & 1) && (y & 1)) {
- lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
- } else {
- lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
- }
- }
- } else {
- int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
-
- for (y = row_and_7; y < h + row_and_7; y++) {
- int col_mask_id = 2 - !(y & 3);
-
- lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
- lflvl->mask[is_uv][0][y][2] |= m_row_4;
- lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
- lflvl->mask[is_uv][0][y][3] |= m_col;
- lflvl->mask[is_uv][1][y][3] |= m_col;
+ int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
+
+ for (y = row_and_7; y < h + row_and_7; y++) {
+ int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
+
+ mask[0][y][1] |= m_row_8;
+ mask[0][y][2] |= m_row_4;
+ // for odd lines, if the odd col is not being filtered,
+ // skip odd row also:
+ // .---. <-- a
+ // | |
+ // |___| <-- b
+ // ^ ^
+ // c d
+ //
+ // if a/c are even row/col and b/d are odd, and d is skipped,
+ // e.g. right edge of size-66x66.webm, then skip b also (bug)
+ if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
+ mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
+ } else {
+ mask[1][y][col_mask_id] |= m_col;
}
+ if (!ss_h)
+ mask[0][y][3] |= m_col;
+ if (!ss_v)
+ mask[1][y][3] |= m_col;
}
} else {
int y, t = 1 << col_and_7, m_col = (t << w) - t;
if (!skip_inter) {
int mask_id = (tx == TX_8X8);
- int l2 = tx + is_uv - 1, step1d = 1 << l2;
static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
+ int l2 = tx + ss_h - 1, step1d;
int m_row = m_col & masks[l2];
// at odd UV col/row edges tx16/tx32 loopfilter edges, force
// 8wd loopfilter to prevent going off the visible edge.
- if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
+ if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
int m_row_8 = m_row - m_row_16;
for (y = row_and_7; y < h + row_and_7; y++) {
- lflvl->mask[is_uv][0][y][0] |= m_row_16;
- lflvl->mask[is_uv][0][y][1] |= m_row_8;
+ mask[0][y][0] |= m_row_16;
+ mask[0][y][1] |= m_row_8;
}
} else {
for (y = row_and_7; y < h + row_and_7; y++)
- lflvl->mask[is_uv][0][y][mask_id] |= m_row;
+ mask[0][y][mask_id] |= m_row;
}
- if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
+ l2 = tx + ss_v - 1;
+ step1d = 1 << l2;
+ if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
- lflvl->mask[is_uv][1][y][0] |= m_col;
+ mask[1][y][0] |= m_col;
if (y - row_and_7 == h - 1)
- lflvl->mask[is_uv][1][y][1] |= m_col;
+ mask[1][y][1] |= m_col;
} else {
for (y = row_and_7; y < h + row_and_7; y += step1d)
- lflvl->mask[is_uv][1][y][mask_id] |= m_col;
+ mask[1][y][mask_id] |= m_col;
}
} else if (tx != TX_4X4) {
int mask_id;
- mask_id = (tx == TX_8X8) || (is_uv && h == 1);
- lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
- mask_id = (tx == TX_8X8) || (is_uv && w == 1);
+ mask_id = (tx == TX_8X8) || (h == ss_v);
+ mask[1][row_and_7][mask_id] |= m_col;
+ mask_id = (tx == TX_8X8) || (w == ss_h);
for (y = row_and_7; y < h + row_and_7; y++)
- lflvl->mask[is_uv][0][y][mask_id] |= t;
- } else if (is_uv) {
- int t8 = t & 0x01, t4 = t - t8;
-
- for (y = row_and_7; y < h + row_and_7; y++) {
- lflvl->mask[is_uv][0][y][2] |= t4;
- lflvl->mask[is_uv][0][y][1] |= t8;
- }
- lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
+ mask[0][y][mask_id] |= t;
} else {
- int t8 = t & 0x11, t4 = t - t8;
+ int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
for (y = row_and_7; y < h + row_and_7; y++) {
- lflvl->mask[is_uv][0][y][2] |= t4;
- lflvl->mask[is_uv][0][y][1] |= t8;
+ mask[0][y][2] |= t4;
+ mask[0][y][1] |= t8;
}
- lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
+ mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
}
}
}
@@ -2958,7 +2995,8 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
b->bl = bl;
b->bp = bp;
decode_mode(ctx);
- b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
+ b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
+ (s->ss_v && h4 * 2 == (1 << b->tx)));
if (!b->skip) {
decode_coeffs(ctx);
@@ -2973,34 +3011,39 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
case 8: AV_ZERO64(&v); break; \
case 16: AV_ZERO128(&v); break; \
}
-#define SPLAT_ZERO_YUV(dir, var, off, n) \
+#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
do { \
SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
- SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
- SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
+ if (s->ss_##dir2) { \
+ SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
+ SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
+ } else { \
+ SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
+ SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
+ } \
} while (0)
switch (w4) {
- case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
- case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
- case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
- case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
+ case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
+ case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
+ case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
+ case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
}
switch (h4) {
- case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
- case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
- case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
- case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
+ case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
+ case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
+ case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
+ case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
}
}
if (s->pass == 1) {
s->b++;
s->block += w4 * h4 * 64;
- s->uvblock[0] += w4 * h4 * 16;
- s->uvblock[1] += w4 * h4 * 16;
+ s->uvblock[0] += w4 * h4 * 64 >> (s->ss_h + s->ss_v);
+ s->uvblock[1] += w4 * h4 * 64 >> (s->ss_h + s->ss_v);
s->eob += 4 * w4 * h4;
- s->uveob[0] += w4 * h4;
- s->uveob[1] += w4 * h4;
+ s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
+ s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
return;
}
@@ -3073,11 +3116,12 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
- mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
- mask_edges(lflvl, 1, row7, col7, x_end, y_end,
- s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
- s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
- b->uvtx, skip_inter);
+ mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
+ if (s->ss_h || s->ss_v)
+ mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
+ s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
+ s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
+ b->uvtx, skip_inter);
if (!s->filter.lim_lut[lvl]) {
int sharp = s->filter.sharpness;
@@ -3097,11 +3141,11 @@ static void decode_b(AVCodecContext *ctx, int row, int col,
if (s->pass == 2) {
s->b++;
s->block += w4 * h4 * 64;
- s->uvblock[0] += w4 * h4 * 16;
- s->uvblock[1] += w4 * h4 * 16;
+ s->uvblock[0] += w4 * h4 * 64 >> (s->ss_v + s->ss_h);
+ s->uvblock[1] += w4 * h4 * 64 >> (s->ss_v + s->ss_h);
s->eob += 4 * w4 * h4;
- s->uveob[0] += w4 * h4;
- s->uveob[1] += w4 * h4;
+ s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
+ s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
}
}
@@ -3131,24 +3175,24 @@ static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *l
case PARTITION_H:
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
yoff += hbs * 8 * y_stride;
- uvoff += hbs * 4 * uv_stride;
+ uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
break;
case PARTITION_V:
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
yoff += hbs * 8;
- uvoff += hbs * 4;
+ uvoff += hbs * 8 >> s->ss_h;
decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
break;
case PARTITION_SPLIT:
decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
decode_sb(ctx, row, col + hbs, lflvl,
- yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
+ yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
yoff += hbs * 8 * y_stride;
- uvoff += hbs * 4 * uv_stride;
+ uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
decode_sb(ctx, row + hbs, col + hbs, lflvl,
- yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
+ yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
break;
default:
av_assert0(0);
@@ -3157,7 +3201,7 @@ static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *l
bp = PARTITION_SPLIT;
decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
decode_sb(ctx, row, col + hbs, lflvl,
- yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
+ yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
} else {
bp = PARTITION_H;
decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
@@ -3167,7 +3211,7 @@ static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *l
bp = PARTITION_SPLIT;
decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
yoff += hbs * 8 * y_stride;
- uvoff += hbs * 4 * uv_stride;
+ uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
} else {
bp = PARTITION_V;
@@ -3196,11 +3240,11 @@ static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filte
decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
if (b->bp == PARTITION_H && row + hbs < s->rows) {
yoff += hbs * 8 * y_stride;
- uvoff += hbs * 4 * uv_stride;
+ uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
} else if (b->bp == PARTITION_V && col + hbs < s->cols) {
yoff += hbs * 8;
- uvoff += hbs * 4;
+ uvoff += hbs * 8 >> s->ss_h;
decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
}
} else {
@@ -3208,262 +3252,203 @@ static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filte
if (col + hbs < s->cols) { // FIXME why not <=?
if (row + hbs < s->rows) {
decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
- uvoff + 4 * hbs, bl + 1);
+ uvoff + (8 * hbs >> s->ss_h), bl + 1);
yoff += hbs * 8 * y_stride;
- uvoff += hbs * 4 * uv_stride;
+ uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
- yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
+ yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
} else {
yoff += hbs * 8;
- uvoff += hbs * 4;
+ uvoff += hbs * 8 >> s->ss_h;
decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
}
} else if (row + hbs < s->rows) {
yoff += hbs * 8 * y_stride;
- uvoff += hbs * 4 * uv_stride;
+ uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
}
}
}
-static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
- int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
+static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
+ uint8_t *lvl, uint8_t (*mask)[4],
+ uint8_t *dst, ptrdiff_t ls)
{
- VP9Context *s = ctx->priv_data;
- AVFrame *f = s->frames[CUR_FRAME].tf.f;
- uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
- ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
- int y, x, p;
+ int y, x;
- // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
- // if you think of them as acting on a 8x8 block max, we can interleave
- // each v/h within the single x loop, but that only works if we work on
- // 8 pixel blocks, and we won't always do that (we want at least 16px
- // to use SSE2 optimizations, perhaps 32 for AVX2)
-
- // filter edges between columns, Y plane (e.g. block1 | block2)
- for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
- uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
- uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
+ // filter edges between columns (e.g. block1 | block2)
+ for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
+ uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
unsigned hm = hm1 | hm2 | hm13 | hm23;
- for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
- if (hm1 & x) {
- int L = *l, H = L >> 4;
- int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+ for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 >> ss_h) {
+ if (col || x > 1) {
+ if (hm1 & x) {
+ int L = *l, H = L >> 4;
+ int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
- if (col || x > 1) {
if (hmask1[0] & x) {
if (hmask2[0] & x) {
- av_assert2(l[8] == L);
- s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
+ av_assert2(l[8 << ss_v] == L);
+ s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
} else {
- s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
+ s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
}
} else if (hm2 & x) {
- L = l[8];
+ L = l[8 << ss_v];
H |= (L >> 4) << 8;
E |= s->filter.mblim_lut[L] << 8;
I |= s->filter.lim_lut[L] << 8;
s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
[!!(hmask2[1] & x)]
- [0](ptr, ls_y, E, I, H);
+ [0](ptr, ls, E, I, H);
} else {
s->dsp.loop_filter_8[!!(hmask1[1] & x)]
- [0](ptr, ls_y, E, I, H);
+ [0](ptr, ls, E, I, H);
}
- }
- } else if (hm2 & x) {
- int L = l[8], H = L >> 4;
- int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+ } else if (hm2 & x) {
+ int L = l[8 << ss_v], H = L >> 4;
+ int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
- if (col || x > 1) {
s->dsp.loop_filter_8[!!(hmask2[1] & x)]
- [0](ptr + 8 * ls_y, ls_y, E, I, H);
+ [0](ptr + 8 * ls, ls, E, I, H);
}
}
- if (hm13 & x) {
- int L = *l, H = L >> 4;
- int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
-
- if (hm23 & x) {
- L = l[8];
- H |= (L >> 4) << 8;
- E |= s->filter.mblim_lut[L] << 8;
- I |= s->filter.lim_lut[L] << 8;
- s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
- } else {
- s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
- }
- } else if (hm23 & x) {
- int L = l[8], H = L >> 4;
- int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+ if (ss_h) {
+ if (x & 0xAA)
+ l += 2;
+ } else {
+ if (hm13 & x) {
+ int L = *l, H = L >> 4;
+ int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
- s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
+ if (hm23 & x) {
+ L = l[8 << ss_v];
+ H |= (L >> 4) << 8;
+ E |= s->filter.mblim_lut[L] << 8;
+ I |= s->filter.lim_lut[L] << 8;
+ s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls, E, I, H);
+ } else {
+ s->dsp.loop_filter_8[0][0](ptr + 4, ls, E, I, H);
+ }
+ } else if (hm23 & x) {
+ int L = l[8 << ss_v], H = L >> 4;
+ int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+
+ s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4, ls, E, I, H);
+ }
+ l++;
}
}
}
+}
+
+static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
+ uint8_t *lvl, uint8_t (*mask)[4],
+ uint8_t *dst, ptrdiff_t ls)
+{
+ int y, x;
- // block1
- // filter edges between rows, Y plane (e.g. ------)
- // block2
- dst = f->data[0] + yoff;
- lvl = lflvl->level;
- for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
- uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
+ // block1
+ // filter edges between rows (e.g. ------)
+ // block2
+ for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
+ uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
- for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
+ for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16, l += 2 << ss_h) {
if (row || y) {
if (vm & x) {
int L = *l, H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
if (vmask[0] & x) {
- if (vmask[0] & (x << 1)) {
- av_assert2(l[1] == L);
- s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
+ if (vmask[0] & (x << (1 + ss_h))) {
+ av_assert2(l[1 + ss_h] == L);
+ s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
} else {
- s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
+ s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
}
- } else if (vm & (x << 1)) {
- L = l[1];
+ } else if (vm & (x << (1 + ss_h))) {
+ L = l[1 + ss_h];
H |= (L >> 4) << 8;
E |= s->filter.mblim_lut[L] << 8;
I |= s->filter.lim_lut[L] << 8;
s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
- [!!(vmask[1] & (x << 1))]
- [1](ptr, ls_y, E, I, H);
+ [!!(vmask[1] & (x << (1 + ss_h)))]
+ [1](ptr, ls, E, I, H);
} else {
s->dsp.loop_filter_8[!!(vmask[1] & x)]
- [1](ptr, ls_y, E, I, H);
+ [1](ptr, ls, E, I, H);
}
- } else if (vm & (x << 1)) {
- int L = l[1], H = L >> 4;
+ } else if (vm & (x << (1 + ss_h))) {
+ int L = l[1 + ss_h], H = L >> 4;
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
- s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
- [1](ptr + 8, ls_y, E, I, H);
+ s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
+ [1](ptr + 8, ls, E, I, H);
}
}
- if (vm3 & x) {
- int L = *l, H = L >> 4;
- int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
-
- if (vm3 & (x << 1)) {
- L = l[1];
- H |= (L >> 4) << 8;
- E |= s->filter.mblim_lut[L] << 8;
- I |= s->filter.lim_lut[L] << 8;
- s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
- } else {
- s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
- }
- } else if (vm3 & (x << 1)) {
- int L = l[1], H = L >> 4;
- int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
-
- s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
- }
- }
- }
-
- // same principle but for U/V planes
- for (p = 0; p < 2; p++) {
- lvl = lflvl->level;
- dst = f->data[1 + p] + uvoff;
- for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
- uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
- uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
- unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
- unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
-
- for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
- if (col || x > 1) {
- if (hm1 & x) {
- int L = *l, H = L >> 4;
- int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
-
- if (hmask1[0] & x) {
- if (hmask2[0] & x) {
- av_assert2(l[16] == L);
- s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
- } else {
- s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
- }
- } else if (hm2 & x) {
- L = l[16];
- H |= (L >> 4) << 8;
- E |= s->filter.mblim_lut[L] << 8;
- I |= s->filter.lim_lut[L] << 8;
- s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
- [!!(hmask2[1] & x)]
- [0](ptr, ls_uv, E, I, H);
- } else {
- s->dsp.loop_filter_8[!!(hmask1[1] & x)]
- [0](ptr, ls_uv, E, I, H);
- }
- } else if (hm2 & x) {
- int L = l[16], H = L >> 4;
- int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+ if (!ss_v) {
+ if (vm3 & x) {
+ int L = *l, H = L >> 4;
+ int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
- s->dsp.loop_filter_8[!!(hmask2[1] & x)]
- [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
+ if (vm3 & (x << (1 + ss_h))) {
+ L = l[1 + ss_h];
+ H |= (L >> 4) << 8;
+ E |= s->filter.mblim_lut[L] << 8;
+ I |= s->filter.lim_lut[L] << 8;
+ s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
+ } else {
+ s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
}
- }
- if (x & 0xAA)
- l += 2;
- }
- }
- lvl = lflvl->level;
- dst = f->data[1 + p] + uvoff;
- for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
- uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
- unsigned vm = vmask[0] | vmask[1] | vmask[2];
-
- for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
- if (row || y) {
- if (vm & x) {
- int L = *l, H = L >> 4;
- int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
-
- if (vmask[0] & x) {
- if (vmask[0] & (x << 2)) {
- av_assert2(l[2] == L);
- s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
- } else {
- s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
- }
- } else if (vm & (x << 2)) {
- L = l[2];
- H |= (L >> 4) << 8;
- E |= s->filter.mblim_lut[L] << 8;
- I |= s->filter.lim_lut[L] << 8;
- s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
- [!!(vmask[1] & (x << 2))]
- [1](ptr, ls_uv, E, I, H);
- } else {
- s->dsp.loop_filter_8[!!(vmask[1] & x)]
- [1](ptr, ls_uv, E, I, H);
- }
- } else if (vm & (x << 2)) {
- int L = l[2], H = L >> 4;
- int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
+ } else if (vm3 & (x << (1 + ss_h))) {
+ int L = l[1 + ss_h], H = L >> 4;
+ int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
- s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
- [1](ptr + 8, ls_uv, E, I, H);
- }
+ s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8, ls, E, I, H);
}
}
+ }
+ if (ss_v) {
if (y & 1)
lvl += 16;
+ } else {
+ lvl += 8;
}
}
}
+static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
+ int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
+{
+ VP9Context *s = ctx->priv_data;
+ AVFrame *f = s->frames[CUR_FRAME].tf.f;
+ uint8_t *dst = f->data[0] + yoff;
+ ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
+ uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
+ int p;
+
+ // FIXME to what extent can we interleave the v/h loopfilter calls? E.g.
+ // if you think of them as acting on an 8x8 block at most, we can interleave
+ // each v/h within the single x loop, but that only works if we work on
+ // 8 pixel blocks, and we won't always do that (we want at least 16px
+ // to use SSE2 optimizations, perhaps 32 for AVX2)
+
+ filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
+ filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
+
+ for (p = 0; p < 2; p++) {
+ dst = f->data[1 + p] + uvoff;
+ filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
+ filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
+ }
+}
+
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
int sb_start = ( idx * n) >> log2_n;
@@ -3815,18 +3800,6 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
return res;
}
- if (s->fullrange)
- ctx->color_range = AVCOL_RANGE_JPEG;
- else
- ctx->color_range = AVCOL_RANGE_MPEG;
-
- switch (s->colorspace) {
- case 1: ctx->colorspace = AVCOL_SPC_BT470BG; break;
- case 2: ctx->colorspace = AVCOL_SPC_BT709; break;
- case 3: ctx->colorspace = AVCOL_SPC_SMPTE170M; break;
- case 4: ctx->colorspace = AVCOL_SPC_SMPTE240M; break;
- }
-
// main tile decode loop
memset(s->above_partition_ctx, 0, s->cols);
memset(s->above_skip_ctx, 0, s->cols);
@@ -3836,8 +3809,8 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
memset(s->above_mode_ctx, NEARESTMV, s->cols);
}
memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
- memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
- memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
+ memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
+ memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
memset(s->above_segpred_ctx, 0, s->cols);
s->pass = s->frames[CUR_FRAME].uses_2pass =
ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
@@ -3905,7 +3878,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
}
for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
- row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
+ row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
struct VP9Filter *lflvl_ptr = s->lflvl;
ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
@@ -3922,7 +3895,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
memset(s->left_mode_ctx, NEARESTMV, 8);
}
memset(s->left_y_nnz_ctx, 0, 16);
- memset(s->left_uv_nnz_ctx, 0, 16);
+ memset(s->left_uv_nnz_ctx, 0, 32);
memset(s->left_segpred_ctx, 0, 8);
memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
@@ -3930,7 +3903,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
for (col = s->tiling.tile_col_start;
col < s->tiling.tile_col_end;
- col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
+ col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) {
// FIXME integrate with lf code (i.e. zero after each
// use, similar to invtxfm coefficients, or similar)
if (s->pass != 1) {
@@ -3961,11 +3934,11 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
f->data[0] + yoff + 63 * ls_y,
8 * s->cols);
memcpy(s->intra_pred_data[1],
- f->data[1] + uvoff + 31 * ls_uv,
- 4 * s->cols);
+ f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
+ 8 * s->cols >> s->ss_h);
memcpy(s->intra_pred_data[2],
- f->data[2] + uvoff + 31 * ls_uv,
- 4 * s->cols);
+ f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
+ 8 * s->cols >> s->ss_h);
}
// loopfilter one row
@@ -3974,7 +3947,7 @@ static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
uvoff2 = uvoff;
lflvl_ptr = s->lflvl;
for (col = 0; col < s->cols;
- col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
+ col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) {
loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
}
}
@@ -4051,7 +4024,6 @@ static av_cold int vp9_decode_init(AVCodecContext *ctx)
VP9Context *s = ctx->priv_data;
ctx->internal->allocate_progress = 1;
- ctx->pix_fmt = AV_PIX_FMT_YUV420P;
ff_vp9dsp_init(&s->dsp);
ff_videodsp_init(&s->vdsp, 8);
s->filter.sharpness = -1;
@@ -4094,6 +4066,8 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
s->invisible = ssrc->invisible;
s->keyframe = ssrc->keyframe;
+ s->ss_v = ssrc->ss_v;
+ s->ss_h = ssrc->ss_h;
s->segmentation.enabled = ssrc->segmentation.enabled;
s->segmentation.update_map = ssrc->segmentation.update_map;
memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
diff --git a/libavcodec/vp9_mc_template.c b/libavcodec/vp9_mc_template.c
index c6ae432e26..f94438fa06 100644
--- a/libavcodec/vp9_mc_template.c
+++ b/libavcodec/vp9_mc_template.c
@@ -21,6 +21,12 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#define ROUNDED_DIV_MVx2(a, b) \
+ (VP56mv) { .x = ROUNDED_DIV(a.x + b.x, 2), .y = ROUNDED_DIV(a.y + b.y, 2) }
+#define ROUNDED_DIV_MVx4(a, b, c, d) \
+ (VP56mv) { .x = ROUNDED_DIV(a.x + b.x + c.x + d.x, 4), \
+ .y = ROUNDED_DIV(a.y + b.y + c.y + d.y, 4) }
+
static void FN(inter_pred)(AVCodecContext *ctx)
{
static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
@@ -44,6 +50,8 @@ static void FN(inter_pred)(AVCodecContext *ctx)
// y inter pred
if (b->bs > BS_8x8) {
+ VP56mv uvmv;
+
if (b->bs == BS_8x4) {
mc_luma_dir(s, mc[3][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1,
@@ -52,6 +60,38 @@ static void FN(inter_pred)(AVCodecContext *ctx)
s->dst[0] + 4 * ls_y, ls_y,
ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1, 0);
+ w1 = (w1 + s->ss_h) >> s->ss_h;
+ if (s->ss_v) {
+ h1 = (h1 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
+ mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 2, col << (3 - s->ss_h),
+ &uvmv, 8 >> s->ss_h, 4, w1, h1, 0);
+ } else {
+ mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 3, col << (3 - s->ss_h),
+ &b->mv[0][0], 8 >> s->ss_h, 4, w1, h1, 0);
+ // BUG (in libvpx, copied here on purpose): for 4:2:2 bs=8x4, libvpx uses the
+ // wrong block index to get the motion vector for the bottom 4x4 block
+ // https://code.google.com/p/webm/issues/detail?id=993
+ if (s->ss_h == 0) {
+ uvmv = b->mv[2][0];
+ } else {
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
+ }
+ mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][0],
+ s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ (row << 3) + 4, col << (3 - s->ss_h),
+ &uvmv, 8 >> s->ss_h, 4, w1, h1, 0);
+ }
if (b->comp) {
mc_luma_dir(s, mc[3][b->filter][1], s->dst[0], ls_y,
@@ -61,6 +101,38 @@ static void FN(inter_pred)(AVCodecContext *ctx)
s->dst[0] + 4 * ls_y, ls_y,
ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2, 1);
+ w2 = (w2 + s->ss_h) >> s->ss_h;
+ if (s->ss_v) {
+ h2 = (h2 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
+ mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 2, col << (3 - s->ss_h),
+ &uvmv, 8 >> s->ss_h, 4, w2, h2, 1);
+ } else {
+ mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 3, col << (3 - s->ss_h),
+ &b->mv[0][1], 8 >> s->ss_h, 4, w2, h2, 1);
+ // BUG (in libvpx, copied here on purpose): for 4:2:2 bs=8x4, libvpx uses the
+ // wrong block index to get the motion vector for the bottom 4x4 block
+ // https://code.google.com/p/webm/issues/detail?id=993
+ if (s->ss_h == 0) {
+ uvmv = b->mv[2][1];
+ } else {
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
+ }
+ mc_chroma_dir(s, mc[3 + s->ss_h][b->filter][1],
+ s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ (row << 3) + 4, col << (3 - s->ss_h),
+ &uvmv, 8 >> s->ss_h, 4, w2, h2, 1);
+ }
}
} else if (b->bs == BS_4x8) {
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0], ls_y,
@@ -69,6 +141,30 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][0], s->dst[0] + 4, ls_y,
ref1->data[0], ref1->linesize[0], tref1,
row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1, 0);
+ h1 = (h1 + s->ss_v) >> s->ss_v;
+ if (s->ss_h) {
+ w1 = (w1 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << (3 - s->ss_v), col << 2,
+ &uvmv, 4, 8 >> s->ss_v, w1, h1, 0);
+ } else {
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << (3 - s->ss_v), col << 3,
+ &b->mv[0][0], 4, 8 >> s->ss_v, w1, h1, 0);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1] + 4, s->dst[2] + 4, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << (3 - s->ss_v), (col << 3) + 4,
+ &b->mv[1][0], 4, 8 >> s->ss_v, w1, h1, 0);
+ }
if (b->comp) {
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
@@ -77,6 +173,30 @@ static void FN(inter_pred)(AVCodecContext *ctx)
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0] + 4, ls_y,
ref2->data[0], ref2->linesize[0], tref2,
row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2, 1);
+ h2 = (h2 + s->ss_v) >> s->ss_v;
+ if (s->ss_h) {
+ w2 = (w2 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << (3 - s->ss_v), col << 2,
+ &uvmv, 4, 8 >> s->ss_v, w2, h2, 1);
+ } else {
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << (3 - s->ss_v), col << 3,
+ &b->mv[0][1], 4, 8 >> s->ss_v, w2, h2, 1);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1] + 4, s->dst[2] + 4, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << (3 - s->ss_v), (col << 3) + 4,
+ &b->mv[1][1], 4, 8 >> s->ss_v, w2, h2, 1);
+ }
}
} else {
av_assert2(b->bs == BS_4x4);
@@ -97,6 +217,81 @@ static void FN(inter_pred)(AVCodecContext *ctx)
s->dst[0] + 4 * ls_y + 4, ls_y,
ref1->data[0], ref1->linesize[0], tref1,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1, 0);
+ if (s->ss_v) {
+ h1 = (h1 + 1) >> 1;
+ if (s->ss_h) {
+ w1 = (w1 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx4(b->mv[0][0], b->mv[1][0],
+ b->mv[2][0], b->mv[3][0]);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 2, col << 2,
+ &uvmv, 4, 4, w1, h1, 0);
+ } else {
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[2][0]);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 2, col << 3,
+ &uvmv, 4, 4, w1, h1, 0);
+ uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[3][0]);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1] + 4, s->dst[2] + 4, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 2, (col << 3) + 4,
+ &uvmv, 4, 4, w1, h1, 0);
+ }
+ } else {
+ if (s->ss_h) {
+ w1 = (w1 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][0], b->mv[1][0]);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 3, col << 2,
+ &uvmv, 4, 4, w1, h1, 0);
+ // BUG (in libvpx, copied here on purpose): libvpx uses the wrong block
+ // index for the bottom block of 4:2:2 bs=4x4
+ // https://code.google.com/p/webm/issues/detail?id=993
+ uvmv = ROUNDED_DIV_MVx2(b->mv[1][0], b->mv[2][0]);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ (row << 3) + 4, col << 2,
+ &uvmv, 4, 4, w1, h1, 0);
+ } else {
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1], s->dst[2], ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 3, col << 3,
+ &b->mv[0][0], 4, 4, w1, h1, 0);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1] + 4, s->dst[2] + 4, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ row << 3, (col << 3) + 4,
+ &b->mv[1][0], 4, 4, w1, h1, 0);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ (row << 3) + 4, col << 3,
+ &b->mv[2][0], 4, 4, w1, h1, 0);
+ mc_chroma_dir(s, mc[4][b->filter][0],
+ s->dst[1] + 4 * ls_uv + 4, s->dst[2] + 4 * ls_uv + 4, ls_uv,
+ ref1->data[1], ref1->linesize[1],
+ ref1->data[2], ref1->linesize[2], tref1,
+ (row << 3) + 4, (col << 3) + 4,
+ &b->mv[3][0], 4, 4, w1, h1, 0);
+ }
+ }
if (b->comp) {
mc_luma_dir(s, mc[4][b->filter][1], s->dst[0], ls_y,
@@ -113,59 +308,112 @@ static void FN(inter_pred)(AVCodecContext *ctx)
s->dst[0] + 4 * ls_y + 4, ls_y,
ref2->data[0], ref2->linesize[0], tref2,
(row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2, 1);
+ if (s->ss_v) {
+ h2 = (h2 + 1) >> 1;
+ if (s->ss_h) {
+ w2 = (w2 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx4(b->mv[0][1], b->mv[1][1],
+ b->mv[2][1], b->mv[3][1]);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 2, col << 2,
+ &uvmv, 4, 4, w2, h2, 1);
+ } else {
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[2][1]);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 2, col << 3,
+ &uvmv, 4, 4, w2, h2, 1);
+ uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[3][1]);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1] + 4, s->dst[2] + 4, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 2, (col << 3) + 4,
+ &uvmv, 4, 4, w2, h2, 1);
+ }
+ } else {
+ if (s->ss_h) {
+ w2 = (w2 + 1) >> 1;
+ uvmv = ROUNDED_DIV_MVx2(b->mv[0][1], b->mv[1][1]);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 3, col << 2,
+ &uvmv, 4, 4, w2, h2, 1);
+ // BUG (in libvpx, copied here on purpose): libvpx uses the wrong block
+ // index for the bottom block of 4:2:2 bs=4x4
+ // https://code.google.com/p/webm/issues/detail?id=993
+ uvmv = ROUNDED_DIV_MVx2(b->mv[1][1], b->mv[2][1]);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ (row << 3) + 4, col << 2,
+ &uvmv, 4, 4, w2, h2, 1);
+ } else {
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1], s->dst[2], ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 3, col << 3,
+ &b->mv[0][1], 4, 4, w2, h2, 1);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1] + 4, s->dst[2] + 4, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ row << 3, (col << 3) + 4,
+ &b->mv[1][1], 4, 4, w2, h2, 1);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1] + 4 * ls_uv, s->dst[2] + 4 * ls_uv, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ (row << 3) + 4, col << 3,
+ &b->mv[2][1], 4, 4, w2, h2, 1);
+ mc_chroma_dir(s, mc[4][b->filter][1],
+ s->dst[1] + 4 * ls_uv + 4, s->dst[2] + 4 * ls_uv + 4, ls_uv,
+ ref2->data[1], ref2->linesize[1],
+ ref2->data[2], ref2->linesize[2], tref2,
+ (row << 3) + 4, (col << 3) + 4,
+ &b->mv[3][1], 4, 4, w2, h2, 1);
+ }
+ }
}
}
} else {
int bwl = bwlog_tab[0][b->bs];
int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
+ int uvbw = bwh_tab[s->ss_h][b->bs][0] * 4, uvbh = bwh_tab[s->ss_v][b->bs][1] * 4;
mc_luma_dir(s, mc[bwl][b->filter][0], s->dst[0], ls_y,
ref1->data[0], ref1->linesize[0], tref1,
- row << 3, col << 3, &b->mv[0][0],bw, bh, w1, h1, 0);
-
- if (b->comp)
- mc_luma_dir(s, mc[bwl][b->filter][1], s->dst[0], ls_y,
- ref2->data[0], ref2->linesize[0], tref2,
- row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2, 1);
- }
-
- // uv inter pred
- {
- int bwl = bwlog_tab[1][b->bs];
- int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
- VP56mv mvuv;
-
- w1 = (w1 + 1) >> 1;
- h1 = (h1 + 1) >> 1;
- if (b->comp) {
- w2 = (w2 + 1) >> 1;
- h2 = (h2 + 1) >> 1;
- }
- if (b->bs > BS_8x8) {
- mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
- mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
- } else {
- mvuv = b->mv[0][0];
- }
-
- mc_chroma_dir(s, mc[bwl][b->filter][0],
+ row << 3, col << 3, &b->mv[0][0], bw, bh, w1, h1, 0);
+ w1 = (w1 + s->ss_h) >> s->ss_h;
+ h1 = (h1 + s->ss_v) >> s->ss_v;
+ mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][0],
s->dst[1], s->dst[2], ls_uv,
ref1->data[1], ref1->linesize[1],
ref1->data[2], ref1->linesize[2], tref1,
- row << 2, col << 2, &mvuv, bw, bh, w1, h1, 0);
+ row << (3 - s->ss_v), col << (3 - s->ss_h),
+ &b->mv[0][0], uvbw, uvbh, w1, h1, 0);
if (b->comp) {
- if (b->bs > BS_8x8) {
- mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
- mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
- } else {
- mvuv = b->mv[0][1];
- }
- mc_chroma_dir(s, mc[bwl][b->filter][1],
+ mc_luma_dir(s, mc[bwl][b->filter][1], s->dst[0], ls_y,
+ ref2->data[0], ref2->linesize[0], tref2,
+ row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2, 1);
+ w2 = (w2 + s->ss_h) >> s->ss_h;
+ h2 = (h2 + s->ss_v) >> s->ss_v;
+ mc_chroma_dir(s, mc[bwl + s->ss_h][b->filter][1],
s->dst[1], s->dst[2], ls_uv,
ref2->data[1], ref2->linesize[1],
ref2->data[2], ref2->linesize[2], tref2,
- row << 2, col << 2, &mvuv, bw, bh, w2, h2, 1);
+ row << (3 - s->ss_v), col << (3 - s->ss_h),
+ &b->mv[0][1], uvbw, uvbh, w2, h2, 1);
}
}
}
diff --git a/tests/fate/vpx.mak b/tests/fate/vpx.mak
index d77d5cb5c1..3bc8e9a38e 100644
--- a/tests/fate/vpx.mak
+++ b/tests/fate/vpx.mak
@@ -85,6 +85,12 @@ fate-vp9$(2)-$(1): CMD = framemd5 $(3) -i $(TARGET_SAMPLES)/vp9-test-vectors/vp9
fate-vp9$(2)-$(1): REF = $(SRC_PATH)/tests/ref/fate/vp9-$(1)
endef
+define FATE_VP9_PROFILE_SUITE
+FATE_VP9-$(CONFIG_MATROSKA_DEMUXER) += fate-vp9p$(2)-$(1)
+fate-vp9p$(2)-$(1): CMD = framemd5 -i $(TARGET_SAMPLES)/vp9-test-vectors/vp9$(2)-2-$(1).webm
+fate-vp9p$(2)-$(1): REF = $(SRC_PATH)/tests/ref/fate/vp9p$(2)-$(1)
+endef
+
VP9_Q = 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 \
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 \
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 \
@@ -94,24 +100,27 @@ VP9_SIZE_A = 08 10 16 18 32 34 64 66
VP9_SIZE_B = 196 198 200 202 208 210 224 226
define FATE_VP9_FULL
-$(foreach Q,$(VP9_Q),$(eval $(call FATE_VP9_SUITE,00-quantizer-$(Q),$(1),$(2))))
-$(foreach SHARP,$(VP9_SHARP),$(eval $(call FATE_VP9_SUITE,01-sharpness-$(SHARP),$(1),$(2))))
-$(foreach W,$(VP9_SIZE_A),$(eval $(foreach H,$(VP9_SIZE_A),$(eval $(call FATE_VP9_SUITE,02-size-$(W)x$(H),$(1),$(2))))))
-$(foreach W,$(VP9_SIZE_B),$(eval $(foreach H,$(VP9_SIZE_B),$(eval $(call FATE_VP9_SUITE,03-size-$(W)x$(H),$(1),$(2))))))
-$(eval $(call FATE_VP9_SUITE,03-deltaq,$(1),$(2)))
-$(eval $(call FATE_VP9_SUITE,06-bilinear,$(1),$(2)))
-$(eval $(call FATE_VP9_SUITE,09-lf_deltas,$(1),$(2)))
-$(eval $(call FATE_VP9_SUITE,10-show-existing-frame,$(1),$(2)))
-$(eval $(call FATE_VP9_SUITE,10-show-existing-frame2,$(1),$(2)))
-$(eval $(call FATE_VP9_SUITE,15-segkey_adpq,$(1),$(2)))
-$(eval $(call FATE_VP9_SUITE,16-intra-only,$(1),$(2)))
-$(eval $(call FATE_VP9_SUITE,2pass-akiyo,$(1),$(2)))
-$(eval $(call FATE_VP9_SUITE,parallelmode-akiyo,$(1),$(2)))
-$(eval $(call FATE_VP9_SUITE,segmentation-aq-akiyo,$(1),$(2)))
-$(eval $(call FATE_VP9_SUITE,segmentation-sf-akiyo,$(1),$(2)))
-$(eval $(call FATE_VP9_SUITE,tiling-pedestrian,$(1),$(2)))
-$(eval $(call FATE_VP9_SUITE,trac3849,$(1),$(2)))
-$(eval $(call FATE_VP9_SUITE,trac4359,$(1),$(2)))
+$(foreach Q,$(VP9_Q),$(eval $(call FATE_VP9_SUITE,00-quantizer-$(Q))))
+$(foreach SHARP,$(VP9_SHARP),$(eval $(call FATE_VP9_SUITE,01-sharpness-$(SHARP))))
+$(foreach W,$(VP9_SIZE_A),$(eval $(foreach H,$(VP9_SIZE_A),$(eval $(call FATE_VP9_SUITE,02-size-$(W)x$(H))))))
+$(foreach W,$(VP9_SIZE_B),$(eval $(foreach H,$(VP9_SIZE_B),$(eval $(call FATE_VP9_SUITE,03-size-$(W)x$(H))))))
+$(eval $(call FATE_VP9_SUITE,03-deltaq))
+$(eval $(call FATE_VP9_PROFILE_SUITE,04-yuv444,1))
+$(eval $(call FATE_VP9_PROFILE_SUITE,04-yuv440,1))
+$(eval $(call FATE_VP9_PROFILE_SUITE,04-yuv422,1))
+$(eval $(call FATE_VP9_SUITE,06-bilinear))
+$(eval $(call FATE_VP9_SUITE,09-lf_deltas))
+$(eval $(call FATE_VP9_SUITE,10-show-existing-frame))
+$(eval $(call FATE_VP9_SUITE,10-show-existing-frame2))
+$(eval $(call FATE_VP9_SUITE,15-segkey_adpq))
+$(eval $(call FATE_VP9_SUITE,16-intra-only))
+$(eval $(call FATE_VP9_SUITE,2pass-akiyo))
+$(eval $(call FATE_VP9_SUITE,parallelmode-akiyo))
+$(eval $(call FATE_VP9_SUITE,segmentation-aq-akiyo))
+$(eval $(call FATE_VP9_SUITE,segmentation-sf-akiyo))
+$(eval $(call FATE_VP9_SUITE,tiling-pedestrian))
+$(eval $(call FATE_VP9_SUITE,trac3849))
+$(eval $(call FATE_VP9_SUITE,trac4359))
endef
$(eval $(call FATE_VP9_FULL))
diff --git a/tests/ref/fate/vp9p1-04-yuv422 b/tests/ref/fate/vp9p1-04-yuv422
new file mode 100644
index 0000000000..59abfb002c
--- /dev/null
+++ b/tests/ref/fate/vp9p1-04-yuv422
@@ -0,0 +1,15 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/50
+#stream#, dts, pts, duration, size, hash
+0, 0, 0, 1, 28800, b81b8a8444ac6ce4a4807c37e0a44c8b
+0, 1, 1, 1, 28800, 344458b82d35ea9944dc841643fc25c2
+0, 2, 2, 1, 28800, 376a4bb3944f052191963740b980eb26
+0, 3, 3, 1, 28800, 2fecb02c842bd7d588415904f2d3a82d
+0, 4, 4, 1, 28800, 0fda2f1dabba5c179599190f179b9782
+0, 5, 5, 1, 28800, a88ac885ee59e3a3a01fa483cdd40274
+0, 6, 6, 1, 28800, e76b488ffa70a05457fc046e7b999c56
+0, 7, 7, 1, 28800, 74ae5e52162f5bbc95258d44a2dd647c
+0, 8, 8, 1, 28800, 0c017e2b12e5192c8d598941d9c93306
+0, 9, 9, 1, 28800, ca3941ee43b7033cb48f8498af127d53
diff --git a/tests/ref/fate/vp9p1-04-yuv440 b/tests/ref/fate/vp9p1-04-yuv440
new file mode 100644
index 0000000000..0c28f36535
--- /dev/null
+++ b/tests/ref/fate/vp9p1-04-yuv440
@@ -0,0 +1,15 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/50
+#stream#, dts, pts, duration, size, hash
+0, 0, 0, 1, 28800, 61157ad4fb02a254de8f34ae7b8915dc
+0, 1, 1, 1, 28800, 9431337382bf90d40aa417e297ac05da
+0, 2, 2, 1, 28800, 56b739049cc9e97a1d82018bba3db0ee
+0, 3, 3, 1, 28800, 75138a9b6bb905b2f79a1ebb959ddfea
+0, 4, 4, 1, 28800, 141b2fc9625fad86577838d84a276ef8
+0, 5, 5, 1, 28800, b364668c44a237d4e532e086a55401a9
+0, 6, 6, 1, 28800, a4ca6014d5194e4c921a4cb4289eb315
+0, 7, 7, 1, 28800, cfcacb3d5086d3861f4712a3c87a6b6c
+0, 8, 8, 1, 28800, 228d3fd3d849d021f3690cc538edb0a3
+0, 9, 9, 1, 28800, 97ecf281eb1130723d70e3c8803fa814
diff --git a/tests/ref/fate/vp9p1-04-yuv444 b/tests/ref/fate/vp9p1-04-yuv444
new file mode 100644
index 0000000000..e9559c6c6e
--- /dev/null
+++ b/tests/ref/fate/vp9p1-04-yuv444
@@ -0,0 +1,15 @@
+#format: frame checksums
+#version: 1
+#hash: MD5
+#tb 0: 1/25
+#stream#, dts, pts, duration, size, hash
+0, 0, 0, 1, 304128, 859df7b3661783e337a16ee79f3c20bc
+0, 1, 1, 1, 304128, 3b3ccf344cd5a478c4c1fa422497183d
+0, 2, 2, 1, 304128, 3be1f565823cb88013a14a93a3cf9480
+0, 3, 3, 1, 304128, 6e188a963deaf46c2d6e741b03c4240c
+0, 4, 4, 1, 304128, 82ead184ae478ac821b1b4b72f28c9cd
+0, 5, 5, 1, 304128, 59bb43badc76b39a228b1ad96b6339ca
+0, 6, 6, 1, 304128, 2eaee790fc188e2251b92dd4ea90c42a
+0, 7, 7, 1, 304128, 2a95f8727589e710dc1b95400916b72e
+0, 8, 8, 1, 304128, b7032f73544a7108fcdcaca2832ecc32
+0, 9, 9, 1, 304128, b7778c35b30bcc400b25ed0e5b7913e1