aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/vp9.c
diff options
context:
space:
mode:
authorRonald S. Bultje <rsbultje@gmail.com>2015-09-04 15:03:30 -0400
committerRonald S. Bultje <rsbultje@gmail.com>2015-09-05 15:50:01 -0400
commit9cdeb105a6d5a516fc42d97cf49e593cf9f6e6c4 (patch)
treeb97bf50536a426d0bd826e2e66538ffb61bfcd59 /libavcodec/vp9.c
parent873dbc6758d84758403313fe457c170bbcbcc9e4 (diff)
downloadffmpeg-9cdeb105a6d5a516fc42d97cf49e593cf9f6e6c4.tar.gz
vp9: do unscaled MC in scaled path if size of this reference matches.
This can happen if we do bidirectional MC, where one reference has the same size as the current frame, but the other one doesn't.
Diffstat (limited to 'libavcodec/vp9.c')
-rw-r--r--libavcodec/vp9.c219
1 files changed, 117 insertions, 102 deletions
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 3bd2a0f5a6..25a7b1d0b4 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -2766,7 +2766,108 @@ static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv
intra_recon(ctx, y_off, uv_off, 2);
}
+static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const uint8_t *ref, ptrdiff_t ref_stride,
+ ThreadFrame *ref_frame,
+ ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
+ int bw, int bh, int w, int h, int bytesperpixel)
+{
+ int mx = mv->x, my = mv->y, th;
+
+ y += my >> 3;
+ x += mx >> 3;
+ ref += y * ref_stride + x * bytesperpixel;
+ mx &= 7;
+ my &= 7;
+ // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+ // we use +7 because the last 7 pixels of each sbrow can be changed in
+ // the longest loopfilter of the next sbrow
+ th = (y + bh + 4 * !!my + 7) >> 6;
+ ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+ if (x < !!mx * 3 || y < !!my * 3 ||
+ x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
+ s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+ ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
+ 160, ref_stride,
+ bw + !!mx * 7, bh + !!my * 7,
+ x - !!mx * 3, y - !!my * 3, w, h);
+ ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
+ ref_stride = 160;
+ }
+ mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
+}
+
+static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
+ uint8_t *dst_u, uint8_t *dst_v,
+ ptrdiff_t dst_stride,
+ const uint8_t *ref_u, ptrdiff_t src_stride_u,
+ const uint8_t *ref_v, ptrdiff_t src_stride_v,
+ ThreadFrame *ref_frame,
+ ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
+ int bw, int bh, int w, int h, int bytesperpixel)
+{
+ int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
+
+ y += my >> 4;
+ x += mx >> 4;
+ ref_u += y * src_stride_u + x * bytesperpixel;
+ ref_v += y * src_stride_v + x * bytesperpixel;
+ mx &= 15;
+ my &= 15;
+ // FIXME bilinear filter only needs 0/1 pixels, not 3/4
+ // we use +7 because the last 7 pixels of each sbrow can be changed in
+ // the longest loopfilter of the next sbrow
+ th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
+ ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
+ if (x < !!mx * 3 || y < !!my * 3 ||
+ x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
+ s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+ ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
+ 160, src_stride_u,
+ bw + !!mx * 7, bh + !!my * 7,
+ x - !!mx * 3, y - !!my * 3, w, h);
+ ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
+ mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
+
+ s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
+ ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
+ 160, src_stride_v,
+ bw + !!mx * 7, bh + !!my * 7,
+ x - !!mx * 3, y - !!my * 3, w, h);
+ ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
+ mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
+ } else {
+ mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
+ mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
+ }
+}
+
+#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
+ px, py, pw, ph, bw, bh, w, h, i) \
+ mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
+ mv, bw, bh, w, h, bytesperpixel)
+#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+ row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
+ mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+ row, col, mv, bw, bh, w, h, bytesperpixel)
+#define SCALED 0
+#define FN(x) x##_8bpp
+#define BYTES_PER_PIXEL 1
+#include "vp9_mc_template.c"
+#undef FN
+#undef BYTES_PER_PIXEL
+#define FN(x) x##_16bpp
+#define BYTES_PER_PIXEL 2
+#include "vp9_mc_template.c"
+#undef mc_luma_dir
+#undef mc_chroma_dir
+#undef FN
+#undef BYTES_PER_PIXEL
+#undef SCALED
+
static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
+ vp9_mc_func (*mc)[2],
uint8_t *dst, ptrdiff_t dst_stride,
const uint8_t *ref, ptrdiff_t ref_stride,
ThreadFrame *ref_frame,
@@ -2775,6 +2876,11 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step)
{
+ if (s->frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
+ s->frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
+ mc_luma_unscaled(s, mc, dst, dst_stride, ref, ref_stride, ref_frame,
+ y, x, in_mv, bw, bh, w, h, bytesperpixel);
+ } else {
#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
int mx, my;
int refbw_m1, refbh_m1;
@@ -2811,9 +2917,11 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm
ref_stride = 288;
}
smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
+ }
}
static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
+ vp9_mc_func (*mc)[2],
uint8_t *dst_u, uint8_t *dst_v,
ptrdiff_t dst_stride,
const uint8_t *ref_u, ptrdiff_t src_stride_u,
@@ -2824,6 +2932,12 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
int bw, int bh, int w, int h, int bytesperpixel,
const uint16_t *scale, const uint8_t *step)
{
+ if (s->frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
+ s->frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
+ mc_chroma_unscaled(s, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
+ ref_v, src_stride_v, ref_frame,
+ y, x, in_mv, bw, bh, w, h, bytesperpixel);
+ } else {
int mx, my;
int refbw_m1, refbh_m1;
int th;
@@ -2879,16 +2993,17 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
}
+ }
}
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
px, py, pw, ph, bw, bh, w, h, i) \
- mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
+ mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
- mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
+ mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
#define SCALED 1
@@ -2906,106 +3021,6 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func
#undef BYTES_PER_PIXEL
#undef SCALED
-static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
- uint8_t *dst, ptrdiff_t dst_stride,
- const uint8_t *ref, ptrdiff_t ref_stride,
- ThreadFrame *ref_frame,
- ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
- int bw, int bh, int w, int h, int bytesperpixel)
-{
- int mx = mv->x, my = mv->y, th;
-
- y += my >> 3;
- x += mx >> 3;
- ref += y * ref_stride + x * bytesperpixel;
- mx &= 7;
- my &= 7;
- // FIXME bilinear filter only needs 0/1 pixels, not 3/4
- // we use +7 because the last 7 pixels of each sbrow can be changed in
- // the longest loopfilter of the next sbrow
- th = (y + bh + 4 * !!my + 7) >> 6;
- ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
- if (x < !!mx * 3 || y < !!my * 3 ||
- x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
- s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
- ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
- 160, ref_stride,
- bw + !!mx * 7, bh + !!my * 7,
- x - !!mx * 3, y - !!my * 3, w, h);
- ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
- ref_stride = 160;
- }
- mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
-}
-
-static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
- uint8_t *dst_u, uint8_t *dst_v,
- ptrdiff_t dst_stride,
- const uint8_t *ref_u, ptrdiff_t src_stride_u,
- const uint8_t *ref_v, ptrdiff_t src_stride_v,
- ThreadFrame *ref_frame,
- ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
- int bw, int bh, int w, int h, int bytesperpixel)
-{
- int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
-
- y += my >> 4;
- x += mx >> 4;
- ref_u += y * src_stride_u + x * bytesperpixel;
- ref_v += y * src_stride_v + x * bytesperpixel;
- mx &= 15;
- my &= 15;
- // FIXME bilinear filter only needs 0/1 pixels, not 3/4
- // we use +7 because the last 7 pixels of each sbrow can be changed in
- // the longest loopfilter of the next sbrow
- th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
- ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
- if (x < !!mx * 3 || y < !!my * 3 ||
- x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
- s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
- ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
- 160, src_stride_u,
- bw + !!mx * 7, bh + !!my * 7,
- x - !!mx * 3, y - !!my * 3, w, h);
- ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
- mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
-
- s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
- ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
- 160, src_stride_v,
- bw + !!mx * 7, bh + !!my * 7,
- x - !!mx * 3, y - !!my * 3, w, h);
- ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
- mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
- } else {
- mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
- mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
- }
-}
-
-#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
- px, py, pw, ph, bw, bh, w, h, i) \
- mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
- mv, bw, bh, w, h, bytesperpixel)
-#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
- row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
- mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
- row, col, mv, bw, bh, w, h, bytesperpixel)
-#define SCALED 0
-#define FN(x) x##_8bpp
-#define BYTES_PER_PIXEL 1
-#include "vp9_mc_template.c"
-#undef FN
-#undef BYTES_PER_PIXEL
-#define FN(x) x##_16bpp
-#define BYTES_PER_PIXEL 2
-#include "vp9_mc_template.c"
-#undef mc_luma_dir_dir
-#undef mc_chroma_dir_dir
-#undef FN
-#undef BYTES_PER_PIXEL
-#undef SCALED
-
static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
{
VP9Context *s = ctx->priv_data;