aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/vp8.c
diff options
context:
space:
mode:
author Ronald S. Bultje <rsbultje@gmail.com> 2010-06-28 13:50:55 +0000
committer Ronald S. Bultje <rsbultje@gmail.com> 2010-06-28 13:50:55 +0000
commit 7c4dcf81658103b9506adcbf848bd23efe3b0b4e (patch)
tree f04ecb6acdd8e925caaebc856f50c42c3aeeabb3 /libavcodec/vp8.c
parent 4332bfbff865c41d27d09eaf73409eac544e1350 (diff)
download ffmpeg-7c4dcf81658103b9506adcbf848bd23efe3b0b4e.tar.gz
Optimize split MC, so we don't always do 4x4 blocks of 4x4 pixels each, but
we apply them as 16x8/8x16/8x8 subblocks where possible. Since this allows us to use width=8/16 instead of width=4 MC functions, we can now take more advantage of SSE2/SSSE3 optimizations, leading to a total speedup for splitMV filter of about 10%. Originally committed as revision 23853 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/vp8.c')
-rw-r--r-- libavcodec/vp8.c 82
1 files changed, 62 insertions, 20 deletions
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 0000706ed2..75585994cb 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -943,6 +943,39 @@ static inline void vp8_mc(VP8Context *s, int luma,
mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
}
+static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3],
+ AVFrame *ref_frame, int x_off, int y_off,
+ int bx_off, int by_off,
+ int block_w, int block_h,
+ int width, int height, VP56mv *mv)
+{
+ VP56mv uvmv = *mv;
+ /*
+  * Motion-compensate one rectangular partition of a macroblock into all
+  * three destination planes by calling vp8_mc() once for dst[0] and once
+  * each for dst[1] and dst[2], all with the same motion vector.
+  *
+  * s              decoder context; supplies profile, linesize, uvlinesize
+  *                and the put_pixels_tab function tables
+  * dst            per-plane destination pointers for the macroblock
+  * ref_frame      frame whose data[0..2] planes are used as the source
+  * x_off, y_off   macroblock position (callers pass mb_x << 4, mb_y << 4)
+  * bx_off, by_off position of this partition inside the macroblock
+  * block_w/_h     partition size; the visible callers pass 16 or 8
+  * width, height  passed through to vp8_mc(); callers pass 16*mb_width
+  *                and 16*mb_height
+  * mv             motion vector for this partition; any rounding below is
+  *                applied to the local copy uvmv, not to *mv
+  */
+ /* Y: table index is 1 when block_w is 8, otherwise 0 */
+ vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off,
+ ref_frame->data[0], mv, x_off + bx_off, y_off + by_off,
+ block_w, block_h, width, height, s->linesize,
+ s->put_pixels_tab[block_w == 8]);
+
+ /*
+  * U/V: both planes use the same vector (uvmv) and the same geometry.
+  * In profile 3 the low three bits of each vector component are cleared
+  * first (NOTE(review): presumably this forces full-pixel chroma motion;
+  * confirm against the VP8 specification). All offsets and sizes are then
+  * halved before the two calls (presumably because the chroma planes are
+  * subsampled by two in each direction; confirm). The table index
+  * is 2 when the halved block_w is 4, otherwise 1.
+  */
+ if (s->profile == 3) {
+ uvmv.x &= ~7;
+ uvmv.y &= ~7;
+ }
+ x_off >>= 1; y_off >>= 1;
+ bx_off >>= 1; by_off >>= 1;
+ width >>= 1; height >>= 1;
+ block_w >>= 1; block_h >>= 1;
+ vp8_mc(s, 0, dst[1] + by_off * s->uvlinesize + bx_off,
+ ref_frame->data[1], &uvmv, x_off + bx_off, y_off + by_off,
+ block_w, block_h, width, height, s->uvlinesize,
+ s->put_pixels_tab[1 + (block_w == 4)]);
+ vp8_mc(s, 0, dst[2] + by_off * s->uvlinesize + bx_off,
+ ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off,
+ block_w, block_h, width, height, s->uvlinesize,
+ s->put_pixels_tab[1 + (block_w == 4)]);
+}
+
/**
* Apply motion vectors to prediction buffer, chapter 18.
*/
@@ -951,29 +984,14 @@ static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
{
int x_off = mb_x << 4, y_off = mb_y << 4;
int width = 16*s->mb_width, height = 16*s->mb_height;
- VP56mv uvmv;
if (mb->mode < VP8_MVMODE_SPLIT) {
- /* Y */
- vp8_mc(s, 1, dst[0], s->framep[mb->ref_frame]->data[0], &mb->mv,
- x_off, y_off, 16, 16, width, height, s->linesize,
- s->put_pixels_tab[0]);
-
- /* U/V */
- uvmv = mb->mv;
- if (s->profile == 3) {
- uvmv.x &= ~7;
- uvmv.y &= ~7;
- }
- x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
- vp8_mc(s, 0, dst[1], s->framep[mb->ref_frame]->data[1], &uvmv,
- x_off, y_off, 8, 8, width, height, s->uvlinesize,
- s->put_pixels_tab[1]);
- vp8_mc(s, 0, dst[2], s->framep[mb->ref_frame]->data[2], &uvmv,
- x_off, y_off, 8, 8, width, height, s->uvlinesize,
- s->put_pixels_tab[1]);
- } else {
+ vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
+ 0, 0, 16, 16, width, height, &mb->mv);
+ } else switch (mb->partitioning) {
+ case VP8_SPLITMVMODE_4x4: {
int x, y;
+ VP56mv uvmv;
/* Y */
for (y = 0; y < 4; y++) {
@@ -1016,6 +1034,30 @@ static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
s->put_pixels_tab[2]);
}
}
+ break;
+ }
+ case VP8_SPLITMVMODE_16x8:
+ vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
+ 0, 0, 16, 8, width, height, &mb->bmv[0]);
+ vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
+ 0, 8, 16, 8, width, height, &mb->bmv[8]);
+ break;
+ case VP8_SPLITMVMODE_8x16:
+ vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
+ 0, 0, 8, 16, width, height, &mb->bmv[0]);
+ vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
+ 8, 0, 8, 16, width, height, &mb->bmv[2]);
+ break;
+ case VP8_SPLITMVMODE_8x8:
+ vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
+ 0, 0, 8, 8, width, height, &mb->bmv[0]);
+ vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
+ 8, 0, 8, 8, width, height, &mb->bmv[2]);
+ vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
+ 0, 8, 8, 8, width, height, &mb->bmv[8]);
+ vp8_mc_part(s, dst, s->framep[mb->ref_frame], x_off, y_off,
+ 8, 8, 8, 8, width, height, &mb->bmv[10]);
+ break;
}
}