aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/vp8.c
diff options
context:
space:
mode:
authorDavid Conrad <lessen42@gmail.com>2010-07-16 07:20:35 +0000
committerDavid Conrad <lessen42@gmail.com>2010-07-16 07:20:35 +0000
commit9ac831c2c02e6e1c9c322b8bb77881c1dbac6f08 (patch)
tree566ea63633d6c04dc53c51ba7ff8734947026343 /libavcodec/vp8.c
parentb6c420ce8f1151121350892c75e8d688e085f253 (diff)
downloadffmpeg-9ac831c2c02e6e1c9c322b8bb77881c1dbac6f08.tar.gz
vp8: Save mb border needed for intra prediction so that loop filter can run
immediately after a mb row is decoded Originally committed as revision 24252 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/vp8.c')
-rw-r--r--libavcodec/vp8.c88
1 files changed, 72 insertions, 16 deletions
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 85f1723ae9..0b4434b32f 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -62,6 +62,7 @@ typedef struct {
int update_last; ///< update VP56_FRAME_PREVIOUS with the current one
int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so
int update_altref;
+ int deblock_filter;
/**
* If this flag is not set, all the probability updates
@@ -85,6 +86,12 @@ typedef struct {
int b4_stride;
/**
+ * Cache of the top row needed for intra prediction
+ * 16 for luma, 8 for each chroma plane
+ */
+ uint8_t (*top_border)[16+8+8];
+
+ /**
* For coeff decode, we need to know whether the above block had non-zero
* coefficients. This means for each macroblock, we need data for 4 luma
* blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
@@ -197,6 +204,7 @@ static void vp8_decode_flush(AVCodecContext *avctx)
av_freep(&s->intra4x4_pred_mode_base);
av_freep(&s->top_nnz);
av_freep(&s->edge_emu_buffer);
+ av_freep(&s->top_border);
s->macroblocks = NULL;
s->intra4x4_pred_mode = NULL;
@@ -224,8 +232,9 @@ static int update_dimensions(VP8Context *s, int width, int height)
s->macroblocks_base = av_mallocz(s->mb_stride*(s->mb_height+1)*sizeof(*s->macroblocks));
s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1));
s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
+ s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
- if (!s->macroblocks_base || !s->intra4x4_pred_mode_base || !s->top_nnz)
+ if (!s->macroblocks_base || !s->intra4x4_pred_mode_base || !s->top_nnz || !s->top_border)
return AVERROR(ENOMEM);
s->macroblocks = s->macroblocks_base + 1 + s->mb_stride;
@@ -852,6 +861,47 @@ static void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb
mb->skip = 1;
}
+static av_always_inline
+void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
+ int linesize, int uvlinesize, int simple)
+{
+ AV_COPY128(top_border, src_y + 15*linesize);
+ if (!simple) {
+ AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
+ AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
+ }
+}
+
+static av_always_inline
+void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
+ int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
+ int simple, int xchg)
+{
+ uint8_t *top_border_m1 = top_border-32; // for TL prediction
+ src_y -= linesize;
+ src_cb -= uvlinesize;
+ src_cr -= uvlinesize;
+
+#define XCHG(a,b,xchg)\
+if (xchg) AV_SWAP64(b,a);\
+else AV_COPY64(b,a);
+
+ XCHG(top_border_m1+8, src_y-8, xchg);
+ XCHG(top_border, src_y, xchg);
+ XCHG(top_border+8, src_y+8, 1);
+ if (mb_x < mb_width-1)
+ XCHG(top_border+32, src_y+16, 1);
+
+ // only copy chroma for normal loop filter
+ // or to initialize the top row to 127
+ if (!simple || !mb_y) {
+ XCHG(top_border_m1+16, src_cb-8, xchg);
+ XCHG(top_border_m1+24, src_cr-8, xchg);
+ XCHG(top_border+16, src_cb, 1);
+ XCHG(top_border+24, src_cr, 1);
+ }
+}
+
static int check_intra_pred_mode(int mode, int mb_x, int mb_y)
{
if (mode == DC_PRED8x8) {
@@ -870,6 +920,13 @@ static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
{
int x, y, mode, nnz, tr;
+ // for the first row, we need to run xchg_mb_border to init the top edge to 127
+ // otherwise, skip it if we aren't going to deblock
+ if (s->deblock_filter || !mb_y)
+ xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
+ s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
+ s->filter.simple, 1);
+
if (mb->mode < MODE_I4x4) {
mode = check_intra_pred_mode(mb->mode, mb_x, mb_y);
s->hpc.pred16x16[mode](dst[0], s->linesize);
@@ -913,6 +970,11 @@ static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y);
s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
+
+ if (s->deblock_filter || !mb_y)
+ xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
+ s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
+ s->filter.simple, 0);
}
/**
@@ -1171,7 +1233,6 @@ static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, int *level, in
}
}
-// TODO: look at backup_mb_border / xchg_mb_border in h264.c
static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, int mb_x, int mb_y)
{
int filter_level, inner_limit, hev_thresh, mbedge_lim, bedge_lim;
@@ -1251,6 +1312,7 @@ static void filter_mb_row(VP8Context *s, int mb_y)
int mb_x;
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
+ backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
filter_mb(s, dst, mb++, mb_x, mb_y);
dst[0] += 16;
dst[1] += 8;
@@ -1265,6 +1327,7 @@ static void filter_mb_row_simple(VP8Context *s, int mb_y)
int mb_x;
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
+ backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
filter_mb_simple(s, dst, mb++, mb_x, mb_y);
dst += 16;
}
@@ -1291,6 +1354,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
s->invisible = 1;
goto skip_decode;
}
+ s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
for (i = 0; i < 4; i++)
if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
@@ -1329,11 +1393,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
// top edge of 127 for intra prediction
- if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
- memset(curframe->data[0] - s->linesize -1, 127, s->linesize +1);
- memset(curframe->data[1] - s->uvlinesize-1, 127, s->uvlinesize+1);
- memset(curframe->data[2] - s->uvlinesize-1, 127, s->uvlinesize+1);
- }
+ memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border));
for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
@@ -1352,6 +1412,8 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
for (i = 0; i < 3; i++)
for (y = 0; y < 16>>!!i; y++)
dst[i][y*curframe->linesize[i]-1] = 129;
+ if (mb_y)
+ memset(s->top_border, 129, sizeof(*s->top_border));
for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
decode_mb_mode(s, mb, mb_x, mb_y, intra4x4 + 4*mb_x);
@@ -1388,19 +1450,13 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
dst[2] += 8;
mb++;
}
- if (mb_y && s->filter.level && avctx->skip_loop_filter < skip_thresh) {
+ if (s->deblock_filter) {
if (s->filter.simple)
- filter_mb_row_simple(s, mb_y-1);
+ filter_mb_row_simple(s, mb_y);
else
- filter_mb_row(s, mb_y-1);
+ filter_mb_row(s, mb_y);
}
}
- if (s->filter.level && avctx->skip_loop_filter < skip_thresh) {
- if (s->filter.simple)
- filter_mb_row_simple(s, mb_y-1);
- else
- filter_mb_row(s, mb_y-1);
- }
skip_decode:
// if future frames don't use the updated probabilities,