aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/vp8.h
diff options
context:
space:
mode:
author: Daniel Kang <daniel.d.kang@gmail.com>, 2012-07-12 13:07:06 -0700
committer: Luca Barbato <lu_zero@gentoo.org>, 2012-07-14 20:18:54 +0200
commit: 951455c1c18d54177f281dba174078e54a835361 (patch)
tree: 2ec094ab36756bfbede03a40194ce56f206d092c /libavcodec/vp8.h
parent: 17343e395250f5cb459f5fab198dcae50841b91d (diff)
download: ffmpeg-951455c1c18d54177f281dba174078e54a835361.tar.gz
vp8: implement sliced threading
Testing gives 25-30% gain on HD clips with two threads and up to 50% gain with eight threads.

Sliced threading uses more memory than single or frame threading. Frame threading and single threading keep the previous memory layout.

Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
Diffstat (limited to 'libavcodec/vp8.h')
-rw-r--r-- libavcodec/vp8.h 63
1 files changed, 39 insertions, 24 deletions
diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index 2f2cb80a0a..1355da4d68 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -4,6 +4,7 @@
* Copyright (C) 2010 David Conrad
* Copyright (C) 2010 Ronald S. Bultje
* Copyright (C) 2010 Jason Garrett-Glaser
+ * Copyright (C) 2012 Daniel Kang
*
* This file is part of Libav.
*
@@ -88,10 +89,40 @@ typedef struct {
} VP8Macroblock;
typedef struct {
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ int thread_nr;
+ int thread_mb_pos; // (mb_y << 16) | (mb_x & 0xFFFF)
+ int wait_mb_pos; // What the current thread is waiting on.
+ uint8_t *edge_emu_buffer;
+ /**
+ * For coeff decode, we need to know whether the above block had non-zero
+ * coefficients. This means for each macroblock, we need data for 4 luma
+ * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
+ * per macroblock. We keep the last row in top_nnz.
+ */
+ DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
+ /**
+ * This is the index plus one of the last non-zero coeff
+ * for each of the blocks in the current macroblock.
+ * So, 0 -> no coeffs
+ * 1 -> dc-only (special transform)
+ * 2+-> full transform
+ */
+ DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
+ DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
+ DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
+ VP8FilterStrength *filter_strength;
+} VP8ThreadData;
+
+#define MAX_THREADS 8
+typedef struct {
+ VP8ThreadData *thread_data;
AVCodecContext *avctx;
AVFrame *framep[4];
AVFrame *next_framep[4];
- uint8_t *edge_emu_buffer;
+ AVFrame *curframe;
+ AVFrame *prev_frame;
uint16_t mb_width; /* number of horizontal MB */
uint16_t mb_height; /* number of vertical MB */
@@ -128,7 +159,6 @@ typedef struct {
} filter;
VP8Macroblock *macroblocks;
- VP8FilterStrength *filter_strength;
uint8_t *intra4x4_pred_mode_top;
uint8_t intra4x4_pred_mode_left[4];
@@ -169,32 +199,10 @@ typedef struct {
int8_t ref[4];
} lf_delta;
- /**
- * Cache of the top row needed for intra prediction
- * 16 for luma, 8 for each chroma plane
- */
uint8_t (*top_border)[16+8+8];
-
- /**
- * For coeff decode, we need to know whether the above block had non-zero
- * coefficients. This means for each macroblock, we need data for 4 luma
- * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
- * per macroblock. We keep the last row in top_nnz.
- */
uint8_t (*top_nnz)[9];
- DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
- /**
- * This is the index plus one of the last non-zero coeff
- * for each of the blocks in the current macroblock.
- * So, 0 -> no coeffs
- * 1 -> dc-only (special transform)
- * 2+-> full transform
- */
- DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
VP56RangeCoder c; ///< header context, includes mb modes and motion vectors
- DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
- DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
/**
* These are all of the updatable probabilities for binary decisions.
@@ -247,6 +255,13 @@ typedef struct {
uint8_t *segmentation_maps[5];
int num_maps_to_be_freed;
int maps_are_invalid;
+ int num_jobs;
+ /**
+ * This describes the macroblock memory layout.
+ * 0 -> Only width+height*2+1 macroblocks allocated (frame/single thread).
+ * 1 -> Macroblocks for entire frame allocated (sliced thread).
+ */
+ int mb_layout;
} VP8Context;
#endif /* AVCODEC_VP8_H */