diff options
author | Daniel Kang <daniel.d.kang@gmail.com> | 2012-07-12 13:07:06 -0700 |
---|---|---|
committer | Luca Barbato <lu_zero@gentoo.org> | 2012-07-14 20:18:54 +0200 |
commit | 951455c1c18d54177f281dba174078e54a835361 (patch) | |
tree | 2ec094ab36756bfbede03a40194ce56f206d092c /libavcodec/vp8.h | |
parent | 17343e395250f5cb459f5fab198dcae50841b91d (diff) | |
download | ffmpeg-951455c1c18d54177f281dba174078e54a835361.tar.gz |
vp8: implement sliced threading
Testing gives 25-30% gain on HD clips with two threads and
up to 50% gain with eight threads.
Sliced threading uses more memory than single or frame threading.
Frame threading and single threading keep the previous memory
layout.
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
Diffstat (limited to 'libavcodec/vp8.h')
-rw-r--r-- | libavcodec/vp8.h | 63 |
1 files changed, 39 insertions, 24 deletions
```diff
diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index 2f2cb80a0a..1355da4d68 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -4,6 +4,7 @@
  * Copyright (C) 2010 David Conrad
  * Copyright (C) 2010 Ronald S. Bultje
  * Copyright (C) 2010 Jason Garrett-Glaser
+ * Copyright (C) 2012 Daniel Kang
  *
  * This file is part of Libav.
  *
@@ -88,10 +89,40 @@ typedef struct {
 } VP8Macroblock;
 
 typedef struct {
+    pthread_mutex_t lock;
+    pthread_cond_t cond;
+    int thread_nr;
+    int thread_mb_pos; // (mb_y << 16) | (mb_x & 0xFFFF)
+    int wait_mb_pos; // What the current thread is waiting on.
+    uint8_t *edge_emu_buffer;
+    /**
+     * For coeff decode, we need to know whether the above block had non-zero
+     * coefficients. This means for each macroblock, we need data for 4 luma
+     * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
+     * per macroblock. We keep the last row in top_nnz.
+     */
+    DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
+    /**
+     * This is the index plus one of the last non-zero coeff
+     * for each of the blocks in the current macroblock.
+     * So, 0 -> no coeffs
+     *     1 -> dc-only (special transform)
+     *     2+-> full transform
+     */
+    DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
+    DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
+    DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
+    VP8FilterStrength *filter_strength;
+} VP8ThreadData;
+
+#define MAX_THREADS 8
+typedef struct {
+    VP8ThreadData *thread_data;
     AVCodecContext *avctx;
     AVFrame *framep[4];
     AVFrame *next_framep[4];
-    uint8_t *edge_emu_buffer;
+    AVFrame *curframe;
+    AVFrame *prev_frame;
 
     uint16_t mb_width; /* number of horizontal MB */
     uint16_t mb_height; /* number of vertical MB */
@@ -128,7 +159,6 @@ typedef struct {
     } filter;
 
     VP8Macroblock *macroblocks;
-    VP8FilterStrength *filter_strength;
 
     uint8_t *intra4x4_pred_mode_top;
     uint8_t intra4x4_pred_mode_left[4];
@@ -169,32 +199,10 @@ typedef struct {
         int8_t ref[4];
     } lf_delta;
 
-    /**
-     * Cache of the top row needed for intra prediction
-     * 16 for luma, 8 for each chroma plane
-     */
     uint8_t (*top_border)[16+8+8];
-
-    /**
-     * For coeff decode, we need to know whether the above block had non-zero
-     * coefficients. This means for each macroblock, we need data for 4 luma
-     * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
-     * per macroblock. We keep the last row in top_nnz.
-     */
     uint8_t (*top_nnz)[9];
-    DECLARE_ALIGNED(8, uint8_t, left_nnz)[9];
 
-    /**
-     * This is the index plus one of the last non-zero coeff
-     * for each of the blocks in the current macroblock.
-     * So, 0 -> no coeffs
-     *     1 -> dc-only (special transform)
-     *     2+-> full transform
-     */
-    DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
     VP56RangeCoder c; ///< header context, includes mb modes and motion vectors
-    DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
-    DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
 
     /**
      * These are all of the updatable probabilities for binary decisions.
@@ -247,6 +255,13 @@ typedef struct {
     uint8_t *segmentation_maps[5];
     int num_maps_to_be_freed;
     int maps_are_invalid;
+    int num_jobs;
+    /**
+     * This describes the macroblock memory layout.
+     * 0 -> Only width+height*2+1 macroblocks allocated (frame/single thread).
+     * 1 -> Macroblocks for entire frame alloced (sliced thread).
+     */
+    int mb_layout;
 } VP8Context;
 
 #endif /* AVCODEC_VP8_H */
```