diff options
| field | value | date |
|---|---|---|
| author | Michael Niedermayer <michaelni@gmx.at> | 2015-03-21 18:47:19 +0100 |
| committer | Michael Niedermayer <michaelni@gmx.at> | 2015-03-21 18:50:53 +0100 |
| commit | 3d1d8e1f95e1cb6682a6dd035046d91a0812d331 (patch) | |
| tree | 01ed80b8f00c484de9ed9330c802ce3407c6e45c | |
| parent | 27301633297f36549d94499b1c5728e1118a2b7c (diff) | |
| parent | 36d04801ba9d8622c2d759c172aea18561bac74d (diff) | |
| download | ffmpeg-3d1d8e1f95e1cb6682a6dd035046d91a0812d331.tar.gz | |
Merge commit '36d04801ba9d8622c2d759c172aea18561bac74d'
* commit '36d04801ba9d8622c2d759c172aea18561bac74d':
h264: move the scratch buffers into the per-slice context
Conflicts:
libavcodec/h264.h
libavcodec/h264_slice.c
libavcodec/svq3.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | libavcodec/h264.c | 12 |
| -rw-r--r-- | libavcodec/h264.h | 8 |
| -rw-r--r-- | libavcodec/h264_mb.c | 26 |
| -rw-r--r-- | libavcodec/h264_slice.c | 36 |
| -rw-r--r-- | libavcodec/svq3.c | 17 |

5 files changed, 50 insertions, 49 deletions
diff --git a/libavcodec/h264.c b/libavcodec/h264.c index dd324a9f9d..89eaca3a13 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -403,8 +403,6 @@ void ff_h264_free_tables(H264Context *h, int free_rbsp) continue; av_freep(&hx->top_borders[1]); av_freep(&hx->top_borders[0]); - av_freep(&hx->bipred_scratchpad); - av_freep(&hx->edge_emu_buffer); av_freep(&hx->dc_val_base); av_freep(&hx->er.mb_index2xy); av_freep(&hx->er.error_status_table); @@ -419,6 +417,16 @@ void ff_h264_free_tables(H264Context *h, int free_rbsp) if (i) av_freep(&h->thread_context[i]); } + + for (i = 0; i < h->nb_slice_ctx; i++) { + H264SliceContext *sl = &h->slice_ctx[i]; + + av_freep(&sl->bipred_scratchpad); + av_freep(&sl->edge_emu_buffer); + + sl->bipred_scratchpad_allocated = 0; + sl->edge_emu_buffer_allocated = 0; + } } int ff_h264_alloc_tables(H264Context *h) diff --git a/libavcodec/h264.h b/libavcodec/h264.h index e7d5a304c9..49639ddd57 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -436,6 +436,11 @@ typedef struct H264SliceContext { const uint8_t *intra_pcm_ptr; + uint8_t *bipred_scratchpad; + uint8_t *edge_emu_buffer; + int bipred_scratchpad_allocated; + int edge_emu_buffer_allocated; + /** * non zero coeff count cache. * is 64 if not available. 
@@ -763,14 +768,11 @@ typedef struct H264Context { int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs int cur_chroma_format_idc; - uint8_t *bipred_scratchpad; - int16_t slice_row[MAX_SLICES]; ///< to detect when MAX_SLICES is too low uint8_t parse_history[6]; int parse_history_count; int parse_last_mb; - uint8_t *edge_emu_buffer; int16_t *dc_val_base; AVBufferPool *qscale_table_pool; diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c index b33c63c130..86fdea9e33 100644 --- a/libavcodec/h264_mb.c +++ b/libavcodec/h264_mb.c @@ -237,12 +237,12 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext full_my < 0 - extra_height || full_mx + 16 /*FIXME*/ > pic_width + extra_width || full_my + 16 /*FIXME*/ > pic_height + extra_height) { - h->vdsp.emulated_edge_mc(h->edge_emu_buffer, + h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_y - (2 << pixel_shift) - 2 * sl->mb_linesize, sl->mb_linesize, sl->mb_linesize, 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2, full_my - 2, pic_width, pic_height); - src_y = h->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; + src_y = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; emu = 1; } @@ -256,13 +256,13 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext if (chroma_idc == 3 /* yuv444 */) { src_cb = pic->f.data[1] + offset; if (emu) { - h->vdsp.emulated_edge_mc(h->edge_emu_buffer, + h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2 * sl->mb_linesize, sl->mb_linesize, sl->mb_linesize, 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2, full_my - 2, pic_width, pic_height); - src_cb = h->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; + src_cb = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; } qpix_op[luma_xy](dest_cb, src_cb, sl->mb_linesize); // FIXME try variable height perhaps? 
if (!square) @@ -270,13 +270,13 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext src_cr = pic->f.data[2] + offset; if (emu) { - h->vdsp.emulated_edge_mc(h->edge_emu_buffer, + h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2 * sl->mb_linesize, sl->mb_linesize, sl->mb_linesize, 16 + 5, 16 + 5 /*FIXME*/, full_mx - 2, full_my - 2, pic_width, pic_height); - src_cr = h->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; + src_cr = sl->edge_emu_buffer + (2 << pixel_shift) + 2 * sl->mb_linesize; } qpix_op[luma_xy](dest_cr, src_cr, sl->mb_linesize); // FIXME try variable height perhaps? if (!square) @@ -297,22 +297,22 @@ static av_always_inline void mc_dir_part(const H264Context *h, H264SliceContext (my >> ysh) * sl->mb_uvlinesize; if (emu) { - h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cb, + h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cb, sl->mb_uvlinesize, sl->mb_uvlinesize, 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); - src_cb = h->edge_emu_buffer; + src_cb = sl->edge_emu_buffer; } chroma_op(dest_cb, src_cb, sl->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */), mx & 7, ((unsigned)my << (chroma_idc == 2 /* yuv422 */)) & 7); if (emu) { - h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src_cr, + h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src_cr, sl->mb_uvlinesize, sl->mb_uvlinesize, 9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh), pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */)); - src_cr = h->edge_emu_buffer; + src_cr = sl->edge_emu_buffer; } chroma_op(dest_cr, src_cr, sl->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */), mx & 7, ((unsigned)my << (chroma_idc == 2 /* yuv422 */)) & 7); @@ -405,9 +405,9 @@ static av_always_inline void mc_part_weighted(const H264Context *h, H264SliceCon if (list0 && list1) { /* don't optimize for luma-only case, since B-frames usually * use implicit weights => chroma too. 
*/ - uint8_t *tmp_cb = h->bipred_scratchpad; - uint8_t *tmp_cr = h->bipred_scratchpad + (16 << pixel_shift); - uint8_t *tmp_y = h->bipred_scratchpad + 16 * sl->mb_uvlinesize; + uint8_t *tmp_cb = sl->bipred_scratchpad; + uint8_t *tmp_cr = sl->bipred_scratchpad + (16 << pixel_shift); + uint8_t *tmp_y = sl->bipred_scratchpad + 16 * sl->mb_uvlinesize; int refn0 = sl->ref_cache[0][scan8[n]]; int refn1 = sl->ref_cache[1][scan8[n]]; diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c index 0e72a81a53..2c9ee12d0d 100644 --- a/libavcodec/h264_slice.c +++ b/libavcodec/h264_slice.c @@ -161,21 +161,20 @@ static void release_unused_pictures(H264Context *h, int remove_current) } } -static int alloc_scratch_buffers(H264Context *h, int linesize) +static int alloc_scratch_buffers(H264SliceContext *sl, int linesize) { int alloc_size = FFALIGN(FFABS(linesize) + 32, 32); - if (h->bipred_scratchpad) - return 0; - - h->bipred_scratchpad = av_malloc(16 * 6 * alloc_size); + av_fast_malloc(&sl->bipred_scratchpad, &sl->bipred_scratchpad_allocated, 16 * 6 * alloc_size); // edge emu needs blocksize + filter length - 1 // (= 21x21 for h264) - h->edge_emu_buffer = av_mallocz(alloc_size * 2 * 21); + av_fast_malloc(&sl->edge_emu_buffer, &sl->edge_emu_buffer_allocated, alloc_size * 2 * 21); - if (!h->bipred_scratchpad || !h->edge_emu_buffer) { - av_freep(&h->bipred_scratchpad); - av_freep(&h->edge_emu_buffer); + if (!sl->bipred_scratchpad || !sl->edge_emu_buffer) { + av_freep(&sl->bipred_scratchpad); + av_freep(&sl->edge_emu_buffer); + sl->bipred_scratchpad_allocated = 0; + sl->edge_emu_buffer_allocated = 0; return AVERROR(ENOMEM); } @@ -402,8 +401,6 @@ static void clone_tables(H264Context *dst, H264SliceContext *sl, dst->DPB = src->DPB; dst->cur_pic_ptr = src->cur_pic_ptr; dst->cur_pic = src->cur_pic; - dst->bipred_scratchpad = NULL; - dst->edge_emu_buffer = NULL; ff_h264_pred_init(&dst->hpc, src->avctx->codec_id, src->sps.bit_depth_luma, src->sps.chroma_format_idc); } @@ -481,8 +478,6 
@@ int ff_h264_update_thread_context(AVCodecContext *dst, * the current value */ h->avctx->bits_per_raw_sample = h->sps.bit_depth_luma; - av_freep(&h->bipred_scratchpad); - h->width = h1->width; h->height = h1->height; h->mb_height = h1->mb_height; @@ -587,8 +582,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst, } } - h->bipred_scratchpad = NULL; - h->edge_emu_buffer = NULL; h->thread_context[0] = h; h->context_initialized = h1->context_initialized; @@ -1736,14 +1729,6 @@ int ff_h264_decode_slice_header(H264Context *h, H264SliceContext *sl, H264Contex if (h != h0 && (ret = clone_slice(h, h0)) < 0) return ret; - /* can't be in alloc_tables because linesize isn't known there. - * FIXME: redo bipred weight to not require extra buffer? */ - for (i = 0; i < h->slice_context_count; i++) - if (h->thread_context[i]) { - ret = alloc_scratch_buffers(h->thread_context[i], h->linesize); - if (ret < 0) - return ret; - } h->cur_pic_ptr->frame_num = h->frame_num; // FIXME frame_num cleanup @@ -2397,6 +2382,11 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg) H264SliceContext *sl = arg; H264Context *h = sl->h264; int lf_x_start = sl->mb_x; + int ret; + + ret = alloc_scratch_buffers(sl, h->linesize); + if (ret < 0) + return ret; sl->mb_skip_run = -1; diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c index 56befbfa7a..9b497e8a33 100644 --- a/libavcodec/svq3.c +++ b/libavcodec/svq3.c @@ -303,6 +303,7 @@ static inline void svq3_mc_dir_part(SVQ3Context *s, int thirdpel, int dir, int avg) { H264Context *h = &s->h; + H264SliceContext *sl = &h->slice_ctx[0]; const H264Picture *pic = (dir == 0) ? 
s->last_pic : s->next_pic; uint8_t *src, *dest; int i, emu = 0; @@ -323,11 +324,11 @@ static inline void svq3_mc_dir_part(SVQ3Context *s, src = pic->f.data[0] + mx + my * h->linesize; if (emu) { - h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src, + h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src, h->linesize, h->linesize, width + 1, height + 1, mx, my, s->h_edge_pos, s->v_edge_pos); - src = h->edge_emu_buffer; + src = sl->edge_emu_buffer; } if (thirdpel) (avg ? s->tdsp.avg_tpel_pixels_tab @@ -350,12 +351,12 @@ static inline void svq3_mc_dir_part(SVQ3Context *s, src = pic->f.data[i] + mx + my * h->uvlinesize; if (emu) { - h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src, + h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src, h->uvlinesize, h->uvlinesize, width + 1, height + 1, mx, my, (s->h_edge_pos >> 1), s->v_edge_pos >> 1); - src = h->edge_emu_buffer; + src = sl->edge_emu_buffer; } if (thirdpel) (avg ? s->tdsp.avg_tpel_pixels_tab @@ -1082,6 +1083,7 @@ static int get_buffer(AVCodecContext *avctx, H264Picture *pic) { SVQ3Context *s = avctx->priv_data; H264Context *h = &s->h; + H264SliceContext *sl = &h->slice_ctx[0]; const int big_mb_num = h->mb_stride * (h->mb_height + 1) + 1; const int mb_array_size = h->mb_stride * h->mb_height; const int b4_stride = h->mb_width * 4 + 1; @@ -1115,9 +1117,9 @@ static int get_buffer(AVCodecContext *avctx, H264Picture *pic) if (ret < 0) goto fail; - if (!h->edge_emu_buffer) { - h->edge_emu_buffer = av_mallocz_array(pic->f.linesize[0], 17); - if (!h->edge_emu_buffer) + if (!sl->edge_emu_buffer) { + sl->edge_emu_buffer = av_mallocz_array(pic->f.linesize[0], 17); + if (!sl->edge_emu_buffer) return AVERROR(ENOMEM); } @@ -1373,7 +1375,6 @@ static av_cold int svq3_decode_end(AVCodecContext *avctx) av_freep(&s->buf); s->buf_size = 0; - av_freep(&h->edge_emu_buffer); return 0; } |