diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2013-02-17 14:52:24 -0800 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-02-18 01:21:23 +0100 |
commit | c63f9fb37a7b7da03bed6d79115f7f2e36607808 (patch) | |
tree | f405ac2af54069fd224ea5a06c071a023869efff /libavcodec/h264_mb_template.c | |
parent | 54b2bddd22fb32a67038848b8d2394bee671b143 (diff) | |
download | ffmpeg-c63f9fb37a7b7da03bed6d79115f7f2e36607808.tar.gz |
h264: don't store intra pcm samples in h->mb.
Instead, keep them in the bitstream buffer until we read them verbatim;
this saves a memcpy() and a subsequent clearing of the target buffer.
decode_cabac+decode_mb for a sample file (CAPM3_Sony_D.jsv) goes from
6121.4 to 6095.5 cycles, i.e. 26 cycles faster.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/h264_mb_template.c')
-rw-r--r-- | libavcodec/h264_mb_template.c | 29 |
1 file changed, 15 insertions, 14 deletions
diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c index 2f4890a98b..b617029a9f 100644 --- a/libavcodec/h264_mb_template.c +++ b/libavcodec/h264_mb_template.c @@ -102,7 +102,7 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h) if (PIXEL_SHIFT) { int j; GetBitContext gb; - init_get_bits(&gb, (uint8_t *)h->mb, + init_get_bits(&gb, (uint8_t *)h->intra_pcm_ptr, ff_h264_mb_sizes[h->sps.chroma_format_idc] * bit_depth); for (i = 0; i < 16; i++) { @@ -134,7 +134,7 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h) } } else { for (i = 0; i < 16; i++) - memcpy(dest_y + i * linesize, (uint8_t *)h->mb + i * 16, 16); + memcpy(dest_y + i * linesize, (uint8_t *)h->intra_pcm_ptr + i * 16, 16); if (SIMPLE || !CONFIG_GRAY || !(h->flags & CODEC_FLAG_GRAY)) { if (!h->sps.chroma_format_idc) { for (i = 0; i < 8; i++) { @@ -142,8 +142,8 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h) memset(dest_cr + i*uvlinesize, 1 << (bit_depth - 1), 8); } } else { - uint8_t *src_cb = (uint8_t *)h->mb + 256; - uint8_t *src_cr = (uint8_t *)h->mb + 256 + block_h * 8; + uint8_t *src_cb = (uint8_t *)h->intra_pcm_ptr + 256; + uint8_t *src_cr = (uint8_t *)h->intra_pcm_ptr + 256 + block_h * 8; for (i = 0; i < block_h; i++) { memcpy(dest_cb + i * uvlinesize, src_cb + i * 8, 8); memcpy(dest_cr + i * uvlinesize, src_cr + i * 8, 8); @@ -258,10 +258,10 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h) } } } - } - if (h->cbp || IS_INTRA(mb_type)) { - h->dsp.clear_blocks(h->mb); - h->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT)); + if (h->cbp || IS_INTRA(mb_type)) { + h->dsp.clear_blocks(h->mb); + h->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT)); + } } } @@ -325,7 +325,7 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h) if (PIXEL_SHIFT) { const int bit_depth = h->sps.bit_depth_luma; GetBitContext gb; - init_get_bits(&gb, (uint8_t *)h->mb, 768 * bit_depth); + init_get_bits(&gb, (uint8_t *)h->intra_pcm_ptr, 768 * 
bit_depth); for (p = 0; p < plane_count; p++) for (i = 0; i < 16; i++) { @@ -337,7 +337,7 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h) for (p = 0; p < plane_count; p++) for (i = 0; i < 16; i++) memcpy(dest[p] + i * linesize, - (uint8_t *)h->mb + p * 256 + i * 16, 16); + (uint8_t *)h->intra_pcm_ptr + p * 256 + i * 16, 16); } } else { if (IS_INTRA(mb_type)) { @@ -365,10 +365,11 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h) hl_decode_mb_idct_luma(h, mb_type, 1, SIMPLE, transform_bypass, PIXEL_SHIFT, block_offset, linesize, dest[p], p); - } - if (h->cbp || IS_INTRA(mb_type)) { - h->dsp.clear_blocks(h->mb); - h->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT)); + + if (h->cbp || IS_INTRA(mb_type)) { + h->dsp.clear_blocks(h->mb); + h->dsp.clear_blocks(h->mb + (24 * 16 << PIXEL_SHIFT)); + } } } |