diff options
author | Jason Garrett-Glaser <darkshikari@gmail.com> | 2011-01-14 21:34:25 +0000 |
---|---|---|
committer | Jason Garrett-Glaser <darkshikari@gmail.com> | 2011-01-14 21:34:25 +0000 |
commit | 19fb234e4af1ff9f58ff2fdd604ac6f6bb87ad6b (patch) | |
tree | 220be84d79d9c771c1afeab43fdd2aaa82fea01d /libavcodec/h264_cavlc.c | |
parent | 6c18f1cda2e2b2471ebf75d30d552cb0cb61b6ad (diff) | |
download | ffmpeg-19fb234e4af1ff9f58ff2fdd604ac6f6bb87ad6b.tar.gz |
H.264: split luma dc idct out and implement MMX/SSE2 versions
About 2.5x the speed.
NOTE: the way that the asm code handles large qmuls is a bit suboptimal.
If x264-style dequant was used (separate shift and qmul values), it might
be possible to get some extra speed.
Originally committed as revision 26336 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/h264_cavlc.c')
-rw-r--r-- | libavcodec/h264_cavlc.c | 8 |
1 files changed, 4 insertions, 4 deletions
diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c index 6f3bcad782..426a285570 100644 --- a/libavcodec/h264_cavlc.c +++ b/libavcodec/h264_cavlc.c @@ -911,16 +911,14 @@ decode_intra_mb: int i8x8, i4x4, chroma_idx; int dquant; GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; - const uint8_t *scan, *scan8x8, *dc_scan; + const uint8_t *scan, *scan8x8; if(IS_INTERLACED(mb_type)){ scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0; scan= s->qscale ? h->field_scan : h->field_scan_q0; - dc_scan= luma_dc_field_scan; }else{ scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0; - dc_scan= luma_dc_zigzag_scan; } dquant= get_se_golomb(&s->gb); @@ -939,7 +937,9 @@ decode_intra_mb: h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale); h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale); if(IS_INTRA16x16(mb_type)){ - if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){ + AV_ZERO128(h->mb_luma_dc+0); + AV_ZERO128(h->mb_luma_dc+8); + if( decode_residual(h, h->intra_gb_ptr, h->mb_luma_dc, LUMA_DC_BLOCK_INDEX, scan, h->dequant4_coeff[0][s->qscale], 16) < 0){ return -1; //FIXME continue if partitioned and other return -1 too } |