diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2006-08-28 09:33:01 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2006-08-28 09:33:01 +0000 |
commit | 3e20143ee7b2e3b9b11eaaa4bda971281ae0efbf (patch) | |
tree | 4c991508d67a3b40fb302a0ffb842c7bb32fc0c6 /libavcodec/h264.c | |
parent | 001299bfe8142740d4260d5443fa91eb951dbdcb (diff) | |
download | ffmpeg-3e20143ee7b2e3b9b11eaaa4bda971281ae0efbf.tar.gz |
mmx implementation of deblocking strength decision.
2-3% faster h264.
Originally committed as revision 6113 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/h264.c')
-rw-r--r-- | libavcodec/h264.c | 123 |
1 files changed, 113 insertions, 10 deletions
diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 1a7fb76b49..4f0f875de7 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -409,6 +409,7 @@ static VLC run7_vlc; static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp); static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc); static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); +static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize); static always_inline uint32_t pack16to32(int a, int b){ #ifdef WORDS_BIGENDIAN @@ -3879,7 +3880,7 @@ static void hl_decode_mb(H264Context *h){ tprintf("call filter_mb\n"); backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize); fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb - filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); + filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize); } } } @@ -6694,7 +6695,7 @@ decode_intra_mb: } -static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { +static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { int i, d; const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); const int alpha = alpha_table[index_a]; @@ -6755,7 +6756,7 @@ static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4] } } } -static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { +static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { int i; const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); const int alpha = alpha_table[index_a]; @@ -6771,7 +6772,7 @@ static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4 } } -static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) { +static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) { int i; for( i = 0; i < 16; i++, pix += stride) { int index_a; @@ -6869,7 +6870,7 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int } } } -static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[8], int qp[2] ) { +static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) { int i; for( i = 0; i < 8; i++, pix += stride) { int index_a; @@ -6922,7 +6923,7 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in } } -static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { +static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { int i, d; const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); const int alpha = alpha_table[index_a]; @@ -6982,7 +6983,7 @@ static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int bS[4] } } -static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) { +static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) { int i; const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 ); const int alpha = alpha_table[index_a]; @@ -6998,6 +6999,108 @@ static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int bS[4 } } +static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { + MpegEncContext * const s = &h->s; + int mb_xy, mb_type; + int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; + + if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength) { + filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); + return; + } + assert(!FRAME_MBAFF); + + mb_xy = mb_x + mb_y*s->mb_stride; + mb_type = s->current_picture.mb_type[mb_xy]; + qp = s->current_picture.qscale_table[mb_xy]; + qp0 = s->current_picture.qscale_table[mb_xy-1]; + qp1 = s->current_picture.qscale_table[h->top_mb_xy]; + qpc = get_chroma_qp( h->pps.chroma_qp_index_offset, qp ); + qpc0 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp0 ); + qpc1 = get_chroma_qp( h->pps.chroma_qp_index_offset, qp1 ); + qp0 = (qp + qp0 + 1) >> 1; + qp1 = (qp + qp1 + 1) >> 1; + qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX(0, h->pps.chroma_qp_index_offset); + if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh) + return; + qpc0 = (qpc + qpc0 + 1) >> 1; + qpc1 = (qpc + qpc1 + 1) >> 1; + + if( IS_INTRA(mb_type) ) { + int16_t bS4[4] = {4,4,4,4}; + int16_t bS3[4] = {3,3,3,3}; + if( IS_8x8DCT(mb_type) ) { + filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 ); + filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp ); + filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 ); + filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp ); + } else { + filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 ); + filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp ); + filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp ); + filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp ); + filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 ); + filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp ); + filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp ); + filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp ); + } + filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 ); + filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc ); + filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 ); + filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc ); + filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 ); + filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc ); + filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 ); + filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc ); + return; + } else { + DECLARE_ALIGNED_8(int16_t, bS[2][4][4]); + uint64_t (*bSv)[4] = (uint64_t(*)[4])bS; + int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP)) + == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4; + int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 : + (mb_type & MB_TYPE_16x8) ? 1 : 0; + int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) + && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16)) + ? 3 : 0; + int step = IS_8x8DCT(mb_type) ? 2 : 1; + s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, + (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 ); + if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) ) + bSv[0][0] = 0x0004000400040004ULL; + if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) ) + bSv[1][0] = 0x0004000400040004ULL; + +#define FILTER(hv,dir,edge)\ + if(bSv[dir][edge]) {\ + filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\ + if(!(edge&1)) {\ + filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\ + filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\ + }\ + } + if( edges == 1 ) { + FILTER(v,0,0); + FILTER(h,1,0); + } else if( IS_8x8DCT(mb_type) ) { + FILTER(v,0,0); + FILTER(v,0,2); + FILTER(h,1,0); + FILTER(h,1,2); + } else { + FILTER(v,0,0); + FILTER(v,0,1); + FILTER(v,0,2); + FILTER(v,0,3); + FILTER(h,1,0); + FILTER(h,1,1); + FILTER(h,1,2); + FILTER(h,1,3); + } +#undef FILTER + } +} + static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { MpegEncContext * const s = &h->s; const int mb_xy= mb_x + mb_y*s->mb_stride; @@ -7035,7 +7138,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 */ const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride; const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride }; - int bS[8]; + int16_t bS[8]; int qp[2]; int chroma_qp[2]; int mb_qp, mbn0_qp, mbn1_qp; @@ -7114,7 +7217,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 int mbn_xy = mb_xy - 2 * s->mb_stride; int qp, chroma_qp; int i, j; - int bS[4]; + int16_t bS[4]; for(j=0; j<2; j++, mbn_xy += s->mb_stride){ if( IS_INTRA(mb_type) || @@ -7150,7 +7253,7 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8 /* mbn_xy: neighbor macroblock */ const int mbn_xy = edge > 0 ? mb_xy : mbm_xy; const int mbn_type = s->current_picture.mb_type[mbn_xy]; - int bS[4]; + int16_t bS[4]; int qp; if( (edge&1) && IS_8x8DCT(mb_type) ) |