diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2006-03-22 12:41:02 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2006-03-22 12:41:02 +0000 |
commit | 53b19144e7336a71bcf5f1e160cf14ca30b38f75 (patch) | |
tree | 567181aed43c3f061ef8a80afc1e03e6c160b666 | |
parent | e8600e5edcad2c8f1e3a6880fce5bfcf81cb784f (diff) | |
download | ffmpeg-53b19144e7336a71bcf5f1e160cf14ca30b38f75.tar.gz |
1.5x faster write_back_motion, 1-3% overall
Originally committed as revision 5198 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/h264.c | 55 |
1 file changed, 21 insertions, 34 deletions
diff --git a/libavcodec/h264.c b/libavcodec/h264.c index f120858268..c419d7230c 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -715,7 +715,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ } h->mv_cache_clean[list]= 0; - if(IS_INTER(top_type)){ + if(USES_LIST(top_type, list)){ const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0]; @@ -735,7 +735,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ } //FIXME unify cleanup or sth - if(IS_INTER(left_type[0])){ + if(USES_LIST(left_type[0], list)){ const int b_xy= h->mb2b_xy[left_xy[0]] + 3; const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]; @@ -749,7 +749,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? 
LIST_NOT_USED : PART_NOT_AVAILABLE; } - if(IS_INTER(left_type[1])){ + if(USES_LIST(left_type[1], list)){ const int b_xy= h->mb2b_xy[left_xy[1]] + 3; const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1; *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]]; @@ -767,7 +767,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) continue; - if(IS_INTER(topleft_type)){ + if(USES_LIST(topleft_type, list)){ const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride; const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride; *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; @@ -777,7 +777,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; } - if(IS_INTER(topright_type)){ + if(USES_LIST(topright_type, list)){ const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; @@ -801,14 +801,14 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ if( h->pps.cabac ) { /* XXX beurk, Load mvd */ - if(IS_INTER(topleft_type)){ + if(USES_LIST(topleft_type, list)){ const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride; *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy]; }else{ *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0; } - if(IS_INTER(top_type)){ + if(USES_LIST(top_type, list)){ const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0]; *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1]; @@ -820,7 
+820,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0; } - if(IS_INTER(left_type[0])){ + if(USES_LIST(left_type[0], list)){ const int b_xy= h->mb2b_xy[left_xy[0]] + 3; *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]]; *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]]; @@ -828,7 +828,7 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){ *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0; } - if(IS_INTER(left_type[1])){ + if(USES_LIST(left_type[1], list)){ const int b_xy= h->mb2b_xy[left_xy[1]] + 3; *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]]; *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]]; @@ -1424,26 +1424,8 @@ static inline void write_back_motion(H264Context *h, int mb_type){ for(list=0; list<2; list++){ int y; - if(!USES_LIST(mb_type, list)){ - if(1){ //FIXME skip or never read if mb_type doesn't use it - for(y=0; y<4; y++){ - *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= - *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0; - } - if( h->pps.cabac ) { - /* FIXME needed ? 
*/ - for(y=0; y<4; y++){ - *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= - *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0; - } - } - for(y=0; y<2; y++){ - s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= - s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED; - } - } + if(!USES_LIST(mb_type, list)) continue; - } for(y=0; y<4; y++){ *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y]; @@ -1455,17 +1437,22 @@ static inline void write_back_motion(H264Context *h, int mb_type){ *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y]; } } - for(y=0; y<2; y++){ - s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y]; - s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y]; + + { + uint8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; + ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]]; + ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]]; + ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]]; + ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]]; } } if(h->slice_type == B_TYPE && h->pps.cabac){ if(IS_8X8(mb_type)){ - h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0; - h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0; - h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0; + uint8_t *direct_table = &h->direct_table[b8_xy]; + direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0; + direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0; + direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0; } } } |