diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2010-01-18 05:15:31 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2010-01-18 05:15:31 +0000 |
commit | 6d7e6b2657ae7ec577f3a55bc8c8b48075d51ba2 (patch) | |
tree | 0056c59696827fcdf7f6a4cee03c4665f80d4845 | |
parent | 5bc3fc2187e6186cfeb3cac193ab99b3045e9f58 (diff) | |
download | ffmpeg-6d7e6b2657ae7ec577f3a55bc8c8b48075d51ba2.tar.gz |
Perform reference remapping at fill_cache() time instead of in the
loop filter. This removes one obstacle of getting ff_h264_filter_mb_fast()
bitexact. code is maybe 0.1% faster
Originally committed as revision 21280 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/h264.c | 23 | ||||
-rw-r--r-- | libavcodec/h264.h | 27 | ||||
-rw-r--r-- | libavcodec/h264_loopfilter.c | 11 |
3 files changed, 50 insertions, 11 deletions
diff --git a/libavcodec/h264.c b/libavcodec/h264.c index 8d7478c0f1..8146ab6cf0 100644 --- a/libavcodec/h264.c +++ b/libavcodec/h264.c @@ -2087,16 +2087,35 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ } for(j=0; j<2; j++){ + int id_list[16]; int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j]; + for(i=0; i<16; i++){ + id_list[i]= 60; + if(h->ref_list[j][i].data[0]){ + int k; + uint8_t *base= h->ref_list[j][i].base[0]; + for(k=0; k<h->short_ref_count; k++) + if(h->short_ref[k]->base[0] == base){ + id_list[i]= k; + break; + } + for(k=0; k<h->long_ref_count; k++) + if(h->long_ref[k] && h->long_ref[k]->base[0] == base){ + id_list[i]= h->short_ref_count + k; + break; + } + } + } + ref2frm[0]= ref2frm[1]= -1; for(i=0; i<16; i++) - ref2frm[i+2]= 4*h->ref_list[j][i].frame_num + ref2frm[i+2]= 4*id_list[i] +(h->ref_list[j][i].reference&3); ref2frm[18+0]= ref2frm[18+1]= -1; for(i=16; i<48; i++) - ref2frm[i+4]= 4*h->ref_list[j][i].frame_num + ref2frm[i+4]= 4*id_list[(i-16)>>1] +(h->ref_list[j][i].reference&3); } diff --git a/libavcodec/h264.h b/libavcodec/h264.h index 94e386375c..0d87f90a9e 100644 --- a/libavcodec/h264.h +++ b/libavcodec/h264.h @@ -834,11 +834,20 @@ static av_always_inline int fill_caches(H264Context *h, int mb_type, int for_deb } ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; + if(for_deblock){ + int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); + *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = + *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; + ref += h->b8_stride; + *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = + *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; + }else{ *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101; ref += h->b8_stride; *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101; + } b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride; for(y=0; y<4; y++){ @@ -1029,16 +1038,24 @@ static av_always_inline int fill_caches(H264Context *h, int mb_type, int for_deb *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1]; *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2]; *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3]; + if(for_deblock){ + int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); + h->ref_cache[list][scan8[0] + 0 - 1*8]= + h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]]; + h->ref_cache[list][scan8[0] + 2 - 1*8]= + h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]]; + }else{ h->ref_cache[list][scan8[0] + 0 - 1*8]= h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0]; h->ref_cache[list][scan8[0] + 2 - 1*8]= h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; + } }else{ *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0; - *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; + *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= (((for_deblock||top_type) ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; } for(i=0; i<2; i++){ @@ -1048,13 +1065,19 @@ static av_always_inline int fill_caches(H264Context *h, int mb_type, int for_deb const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]; *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]; + if(for_deblock){ + int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[i]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); + h->ref_cache[list][cache_idx ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]]; + h->ref_cache[list][cache_idx+8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]]; + }else{ h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]; h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]; + } }else{ *(uint32_t*)h->mv_cache [list][cache_idx ]= *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0; h->ref_cache[list][cache_idx ]= - h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE; + h->ref_cache[list][cache_idx+8]= (for_deblock||left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; } } diff --git a/libavcodec/h264_loopfilter.c b/libavcodec/h264_loopfilter.c index 98a5d9e6c3..6f14f8c7a5 100644 --- a/libavcodec/h264_loopfilter.c +++ b/libavcodec/h264_loopfilter.c @@ -445,8 +445,6 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u int edge; const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; const int mbm_type = s->current_picture.mb_type[mbm_xy]; - int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); - int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0; const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP)) @@ -513,7 +511,6 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u /* mbn_xy: neighbor macroblock */ const int mbn_xy = edge > 0 ? mb_xy : mbm_xy; const int mbn_type = s->current_picture.mb_type[mbn_xy]; - int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm; int16_t bS[4]; int qp; @@ -553,7 +550,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u int v = 0; for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) { - v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] | + v |= h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] | h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit; } @@ -562,7 +559,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u v=0; for( l = 0; !v && l < 2; l++ ) { int ln= 1-l; - v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] | + v |= h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] | h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit; } @@ -588,7 +585,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u { bS[i] = 0; for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) { - if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] | + if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] | h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) { bS[i] = 1; @@ -600,7 +597,7 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u bS[i] = 0; for( l = 0; l < 2; l++ ) { int ln= 1-l; - if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] | + if( h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] | h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) { bS[i] = 1; |