aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/h264.h
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2010-01-18 05:15:31 +0000
committer Michael Niedermayer <michaelni@gmx.at> 2010-01-18 05:15:31 +0000
commit 6d7e6b2657ae7ec577f3a55bc8c8b48075d51ba2 (patch)
tree 0056c59696827fcdf7f6a4cee03c4665f80d4845 /libavcodec/h264.h
parent 5bc3fc2187e6186cfeb3cac193ab99b3045e9f58 (diff)
download ffmpeg-6d7e6b2657ae7ec577f3a55bc8c8b48075d51ba2.tar.gz
Perform reference remapping at fill_caches() time instead of in the
loop filter. This removes one obstacle to getting ff_h264_filter_mb_fast() bitexact. The code is maybe 0.1% faster. Originally committed as revision 21280 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/h264.h')
-rw-r--r-- libavcodec/h264.h 27
1 files changed, 25 insertions, 2 deletions
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 94e386375c..0d87f90a9e 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -834,11 +834,20 @@ static av_always_inline int fill_caches(H264Context *h, int mb_type, int for_deb
}
ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
+ if(for_deblock){
+ int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+ *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
+ *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
+ ref += h->b8_stride;
+ *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
+ *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
+ }else{
*(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
*(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
ref += h->b8_stride;
*(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
*(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
+ }
b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
for(y=0; y<4; y++){
@@ -1029,16 +1038,24 @@ static av_always_inline int fill_caches(H264Context *h, int mb_type, int for_deb
*(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
*(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
*(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
+ if(for_deblock){
+ int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+ h->ref_cache[list][scan8[0] + 0 - 1*8]=
+ h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
+ h->ref_cache[list][scan8[0] + 2 - 1*8]=
+ h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
+ }else{
h->ref_cache[list][scan8[0] + 0 - 1*8]=
h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
h->ref_cache[list][scan8[0] + 2 - 1*8]=
h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
+ }
}else{
*(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
*(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
*(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
*(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
- *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
+ *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= (((for_deblock||top_type) ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
}
for(i=0; i<2; i++){
@@ -1048,13 +1065,19 @@ static av_always_inline int fill_caches(H264Context *h, int mb_type, int for_deb
const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
*(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
*(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
+ if(for_deblock){
+ int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[i]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+ h->ref_cache[list][cache_idx ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]];
+ h->ref_cache[list][cache_idx+8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]];
+ }else{
h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
+ }
}else{
*(uint32_t*)h->mv_cache [list][cache_idx ]=
*(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
h->ref_cache[list][cache_idx ]=
- h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+ h->ref_cache[list][cache_idx+8]= (for_deblock||left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
}
}