author     Loren Merritt <lorenm@u.washington.edu>    2005-04-21 20:38:42 +0000
committer  Loren Merritt <lorenm@u.washington.edu>    2005-04-21 20:38:42 +0000
commit     e2e5894a91b9a14d13d57a5be5544143de4406f7 (patch)
tree       8b92d6369cb7cc052734043a32d56dc51b656f04
parent     dff0f0354605102ac06e419ddd437ca7aa558c30 (diff)
download   ffmpeg-e2e5894a91b9a14d13d57a5be5544143de4406f7.tar.gz
deblocking filter doesn't need to call fill_caches again. 1.4% faster decoding.
Originally committed as revision 4147 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r--  libavcodec/h264.c  55
1 file changed, 31 insertions, 24 deletions
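
The core of the change is an early return in fill_caches(): when the function is called for deblocking and the picture was coded as a single slice, the neighbour caches populated during decoding are still valid, so the work can be skipped entirely (the patch also moves the topleft/topright motion-vector fills below the for_deblock continue so deblocking never does them). A rough, self-contained sketch of that early-exit pattern is below; it uses stand-in fields, not the real H264Context, and is only meant to illustrate the shape of the optimization.

/* Minimal sketch of the early-exit pattern from this patch. The struct and
 * field names (slice_num, cache_filled) are simplified stand-ins for the
 * identifiers touched in h264.c, not the actual ffmpeg structures. */
#include <stdio.h>

struct ctx {
    int slice_num;      /* number of slices in the current picture */
    int cache_filled;   /* whether the per-MB caches were already populated */
};

static void fill_caches(struct ctx *h, int for_deblock)
{
    /* With a single slice, the caches filled during decoding are still valid
     * when the deblocking filter runs, so redoing the setup is wasted work.
     * (As the patch's FIXME notes, the precise condition is "not on a slice
     * edge"; slice_num == 1 is the conservative check used here.) */
    if (for_deblock && h->slice_num == 1)
        return;

    h->cache_filled = 1;  /* ... expensive neighbour-cache setup ... */
}

int main(void)
{
    struct ctx h = { .slice_num = 1, .cache_filled = 0 };
    fill_caches(&h, 0);   /* decode pass: does the full work */
    fill_caches(&h, 1);   /* deblock pass: returns immediately */
    printf("cache_filled=%d\n", h.cache_filled);
    return 0;
}

Avoiding the second fill_caches() pass per macroblock is where the quoted 1.4% decoding speedup comes from; the actual diff follows.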
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 39ee4c6a2c..a44e7ce1a0 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -428,6 +428,12 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
int left_block[8];
int i;
+ //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
+ // the actual condition is whether we're on the edge of a slice,
+ // and even then the intra and nnz parts are unnecessary.
+ if(for_deblock && h->slice_num == 1)
+ return;
+
//wow what a mess, why didnt they simplify the interlacing&intra stuff, i cant imagine that these complex rules are worth it
top_xy = mb_xy - s->mb_stride;
@@ -662,10 +668,10 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
#if 1
//FIXME direct mb can skip much of this
- if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){
+ if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
int list;
- for(list=0; list<2; list++){
- if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !for_deblock){
+ for(list=0; list<1+(h->slice_type==B_TYPE); list++){
+ if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
/*if(!h->mv_cache_clean[list]){
memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
@@ -675,16 +681,6 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
}
h->mv_cache_clean[list]= 0;
- if(IS_INTER(topleft_type)){
- const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
- const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
- h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
- }else{
- *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
- h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
- }
-
if(IS_INTER(top_type)){
const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
@@ -704,16 +700,6 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
*(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
}
- if(IS_INTER(topright_type)){
- const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
- const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
- *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
- h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
- }else{
- *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
- h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
- }
-
//FIXME unify cleanup or sth
if(IS_INTER(left_type[0])){
const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
@@ -743,9 +729,30 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
}
- if(for_deblock)
+ if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
continue;
+ if(IS_INTER(topleft_type)){
+ const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
+ const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
+ *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
+ h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
+ }else{
+ *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
+ h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+ }
+
+ if(IS_INTER(topright_type)){
+ const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
+ const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
+ *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
+ h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
+ }else{
+ *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
+ h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+ }
+
+
h->ref_cache[list][scan8[5 ]+1] =
h->ref_cache[list][scan8[7 ]+1] =
h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewher else)