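Switch the VC-1 decoder to add the decoded residual to the output picture immediately, as part of block decoding and the inverse transform, instead of in a separate step in the callers.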

Originally committed as revision 11188 to svn://svn.ffmpeg.org/ffmpeg/trunk
author: Kostya Shishkov <kostya.shishkov@gmail.com> 2007-12-08 10:41:18 +0000
committer: Kostya Shishkov <kostya.shishkov@gmail.com> 2007-12-08 10:41:18 +0000
commit: d2e45f33a4f3ebafb9a097a0dea844697d79c494 (patch)
tree: 6d516345e8912e25a4637bea78025f9907a47003
parent: 30ea3075442fc0380e0d5de835324d5db1f22194 (diff)
download: ffmpeg-d2e45f33a4f3ebafb9a097a0dea844697d79c494.tar.gz
4 files changed, 54 insertions, 62 deletions
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 273069c1e8..b076e60f58 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -407,9 +407,9 @@ typedef struct DSPContext {
 
     /* vc1 functions */
     void (*vc1_inv_trans_8x8)(DCTELEM *b);
-    void (*vc1_inv_trans_8x4)(DCTELEM *b, int n);
-    void (*vc1_inv_trans_4x8)(DCTELEM *b, int n);
-    void (*vc1_inv_trans_4x4)(DCTELEM *b, int n);
+    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
     void (*vc1_v_overlap)(uint8_t* src, int stride);
     void (*vc1_h_overlap)(uint8_t* src, int stride);
     /* put 8x8 block with bicubic interpolation and quarterpel precision
diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c
index b8e3a5d125..8387a5e0f4 100644
--- a/libavcodec/ppc/vc1dsp_altivec.c
+++ b/libavcodec/ppc/vc1dsp_altivec.c
@@ -332,5 +332,5 @@ static void vc1_inv_trans_8x4_altivec(DCTELEM block[64], int n)
 
 void vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) {
     dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec;
-    dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec;
+    //dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec;
 }
diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
index 6c478d1c41..ac2a4de98e 100644
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c
@@ -2884,7 +2884,8 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c
 
 /** Decode P block
  */
-static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquant, int ttmb, int first_block)
+static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquant, int ttmb, int first_block,
+                              uint8_t *dst, int linesize, int skip_block)
 {
     MpegEncContext *s = &v->s;
     GetBitContext *gb = &s->gb;
@@ -2930,7 +2931,10 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
             if(!v->pquantizer)
                 block[idx] += (block[idx] < 0) ? -mquant : mquant;
         }
-        s->dsp.vc1_inv_trans_8x8(block);
+        if(!skip_block){
+            s->dsp.vc1_inv_trans_8x8(block);
+            s->dsp.add_pixels_clamped(block, dst, linesize);
+        }
         break;
     case TT_4X4:
         for(j = 0; j < 4; j++) {
@@ -2947,8 +2951,8 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
                 if(!v->pquantizer)
                     block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant;
             }
-            if(!(subblkpat & (1 << (3 - j))))
-                s->dsp.vc1_inv_trans_4x4(block, j);
+            if(!(subblkpat & (1 << (3 - j))) && !skip_block)
+                s->dsp.vc1_inv_trans_4x4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off);
         }
         break;
     case TT_8X4:
@@ -2969,8 +2973,8 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
                 if(!v->pquantizer)
                     block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant;
             }
-            if(!(subblkpat & (1 << (1 - j))))
-                s->dsp.vc1_inv_trans_8x4(block, j);
+            if(!(subblkpat & (1 << (1 - j))) && !skip_block)
+                s->dsp.vc1_inv_trans_8x4(dst + j*4*linesize, linesize, block + off);
         }
         break;
     case TT_4X8:
@@ -2991,8 +2995,8 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
                 if(!v->pquantizer)
                     block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant;
             }
-            if(!(subblkpat & (1 << (1 - j))))
-                s->dsp.vc1_inv_trans_4x8(block, j);
+            if(!(subblkpat & (1 << (1 - j))) && !skip_block)
+                s->dsp.vc1_inv_trans_4x8(dst + j*4, linesize, block + off);
         }
         break;
     }
@@ -3101,11 +3105,9 @@ static int vc1_decode_p_mb(VC1Context *v)
                             s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2));
                     }
                 } else if(val) {
-                    vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block);
+                    vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY));
                     if(!v->ttmbf && ttmb < 8) ttmb = -1;
                     first_block = 0;
-                    if((i<4) || !(s->flags & CODEC_FLAG_GRAY))
-                        s->dsp.add_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
                 }
             }
         }
@@ -3203,11 +3205,9 @@ static int vc1_decode_p_mb(VC1Context *v)
                             s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2));
                     }
                 } else if(is_coded[i]) {
-                    status = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block);
+                    status = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY));
                     if(!v->ttmbf && ttmb < 8) ttmb = -1;
                     first_block = 0;
-                    if((i<4) || !(s->flags & CODEC_FLAG_GRAY))
-                        s->dsp.add_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
                 }
             }
             return status;
@@ -3377,11 +3377,9 @@ static void vc1_decode_b_mb(VC1Context *v)
             if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
             s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2));
         } else if(val) {
-            vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block);
+            vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY));
             if(!v->ttmbf && ttmb < 8) ttmb = -1;
             first_block = 0;
-            if((i<4) || !(s->flags & CODEC_FLAG_GRAY))
-                s->dsp.add_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
         }
     }
 }
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
index 09213ccb77..f96fddcc43 100644
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c
@@ -152,16 +152,15 @@ static void vc1_inv_trans_8x8_c(DCTELEM block[64])
 
 /** Do inverse transform on 8x4 part of block
 */
-static void vc1_inv_trans_8x4_c(DCTELEM block[64], int n)
+static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block)
 {
     int i;
     register int t1,t2,t3,t4,t5,t6,t7,t8;
     DCTELEM *src, *dst;
-    int off;
+    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
-    off = n * 32;
-    src = block + off;
-    dst = block + off;
+    src = block;
+    dst = block;
     for(i = 0; i < 4; i++){
         t1 = 12 * (src[0] + src[4]);
         t2 = 12 * (src[0] - src[4]);
@@ -191,8 +190,7 @@ static void vc1_inv_trans_8x4_c(DCTELEM block[64], int n)
         dst += 8;
     }
 
-    src = block + off;
-    dst = block + off;
+    src = block;
     for(i = 0; i < 8; i++){
         t1 = 17 * (src[ 0] + src[16]);
         t2 = 17 * (src[ 0] - src[16]);
@@ -201,28 +199,27 @@ static void vc1_inv_trans_8x4_c(DCTELEM block[64], int n)
         t5 = 10 * src[ 8];
         t6 = 10 * src[24];
 
-        dst[ 0] = (t1 + t3 + t6 + 64) >> 7;
-        dst[ 8] = (t2 - t4 + t5 + 64) >> 7;
-        dst[16] = (t2 + t4 - t5 + 64) >> 7;
-        dst[24] = (t1 - t3 - t6 + 64) >> 7;
+        dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3 + t6 + 64) >> 7)];
+        dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4 + t5 + 64) >> 7)];
+        dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4 - t5 + 64) >> 7)];
+        dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3 - t6 + 64) >> 7)];
 
         src ++;
-        dst ++;
+        dest++;
     }
 }
 
 /** Do inverse transform on 4x8 parts of block
 */
-static void vc1_inv_trans_4x8_c(DCTELEM block[64], int n)
+static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block)
 {
     int i;
     register int t1,t2,t3,t4,t5,t6,t7,t8;
     DCTELEM *src, *dst;
-    int off;
+    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
-    off = n * 4;
-    src = block + off;
-    dst = block + off;
+    src = block;
+    dst = block;
     for(i = 0; i < 8; i++){
         t1 = 17 * (src[0] + src[2]);
         t2 = 17 * (src[0] - src[2]);
@@ -240,8 +237,7 @@ static void vc1_inv_trans_4x8_c(DCTELEM block[64], int n)
         dst += 8;
     }
 
-    src = block + off;
-    dst = block + off;
+    src = block;
     for(i = 0; i < 4; i++){
         t1 = 12 * (src[ 0] + src[32]);
         t2 = 12 * (src[ 0] - src[32]);
@@ -258,32 +254,31 @@ static void vc1_inv_trans_4x8_c(DCTELEM block[64], int n)
         t3 =  9 * src[ 8] - 16 * src[24] +  4 * src[40] + 15 * src[56];
         t4 =  4 * src[ 8] -  9 * src[24] + 15 * src[40] - 16 * src[56];
 
-        dst[ 0] = (t5 + t1 + 64) >> 7;
-        dst[ 8] = (t6 + t2 + 64) >> 7;
-        dst[16] = (t7 + t3 + 64) >> 7;
-        dst[24] = (t8 + t4 + 64) >> 7;
-        dst[32] = (t8 - t4 + 64 + 1) >> 7;
-        dst[40] = (t7 - t3 + 64 + 1) >> 7;
-        dst[48] = (t6 - t2 + 64 + 1) >> 7;
-        dst[56] = (t5 - t1 + 64 + 1) >> 7;
+        dest[0*linesize] = cm[dest[0*linesize] + ((t5 + t1 + 64) >> 7)];
+        dest[1*linesize] = cm[dest[1*linesize] + ((t6 + t2 + 64) >> 7)];
+        dest[2*linesize] = cm[dest[2*linesize] + ((t7 + t3 + 64) >> 7)];
+        dest[3*linesize] = cm[dest[3*linesize] + ((t8 + t4 + 64) >> 7)];
+        dest[4*linesize] = cm[dest[4*linesize] + ((t8 - t4 + 64 + 1) >> 7)];
+        dest[5*linesize] = cm[dest[5*linesize] + ((t7 - t3 + 64 + 1) >> 7)];
+        dest[6*linesize] = cm[dest[6*linesize] + ((t6 - t2 + 64 + 1) >> 7)];
+        dest[7*linesize] = cm[dest[7*linesize] + ((t5 - t1 + 64 + 1) >> 7)];
 
-        src++;
-        dst++;
+        src ++;
+        dest++;
     }
 }
 
 /** Do inverse transform on 4x4 part of block
 */
-static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n)
+static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block)
 {
     int i;
     register int t1,t2,t3,t4,t5,t6;
     DCTELEM *src, *dst;
-    int off;
+    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
 
-    off = (n&1) * 4 + (n&2) * 16;
-    src = block + off;
-    dst = block + off;
+    src = block;
+    dst = block;
     for(i = 0; i < 4; i++){
         t1 = 17 * (src[0] + src[2]);
         t2 = 17 * (src[0] - src[2]);
@@ -301,8 +296,7 @@ static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n)
         dst += 8;
     }
 
-    src = block + off;
-    dst = block + off;
+    src = block;
     for(i = 0; i < 4; i++){
         t1 = 17 * (src[ 0] + src[16]);
         t2 = 17 * (src[ 0] - src[16]);
@@ -311,13 +305,13 @@ static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n)
         t5 = 10 * src[ 8];
         t6 = 10 * src[24];
 
-        dst[ 0] = (t1 + t3 + t6 + 64) >> 7;
-        dst[ 8] = (t2 - t4 + t5 + 64) >> 7;
-        dst[16] = (t2 + t4 - t5 + 64) >> 7;
-        dst[24] = (t1 - t3 - t6 + 64) >> 7;
+        dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3 + t6 + 64) >> 7)];
+        dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4 + t5 + 64) >> 7)];
+        dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4 - t5 + 64) >> 7)];
+        dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3 - t6 + 64) >> 7)];
 
         src ++;
-        dst ++;
+        dest++;
     }
 }
author	Kostya Shishkov <kostya.shishkov@gmail.com>	2007-12-08 10:41:18 +0000
committer	Kostya Shishkov <kostya.shishkov@gmail.com>	2007-12-08 10:41:18 +0000
commit	d2e45f33a4f3ebafb9a097a0dea844697d79c494 (patch)
tree	6d516345e8912e25a4637bea78025f9907a47003
parent	30ea3075442fc0380e0d5de835324d5db1f22194 (diff)
download	ffmpeg-d2e45f33a4f3ebafb9a097a0dea844697d79c494.tar.gz