vp3: Use full transpose for all IDCTs

This way, the special IDCT permutations are no longer needed. This is similar to how H264 does it, and removes the dsputil dependency imposed by the scantable code.

Also remove the unused type == 0 cases from the plain C version of the idct.

Signed-off-by: Martin Storsjö <martin@martin.st>
author: Ronald S. Bultje <rsbultje@gmail.com> 2013-03-12 07:28:12 -0700
committer: Martin Storsjö <martin@martin.st> 2013-04-15 12:32:05 +0300
commit: 015821229f96bf7e677f2a711a58dbea3009f574 (patch)
tree: 2247f2d16c077a1f887656b8859b164eca6b84df /libavcodec/vp3dsp.c
parent: 5941978e71d2c3a8e2a7e87951e081e0b2e77da9 (diff)
download: ffmpeg-015821229f96bf7e677f2a711a58dbea3009f574.tar.gz
1 files changed, 35 insertions, 57 deletions
diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c
index d1a7db957d..94de0e5b96 100644
--- a/libavcodec/vp3dsp.c
+++ b/libavcodec/vp3dsp.c
@@ -54,11 +54,12 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
     /* Inverse DCT on the rows now */
     for (i = 0; i < 8; i++) {
         /* Check for non-zero values */
-        if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
-            A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
-            B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
-            C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
-            D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]);
+        if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
+             ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
+            A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]);
+            B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]);
+            C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]);
+            D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]);
 
             Ad = M(xC4S4, (A - C));
             Bd = M(xC4S4, (B - D));
@@ -66,11 +67,11 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
             Cd = A + C;
             Dd = B + D;
 
-            E = M(xC4S4, (ip[0] + ip[4]));
-            F = M(xC4S4, (ip[0] - ip[4]));
+            E = M(xC4S4, (ip[0 * 8] + ip[4 * 8]));
+            F = M(xC4S4, (ip[0 * 8] - ip[4 * 8]));
 
-            G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]);
-            H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]);
+            G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]);
+            H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]);
 
             Ed = E - G;
             Gd = E + G;
@@ -82,33 +83,33 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
             Hd = Bd + H;
 
             /*  Final sequence of operations over-write original inputs. */
-            ip[0] = Gd + Cd ;
-            ip[7] = Gd - Cd ;
+            ip[0 * 8] = Gd + Cd ;
+            ip[7 * 8] = Gd - Cd ;
 
-            ip[1] = Add + Hd;
-            ip[2] = Add - Hd;
+            ip[1 * 8] = Add + Hd;
+            ip[2 * 8] = Add - Hd;
 
-            ip[3] = Ed + Dd ;
-            ip[4] = Ed - Dd ;
+            ip[3 * 8] = Ed + Dd ;
+            ip[4 * 8] = Ed - Dd ;
 
-            ip[5] = Fd + Bdd;
-            ip[6] = Fd - Bdd;
+            ip[5 * 8] = Fd + Bdd;
+            ip[6 * 8] = Fd - Bdd;
         }
 
-        ip += 8;            /* next row */
+        ip += 1;            /* next row */
     }
 
     ip = input;
 
     for ( i = 0; i < 8; i++) {
         /* Check for non-zero values (bitwise or faster than ||) */
-        if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
-             ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
+        if ( ip[1] | ip[2] | ip[3] |
+             ip[4] | ip[5] | ip[6] | ip[7] ) {
 
-            A = M(xC1S7, ip[1*8]) + M(xC7S1, ip[7*8]);
-            B = M(xC7S1, ip[1*8]) - M(xC1S7, ip[7*8]);
-            C = M(xC3S5, ip[3*8]) + M(xC5S3, ip[5*8]);
-            D = M(xC3S5, ip[5*8]) - M(xC5S3, ip[3*8]);
+            A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
+            B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
+            C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
+            D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]);
 
             Ad = M(xC4S4, (A - C));
             Bd = M(xC4S4, (B - D));
@@ -116,16 +117,16 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
             Cd = A + C;
             Dd = B + D;
 
-            E = M(xC4S4, (ip[0*8] + ip[4*8])) + 8;
-            F = M(xC4S4, (ip[0*8] - ip[4*8])) + 8;
+            E = M(xC4S4, (ip[0] + ip[4])) + 8;
+            F = M(xC4S4, (ip[0] - ip[4])) + 8;
 
             if(type==1){  //HACK
                 E += 16*128;
                 F += 16*128;
             }
 
-            G = M(xC2S6, ip[2*8]) + M(xC6S2, ip[6*8]);
-            H = M(xC6S2, ip[2*8]) - M(xC2S6, ip[6*8]);
+            G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]);
+            H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]);
 
             Ed = E - G;
             Gd = E + G;
@@ -137,19 +138,7 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
             Hd = Bd + H;
 
             /* Final sequence of operations over-write original inputs. */
-            if(type==0){
-                ip[0*8] = (Gd + Cd )  >> 4;
-                ip[7*8] = (Gd - Cd )  >> 4;
-
-                ip[1*8] = (Add + Hd ) >> 4;
-                ip[2*8] = (Add - Hd ) >> 4;
-
-                ip[3*8] = (Ed + Dd )  >> 4;
-                ip[4*8] = (Ed - Dd )  >> 4;
-
-                ip[5*8] = (Fd + Bdd ) >> 4;
-                ip[6*8] = (Fd - Bdd ) >> 4;
-            }else if(type==1){
+            if (type == 1) {
                 dst[0*stride] = av_clip_uint8((Gd + Cd )  >> 4);
                 dst[7*stride] = av_clip_uint8((Gd - Cd )  >> 4);
 
@@ -176,16 +165,7 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
             }
 
         } else {
-            if(type==0){
-                ip[0*8] =
-                ip[1*8] =
-                ip[2*8] =
-                ip[3*8] =
-                ip[4*8] =
-                ip[5*8] =
-                ip[6*8] =
-                ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
-            }else if(type==1){
+            if (type == 1) {
                 dst[0*stride]=
                 dst[1*stride]=
                 dst[2*stride]=
@@ -193,10 +173,10 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
                 dst[4*stride]=
                 dst[5*stride]=
                 dst[6*stride]=
-                dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20));
+                dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift<<16))>>20));
             }else{
-                if(ip[0*8]){
-                    int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
+                if(ip[0]){
+                    int v= ((xC4S4 * ip[0] + (IdctAdjustBeforeShift<<16))>>20);
                     dst[0*stride] = av_clip_uint8(dst[0*stride] + v);
                     dst[1*stride] = av_clip_uint8(dst[1*stride] + v);
                     dst[2*stride] = av_clip_uint8(dst[2*stride] + v);
@@ -209,7 +189,7 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int
             }
         }
 
-        ip++;            /* next column */
+        ip += 8;            /* next column */
         dst++;
     }
 }
@@ -307,8 +287,6 @@ av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
     c->v_loop_filter = vp3_v_loop_filter_c;
     c->h_loop_filter = vp3_h_loop_filter_c;
 
-    c->idct_perm = FF_NO_IDCT_PERM;
-
     if (ARCH_ARM)
         ff_vp3dsp_init_arm(c, flags);
     if (ARCH_BFIN)
author	Ronald S. Bultje <rsbultje@gmail.com>	2013-03-12 07:28:12 -0700
committer	Martin Storsjö <martin@martin.st>	2013-04-15 12:32:05 +0300
commit	015821229f96bf7e677f2a711a58dbea3009f574 (patch)
tree	2247f2d16c077a1f887656b8859b164eca6b84df /libavcodec/vp3dsp.c
parent	5941978e71d2c3a8e2a7e87951e081e0b2e77da9 (diff)
download	ffmpeg-015821229f96bf7e677f2a711a58dbea3009f574.tar.gz