idct_dc for VC-1/WMV3 decoder; ~11% faster decoding overall.

Includes MMX2 asm for the various functions. Note that the actual IDCT still does not have an x86 SIMD implementation. For WMV3 files using the regular IDCT, the decoder just falls back to simple_idct, since simple_idct_dc doesn't exist (yet). Originally committed as revision 19204 to svn://svn.ffmpeg.org/ffmpeg/trunk
author: Jason Garrett-Glaser <darkshikari@gmail.com> 2009-06-16 09:00:55 +0000
committer: Jason Garrett-Glaser <darkshikari@gmail.com> 2009-06-16 09:00:55 +0000
commit: 4f717c69ed25a701f8b6613ca00e5e632a6382a6 (patch)
tree: 0c82c716bd1f4f88d7645499692f3e213f4ffe68 /libavcodec/vc1dec.c
parent: 41faa87886e6fc54f159da6940b9edbfcd194714 (diff)
download: ffmpeg-4f717c69ed25a701f8b6613ca00e5e632a6382a6.tar.gz
1 files changed, 18 insertions, 5 deletions
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 5d4dd0633b..6172e0c047 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -2028,8 +2028,12 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
                 block[idx] += (block[idx] < 0) ? -mquant : mquant;
         }
         if(!skip_block){
-            s->dsp.vc1_inv_trans_8x8(block);
-            s->dsp.add_pixels_clamped(block, dst, linesize);
+            if(i==1)
+                s->dsp.vc1_inv_trans_8x8_dc(dst, linesize, block);
+            else{
+                s->dsp.vc1_inv_trans_8x8(block);
+                s->dsp.add_pixels_clamped(block, dst, linesize);
+            }
             if(apply_filter && cbp_top  & 0xC)
                 s->dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
             if(apply_filter && cbp_left & 0xA)
@@ -2053,7 +2057,10 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
                     block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant;
             }
             if(!(subblkpat & (1 << (3 - j))) && !skip_block){
-                s->dsp.vc1_inv_trans_4x4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off);
+                if(i==1)
+                    s->dsp.vc1_inv_trans_4x4_dc(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off);
+                else
+                    s->dsp.vc1_inv_trans_4x4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off);
                 if(apply_filter && (j&2 ? pat & (1<<(j-2)) : (cbp_top & (1 << (j + 2)))))
                     s->dsp.vc1_v_loop_filter4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, v->pq);
                 if(apply_filter && (j&1 ? pat & (1<<(j-1)) : (cbp_left & (1 << (j + 1)))))
@@ -2078,7 +2085,10 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
                     block[idx] += (block[idx] < 0) ? -mquant : mquant;
             }
             if(!(subblkpat & (1 << (1 - j))) && !skip_block){
-                s->dsp.vc1_inv_trans_8x4(dst + j*4*linesize, linesize, block + off);
+                if(i==1)
+                    s->dsp.vc1_inv_trans_8x4_dc(dst + j*4*linesize, linesize, block + off);
+                else
+                    s->dsp.vc1_inv_trans_8x4(dst + j*4*linesize, linesize, block + off);
                 if(apply_filter && j ? pat & 0x3 : (cbp_top & 0xC))
                     s->dsp.vc1_v_loop_filter8(dst + j*4*linesize, linesize, v->pq);
                 if(apply_filter && cbp_left & (2 << j))
@@ -2103,7 +2113,10 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
                     block[idx] += (block[idx] < 0) ? -mquant : mquant;
             }
             if(!(subblkpat & (1 << (1 - j))) && !skip_block){
-                s->dsp.vc1_inv_trans_4x8(dst + j*4, linesize, block + off);
+                if(i==1)
+                    s->dsp.vc1_inv_trans_4x8_dc(dst + j*4, linesize, block + off);
+                else
+                    s->dsp.vc1_inv_trans_4x8(dst + j*4, linesize, block + off);
                 if(apply_filter && cbp_top & (2 << j))
                     s->dsp.vc1_v_loop_filter4(dst + j*4, linesize, v->pq);
                 if(apply_filter && j ? pat & 0x5 : (cbp_left & 0xA))
author	Jason Garrett-Glaser <darkshikari@gmail.com>	2009-06-16 09:00:55 +0000
committer	Jason Garrett-Glaser <darkshikari@gmail.com>	2009-06-16 09:00:55 +0000
commit	4f717c69ed25a701f8b6613ca00e5e632a6382a6 (patch)
tree	0c82c716bd1f4f88d7645499692f3e213f4ffe68 /libavcodec/vc1dec.c
parent	41faa87886e6fc54f159da6940b9edbfcd194714 (diff)
download	ffmpeg-4f717c69ed25a701f8b6613ca00e5e632a6382a6.tar.gz