aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/i386/h264dsp_mmx.c
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2008-12-18 02:36:48 +0000
committer Michael Niedermayer <michaelni@gmx.at> 2008-12-18 02:36:48 +0000
commit ac22385931f847cdabde4df8b0f6450d0ad14de1 (patch)
tree e28a9f68de28ad4acf6f9916769758878a6542c2 /libavcodec/i386/h264dsp_mmx.c
parent 1d217cdb3f4704cd533d7a80a9a30be473ee65b6 (diff)
download ffmpeg-ac22385931f847cdabde4df8b0f6450d0ad14de1.tar.gz
H.264 idct functions that include the chroma, inter luma and intra16 luma loops
thus avoiding the calling overhead. New functions are not yet used. Originally committed as revision 16206 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/h264dsp_mmx.c')
-rw-r--r-- libavcodec/i386/h264dsp_mmx.c 95
1 files changed, 95 insertions, 0 deletions
diff --git a/libavcodec/i386/h264dsp_mmx.c b/libavcodec/i386/h264dsp_mmx.c
index bb9c82d612..7d19f995ec 100644
--- a/libavcodec/i386/h264dsp_mmx.c
+++ b/libavcodec/i386/h264dsp_mmx.c
@@ -376,6 +376,101 @@ static void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
}
}
+//FIXME this table is a duplicate of the one in h264data.h and will be removed once the tables from h264 have been split
+static const uint8_t scan8[16 + 2*4]={ // maps a block index to a position x + y*8; used below to index nnzc[6*8]
+ 4+1*8, 5+1*8, 4+2*8, 5+2*8, // entries  0..3
+ 6+1*8, 7+1*8, 6+2*8, 7+2*8, // entries  4..7
+ 4+3*8, 5+3*8, 4+4*8, 5+4*8, // entries  8..11
+ 6+3*8, 7+3*8, 6+4*8, 7+4*8, // entries 12..15 (entries 0..15 are the ones the *_add16* / *_add4* loops read)
+ 1+1*8, 2+1*8, // entries 16..17
+ 1+2*8, 2+2*8, // entries 18..19 (16..19 are paired with dest[0] by the idct_add8 loops)
+ 1+4*8, 2+4*8, // entries 20..21
+ 1+5*8, 2+5*8, // entries 22..23 (20..23 are paired with dest[1] by the idct_add8 loops)
+};
+
+/* Calls ff_h264_idct_add_mmx() for each block index 0..15 whose nnzc[scan8[index]] is nonzero, passing dst + block_offset[index] and block + index*16. */
+static void ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    for(int blk=0; blk<16; blk++){
+        if(!nnzc[ scan8[blk] ]) continue; // nothing flagged for this block
+        ff_h264_idct_add_mmx(dst + block_offset[blk], block + blk*16, stride);
+    }
+}
+
+/* Visits block indices 0, 4, 8 and 12 and calls ff_h264_idct8_add_mmx() for each one whose nnzc[scan8[index]] is nonzero. */
+static void ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    for(int blk=0; blk<16; blk+=4){
+        if(!nnzc[ scan8[blk] ]) continue; // nothing flagged for this block
+        ff_h264_idct8_add_mmx(dst + block_offset[blk], block + blk*16, stride);
+    }
+}
+
+
+/* For each block index 0..15 with a nonzero nnzc[scan8[index]]: if that value is exactly 1 and the block's first coefficient is nonzero, calls ff_h264_idct_dc_add_mmx2(); otherwise calls ff_h264_idct_add_mmx(). */
+static void ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    for(int blk=0; blk<16; blk++){
+        const int count = nnzc[ scan8[blk] ];
+        if(!count) continue; // block is skipped entirely, its coefficients are not inspected
+        DCTELEM *coeffs = block + blk*16;
+        if(count==1 && coeffs[0]) ff_h264_idct_dc_add_mmx2(dst + block_offset[blk], coeffs, stride);
+        else                      ff_h264_idct_add_mmx    (dst + block_offset[blk], coeffs, stride);
+    }
+}
+
+/* Calls ff_h264_idct_add_mmx() for each block index 0..15 where nnzc[scan8[index]] or the block's first coefficient is nonzero. */
+static void ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    for(int blk=0; blk<16; blk++){
+        if(!nnzc[ scan8[blk] ] && !block[blk*16]) continue; // neither condition holds, skip
+        ff_h264_idct_add_mmx(dst + block_offset[blk], block + blk*16, stride);
+    }
+}
+
+/* For each block index 0..15: a nonzero nnzc[scan8[index]] selects ff_h264_idct_add_mmx(); failing that, a nonzero first coefficient selects ff_h264_idct_dc_add_mmx2(). */
+static void ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    for(int blk=0; blk<16; blk++){
+        if(nnzc[ scan8[blk] ])   ff_h264_idct_add_mmx    (dst + block_offset[blk], block + blk*16, stride);
+        else if(block[blk*16])   ff_h264_idct_dc_add_mmx2(dst + block_offset[blk], block + blk*16, stride);
+    }
+}
+
+/* For block indices 0, 4, 8 and 12 with a nonzero nnzc[scan8[index]]: if that value is exactly 1 and the block's first coefficient is nonzero, calls ff_h264_idct8_dc_add_mmx2(); otherwise calls ff_h264_idct8_add_mmx(). */
+static void ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    for(int blk=0; blk<16; blk+=4){
+        const int count = nnzc[ scan8[blk] ];
+        if(!count) continue; // block is skipped entirely, its coefficients are not inspected
+        DCTELEM *coeffs = block + blk*16;
+        if(count==1 && coeffs[0]) ff_h264_idct8_dc_add_mmx2(dst + block_offset[blk], coeffs, stride);
+        else                      ff_h264_idct8_add_mmx    (dst + block_offset[blk], coeffs, stride);
+    }
+}
+
+/* For block indices 0, 4, 8 and 12 with a nonzero nnzc[scan8[index]]: if that value is exactly 1 and the block's first coefficient is nonzero, calls ff_h264_idct8_dc_add_mmx2(); otherwise calls ff_h264_idct8_add_sse2(). */
+static void ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    for(int blk=0; blk<16; blk+=4){
+        const int count = nnzc[ scan8[blk] ];
+        if(!count) continue; // block is skipped entirely, its coefficients are not inspected
+        DCTELEM *coeffs = block + blk*16;
+        if(count==1 && coeffs[0]) ff_h264_idct8_dc_add_mmx2(dst + block_offset[blk], coeffs, stride);
+        else                      ff_h264_idct8_add_sse2   (dst + block_offset[blk], coeffs, stride);
+    }
+}
+
+/* Handles block indices 16..23 (16-19 based at dest[0], 20-23 at dest[1]); calls ff_h264_idct_add_mmx() when nnzc[scan8[index]] or the block's first coefficient is nonzero. */
+static void ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    for(int blk=16; blk<16+8; blk++){
+        if(!nnzc[ scan8[blk] ] && !block[blk*16]) continue; // neither condition holds, skip
+        ff_h264_idct_add_mmx(dest[(blk>>2)&1] + block_offset[blk], block + blk*16, stride);
+    }
+}
+
+/* Handles block indices 16..23: indices 16-19 use dest[0] and indices 20-23 use dest[1] as the base pointer.
+ * A nonzero nnzc[scan8[index]] selects ff_h264_idct_add_mmx(); failing that, a nonzero first coefficient selects ff_h264_idct_dc_add_mmx2(). */
+static void ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+    for(int blk=16; blk<16+8; blk++){
+        DCTELEM *coeffs = block + blk*16;
+        if(nnzc[ scan8[blk] ]) ff_h264_idct_add_mmx    (dest[(blk>>2)&1] + block_offset[blk], coeffs, stride);
+        else if(coeffs[0])     ff_h264_idct_dc_add_mmx2(dest[(blk>>2)&1] + block_offset[blk], coeffs, stride);
+    }
+}
/***********************************/
/* deblocking */