diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2002-04-27 12:30:26 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2002-04-27 12:30:26 +0000 |
commit | d7e9533aa06f4073a27812349b35ba5fede11ca1 (patch) | |
tree | 58b28f82ec84f27458d4065c140ba5e828d27b90 /libavcodec/i386/mpegvideo_mmx_template.c | |
parent | 1ac9248bb7c0c64bfebbd0ced1806a8899c396a9 (diff) | |
download | ffmpeg-d7e9533aa06f4073a27812349b35ba5fede11ca1.tar.gz |
fixed msmpeg4 infinite loop if buggy stream
rewrote quantizer
fixed bias (+10% compression/quality for h263 like codecs)
qscale=1 support
mpeg1 intra frames looks far less blocky
added codec_id field
Originally committed as revision 423 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/mpegvideo_mmx_template.c')
-rw-r--r-- | libavcodec/i386/mpegvideo_mmx_template.c | 252 |
1 files changed, 132 insertions, 120 deletions
diff --git a/libavcodec/i386/mpegvideo_mmx_template.c b/libavcodec/i386/mpegvideo_mmx_template.c index 2b33229159..3f9f9d78ee 100644 --- a/libavcodec/i386/mpegvideo_mmx_template.c +++ b/libavcodec/i386/mpegvideo_mmx_template.c @@ -33,149 +33,160 @@ static int RENAME(dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, - int qscale) + int qscale, int *overflow) { - int i, level, last_non_zero_p1, q; - const UINT16 *qmat; + int level=0, last_non_zero_p1, q; //=0 is cuz gcc says uninitalized ... + const UINT16 *qmat, *bias; static __align8 INT16 temp_block[64]; - int minLevel, maxLevel; - - if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){ - /* mpeg4 */ - minLevel= -2048; - maxLevel= 2047; - }else if(s->out_format==FMT_MPEG1){ - /* mpeg1 */ - minLevel= -255; - maxLevel= 255; - }else if(s->out_format==FMT_MJPEG){ - /* (m)jpeg */ - minLevel= -1023; - maxLevel= 1023; - }else{ - /* h263 / msmpeg4 */ - minLevel= -128; - maxLevel= 127; - } av_fdct (block); - + if (s->mb_intra) { int dummy; if (n < 4) q = s->y_dc_scale; else q = s->c_dc_scale; - /* note: block[0] is assumed to be positive */ #if 1 - asm volatile ( - "xorl %%edx, %%edx \n\t" - "mul %%ecx \n\t" - : "=d" (temp_block[0]), "=a"(dummy) - : "a" (block[0] + (q >> 1)), "c" (inverse[q]) - ); + asm volatile ( + "xorl %%edx, %%edx \n\t" + "mul %%ecx \n\t" + : "=d" (level), "=a"(dummy) + : "a" (block[0] + (q >> 1)), "c" (inverse[q]) + ); #else - asm volatile ( - "xorl %%edx, %%edx \n\t" - "divw %%cx \n\t" - "movzwl %%ax, %%eax \n\t" - : "=a" (temp_block[0]) - : "a" (block[0] + (q >> 1)), "c" (q) - : "%edx" - ); + asm volatile ( + "xorl %%edx, %%edx \n\t" + "divw %%cx \n\t" + "movzwl %%ax, %%eax \n\t" + : "=a" (level) + : "a" (block[0] + (q >> 1)), "c" (q) + : "%edx" + ); #endif + block[0]=0; //avoid fake overflow // temp_block[0] = (block[0] + (q >> 1)) / q; - i = 1; last_non_zero_p1 = 1; - if (s->out_format == FMT_H263) { - qmat = s->q_non_intra_matrix16; - } else { - qmat = s->q_intra_matrix16; - } - for(i=1;i<4;i++) { - level = block[i] * qmat[i]; - level = level / (1 << (QMAT_SHIFT_MMX - 3)); - /* XXX: currently, this code is not optimal. the range should be: - mpeg1: -255..255 - mpeg2: -2048..2047 - h263: -128..127 - mpeg4: -2048..2047 - */ - if (level > maxLevel) - level = maxLevel; - else if (level < minLevel) - level = minLevel; - temp_block[i] = level; - - if(level) - if(last_non_zero_p1 < inv_zigzag_direct16[i]) last_non_zero_p1= inv_zigzag_direct16[i]; - block[i]=0; - } + bias = s->q_intra_matrix16_bias[qscale]; + qmat = s->q_intra_matrix16[qscale]; } else { - i = 0; last_non_zero_p1 = 0; - qmat = s->q_non_intra_matrix16; + bias = s->q_inter_matrix16_bias[qscale]; + qmat = s->q_inter_matrix16[qscale]; } - asm volatile( /* XXX: small rounding bug, but it shouldnt matter */ - "movd %3, %%mm3 \n\t" - SPREADW(%%mm3) - "movd %4, %%mm4 \n\t" - SPREADW(%%mm4) -#ifndef HAVE_MMX2 - "movd %5, %%mm5 \n\t" - SPREADW(%%mm5) -#endif - "pxor %%mm7, %%mm7 \n\t" - "movd %%eax, %%mm2 \n\t" - SPREADW(%%mm2) - "movl %6, %%eax \n\t" - ".balign 16 \n\t" - "1: \n\t" - "movq (%1, %%eax), %%mm0 \n\t" - "movq (%2, %%eax), %%mm1 \n\t" - "movq %%mm0, %%mm6 \n\t" - "psraw $15, %%mm6 \n\t" - "pmulhw %%mm0, %%mm1 \n\t" - "psubsw %%mm6, %%mm1 \n\t" -#ifdef HAVE_MMX2 - "pminsw %%mm3, %%mm1 \n\t" - "pmaxsw %%mm4, %%mm1 \n\t" -#else - "paddsw %%mm3, %%mm1 \n\t" - "psubusw %%mm4, %%mm1 \n\t" - "paddsw %%mm5, %%mm1 \n\t" -#endif - "movq %%mm1, (%8, %%eax) \n\t" - "pcmpeqw %%mm7, %%mm1 \n\t" - "movq (%7, %%eax), %%mm0 \n\t" - "movq %%mm7, (%1, %%eax) \n\t" - "pandn %%mm0, %%mm1 \n\t" - PMAXW(%%mm1, %%mm2) - "addl $8, %%eax \n\t" - " js 1b \n\t" - "movq %%mm2, %%mm0 \n\t" - "psrlq $32, %%mm2 \n\t" - PMAXW(%%mm0, %%mm2) - "movq %%mm2, %%mm0 \n\t" - "psrlq $16, %%mm2 \n\t" - PMAXW(%%mm0, %%mm2) - "movd %%mm2, %%eax \n\t" - "movzbl %%al, %%eax \n\t" - : "+a" (last_non_zero_p1) - : "r" (block+64), "r" (qmat+64), -#ifdef HAVE_MMX2 - "m" (maxLevel), "m" (minLevel), "m" (minLevel /* dummy */), "g" (2*i - 128), -#else - "m" (0x7FFF - maxLevel), "m" (0x7FFF -maxLevel + minLevel), "m" (minLevel), "g" (2*i - 128), -#endif - "r" (inv_zigzag_direct16+64), "r" (temp_block+64) - ); + if(s->out_format == FMT_H263){ + + asm volatile( + "movd %%eax, %%mm3 \n\t" // last_non_zero_p1 + SPREADW(%%mm3) + "pxor %%mm7, %%mm7 \n\t" // 0 + "pxor %%mm4, %%mm4 \n\t" // 0 + "movq (%2), %%mm5 \n\t" // qmat[0] + "pxor %%mm6, %%mm6 \n\t" + "psubw (%3), %%mm6 \n\t" // -bias[0] + "movl $-128, %%eax \n\t" + ".balign 16 \n\t" + "1: \n\t" + "pxor %%mm1, %%mm1 \n\t" // 0 + "movq (%1, %%eax), %%mm0 \n\t" // block[i] + "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) + "psubusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] + "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 + "por %%mm0, %%mm4 \n\t" + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) + "movq %%mm0, (%5, %%eax) \n\t" + "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 + "movq (%4, %%eax), %%mm1 \n\t" + "movq %%mm7, (%1, %%eax) \n\t" // 0 + "pandn %%mm1, %%mm0 \n\t" + PMAXW(%%mm0, %%mm3) + "addl $8, %%eax \n\t" + " js 1b \n\t" + "movq %%mm3, %%mm0 \n\t" + "psrlq $32, %%mm3 \n\t" + PMAXW(%%mm0, %%mm3) + "movq %%mm3, %%mm0 \n\t" + "psrlq $16, %%mm3 \n\t" + PMAXW(%%mm0, %%mm3) + "movd %%mm3, %%eax \n\t" + "movzbl %%al, %%eax \n\t" // last_non_zero_p1 + : "+a" (last_non_zero_p1) + : "r" (block+64), "r" (qmat), "r" (bias), + "r" (inv_zigzag_direct16+64), "r" (temp_block+64) + ); + // note the asm is split cuz gcc doesnt like that many operands ... + asm volatile( + "movd %1, %%mm1 \n\t" // max_qcoeff + SPREADW(%%mm1) + "psubusw %%mm1, %%mm4 \n\t" + "packuswb %%mm4, %%mm4 \n\t" + "movd %%mm4, %0 \n\t" // *overflow + : "=g" (*overflow) + : "g" (s->max_qcoeff) + ); + }else{ // FMT_H263 + asm volatile( + "movd %%eax, %%mm3 \n\t" // last_non_zero_p1 + SPREADW(%%mm3) + "pxor %%mm7, %%mm7 \n\t" // 0 + "pxor %%mm4, %%mm4 \n\t" // 0 + "movl $-128, %%eax \n\t" + ".balign 16 \n\t" + "1: \n\t" + "pxor %%mm1, %%mm1 \n\t" // 0 + "movq (%1, %%eax), %%mm0 \n\t" // block[i] + "pcmpgtw %%mm0, %%mm1 \n\t" // block[i] <= 0 ? 0xFF : 0x00 + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // ABS(block[i]) + "movq (%3, %%eax), %%mm6 \n\t" // bias[0] + "paddusw %%mm6, %%mm0 \n\t" // ABS(block[i]) + bias[0] + "movq (%2, %%eax), %%mm5 \n\t" // qmat[i] + "pmulhw %%mm5, %%mm0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 + "por %%mm0, %%mm4 \n\t" + "pxor %%mm1, %%mm0 \n\t" + "psubw %%mm1, %%mm0 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) + "movq %%mm0, (%5, %%eax) \n\t" + "pcmpeqw %%mm7, %%mm0 \n\t" // out==0 ? 0xFF : 0x00 + "movq (%4, %%eax), %%mm1 \n\t" + "movq %%mm7, (%1, %%eax) \n\t" // 0 + "pandn %%mm1, %%mm0 \n\t" + PMAXW(%%mm0, %%mm3) + "addl $8, %%eax \n\t" + " js 1b \n\t" + "movq %%mm3, %%mm0 \n\t" + "psrlq $32, %%mm3 \n\t" + PMAXW(%%mm0, %%mm3) + "movq %%mm3, %%mm0 \n\t" + "psrlq $16, %%mm3 \n\t" + PMAXW(%%mm0, %%mm3) + "movd %%mm3, %%eax \n\t" + "movzbl %%al, %%eax \n\t" // last_non_zero_p1 + : "+a" (last_non_zero_p1) + : "r" (block+64), "r" (qmat+64), "r" (bias+64), + "r" (inv_zigzag_direct16+64), "r" (temp_block+64) + ); + // note the asm is split cuz gcc doesnt like that many operands ... + asm volatile( + "movd %1, %%mm1 \n\t" // max_qcoeff + SPREADW(%%mm1) + "psubusw %%mm1, %%mm4 \n\t" + "packuswb %%mm4, %%mm4 \n\t" + "movd %%mm4, %0 \n\t" // *overflow + : "=g" (*overflow) + : "g" (s->max_qcoeff) + ); + } + + if(s->mb_intra) temp_block[0]= level; //FIXME move afer permute // last_non_zero_p1=64; /* permute for IDCT */ asm volatile( - "movl %0, %%eax \n\t" + "movl %0, %%eax \n\t" "pushl %%ebp \n\t" "movl %%esp, " MANGLE(esp_temp) "\n\t" "1: \n\t" @@ -203,5 +214,6 @@ static int RENAME(dct_quantize)(MpegEncContext *s, } */ //block_permute(block); + return last_non_zero_p1 - 1; } |