diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2001-10-17 13:59:49 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2001-10-17 13:59:49 +0000 |
commit | d9cf0d3368f3067183f0353d0a6dd255556fce20 (patch) | |
tree | 69271c5023d614306255d22eb624e6f1ee6c322e | |
parent | fffd2e0ac258511a044501802fba4938c53a8caf (diff) | |
download | ffmpeg-d9cf0d3368f3067183f0353d0a6dd255556fce20.tar.gz |
24bpp support (untested)
Originally committed as revision 2238 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
-rw-r--r-- | postproc/swscale.c | 54 |
-rw-r--r-- | postproc/swscale_template.c | 54 |
2 files changed, 104 insertions, 4 deletions
diff --git a/postproc/swscale.c b/postproc/swscale.c index c5170490a0..b02fe41152 100644 --- a/postproc/swscale.c +++ b/postproc/swscale.c @@ -31,6 +31,8 @@ static uint64_t ugCoeff= 0xE5E2E5E2E5E2E5E2LL; static uint64_t vgCoeff= 0xF36EF36EF36EF36ELL; static uint64_t w80= 0x0080008000800080LL; static uint64_t w10= 0x0010001000100010LL; +static uint64_t bm00000111=0x0000000000FFFFFFLL; +static uint64_t bm11111000=0xFFFFFFFFFF000000LL; static uint64_t b16Dither= 0x0004000400040004LL; static uint64_t b16Dither1=0x0004000400040004LL; @@ -412,7 +414,6 @@ s_xinc&= -2; //clear last bit or uv and y might be shifted relative to each othe ); #elif defined (ARCH_X86) - //NO MMX just normal asm ... FIXME try/write funny MMX2 variant asm volatile( "xorl %%eax, %%eax \n\t" // i "xorl %%ebx, %%ebx \n\t" // xx @@ -555,6 +556,55 @@ YSCALEYUV2RGB : "%eax" ); } + else if(dstbpp==24) + { + asm volatile( + +YSCALEYUV2RGB + + // lsb ... msb + "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG + "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 + + "movq %%mm3, %%mm1 \n\t" + "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 + "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 + + "movq %%mm3, %%mm2 \n\t" // BGR0BGR0 + "psrlq $8, %%mm3 \n\t" // GR0BGR00 + "pand bm00000111, %%mm2 \n\t" // BGR00000 + "pand bm11111000, %%mm3 \n\t" // 000BGR00 + "por %%mm2, %%mm3 \n\t" // BGRBGR00 + "movq %%mm1, %%mm2 \n\t" + "psllq $48, %%mm1 \n\t" // 000000BG + "por %%mm1, %%mm3 \n\t" // BGRBGRBG + + "movq %%mm2, %%mm1 \n\t" // BGR0BGR0 + "psrld $16, %%mm2 \n\t" // R000R000 + "psrlq $24, %%mm1 \n\t" // 0BGR0000 + "por %%mm2, %%mm1 \n\t" // RBGRR000 + + "movl %4, %%ebx \n\t" + "addl %%eax, %%ebx \n\t" +#ifdef HAVE_MMX2 + //FIXME Alignment + "movntq %%mm3, (%%ebx, %%eax, 2)\n\t" + "movntq %%mm1, 8(%%ebx, %%eax, 2)\n\t" +#else + "movd %%mm3, (%%ebx, %%eax, 2) \n\t" + "psrlq $32, %%mm3 \n\t" + "movd %%mm3, 4(%%ebx, %%eax, 2) \n\t" + "movd %%mm1, 8(%%ebx, %%eax, 2) \n\t" +#endif + "addl $4, %%eax \n\t" + "cmpl %5, %%eax \n\t" + " jb 1b \n\t" + + 
:: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw), + "m" (yalpha1), "m" (uvalpha1) + : "%eax", "%ebx" + ); + } else if(dstbpp==16) { asm volatile( @@ -603,7 +653,7 @@ YSCALEYUV2RGB dest+=dstbpp>>3; } } - else if(dstbpp==16) //16bit + else if(dstbpp==16) { for(i=0;i<dstw;i++){ // vertical linear interpolation && yuv2rgb in a single step: diff --git a/postproc/swscale_template.c b/postproc/swscale_template.c index c5170490a0..b02fe41152 100644 --- a/postproc/swscale_template.c +++ b/postproc/swscale_template.c @@ -31,6 +31,8 @@ static uint64_t ugCoeff= 0xE5E2E5E2E5E2E5E2LL; static uint64_t vgCoeff= 0xF36EF36EF36EF36ELL; static uint64_t w80= 0x0080008000800080LL; static uint64_t w10= 0x0010001000100010LL; +static uint64_t bm00000111=0x0000000000FFFFFFLL; +static uint64_t bm11111000=0xFFFFFFFFFF000000LL; static uint64_t b16Dither= 0x0004000400040004LL; static uint64_t b16Dither1=0x0004000400040004LL; @@ -412,7 +414,6 @@ s_xinc&= -2; //clear last bit or uv and y might be shifted relative to each othe ); #elif defined (ARCH_X86) - //NO MMX just normal asm ... FIXME try/write funny MMX2 variant asm volatile( "xorl %%eax, %%eax \n\t" // i "xorl %%ebx, %%ebx \n\t" // xx @@ -555,6 +556,55 @@ YSCALEYUV2RGB : "%eax" ); } + else if(dstbpp==24) + { + asm volatile( + +YSCALEYUV2RGB + + // lsb ... 
msb + "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG + "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 + + "movq %%mm3, %%mm1 \n\t" + "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 + "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 + + "movq %%mm3, %%mm2 \n\t" // BGR0BGR0 + "psrlq $8, %%mm3 \n\t" // GR0BGR00 + "pand bm00000111, %%mm2 \n\t" // BGR00000 + "pand bm11111000, %%mm3 \n\t" // 000BGR00 + "por %%mm2, %%mm3 \n\t" // BGRBGR00 + "movq %%mm1, %%mm2 \n\t" + "psllq $48, %%mm1 \n\t" // 000000BG + "por %%mm1, %%mm3 \n\t" // BGRBGRBG + + "movq %%mm2, %%mm1 \n\t" // BGR0BGR0 + "psrld $16, %%mm2 \n\t" // R000R000 + "psrlq $24, %%mm1 \n\t" // 0BGR0000 + "por %%mm2, %%mm1 \n\t" // RBGRR000 + + "movl %4, %%ebx \n\t" + "addl %%eax, %%ebx \n\t" +#ifdef HAVE_MMX2 + //FIXME Alignment + "movntq %%mm3, (%%ebx, %%eax, 2)\n\t" + "movntq %%mm1, 8(%%ebx, %%eax, 2)\n\t" +#else + "movd %%mm3, (%%ebx, %%eax, 2) \n\t" + "psrlq $32, %%mm3 \n\t" + "movd %%mm3, 4(%%ebx, %%eax, 2) \n\t" + "movd %%mm1, 8(%%ebx, %%eax, 2) \n\t" +#endif + "addl $4, %%eax \n\t" + "cmpl %5, %%eax \n\t" + " jb 1b \n\t" + + :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstw), + "m" (yalpha1), "m" (uvalpha1) + : "%eax", "%ebx" + ); + } else if(dstbpp==16) { asm volatile( @@ -603,7 +653,7 @@ YSCALEYUV2RGB dest+=dstbpp>>3; } } - else if(dstbpp==16) //16bit + else if(dstbpp==16) { for(i=0;i<dstw;i++){ // vertical linear interpolation && yuv2rgb in a single step: |