diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2002-01-20 05:30:23 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2002-01-20 05:30:23 +0000 |
commit | 28bf81c90d36a55cf76e2be913c5215ebebf61f2 (patch) | |
tree | c465262f4b3f57b991027ecbc33fe8e1619c6e1a /postproc/swscale_template.c | |
parent | cbf5fa71c9cf556cb3117b2fd31b3dcf0ce52c49 (diff) | |
download | ffmpeg-28bf81c90d36a55cf76e2be913c5215ebebf61f2.tar.gz |
removed global vars so that multiple swscalers can be used
experimental upscaling mode (-sws 3)
general convolution filters support (unfinished)
bugfix for bicubic upscaling
assertion checking if MP_DEBUG is defined
checking of the input/output size instead of segfaulting if it is very large
Originally committed as revision 4277 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
Diffstat (limited to 'postproc/swscale_template.c')
-rw-r--r-- | postproc/swscale_template.c | 801 |
1 files changed, 157 insertions, 644 deletions
diff --git a/postproc/swscale_template.c b/postproc/swscale_template.c index 1cdf7ab7b8..b70806b1a1 100644 --- a/postproc/swscale_template.c +++ b/postproc/swscale_template.c @@ -734,16 +734,16 @@ static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc, */ static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, - uint8_t *dest, int dstW, int dstbpp, int16_t * lumMmxFilter, int16_t * chrMmxFilter) + uint8_t *dest, int dstW, int dstFormat, int16_t * lumMmxFilter, int16_t * chrMmxFilter) { - if(fullUVIpol) +/* if(flags&SWS_FULL_UV_IPOL) { //FIXME }//FULL_UV_IPOL - else + else*/ { #ifdef HAVE_MMX - if(dstbpp == 32) //FIXME untested + if(dstFormat == IMGFMT_BGR32) //FIXME untested { asm volatile( YSCALEYUV2RGBX @@ -756,7 +756,7 @@ static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lu : "%eax", "%ebx", "%ecx", "%edx", "%esi" ); } - else if(dstbpp==24) //FIXME untested + else if(dstFormat == IMGFMT_BGR24) //FIXME untested { asm volatile( YSCALEYUV2RGBX @@ -771,7 +771,7 @@ static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lu : "%eax", "%ebx", "%ecx", "%edx", "%esi" ); } - else if(dstbpp==15) + else if(dstFormat==IMGFMT_BGR15) { asm volatile( YSCALEYUV2RGBX @@ -791,7 +791,7 @@ static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lu : "%eax", "%ebx", "%ecx", "%edx", "%esi" ); } - else if(dstbpp==16) + else if(dstFormat==IMGFMT_BGR16) { asm volatile( YSCALEYUV2RGBX @@ -814,7 +814,7 @@ static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lu #else yuv2rgbXinC(lumFilter, lumSrc, lumFilterSize, chrFilter, chrSrc, chrFilterSize, - dest, dstW, dstbpp); + dest, dstW, dstFormat); #endif } //!FULL_UV_IPOL @@ -825,16 +825,16 @@ yuv2rgbXinC(lumFilter, lumSrc, lumFilterSize, * vertical bilinear scale YV12 to RGB */ static inline void RENAME(yuv2rgb2)(uint16_t *buf0, uint16_t 
*buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, - uint8_t *dest, int dstW, int yalpha, int uvalpha, int dstbpp) + uint8_t *dest, int dstW, int yalpha, int uvalpha, int dstFormat, int flags) { int yalpha1=yalpha^4095; int uvalpha1=uvalpha^4095; - if(fullUVIpol) + if(flags&SWS_FULL_UV_IPOL) { #ifdef HAVE_MMX - if(dstbpp == 32) + if(dstFormat==IMGFMT_BGR32) { asm volatile( @@ -860,7 +860,7 @@ FULL_YSCALEYUV2RGB : "%eax" ); } - else if(dstbpp==24) + else if(dstFormat==IMGFMT_BGR24) { asm volatile( @@ -910,7 +910,7 @@ FULL_YSCALEYUV2RGB : "%eax", "%ebx" ); } - else if(dstbpp==15) + else if(dstFormat==IMGFMT_BGR15) { asm volatile( @@ -944,7 +944,7 @@ FULL_YSCALEYUV2RGB : "%eax" ); } - else if(dstbpp==16) + else if(dstFormat==IMGFMT_BGR16) { asm volatile( @@ -979,7 +979,21 @@ FULL_YSCALEYUV2RGB ); } #else - if(dstbpp==32 || dstbpp==24) + if(dstFormat==IMGFMT_BGR32) + { + int i; + for(i=0;i<dstW;i++){ + // vertical linear interpolation && yuv2rgb in a single step: + int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; + int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); + int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); + dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; + dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; + dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; + dest+= 4; + } + } + else if(dstFormat==IMGFMT_BGR24) { int i; for(i=0;i<dstW;i++){ @@ -990,10 +1004,10 @@ FULL_YSCALEYUV2RGB dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; - dest+=dstbpp>>3; + dest+= 3; } } - else if(dstbpp==16) + else if(dstFormat==IMGFMT_BGR16) { int i; for(i=0;i<dstW;i++){ @@ -1008,7 +1022,7 @@ FULL_YSCALEYUV2RGB clip_table16r[(Y + yuvtab_3343[V]) >>13]; } } - else if(dstbpp==15) + else if(dstFormat==IMGFMT_BGR15) { int i; for(i=0;i<dstW;i++){ @@ -1028,7 +1042,7 @@ FULL_YSCALEYUV2RGB else { #ifdef HAVE_MMX - if(dstbpp == 32) + 
if(dstFormat==IMGFMT_BGR32) { asm volatile( YSCALEYUV2RGB @@ -1039,7 +1053,7 @@ FULL_YSCALEYUV2RGB : "%eax" ); } - else if(dstbpp==24) + else if(dstFormat==IMGFMT_BGR24) { asm volatile( "movl %4, %%ebx \n\t" @@ -1051,7 +1065,7 @@ FULL_YSCALEYUV2RGB : "%eax", "%ebx" ); } - else if(dstbpp==15) + else if(dstFormat==IMGFMT_BGR15) { asm volatile( YSCALEYUV2RGB @@ -1069,7 +1083,7 @@ FULL_YSCALEYUV2RGB : "%eax" ); } - else if(dstbpp==16) + else if(dstFormat==IMGFMT_BGR16) { asm volatile( YSCALEYUV2RGB @@ -1088,7 +1102,7 @@ FULL_YSCALEYUV2RGB ); } #else - if(dstbpp==32) + if(dstFormat==IMGFMT_BGR32) { int i; for(i=0; i<dstW-1; i+=2){ @@ -1111,7 +1125,7 @@ FULL_YSCALEYUV2RGB dest[4*i+6]=clip_table[((Y2 + Cr) >>13)]; } } - else if(dstbpp==24) + else if(dstFormat==IMGFMT_BGR24) { int i; for(i=0; i<dstW-1; i+=2){ @@ -1135,7 +1149,7 @@ FULL_YSCALEYUV2RGB dest+=6; } } - else if(dstbpp==16) + else if(dstFormat==IMGFMT_BGR16) { int i; for(i=0; i<dstW-1; i+=2){ @@ -1160,7 +1174,7 @@ FULL_YSCALEYUV2RGB clip_table16r[(Y2 + Cr) >>13]; } } - else if(dstbpp==15) + else if(dstFormat==IMGFMT_BGR15) { int i; for(i=0; i<dstW-1; i+=2){ @@ -1193,21 +1207,21 @@ FULL_YSCALEYUV2RGB * YV12 to RGB without scaling or interpolating */ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1, - uint8_t *dest, int dstW, int uvalpha, int dstbpp) + uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags) { int uvalpha1=uvalpha^4095; const int yalpha1=0; - if(fullUVIpol || allwaysIpol) + if(flags&SWS_FULL_UV_IPOL) { - RENAME(yuv2rgb2)(buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, dstbpp); + RENAME(yuv2rgb2)(buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, dstFormat, flags); return; } #ifdef HAVE_MMX if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but its a bit faster { - if(dstbpp == 32) + if(dstFormat==IMGFMT_BGR32) { asm volatile( YSCALEYUV2RGB1 @@ -1217,7 +1231,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, 
uint16_t *uvbuf0, uint16_t * : "%eax" ); } - else if(dstbpp==24) + else if(dstFormat==IMGFMT_BGR24) { asm volatile( "movl %4, %%ebx \n\t" @@ -1228,7 +1242,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * : "%eax", "%ebx" ); } - else if(dstbpp==15) + else if(dstFormat==IMGFMT_BGR15) { asm volatile( YSCALEYUV2RGB1 @@ -1244,7 +1258,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * : "%eax" ); } - else if(dstbpp==16) + else if(dstFormat==IMGFMT_BGR16) { asm volatile( YSCALEYUV2RGB1 @@ -1264,7 +1278,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * } else { - if(dstbpp == 32) + if(dstFormat==IMGFMT_BGR32) { asm volatile( YSCALEYUV2RGB1b @@ -1274,7 +1288,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * : "%eax" ); } - else if(dstbpp==24) + else if(dstFormat==IMGFMT_BGR24) { asm volatile( "movl %4, %%ebx \n\t" @@ -1285,7 +1299,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * : "%eax", "%ebx" ); } - else if(dstbpp==15) + else if(dstFormat==IMGFMT_BGR15) { asm volatile( YSCALEYUV2RGB1b @@ -1301,7 +1315,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * : "%eax" ); } - else if(dstbpp==16) + else if(dstFormat==IMGFMT_BGR16) { asm volatile( YSCALEYUV2RGB1b @@ -1322,7 +1336,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * #else //FIXME write 2 versions (for even & odd lines) - if(dstbpp==32) + if(dstFormat==IMGFMT_BGR32) { int i; for(i=0; i<dstW-1; i+=2){ @@ -1345,7 +1359,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * dest[4*i+6]=clip_table[((Y2 + Cr) >>13)]; } } - else if(dstbpp==24) + else if(dstFormat==IMGFMT_BGR24) { int i; for(i=0; i<dstW-1; i+=2){ @@ -1369,7 +1383,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * dest+=6; } } - else if(dstbpp==16) 
+ else if(dstFormat==IMGFMT_BGR16) { int i; for(i=0; i<dstW-1; i+=2){ @@ -1394,7 +1408,7 @@ static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t * clip_table16r[(Y2 + Cr) >>13]; } } - else if(dstbpp==15) + else if(dstFormat==IMGFMT_BGR15) { int i; for(i=0; i<dstW-1; i+=2){ @@ -1584,13 +1598,15 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW #endif } // *** horizontal scale Y line to temp buffer -static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc) +static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc, + int flags, int canMMX2BeUsed, int16_t *hLumFilter, + int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode) { #ifdef HAVE_MMX // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one) - if(sws_flags != SWS_FAST_BILINEAR || (!canMMX2BeUsed)) + if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) #else - if(sws_flags != SWS_FAST_BILINEAR) + if(!(flags&SWS_FAST_BILINEAR)) #endif { RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); @@ -1614,7 +1630,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, in "psllq $16, %%mm2 \n\t" "paddw %%mm6, %%mm2 \n\t" "psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFF - "movq %%mm2, "MANGLE(temp0)" \n\t" + "movq %%mm2, %%mm4 \n\t" "movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF "punpcklwd %%mm6, %%mm6 \n\t" "punpcklwd %%mm6, %%mm6 \n\t" @@ -1630,8 +1646,8 @@ static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, in PREFETCH" 1024(%%esi) \n\t"\ PREFETCH" 1056(%%esi) \n\t"\ PREFETCH" 1088(%%esi) \n\t"\ - "call "MANGLE(funnyYCode)" \n\t"\ - "movq "MANGLE(temp0)", %%mm2 \n\t"\ + "call *%6 \n\t"\ + "movq %%mm4, %%mm2 \n\t"\ "xorl %%ecx, %%ecx \n\t" FUNNY_Y_CODE @@ -1644,7 +1660,7 @@ FUNNY_Y_CODE FUNNY_Y_CODE :: "m" (src), "m" (dst), "m" (dstWidth), "m" ((xInc*4)>>16), - "m" 
((xInc*4)&0xFFFF), "m" (xInc&0xFFFF) + "m" ((xInc*4)&0xFFFF), "m" (xInc&0xFFFF), "m" (funnyYCode) : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" ); for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128; @@ -1709,14 +1725,15 @@ FUNNY_Y_CODE } } -inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, - uint8_t *src1, uint8_t *src2, int srcW, int xInc) +inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, uint8_t *src2, + int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter, + int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode) { #ifdef HAVE_MMX // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one) - if(sws_flags != SWS_FAST_BILINEAR || (!canMMX2BeUsed)) + if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) #else - if(sws_flags != SWS_FAST_BILINEAR) + if(!(flags&SWS_FAST_BILINEAR)) #endif { RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); @@ -1741,7 +1758,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, "psllq $16, %%mm2 \n\t" "paddw %%mm6, %%mm2 \n\t" "psllq $16, %%mm2 \n\t" //0,t,2t,3t t=xInc&0xFFFF - "movq %%mm2, "MANGLE(temp0)" \n\t" + "movq %%mm2, %%mm4 \n\t" "movd %4, %%mm6 \n\t" //(xInc*4)&0xFFFF "punpcklwd %%mm6, %%mm6 \n\t" "punpcklwd %%mm6, %%mm6 \n\t" @@ -1757,8 +1774,8 @@ inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, PREFETCH" 1024(%%esi) \n\t"\ PREFETCH" 1056(%%esi) \n\t"\ PREFETCH" 1088(%%esi) \n\t"\ - "call "MANGLE(funnyUVCode)" \n\t"\ - "movq "MANGLE(temp0)", %%mm2 \n\t"\ + "call *%7 \n\t"\ + "movq %%mm4, %%mm2 \n\t"\ "xorl %%ecx, %%ecx \n\t" FUNNYUVCODE @@ -1786,7 +1803,7 @@ FUNNYUVCODE FUNNYUVCODE :: "m" (src1), "m" (dst), "m" (dstWidth), "m" ((xInc*4)>>16), - "m" ((xInc*4)&0xFFFF), "m" (xInc&0xFFFF), "m" (src2) + "m" ((xInc*4)&0xFFFF), "m" (xInc&0xFFFF), "m" (src2), "m" (funnyUVCode) : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" ); for(i=dstWidth-1; 
(i*xInc)>>16 >=srcW-1; i--) @@ -1858,585 +1875,67 @@ FUNNYUVCODE } } -static inline void RENAME(initFilter)(int16_t *dstFilter, int16_t *filterPos, int *filterSize, int xInc, - int srcW, int dstW, int filterAlign, int one) -{ - int i; - double filter[8000]; -#ifdef HAVE_MMX - asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions) -#endif - - if(ABS(xInc - 0x10000) <10) // unscaled - { - int i; - *filterSize= (1 +(filterAlign-1)) & (~(filterAlign-1)); // 1 or 4 normaly - for(i=0; i<dstW*(*filterSize); i++) filter[i]=0; - - for(i=0; i<dstW; i++) - { - filter[i*(*filterSize)]=1; - filterPos[i]=i; - } - - } - else if(xInc <= (1<<16) || sws_flags==SWS_FAST_BILINEAR) // upscale - { - int i; - int xDstInSrc; - if(sws_flags==SWS_BICUBIC) *filterSize= 4; - else *filterSize= 2; -// printf("%d %d %d\n", filterSize, srcW, dstW); - *filterSize= (*filterSize +(filterAlign-1)) & (~(filterAlign-1)); - - xDstInSrc= xInc/2 - 0x8000; - for(i=0; i<dstW; i++) - { - int xx= (xDstInSrc>>16) - (*filterSize>>1) + 1; - int j; - - filterPos[i]= xx; - if(sws_flags == SWS_BICUBIC) - { - double d= ABS(((xx+1)<<16) - xDstInSrc)/(double)(1<<16); - double y1,y2,y3,y4; - double A= -0.75; - // Equation is from VirtualDub - y1 = ( + A*d - 2.0*A*d*d + A*d*d*d); - y2 = (+ 1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d); - y3 = ( - A*d + (2.0*A+3.0)*d*d - (A+2.0)*d*d*d); - y4 = ( + A*d*d - A*d*d*d); - -// printf("%d %d %d \n", coeff, (int)d, xDstInSrc); - filter[i*(*filterSize) + 0]= y1; - filter[i*(*filterSize) + 1]= y2; - filter[i*(*filterSize) + 2]= y3; - filter[i*(*filterSize) + 3]= y4; -// printf("%1.3f %d, %d, %d, %d\n",d , y1, y2, y3, y4); - } - else - { - for(j=0; j<*filterSize; j++) - { - double d= ABS((xx<<16) - xDstInSrc)/(double)(1<<16); - double coeff= 1.0 - d; - if(coeff<0) coeff=0; - // printf("%d %d %d \n", coeff, (int)d, xDstInSrc); - filter[i*(*filterSize) + j]= coeff; - xx++; - } - } - xDstInSrc+= xInc; - } +static void 
RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, + int srcSliceH, uint8_t* dst[], int dstStride[]){ + + /* load a few things into local vars to make the code more readable? and faster */ + const int srcW= c->srcW; + const int dstW= c->dstW; + const int dstH= c->dstH; + const int chrDstW= c->chrDstW; + const int lumXInc= c->lumXInc; + const int chrXInc= c->chrXInc; + const int dstFormat= c->dstFormat; //FIXME serach for dstbpp; + const int flags= c->flags; + const int canMMX2BeUsed= c->canMMX2BeUsed; + int16_t *vLumFilterPos= c->vLumFilterPos; + int16_t *vChrFilterPos= c->vChrFilterPos; + int16_t *hLumFilterPos= c->hLumFilterPos; + int16_t *hChrFilterPos= c->hChrFilterPos; + int16_t *vLumFilter= c->vLumFilter; + int16_t *vChrFilter= c->vChrFilter; + int16_t *hLumFilter= c->hLumFilter; + int16_t *hChrFilter= c->hChrFilter; + int16_t *lumMmxFilter= c->lumMmxFilter; + int16_t *chrMmxFilter= c->chrMmxFilter; + const int vLumFilterSize= c->vLumFilterSize; + const int vChrFilterSize= c->vChrFilterSize; + const int hLumFilterSize= c->hLumFilterSize; + const int hChrFilterSize= c->hChrFilterSize; + int16_t **lumPixBuf= c->lumPixBuf; + int16_t **chrPixBuf= c->chrPixBuf; + const int vLumBufSize= c->vLumBufSize; + const int vChrBufSize= c->vChrBufSize; + uint8_t *funnyYCode= c->funnyYCode; + uint8_t *funnyUVCode= c->funnyUVCode; + + /* vars whch will change and which we need to storw back in the context */ + int dstY= c->dstY; + int lumBufIndex= c->lumBufIndex; + int chrBufIndex= c->chrBufIndex; + int lastInLumBuf= c->lastInLumBuf; + int lastInChrBuf= c->lastInChrBuf; + + if(srcSliceY ==0){ + lumBufIndex=0; + chrBufIndex=0; + dstY=0; + lastInLumBuf= -1; + lastInChrBuf= -1; } - else // downscale - { - int xDstInSrc; - if(sws_flags==SWS_BICUBIC) *filterSize= (int)ceil(1 + 4.0*srcW / (double)dstW); - else *filterSize= (int)ceil(1 + 2.0*srcW / (double)dstW); -// printf("%d %d %d\n", *filterSize, srcW, dstW); - *filterSize= (*filterSize 
+(filterAlign-1)) & (~(filterAlign-1)); - - xDstInSrc= xInc/2 - 0x8000; - for(i=0; i<dstW; i++) - { - int xx= (int)((double)xDstInSrc/(double)(1<<16) - ((*filterSize)-1)*0.5 + 0.5); - int j; - - filterPos[i]= xx; - for(j=0; j<*filterSize; j++) - { - double d= ABS((xx<<16) - xDstInSrc)/(double)xInc; - double coeff; - if(sws_flags == SWS_BICUBIC) - { - double A= -0.75; -// d*=2; - // Equation is from VirtualDub - if(d<1.0) - coeff = (1.0 - (A+3.0)*d*d + (A+2.0)*d*d*d); - else if(d<2.0) - coeff = (-4.0*A + 8.0*A*d - 5.0*A*d*d + A*d*d*d); - else - coeff=0.0; - } - else - { - coeff= 1.0 - d; - if(coeff<0) coeff=0; - } -// if(filterAlign==1) printf("%d %d %d \n", coeff, (int)d, xDstInSrc); - filter[i*(*filterSize) + j]= coeff; - xx++; - } - xDstInSrc+= xInc; - } - } - - //fix borders - for(i=0; i<dstW; i++) - { - int j; - if(filterPos[i] < 0) - { - // Move filter coeffs left to compensate for filterPos - for(j=1; j<*filterSize; j++) - { - int left= MAX(j + filterPos[i], 0); - filter[i*(*filterSize) + left] += filter[i*(*filterSize) + j]; - filter[i*(*filterSize) + j]=0; - } - filterPos[i]= 0; - } - - if(filterPos[i] + (*filterSize) > srcW) - { - int shift= filterPos[i] + (*filterSize) - srcW; - // Move filter coeffs right to compensate for filterPos - for(j=(*filterSize)-2; j>=0; j--) - { - int right= MIN(j + shift, (*filterSize)-1); - filter[i*(*filterSize) +right] += filter[i*(*filterSize) +j]; - filter[i*(*filterSize) +j]=0; - } - filterPos[i]= srcW - (*filterSize); - } - } - - //FIXME try to align filterpos if possible / try to shift filterpos to put zeros at the end - // and skip these than later - - //Normalize - for(i=0; i<dstW; i++) - { - int j; - double sum=0; - double scale= one; - for(j=0; j<*filterSize; j++) - { - sum+= filter[i*(*filterSize) + j]; - } - scale/= sum; - for(j=0; j<*filterSize; j++) - { - dstFilter[i*(*filterSize) + j]= (int)(filter[i*(*filterSize) + j]*scale); - } - } -} - -#ifdef HAVE_MMX2 -static void initMMX2HScaler(int dstW, int xInc, 
uint8_t *funnyCode) -{ - uint8_t *fragment; - int imm8OfPShufW1; - int imm8OfPShufW2; - int fragmentLength; - - int xpos, i; - - // create an optimized horizontal scaling routine - - //code fragment - - asm volatile( - "jmp 9f \n\t" - // Begin - "0: \n\t" - "movq (%%esi), %%mm0 \n\t" //FIXME Alignment - "movq %%mm0, %%mm1 \n\t" - "psrlq $8, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "movq %%mm2, %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "addw %%bx, %%cx \n\t" //2*xalpha += (4*lumXInc)&0xFFFF - "pshufw $0xFF, %%mm1, %%mm1 \n\t" - "1: \n\t" - "adcl %%edx, %%esi \n\t" //xx+= (4*lumXInc)>>16 + carry - "pshufw $0xFF, %%mm0, %%mm0 \n\t" - "2: \n\t" - "psrlw $9, %%mm3 \n\t" - "psubw %%mm1, %%mm0 \n\t" - "pmullw %%mm3, %%mm0 \n\t" - "paddw %%mm6, %%mm2 \n\t" // 2*alpha += xpos&0xFFFF - "psllw $7, %%mm1 \n\t" - "paddw %%mm1, %%mm0 \n\t" - - "movq %%mm0, (%%edi, %%eax) \n\t" - - "addl $8, %%eax \n\t" - // End - "9: \n\t" -// "int $3\n\t" - "leal 0b, %0 \n\t" - "leal 1b, %1 \n\t" - "leal 2b, %2 \n\t" - "decl %1 \n\t" - "decl %2 \n\t" - "subl %0, %1 \n\t" - "subl %0, %2 \n\t" - "leal 9b, %3 \n\t" - "subl %0, %3 \n\t" - :"=r" (fragment), "=r" (imm8OfPShufW1), "=r" (imm8OfPShufW2), - "=r" (fragmentLength) - ); - - xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers - - for(i=0; i<dstW/8; i++) - { - int xx=xpos>>16; - - if((i&3) == 0) - { - int a=0; - int b=((xpos+xInc)>>16) - xx; - int c=((xpos+xInc*2)>>16) - xx; - int d=((xpos+xInc*3)>>16) - xx; - - memcpy(funnyCode + fragmentLength*i/4, fragment, fragmentLength); - - funnyCode[fragmentLength*i/4 + imm8OfPShufW1]= - funnyCode[fragmentLength*i/4 + imm8OfPShufW2]= - a | (b<<2) | (c<<4) | (d<<6); - - // if we dont need to read 8 bytes than dont :), reduces the chance of - // crossing a cache line - if(d<3) funnyCode[fragmentLength*i/4 + 1]= 0x6E; - - funnyCode[fragmentLength*(i+4)/4]= RET; - } - xpos+=xInc; - } -/* - xpos= 0; //chrXInc/2 - 0x10000; // difference between centers of chrom samples - for(i=0; 
i<dstUVw/8; i++) - { - int xx=xpos>>16; - - if((i&3) == 0) - { - int a=0; - int b=((xpos+chrXInc)>>16) - xx; - int c=((xpos+chrXInc*2)>>16) - xx; - int d=((xpos+chrXInc*3)>>16) - xx; - - memcpy(funnyUVCode + fragmentLength*i/4, fragment, fragmentLength); - - funnyUVCode[fragmentLength*i/4 + imm8OfPShufW1]= - funnyUVCode[fragmentLength*i/4 + imm8OfPShufW2]= - a | (b<<2) | (c<<4) | (d<<6); - - // if we dont need to read 8 bytes than dont :), reduces the chance of - // crossing a cache line - if(d<3) funnyUVCode[fragmentLength*i/4 + 1]= 0x6E; - - funnyUVCode[fragmentLength*(i+4)/4]= RET; - } - xpos+=chrXInc; - } -*/ -// funnyCode[0]= RET; -} -#endif // HAVE_MMX2 - -static void RENAME(SwScale_YV12slice)(unsigned char* srcptr[],int stride[], int srcSliceY , - int srcSliceH, uint8_t* dstptr[], int dststride, int dstbpp, - int srcW, int srcH, int dstW, int dstH){ - - -unsigned int lumXInc= (srcW << 16) / dstW; -unsigned int lumYInc= (srcH << 16) / dstH; -unsigned int chrXInc; -unsigned int chrYInc; - -static int dstY; - -// used to detect a size change -static int oldDstW= -1; -static int oldSrcW= -1; -static int oldDstH= -1; -static int oldSrcH= -1; -static int oldFlags=-1; - -static int lastInLumBuf; -static int lastInChrBuf; - -int chrDstW, chrDstH; - -static int lumBufIndex=0; -static int chrBufIndex=0; - -static int firstTime=1; - -const int widthAlign= dstbpp==12 ? 16 : 8; -const int bytespp= (dstbpp+1)/8; //(12->1, 15&16->2, 24->3, 32->4) -const int over= dstbpp==12 ? 
(((dstW+15)&(~15))) - dststride - : (((dstW+7)&(~7)))*bytespp - dststride; -if(dststride%widthAlign !=0 ) -{ - if(firstTime) - fprintf(stderr, "SwScaler: Warning: dstStride is not a multiple of %d!\n" - "SwScaler: ->cannot do aligned memory acesses anymore\n", - widthAlign); -} - -if(over>0 && verbose) -{ - if(firstTime) - fprintf(stderr, "SwScaler: Warning: output width is not a multiple of 8 (16 for YV12)\n" - "SwScaler: and dststride is not large enough to handle %d extra bytes\n" - "SwScaler: ->using unoptimized C version for last line(s)\n", - over); -} - - - -//printf("%d %d %d %d\n", srcW, srcH, dstW, dstH); -//printf("%d %d %d %d\n", lumXInc, lumYInc, srcSliceY, srcSliceH); - -#ifdef HAVE_MMX2 -canMMX2BeUsed= (lumXInc <= 0x10000 && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; -if(!canMMX2BeUsed && lumXInc <= 0x10000 && (srcW&15)==0 && sws_flags==SWS_FAST_BILINEAR) -{ - if(firstTime) - fprintf(stderr, "SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n"); -} -#else -canMMX2BeUsed=0; // should be 0 anyway but ... -#endif - -if(firstTime) -{ -#if defined (DITHER1XBPP) && defined (HAVE_MMX) - char *dither= " dithered"; -#else - char *dither= ""; -#endif - if(sws_flags==SWS_FAST_BILINEAR) - fprintf(stderr, "\nSwScaler: FAST_BILINEAR scaler "); - else if(sws_flags==SWS_BILINEAR) - fprintf(stderr, "\nSwScaler: BILINEAR scaler "); - else if(sws_flags==SWS_BICUBIC) - fprintf(stderr, "\nSwScaler: BICUBIC scaler "); - else - fprintf(stderr, "\nSwScaler: ehh flags invalid?! 
"); - - if(dstbpp==15) - fprintf(stderr, "with%s BGR15 output ", dither); - else if(dstbpp==16) - fprintf(stderr, "with%s BGR16 output ", dither); - else if(dstbpp==24) - fprintf(stderr, "with BGR24 output "); - else if(dstbpp==32) - fprintf(stderr, "with BGR32 output "); - else if(dstbpp==12) - fprintf(stderr, "with YV12 output "); - else - fprintf(stderr, "without output "); - -#ifdef HAVE_MMX2 - fprintf(stderr, "using MMX2\n"); -#elif defined (HAVE_3DNOW) - fprintf(stderr, "using 3DNOW\n"); -#elif defined (HAVE_MMX) - fprintf(stderr, "using MMX\n"); -#elif defined (ARCH_X86) - fprintf(stderr, "using X86 ASM\n"); -#else - fprintf(stderr, "using C\n"); -#endif -} - - -// match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst -// n-2 is the last chrominance sample available -// this is not perfect, but noone shuld notice the difference, the more correct variant -// would be like the vertical one, but that would require some special code for the -// first and last pixel -if(sws_flags==SWS_FAST_BILINEAR) -{ - if(canMMX2BeUsed) lumXInc+= 20; -#ifndef HAVE_MMX //we dont use the x86asm scaler if mmx is available - else lumXInc = ((srcW-2)<<16)/(dstW-2) - 20; -#endif -} - -if(fullUVIpol && !(dstbpp==12)) chrXInc= lumXInc>>1, chrDstW= dstW; -else chrXInc= lumXInc, chrDstW= (dstW+1)>>1; - -if(dstbpp==12) chrYInc= lumYInc, chrDstH= (dstH+1)>>1; -else chrYInc= lumYInc>>1, chrDstH= dstH; - - // force calculation of the horizontal interpolation of the first line - - if(srcSliceY ==0){ -// printf("dstW %d, srcw %d, mmx2 %d\n", dstW, srcW, canMMX2BeUsed); - lumBufIndex=0; - chrBufIndex=0; - dstY=0; - - //precalculate horizontal scaler filter coefficients - if(oldDstW!=dstW || oldSrcW!=srcW || oldFlags!=sws_flags) - { -#ifdef HAVE_MMX - const int filterAlign=4; -#else - const int filterAlign=1; -#endif - oldDstW= dstW; oldSrcW= srcW; oldFlags= sws_flags; - - RENAME(initFilter)(hLumFilter, hLumFilterPos, &hLumFilterSize, lumXInc, - srcW , dstW , 
filterAlign, 1<<14); - RENAME(initFilter)(hChrFilter, hChrFilterPos, &hChrFilterSize, chrXInc, - (srcW+1)>>1, chrDstW, filterAlign, 1<<14); - -#ifdef HAVE_MMX2 -// cant downscale !!! - if(canMMX2BeUsed && sws_flags == SWS_FAST_BILINEAR) - { - initMMX2HScaler(dstW , lumXInc, funnyYCode); - initMMX2HScaler(chrDstW, chrXInc, funnyUVCode); - } -#endif - } // Init Horizontal stuff - - if(oldDstH!=dstH || oldSrcH!=srcH || oldFlags!=sws_flags) - { - int i; - oldDstH= dstH; oldSrcH= srcH; oldFlags= sws_flags; //FIXME swsflags conflict with x check - - // deallocate pixbufs - for(i=0; i<vLumBufSize; i++) free(lumPixBuf[i]); - for(i=0; i<vChrBufSize; i++) free(chrPixBuf[i]); - - RENAME(initFilter)(vLumFilter, vLumFilterPos, &vLumFilterSize, lumYInc, - srcH , dstH, 1, (1<<12)-4); - RENAME(initFilter)(vChrFilter, vChrFilterPos, &vChrFilterSize, chrYInc, - (srcH+1)>>1, chrDstH, 1, (1<<12)-4); - - // Calculate Buffer Sizes so that they wont run out while handling these damn slices - vLumBufSize= vLumFilterSize; vChrBufSize= vChrFilterSize; - for(i=0; i<dstH; i++) - { - int chrI= i*chrDstH / dstH; - int nextSlice= MAX(vLumFilterPos[i ] + vLumFilterSize - 1, - ((vChrFilterPos[chrI] + vChrFilterSize - 1)<<1)); - nextSlice&= ~1; // Slices start at even boundaries - if(vLumFilterPos[i ] + vLumBufSize < nextSlice) - vLumBufSize= nextSlice - vLumFilterPos[i ]; - if(vChrFilterPos[chrI] + vChrBufSize < (nextSlice>>1)) - vChrBufSize= (nextSlice>>1) - vChrFilterPos[chrI]; - } - - // allocate pixbufs (we use dynamic allocation because otherwise we would need to - // allocate several megabytes to handle all possible cases) - for(i=0; i<vLumBufSize; i++) - lumPixBuf[i]= lumPixBuf[i+vLumBufSize]= (uint16_t*)memalign(8, 4000); - for(i=0; i<vChrBufSize; i++) - chrPixBuf[i]= chrPixBuf[i+vChrBufSize]= (uint16_t*)memalign(8, 8000); - - //try to avoid drawing green stuff between the right end and the stride end - for(i=0; i<vLumBufSize; i++) memset(lumPixBuf[i], 0, 4000); - for(i=0; i<vChrBufSize; 
i++) memset(chrPixBuf[i], 64, 8000); - - ASSERT(chrDstH<=dstH) - ASSERT(vLumFilterSize*dstH*4<16000) - ASSERT(vChrFilterSize*chrDstH*4<16000) -#ifdef HAVE_MMX - // pack filter data for mmx code - for(i=0; i<vLumFilterSize*dstH; i++) - lumMmxFilter[4*i]=lumMmxFilter[4*i+1]=lumMmxFilter[4*i+2]=lumMmxFilter[4*i+3]= - vLumFilter[i]; - for(i=0; i<vChrFilterSize*chrDstH; i++) - chrMmxFilter[4*i]=chrMmxFilter[4*i+1]=chrMmxFilter[4*i+2]=chrMmxFilter[4*i+3]= - vChrFilter[i]; -#endif - } - - if(firstTime && verbose) - { -#ifdef HAVE_MMX2 - int mmx2=1; -#else - int mmx2=0; -#endif -#ifdef HAVE_MMX - int mmx=1; -#else - int mmx=0; -#endif - -#ifdef HAVE_MMX - if(canMMX2BeUsed && sws_flags==SWS_FAST_BILINEAR) - printf("SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n"); - else - { - if(hLumFilterSize==4) - printf("SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n"); - else if(hLumFilterSize==8) - printf("SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n"); - else - printf("SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n"); - - if(hChrFilterSize==4) - printf("SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n"); - else if(hChrFilterSize==8) - printf("SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n"); - else - printf("SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n"); - } -#elif defined (ARCH_X86) - printf("SwScaler: using X86-Asm scaler for horizontal scaling\n"); -#else - if(sws_flags==SWS_FAST_BILINEAR) - printf("SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n"); - else - printf("SwScaler: using C scaler for horizontal scaling\n"); -#endif - - if(dstbpp==12) - { - if(vLumFilterSize==1) - printf("SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12)\n", mmx ? "MMX" : "C"); - else - printf("SwScaler: using n-tap %s scaler for vertical scaling (YV12)\n", mmx ? 
"MMX" : "C"); - } - else - { - if(vLumFilterSize==1 && vChrFilterSize==2) - printf("SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n" - "SwScaler: 2-tap scaler for vertical chrominance scaling (BGR)\n", mmx ? "MMX" : "C"); - else if(vLumFilterSize==2 && vChrFilterSize==2) - printf("SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", mmx ? "MMX" : "C"); - else - printf("SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", mmx ? "MMX" : "C"); - } - - if(dstbpp==24) - printf("SwScaler: using %s YV12->BGR24 Converter\n", - mmx2 ? "MMX2" : (mmx ? "MMX" : "C")); - else - printf("SwScaler: using %s YV12->BGR%d Converter\n", mmx ? "MMX" : "C", dstbpp); - - printf("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); - } - - lastInLumBuf= -1; - lastInChrBuf= -1; - } // if(firstLine) for(;dstY < dstH; dstY++){ - unsigned char *dest =dstptr[0]+dststride*dstY; - unsigned char *uDest=dstptr[1]+(dststride>>1)*(dstY>>1); - unsigned char *vDest=dstptr[2]+(dststride>>1)*(dstY>>1); - const int chrDstY= dstbpp==12 ? (dstY>>1) : dstY; + unsigned char *dest =dst[0]+dstStride[0]*dstY; + unsigned char *uDest=dst[1]+dstStride[1]*(dstY>>1); + unsigned char *vDest=dst[2]+dstStride[2]*(dstY>>1); + const int chrDstY= dstFormat==IMGFMT_YV12 ? 
(dstY>>1) : dstY; const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input - if(sws_flags == SWS_FAST_BILINEAR) + if(flags&SWS_FAST_BILINEAR) { //handle holes if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; @@ -2452,24 +1951,29 @@ else chrYInc= lumYInc>>1, chrDstH= dstH; //Do horizontal scaling while(lastInLumBuf < lastLumSrcY) { - uint8_t *src= srcptr[0]+(lastInLumBuf + 1 - srcSliceY)*stride[0]; + uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; lumBufIndex++; ASSERT(lumBufIndex < 2*vLumBufSize) ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH) ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) // printf("%d %d\n", lumBufIndex, vLumBufSize); - RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, src, srcW, lumXInc); + RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, + flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, + funnyYCode); lastInLumBuf++; } while(lastInChrBuf < lastChrSrcY) { - uint8_t *src1= srcptr[1]+(lastInChrBuf + 1 - (srcSliceY>>1))*stride[1]; - uint8_t *src2= srcptr[2]+(lastInChrBuf + 1 - (srcSliceY>>1))*stride[2]; + uint8_t *src1= src[1]+(lastInChrBuf + 1 - (srcSliceY>>1))*srcStride[1]; + uint8_t *src2= src[2]+(lastInChrBuf + 1 - (srcSliceY>>1))*srcStride[2]; chrBufIndex++; ASSERT(chrBufIndex < 2*vChrBufSize) ASSERT(lastInChrBuf + 1 - (srcSliceY>>1) < (srcSliceH>>1)) ASSERT(lastInChrBuf + 1 - (srcSliceY>>1) >= 0) - RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc); + //FIXME replace parameters through context struct (some at least) + RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc, + flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, + funnyUVCode); 
lastInChrBuf++; } //wrap buf index around to stay inside the ring buffer @@ -2486,23 +1990,27 @@ else chrYInc= lumYInc>>1, chrDstH= dstH; //Do horizontal scaling while(lastInLumBuf+1 < srcSliceY + srcSliceH) { - uint8_t *src= srcptr[0]+(lastInLumBuf + 1 - srcSliceY)*stride[0]; + uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; lumBufIndex++; ASSERT(lumBufIndex < 2*vLumBufSize) ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH) ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) - RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, src, srcW, lumXInc); + RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, + flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, + funnyYCode); lastInLumBuf++; } while(lastInChrBuf+1 < ((srcSliceY + srcSliceH)>>1)) { - uint8_t *src1= srcptr[1]+(lastInChrBuf + 1 - (srcSliceY>>1))*stride[1]; - uint8_t *src2= srcptr[2]+(lastInChrBuf + 1 - (srcSliceY>>1))*stride[2]; + uint8_t *src1= src[1]+(lastInChrBuf + 1 - (srcSliceY>>1))*srcStride[1]; + uint8_t *src2= src[2]+(lastInChrBuf + 1 - (srcSliceY>>1))*srcStride[2]; chrBufIndex++; ASSERT(chrBufIndex < 2*vChrBufSize) ASSERT(lastInChrBuf + 1 - (srcSliceY>>1) < (srcSliceH>>1)) ASSERT(lastInChrBuf + 1 - (srcSliceY>>1) >= 0) - RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc); + RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc, + flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, + funnyUVCode); lastInChrBuf++; } //wrap buf index around to stay inside the ring buffer @@ -2517,9 +2025,9 @@ else chrYInc= lumYInc>>1, chrDstH= dstH; g5Dither= dither8[dstY&1]; r5Dither= dither8[(dstY+1)&1]; #endif - if(dstY < dstH-2 || over<=0) + if(dstY < dstH-2) { - if(dstbpp==12) //YV12 + if(dstFormat==IMGFMT_YV12) //YV12 { if(dstY&1) uDest=vDest= NULL; //FIXME split functions in lumi / chromi if(vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12 @@ -2551,7 +2059,7 @@ else chrYInc= lumYInc>>1, chrDstH= 
dstH; int chrAlpha= vChrFilter[2*dstY+1]; RENAME(yuv2rgb1)(*lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), - dest, dstW, chrAlpha, dstbpp); + dest, dstW, chrAlpha, dstFormat, flags); } else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB { @@ -2559,14 +2067,14 @@ else chrYInc= lumYInc>>1, chrDstH= dstH; int chrAlpha= vChrFilter[2*dstY+1]; RENAME(yuv2rgb2)(*lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), - dest, dstW, lumAlpha, chrAlpha, dstbpp); + dest, dstW, lumAlpha, chrAlpha, dstFormat, flags); } else //General RGB { RENAME(yuv2rgbX)( vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, - dest, dstW, dstbpp, + dest, dstW, dstFormat, lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+dstY*vChrFilterSize*4); } } @@ -2575,7 +2083,7 @@ else chrYInc= lumYInc>>1, chrDstH= dstH; { int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - if(dstbpp==12) //YV12 + if(dstFormat==IMGFMT_YV12) //YV12 { if(dstY&1) uDest=vDest= NULL; //FIXME split functions in lumi / chromi yuv2yuvXinC( @@ -2590,7 +2098,7 @@ else chrYInc= lumYInc>>1, chrDstH= dstH; yuv2rgbXinC( vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, - dest, dstW, dstbpp); + dest, dstW, dstFormat); } } } @@ -2599,5 +2107,10 @@ else chrYInc= lumYInc>>1, chrDstH= dstH; __asm __volatile(SFENCE:::"memory"); __asm __volatile(EMMS:::"memory"); #endif - firstTime=0; + /* store changed local vars back in the context */ + c->dstY= dstY; + c->lumBufIndex= lumBufIndex; + c->chrBufIndex= chrBufIndex; + c->lastInLumBuf= lastInLumBuf; + c->lastInChrBuf= lastInChrBuf; } |