diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2011-06-02 04:41:44 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-06-02 21:03:55 +0200 |
commit | 6713989c231104ff4381ef58f25ec1af8603535b (patch) | |
tree | a2e9c22e162bba4c8e5907697839aa25f86141a2 /libswscale | |
parent | 877f76ad33bb9b0b0d09565dd9ec1cf8e91096f1 (diff) | |
download | ffmpeg-6713989c231104ff4381ef58f25ec1af8603535b.tar.gz |
swscale: dither for planar yuv outputs
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libswscale')
-rw-r--r-- | libswscale/swscale.c | 26 | ||||
-rw-r--r-- | libswscale/swscale_internal.h | 10 | ||||
-rw-r--r-- | libswscale/swscale_template.c | 31 | ||||
-rw-r--r-- | libswscale/x86/swscale_template.c | 64 |
4 files changed, 77 insertions, 54 deletions
diff --git a/libswscale/swscale.c b/libswscale/swscale.c index 6bf7fcfd9d..65bb7c01c1 100644 --- a/libswscale/swscale.c +++ b/libswscale/swscale.c @@ -282,6 +282,8 @@ DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={ { 112, 16,104, 8,118, 22,110, 14,}, }}; +static const uint8_t flat64[8]={64,64,64,64,64,64,64,64}; + uint16_t dither_scale[15][16]={ { 2, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,}, { 2, 3, 7, 7, 13, 13, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,}, @@ -417,12 +419,13 @@ static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSr static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, - const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW) + const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { //FIXME Optimize (just quickly written not optimized..) int i; for (i=0; i<dstW; i++) { - int val=1<<18; + int val = lumDither[i&7] << 12; int j; for (j=0; j<lumFilterSize; j++) val += lumSrc[j][i] * lumFilter[j]; @@ -432,8 +435,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, if (uDest) for (i=0; i<chrDstW; i++) { - int u=1<<18; - int v=1<<18; + int u = chrDither[i&7] << 12; + int v = chrDither[(i+3)&7] << 12; int j; for (j=0; j<chrFilterSize; j++) { u += chrUSrc[j][i] * chrFilter[j]; @@ -446,7 +449,7 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, if (CONFIG_SWSCALE_ALPHA && aDest) for (i=0; i<dstW; i++) { - int val=1<<18; + int val = lumDither[i&7] << 12; int j; for (j=0; j<lumFilterSize; j++) val += alpSrc[j][i] * lumFilter[j]; @@ -459,12 +462,13 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, - uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat) + uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat, + const uint8_t *lumDither, const uint8_t *chrDither) { //FIXME Optimize (just quickly written not optimized..) int i; for (i=0; i<dstW; i++) { - int val=1<<18; + int val = lumDither[i&7]<<12; int j; for (j=0; j<lumFilterSize; j++) val += lumSrc[j][i] * lumFilter[j]; @@ -477,8 +481,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc if (dstFormat == PIX_FMT_NV12) for (i=0; i<chrDstW; i++) { - int u=1<<18; - int v=1<<18; + int u = chrDither[i&7]<<12; + int v = chrDither[(i+3)&7]<<12; int j; for (j=0; j<chrFilterSize; j++) { u += chrUSrc[j][i] * chrFilter[j]; @@ -490,8 +494,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc } else for (i=0; i<chrDstW; i++) { - int u=1<<18; - int v=1<<18; + int u = chrDither[i&7]<<12; + int v = chrDither[(i+3)&7]<<12; int j; for (j=0; j<chrFilterSize; j++) { u += chrUSrc[j][i] * chrFilter[j]; diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h index 8577448c38..87712be1b3 100644 --- a/libswscale/swscale_internal.h +++ b/libswscale/swscale_internal.h @@ -195,6 +195,8 @@ typedef struct SwsContext { #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48" #define UV_OFF "11*8+4*4*256*3+48" #define UV_OFFx2 "11*8+4*4*256*3+56" +#define DITHER16 "11*8+4*4*256*3+64" +#define DITHER32 "11*8+4*4*256*3+64+16" DECLARE_ALIGNED(8, uint64_t, redDither); DECLARE_ALIGNED(8, uint64_t, greenDither); @@ -219,6 +221,8 @@ typedef struct SwsContext { int32_t alpMmxFilter[4*MAX_FILTER_SIZE]; DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes DECLARE_ALIGNED(8, ptrdiff_t, uv_offx2); ///< offset (in bytes) between u and v planes + uint16_t dither16[8]; + uint32_t dither32[8]; #if HAVE_ALTIVEC vector signed short CY; @@ -255,13 +259,13 @@ typedef struct SwsContext { const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, uint8_t *dest, uint8_t *uDest, - int dstW, int chrDstW, int dstFormat); + int dstW, int chrDstW, int dstFormat, const uint8_t *lumDither, const uint8_t *chrDither); void (*yuv2yuv1 )(struct SwsContext *c, const int16_t *lumSrc, const int16_t *chrUSrc, const int16_t *chrVSrc, const int16_t *alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, - int dstW, int chrDstW); + int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither); void (*yuv2yuvX )(struct SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, const int16_t *chrFilter, const int16_t **chrUSrc, @@ -269,7 +273,7 @@ typedef struct SwsContext { const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, - int dstW, int chrDstW); + int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither); void (*yuv2packed1)(struct SwsContext *c, const uint16_t *buf0, const uint16_t *ubuf0, const uint16_t *ubuf1, diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c index 6c85487e9c..58e05ac7b3 100644 --- a/libswscale/swscale_template.c +++ b/libswscale/swscale_template.c @@ -24,11 +24,11 @@ static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither) { yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, chrFilter, chrUSrc, chrVSrc, chrFilterSize, - alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW); + alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); } static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, @@ -36,36 +36,37 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, uint8_t *dest, uint8_t *uDest, - int dstW, int chrDstW, enum PixelFormat dstFormat) + int dstW, int chrDstW, enum PixelFormat dstFormat, const uint8_t *dither, const uint8_t *chrDither) { yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize, chrFilter, chrUSrc, chrVSrc, chrFilterSize, - dest, uDest, dstW, chrDstW, dstFormat); + dest, uDest, dstW, chrDstW, dstFormat, dither, chrDither); } static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc, const int16_t *chrUSrc, const int16_t *chrVSrc, const int16_t *alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *aDest, int dstW, int chrDstW, const uint8_t *lumDither, const uint8_t *chrDither) { int i; + for (i=0; i<dstW; i++) { - int val= (lumSrc[i]+64)>>7; + int val= (lumSrc[i]+lumDither[i&7])>>7; dest[i]= av_clip_uint8(val); } if (uDest) for (i=0; i<chrDstW; i++) { - int u=(chrUSrc[i]+64)>>7; - int v=(chrVSrc[i]+64)>>7; + int u=(chrUSrc[i]+chrDither[i&7])>>7; + int v=(chrVSrc[i]+chrDither[(i+3)&7])>>7; uDest[i]= av_clip_uint8(u); vDest[i]= av_clip_uint8(v); } if (CONFIG_SWSCALE_ALPHA && aDest) for (i=0; i<dstW; i++) { - int val= (alpSrc[i]+64)>>7; + int val= (alpSrc[i]+lumDither[i&7])>>7; aDest[i]= av_clip_uint8(val); } } @@ -609,6 +610,8 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; unsigned char *aDest=(CONFIG_SWSCALE_ALPHA && alpPixBuf) ? dst[3]+dstStride[3]*dstY : NULL; + const uint8_t *lumDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][dstY &7] : flat64; + const uint8_t *chrDither= isNBPS(c->srcFormat) || is16BPS(c->srcFormat) ? dithers[7][chrDstY&7] : flat64; const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input const int firstLumSrcY2= vLumFilterPos[FFMIN(dstY | ((1<<c->chrDstVSubSample) - 1), dstH-1)]; @@ -699,7 +702,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], c->yuv2nv12X(c, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, - dest, uDest, dstW, chrDstW, dstFormat); + dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither); } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi @@ -716,13 +719,13 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], const int16_t *chrVBuf= chrVSrcPtr[0]; const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL; c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest, - uDest, vDest, aDest, dstW, chrDstW); + uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); } else { //General YV12 c->yuv2yuvX(c, vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, - alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); + alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); } } else { assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); @@ -784,7 +787,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], yuv2nv12XinC( vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, - dest, uDest, dstW, chrDstW, dstFormat); + dest, uDest, dstW, chrDstW, dstFormat, lumDither, chrDither); } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi @@ -798,7 +801,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], yuv2yuvXinC( vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, - alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); + alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW, lumDither, chrDither); } } else { assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index e2a530ae8d..d726c175f6 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -39,8 +39,8 @@ #define YSCALEYUV2YV12X(offset, dest, end, pos) \ __asm__ volatile(\ - "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ - "movq %%mm3, %%mm4 \n\t"\ + "movq "DITHER16"+0(%0), %%mm3 \n\t"\ + "movq "DITHER16"+8(%0), %%mm4 \n\t"\ "lea " offset "(%0), %%"REG_d" \n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ ".p2align 4 \n\t" /* FIXME Unroll? */\ @@ -62,8 +62,8 @@ MOVNTQ(%%mm3, (%1, %3))\ "add $8, %3 \n\t"\ "cmp %2, %3 \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ - "movq %%mm3, %%mm4 \n\t"\ + "movq "DITHER16"+0(%0), %%mm3 \n\t"\ + "movq "DITHER16"+8(%0), %%mm4 \n\t"\ "lea " offset "(%0), %%"REG_d" \n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ "jb 1b \n\t"\ @@ -78,13 +78,18 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { + int i; if (uDest) { x86_reg uv_off = c->uv_off; + for(i=0; i<8; i++) c->dither16[i] = chrDither[i]>>4; YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0) + for(i=0; i<8; i++) c->dither16[i] = chrDither[(i+3)&7]>>4; YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off) } + for(i=0; i<8; i++) c->dither16[i] = lumDither[i]>>4; if (CONFIG_SWSCALE_ALPHA && aDest) { YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0) } @@ -95,6 +100,10 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, #define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \ __asm__ volatile(\ "lea " offset "(%0), %%"REG_d" \n\t"\ + "movq "DITHER32"+0(%0), %%mm4 \n\t"\ + "movq "DITHER32"+8(%0), %%mm5 \n\t"\ + "movq "DITHER32"+16(%0), %%mm6 \n\t"\ + "movq "DITHER32"+24(%0), %%mm7 \n\t"\ "pxor %%mm4, %%mm4 \n\t"\ "pxor %%mm5, %%mm5 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\ @@ -126,26 +135,21 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, "paddd %%mm2, %%mm6 \n\t"\ "paddd %%mm0, %%mm7 \n\t"\ " jnz 1b \n\t"\ - "psrad $16, %%mm4 \n\t"\ - "psrad $16, %%mm5 \n\t"\ - "psrad $16, %%mm6 \n\t"\ - "psrad $16, %%mm7 \n\t"\ - "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\ + "psrad $19, %%mm4 \n\t"\ + "psrad $19, %%mm5 \n\t"\ + "psrad $19, %%mm6 \n\t"\ + "psrad $19, %%mm7 \n\t"\ "packssdw %%mm5, %%mm4 \n\t"\ "packssdw %%mm7, %%mm6 \n\t"\ - "paddw %%mm0, %%mm4 \n\t"\ - "paddw %%mm0, %%mm6 \n\t"\ - "psraw $3, %%mm4 \n\t"\ - "psraw $3, %%mm6 \n\t"\ "packuswb %%mm6, %%mm4 \n\t"\ MOVNTQ(%%mm4, (%1, %3))\ "add $8, %3 \n\t"\ "cmp %2, %3 \n\t"\ "lea " offset "(%0), %%"REG_d" \n\t"\ - "pxor %%mm4, %%mm4 \n\t"\ - "pxor %%mm5, %%mm5 \n\t"\ - "pxor %%mm6, %%mm6 \n\t"\ - "pxor %%mm7, %%mm7 \n\t"\ + "movq "DITHER32"+0(%0), %%mm4 \n\t"\ + "movq "DITHER32"+8(%0), %%mm5 \n\t"\ + "movq "DITHER32"+16(%0), %%mm6 \n\t"\ + "movq "DITHER32"+24(%0), %%mm7 \n\t"\ "mov (%%"REG_d"), %%"REG_S" \n\t"\ "jb 1b \n\t"\ :: "r" (&c->redDither),\ @@ -159,13 +163,18 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, const int16_t **chrVSrc, int chrFilterSize, const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { + int i; if (uDest) { x86_reg uv_off = c->uv_off; + for(i=0; i<8; i++) c->dither32[i] = chrDither[i]<<12; YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0) + for(i=0; i<8; i++) c->dither32[i] = chrDither[(i+3)&7]<<12; YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off) } + for(i=0; i<8; i++) c->dither32[i] = lumDither[i]<<12; if (CONFIG_SWSCALE_ALPHA && aDest) { YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0) } @@ -190,7 +199,8 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrUSrc, const int16_t *chrVSrc, const int16_t *alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { int p= 4; const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW }; @@ -211,14 +221,13 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc, #define YSCALEYUV2YV121_ACCURATE \ "mov %2, %%"REG_a" \n\t"\ - "pcmpeqw %%mm7, %%mm7 \n\t"\ - "psrlw $15, %%mm7 \n\t"\ - "psllw $6, %%mm7 \n\t"\ + "movq 0(%3), %%mm6 \n\t"\ + "movq 8(%3), %%mm7 \n\t"\ ".p2align 4 \n\t" /* FIXME Unroll? */\ "1: \n\t"\ "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\ "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\ - "paddsw %%mm7, %%mm0 \n\t"\ + "paddsw %%mm6, %%mm0 \n\t"\ "paddsw %%mm7, %%mm1 \n\t"\ "psraw $7, %%mm0 \n\t"\ "psraw $7, %%mm1 \n\t"\ @@ -231,7 +240,8 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, const int16_t *chrUSrc, const int16_t *chrVSrc, const int16_t *alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, - uint8_t *aDest, int dstW, int chrDstW) + uint8_t *aDest, int dstW, int chrDstW, + const uint8_t *lumDither, const uint8_t *chrDither) { int p= 4; const int16_t *src[4]= { alpSrc + dstW, lumSrc + dstW, chrUSrc + chrDstW, chrVSrc + chrDstW }; @@ -240,10 +250,12 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc, while (p--) { if (dst[p]) { + int i; + for(i=0; i<8; i++) c->dither16[i] = i<2 ? lumDither[i] : chrDither[i]; __asm__ volatile( YSCALEYUV2YV121_ACCURATE :: "r" (src[p]), "r" (dst[p] + counter[p]), - "g" (-counter[p]) + "g" (-counter[p]), "r"(c->dither16) : "%"REG_a ); } |