diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2002-10-29 18:35:15 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2002-10-29 18:35:15 +0000 |
commit | ec487e5db84a57d5c3ad49c111bb25c6054919fe (patch) | |
tree | c75098b30fecf115a616d32297774bca6bfd0f21 | |
parent | 02c7e1775de32aecdc8ff4d4f65d40adda18a98e (diff) | |
download | ffmpeg-ec487e5db84a57d5c3ad49c111bb25c6054919fe.tar.gz |
better deblocking filter
Originally committed as revision 7961 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
-rw-r--r-- | postproc/postprocess.c | 80 | ||||
-rw-r--r-- | postproc/postprocess.h | 7 | ||||
-rw-r--r-- | postproc/postprocess_template.c | 77 |
3 files changed, 101 insertions, 63 deletions
diff --git a/postproc/postprocess.c b/postproc/postprocess.c index a732d400a4..599d6064b7 100644 --- a/postproc/postprocess.c +++ b/postproc/postprocess.c @@ -59,7 +59,6 @@ compare the quality & speed of all filters split this huge file optimize c versions try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks -put fastmemcpy back ... */ @@ -149,13 +148,14 @@ typedef struct PPContext{ uint64_t __attribute__((aligned(8))) pQPb; uint64_t __attribute__((aligned(8))) pQPb2; - - uint64_t __attribute__((aligned(8))) mmxDcOffset; - uint64_t __attribute__((aligned(8))) mmxDcThreshold; + uint64_t __attribute__((aligned(8))) mmxDcOffset[32]; + uint64_t __attribute__((aligned(8))) mmxDcThreshold[32]; + + QP_STORE_T *nonBQPTable; + int QP; - int dcOffset; - int dcThreshold; + int nonBQP; int frameNum; @@ -247,8 +247,8 @@ static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) { int numEq= 0; int y; - const int dcOffset= c->dcOffset; - const int dcThreshold= c->dcThreshold; + const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1; + const int dcThreshold= dcOffset*2 + 1; for(y=0; y<BLOCK_SIZE; y++) { if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++; @@ -269,8 +269,8 @@ static inline int isHorizDC(uint8_t src[], int stride, PPContext *c) static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){ int numEq= 0; int y; - const int dcOffset= c->dcOffset; - const int dcThreshold= c->dcThreshold; + const int dcOffset= ((c->QP*c->ppMode.baseDcDiff)>>8) + 1; + const int dcThreshold= dcOffset*2 + 1; src+= stride*4; // src points to begin of the 8x8 Block for(y=0; y<BLOCK_SIZE-1; y++) { @@ -725,7 +725,7 @@ struct PPMode getPPModeByNameAndQuality(char *name, int quality) else if(filters[i].mask == V_DEBLOCK || filters[i].mask == H_DEBLOCK) { int o; - ppMode.maxDcDiff=1; + ppMode.baseDcDiff=256/4; // hFlatnessThreshold= 40; // vFlatnessThreshold= 40; @@ -736,7 +736,7 @@ struct PPMode getPPModeByNameAndQuality(char *name, int quality) if(tail==options[o]) break; numOfUnknownOptions--; - if(o==0) ppMode.maxDcDiff= val; + if(o==0) ppMode.baseDcDiff= val; else ppMode.flatnessThreshold= val; } } @@ -768,6 +768,8 @@ struct PPMode getPPModeByNameAndQuality(char *name, int quality) void *getPPContext(int width, int height){ PPContext *c= memalign(32, sizeof(PPContext)); int i; + int mbWidth = (width+15)>>4; + int mbHeight= (height+15)>>4; c->tempBlocks= (uint8_t*)memalign(8, 2*16*8); c->yHistogram= (uint64_t*)memalign(8, 256*sizeof(uint64_t)); @@ -789,6 +791,8 @@ void *getPPContext(int width, int height){ c->tempDstBlock= (uint8_t*)memalign(8, 1024*24); c->tempSrcBlock= (uint8_t*)memalign(8, 1024*24); c->deintTemp= (uint8_t*)memalign(8, width+16); + c->nonBQPTable= (QP_STORE_T*)memalign(8, mbWidth*mbHeight*sizeof(QP_STORE_T)); + memset(c->nonBQPTable, 0, mbWidth*mbHeight*sizeof(QP_STORE_T)); c->frameNum=-1; @@ -809,6 +813,7 @@ void freePPContext(void *vc){ free(c->tempDstBlock); free(c->tempSrcBlock); free(c->deintTemp); + free(c->nonBQPTable); free(c); } @@ -841,12 +846,14 @@ void revertPPOpt(void *conf, char* opt) void postprocess(uint8_t * src[3], int srcStride[3], uint8_t * dst[3], int dstStride[3], - int horizontalSize, int verticalSize, + int width, int height, QP_STORE_T *QP_store, int QPStride, - PPMode *mode, void *c) + PPMode *mode, void *vc, int pict_type) { - + int mbWidth = (width+15)>>4; + int mbHeight= (height+15)>>4; QP_STORE_T quantArray[2048/8]; + PPContext *c = (PPContext*)vc; if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) { @@ -858,6 +865,29 @@ void postprocess(uint8_t * src[3], int srcStride[3], else for(i=0; i<2048/8; i++) quantArray[i]= 1; } +if(0){ +int x,y; +for(y=0; y<mbHeight; y++){ + for(x=0; x<mbWidth; x++){ + printf("%2d ", QP_store[x + y*QPStride]); + } + printf("\n"); +} + printf("\n"); +} +//printf("pict_type:%d\n", pict_type); + if(pict_type!=3) + { + int x,y; + for(y=0; y<mbHeight; y++){ + for(x=0; x<mbWidth; x++){ + int qscale= QP_store[x + y*QPStride]; + if(qscale&~31) + qscale=31; + c->nonBQPTable[y*mbWidth + x]= qscale; + } + } + } if(firstTime2 && verbose) { @@ -866,30 +896,30 @@ void postprocess(uint8_t * src[3], int srcStride[3], } postProcess(src[0], srcStride[0], dst[0], dstStride[0], - horizontalSize, verticalSize, QP_store, QPStride, 0, mode, c); + width, height, QP_store, QPStride, 0, mode, c); - horizontalSize = (horizontalSize+1)>> 1; - verticalSize = (verticalSize+1)>>1; + width = (width +1)>>1; + height = (height+1)>>1; if(mode->chromMode) { postProcess(src[1], srcStride[1], dst[1], dstStride[1], - horizontalSize, verticalSize, QP_store, QPStride, 1, mode, c); + width, height, QP_store, QPStride, 1, mode, c); postProcess(src[2], srcStride[2], dst[2], dstStride[2], - horizontalSize, verticalSize, QP_store, QPStride, 2, mode, c); + width, height, QP_store, QPStride, 2, mode, c); } else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]) { - memcpy(dst[1], src[1], srcStride[1]*verticalSize); - memcpy(dst[2], src[2], srcStride[2]*verticalSize); + memcpy(dst[1], src[1], srcStride[1]*height); + memcpy(dst[2], src[2], srcStride[2]*height); } else { int y; - for(y=0; y<verticalSize; y++) + for(y=0; y<height; y++) { - memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), horizontalSize); - memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), horizontalSize); + memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width); + memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width); } } } diff --git a/postproc/postprocess.h b/postproc/postprocess.h index 66cccd1131..751d95956d 100644 --- a/postproc/postprocess.h +++ b/postproc/postprocess.h @@ -54,7 +54,6 @@ #define TEMP_NOISE_FILTER 0x100000 #define FORCE_QUANT 0x200000 - #define GET_PP_QUALITY_MAX 6 //use if u want a faster postprocessing code @@ -76,8 +75,8 @@ typedef struct PPMode{ int maxAllowedY; // for brihtness correction int maxTmpNoise[3]; // for Temporal Noise Reducing filter (Maximal sum of abs differences) - - int maxDcDiff; // max abs diff between pixels to be considered flat + + int baseDcDiff; int flatnessThreshold; int forcedQuant; // quantizer if FORCE_QUANT is used @@ -87,7 +86,7 @@ void postprocess(uint8_t * src[3], int srcStride[3], uint8_t * dst[3], int dstStride[3], int horizontalSize, int verticalSize, QP_STORE_T *QP_store, int QP_stride, - PPMode *mode, void *ppContext); + PPMode *mode, void *ppContext, int pict_type); // name is the stuff after "-pp" on the command line PPMode getPPModeByNameAndQuality(char *name, int quality); diff --git a/postproc/postprocess_template.c b/postproc/postprocess_template.c index f24eccf2ce..beea9604f5 100644 --- a/postproc/postprocess_template.c +++ b/postproc/postprocess_template.c @@ -56,8 +56,9 @@ asm volatile( "leal (%1, %2), %%eax \n\t" // 0 1 2 3 4 5 6 7 8 9 // %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2 - "movq %3, %%mm7 \n\t" // mm7 = 0x7F - "movq %4, %%mm6 \n\t" // mm6 = 0x7D + "movq %3, %%mm7 \n\t" + "movq %4, %%mm6 \n\t" + "movq (%1), %%mm0 \n\t" "movq (%%eax), %%mm1 \n\t" "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece @@ -119,7 +120,7 @@ asm volatile( #endif "movd %%mm0, %0 \n\t" : "=r" (numEq) - : "r" (src), "r" (stride), "m" (c->mmxDcOffset), "m" (c->mmxDcThreshold) + : "r" (src), "r" (stride), "m" (c->mmxDcOffset[c->nonBQP]), "m" (c->mmxDcThreshold[c->nonBQP]) : "%eax" ); numEq= (-numEq) &0xFF; @@ -150,6 +151,7 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, PPContext *c ); return isOk==0; #else +#if 1 int x; const int QP= c->QP; src+= stride*3; @@ -159,6 +161,24 @@ static inline int RENAME(isVertMinMaxOk)(uint8_t src[], int stride, PPContext *c } return 1; +#else + int x; + const int QP= c->QP; + src+= stride*4; + for(x=0; x<BLOCK_SIZE; x++) + { + int min=255; + int max=0; + int y; + for(y=0; y<8; y++){ + int v= src[x + y*stride]; + if(v>max) max=v; + if(v<min) min=v; + } + if(max-min > 2*QP) return 0; + } + return 1; +#endif #endif } @@ -2639,22 +2659,23 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int int black=0, white=255; // blackest black and whitest white in the picture int QPCorrecture= 256*256; - int copyAhead; + int copyAhead, i; //FIXME remove uint64_t * const yHistogram= c.yHistogram; uint8_t * const tempSrc= c.tempSrc; uint8_t * const tempDst= c.tempDst; - - c.dcOffset= c.ppMode.maxDcDiff; - c.dcThreshold= c.ppMode.maxDcDiff*2 + 1; + const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4; #ifdef HAVE_MMX - c.mmxDcOffset= 0x7F - c.dcOffset; - c.mmxDcThreshold= 0x7F - c.dcThreshold; - - c.mmxDcOffset*= 0x0101010101010101LL; - c.mmxDcThreshold*= 0x0101010101010101LL; + for(i=0; i<32; i++){ + int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1; + int threshold= offset*2 + 1; + c.mmxDcOffset[i]= 0x7F - offset; + c.mmxDcThreshold[i]= 0x7F - threshold; + c.mmxDcOffset[i]*= 0x0101010101010101LL; + c.mmxDcThreshold[i]*= 0x0101010101010101LL; + } #endif if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16; @@ -2814,11 +2835,8 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int uint8_t *tempBlock1= c.tempBlocks; uint8_t *tempBlock2= c.tempBlocks + 8; #endif -#ifdef ARCH_X86 int *QPptr= isColor ? &QPs[(y>>3)*QPStride] :&QPs[(y>>4)*QPStride]; - int QPDelta= isColor ? (-1) : 1<<31; - int QPFrac= 1<<30; -#endif + int *nonBQPptr= isColor ? &c.nonBQPTable[(y>>3)*mbWidth] :&c.nonBQPTable[(y>>4)*mbWidth]; int QP=0; /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards if not than use a temporary buffer */ @@ -2855,28 +2873,19 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int #ifdef HAVE_MMX uint8_t *tmpXchg; #endif -#ifdef ARCH_X86 - QP= *QPptr; - asm volatile( - "addl %2, %1 \n\t" - "sbbl %%eax, %%eax \n\t" - "shll $2, %%eax \n\t" - "subl %%eax, %0 \n\t" - : "+r" (QPptr), "+m" (QPFrac) - : "r" (QPDelta) - : "%eax" - ); -#else - QP= isColor ? - QPs[(y>>3)*QPStride + (x>>3)]: - QPs[(y>>4)*QPStride + (x>>4)]; -#endif - if(!isColor) + if(isColor) + { + QP= QPptr[x>>3]; + c.nonBQP= nonBQPptr[x>>3]; + } + else { + QP= QPptr[x>>4]; QP= (QP* QPCorrecture + 256*128)>>16; + c.nonBQP= nonBQPptr[x>>4]; + c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16; yHistogram[ srcBlock[srcStride*12 + 4] ]++; } -//printf("%d ", QP); c.QP= QP; #ifdef HAVE_MMX asm volatile( |