diff options
author | Andreas Rheinhardt <andreas.rheinhardt@outlook.com> | 2022-11-04 20:22:03 +0100 |
---|---|---|
committer | Andreas Rheinhardt <andreas.rheinhardt@outlook.com> | 2022-11-09 17:39:00 +0100 |
commit | f0ee38790b9fad7bef69f30fc5292be9f1a6fcc3 (patch) | |
tree | acbdbf7f34e2d6146bc1b269ceeb9c1518bd826e /libpostproc | |
parent | b616b0470406add76fd89ae09f326be4cc41d96b (diff) | |
download | ffmpeg-f0ee38790b9fad7bef69f30fc5292be9f1a6fcc3.tar.gz |
postproc/postprocess_template: Fix alignment
postProcess in postprocess_template.c copies a PPContext
to the stack, works with this copy and then copies
it back again. Said local copy uses a hardcoded alignment
of eight, although PPContext has alignment 32 since
cbe27006cee0099076d1d68af646f3ef914167d8
(this commit was in anticipation of AVX2 code that never landed).
This leads to misalignment in the filter-(pp|pp1|pp2|pp3|qp)
FATE-tests which UBSan complains about. So avoid the local copy.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Diffstat (limited to 'libpostproc')
-rw-r--r-- | libpostproc/postprocess_template.c | 156 |
1 files changed, 77 insertions, 79 deletions
diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c index bcf7bdad66..ade1d6ce2b 100644 --- a/libpostproc/postprocess_template.c +++ b/libpostproc/postprocess_template.c @@ -2860,14 +2860,13 @@ static inline void RENAME(prefetcht2)(const void *p) * Filter array of bytes (Y or U or V values) */ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, - const int8_t QPs[], int QPStride, int isColor, PPContext *c2) + const int8_t QPs[], int QPStride, int isColor, PPContext *c) { - DECLARE_ALIGNED(8, PPContext, c)= *c2; //copy to stack for faster access int x,y; #ifdef TEMPLATE_PP_TIME_MODE const int mode= TEMPLATE_PP_TIME_MODE; #else - const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode; + const int mode = isColor ? c->ppMode.chromMode : c->ppMode.lumMode; #endif int black=0, white=255; // blackest black and whitest white in the picture int QPCorrecture= 256*256; @@ -2877,29 +2876,29 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ int i; #endif - const int qpHShift= isColor ? 4-c.hChromaSubSample : 4; - const int qpVShift= isColor ? 4-c.vChromaSubSample : 4; + const int qpHShift = isColor ? 4 - c->hChromaSubSample : 4; + const int qpVShift = isColor ? 4 - c->vChromaSubSample : 4; //FIXME remove - uint64_t * const yHistogram= c.yHistogram; - uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride; - uint8_t * const tempDst= (dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride) + 32; + uint64_t * const yHistogram= c->yHistogram; + uint8_t * const tempSrc = srcStride > 0 ? c->tempSrc : c->tempSrc - 23*srcStride; + uint8_t * const tempDst = (dstStride > 0 ? c->tempDst : c->tempDst - 23*dstStride) + 32; //const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4; if (mode & VISUALIZE){ if(!(mode & (V_A_DEBLOCK | H_A_DEBLOCK)) || TEMPLATE_PP_MMX) { - av_log(c2, AV_LOG_WARNING, "Visualization is currently only supported with the accurate deblock filter without SIMD\n"); + av_log(c, AV_LOG_WARNING, "Visualization is currently only supported with the accurate deblock filter without SIMD\n"); } } #if TEMPLATE_PP_MMX for(i=0; i<57; i++){ - int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1; + int offset = ((i * c->ppMode.baseDcDiff) >> 8) + 1; int threshold= offset*2 + 1; - c.mmxDcOffset[i]= 0x7F - offset; - c.mmxDcThreshold[i]= 0x7F - threshold; - c.mmxDcOffset[i]*= 0x0101010101010101LL; - c.mmxDcThreshold[i]*= 0x0101010101010101LL; + c->mmxDcOffset[i] = 0x7F - offset; + c->mmxDcThreshold[i] = 0x7F - threshold; + c->mmxDcOffset[i] *= 0x0101010101010101LL; + c->mmxDcThreshold[i] *= 0x0101010101010101LL; } #endif @@ -2925,16 +2924,18 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ uint64_t clipped; AVRational scale; - c.frameNum++; + c->frameNum++; // first frame is fscked so we ignore it - if(c.frameNum == 1) yHistogram[0]= width*(uint64_t)height/64*15/256; + if (c->frameNum == 1) + yHistogram[0] = width * (uint64_t)height/64*15/256; for(i=0; i<256; i++){ sum+= yHistogram[i]; } /* We always get a completely black picture first. */ - maxClipped= av_rescale(sum, c.ppMode.maxClippedThreshold.num, c.ppMode.maxClippedThreshold.den); + maxClipped = av_rescale(sum, c->ppMode.maxClippedThreshold.num, + c->ppMode.maxClippedThreshold.den); clipped= sum; for(black=255; black>0; black--){ @@ -2948,27 +2949,27 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ clipped-= yHistogram[white]; } - scale = (AVRational){c.ppMode.maxAllowedY - c.ppMode.minAllowedY, white - black}; + scale = (AVRational){c->ppMode.maxAllowedY - c->ppMode.minAllowedY, white - black}; #if TEMPLATE_PP_MMXEXT - c.packedYScale = (uint16_t)av_rescale(scale.num, 256, scale.den); - c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF; + c->packedYScale = (uint16_t)av_rescale(scale.num, 256, scale.den); + c->packedYOffset = (((black*c->packedYScale)>>8) - c->ppMode.minAllowedY) & 0xFFFF; #else - c.packedYScale = (uint16_t)av_rescale(scale.num, 1024, scale.den); - c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF; + c->packedYScale = (uint16_t)av_rescale(scale.num, 1024, scale.den); + c->packedYOffset = (black - c->ppMode.minAllowedY) & 0xFFFF; #endif - c.packedYOffset|= c.packedYOffset<<32; - c.packedYOffset|= c.packedYOffset<<16; + c->packedYOffset |= c->packedYOffset<<32; + c->packedYOffset |= c->packedYOffset<<16; - c.packedYScale|= c.packedYScale<<32; - c.packedYScale|= c.packedYScale<<16; + c->packedYScale |= c->packedYScale<<32; + c->packedYScale |= c->packedYScale<<16; if(mode & LEVEL_FIX) QPCorrecture= (int)av_rescale(scale.num, 256*256, scale.den); else QPCorrecture= 256*256; }else{ - c.packedYScale= 0x0100010001000100LL; - c.packedYOffset= 0; + c->packedYScale = 0x0100010001000100LL; + c->packedYOffset = 0; QPCorrecture= 256*256; } @@ -2988,22 +2989,22 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32); RENAME(blockCopy)(dstBlock + dstStride*8, dstStride, - srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset); + srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c->packedYOffset); RENAME(duplicate)(dstBlock + dstStride*8, dstStride); if(mode & LINEAR_IPOL_DEINT_FILTER) RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride); else if(mode & LINEAR_BLEND_DEINT_FILTER) - RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x); + RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c->deintTemp + x); else if(mode & MEDIAN_DEINT_FILTER) RENAME(deInterlaceMedian)(dstBlock, dstStride); else if(mode & CUBIC_IPOL_DEINT_FILTER) RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); else if(mode & FFMPEG_DEINT_FILTER) - RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x); + RENAME(deInterlaceFF)(dstBlock, dstStride, c->deintTemp + x); else if(mode & LOWPASS5_DEINT_FILTER) - RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x); + RENAME(deInterlaceL5)(dstBlock, dstStride, c->deintTemp + x, c->deintTemp + width + x); /* else if(mode & CUBIC_BLEND_DEINT_FILTER) RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); */ @@ -3025,11 +3026,11 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ const uint8_t *srcBlock= &(src[y*srcStride]); uint8_t *dstBlock= &(dst[y*dstStride]); #if TEMPLATE_PP_MMX - uint8_t *tempBlock1= c.tempBlocks; - uint8_t *tempBlock2= c.tempBlocks + 8; + uint8_t *tempBlock1 = c->tempBlocks; + uint8_t *tempBlock2 = c->tempBlocks + 8; #endif const int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride]; - int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*FFABS(QPStride)]; + int8_t *nonBQPptr = &c->nonBQPTable[(y>>qpVShift)*FFABS(QPStride)]; int QP=0, nonBQP=0; /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards if not than use a temporary buffer */ @@ -3072,8 +3073,8 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ nonBQP= (nonBQP* QPCorrecture + 256*128)>>16; yHistogram[(srcBlock+qp_index*8)[srcStride*12 + 4]]++; } - c.QP_block[qp_index] = QP; - c.nonBQP_block[qp_index] = nonBQP; + c->QP_block[qp_index] = QP; + c->nonBQP_block[qp_index] = nonBQP; #if TEMPLATE_PP_MMX __asm__ volatile( "movd %1, %%mm7 \n\t" @@ -3081,7 +3082,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ "packuswb %%mm7, %%mm7 \n\t" // 0,QP, 0, QP, 0,QP, 0, QP "packuswb %%mm7, %%mm7 \n\t" // QP,..., QP "movq %%mm7, %0 \n\t" - : "=m" (c.pQPb_block[qp_index]) + : "=m" (c->pQPb_block[qp_index]) : "r" (QP) ); #endif @@ -3093,20 +3094,20 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ RENAME(prefetcht0)(dstBlock + (((x>>2)&6) + copyAhead+1)*dstStride + 32); RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride, - srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset); + srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c->packedYOffset); if(mode & LINEAR_IPOL_DEINT_FILTER) RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride); else if(mode & LINEAR_BLEND_DEINT_FILTER) - RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x); + RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c->deintTemp + x); else if(mode & MEDIAN_DEINT_FILTER) RENAME(deInterlaceMedian)(dstBlock, dstStride); else if(mode & CUBIC_IPOL_DEINT_FILTER) RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride); else if(mode & FFMPEG_DEINT_FILTER) - RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x); + RENAME(deInterlaceFF)(dstBlock, dstStride, c->deintTemp + x); else if(mode & LOWPASS5_DEINT_FILTER) - RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x); + RENAME(deInterlaceL5)(dstBlock, dstStride, c->deintTemp + x, c->deintTemp + width + x); /* else if(mode & CUBIC_BLEND_DEINT_FILTER) RENAME(deInterlaceBlendCubic)(dstBlock, dstStride); */ @@ -3121,24 +3122,24 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ const int stride= dstStride; //temporary while changing QP stuff to make things continue to work //eventually QP,nonBQP,etc will be arrays and this will be unnecessary - c.QP = c.QP_block[qp_index]; - c.nonBQP = c.nonBQP_block[qp_index]; - c.pQPb = c.pQPb_block[qp_index]; - c.pQPb2 = c.pQPb2_block[qp_index]; + c->QP = c->QP_block[qp_index]; + c->nonBQP = c->nonBQP_block[qp_index]; + c->pQPb = c->pQPb_block[qp_index]; + c->pQPb2 = c->pQPb2_block[qp_index]; /* only deblock if we have 2 blocks */ if(y + 8 < height){ if(mode & V_X1_FILTER) - RENAME(vertX1Filter)(dstBlock, stride, &c); + RENAME(vertX1Filter)(dstBlock, stride, c); else if(mode & V_DEBLOCK){ - const int t= RENAME(vertClassify)(dstBlock, stride, &c); + const int t = RENAME(vertClassify)(dstBlock, stride, c); if(t==1) - RENAME(doVertLowPass)(dstBlock, stride, &c); + RENAME(doVertLowPass)(dstBlock, stride, c); else if(t==2) - RENAME(doVertDefFilter)(dstBlock, stride, &c); + RENAME(doVertDefFilter)(dstBlock, stride, c); }else if(mode & V_A_DEBLOCK){ - RENAME(do_a_deblock)(dstBlock, stride, 1, &c, mode); + RENAME(do_a_deblock)(dstBlock, stride, 1, c, mode); } } @@ -3151,10 +3152,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ for(x = startx, qp_index=0; x < endx; x+=BLOCK_SIZE, qp_index++){ const int stride= dstStride; - c.QP = c.QP_block[qp_index]; - c.nonBQP = c.nonBQP_block[qp_index]; - c.pQPb = c.pQPb_block[qp_index]; - c.pQPb2 = c.pQPb2_block[qp_index]; + c->QP = c->QP_block[qp_index]; + c->nonBQP = c->nonBQP_block[qp_index]; + c->pQPb = c->pQPb_block[qp_index]; + c->pQPb2 = c->pQPb2_block[qp_index]; #if TEMPLATE_PP_MMX RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); #endif @@ -3162,60 +3163,60 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ if(x - 8 >= 0){ #if TEMPLATE_PP_MMX if(mode & H_X1_FILTER) - RENAME(vertX1Filter)(tempBlock1, 16, &c); + RENAME(vertX1Filter)(tempBlock1, 16, c); else if(mode & H_DEBLOCK){ - const int t= RENAME(vertClassify)(tempBlock1, 16, &c); + const int t= RENAME(vertClassify)(tempBlock1, 16, c); if(t==1) - RENAME(doVertLowPass)(tempBlock1, 16, &c); + RENAME(doVertLowPass)(tempBlock1, 16, c); else if(t==2) - RENAME(doVertDefFilter)(tempBlock1, 16, &c); + RENAME(doVertDefFilter)(tempBlock1, 16, c); }else if(mode & H_A_DEBLOCK){ - RENAME(do_a_deblock)(tempBlock1, 16, 1, &c, mode); + RENAME(do_a_deblock)(tempBlock1, 16, 1, c, mode); } RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16); #else if(mode & H_X1_FILTER) - horizX1Filter(dstBlock-4, stride, c.QP); + horizX1Filter(dstBlock-4, stride, c->QP); else if(mode & H_DEBLOCK){ #if TEMPLATE_PP_ALTIVEC DECLARE_ALIGNED(16, unsigned char, tempBlock)[272]; int t; transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); - t = vertClassify_altivec(tempBlock-48, 16, &c); + t = vertClassify_altivec(tempBlock-48, 16, c); if(t==1) { - doVertLowPass_altivec(tempBlock-48, 16, &c); + doVertLowPass_altivec(tempBlock-48, 16, c); transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride); } else if(t==2) { - doVertDefFilter_altivec(tempBlock-48, 16, &c); + doVertDefFilter_altivec(tempBlock-48, 16, c); transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride); } #else - const int t= RENAME(horizClassify)(dstBlock-4, stride, &c); + const int t= RENAME(horizClassify)(dstBlock-4, stride, c); if(t==1) - RENAME(doHorizLowPass)(dstBlock-4, stride, &c); + RENAME(doHorizLowPass)(dstBlock-4, stride, c); else if(t==2) - RENAME(doHorizDefFilter)(dstBlock-4, stride, &c); + RENAME(doHorizDefFilter)(dstBlock-4, stride, c); #endif }else if(mode & H_A_DEBLOCK){ - RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c, mode); + RENAME(do_a_deblock)(dstBlock-8, 1, stride, c, mode); } #endif //TEMPLATE_PP_MMX if(mode & DERING){ //FIXME filter first line - if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c); + if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, c); } if(mode & TEMP_NOISE_FILTER) { RENAME(tempNoiseReducer)(dstBlock-8, stride, - c.tempBlurred[isColor] + y*dstStride + x, - c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256, - c.ppMode.maxTmpNoise); + c->tempBlurred[isColor] + y*dstStride + x, + c->tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256, + c->ppMode.maxTmpNoise); } } @@ -3229,14 +3230,14 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ } if(mode & DERING){ - if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c); + if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, c); } if((mode & TEMP_NOISE_FILTER)){ RENAME(tempNoiseReducer)(dstBlock-8, dstStride, - c.tempBlurred[isColor] + y*dstStride + x, - c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256, - c.ppMode.maxTmpNoise); + c->tempBlurred[isColor] + y*dstStride + x, + c->tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256, + c->ppMode.maxTmpNoise); } /* did we use a tmp buffer for the last lines*/ @@ -3278,9 +3279,6 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ } } #endif - - *c2= c; //copy local context back - } #undef RENAME |