diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2013-03-12 07:28:12 -0700 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-03-12 22:54:10 +0100 |
commit | d85c9b036e65afa05dcc8fbf37813ef4a05db1f3 (patch) | |
tree | cdf7469df86a63771fa6a2df5ef9ee4db9be2a95 /libavcodec/vp3dsp.c | |
parent | db594f65ec4e4a8d85113f309f3d9c31959b48e3 (diff) | |
download | ffmpeg-d85c9b036e65afa05dcc8fbf37813ef4a05db1f3.tar.gz |
vp3/x86: use full transpose for all IDCTs.

This way, the special IDCT permutations are no longer needed. Bfin code
is disabled until someone updates it. This is similar to how H264 does
it, and removes the dsputil dependency imposed by the scantable code.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/vp3dsp.c')
-rw-r--r-- | libavcodec/vp3dsp.c | 92 |
1 file changed, 35 insertions, 57 deletions
diff --git a/libavcodec/vp3dsp.c b/libavcodec/vp3dsp.c index 051812e72d..93489637ba 100644 --- a/libavcodec/vp3dsp.c +++ b/libavcodec/vp3dsp.c @@ -54,11 +54,12 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int /* Inverse DCT on the rows now */ for (i = 0; i < 8; i++) { /* Check for non-zero values */ - if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) { - A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]); - B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]); - C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]); - D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]); + if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | + ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) { + A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]); + B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]); + C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]); + D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]); Ad = M(xC4S4, (A - C)); Bd = M(xC4S4, (B - D)); @@ -66,11 +67,11 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int Cd = A + C; Dd = B + D; - E = M(xC4S4, (ip[0] + ip[4])); - F = M(xC4S4, (ip[0] - ip[4])); + E = M(xC4S4, (ip[0 * 8] + ip[4 * 8])); + F = M(xC4S4, (ip[0 * 8] - ip[4 * 8])); - G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]); - H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]); + G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]); + H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]); Ed = E - G; Gd = E + G; @@ -82,33 +83,33 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int Hd = Bd + H; /* Final sequence of operations over-write original inputs. 
*/ - ip[0] = Gd + Cd ; - ip[7] = Gd - Cd ; + ip[0 * 8] = Gd + Cd ; + ip[7 * 8] = Gd - Cd ; - ip[1] = Add + Hd; - ip[2] = Add - Hd; + ip[1 * 8] = Add + Hd; + ip[2 * 8] = Add - Hd; - ip[3] = Ed + Dd ; - ip[4] = Ed - Dd ; + ip[3 * 8] = Ed + Dd ; + ip[4 * 8] = Ed - Dd ; - ip[5] = Fd + Bdd; - ip[6] = Fd - Bdd; + ip[5 * 8] = Fd + Bdd; + ip[6 * 8] = Fd - Bdd; } - ip += 8; /* next row */ + ip += 1; /* next row */ } ip = input; for ( i = 0; i < 8; i++) { /* Check for non-zero values (bitwise or faster than ||) */ - if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | - ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) { + if ( ip[1] | ip[2] | ip[3] | + ip[4] | ip[5] | ip[6] | ip[7] ) { - A = M(xC1S7, ip[1*8]) + M(xC7S1, ip[7*8]); - B = M(xC7S1, ip[1*8]) - M(xC1S7, ip[7*8]); - C = M(xC3S5, ip[3*8]) + M(xC5S3, ip[5*8]); - D = M(xC3S5, ip[5*8]) - M(xC5S3, ip[3*8]); + A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]); + B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]); + C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]); + D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]); Ad = M(xC4S4, (A - C)); Bd = M(xC4S4, (B - D)); @@ -116,16 +117,16 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int Cd = A + C; Dd = B + D; - E = M(xC4S4, (ip[0*8] + ip[4*8])) + 8; - F = M(xC4S4, (ip[0*8] - ip[4*8])) + 8; + E = M(xC4S4, (ip[0] + ip[4])) + 8; + F = M(xC4S4, (ip[0] - ip[4])) + 8; if(type==1){ //HACK E += 16*128; F += 16*128; } - G = M(xC2S6, ip[2*8]) + M(xC6S2, ip[6*8]); - H = M(xC6S2, ip[2*8]) - M(xC2S6, ip[6*8]); + G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]); + H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]); Ed = E - G; Gd = E + G; @@ -137,19 +138,7 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int Hd = Bd + H; /* Final sequence of operations over-write original inputs. 
*/ - if(type==0){ - ip[0*8] = (Gd + Cd ) >> 4; - ip[7*8] = (Gd - Cd ) >> 4; - - ip[1*8] = (Add + Hd ) >> 4; - ip[2*8] = (Add - Hd ) >> 4; - - ip[3*8] = (Ed + Dd ) >> 4; - ip[4*8] = (Ed - Dd ) >> 4; - - ip[5*8] = (Fd + Bdd ) >> 4; - ip[6*8] = (Fd - Bdd ) >> 4; - }else if(type==1){ + if(type==1){ dst[0*stride] = av_clip_uint8((Gd + Cd ) >> 4); dst[7*stride] = av_clip_uint8((Gd - Cd ) >> 4); @@ -176,16 +165,7 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int } } else { - if(type==0){ - ip[0*8] = - ip[1*8] = - ip[2*8] = - ip[3*8] = - ip[4*8] = - ip[5*8] = - ip[6*8] = - ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20); - }else if(type==1){ + if(type==1){ dst[0*stride]= dst[1*stride]= dst[2*stride]= @@ -193,10 +173,10 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int dst[4*stride]= dst[5*stride]= dst[6*stride]= - dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20)); + dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift<<16))>>20)); }else{ - if(ip[0*8]){ - int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20); + if(ip[0]){ + int v= ((xC4S4 * ip[0] + (IdctAdjustBeforeShift<<16))>>20); dst[0*stride] = av_clip_uint8(dst[0*stride] + v); dst[1*stride] = av_clip_uint8(dst[1*stride] + v); dst[2*stride] = av_clip_uint8(dst[2*stride] + v); @@ -209,7 +189,7 @@ static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int } } - ip++; /* next column */ + ip += 8; /* next column */ dst++; } } @@ -307,8 +287,6 @@ av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags) c->v_loop_filter = vp3_v_loop_filter_c; c->h_loop_filter = vp3_h_loop_filter_c; - c->idct_perm = FF_NO_IDCT_PERM; - if (ARCH_ARM) ff_vp3dsp_init_arm(c, flags); if (ARCH_BFIN) |