diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2011-02-16 14:18:21 -0500 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2011-02-17 17:35:35 -0500 |
commit | 1da6ea39542424ddb691dc6cf08d611a4c6db247 (patch) | |
tree | 36b2273877ee0a91d23a73b3eab1cad15acff600 /libavcodec | |
parent | c2ca851b234e169b50730ef357aeade8047491eb (diff) | |
download | ffmpeg-1da6ea39542424ddb691dc6cf08d611a4c6db247.tar.gz |
VC1: transpose IDCT 8x8 coeffs while reading.
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/ppc/vc1dsp_altivec.c | 1 |
-rw-r--r-- | libavcodec/vc1.h | 1 |
-rw-r--r-- | libavcodec/vc1dec.c | 95 |
-rw-r--r-- | libavcodec/vc1dsp.c | 24 |
4 files changed, 63 insertions, 58 deletions
diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c index a2f55f2db0..5ffe9a5479 100644 --- a/libavcodec/ppc/vc1dsp_altivec.c +++ b/libavcodec/ppc/vc1dsp_altivec.c @@ -154,7 +154,6 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64]) src6 = vec_ld( 96, block); src7 = vec_ld(112, block); - TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7); s0 = vec_unpackl(src0); s1 = vec_unpackl(src1); s2 = vec_unpackl(src2); diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h index e5a9cbaec1..da0b6c16e7 100644 --- a/libavcodec/vc1.h +++ b/libavcodec/vc1.h @@ -215,6 +215,7 @@ typedef struct VC1Context{ int k_y; ///< Number of bits for MVs (depends on MV range) int range_x, range_y; ///< MV range uint8_t pq, altpq; ///< Current/alternate frame quantizer scale + uint8_t zz_8x8[4][64];///< Zigzag table for TT_8x8, permuted for IDCT const uint8_t* zz_8x4;///< Zigzag scan table for TT_8x4 coding mode const uint8_t* zz_4x8;///< Zigzag scan table for TT_4x8 coding mode /** pquant parameters */ diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index 3bd2cce297..169797ca75 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -1499,11 +1499,11 @@ static int vc1_decode_i_block(VC1Context *v, DCTELEM block[64], int n, int coded if(v->s.ac_pred) { if(!dc_pred_dir) - zz_table = wmv1_scantable[2]; + zz_table = v->zz_8x8[2]; else - zz_table = wmv1_scantable[3]; + zz_table = v->zz_8x8[3]; } else - zz_table = wmv1_scantable[1]; + zz_table = v->zz_8x8[1]; ac_val = s->ac_val[0][0] + s->block_index[n] * 16; ac_val2 = ac_val; @@ -1524,16 +1524,16 @@ static int vc1_decode_i_block(VC1Context *v, DCTELEM block[64], int n, int coded if(s->ac_pred) { if(dc_pred_dir) { //left for(k = 1; k < 8; k++) - block[k << 3] += ac_val[k]; + block[k] += ac_val[k]; } else { //top for(k = 1; k < 8; k++) - block[k] += ac_val[k + 8]; + block[k << 3] += ac_val[k + 8]; } } /* save AC coeffs for further prediction */ for(k = 1; k < 8; k++) { - ac_val2[k] = block[k << 3]; - 
ac_val2[k + 8] = block[k]; + ac_val2[k] = block[k]; + ac_val2[k + 8] = block[k << 3]; } /* scale AC coeffs */ @@ -1570,15 +1570,15 @@ not_coded: if(s->ac_pred) { if(dc_pred_dir) { //left for(k = 1; k < 8; k++) { - block[k << 3] = ac_val[k] * scale; - if(!v->pquantizer && block[k << 3]) - block[k << 3] += (block[k << 3] < 0) ? -v->pq : v->pq; + block[k] = ac_val[k] * scale; + if(!v->pquantizer && block[k]) + block[k] += (block[k] < 0) ? -v->pq : v->pq; } } else { //top for(k = 1; k < 8; k++) { - block[k] = ac_val[k + 8] * scale; - if(!v->pquantizer && block[k]) - block[k] += (block[k] < 0) ? -v->pq : v->pq; + block[k << 3] = ac_val[k + 8] * scale; + if(!v->pquantizer && block[k << 3]) + block[k << 3] += (block[k << 3] < 0) ? -v->pq : v->pq; } } i = 63; @@ -1682,11 +1682,11 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int c if(v->s.ac_pred) { if(!dc_pred_dir) - zz_table = wmv1_scantable[2]; + zz_table = v->zz_8x8[2]; else - zz_table = wmv1_scantable[3]; + zz_table = v->zz_8x8[3]; } else - zz_table = wmv1_scantable[1]; + zz_table = v->zz_8x8[1]; while (!last) { vc1_decode_ac_coeff(v, &last, &skip, &value, codingset); @@ -1705,25 +1705,25 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int c if(dc_pred_dir) { //left for(k = 1; k < 8; k++) - block[k << 3] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; + block[k] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; } else { //top for(k = 1; k < 8; k++) - block[k] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; + block[k << 3] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; } } else { if(dc_pred_dir) { //left for(k = 1; k < 8; k++) - block[k << 3] += ac_val[k]; + block[k] += ac_val[k]; } else { //top for(k = 1; k < 8; k++) - block[k] += ac_val[k + 8]; + block[k << 3] += ac_val[k + 8]; } } } /* save AC coeffs for further prediction */ for(k = 1; k < 8; k++) { - ac_val2[k] = block[k << 3]; - ac_val2[k + 
8] = block[k]; + ac_val2[k] = block[k]; + ac_val2[k + 8] = block[k << 3]; } /* scale AC coeffs */ @@ -1765,15 +1765,15 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int c if(use_pred) { if(dc_pred_dir) { //left for(k = 1; k < 8; k++) { - block[k << 3] = ac_val2[k] * scale; - if(!v->pquantizer && block[k << 3]) - block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant; + block[k] = ac_val2[k] * scale; + if(!v->pquantizer && block[k]) + block[k] += (block[k] < 0) ? -mquant : mquant; } } else { //top for(k = 1; k < 8; k++) { - block[k] = ac_val2[k + 8] * scale; - if(!v->pquantizer && block[k]) - block[k] += (block[k] < 0) ? -mquant : mquant; + block[k << 3] = ac_val2[k + 8] * scale; + if(!v->pquantizer && block[k << 3]) + block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant; } } i = 63; @@ -1884,17 +1884,14 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c if(coded) { int last = 0, skip, value; - const uint8_t *zz_table; int k; - zz_table = wmv1_scantable[0]; - while (!last) { vc1_decode_ac_coeff(v, &last, &skip, &value, codingset); i += skip; if(i > 63) break; - block[zz_table[i++]] = value; + block[v->zz_8x8[0][i++]] = value; } /* apply AC prediction if needed */ @@ -1906,25 +1903,25 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c if(dc_pred_dir) { //left for(k = 1; k < 8; k++) - block[k << 3] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; + block[k] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; } else { //top for(k = 1; k < 8; k++) - block[k] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; + block[k << 3] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18; } } else { if(dc_pred_dir) { //left for(k = 1; k < 8; k++) - block[k << 3] += ac_val[k]; + block[k] += ac_val[k]; } else { //top for(k = 1; k < 8; k++) - block[k] += ac_val[k + 8]; + block[k << 3] += ac_val[k + 8]; } } } /* save AC coeffs for 
further prediction */ for(k = 1; k < 8; k++) { - ac_val2[k] = block[k << 3]; - ac_val2[k + 8] = block[k]; + ac_val2[k] = block[k]; + ac_val2[k + 8] = block[k << 3]; } /* scale AC coeffs */ @@ -1966,15 +1963,15 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c if(use_pred) { if(dc_pred_dir) { //left for(k = 1; k < 8; k++) { - block[k << 3] = ac_val2[k] * scale; - if(!v->pquantizer && block[k << 3]) - block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant; + block[k] = ac_val2[k] * scale; + if(!v->pquantizer && block[k]) + block[k] += (block[k] < 0) ? -mquant : mquant; } } else { //top for(k = 1; k < 8; k++) { - block[k] = ac_val2[k + 8] * scale; - if(!v->pquantizer && block[k]) - block[k] += (block[k] < 0) ? -mquant : mquant; + block[k << 3] = ac_val2[k + 8] * scale; + if(!v->pquantizer && block[k << 3]) + block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant; } } i = 63; @@ -2035,7 +2032,7 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan i += skip; if(i > 63) break; - idx = wmv1_scantable[0][i++]; + idx = v->zz_8x8[0][i++]; block[idx] = value * scale; if(!v->pquantizer) block[idx] += (block[idx] < 0) ? 
-mquant : mquant; @@ -3007,6 +3004,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) VC1Context *v = avctx->priv_data; MpegEncContext *s = &v->s; GetBitContext gb; + int i; if (!avctx->extradata_size || !avctx->extradata) return -1; if (!(avctx->flags & CODEC_FLAG_GRAY)) @@ -3025,6 +3023,13 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx) if(ff_msmpeg4_decode_init(avctx) < 0) return -1; if (vc1_init_common(v) < 0) return -1; + for (i = 0; i < 64; i++) { +#define transpose(x) ((x>>3) | ((x&7)<<3)) + v->zz_8x8[0][i] = transpose(wmv1_scantable[0][i]); + v->zz_8x8[1][i] = transpose(wmv1_scantable[1][i]); + v->zz_8x8[2][i] = transpose(wmv1_scantable[2][i]); + v->zz_8x8[3][i] = transpose(wmv1_scantable[3][i]); + } avctx->coded_width = avctx->width; avctx->coded_height = avctx->height; diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c index aab1694797..fd740e12d4 100644 --- a/libavcodec/vc1dsp.c +++ b/libavcodec/vc1dsp.c @@ -203,25 +203,25 @@ static void vc1_inv_trans_8x8_c(DCTELEM block[64]) { int i; register int t1,t2,t3,t4,t5,t6,t7,t8; - DCTELEM *src, *dst; + DCTELEM *src, *dst, temp[64]; src = block; - dst = block; + dst = temp; for(i = 0; i < 8; i++){ - t1 = 12 * (src[0] + src[4]) + 4; - t2 = 12 * (src[0] - src[4]) + 4; - t3 = 16 * src[2] + 6 * src[6]; - t4 = 6 * src[2] - 16 * src[6]; + t1 = 12 * (src[ 0] + src[32]) + 4; + t2 = 12 * (src[ 0] - src[32]) + 4; + t3 = 16 * src[16] + 6 * src[48]; + t4 = 6 * src[16] - 16 * src[48]; t5 = t1 + t3; t6 = t2 + t4; t7 = t2 - t4; t8 = t1 - t3; - t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; - t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; - t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; - t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; + t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; + t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; + t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; + t4 = 4 * src[ 8] - 9 * src[24] 
+ 15 * src[40] - 16 * src[56]; dst[0] = (t5 + t1) >> 3; dst[1] = (t6 + t2) >> 3; @@ -232,11 +232,11 @@ static void vc1_inv_trans_8x8_c(DCTELEM block[64]) dst[6] = (t6 - t2) >> 3; dst[7] = (t5 - t1) >> 3; - src += 8; + src += 1; dst += 8; } - src = block; + src = temp; dst = block; for(i = 0; i < 8; i++){ t1 = 12 * (src[ 0] + src[32]) + 64; |