about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Ronald S. Bultje <rsbultje@gmail.com> 2011-02-16 14:18:21 -0500
committer Michael Niedermayer <michaelni@gmx.at> 2011-02-18 19:52:38 +0100
commit 2739dc5d858a18c4d10f043af0e31a8a57ca147f (patch)
tree 8208bafdee32649f0ab3001126d40427ab99bc86
parent 003c32e72cb675b271d5434858899970000f369a (diff)
download ffmpeg-2739dc5d858a18c4d10f043af0e31a8a57ca147f.tar.gz
VC1: transpose IDCT 8x8 coeffs while reading.
(cherry picked from commit 1da6ea39542424ddb691dc6cf08d611a4c6db247)
-rw-r--r-- libavcodec/ppc/vc1dsp_altivec.c 1
-rw-r--r-- libavcodec/vc1.h 1
-rw-r--r-- libavcodec/vc1dec.c 95
-rw-r--r-- libavcodec/vc1dsp.c 24
4 files changed, 63 insertions, 58 deletions
diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c
index a2f55f2db0..5ffe9a5479 100644
--- a/libavcodec/ppc/vc1dsp_altivec.c
+++ b/libavcodec/ppc/vc1dsp_altivec.c
@@ -154,7 +154,6 @@ static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
src6 = vec_ld( 96, block);
src7 = vec_ld(112, block);
- TRANSPOSE8(src0, src1, src2, src3, src4, src5, src6, src7);
s0 = vec_unpackl(src0);
s1 = vec_unpackl(src1);
s2 = vec_unpackl(src2);
diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h
index e5a9cbaec1..da0b6c16e7 100644
--- a/libavcodec/vc1.h
+++ b/libavcodec/vc1.h
@@ -215,6 +215,7 @@ typedef struct VC1Context{
int k_y; ///< Number of bits for MVs (depends on MV range)
int range_x, range_y; ///< MV range
uint8_t pq, altpq; ///< Current/alternate frame quantizer scale
+ uint8_t zz_8x8[4][64];///< Zigzag table for TT_8x8, permuted for IDCT
const uint8_t* zz_8x4;///< Zigzag scan table for TT_8x4 coding mode
const uint8_t* zz_4x8;///< Zigzag scan table for TT_4x8 coding mode
/** pquant parameters */
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 3bd2cce297..169797ca75 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -1499,11 +1499,11 @@ static int vc1_decode_i_block(VC1Context *v, DCTELEM block[64], int n, int coded
if(v->s.ac_pred) {
if(!dc_pred_dir)
- zz_table = wmv1_scantable[2];
+ zz_table = v->zz_8x8[2];
else
- zz_table = wmv1_scantable[3];
+ zz_table = v->zz_8x8[3];
} else
- zz_table = wmv1_scantable[1];
+ zz_table = v->zz_8x8[1];
ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
ac_val2 = ac_val;
@@ -1524,16 +1524,16 @@ static int vc1_decode_i_block(VC1Context *v, DCTELEM block[64], int n, int coded
if(s->ac_pred) {
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++)
- block[k << 3] += ac_val[k];
+ block[k] += ac_val[k];
} else { //top
for(k = 1; k < 8; k++)
- block[k] += ac_val[k + 8];
+ block[k << 3] += ac_val[k + 8];
}
}
/* save AC coeffs for further prediction */
for(k = 1; k < 8; k++) {
- ac_val2[k] = block[k << 3];
- ac_val2[k + 8] = block[k];
+ ac_val2[k] = block[k];
+ ac_val2[k + 8] = block[k << 3];
}
/* scale AC coeffs */
@@ -1570,15 +1570,15 @@ not_coded:
if(s->ac_pred) {
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++) {
- block[k << 3] = ac_val[k] * scale;
- if(!v->pquantizer && block[k << 3])
- block[k << 3] += (block[k << 3] < 0) ? -v->pq : v->pq;
+ block[k] = ac_val[k] * scale;
+ if(!v->pquantizer && block[k])
+ block[k] += (block[k] < 0) ? -v->pq : v->pq;
}
} else { //top
for(k = 1; k < 8; k++) {
- block[k] = ac_val[k + 8] * scale;
- if(!v->pquantizer && block[k])
- block[k] += (block[k] < 0) ? -v->pq : v->pq;
+ block[k << 3] = ac_val[k + 8] * scale;
+ if(!v->pquantizer && block[k << 3])
+ block[k << 3] += (block[k << 3] < 0) ? -v->pq : v->pq;
}
}
i = 63;
@@ -1682,11 +1682,11 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int c
if(v->s.ac_pred) {
if(!dc_pred_dir)
- zz_table = wmv1_scantable[2];
+ zz_table = v->zz_8x8[2];
else
- zz_table = wmv1_scantable[3];
+ zz_table = v->zz_8x8[3];
} else
- zz_table = wmv1_scantable[1];
+ zz_table = v->zz_8x8[1];
while (!last) {
vc1_decode_ac_coeff(v, &last, &skip, &value, codingset);
@@ -1705,25 +1705,25 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int c
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++)
- block[k << 3] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+ block[k] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
} else { //top
for(k = 1; k < 8; k++)
- block[k] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+ block[k << 3] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
}
} else {
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++)
- block[k << 3] += ac_val[k];
+ block[k] += ac_val[k];
} else { //top
for(k = 1; k < 8; k++)
- block[k] += ac_val[k + 8];
+ block[k << 3] += ac_val[k + 8];
}
}
}
/* save AC coeffs for further prediction */
for(k = 1; k < 8; k++) {
- ac_val2[k] = block[k << 3];
- ac_val2[k + 8] = block[k];
+ ac_val2[k] = block[k];
+ ac_val2[k + 8] = block[k << 3];
}
/* scale AC coeffs */
@@ -1765,15 +1765,15 @@ static int vc1_decode_i_block_adv(VC1Context *v, DCTELEM block[64], int n, int c
if(use_pred) {
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++) {
- block[k << 3] = ac_val2[k] * scale;
- if(!v->pquantizer && block[k << 3])
- block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
+ block[k] = ac_val2[k] * scale;
+ if(!v->pquantizer && block[k])
+ block[k] += (block[k] < 0) ? -mquant : mquant;
}
} else { //top
for(k = 1; k < 8; k++) {
- block[k] = ac_val2[k + 8] * scale;
- if(!v->pquantizer && block[k])
- block[k] += (block[k] < 0) ? -mquant : mquant;
+ block[k << 3] = ac_val2[k + 8] * scale;
+ if(!v->pquantizer && block[k << 3])
+ block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
}
}
i = 63;
@@ -1884,17 +1884,14 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c
if(coded) {
int last = 0, skip, value;
- const uint8_t *zz_table;
int k;
- zz_table = wmv1_scantable[0];
-
while (!last) {
vc1_decode_ac_coeff(v, &last, &skip, &value, codingset);
i += skip;
if(i > 63)
break;
- block[zz_table[i++]] = value;
+ block[v->zz_8x8[0][i++]] = value;
}
/* apply AC prediction if needed */
@@ -1906,25 +1903,25 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++)
- block[k << 3] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+ block[k] += (ac_val[k] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
} else { //top
for(k = 1; k < 8; k++)
- block[k] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
+ block[k << 3] += (ac_val[k + 8] * q2 * ff_vc1_dqscale[q1 - 1] + 0x20000) >> 18;
}
} else {
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++)
- block[k << 3] += ac_val[k];
+ block[k] += ac_val[k];
} else { //top
for(k = 1; k < 8; k++)
- block[k] += ac_val[k + 8];
+ block[k << 3] += ac_val[k + 8];
}
}
}
/* save AC coeffs for further prediction */
for(k = 1; k < 8; k++) {
- ac_val2[k] = block[k << 3];
- ac_val2[k + 8] = block[k];
+ ac_val2[k] = block[k];
+ ac_val2[k + 8] = block[k << 3];
}
/* scale AC coeffs */
@@ -1966,15 +1963,15 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c
if(use_pred) {
if(dc_pred_dir) { //left
for(k = 1; k < 8; k++) {
- block[k << 3] = ac_val2[k] * scale;
- if(!v->pquantizer && block[k << 3])
- block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
+ block[k] = ac_val2[k] * scale;
+ if(!v->pquantizer && block[k])
+ block[k] += (block[k] < 0) ? -mquant : mquant;
}
} else { //top
for(k = 1; k < 8; k++) {
- block[k] = ac_val2[k + 8] * scale;
- if(!v->pquantizer && block[k])
- block[k] += (block[k] < 0) ? -mquant : mquant;
+ block[k << 3] = ac_val2[k + 8] * scale;
+ if(!v->pquantizer && block[k << 3])
+ block[k << 3] += (block[k << 3] < 0) ? -mquant : mquant;
}
}
i = 63;
@@ -2035,7 +2032,7 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan
i += skip;
if(i > 63)
break;
- idx = wmv1_scantable[0][i++];
+ idx = v->zz_8x8[0][i++];
block[idx] = value * scale;
if(!v->pquantizer)
block[idx] += (block[idx] < 0) ? -mquant : mquant;
@@ -3007,6 +3004,7 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
VC1Context *v = avctx->priv_data;
MpegEncContext *s = &v->s;
GetBitContext gb;
+ int i;
if (!avctx->extradata_size || !avctx->extradata) return -1;
if (!(avctx->flags & CODEC_FLAG_GRAY))
@@ -3025,6 +3023,13 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
if(ff_msmpeg4_decode_init(avctx) < 0)
return -1;
if (vc1_init_common(v) < 0) return -1;
+ for (i = 0; i < 64; i++) {
+#define transpose(x) ((x>>3) | ((x&7)<<3))
+ v->zz_8x8[0][i] = transpose(wmv1_scantable[0][i]);
+ v->zz_8x8[1][i] = transpose(wmv1_scantable[1][i]);
+ v->zz_8x8[2][i] = transpose(wmv1_scantable[2][i]);
+ v->zz_8x8[3][i] = transpose(wmv1_scantable[3][i]);
+ }
avctx->coded_width = avctx->width;
avctx->coded_height = avctx->height;
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
index aab1694797..fd740e12d4 100644
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c
@@ -203,25 +203,25 @@ static void vc1_inv_trans_8x8_c(DCTELEM block[64])
{
int i;
register int t1,t2,t3,t4,t5,t6,t7,t8;
- DCTELEM *src, *dst;
+ DCTELEM *src, *dst, temp[64];
src = block;
- dst = block;
+ dst = temp;
for(i = 0; i < 8; i++){
- t1 = 12 * (src[0] + src[4]) + 4;
- t2 = 12 * (src[0] - src[4]) + 4;
- t3 = 16 * src[2] + 6 * src[6];
- t4 = 6 * src[2] - 16 * src[6];
+ t1 = 12 * (src[ 0] + src[32]) + 4;
+ t2 = 12 * (src[ 0] - src[32]) + 4;
+ t3 = 16 * src[16] + 6 * src[48];
+ t4 = 6 * src[16] - 16 * src[48];
t5 = t1 + t3;
t6 = t2 + t4;
t7 = t2 - t4;
t8 = t1 - t3;
- t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7];
- t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7];
- t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7];
- t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7];
+ t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56];
+ t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56];
+ t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56];
+ t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56];
dst[0] = (t5 + t1) >> 3;
dst[1] = (t6 + t2) >> 3;
@@ -232,11 +232,11 @@ static void vc1_inv_trans_8x8_c(DCTELEM block[64])
dst[6] = (t6 - t2) >> 3;
dst[7] = (t5 - t1) >> 3;
- src += 8;
+ src += 1;
dst += 8;
}
- src = block;
+ src = temp;
dst = block;
for(i = 0; i < 8; i++){
t1 = 12 * (src[ 0] + src[32]) + 64;