diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2011-02-21 09:07:13 -0500 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2011-02-21 10:23:44 -0500 |
commit | f8bed30d8b176fa030f6737765338bb4a2bcabc9 (patch) | |
tree | f55e179732f7db45c5fcfecd80757676bdbf7fcc /libavcodec/vc1dec.c | |
parent | 8d9ac969cb4ac3e3e18f6425703af4d7aec6c513 (diff) | |
download | ffmpeg-f8bed30d8b176fa030f6737765338bb4a2bcabc9.tar.gz |
VC1: merge idct8x8, coeff adjustments and put_pixels.
Merging these functions allows merging some loops, which makes the
results (particularly after SIMD optimizations) much faster.
Diffstat (limited to 'libavcodec/vc1dec.c')
-rw-r--r-- | libavcodec/vc1dec.c | 58 |
1 files changed, 33 insertions, 25 deletions
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c index a3db6fe70b..ed92d8cadd 100644 --- a/libavcodec/vc1dec.c +++ b/libavcodec/vc1dec.c @@ -2009,8 +2009,7 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan if(i==1) v->vc1dsp.vc1_inv_trans_8x8_dc(dst, linesize, block); else{ - v->vc1dsp.vc1_inv_trans_8x8(block); - s->dsp.add_pixels_clamped(block, dst, linesize); + v->vc1dsp.vc1_inv_trans_8x8_add(dst, linesize, block); } if(apply_filter && cbp_top & 0xC) v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq); @@ -2117,7 +2116,7 @@ static int vc1_decode_p_mb(VC1Context *v) { MpegEncContext *s = &v->s; GetBitContext *gb = &s->gb; - int i, j; + int i; int mb_pos = s->mb_x + s->mb_y * s->mb_stride; int cbp; /* cbp decoding stuff */ int mqdiff, mquant; /* MB quantization */ @@ -2149,6 +2148,8 @@ static int vc1_decode_p_mb(VC1Context *v) { if (!skipped) { + vc1_idct_func idct8x8_fn; + GET_MVDATA(dmv_x, dmv_y); if (s->mb_intra) { @@ -2183,6 +2184,7 @@ static int vc1_decode_p_mb(VC1Context *v) VC1_TTMB_VLC_BITS, 2); if(!s->mb_intra) vc1_mc_1mv(v, 0); dst_idx = 0; + idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm]; for (i=0; i<6; i++) { s->dc_val[0][s->block_index[i]] = 0; @@ -2200,9 +2202,9 @@ static int vc1_decode_p_mb(VC1Context *v) vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset); if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue; - v->vc1dsp.vc1_inv_trans_8x8(s->block[i]); - if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1; - s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); + idct8x8_fn(s->dest[dst_idx] + off, + i & 4 ? s->uvlinesize : s->linesize, + s->block[i]); if(v->pq >= 9 && v->overlap) { if(v->c_avail) v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); @@ -2267,6 +2269,7 @@ static int vc1_decode_p_mb(VC1Context *v) { int intra_count = 0, coded_inter = 0; int is_intra[6], is_coded[6]; + vc1_idct_func idct8x8_fn; /* Get CBPCY */ cbp = get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2); for (i=0; i<6; i++) @@ -2316,6 +2319,7 @@ static int vc1_decode_p_mb(VC1Context *v) } if (!v->ttmbf && coded_inter) ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2); + idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm]; for (i=0; i<6; i++) { dst_idx += i >> 2; @@ -2331,9 +2335,9 @@ static int vc1_decode_p_mb(VC1Context *v) vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant, (i&4)?v->codingset2:v->codingset); if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue; - v->vc1dsp.vc1_inv_trans_8x8(s->block[i]); - if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1; - s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize); + idct8x8_fn(s->dest[dst_idx] + off, + (i&4)?s->uvlinesize:s->linesize, + s->block[i]); if(v->pq >= 9 && v->overlap) { if(v->c_avail) v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); @@ -2409,7 +2413,7 @@ static void vc1_decode_b_mb(VC1Context *v) { MpegEncContext *s = &v->s; GetBitContext *gb = &s->gb; - int i, j; + int i; int mb_pos = s->mb_x + s->mb_y * s->mb_stride; int cbp = 0; /* cbp decoding stuff */ int mqdiff, mquant; /* MB quantization */ @@ -2422,6 +2426,7 @@ static void vc1_decode_b_mb(VC1Context *v) int skipped, direct; int dmv_x[2], dmv_y[2]; int bmvtype = BMV_TYPE_BACKWARD; + vc1_idct_func idct8x8_fn; mquant = v->pq; /* Loosy initialization */ s->mb_intra = 0; @@ -2519,6 +2524,7 @@ static void vc1_decode_b_mb(VC1Context *v) } } dst_idx = 0; + idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm]; for (i=0; i<6; i++) { s->dc_val[0][s->block_index[i]] = 0; @@ -2536,9 +2542,9 @@ static void vc1_decode_b_mb(VC1Context *v) vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset); if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue; - v->vc1dsp.vc1_inv_trans_8x8(s->block[i]); - if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1; - s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize); + idct8x8_fn(s->dest[dst_idx] + off, + i & 4 ? s->uvlinesize : s->linesize, + s->block[i]); } else if(val) { vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), 0, 0, 0); if(!v->ttmbf && ttmb < 8) ttmb = -1; @@ -2551,11 +2557,12 @@ static void vc1_decode_b_mb(VC1Context *v) */ static void vc1_decode_i_blocks(VC1Context *v) { - int k, j; + int k; MpegEncContext *s = &v->s; int cbp, val; uint8_t *coded_val; int mb_pos; + vc1_idct_func idct8x8_fn; /* select codingmode used for VLC tables selection */ switch(v->y_ac_table_index){ @@ -2590,6 +2597,10 @@ static void vc1_decode_i_blocks(VC1Context *v) s->mb_x = s->mb_y = 0; s->mb_intra = 1; s->first_slice_line = 1; + if(v->pq >= 9 && v->overlap) { + idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm]; + } else + idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put[!!v->rangeredfrm]; for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) { s->mb_x = 0; ff_init_block_index(s); @@ -2626,14 +2637,9 @@ static void vc1_decode_i_blocks(VC1Context *v) vc1_decode_i_block(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2); if (k > 3 && (s->flags & CODEC_FLAG_GRAY)) continue; - v->vc1dsp.vc1_inv_trans_8x8(s->block[k]); - if(v->pq >= 9 && v->overlap) { - if (v->rangeredfrm) for(j = 0; j < 64; j++) s->block[k][j] <<= 1; - s->dsp.put_signed_pixels_clamped(s->block[k], dst[k], k & 4 ? s->uvlinesize : s->linesize); - } else { - if (v->rangeredfrm) for(j = 0; j < 64; j++) s->block[k][j] = (s->block[k][j] - 64) << 1; - s->dsp.put_pixels_clamped(s->block[k], dst[k], k & 4 ? s->uvlinesize : s->linesize); - } + idct8x8_fn(dst[k], + k & 4 ? s->uvlinesize : s->linesize, + s->block[k]); } if(v->pq >= 9 && v->overlap) { @@ -2691,6 +2697,7 @@ static void vc1_decode_i_blocks_adv(VC1Context *v) int mqdiff; int overlap; GetBitContext *gb = &s->gb; + vc1_idct_func idct8x8_fn; /* select codingmode used for VLC tables selection */ switch(v->y_ac_table_index){ @@ -2721,6 +2728,7 @@ static void vc1_decode_i_blocks_adv(VC1Context *v) s->mb_x = s->mb_y = 0; s->mb_intra = 1; s->first_slice_line = 1; + idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[0]; for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) { s->mb_x = 0; ff_init_block_index(s); @@ -2777,9 +2785,9 @@ static void vc1_decode_i_blocks_adv(VC1Context *v) vc1_decode_i_block_adv(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant); if (k > 3 && (s->flags & CODEC_FLAG_GRAY)) continue; - v->vc1dsp.vc1_inv_trans_8x8(s->block[k]); - s->dsp.put_signed_pixels_clamped(s->block[k], dst[k], - k & 4 ? s->uvlinesize : s->linesize); + idct8x8_fn(dst[k], + k & 4 ? s->uvlinesize : s->linesize, + s->block[k]); } if(overlap) { |