diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2011-02-21 09:07:13 -0500 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2011-02-22 02:44:36 +0100 |
commit | 6a786b15c34765ec00be3cd808dafbb041fd5881 (patch) | |
tree | 29df90cbe6fcc3e4f7729a8121e4513298841742 /libavcodec/vc1dsp.c | |
parent | 90ed2776ea7368f1b6cf07af5f29fd06efbfcd65 (diff) | |
download | ffmpeg-6a786b15c34765ec00be3cd808dafbb041fd5881.tar.gz |
VC1: merge idct8x8, coeff adjustments and put_pixels.
Merging these functions allows merging some loops, which makes the
results (particularly after SIMD optimizations) much faster.
(cherry picked from commit f8bed30d8b176fa030f6737765338bb4a2bcabc9)
Diffstat (limited to 'libavcodec/vc1dsp.c')
-rw-r--r-- | libavcodec/vc1dsp.c | 54 |
1 files changed, 44 insertions, 10 deletions
```diff
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
index 000dad7d26..dbe2120829 100644
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c
@@ -199,7 +199,7 @@ static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
     }
 }
 
-static void vc1_inv_trans_8x8_c(DCTELEM block[64])
+static av_always_inline void vc1_inv_trans_8x8_c(DCTELEM block[64], int shl, int sub)
 {
     int i;
     register int t1,t2,t3,t4,t5,t6,t7,t8;
@@ -254,20 +254,50 @@ static void vc1_inv_trans_8x8_c(DCTELEM block[64])
         t3 =  9 * src[ 8] - 16 * src[24] +  4 * src[40] + 15 * src[56];
         t4 =  4 * src[ 8] -  9 * src[24] + 15 * src[40] - 16 * src[56];
 
-        dst[ 0] = (t5 + t1) >> 7;
-        dst[ 8] = (t6 + t2) >> 7;
-        dst[16] = (t7 + t3) >> 7;
-        dst[24] = (t8 + t4) >> 7;
-        dst[32] = (t8 - t4 + 1) >> 7;
-        dst[40] = (t7 - t3 + 1) >> 7;
-        dst[48] = (t6 - t2 + 1) >> 7;
-        dst[56] = (t5 - t1 + 1) >> 7;
+        dst[ 0] = (((t5 + t1    ) >> 7) - sub) << shl;
+        dst[ 8] = (((t6 + t2    ) >> 7) - sub) << shl;
+        dst[16] = (((t7 + t3    ) >> 7) - sub) << shl;
+        dst[24] = (((t8 + t4    ) >> 7) - sub) << shl;
+        dst[32] = (((t8 - t4 + 1) >> 7) - sub) << shl;
+        dst[40] = (((t7 - t3 + 1) >> 7) - sub) << shl;
+        dst[48] = (((t6 - t2 + 1) >> 7) - sub) << shl;
+        dst[56] = (((t5 - t1 + 1) >> 7) - sub) << shl;
 
         src++;
         dst++;
     }
 }
 
+static void vc1_inv_trans_8x8_add_c(uint8_t *dest, int linesize, DCTELEM *block)
+{
+    vc1_inv_trans_8x8_c(block, 0, 0);
+    ff_add_pixels_clamped_c(block, dest, linesize);
+}
+
+static void vc1_inv_trans_8x8_put_signed_c(uint8_t *dest, int linesize, DCTELEM *block)
+{
+    vc1_inv_trans_8x8_c(block, 0, 0);
+    ff_put_signed_pixels_clamped_c(block, dest, linesize);
+}
+
+static void vc1_inv_trans_8x8_put_signed_rangered_c(uint8_t *dest, int linesize, DCTELEM *block)
+{
+    vc1_inv_trans_8x8_c(block, 1, 0);
+    ff_put_signed_pixels_clamped_c(block, dest, linesize);
+}
+
+static void vc1_inv_trans_8x8_put_c(uint8_t *dest, int linesize, DCTELEM *block)
+{
+    vc1_inv_trans_8x8_c(block, 0, 0);
+    ff_put_pixels_clamped_c(block, dest, linesize);
+}
+
+static void vc1_inv_trans_8x8_put_rangered_c(uint8_t *dest, int linesize, DCTELEM *block)
+{
+    vc1_inv_trans_8x8_c(block, 1, 64);
+    ff_put_pixels_clamped_c(block, dest, linesize);
+}
+
 /** Do inverse transform on 8x4 part of block
 */
 static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
@@ -662,7 +692,11 @@ static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
 }
 
 av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) {
-    dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
+    dsp->vc1_inv_trans_8x8_add = vc1_inv_trans_8x8_add_c;
+    dsp->vc1_inv_trans_8x8_put_signed[0] = vc1_inv_trans_8x8_put_signed_c;
+    dsp->vc1_inv_trans_8x8_put_signed[1] = vc1_inv_trans_8x8_put_signed_rangered_c;
+    dsp->vc1_inv_trans_8x8_put[0] = vc1_inv_trans_8x8_put_c;
+    dsp->vc1_inv_trans_8x8_put[1] = vc1_inv_trans_8x8_put_rangered_c;
     dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
     dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
     dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
```