diff options
author | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-07-23 06:02:52 +0000 |
---|---|---|
committer | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-07-23 06:02:52 +0000 |
commit | 3ae079a3c8be54cc3c9c7e68c8b2324f3a4e4253 (patch) | |
tree | 40ed03ae012a9e053643a2b1da000be0e1a32aa6 /libavcodec/vp8dsp.c | |
parent | 3df56f411810c665704518e0e9e083b02eaca573 (diff) | |
download | ffmpeg-3ae079a3c8be54cc3c9c7e68c8b2324f3a4e4253.tar.gz |
VP8: optimize DC-only chroma case in the same way as luma.
Add MMX idct_dc_add4uv function for this case.
~40% faster chroma idct.
Originally committed as revision 24455 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/vp8dsp.c')
-rw-r--r-- | libavcodec/vp8dsp.c | 39 |
1 files changed, 18 insertions, 21 deletions
```diff
diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c
index 64b09d52ee..f3f3fb6da0 100644
--- a/libavcodec/vp8dsp.c
+++ b/libavcodec/vp8dsp.c
@@ -109,24 +109,20 @@ static void vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], int stride)
     }
 }
 
-static void vp8_idct_dc_add4_c(uint8_t *dst, DCTELEM block[4][16], int stride)
+static void vp8_idct_dc_add4uv_c(uint8_t *dst, DCTELEM block[4][16], int stride)
 {
-    int i, j;
-    for (j = 0; j < 4; j++) {
-        uint8_t *pix = dst+j*4;
-        int dc = (block[j][0] + 4) >> 3;
-        uint8_t *cm = ff_cropTbl + MAX_NEG_CROP + dc;
-        block[j][0] = 0;
-        if (!dc)
-            continue;
-        for (i = 0; i < 4; i++) {
-            pix[0] = cm[pix[0]];
-            pix[1] = cm[pix[1]];
-            pix[2] = cm[pix[2]];
-            pix[3] = cm[pix[3]];
-            pix += stride;
-        }
-    }
+    vp8_idct_dc_add_c(dst+stride*0+0, block[0], stride);
+    vp8_idct_dc_add_c(dst+stride*0+4, block[1], stride);
+    vp8_idct_dc_add_c(dst+stride*4+0, block[2], stride);
+    vp8_idct_dc_add_c(dst+stride*4+4, block[3], stride);
+}
+
+static void vp8_idct_dc_add4y_c(uint8_t *dst, DCTELEM block[4][16], int stride)
+{
+    vp8_idct_dc_add_c(dst+ 0, block[0], stride);
+    vp8_idct_dc_add_c(dst+ 4, block[1], stride);
+    vp8_idct_dc_add_c(dst+ 8, block[2], stride);
+    vp8_idct_dc_add_c(dst+12, block[3], stride);
 }
 
 // because I like only having two parameters to pass functions...
@@ -479,10 +475,11 @@ VP8_BILINEAR(4)
 
 av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
 {
-    dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c;
-    dsp->vp8_idct_add = vp8_idct_add_c;
-    dsp->vp8_idct_dc_add = vp8_idct_dc_add_c;
-    dsp->vp8_idct_dc_add4 = vp8_idct_dc_add4_c;
+    dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c;
+    dsp->vp8_idct_add = vp8_idct_add_c;
+    dsp->vp8_idct_dc_add = vp8_idct_dc_add_c;
+    dsp->vp8_idct_dc_add4y = vp8_idct_dc_add4y_c;
+    dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c;
 
     dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c;
     dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c;
```