diff options
author | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-07-23 06:02:52 +0000 |
---|---|---|
committer | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-07-23 06:02:52 +0000 |
commit | 3ae079a3c8be54cc3c9c7e68c8b2324f3a4e4253 (patch) | |
tree | 40ed03ae012a9e053643a2b1da000be0e1a32aa6 /libavcodec/vp8.c | |
parent | 3df56f411810c665704518e0e9e083b02eaca573 (diff) | |
download | ffmpeg-3ae079a3c8be54cc3c9c7e68c8b2324f3a4e4253.tar.gz |
VP8: optimize DC-only chroma case in the same way as luma.
Add MMX idct_dc_add4uv function for this case.
~40% faster chroma idct.
Originally committed as revision 24455 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/vp8.c')
-rw-r--r-- | libavcodec/vp8.c | 27 |
1 files changed, 16 insertions, 11 deletions
```diff
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 24fc679a94..744627c932 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -1206,7 +1206,7 @@ static void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
                         }
                     }
                 } else {
-                    s->vp8dsp.vp8_idct_dc_add4(y_dst, s->block[y], s->linesize);
+                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
                 }
             }
             y_dst += 4*s->linesize;
@@ -1214,19 +1214,24 @@ static void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
     }
 
     for (ch = 0; ch < 2; ch++) {
-        if (AV_RN32A(s->non_zero_count_cache[4+ch])) {
+        uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4+ch]);
+        if (nnz4) {
             uint8_t *ch_dst = dst[1+ch];
-            for (y = 0; y < 2; y++) {
-                for (x = 0; x < 2; x++) {
-                    int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x];
-                    if (nnz) {
-                        if (nnz == 1)
-                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
-                        else
-                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
+            if (nnz4&~0x01010101) {
+                for (y = 0; y < 2; y++) {
+                    for (x = 0; x < 2; x++) {
+                        int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x];
+                        if (nnz) {
+                            if (nnz == 1)
+                                s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
+                            else
+                                s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
+                        }
                     }
+                    ch_dst += 4*s->uvlinesize;
                 }
-                ch_dst += 4*s->uvlinesize;
+            } else {
+                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
             }
         }
     }
```