aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/vp8.c
diff options
context:
space:
mode:
author: Jason Garrett-Glaser <darkshikari@gmail.com> 2010-07-23 06:02:52 +0000
committer: Jason Garrett-Glaser <darkshikari@gmail.com> 2010-07-23 06:02:52 +0000
commit: 3ae079a3c8be54cc3c9c7e68c8b2324f3a4e4253 (patch)
tree: 40ed03ae012a9e053643a2b1da000be0e1a32aa6 /libavcodec/vp8.c
parent: 3df56f411810c665704518e0e9e083b02eaca573 (diff)
download: ffmpeg-3ae079a3c8be54cc3c9c7e68c8b2324f3a4e4253.tar.gz
VP8: optimize DC-only chroma case in the same way as luma.
Add MMX idct_dc_add4uv function for this case. ~40% faster chroma idct.

Originally committed as revision 24455 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/vp8.c')
-rw-r--r-- libavcodec/vp8.c | 27
1 files changed, 16 insertions, 11 deletions
diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 24fc679a94..744627c932 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -1206,7 +1206,7 @@ static void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
}
}
} else {
- s->vp8dsp.vp8_idct_dc_add4(y_dst, s->block[y], s->linesize);
+ s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
}
}
y_dst += 4*s->linesize;
@@ -1214,19 +1214,24 @@ static void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
}
for (ch = 0; ch < 2; ch++) {
- if (AV_RN32A(s->non_zero_count_cache[4+ch])) {
+ uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4+ch]);
+ if (nnz4) {
uint8_t *ch_dst = dst[1+ch];
- for (y = 0; y < 2; y++) {
- for (x = 0; x < 2; x++) {
- int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x];
- if (nnz) {
- if (nnz == 1)
- s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
- else
- s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
+ if (nnz4&~0x01010101) {
+ for (y = 0; y < 2; y++) {
+ for (x = 0; x < 2; x++) {
+ int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x];
+ if (nnz) {
+ if (nnz == 1)
+ s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
+ else
+ s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
+ }
}
+ ch_dst += 4*s->uvlinesize;
}
- ch_dst += 4*s->uvlinesize;
+ } else {
+ s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
}
}
}