aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/vp8dsp-init.c
diff options
context:
space:
mode:
authorJason Garrett-Glaser <darkshikari@gmail.com>2010-07-23 02:58:27 +0000
committerJason Garrett-Glaser <darkshikari@gmail.com>2010-07-23 02:58:27 +0000
commit8a467b2d44d20c1a0b731dce9edeff772732a558 (patch)
tree259b69473524a9db002d79b90009817f84af4113 /libavcodec/x86/vp8dsp-init.c
parentef38842f0bc97ce5b158f51f3e65aae4164fc6a5 (diff)
downloadffmpeg-8a467b2d44d20c1a0b731dce9edeff772732a558.tar.gz
VP8: 30% faster idct_mb
Take shortcuts based on statistically common situations. Add 4-at-a-time idct_dc function (mmx and sse2) since rows of 4 DC-only DCT blocks are common. TODO: tie this more directly into the MB mode, since the DC-level transform is only used for non-splitmv blocks? Originally committed as revision 24452 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/x86/vp8dsp-init.c')
-rw-r--r--libavcodec/x86/vp8dsp-init.c5
1 files changed, 5 insertions, 0 deletions
diff --git a/libavcodec/x86/vp8dsp-init.c b/libavcodec/x86/vp8dsp-init.c
index 6cf1704594..5da70824fc 100644
--- a/libavcodec/x86/vp8dsp-init.c
+++ b/libavcodec/x86/vp8dsp-init.c
@@ -220,6 +220,8 @@ HVBILIN(ssse3, 8, 16, 16)
extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride);
+extern void ff_vp8_idct_dc_add4_mmx(uint8_t *dst, DCTELEM block[4][16], int stride);
+extern void ff_vp8_idct_dc_add4_sse2(uint8_t *dst, DCTELEM block[4][16], int stride);
extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16], int stride);
@@ -283,6 +285,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
#if HAVE_YASM
if (mm_flags & FF_MM_MMX) {
c->vp8_idct_dc_add = ff_vp8_idct_dc_add_mmx;
+ c->vp8_idct_dc_add4 = ff_vp8_idct_dc_add4_mmx;
c->vp8_idct_add = ff_vp8_idct_add_mmx;
c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmx;
c->put_vp8_epel_pixels_tab[0][0][0] =
@@ -351,6 +354,8 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
}
if (mm_flags & FF_MM_SSE2) {
+ c->vp8_idct_dc_add4 = ff_vp8_idct_dc_add4_sse2;
+
c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;