diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2008-02-06 12:32:31 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2008-02-06 12:32:31 +0000 |
commit | 1d67b037f7fc344f2e0b4dac6ac0ca3dd5438e6c (patch) | |
tree | 796bdb0ab7fa2efb3a8b385d70f7128b3291a172 /libavcodec/i386/dsputil_mmx.c | |
parent | 20d565be6d52c38495eeaa8904c02315421629b3 (diff) | |
download | ffmpeg-1d67b037f7fc344f2e0b4dac6ac0ca3dd5438e6c.tar.gz |
sse2 h264 motion compensation. not new code, just separate out the cases that didn't need ssse3.
Originally committed as revision 11877 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/i386/dsputil_mmx.c')
-rw-r--r-- | libavcodec/i386/dsputil_mmx.c | 42 |
1 files changed, 35 insertions, 7 deletions
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index 55343181de..54246fc8ba 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ b/libavcodec/i386/dsputil_mmx.c @@ -3523,20 +3523,48 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; } -/* FIXME works in most codecs, but crashes svq1 due to unaligned chroma + +#define H264_QPEL_FUNCS(x, y, CPU)\ + c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\ + c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\ + c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\ + c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU; if((mm_flags & MM_SSE2) && !(mm_flags & MM_3DNOW)){ // these functions are slower than mmx on AMD, but faster on Intel +/* FIXME works in most codecs, but crashes svq1 due to unaligned chroma c->put_pixels_tab[0][0] = put_pixels16_sse2; c->avg_pixels_tab[0][0] = avg_pixels16_sse2; - } */ - + H264_QPEL_FUNCS(0, 0, sse2); + } + if(mm_flags & MM_SSE2){ + H264_QPEL_FUNCS(0, 1, sse2); + H264_QPEL_FUNCS(0, 2, sse2); + H264_QPEL_FUNCS(0, 3, sse2); + H264_QPEL_FUNCS(1, 1, sse2); + H264_QPEL_FUNCS(1, 2, sse2); + H264_QPEL_FUNCS(1, 3, sse2); + H264_QPEL_FUNCS(2, 1, sse2); + H264_QPEL_FUNCS(2, 2, sse2); + H264_QPEL_FUNCS(2, 3, sse2); + H264_QPEL_FUNCS(3, 1, sse2); + H264_QPEL_FUNCS(3, 2, sse2); + H264_QPEL_FUNCS(3, 3, sse2); + } #ifdef HAVE_SSSE3 if(mm_flags & MM_SSSE3){ - SET_QPEL_FUNCS(put_h264_qpel, 0, 16, ssse3); - SET_QPEL_FUNCS(put_h264_qpel, 1, 8, ssse3); - SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, ssse3); - SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, ssse3); + H264_QPEL_FUNCS(1, 0, ssse3); + H264_QPEL_FUNCS(1, 1, ssse3); + H264_QPEL_FUNCS(1, 2, ssse3); + H264_QPEL_FUNCS(1, 3, ssse3); + H264_QPEL_FUNCS(2, 0, ssse3); + H264_QPEL_FUNCS(2, 1, ssse3); + H264_QPEL_FUNCS(2, 2, ssse3); + H264_QPEL_FUNCS(2, 3, ssse3); + H264_QPEL_FUNCS(3, 0, ssse3); + H264_QPEL_FUNCS(3, 1, ssse3); + H264_QPEL_FUNCS(3, 2, ssse3); + H264_QPEL_FUNCS(3, 3, ssse3); } #endif |