diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2006-02-07 07:35:03 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2006-02-07 07:35:03 +0000 |
commit | c03ce51dfb4647e362e09aacb9ec238e4691ea18 (patch) | |
tree | ccb216372b775d236ca3eb1ada3bd9abf6cba01a | |
parent | 0331f092379bba24f6117497c62197818330dc15 (diff) | |
download | ffmpeg-c03ce51dfb4647e362e09aacb9ec238e4691ea18.tar.gz |
11% faster put_h264_qpel16_v_lowpass_mmx2
Originally committed as revision 4950 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/i386/h264dsp_mmx.c | 38 |
1 files changed, 27 insertions, 11 deletions
diff --git a/libavcodec/i386/h264dsp_mmx.c b/libavcodec/i386/h264dsp_mmx.c index 19fb508b93..39daa7f23e 100644 --- a/libavcodec/i386/h264dsp_mmx.c +++ b/libavcodec/i386/h264dsp_mmx.c @@ -591,11 +591,11 @@ static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i );\ }\ \ -static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - int h= 2;\ +static inline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ + int w= 2;\ src -= 2*srcStride;\ \ - while(h--){\ + while(w--){\ asm volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movd (%0), %%mm0 \n\t"\ @@ -626,8 +626,24 @@ static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ : "memory"\ );\ - src += 4-13*srcStride;\ - dst += 4-8*dstStride;\ + if(h==16){\ + asm volatile(\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ + QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\ + QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\ + QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\ + QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ + QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ + \ + : "+a"(src), "+c"(dst)\ + : "S"((long)srcStride), "D"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\ + : "memory"\ + );\ + }\ + src += 4-(h+5)*srcStride;\ + dst += 4-h*dstStride;\ }\ }\ static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\ @@ -731,13 +747,13 @@ static inline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int dst += 8 - size*dstStride;\ }while(w--);\ }\ +\ +static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\ +}\ static void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ - src += 8*srcStride;\ - dst += 8*dstStride;\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst , src , dstStride, srcStride);\ - OPNAME ## h264_qpel8_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\ + OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\ }\ \ static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ |