diff options
author | Ramiro Polla <ramiro.polla@gmail.com> | 2010-10-25 18:02:02 +0000 |
---|---|---|
committer | Ramiro Polla <ramiro.polla@gmail.com> | 2010-10-25 18:02:02 +0000 |
commit | b32c9ca9a3265cc8566d183dad260a056eb68fae (patch) | |
tree | d8da0905f729922d13c26e156d3206f1106cd2f6 | |
parent | 3ab354d7778cd84bd92e17e5b3563a8180053c8a (diff) | |
download | ffmpeg-b32c9ca9a3265cc8566d183dad260a056eb68fae.tar.gz |
h264dsp: merge some asm blocks
Some code was initializing some xmm registers in one asm block and using them
in the following block, assuming they wouldn't be changed in between blocks.
Originally committed as revision 25568 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/x86/h264_qpel_mmx.c | 46 |
1 files changed, 20 insertions, 26 deletions
diff --git a/libavcodec/x86/h264_qpel_mmx.c b/libavcodec/x86/h264_qpel_mmx.c index 6228922a38..3ad182091b 100644 --- a/libavcodec/x86/h264_qpel_mmx.c +++ b/libavcodec/x86/h264_qpel_mmx.c @@ -299,11 +299,8 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, int h=8;\ __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ - "movq %0, %%mm6 \n\t"\ - :: "m"(ff_pw_5)\ - );\ - do{\ - __asm__ volatile(\ + "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\ + "1: \n\t"\ "movq (%0), %%mm0 \n\t"\ "movq 1(%0), %%mm2 \n\t"\ "movq %%mm0, %%mm1 \n\t"\ @@ -336,7 +333,7 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, "punpcklbw %%mm7, %%mm5 \n\t"\ "paddw %%mm3, %%mm2 \n\t"\ "paddw %%mm5, %%mm4 \n\t"\ - "movq %5, %%mm5 \n\t"\ + "movq "MANGLE(ff_pw_16)", %%mm5\n\t"\ "paddw %%mm5, %%mm2 \n\t"\ "paddw %%mm5, %%mm4 \n\t"\ "paddw %%mm2, %%mm0 \n\t"\ @@ -347,15 +344,15 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, "packuswb %%mm1, %%mm0 \n\t"\ PAVGB" %%mm4, %%mm0 \n\t"\ OP(%%mm0, (%1),%%mm5, q)\ - "add %4, %0 \n\t"\ - "add %4, %1 \n\t"\ - "add %3, %2 \n\t"\ - : "+a"(src), "+c"(dst), "+d"(src2)\ - : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\ - "m"(ff_pw_16)\ + "add %5, %0 \n\t"\ + "add %5, %1 \n\t"\ + "add %4, %2 \n\t"\ + "decl %3 \n\t"\ + "jg 1b \n\t"\ + : "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\ + : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\ : "memory"\ );\ - }while(--h);\ }\ \ static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ @@ -697,11 +694,8 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, int h=8;\ __asm__ volatile(\ "pxor %%xmm7, %%xmm7 \n\t"\ - "movdqa %0, %%xmm6 \n\t"\ - :: "m"(ff_pw_5)\ - );\ - do{\ - __asm__ volatile(\ + "movdqa "MANGLE(ff_pw_5)", %%xmm6\n\t"\ + "1: \n\t"\ "lddqu -2(%0), %%xmm1 \n\t"\ "movdqa %%xmm1, %%xmm0 \n\t"\ "punpckhbw %%xmm7, %%xmm1 \n\t"\ @@ -721,22 +715,22 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, "psllw $2, %%xmm2 \n\t"\ "movq (%2), %%xmm3 \n\t"\ "psubw %%xmm1, %%xmm2 \n\t"\ - "paddw %5, %%xmm0 \n\t"\ + "paddw "MANGLE(ff_pw_16)", %%xmm0\n\t"\ "pmullw %%xmm6, %%xmm2 \n\t"\ "paddw %%xmm0, %%xmm2 \n\t"\ "psraw $5, %%xmm2 \n\t"\ "packuswb %%xmm2, %%xmm2 \n\t"\ "pavgb %%xmm3, %%xmm2 \n\t"\ OP(%%xmm2, (%1), %%xmm4, q)\ - "add %4, %0 \n\t"\ - "add %4, %1 \n\t"\ - "add %3, %2 \n\t"\ - : "+a"(src), "+c"(dst), "+d"(src2)\ - : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\ - "m"(ff_pw_16)\ + "add %5, %0 \n\t"\ + "add %5, %1 \n\t"\ + "add %4, %2 \n\t"\ + "decl %3 \n\t"\ + "jg 1b \n\t"\ + : "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\ + : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\ : "memory"\ );\ - }while(--h);\ }\ QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ \ |