diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2010-09-01 20:48:59 +0000 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2010-09-01 20:48:59 +0000 |
commit | 14bc1f24858a8e83a59dd61a88bdd2bc65993e2b (patch) | |
tree | 4266bdbeabacee16a8aaade37e43e93a1f4e66d1 /libavcodec/x86/dsputil_mmx.h | |
parent | 82c76ceee74bdb9ad5365a25e5944619f6097491 (diff) | |
download | ffmpeg-14bc1f24858a8e83a59dd61a88bdd2bc65993e2b.tar.gz |
Split h264dsp_mmx.c (which was #included in dsputil_mmx.c) into h264_qpel_mmx.c,
which is still #included in dsputil_mmx.c and is part of DSPContext, and h264dsp_mmx.c,
which represents H264DSPContext and is now compiled on its own.
Originally committed as revision 25018 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/x86/dsputil_mmx.h')
-rw-r--r-- | libavcodec/x86/dsputil_mmx.h | 29 |
1 files changed, 29 insertions, 0 deletions
diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h index 2458063eea..4a1023c8a7 100644 --- a/libavcodec/x86/dsputil_mmx.h +++ b/libavcodec/x86/dsputil_mmx.h @@ -94,6 +94,35 @@ extern const double ff_pd_2[2]; SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\ SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */ +/* Transpose a 4x4 block of bytes with MMX. Reads one 32-bit word from each of the four rows src + k*src_stride (k = 0..3), interleaves rows 0/1 and rows 2/3 bytewise (punpcklbw), then interleaves those results wordwise (punpcklwd / punpckhwd) so that each 32-bit lane holds one source column, and writes the four lanes to dst + k*dst_stride (k = 0..3); punpckhdq moves the upper lane of a register into its lower half so movd can store it. All four source rows are loaded before the first store. Uses mm0-mm3; the asm statement declares no clobber list and this function does not execute emms. NOTE(review): presumably callers are responsible for emms - confirm. */ static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ + __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ... + "movd %4, %%mm0 \n\t" + "movd %5, %%mm1 \n\t" + "movd %6, %%mm2 \n\t" + "movd %7, %%mm3 \n\t" + "punpcklbw %%mm1, %%mm0 \n\t" + "punpcklbw %%mm3, %%mm2 \n\t" + "movq %%mm0, %%mm1 \n\t" + "punpcklwd %%mm2, %%mm0 \n\t" + "punpckhwd %%mm2, %%mm1 \n\t" + "movd %%mm0, %0 \n\t" + "punpckhdq %%mm0, %%mm0 \n\t" + "movd %%mm0, %1 \n\t" + "movd %%mm1, %2 \n\t" + "punpckhdq %%mm1, %%mm1 \n\t" + "movd %%mm1, %3 \n\t" + + : "=m" (*(uint32_t*)(dst + 0*dst_stride)), + "=m" (*(uint32_t*)(dst + 1*dst_stride)), + "=m" (*(uint32_t*)(dst + 2*dst_stride)), + "=m" (*(uint32_t*)(dst + 3*dst_stride)) + : "m" (*(uint32_t*)(src + 0*src_stride)), + "m" (*(uint32_t*)(src + 1*src_stride)), + "m" (*(uint32_t*)(src + 2*src_stride)), + "m" (*(uint32_t*)(src + 3*src_stride)) + ); +} + // e,f,g,h can be memory // out: a,d,t,c #define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\ |