diff options
author | Nick Kurshev <nickols_k@mail.ru> | 2001-10-28 12:02:16 +0000 |
---|---|---|
committer | Nick Kurshev <nickols_k@mail.ru> | 2001-10-28 12:02:16 +0000 |
commit | 96b956cc464587ec03ff5dbe3031770c11d2a35b (patch) | |
tree | 8e50f924c2d8388fd20dd3a390b450b038b84420 | |
parent | 49a0c6eec83ace5faec994b8791c9eeb19778eed (diff) | |
download | ffmpeg-96b956cc464587ec03ff5dbe3031770c11d2a35b.tar.gz |
mmx, mmx2, 3dnow optimized 24to32
Originally committed as revision 2512 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc
-rw-r--r-- | postproc/rgb2rgb.c | 38 | ||||
-rw-r--r-- | postproc/rgb2rgb_template.c | 38 |
2 files changed, 72 insertions, 4 deletions
diff --git a/postproc/rgb2rgb.c b/postproc/rgb2rgb.c
index dc5f062f3e..73bbca7849 100644
--- a/postproc/rgb2rgb.c
+++ b/postproc/rgb2rgb.c
@@ -3,6 +3,26 @@
 #include "rgb2rgb.h"
 #include "mmx.h"
 
+#ifdef HAVE_3DNOW
+#define PREFETCH "prefetch"
+#define PREFETCHW "prefetchw"
+#elif HAVE_MMX2
+#define PREFETCH "prefetchnta"
+#define PREFETCHW "prefetcht0"
+#endif
+
+#ifdef HAVE_3DNOW
+#define EMMS "femms"
+#else
+#define EMMS "emms"
+#endif
+
+#ifdef HAVE_MMX2
+#define MOVNTQ "movntq"
+#else
+#define MOVNTQ "movq"
+#endif
+
 void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size)
 {
   uint8_t *dest = dst;
@@ -14,10 +34,23 @@ void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size)
 #endif
   end = s + src_size;
 #ifdef HAVE_MMX
+#ifdef PREFETCH
+  __asm __volatile(
+      PREFETCH" %0\n\t"
+      PREFETCH" 64%0\n\t"
+      PREFETCHW" %1\n\t"
+      PREFETCHW" 64%1\n\t"::"m"(*s),"m"(*dest):"memory");
+#endif
   mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
   __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
   while(s < mm_end)
   {
+#ifdef PREFETCH
+    __asm __volatile(
+      PREFETCH" 128%0\n\t"
+      PREFETCHW" 128%1"
+      ::"m"(*s),"m"(*dest):"memory");
+#endif
     __asm __volatile(
       "movd %1, %%mm0\n\t"
       "movd 3%1, %%mm1\n\t"
@@ -27,14 +60,15 @@ void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size)
       "punpckldq %%mm3, %%mm2\n\t"
       "pand %%mm7, %%mm0\n\t"
       "pand %%mm7, %%mm2\n\t"
-      "movq %%mm0, %0\n\t"
-      "movq %%mm2, 8%0"
+      MOVNTQ" %%mm0, %0\n\t"
+      MOVNTQ" %%mm2, 8%0"
       :"=m"(*dest)
       :"m"(*s)
       :"memory");
     dest += 16;
     s += 12;
   }
+  __asm __volatile(EMMS:::"memory");
 #endif
   while(s < end)
   {
diff --git a/postproc/rgb2rgb_template.c b/postproc/rgb2rgb_template.c
index dc5f062f3e..73bbca7849 100644
--- a/postproc/rgb2rgb_template.c
+++ b/postproc/rgb2rgb_template.c
@@ -3,6 +3,26 @@
 #include "rgb2rgb.h"
 #include "mmx.h"
 
+#ifdef HAVE_3DNOW
+#define PREFETCH "prefetch"
+#define PREFETCHW "prefetchw"
+#elif HAVE_MMX2
+#define PREFETCH "prefetchnta"
+#define PREFETCHW "prefetcht0"
+#endif
+
+#ifdef HAVE_3DNOW
+#define EMMS "femms"
+#else
+#define EMMS "emms"
+#endif
+
+#ifdef HAVE_MMX2
+#define MOVNTQ "movntq"
+#else
+#define MOVNTQ "movq"
+#endif
+
 void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size)
 {
   uint8_t *dest = dst;
@@ -14,10 +34,23 @@ void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size)
 #endif
   end = s + src_size;
 #ifdef HAVE_MMX
+#ifdef PREFETCH
+  __asm __volatile(
+      PREFETCH" %0\n\t"
+      PREFETCH" 64%0\n\t"
+      PREFETCHW" %1\n\t"
+      PREFETCHW" 64%1\n\t"::"m"(*s),"m"(*dest):"memory");
+#endif
   mm_end = (uint8_t*)((((unsigned long)end)/16)*16);
   __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
   while(s < mm_end)
   {
+#ifdef PREFETCH
+    __asm __volatile(
+      PREFETCH" 128%0\n\t"
+      PREFETCHW" 128%1"
+      ::"m"(*s),"m"(*dest):"memory");
+#endif
     __asm __volatile(
       "movd %1, %%mm0\n\t"
       "movd 3%1, %%mm1\n\t"
@@ -27,14 +60,15 @@ void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size)
       "punpckldq %%mm3, %%mm2\n\t"
       "pand %%mm7, %%mm0\n\t"
       "pand %%mm7, %%mm2\n\t"
-      "movq %%mm0, %0\n\t"
-      "movq %%mm2, 8%0"
+      MOVNTQ" %%mm0, %0\n\t"
+      MOVNTQ" %%mm2, 8%0"
       :"=m"(*dest)
       :"m"(*s)
       :"memory");
     dest += 16;
     s += 12;
   }
+  __asm __volatile(EMMS:::"memory");
 #endif
   while(s < end)
   {
|