aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2002-09-20 10:03:01 +0000
committerMichael Niedermayer <michaelni@gmx.at>2002-09-20 10:03:01 +0000
commit084c726ba306eb19077ac525764acec6c110f1d4 (patch)
treeda04fc4e7cf96499d27dc191ea959353b289243e /libavcodec
parent3f09f52adf1ccdb93873f57570d8389316dc13ed (diff)
downloadffmpeg-084c726ba306eb19077ac525764acec6c110f1d4.tar.gz
pix_sum16_mmx()
Originally committed as revision 961 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r--libavcodec/i386/dsputil_mmx.c39
1 files changed, 39 insertions, 0 deletions
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 4336e4bde0..eaec8fe45a 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -420,6 +420,44 @@ static void clear_blocks_mmx(DCTELEM *blocks)
);
}
+static int pix_sum16_mmx(UINT8 * pix, int line_size){
+ const int h=16;
+ int sum;
+ int index= -line_size*h;
+
+ __asm __volatile(
+ "pxor %%mm7, %%mm7 \n\t"
+ "pxor %%mm6, %%mm6 \n\t"
+ "1: \n\t"
+ "movq (%2, %1), %%mm0 \n\t"
+ "movq (%2, %1), %%mm1 \n\t"
+ "movq 8(%2, %1), %%mm2 \n\t"
+ "movq 8(%2, %1), %%mm3 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "punpckhbw %%mm7, %%mm3 \n\t"
+ "paddw %%mm0, %%mm1 \n\t"
+ "paddw %%mm2, %%mm3 \n\t"
+ "paddw %%mm1, %%mm3 \n\t"
+ "paddw %%mm3, %%mm6 \n\t"
+ "addl %3, %1 \n\t"
+ " js 1b \n\t"
+ "movq %%mm6, %%mm5 \n\t"
+ "psrlq $32, %%mm6 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "movq %%mm6, %%mm5 \n\t"
+ "psrlq $16, %%mm6 \n\t"
+ "paddw %%mm5, %%mm6 \n\t"
+ "movd %%mm6, %0 \n\t"
+ "andl $0xFFFF, %0 \n\t"
+ : "=&r" (sum), "+r" (index)
+ : "r" (pix - index), "r" (line_size)
+ );
+
+ return sum;
+}
+
#if 0
static void just_return() { return; }
#endif
@@ -448,6 +486,7 @@ void dsputil_init_mmx(void)
put_pixels_clamped = put_pixels_clamped_mmx;
add_pixels_clamped = add_pixels_clamped_mmx;
clear_blocks= clear_blocks_mmx;
+ pix_sum= pix_sum16_mmx;
pix_abs16x16 = pix_abs16x16_mmx;
pix_abs16x16_x2 = pix_abs16x16_x2_mmx;