Drop unused args from vector_fmul_add_add, simplify code, and rename

The src3 and step arguments to vector_fmul_add_add() are always zero and one, respectively. This removes these arguments from the function, simplifies the code accordingly, and renames the function to better match the new operation. Originally committed as revision 20061 to svn://svn.ffmpeg.org/ffmpeg/trunk
author: Måns Rullgård <mans@mansr.com> 2009-09-27 16:51:54 +0000
committer: Måns Rullgård <mans@mansr.com> 2009-09-27 16:51:54 +0000
commit: 952e87219815b0d8a698e0c098e4fb7984f8b19d (patch)
tree: c1cd595004f4e6f151c13e5c85cf33c01a736c04 /libavcodec/x86
parent: 7f5c14210b3184d6c4330bf63af9512808c37379 (diff)
download: ffmpeg-952e87219815b0d8a698e0c098e4fb7984f8b19d.tar.gz
1 files changed, 6 insertions, 70 deletions
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index f430abcbb0..79ceb15554 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2125,34 +2125,9 @@ static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *
     );
 }
 
-static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float *src1,
-                                      const float *src2, int src3, int len, int step){
+static void vector_fmul_add_3dnow(float *dst, const float *src0, const float *src1,
+                                  const float *src2, int len){
     x86_reg i = (len-4)*4;
-    if(step == 2 && src3 == 0){
-        dst += (len-4)*2;
-        __asm__ volatile(
-            "1: \n\t"
-            "movq   (%2,%0),  %%mm0 \n\t"
-            "movq  8(%2,%0),  %%mm1 \n\t"
-            "pfmul  (%3,%0),  %%mm0 \n\t"
-            "pfmul 8(%3,%0),  %%mm1 \n\t"
-            "pfadd  (%4,%0),  %%mm0 \n\t"
-            "pfadd 8(%4,%0),  %%mm1 \n\t"
-            "movd     %%mm0,   (%1) \n\t"
-            "movd     %%mm1, 16(%1) \n\t"
-            "psrlq      $32,  %%mm0 \n\t"
-            "psrlq      $32,  %%mm1 \n\t"
-            "movd     %%mm0,  8(%1) \n\t"
-            "movd     %%mm1, 24(%1) \n\t"
-            "sub  $32, %1 \n\t"
-            "sub  $16, %0 \n\t"
-            "jge  1b \n\t"
-            :"+r"(i), "+r"(dst)
-            :"r"(src0), "r"(src1), "r"(src2)
-            :"memory"
-        );
-    }
-    else if(step == 1 && src3 == 0){
         __asm__ volatile(
             "1: \n\t"
             "movq    (%2,%0), %%mm0 \n\t"
@@ -2169,47 +2144,11 @@ static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float
             :"r"(dst), "r"(src0), "r"(src1), "r"(src2)
             :"memory"
         );
-    }
-    else
-        ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
     __asm__ volatile("femms");
 }
-static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *src1,
-                                    const float *src2, int src3, int len, int step){
+static void vector_fmul_add_sse(float *dst, const float *src0, const float *src1,
+                                const float *src2, int len){
     x86_reg i = (len-8)*4;
-    if(step == 2 && src3 == 0){
-        dst += (len-8)*2;
-        __asm__ volatile(
-            "1: \n\t"
-            "movaps   (%2,%0), %%xmm0 \n\t"
-            "movaps 16(%2,%0), %%xmm1 \n\t"
-            "mulps    (%3,%0), %%xmm0 \n\t"
-            "mulps  16(%3,%0), %%xmm1 \n\t"
-            "addps    (%4,%0), %%xmm0 \n\t"
-            "addps  16(%4,%0), %%xmm1 \n\t"
-            "movss     %%xmm0,   (%1) \n\t"
-            "movss     %%xmm1, 32(%1) \n\t"
-            "movhlps   %%xmm0, %%xmm2 \n\t"
-            "movhlps   %%xmm1, %%xmm3 \n\t"
-            "movss     %%xmm2, 16(%1) \n\t"
-            "movss     %%xmm3, 48(%1) \n\t"
-            "shufps $0xb1, %%xmm0, %%xmm0 \n\t"
-            "shufps $0xb1, %%xmm1, %%xmm1 \n\t"
-            "movss     %%xmm0,  8(%1) \n\t"
-            "movss     %%xmm1, 40(%1) \n\t"
-            "movhlps   %%xmm0, %%xmm2 \n\t"
-            "movhlps   %%xmm1, %%xmm3 \n\t"
-            "movss     %%xmm2, 24(%1) \n\t"
-            "movss     %%xmm3, 56(%1) \n\t"
-            "sub  $64, %1 \n\t"
-            "sub  $32, %0 \n\t"
-            "jge  1b \n\t"
-            :"+r"(i), "+r"(dst)
-            :"r"(src0), "r"(src1), "r"(src2)
-            :"memory"
-        );
-    }
-    else if(step == 1 && src3 == 0){
         __asm__ volatile(
             "1: \n\t"
             "movaps   (%2,%0), %%xmm0 \n\t"
@@ -2226,9 +2165,6 @@ static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *
             :"r"(dst), "r"(src0), "r"(src1), "r"(src2)
             :"memory"
         );
-    }
-    else
-        ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
 }
 
 static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
@@ -3077,7 +3013,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->ac3_downmix = ac3_downmix_sse;
             c->vector_fmul = vector_fmul_sse;
             c->vector_fmul_reverse = vector_fmul_reverse_sse;
-            c->vector_fmul_add_add = vector_fmul_add_add_sse;
+            c->vector_fmul_add = vector_fmul_add_sse;
             c->vector_fmul_window = vector_fmul_window_sse;
             c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
             c->vector_clipf = vector_clipf_sse;
@@ -3085,7 +3021,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             c->float_to_int16_interleave = float_to_int16_interleave_sse;
         }
         if(mm_flags & FF_MM_3DNOW)
-            c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse
+            c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse
         if(mm_flags & FF_MM_SSE2){
             c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;
             c->float_to_int16 = float_to_int16_sse2;
author	Måns Rullgård <mans@mansr.com>	2009-09-27 16:51:54 +0000
committer	Måns Rullgård <mans@mansr.com>	2009-09-27 16:51:54 +0000
commit	952e87219815b0d8a698e0c098e4fb7984f8b19d (patch)
tree	c1cd595004f4e6f151c13e5c85cf33c01a736c04 /libavcodec/x86
parent	7f5c14210b3184d6c4330bf63af9512808c37379 (diff)
download	ffmpeg-952e87219815b0d8a698e0c098e4fb7984f8b19d.tar.gz