aboutsummaryrefslogtreecommitdiffstats
path: root/libavutil/x86
diff options
context:
space:
mode:
authorRonald S. Bultje <rsbultje@gmail.com>2013-01-19 22:26:58 -0800
committerRonald S. Bultje <rsbultje@gmail.com>2013-01-22 11:55:42 -0800
commit55aa03b9f8f11ebb7535424cc0e5635558590f49 (patch)
treee13b12d715aa32cb1d81ed066735af40ea6dbb8a /libavutil/x86
parent0881cbf314982cce8448bd12644ce2a6e0b8c576 (diff)
downloadffmpeg-55aa03b9f8f11ebb7535424cc0e5635558590f49.tar.gz
floatdsp: move vector_fmul_add from dsputil to avfloatdsp.
Diffstat (limited to 'libavutil/x86')
-rw-r--r--libavutil/x86/float_dsp.asm28
-rw-r--r--libavutil/x86/float_dsp_init.c7
2 files changed, 35 insertions, 0 deletions
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 4113fd91e4..70fc1d0310 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -162,3 +162,31 @@ VECTOR_DMUL_SCALAR
INIT_YMM avx
VECTOR_DMUL_SCALAR
%endif
+
+;-----------------------------------------------------------------------------
+; vector_fmul_add(float *dst, const float *src0, const float *src1,
+; const float *src2, int len)
+;-----------------------------------------------------------------------------
+%macro VECTOR_FMUL_ADD 0
+cglobal vector_fmul_add, 5,5,2, dst, src0, src1, src2, len
+ lea lenq, [lend*4 - 2*mmsize]
+ALIGN 16
+.loop:
+ mova m0, [src0q + lenq]
+ mova m1, [src0q + lenq + mmsize]
+ mulps m0, m0, [src1q + lenq]
+ mulps m1, m1, [src1q + lenq + mmsize]
+ addps m0, m0, [src2q + lenq]
+ addps m1, m1, [src2q + lenq + mmsize]
+ mova [dstq + lenq], m0
+ mova [dstq + lenq + mmsize], m1
+
+ sub lenq, 2*mmsize
+ jge .loop
+ REP_RET
+%endmacro
+
+INIT_XMM sse
+VECTOR_FMUL_ADD
+INIT_YMM avx
+VECTOR_FMUL_ADD
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index 1c678cb40c..0a2ad5ba0e 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -41,6 +41,11 @@ extern void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
extern void ff_vector_dmul_scalar_avx(double *dst, const double *src,
double mul, int len);
+void ff_vector_fmul_add_sse(float *dst, const float *src0, const float *src1,
+ const float *src2, int len);
+void ff_vector_fmul_add_avx(float *dst, const float *src0, const float *src1,
+ const float *src2, int len);
+
#if HAVE_6REGS && HAVE_INLINE_ASM
static void vector_fmul_window_3dnowext(float *dst, const float *src0,
const float *src1, const float *win,
@@ -123,6 +128,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp->vector_fmul = ff_vector_fmul_sse;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
+ fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
}
if (EXTERNAL_SSE2(mm_flags)) {
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
@@ -131,5 +137,6 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp->vector_fmul = ff_vector_fmul_avx;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
+ fdsp->vector_fmul_add = ff_vector_fmul_add_avx;
}
}