aboutsummaryrefslogtreecommitdiffstats
path: root/libavutil
diff options
context:
space:
mode:
author Justin Ruggles <justin.ruggles@gmail.com> 2012-09-22 18:41:25 -0400
committer Justin Ruggles <justin.ruggles@gmail.com> 2012-11-26 11:30:19 -0500
commit 947f933687b9fd4d80b6cad468ddc2b5b20a9c38 (patch)
tree 64f58493f3abc3690267f77899289ccfbe49734a /libavutil
parent 284ea790d89441fa1e6b2d72d3c1ed6d61972f0b (diff)
download ffmpeg-947f933687b9fd4d80b6cad468ddc2b5b20a9c38.tar.gz
x86: float_dsp: add SSE version of vector_fmul_scalar()
Diffstat (limited to 'libavutil')
-rw-r--r-- libavutil/x86/float_dsp.asm 29
-rw-r--r-- libavutil/x86/float_dsp_init.c 4
2 files changed, 33 insertions, 0 deletions
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index a8857b937c..317df9c3c1 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -85,3 +85,32 @@ INIT_XMM sse
VECTOR_FMAC_SCALAR
INIT_YMM avx
VECTOR_FMAC_SCALAR
+
+;------------------------------------------------------------------------------
+; void ff_vector_fmul_scalar(float *dst, const float *src, float mul, int len)
+;------------------------------------------------------------------------------
+
+%macro VECTOR_FMUL_SCALAR 0 ; emits vector_fmul_scalar (dst = src * mul) for the currently selected register set
+%if UNIX64
+cglobal vector_fmul_scalar, 3,3,2, dst, src, len ; mul is not a named arg here; m0 is used below without a load, so it presumably already holds mul (first float arg register on SysV x86-64 - confirm)
+%else
+cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
+%endif
+%if ARCH_X86_32
+ movss m0, mulm ; x86-32: load the scalar from the mul argument's memory slot into the low lane of m0
+%elif WIN64
+ SWAP 0, 2 ; WIN64: make m0 name the register previously called m2 (presumably where mul arrives - confirm against the Win64 calling convention)
+%endif
+ shufps m0, m0, 0 ; broadcast the low float of m0 to every lane
+ lea lenq, [lend*4-mmsize] ; byte offset of the last vector: len floats * 4 bytes minus one register width
+.loop: ; walks both buffers from the end down to offset 0
+ mova m1, [srcq+lenq] ; NOTE(review): mova is presumably an aligned load/store - confirm src/dst alignment requirements
+ mulps m1, m0 ; m1 = src vector * broadcast mul
+ mova [dstq+lenq], m1
+ sub lenq, mmsize ; step back one vector; flags from this sub drive the branch below
+ jge .loop ; continue while the offset is still >= 0. NOTE(review): body runs before any check and only whole vectors are handled, so len is presumably a positive multiple of mmsize/4 - confirm with callers
+ REP_RET
+%endmacro
+
+INIT_XMM sse ; select the XMM/SSE configuration for the instantiation below
+VECTOR_FMUL_SCALAR ; instantiate the SSE variant (declared in C as ff_vector_fmul_scalar_sse)
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index d1b0b8c622..d14ec6a377 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -32,6 +32,9 @@ extern void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
int len);
+extern void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
+ int len);
+
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{
int mm_flags = av_get_cpu_flags();
@@ -39,6 +42,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
if (EXTERNAL_SSE(mm_flags)) {
fdsp->vector_fmul = ff_vector_fmul_sse;
fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
+ fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
}
if (EXTERNAL_AVX(mm_flags)) {
fdsp->vector_fmul = ff_vector_fmul_avx;