diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2012-09-22 18:41:25 -0400 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2012-11-26 11:30:19 -0500 |
commit | 947f933687b9fd4d80b6cad468ddc2b5b20a9c38 (patch) | |
tree | 64f58493f3abc3690267f77899289ccfbe49734a /libavutil | |
parent | 284ea790d89441fa1e6b2d72d3c1ed6d61972f0b (diff) | |
download | ffmpeg-947f933687b9fd4d80b6cad468ddc2b5b20a9c38.tar.gz |
x86: float_dsp: add SSE version of vector_fmul_scalar()
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/x86/float_dsp.asm | 29 | ||||
-rw-r--r-- | libavutil/x86/float_dsp_init.c | 4 |
2 files changed, 33 insertions, 0 deletions
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index a8857b937c..317df9c3c1 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -85,3 +85,32 @@ INIT_XMM sse
 VECTOR_FMAC_SCALAR
 INIT_YMM avx
 VECTOR_FMAC_SCALAR
+
+;------------------------------------------------------------------------------
+; void ff_vector_fmul_scalar(float *dst, const float *src, float mul, int len)
+;------------------------------------------------------------------------------
+
+%macro VECTOR_FMUL_SCALAR 0
+%if UNIX64
+cglobal vector_fmul_scalar, 3,3,2, dst, src, len
+%else
+cglobal vector_fmul_scalar, 4,4,3, dst, src, mul, len
+%endif
+%if ARCH_X86_32
+    movss m0, mulm
+%elif WIN64
+    SWAP 0, 2
+%endif
+    shufps m0, m0, 0
+    lea lenq, [lend*4-mmsize]
+.loop:
+    mova m1, [srcq+lenq]
+    mulps m1, m0
+    mova [dstq+lenq], m1
+    sub lenq, mmsize
+    jge .loop
+    REP_RET
+%endmacro
+
+INIT_XMM sse
+VECTOR_FMUL_SCALAR
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index d1b0b8c622..d14ec6a377 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -32,6 +32,9 @@ extern void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
 extern void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
                                       int len);
 
+extern void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
+                                      int len);
+
 void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
 {
     int mm_flags = av_get_cpu_flags();
@@ -39,6 +42,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
     if (EXTERNAL_SSE(mm_flags)) {
         fdsp->vector_fmul = ff_vector_fmul_sse;
         fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
+        fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
     }
     if (EXTERNAL_AVX(mm_flags)) {
         fdsp->vector_fmul = ff_vector_fmul_avx;