aboutsummaryrefslogtreecommitdiffstats
path: root/libavutil/x86/float_dsp.asm
diff options
context:
space:
mode:
author Justin Ruggles <justin.ruggles@gmail.com> 2012-06-08 23:20:59 -0400
committer Justin Ruggles <justin.ruggles@gmail.com> 2012-06-18 18:01:14 -0400
commit 82b2df979069063beb14be340350501c8340f9cd (patch)
tree 3c6a61d185f9fbab0d21ebce259ef232973d7219 /libavutil/x86/float_dsp.asm
parent cb5042d02c66aed68643633446f6bf623b72416e (diff)
download ffmpeg-82b2df979069063beb14be340350501c8340f9cd.tar.gz
float_dsp: add x86-optimized functions for vector_fmac_scalar()
Diffstat (limited to 'libavutil/x86/float_dsp.asm')
-rw-r--r-- libavutil/x86/float_dsp.asm 47
1 files changed, 47 insertions, 0 deletions
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 53be7ab99a..66ef09398d 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -19,6 +19,7 @@
;******************************************************************************
%include "x86inc.asm"
+%include "x86util.asm"
SECTION .text
@@ -53,3 +54,49 @@ VECTOR_FMUL
INIT_YMM avx
VECTOR_FMUL
%endif
+
+;------------------------------------------------------------------------------
+; void ff_vector_fmac_scalar(float *dst, const float *src, float mul, int len)
+;------------------------------------------------------------------------------
+
+%macro VECTOR_FMAC_SCALAR 0
+; Emits one vector_fmac_scalar implementation for the vector width selected
+; before the macro is invoked (mmsize bytes per register):
+;     dst[i] += src[i] * mul      for i in [0, len)
+;
+; Register roles inside the body:
+;     m0      mul replicated into every float lane
+;     m1, m2  products / sums for the two mmsize-wide chunks of one iteration
+;     lenq    byte offset of the chunk pair being processed (counts down)
+;
+; Each iteration handles 2*mmsize bytes and the loop has no tail handling,
+; so this assumes len is a positive multiple of 2*mmsize/4 floats and that
+; dst/src are suitably aligned for the memory operands used below
+; -- TODO confirm against the C prototype's documented requirements.
+;
+; On UNIX64 the float argument is passed in xmm0 and does not occupy an
+; integer argument slot, hence only three named GPR arguments there.
+%if UNIX64
+cglobal vector_fmac_scalar, 3,3,3, dst, src, len
+%else
+cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len
+%endif
+%if ARCH_X86_32
+    ; x86-32: mul is a memory argument; broadcast it straight into m0.
+    VBROADCASTSS m0, mulm
+%else
+%if WIN64
+    ; Win64 passes arguments by slot position, so the third argument (mul)
+    ; arrives in xmm2. Copy it into the physical xmm0 that the shuffle below
+    ; names explicitly; renaming m0 via SWAP would not affect the literal
+    ; xmm0 operands and would leave the scalar un-broadcast.
+    movaps      xmm0, xmm2
+%endif
+    ; Replicate the low float of xmm0 into all four lanes.
+    shufps      xmm0, xmm0, 0
+%if cpuflag(avx)
+    ; Duplicate the low 128 bits into the high half of the 256-bit register.
+    vinsertf128 m0, m0, xmm0, 1
+%endif
+%endif
+    ; lenq = byte offset of the last 2*mmsize-byte chunk pair.
+    lea         lenq, [lend*4-2*mmsize]
+.loop:
+    mulps       m1, m0, [srcq+lenq       ]
+    mulps       m2, m0, [srcq+lenq+mmsize]
+    addps       m1, m1, [dstq+lenq       ]
+    addps       m2, m2, [dstq+lenq+mmsize]
+    mova        [dstq+lenq       ], m1
+    mova        [dstq+lenq+mmsize], m2
+    ; Walk backwards; keep looping while the offset is still >= 0.
+    sub         lenq, 2*mmsize
+    jge .loop
+%if mmsize == 32
+    ; 256-bit registers were used: clear the upper halves before returning.
+    vzeroupper
+    RET
+%else
+    REP_RET
+%endif
+%endmacro
+
+; Instantiate VECTOR_FMAC_SCALAR once per target instruction set.
+; INIT_XMM / INIT_YMM are defined outside this view (presumably x86inc.asm);
+; presumably they set the mmsize and cpuflag() state the macro body tests.
+INIT_XMM sse
+VECTOR_FMAC_SCALAR
+; The second (YMM/avx) instance is assembled only when HAVE_AVX is non-zero.
+%if HAVE_AVX
+INIT_YMM avx
+VECTOR_FMAC_SCALAR
+%endif