diff options
author | James Almer <jamrial@gmail.com> | 2014-03-10 17:09:20 -0300 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-03-13 04:34:05 +0100 |
commit | 7d7487e85c066bf3f4e5821a49081f520b6bc1e7 (patch) | |
tree | 435ea72c626de74760ed86b026f4396f242687ac /libavutil/x86/float_dsp_init.c | |
parent | 12ce58bebdff6bfae9c56dc785e3003968f93277 (diff) | |
download | ffmpeg-7d7487e85c066bf3f4e5821a49081f520b6bc1e7.tar.gz |
x86/float_dsp: add ff_vector_{fmul_add, fmac_scalar}_fma3
~7% faster than AVX
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/x86/float_dsp_init.c')
-rw-r--r-- | libavutil/x86/float_dsp_init.c | 8 |
1 file changed, 8 insertions, 0 deletions
```diff
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index 97f7b7c7ca..88ffbc11b5 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -33,6 +33,8 @@ void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
                                int len);
 void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
                                int len);
+void ff_vector_fmac_scalar_fma3(float *dst, const float *src, float mul,
+                                int len);
 
 void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
                                int len);
@@ -46,6 +48,8 @@ void ff_vector_fmul_add_sse(float *dst, const float *src0, const float *src1,
                             const float *src2, int len);
 void ff_vector_fmul_add_avx(float *dst, const float *src0, const float *src1,
                             const float *src2, int len);
+void ff_vector_fmul_add_fma3(float *dst, const float *src0, const float *src1,
+                             const float *src2, int len);
 
 void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
                                 const float *src1, int len);
@@ -153,4 +157,8 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
         fdsp->vector_fmul_add = ff_vector_fmul_add_avx;
         fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
     }
+    if (EXTERNAL_FMA3(cpu_flags)) {
+        fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3;
+        fdsp->vector_fmul_add = ff_vector_fmul_add_fma3;
+    }
 }
```