diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2013-01-20 15:41:52 -0800 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2013-01-22 11:55:42 -0800 |
commit | d56668bd80075615b89aff652fe8a576bf853ceb (patch) | |
tree | 9da3ed036b716dbaf33f5c9869578bedb6e393a2 /libavutil/x86 | |
parent | 5959bfaca396ecaf63a8123055f499688b79cae3 (diff) | |
download | ffmpeg-d56668bd80075615b89aff652fe8a576bf853ceb.tar.gz |
floatdsp: move scalarproduct_float from dsputil to avfloatdsp.
This makes the aac decoder and all voice codecs independent of dsputil.
Diffstat (limited to 'libavutil/x86')
-rw-r--r-- | libavutil/x86/float_dsp.asm | 25 | ||||
-rw-r--r-- | libavutil/x86/float_dsp_init.c | 3 |
2 files changed, 28 insertions, 0 deletions
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm index 126f3495c4..779339c575 100644 --- a/libavutil/x86/float_dsp.asm +++ b/libavutil/x86/float_dsp.asm @@ -227,3 +227,28 @@ INIT_XMM sse VECTOR_FMUL_REVERSE INIT_YMM avx VECTOR_FMUL_REVERSE + +; float scalarproduct_float_sse(const float *v1, const float *v2, int len) +INIT_XMM sse +cglobal scalarproduct_float, 3,3,2, v1, v2, offset + neg offsetq + shl offsetq, 2 + sub v1q, offsetq + sub v2q, offsetq + xorps xmm0, xmm0 +.loop: + movaps xmm1, [v1q+offsetq] + mulps xmm1, [v2q+offsetq] + addps xmm0, xmm1 + add offsetq, 16 + js .loop + movhlps xmm1, xmm0 + addps xmm0, xmm1 + movss xmm1, xmm0 + shufps xmm0, xmm0, 1 + addss xmm0, xmm1 +%if ARCH_X86_64 == 0 + movss r0m, xmm0 + fld dword r0m +%endif + RET diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c index 9f63b4c057..81c9a7d468 100644 --- a/libavutil/x86/float_dsp_init.c +++ b/libavutil/x86/float_dsp_init.c @@ -51,6 +51,8 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0, void ff_vector_fmul_reverse_avx(float *dst, const float *src0, const float *src1, int len); +float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); + #if HAVE_6REGS && HAVE_INLINE_ASM static void vector_fmul_window_3dnowext(float *dst, const float *src0, const float *src1, const float *win, @@ -135,6 +137,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse; fdsp->vector_fmul_add = ff_vector_fmul_add_sse; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse; + fdsp->scalarproduct_float = ff_scalarproduct_float_sse; } if (EXTERNAL_SSE2(mm_flags)) { fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2; |