diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2013-01-23 14:31:55 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-01-23 14:31:55 +0100 |
commit | 8102f27b5b3dff54f8099019c2df4701ac5e5d4f (patch) | |
tree | f623e5063c24e6c4afc50338dfeecf7054c673a2 /libavutil | |
parent | 24604ebaf85b5436d10da811f455dd710b353aca (diff) | |
parent | 73b704ac609d83e0be124589f24efd9b94947cf9 (diff) | |
download | ffmpeg-8102f27b5b3dff54f8099019c2df4701ac5e5d4f.tar.gz |
Merge commit '73b704ac609d83e0be124589f24efd9b94947cf9'
* commit '73b704ac609d83e0be124589f24efd9b94947cf9':
arm: Add some missing header #includes
floatdsp: move scalarproduct_float from dsputil to avfloatdsp.
Conflicts:
libavcodec/acelp_pitch_delay.c
libavcodec/amrnbdec.c
libavcodec/amrwbdec.c
libavcodec/ra288.c
libavcodec/x86/dsputil_mmx.c
libavutil/x86/float_dsp.asm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/arm/float_dsp_init_neon.c | 3 | ||||
-rw-r--r-- | libavutil/arm/float_dsp_neon.S | 13 | ||||
-rw-r--r-- | libavutil/float_dsp.c | 12 | ||||
-rw-r--r-- | libavutil/float_dsp.h | 22 | ||||
-rw-r--r-- | libavutil/x86/float_dsp.asm | 28 | ||||
-rw-r--r-- | libavutil/x86/float_dsp_init.c | 3 |
6 files changed, 80 insertions, 1 deletions
diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c index b3644e82a2..a7245ad92b 100644 --- a/libavutil/arm/float_dsp_init_neon.c +++ b/libavutil/arm/float_dsp_init_neon.c @@ -43,6 +43,8 @@ void ff_vector_fmul_reverse_neon(float *dst, const float *src0, void ff_butterflies_float_neon(float *v1, float *v2, int len); +float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); + void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) { fdsp->vector_fmul = ff_vector_fmul_neon; @@ -52,4 +54,5 @@ void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) fdsp->vector_fmul_add = ff_vector_fmul_add_neon; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon; fdsp->butterflies_float = ff_butterflies_float_neon; + fdsp->scalarproduct_float = ff_scalarproduct_float_neon; } diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S index 4acc406d33..559b565628 100644 --- a/libavutil/arm/float_dsp_neon.S +++ b/libavutil/arm/float_dsp_neon.S @@ -256,3 +256,16 @@ function ff_butterflies_float_neon, export=1 bgt 1b bx lr endfunc + +function ff_scalarproduct_float_neon, export=1 + vmov.f32 q2, #0.0 +1: vld1.32 {q0},[r0,:128]! + vld1.32 {q1},[r1,:128]! 
+ vmla.f32 q2, q0, q1 + subs r2, r2, #4 + bgt 1b + vadd.f32 d0, d4, d5 + vpadd.f32 d0, d0, d0 +NOVFP vmov.32 r0, d0[0] + bx lr +endfunc diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c index 05688e4c3f..50c9e60c32 100644 --- a/libavutil/float_dsp.c +++ b/libavutil/float_dsp.c @@ -104,6 +104,17 @@ static void butterflies_float_c(float *restrict v1, float *restrict v2, } } +float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len) +{ + float p = 0.0; + int i; + + for (i = 0; i < len; i++) + p += v1[i] * v2[i]; + + return p; +} + void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) { fdsp->vector_fmul = vector_fmul_c; @@ -114,6 +125,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) fdsp->vector_fmul_add = vector_fmul_add_c; fdsp->vector_fmul_reverse = vector_fmul_reverse_c; fdsp->butterflies_float = butterflies_float_c; + fdsp->scalarproduct_float = avpriv_scalarproduct_float_c; #if ARCH_ARM ff_float_dsp_init_arm(fdsp); diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h index ff83beddbe..6cc7e76c11 100644 --- a/libavutil/float_dsp.h +++ b/libavutil/float_dsp.h @@ -146,9 +146,31 @@ typedef struct AVFloatDSPContext { * @param len length of vectors, multiple of 4 */ void (*butterflies_float)(float *restrict v1, float *restrict v2, int len); + + /** + * Calculate the scalar product of two vectors of floats. + * + * @param v1 first vector, 16-byte aligned + * @param v2 second vector, 16-byte aligned + * @param len length of vectors, multiple of 4 + * + * @return sum of elementwise products + */ + float (*scalarproduct_float)(const float *v1, const float *v2, int len); } AVFloatDSPContext; /** + * Return the scalar product of two vectors. 
+ * + * @param v1 first input vector + * @param v2 second input vector + * @param len number of elements + * + * @return sum of elementwise products + */ +float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len); + +/** * Initialize a float DSP context. * * @param fdsp float DSP context diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm index 3e5e91ad07..004e6cf1fe 100644 --- a/libavutil/x86/float_dsp.asm +++ b/libavutil/x86/float_dsp.asm @@ -236,4 +236,30 @@ VECTOR_FMUL_REVERSE %if HAVE_AVX_EXTERNAL INIT_YMM avx VECTOR_FMUL_REVERSE -%endif
\ No newline at end of file +%endif + +; float scalarproduct_float_sse(const float *v1, const float *v2, int len) +INIT_XMM sse +cglobal scalarproduct_float, 3,3,2, v1, v2, offset + neg offsetq + shl offsetq, 2 + sub v1q, offsetq + sub v2q, offsetq + xorps xmm0, xmm0 +.loop: + movaps xmm1, [v1q+offsetq] + mulps xmm1, [v2q+offsetq] + addps xmm0, xmm1 + add offsetq, 16 + js .loop + movhlps xmm1, xmm0 + addps xmm0, xmm1 + movss xmm1, xmm0 + shufps xmm0, xmm0, 1 + addss xmm0, xmm1 +%if ARCH_X86_64 == 0 + movss r0m, xmm0 + fld dword r0m +%endif + RET + diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c index 9c58e2bc30..5c6383bc74 100644 --- a/libavutil/x86/float_dsp_init.c +++ b/libavutil/x86/float_dsp_init.c @@ -51,6 +51,8 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0, void ff_vector_fmul_reverse_avx(float *dst, const float *src0, const float *src1, int len); +float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order); + #if HAVE_6REGS && HAVE_INLINE_ASM static void vector_fmul_window_3dnowext(float *dst, const float *src0, const float *src1, const float *win, @@ -135,6 +137,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse; fdsp->vector_fmul_add = ff_vector_fmul_add_sse; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse; + fdsp->scalarproduct_float = ff_scalarproduct_float_sse; } if (EXTERNAL_SSE2(mm_flags)) { fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2; |