aboutsummaryrefslogtreecommitdiffstats
path: root/libavutil
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2013-01-23 14:31:55 +0100
committerMichael Niedermayer <michaelni@gmx.at>2013-01-23 14:31:55 +0100
commit8102f27b5b3dff54f8099019c2df4701ac5e5d4f (patch)
treef623e5063c24e6c4afc50338dfeecf7054c673a2 /libavutil
parent24604ebaf85b5436d10da811f455dd710b353aca (diff)
parent73b704ac609d83e0be124589f24efd9b94947cf9 (diff)
downloadffmpeg-8102f27b5b3dff54f8099019c2df4701ac5e5d4f.tar.gz
Merge commit '73b704ac609d83e0be124589f24efd9b94947cf9'
* commit '73b704ac609d83e0be124589f24efd9b94947cf9': arm: Add some missing header #includes floatdsp: move scalarproduct_float from dsputil to avfloatdsp. Conflicts: libavcodec/acelp_pitch_delay.c libavcodec/amrnbdec.c libavcodec/amrwbdec.c libavcodec/ra288.c libavcodec/x86/dsputil_mmx.c libavutil/x86/float_dsp.asm Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r--libavutil/arm/float_dsp_init_neon.c3
-rw-r--r--libavutil/arm/float_dsp_neon.S13
-rw-r--r--libavutil/float_dsp.c12
-rw-r--r--libavutil/float_dsp.h22
-rw-r--r--libavutil/x86/float_dsp.asm28
-rw-r--r--libavutil/x86/float_dsp_init.c3
6 files changed, 80 insertions, 1 deletions
diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c
index b3644e82a2..a7245ad92b 100644
--- a/libavutil/arm/float_dsp_init_neon.c
+++ b/libavutil/arm/float_dsp_init_neon.c
@@ -43,6 +43,8 @@ void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
void ff_butterflies_float_neon(float *v1, float *v2, int len);
+float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
+
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
{
fdsp->vector_fmul = ff_vector_fmul_neon;
@@ -52,4 +54,5 @@ void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
fdsp->vector_fmul_add = ff_vector_fmul_add_neon;
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon;
fdsp->butterflies_float = ff_butterflies_float_neon;
+ fdsp->scalarproduct_float = ff_scalarproduct_float_neon;
}
diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S
index 4acc406d33..559b565628 100644
--- a/libavutil/arm/float_dsp_neon.S
+++ b/libavutil/arm/float_dsp_neon.S
@@ -256,3 +256,16 @@ function ff_butterflies_float_neon, export=1
bgt 1b
bx lr
endfunc
+
+function ff_scalarproduct_float_neon, export=1
+ vmov.f32 q2, #0.0
+1: vld1.32 {q0},[r0,:128]!
+ vld1.32 {q1},[r1,:128]!
+ vmla.f32 q2, q0, q1
+ subs r2, r2, #4
+ bgt 1b
+ vadd.f32 d0, d4, d5
+ vpadd.f32 d0, d0, d0
+NOVFP vmov.32 r0, d0[0]
+ bx lr
+endfunc
diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 05688e4c3f..50c9e60c32 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -104,6 +104,17 @@ static void butterflies_float_c(float *restrict v1, float *restrict v2,
}
}
+float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len)
+{
+ float p = 0.0;
+ int i;
+
+ for (i = 0; i < len; i++)
+ p += v1[i] * v2[i];
+
+ return p;
+}
+
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
{
fdsp->vector_fmul = vector_fmul_c;
@@ -114,6 +125,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
fdsp->vector_fmul_add = vector_fmul_add_c;
fdsp->vector_fmul_reverse = vector_fmul_reverse_c;
fdsp->butterflies_float = butterflies_float_c;
+ fdsp->scalarproduct_float = avpriv_scalarproduct_float_c;
#if ARCH_ARM
ff_float_dsp_init_arm(fdsp);
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index ff83beddbe..6cc7e76c11 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -146,9 +146,31 @@ typedef struct AVFloatDSPContext {
* @param len length of vectors, multiple of 4
*/
void (*butterflies_float)(float *restrict v1, float *restrict v2, int len);
+
+ /**
+ * Calculate the scalar product of two vectors of floats.
+ *
+ * @param v1 first vector, 16-byte aligned
+ * @param v2 second vector, 16-byte aligned
+ * @param len length of vectors, multiple of 4
+ *
+ * @return sum of elementwise products
+ */
+ float (*scalarproduct_float)(const float *v1, const float *v2, int len);
} AVFloatDSPContext;
/**
+ * Return the scalar product of two vectors.
+ *
+ * @param v1 first input vector
+ * @param v2 first input vector
+ * @param len number of elements
+ *
+ * @return sum of elementwise products
+ */
+float avpriv_scalarproduct_float_c(const float *v1, const float *v2, int len);
+
+/**
* Initialize a float DSP context.
*
* @param fdsp float DSP context
diff --git a/libavutil/x86/float_dsp.asm b/libavutil/x86/float_dsp.asm
index 3e5e91ad07..004e6cf1fe 100644
--- a/libavutil/x86/float_dsp.asm
+++ b/libavutil/x86/float_dsp.asm
@@ -236,4 +236,30 @@ VECTOR_FMUL_REVERSE
%if HAVE_AVX_EXTERNAL
INIT_YMM avx
VECTOR_FMUL_REVERSE
-%endif \ No newline at end of file
+%endif
+
+; float scalarproduct_float_sse(const float *v1, const float *v2, int len)
+INIT_XMM sse
+cglobal scalarproduct_float, 3,3,2, v1, v2, offset
+ neg offsetq
+ shl offsetq, 2
+ sub v1q, offsetq
+ sub v2q, offsetq
+ xorps xmm0, xmm0
+.loop:
+ movaps xmm1, [v1q+offsetq]
+ mulps xmm1, [v2q+offsetq]
+ addps xmm0, xmm1
+ add offsetq, 16
+ js .loop
+ movhlps xmm1, xmm0
+ addps xmm0, xmm1
+ movss xmm1, xmm0
+ shufps xmm0, xmm0, 1
+ addss xmm0, xmm1
+%if ARCH_X86_64 == 0
+ movss r0m, xmm0
+ fld dword r0m
+%endif
+ RET
+
diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c
index 9c58e2bc30..5c6383bc74 100644
--- a/libavutil/x86/float_dsp_init.c
+++ b/libavutil/x86/float_dsp_init.c
@@ -51,6 +51,8 @@ void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
const float *src1, int len);
+float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
+
#if HAVE_6REGS && HAVE_INLINE_ASM
static void vector_fmul_window_3dnowext(float *dst, const float *src0,
const float *src1, const float *win,
@@ -135,6 +137,7 @@ void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
+ fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
}
if (EXTERNAL_SSE2(mm_flags)) {
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;