diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2012-06-08 13:49:56 -0400 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2012-06-18 18:01:14 -0400 |
commit | cb5042d02c66aed68643633446f6bf623b72416e (patch) | |
tree | c8e75354a37c981aa3a488d7c236750a38d9b1d9 /libavutil | |
parent | 4e4dd7173023502b5b3e7c3d7ccd7e6fe45b7afe (diff) | |
download | ffmpeg-cb5042d02c66aed68643633446f6bf623b72416e.tar.gz |
float_dsp: Move vector_fmac_scalar() from libavcodec to libavutil
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/arm/float_dsp_init_neon.c | 4 | ||||
-rw-r--r-- | libavutil/arm/float_dsp_neon.S | 48 | ||||
-rw-r--r-- | libavutil/float_dsp.c | 9 | ||||
-rw-r--r-- | libavutil/float_dsp.h | 16 |
4 files changed, 77 insertions, 0 deletions
diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c
index fa6d0d7d15..3ca0288b31 100644
--- a/libavutil/arm/float_dsp_init_neon.c
+++ b/libavutil/arm/float_dsp_init_neon.c
@@ -26,7 +26,11 @@
 void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1,
                          int len);
 
+void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
+                                int len);
+
 void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
 {
     fdsp->vector_fmul = ff_vector_fmul_neon;
+    fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
 }
diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S
index d66fa09424..03b164388f 100644
--- a/libavutil/arm/float_dsp_neon.S
+++ b/libavutil/arm/float_dsp_neon.S
@@ -62,3 +62,51 @@ function ff_vector_fmul_neon, export=1
 3:      vst1.32         {d16-d19},[r0,:128]!
         bx              lr
 endfunc
+
+function ff_vector_fmac_scalar_neon, export=1
+VFP     len .req r2
+VFP     acc .req r3
+NOVFP   len .req r3
+NOVFP   acc .req r2
+VFP     vdup.32         q15, d0[0]
+NOVFP   vdup.32         q15, r2
+        bics            r12, len, #15
+        mov             acc, r0
+        beq             3f
+        vld1.32         {q0}, [r1,:128]!
+        vld1.32         {q8}, [acc,:128]!
+        vld1.32         {q1}, [r1,:128]!
+        vld1.32         {q9}, [acc,:128]!
+1:      vmla.f32        q8, q0, q15
+        vld1.32         {q2}, [r1,:128]!
+        vld1.32         {q10}, [acc,:128]!
+        vmla.f32        q9, q1, q15
+        vld1.32         {q3}, [r1,:128]!
+        vld1.32         {q11}, [acc,:128]!
+        vmla.f32        q10, q2, q15
+        vst1.32         {q8}, [r0,:128]!
+        vmla.f32        q11, q3, q15
+        vst1.32         {q9}, [r0,:128]!
+        subs            r12, r12, #16
+        beq             2f
+        vld1.32         {q0}, [r1,:128]!
+        vld1.32         {q8}, [acc,:128]!
+        vst1.32         {q10}, [r0,:128]!
+        vld1.32         {q1}, [r1,:128]!
+        vld1.32         {q9}, [acc,:128]!
+        vst1.32         {q11}, [r0,:128]!
+        b               1b
+2:      vst1.32         {q10}, [r0,:128]!
+        vst1.32         {q11}, [r0,:128]!
+        ands            len, len, #15
+        it              eq
+        bxeq            lr
+3:      vld1.32         {q0}, [r1,:128]!
+        vld1.32         {q8}, [acc,:128]!
+        vmla.f32        q8, q0, q15
+        vst1.32         {q8}, [r0,:128]!
+        subs            len, len, #4
+        bgt             3b
+        bx              lr
+        .unreq          len
+endfunc
diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 039dd07d36..2e90939090 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -28,9 +28,18 @@ static void vector_fmul_c(float *dst, const float *src0, const float *src1,
         dst[i] = src0[i] * src1[i];
 }
 
+static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
+                                 int len)
+{
+    int i;
+    for (i = 0; i < len; i++)
+        dst[i] += src[i] * mul;
+}
+
 void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
 {
     fdsp->vector_fmul = vector_fmul_c;
+    fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
 
 #if ARCH_ARM
     ff_float_dsp_init_arm(fdsp);
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 30161a252b..4e266304da 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -35,6 +35,22 @@ typedef struct AVFloatDSPContext {
      */
     void (*vector_fmul)(float *dst, const float *src0, const float *src1,
                         int len);
+
+    /**
+     * Multiply a vector of floats by a scalar float and add to
+     * destination vector. Source and destination vectors must
+     * overlap exactly or not at all.
+     *
+     * @param dst result vector
+     *            constraints: 16-byte aligned
+     * @param src input vector
+     *            constraints: 16-byte aligned
+     * @param mul scalar value
+     * @param len length of vector
+     *            constraints: multiple of 4
+     */
+    void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
+                               int len);
 } AVFloatDSPContext;
 
 /**