diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2012-09-22 18:13:57 -0400 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2012-11-26 11:29:06 -0500 |
commit | 284ea790d89441fa1e6b2d72d3c1ed6d61972f0b (patch) | |
tree | bcfcca17dd96f1ea102ed069072abde7a57920bf /libavutil | |
parent | 1e276553886a7ca315a055c489fabe456e789e3f (diff) | |
download | ffmpeg-284ea790d89441fa1e6b2d72d3c1ed6d61972f0b.tar.gz |
dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/arm/float_dsp_init_neon.c | 4 |
-rw-r--r-- | libavutil/arm/float_dsp_neon.S | 38 |
-rw-r--r-- | libavutil/float_dsp.c | 9 |
-rw-r--r-- | libavutil/float_dsp.h | 15 |
4 files changed, 66 insertions, 0 deletions
diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c
index 3ca0288b31..88eb4b3d2a 100644
--- a/libavutil/arm/float_dsp_init_neon.c
+++ b/libavutil/arm/float_dsp_init_neon.c
@@ -29,8 +29,12 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int l
 void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
                                 int len);
 
+void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
+                                int len); /* defined in float_dsp_neon.S */
+
 void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp)
 {
     fdsp->vector_fmul = ff_vector_fmul_neon;
     fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon;
+    fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_neon;
 }
diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S
index 4aa6f838dd..6d7bd5236e 100644
--- a/libavutil/arm/float_dsp_neon.S
+++ b/libavutil/arm/float_dsp_neon.S
@@ -108,3 +108,41 @@ NOVFP   vdup.32         q15, r2
         bx              lr
         .unreq          len
 endfunc
+
+function ff_vector_fmul_scalar_neon, export=1 /* r0 = dst, r1 = src; dst[i] = src[i] * mul */
+VFP     len .req r2             /* VFP variant: mul is read from d0[0], so len is r2 */
+NOVFP   len .req r3             /* NOVFP variant: mul is read from r2, so len is r3 */
+VFP     vdup.32         q8,  d0[0]      /* q8 = mul replicated into all four lanes */
+NOVFP   vdup.32         q8,  r2
+        bics            r12, len, #15   /* r12 = len with the low 4 bits cleared */
+        beq             3f              /* no full group of 16 floats: use the 4-wide loop only */
+        vld1.32         {q0},[r1,:128]! /* preload the first 8 floats of the group */
+        vld1.32         {q1},[r1,:128]!
+1:      vmul.f32        q0,  q0,  q8    /* 16 floats per pass, loads and stores interleaved with the multiplies */
+        vld1.32         {q2},[r1,:128]!
+        vmul.f32        q1,  q1,  q8
+        vld1.32         {q3},[r1,:128]!
+        vmul.f32        q2,  q2,  q8
+        vst1.32         {q0},[r0,:128]!
+        vmul.f32        q3,  q3,  q8
+        vst1.32         {q1},[r0,:128]!
+        subs            r12, r12, #16
+        beq             2f              /* that was the last group of 16: skip the preload */
+        vld1.32         {q0},[r1,:128]! /* preload q0 and q1 for the next pass */
+        vst1.32         {q2},[r0,:128]!
+        vld1.32         {q1},[r1,:128]!
+        vst1.32         {q3},[r0,:128]!
+        b               1b
+2:      vst1.32         {q2},[r0,:128]! /* store the q2 and q3 results still pending from the last pass */
+        vst1.32         {q3},[r0,:128]!
+        ands            len, len, #15   /* floats left over after the 16-wide loop */
+        it              eq
+        bxeq            lr              /* nothing left: return */
+3:      vld1.32         {q0},[r1,:128]! /* tail loop, 4 floats per pass; NOTE(review): body runs before the test, so len == 0 would still process 4 floats - confirm callers never pass 0 */
+        vmul.f32        q0,  q0,  q8
+        vst1.32         {q0},[r0,:128]!
+        subs            len, len, #4
+        bgt             3b
+        bx              lr
+        .unreq          len
+endfunc
diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 2e90939090..b6b11818b5 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -36,10 +36,19 @@ static void vector_fmac_scalar_c(float *dst, const float *src, float mul,
         dst[i] += src[i] * mul;
 }
 
+static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
+                                 int len)
+{
+    int i;
+    for (i = 0; i < len; i++)
+        dst[i] = src[i] * mul; /* plain C version: one float per iteration */
+}
+
 void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact)
 {
     fdsp->vector_fmul = vector_fmul_c;
     fdsp->vector_fmac_scalar = vector_fmac_scalar_c;
+    fdsp->vector_fmul_scalar = vector_fmul_scalar_c; /* C default, assigned before the arch-specific init below runs */
 
 #if ARCH_ARM
     ff_float_dsp_init_arm(fdsp);
diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index 95cef62f29..cb4b28f0e2 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -51,6 +51,21 @@ typedef struct AVFloatDSPContext {
      */
     void (*vector_fmac_scalar)(float *dst, const float *src, float mul,
                                int len);
+
+    /**
+     * Multiply a vector of floats by a scalar float: dst[i] = src[i] * mul.
+     * Source and destination vectors must overlap exactly or not at all.
+     *
+     * @param dst result vector
+     *            constraints: 16-byte aligned
+     * @param src input vector
+     *            constraints: 16-byte aligned
+     * @param mul scalar value
+     * @param len number of floats in each vector
+     *            constraints: multiple of 4
+     */
+    void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
+                               int len);
 } AVFloatDSPContext;
 
 /** |