diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-11-27 13:39:52 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-11-27 13:39:52 +0100 |
commit | 2684d2e3ea8a1a2863ae9842d072341b44b09829 (patch) | |
tree | 3d116023d439f92f473480c8f68f6d27f9f62344 /libavutil | |
parent | 257196209fe7d27ad22e18bf5757ffcad47dce6b (diff) | |
parent | 284ea790d89441fa1e6b2d72d3c1ed6d61972f0b (diff) | |
download | ffmpeg-2684d2e3ea8a1a2863ae9842d072341b44b09829.tar.gz |
Merge commit '284ea790d89441fa1e6b2d72d3c1ed6d61972f0b'
* commit '284ea790d89441fa1e6b2d72d3c1ed6d61972f0b':
dsputil: move vector_fmul_scalar() to AVFloatDSPContext in libavutil
aacenc: use the correct output buffer
aacdec: fix signed overflows in lcg_random()
base64: fix signed overflow in shift
Conflicts:
libavcodec/dsputil.c
libavutil/base64.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/arm/float_dsp_init_neon.c | 4 | ||||
-rw-r--r-- | libavutil/arm/float_dsp_neon.S | 38 | ||||
-rw-r--r-- | libavutil/float_dsp.c | 9 | ||||
-rw-r--r-- | libavutil/float_dsp.h | 15 |
4 files changed, 66 insertions, 0 deletions
diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c index 3ca0288b31..88eb4b3d2a 100644 --- a/libavutil/arm/float_dsp_init_neon.c +++ b/libavutil/arm/float_dsp_init_neon.c @@ -29,8 +29,12 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int l void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, int len); +void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, + int len); + void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) { fdsp->vector_fmul = ff_vector_fmul_neon; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon; + fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_neon; } diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S index 4aa6f838dd..6d7bd5236e 100644 --- a/libavutil/arm/float_dsp_neon.S +++ b/libavutil/arm/float_dsp_neon.S @@ -108,3 +108,41 @@ NOVFP vdup.32 q15, r2 bx lr .unreq len endfunc + +function ff_vector_fmul_scalar_neon, export=1 +VFP len .req r2 +NOVFP len .req r3 +VFP vdup.32 q8, d0[0] +NOVFP vdup.32 q8, r2 + bics r12, len, #15 + beq 3f + vld1.32 {q0},[r1,:128]! + vld1.32 {q1},[r1,:128]! +1: vmul.f32 q0, q0, q8 + vld1.32 {q2},[r1,:128]! + vmul.f32 q1, q1, q8 + vld1.32 {q3},[r1,:128]! + vmul.f32 q2, q2, q8 + vst1.32 {q0},[r0,:128]! + vmul.f32 q3, q3, q8 + vst1.32 {q1},[r0,:128]! + subs r12, r12, #16 + beq 2f + vld1.32 {q0},[r1,:128]! + vst1.32 {q2},[r0,:128]! + vld1.32 {q1},[r1,:128]! + vst1.32 {q3},[r0,:128]! + b 1b +2: vst1.32 {q2},[r0,:128]! + vst1.32 {q3},[r0,:128]! + ands len, len, #15 + it eq + bxeq lr +3: vld1.32 {q0},[r1,:128]! + vmul.f32 q0, q0, q8 + vst1.32 {q0},[r0,:128]! + subs len, len, #4 + bgt 3b + bx lr + .unreq len +endfunc diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c index 48f67c6253..7b551871d6 100644 --- a/libavutil/float_dsp.c +++ b/libavutil/float_dsp.c @@ -39,10 +39,19 @@ static void vector_fmac_scalar_c(float *dst, const float *src, float mul, dst[i] += src[i] * mul; } +static void vector_fmul_scalar_c(float *dst, const float *src, float mul, + int len) +{ + int i; + for (i = 0; i < len; i++) + dst[i] = src[i] * mul; +} + void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) { fdsp->vector_fmul = vector_fmul_c; fdsp->vector_fmac_scalar = vector_fmac_scalar_c; + fdsp->vector_fmul_scalar = vector_fmul_scalar_c; #if ARCH_ARM ff_float_dsp_init_arm(fdsp); diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h index 48a78476c9..b4e89cb722 100644 --- a/libavutil/float_dsp.h +++ b/libavutil/float_dsp.h @@ -51,6 +51,21 @@ typedef struct AVFloatDSPContext { */ void (*vector_fmac_scalar)(float *dst, const float *src, float mul, int len); + + /** + * Multiply a vector of floats by a scalar float. Source and + * destination vectors must overlap exactly or not at all. + * + * @param dst result vector + * constraints: 16-byte aligned + * @param src input vector + * constraints: 16-byte aligned + * @param mul scalar value + * @param len length of vector + * constraints: multiple of 4 + */ + void (*vector_fmul_scalar)(float *dst, const float *src, float mul, + int len); } AVFloatDSPContext; /** |