diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-06-19 20:52:00 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-06-19 20:53:27 +0200 |
commit | cabbd271a5f37042291c06b9f8bd6c641fbddfde (patch) | |
tree | 110238d357631f95c4849d0d99d978a61b2a1ee7 /libavutil/arm | |
parent | 6b9446e93296ed236d497fe3f493d8956571f888 (diff) | |
parent | 4cc2920dd2c0ce4e64e709da4f78508e1ec9871e (diff) | |
download | ffmpeg-cabbd271a5f37042291c06b9f8bd6c641fbddfde.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master: (24 commits)
flvdec: remove incomplete, disabled seeking code
mem: add support for _aligned_malloc() as found on Windows
lavc: Extend the documentation for avcodec_init_packet
flvdec: remove incomplete, disabled seeking code
http: replace atoll() with strtoll()
mpegts: remove unused/incomplete/broken seeking code
af_amix: allow float planar sample format as input
af_amix: use AVFloatDSPContext.vector_fmac_scalar()
float_dsp: add x86-optimized functions for vector_fmac_scalar()
float_dsp: Move vector_fmac_scalar() from libavcodec to libavutil
lavr: Add x86-optimized function for flt to s32 conversion
lavr: Add x86-optimized function for flt to s16 conversion
lavr: Add x86-optimized functions for s32 to flt conversion
lavr: Add x86-optimized functions for s32 to s16 conversion
lavr: Add x86-optimized functions for s16 to flt conversion
lavr: Add x86-optimized function for s16 to s32 conversion
rtpenc: Support packetizing iLBC
rtpdec: Add a depacketizer for iLBC
Implement the iLBC storage file format
mov: Support muxing/demuxing iLBC
...
Conflicts:
Changelog
configure
libavcodec/avcodec.h
libavcodec/dsputil.c
libavcodec/version.h
libavformat/movenc.c
libavformat/mpegts.c
libavformat/version.h
libavutil/mem.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/arm')
-rw-r--r-- | libavutil/arm/float_dsp_init_neon.c | 4 | ||||
-rw-r--r-- | libavutil/arm/float_dsp_neon.S | 48 |
2 files changed, 52 insertions, 0 deletions
diff --git a/libavutil/arm/float_dsp_init_neon.c b/libavutil/arm/float_dsp_init_neon.c index fa6d0d7d15..3ca0288b31 100644 --- a/libavutil/arm/float_dsp_init_neon.c +++ b/libavutil/arm/float_dsp_init_neon.c @@ -26,7 +26,11 @@ void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, int len); +void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, + int len); + void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) { fdsp->vector_fmul = ff_vector_fmul_neon; + fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_neon; } diff --git a/libavutil/arm/float_dsp_neon.S b/libavutil/arm/float_dsp_neon.S index d66fa09424..03b164388f 100644 --- a/libavutil/arm/float_dsp_neon.S +++ b/libavutil/arm/float_dsp_neon.S @@ -62,3 +62,51 @@ function ff_vector_fmul_neon, export=1 3: vst1.32 {d16-d19},[r0,:128]! bx lr endfunc + +function ff_vector_fmac_scalar_neon, export=1 +VFP len .req r2 +VFP acc .req r3 +NOVFP len .req r3 +NOVFP acc .req r2 +VFP vdup.32 q15, d0[0] +NOVFP vdup.32 q15, r2 + bics r12, len, #15 + mov acc, r0 + beq 3f + vld1.32 {q0}, [r1,:128]! + vld1.32 {q8}, [acc,:128]! + vld1.32 {q1}, [r1,:128]! + vld1.32 {q9}, [acc,:128]! +1: vmla.f32 q8, q0, q15 + vld1.32 {q2}, [r1,:128]! + vld1.32 {q10}, [acc,:128]! + vmla.f32 q9, q1, q15 + vld1.32 {q3}, [r1,:128]! + vld1.32 {q11}, [acc,:128]! + vmla.f32 q10, q2, q15 + vst1.32 {q8}, [r0,:128]! + vmla.f32 q11, q3, q15 + vst1.32 {q9}, [r0,:128]! + subs r12, r12, #16 + beq 2f + vld1.32 {q0}, [r1,:128]! + vld1.32 {q8}, [acc,:128]! + vst1.32 {q10}, [r0,:128]! + vld1.32 {q1}, [r1,:128]! + vld1.32 {q9}, [acc,:128]! + vst1.32 {q11}, [r0,:128]! + b 1b +2: vst1.32 {q10}, [r0,:128]! + vst1.32 {q11}, [r0,:128]! + ands len, len, #15 + it eq + bxeq lr +3: vld1.32 {q0}, [r1,:128]! + vld1.32 {q8}, [acc,:128]! + vmla.f32 q8, q0, q15 + vst1.32 {q8}, [r0,:128]! + subs len, len, #4 + bgt 3b + bx lr + .unreq len +endfunc |