diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2013-01-23 14:04:50 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-01-23 14:04:50 +0100 |
commit | 6e6e1708984e45881b9a5d4e26c3e7de852c54d5 (patch) | |
tree | 5e04d38f8e152faf98921843ca5e4530cbdc46a4 /libavutil/arm/float_dsp_vfp.S | |
parent | b1b870fbd7185bffbe27c5918001b40a8ff8b920 (diff) | |
parent | 42d324694883cdf1fff1612ac70fa403692a1ad4 (diff) | |
download | ffmpeg-6e6e1708984e45881b9a5d4e26c3e7de852c54d5.tar.gz |
Merge commit '42d324694883cdf1fff1612ac70fa403692a1ad4'
* commit '42d324694883cdf1fff1612ac70fa403692a1ad4':
floatdsp: move vector_fmul_reverse from dsputil to avfloatdsp.
Conflicts:
libavcodec/arm/dsputil_init_vfp.c
libavcodec/arm/dsputil_vfp.S
libavcodec/dsputil.c
libavcodec/ppc/float_altivec.c
libavcodec/x86/dsputil.asm
libavutil/x86/float_dsp.asm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/arm/float_dsp_vfp.S')
-rw-r--r-- | libavutil/arm/float_dsp_vfp.S | 69 |
1 files changed, 69 insertions, 0 deletions
diff --git a/libavutil/arm/float_dsp_vfp.S b/libavutil/arm/float_dsp_vfp.S
index db63e5a675..8695fbd981 100644
--- a/libavutil/arm/float_dsp_vfp.S
+++ b/libavutil/arm/float_dsp_vfp.S
@@ -66,3 +66,72 @@ function ff_vector_fmul_vfp, export=1
         vpop            {d8-d15}
         bx              lr
 endfunc
+
+/**
+ * ARM VFP optimized implementation of 'vector_fmul_reverse_c' function: dst[i] = src0[i] * src1[len - 1 - i].
+ * Assume that len is a positive number and is a multiple of 8 (the code below works in groups of 8 floats).
+ */
+@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
+@                                 const float *src1, int len)
+function ff_vector_fmul_reverse_vfp, export=1 /* assumes r0 = dst, r1 = src0, r2 = src1, r3 = len (prototype order, standard ARM calling convention) */
+        vpush           {d8-d15}          /* preserve d8-d15 (s16-s31), which are overwritten below */
+        add     r2,  r2,  r3, lsl #2      /* r2 = src1 + len * 4 bytes: one past the last src1 float */
+        vldmdb          r2!, {s0-s3}      /* group A: last 4 src1 floats, r2 walks downwards */
+        vldmia          r1!, {s8-s11}     /* group A: first 4 src0 floats, r1 walks upwards */
+        vldmdb          r2!, {s4-s7}      /* group A: next 4 src1 floats (still going down) */
+        vldmia          r1!, {s12-s15}    /* group A: next 4 src0 floats */
+        vmul.f32        s8,  s3,  s8      /* reversed pairing: highest loaded src1 float times lowest src0 float */
+        vmul.f32        s9,  s2,  s9
+        vmul.f32        s10, s1,  s10
+        vmul.f32        s11, s0,  s11     /* first 4 products of group A are ready before entering the loop */
+1:
+        subs            r3,  r3,  #16     /* only flag-setting instruction in the loop: ge = a second group of 8 (B) exists, gt = more data follows B */
+        it              ge                /* IT prefixes make the following instruction(s) conditional when assembled as Thumb-2 */
+        vldmdbge        r2!, {s16-s19}    /* group B: 4 src1 floats */
+        vmul.f32        s12, s7,  s12     /* group A, product 5; A's remaining multiplies are interleaved with B's loads */
+        it              ge
+        vldmiage        r1!, {s24-s27}    /* group B: 4 src0 floats */
+        vmul.f32        s13, s6,  s13     /* group A, product 6 */
+        it              ge
+        vldmdbge        r2!, {s20-s23}    /* group B: next 4 src1 floats */
+        vmul.f32        s14, s5,  s14     /* group A, product 7 */
+        it              ge
+        vldmiage        r1!, {s28-s31}    /* group B: next 4 src0 floats */
+        vmul.f32        s15, s4,  s15     /* group A, product 8 */
+        it              ge
+        vmulge.f32      s24, s19, s24     /* group B, product 1 */
+        it              gt
+        vldmdbgt        r2!, {s0-s3}      /* preload src1 for the next pass; s0-s3 are no longer needed by group A */
+        it              ge
+        vmulge.f32      s25, s18, s25     /* group B, product 2 */
+        vstmia          r0!, {s8-s13}     /* store group A products 1-6 to dst */
+        it              ge
+        vmulge.f32      s26, s17, s26     /* group B, product 3 */
+        it              gt
+        vldmiagt        r1!, {s8-s11}     /* preload src0 for the next pass; s8-s11 were stored just above */
+        itt             ge
+        vmulge.f32      s27, s16, s27     /* group B, product 4 */
+        vmulge.f32      s28, s23, s28     /* group B, product 5 */
+        it              gt
+        vldmdbgt        r2!, {s4-s7}      /* preload next 4 src1 floats for the next pass */
+        it              ge
+        vmulge.f32      s29, s22, s29     /* group B, product 6 */
+        vstmia          r0!, {s14-s15}    /* store group A products 7-8 to dst */
+        ittt            ge
+        vmulge.f32      s30, s21, s30     /* group B, product 7 */
+        vmulge.f32      s31, s20, s31     /* group B, product 8 */
+        vmulge.f32      s8,  s3,  s8      /* first product of the next pass; on the eq case the loop exits and this result is never stored */
+        it              gt
+        vldmiagt        r1!, {s12-s15}    /* preload next 4 src0 floats; s12-s15 were stored just above */
+        itttt           ge
+        vmulge.f32      s9,  s2,  s9      /* next pass, product 2 */
+        vmulge.f32      s10, s1,  s10     /* next pass, product 3 */
+        vstmiage        r0!, {s24-s27}    /* store group B products 1-4 to dst */
+        vmulge.f32      s11, s0,  s11     /* next pass, product 4: same register state as on first loop entry */
+        it              ge
+        vstmiage        r0!, {s28-s31}    /* store group B products 5-8 to dst */
+        bgt             1b                /* flags are still those of the subs above; loop while data remains */
+
+        vpop            {d8-d15}          /* restore the registers saved on entry */
+        bx              lr                /* return to caller */
+endfunc |