diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2011-01-13 15:28:06 -0500 |
---|---|---|
committer | Mans Rullgard <mans@mansr.com> | 2011-01-22 17:53:27 +0000 |
commit | 6eabb0d3ad42b91c1b4c298718c29961f7c1653a (patch) | |
tree | 0cb7ebc7b25fcb4bf3f91fe2735ff9f264dff015 /libavcodec/arm/dsputil_vfp.S | |
parent | fcb7e535dd9ad142c079af62af9c1d0f4b001057 (diff) | |
download | ffmpeg-6eabb0d3ad42b91c1b4c298718c29961f7c1653a.tar.gz |
Change DSPContext.vector_fmul() from dst=dst*src to dst=src0*src1.
Signed-off-by: Mans Rullgard <mans@mansr.com>
Diffstat (limited to 'libavcodec/arm/dsputil_vfp.S')
-rw-r--r-- | libavcodec/arm/dsputil_vfp.S | 29 |
1 file changed, 14 insertions, 15 deletions
diff --git a/libavcodec/arm/dsputil_vfp.S b/libavcodec/arm/dsputil_vfp.S
index b704ba9144..a65b69e20a 100644
--- a/libavcodec/arm/dsputil_vfp.S
+++ b/libavcodec/arm/dsputil_vfp.S
@@ -41,34 +41,33 @@
 * ARM VFP optimized implementation of 'vector_fmul_c' function.
 * Assume that len is a positive number and is multiple of 8
 */
-@ void ff_vector_fmul_vfp(float *dst, const float *src, int len)
+@ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len)
 function ff_vector_fmul_vfp, export=1
 vpush {d8-d15}
- mov r3, r0
 fmrx r12, fpscr
 orr r12, r12, #(3 << 16) /* set vector size to 4 */
 fmxr fpscr, r12
- vldmia r3!, {s0-s3}
- vldmia r1!, {s8-s11}
- vldmia r3!, {s4-s7}
- vldmia r1!, {s12-s15}
+ vldmia r1!, {s0-s3}
+ vldmia r2!, {s8-s11}
+ vldmia r1!, {s4-s7}
+ vldmia r2!, {s12-s15}
 vmul.f32 s8, s0, s8
 1:
- subs r2, r2, #16
+ subs r3, r3, #16
 vmul.f32 s12, s4, s12
- vldmiage r3!, {s16-s19}
- vldmiage r1!, {s24-s27}
- vldmiage r3!, {s20-s23}
- vldmiage r1!, {s28-s31}
+ vldmiage r1!, {s16-s19}
+ vldmiage r2!, {s24-s27}
+ vldmiage r1!, {s20-s23}
+ vldmiage r2!, {s28-s31}
 vmulge.f32 s24, s16, s24
 vstmia r0!, {s8-s11}
 vstmia r0!, {s12-s15}
 vmulge.f32 s28, s20, s28
- vldmiagt r3!, {s0-s3}
- vldmiagt r1!, {s8-s11}
- vldmiagt r3!, {s4-s7}
- vldmiagt r1!, {s12-s15}
+ vldmiagt r1!, {s0-s3}
+ vldmiagt r2!, {s8-s11}
+ vldmiagt r1!, {s4-s7}
+ vldmiagt r2!, {s12-s15}
 vmulge.f32 s8, s0, s8
 vstmiage r0!, {s24-s27}
 vstmiage r0!, {s28-s31}