aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/arm/dsputil_neon.S
diff options
context:
space:
mode:
authorMans Rullgard <mans@mansr.com>2011-09-28 14:34:54 +0100
committerMans Rullgard <mans@mansr.com>2011-09-28 15:56:09 +0100
commitbaf6b738f23bbe7db3a8438372d01a958492e477 (patch)
treed0a92c1496b69789afcd05ace8924c928afc5867 /libavcodec/arm/dsputil_neon.S
parenta92a1b93b4888821b2c0f01ab628d8ebaf3f6d61 (diff)
downloadffmpeg-baf6b738f23bbe7db3a8438372d01a958492e477.tar.gz
ARM: NEON optimised vector_fmac_scalar()
Signed-off-by: Mans Rullgard <mans@mansr.com>
Diffstat (limited to 'libavcodec/arm/dsputil_neon.S')
-rw-r--r--libavcodec/arm/dsputil_neon.S48
1 files changed, 48 insertions, 0 deletions
diff --git a/libavcodec/arm/dsputil_neon.S b/libavcodec/arm/dsputil_neon.S
index aef8a129ee..13f0699eb2 100644
--- a/libavcodec/arm/dsputil_neon.S
+++ b/libavcodec/arm/dsputil_neon.S
@@ -587,6 +587,54 @@ NOVFP vdup.32 q8, r2
.unreq len
endfunc
+function ff_vector_fmac_scalar_neon, export=1
+VFP len .req r2
+VFP acc .req r3
+NOVFP len .req r3
+NOVFP acc .req r2
+VFP vdup.32 q15, d0[0]
+NOVFP vdup.32 q15, r2
+ bics r12, len, #15
+ mov acc, r0
+ beq 3f
+ vld1.32 {q0}, [r1,:128]!
+ vld1.32 {q8}, [acc,:128]!
+ vld1.32 {q1}, [r1,:128]!
+ vld1.32 {q9}, [acc,:128]!
+1: vmla.f32 q8, q0, q15
+ vld1.32 {q2}, [r1,:128]!
+ vld1.32 {q10}, [acc,:128]!
+ vmla.f32 q9, q1, q15
+ vld1.32 {q3}, [r1,:128]!
+ vld1.32 {q11}, [acc,:128]!
+ vmla.f32 q10, q2, q15
+ vst1.32 {q8}, [r0,:128]!
+ vmla.f32 q11, q3, q15
+ vst1.32 {q9}, [r0,:128]!
+ subs r12, r12, #16
+ beq 2f
+ vld1.32 {q0}, [r1,:128]!
+ vld1.32 {q8}, [acc,:128]!
+ vst1.32 {q10}, [r0,:128]!
+ vld1.32 {q1}, [r1,:128]!
+ vld1.32 {q9}, [acc,:128]!
+ vst1.32 {q11}, [r0,:128]!
+ b 1b
+2: vst1.32 {q10}, [r0,:128]!
+ vst1.32 {q11}, [r0,:128]!
+ ands len, len, #15
+ it eq
+ bxeq lr
+3: vld1.32 {q0}, [r1,:128]!
+ vld1.32 {q8}, [acc,:128]!
+ vmla.f32 q8, q0, q15
+ vst1.32 {q8}, [r0,:128]!
+ subs len, len, #4
+ bgt 3b
+ bx lr
+ .unreq len
+endfunc
+
function ff_butterflies_float_neon, export=1
1: vld1.32 {q0},[r0,:128]
vld1.32 {q1},[r1,:128]