aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/dsputil_yasm.asm
diff options
context:
space:
mode:
author Alex Converse <alex.converse@gmail.com> 2010-01-22 23:07:58 +0000
committer Alex Converse <alex.converse@gmail.com> 2010-01-22 23:07:58 +0000
commit 3deb53849e706b1ef932ff4f0c663f60275f8415 (patch)
tree 34fcb29f270b4bdb70f1ab4802d74c08695a91be /libavcodec/x86/dsputil_yasm.asm
parent 57835fc1aeaf0b3bbc0816b123a931ea39de758b (diff)
download ffmpeg-3deb53849e706b1ef932ff4f0c663f60275f8415.tar.gz
Implement an sse version of scalarproduct_float().
Originally committed as revision 21386 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/x86/dsputil_yasm.asm')
-rw-r--r-- libavcodec/x86/dsputil_yasm.asm 24
1 files changed, 24 insertions, 0 deletions
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 023fc4d3bd..e2478a4845 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -397,3 +397,27 @@ cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
.unaligned:
ADD_HFYU_LEFT_LOOP 0
+
+; float ff_scalarproduct_float_sse(const float *v1, const float *v2, int len)
+cglobal scalarproduct_float_sse, 3,3,2, v1, v2, offset ; dot product: returns sum of v1[i]*v2[i]
+    shl     offsetd, 2          ; len*4 = byte count; 32-bit op clears bits 63:32 (undefined for an int arg on x86-64)
+    add     v1q, offsetq        ; v1 now points one past its last element
+    add     v2q, offsetq        ; likewise for v2
+    neg     offsetq             ; negative byte offset that counts up towards 0
+    xorps   xmm0, xmm0          ; four running partial sums = 0
+.loop:
+    movaps  xmm1, [v1q+offsetq] ; aligned load of 4 floats: v1 must be 16-byte aligned
+    mulps   xmm1, [v2q+offsetq] ; SSE memory operand: v2 must be 16-byte aligned too
+    addps   xmm0, xmm1          ; accumulate the 4 products lane-wise
+    add     offsetq, 16         ; advance by 4 floats; sets SF for the branch below
+    js      .loop               ; body runs at least once; repeat while the offset is still negative
+    movhlps xmm1, xmm0          ; xmm1[0..1] = partial sums 2 and 3
+    addps   xmm0, xmm1          ; xmm0[0] = s0+s2, xmm0[1] = s1+s3
+    movss   xmm1, xmm0          ; keep s0+s2 in xmm1[0]
+    shufps  xmm0, xmm0, 1       ; bring s1+s3 down into lane 0
+    addss   xmm0, xmm1          ; xmm0[0] = s0+s1+s2+s3, the result
+%ifndef ARCH_X86_64
+    movss   r0m, xmm0           ; x86-32 returns float in st0: spill via first arg's slot; movss is SSE1, movd was SSE2-only
+    fld     dword r0m           ; load the result onto the x87 stack
+%endif
+    RET