diff options
author | Alex Converse <alex.converse@gmail.com> | 2010-01-22 23:07:58 +0000 |
---|---|---|
committer | Alex Converse <alex.converse@gmail.com> | 2010-01-22 23:07:58 +0000 |
commit | 3deb53849e706b1ef932ff4f0c663f60275f8415 (patch) | |
tree | 34fcb29f270b4bdb70f1ab4802d74c08695a91be /libavcodec/x86/dsputil_yasm.asm | |
parent | 57835fc1aeaf0b3bbc0816b123a931ea39de758b (diff) | |
download | ffmpeg-3deb53849e706b1ef932ff4f0c663f60275f8415.tar.gz |
Implement an sse version of scalarproduct_float().
Originally committed as revision 21386 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/x86/dsputil_yasm.asm')
-rw-r--r-- | libavcodec/x86/dsputil_yasm.asm | 24 |
1 files changed, 24 insertions, 0 deletions
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
index 023fc4d3bd..e2478a4845 100644
--- a/libavcodec/x86/dsputil_yasm.asm
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -397,3 +397,27 @@ cglobal add_hfyu_left_prediction_sse4, 3,3,7, dst, src, w, left
 .unaligned:
     ADD_HFYU_LEFT_LOOP 0
+
+; float ff_scalarproduct_float_sse(const float *v1, const float *v2, int len) -- returns the sum of v1[i]*v2[i]; v1/v2 must be 16-byte aligned (movaps / mulps m128) and len a positive multiple of 4 (the loop body always runs at least once, 4 floats per pass)
+cglobal scalarproduct_float_sse, 3,3,2, v1, v2, offset   ; third argument (len) arrives in the register named "offset"
+    shl     offsetd, 2                  ; len -> byte count; the 32-bit write zero-extends on x86-64, discarding unspecified upper bits of the int argument
+    add     v1q, offsetq                ; v1 now points one past its last element
+    add     v2q, offsetq                ; same for v2
+    neg     offsetq                     ; offset = -4*len, counts up towards zero
+    xorps   xmm0, xmm0                  ; clear the four partial sums
+.loop:
+    movaps  xmm1, [v1q+offsetq]         ; load 4 floats of v1 (aligned load)
+    mulps   xmm1, [v2q+offsetq]         ; multiply lane-wise by 4 floats of v2
+    addps   xmm0, xmm1                  ; accumulate into the partial sums
+    add     offsetq, 16                 ; advance 4 floats; sets SF for the branch below
+    js      .loop                       ; keep going while the offset is still negative
+    movhlps xmm1, xmm0                  ; xmm1[0..1] = xmm0[2..3]
+    addps   xmm0, xmm1                  ; lanes 0 and 1 now hold s0+s2 and s1+s3
+    movss   xmm1, xmm0                  ; keep lane 0 in xmm1
+    shufps  xmm0, xmm0, 1               ; bring lane 1 down to lane 0
+    addss   xmm0, xmm1                  ; lane 0 = full horizontal sum
+%ifndef ARCH_X86_64
+    movss   r0m, xmm0                   ; SSE1 store (movd from xmm needs SSE2); first argument's stack slot is reused as scratch
+    fld     dword r0m                   ; 32-bit builds hand the float back on the x87 stack
+%endif
+    RET