diff options
author | James Almer <jamrial@gmail.com> | 2019-01-02 20:48:59 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2019-01-03 10:12:18 -0300 |
commit | 9b5bd665e105894919cdcfa0ed9818919538e5f6 (patch) | |
tree | aa6f906372abd463f74d3b25512e37e42e341386 | |
parent | 3ba5eef2c7b21a1af48d8d9ccb58f2ed4ebc874a (diff) | |
download | ffmpeg-9b5bd665e105894919cdcfa0ed9818919538e5f6.tar.gz |
x86/af_afir: fix processing the last element
ff_fcmul_add_sse3() is now identical to the C version.
Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavfilter/x86/af_afir.asm | 7 |
1 files changed, 5 insertions, 2 deletions
diff --git a/libavfilter/x86/af_afir.asm b/libavfilter/x86/af_afir.asm
index 849d85e70f..fcc1f426db 100644
--- a/libavfilter/x86/af_afir.asm
+++ b/libavfilter/x86/af_afir.asm
@@ -30,7 +30,6 @@ SECTION .text
 INIT_XMM sse3
 cglobal fcmul_add, 4,4,6, sum, t, c, len
     shl lend, 3
-    add lend, mmsize*2
     add tq, lenq
     add cq, lenq
     add sumq, lenq
@@ -57,4 +56,8 @@ ALIGN 16
     movaps [sumq + lenq+mmsize], m3
     add lenq, mmsize*2
     jl .loop
-    REP_RET
+    movss xm0, [tq + lenq]
+    mulss xm0, [cq + lenq]
+    addss xm0, [sumq + lenq]
+    movss [sumq + lenq], xm0
+    RET