diff options
author | James Almer <jamrial@gmail.com> | 2022-09-22 17:10:37 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2022-09-22 18:17:26 -0300 |
commit | 0922c6b01bd50f0ce6e659f765c244f6a8f29eb3 (patch) | |
tree | 8294ba78a8a21b29c346af6cd4ff4ea4ce2430ff | |
parent | a1c6f4b653b6fca51eea40f12a22ab1cb045751d (diff) | |
download | ffmpeg-0922c6b01bd50f0ce6e659f765c244f6a8f29eb3.tar.gz |
x86/lpc: use fused negative multiply-add instructions where useful
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavcodec/x86/lpc.asm | 17 |
1 file changed, 15 insertions, 2 deletions
```diff
diff --git a/libavcodec/x86/lpc.asm b/libavcodec/x86/lpc.asm
index 61a5796e5d..a585c17ef5 100644
--- a/libavcodec/x86/lpc.asm
+++ b/libavcodec/x86/lpc.asm
@@ -79,11 +79,12 @@ cglobal lpc_apply_welch_window, 3, 5, 8, data, len, out, off1, off2
 .loop_o:
     movapd m1, m6
-    mulpd m2, m0, m0
-    subpd m1, m2
 %if cpuflag(avx2)
+    fnmaddpd m1, m0, m0, m1
     vpermpd m2, m1, q0123
 %else
+    mulpd m2, m0, m0
+    subpd m1, m2
     shufpd m2, m1, m1, 01b
 %endif
@@ -116,8 +117,12 @@ cglobal lpc_apply_welch_window, 3, 5, 8, data, len, out, off1, off2
 .loop_o_scalar:
     movapd xm1, xm6
+%if cpuflag(avx2)
+    fnmaddpd xm1, xm0, xm0, xm1
+%else
     mulpd xm2, xm0, xm0
     subpd xm1, xm2
+%endif
     cvtdq2pd xm3, [dataq + off1q]
     cvtdq2pd xm4, [dataq + off2q]
@@ -174,8 +179,12 @@ cglobal lpc_apply_welch_window, 3, 5, 8, data, len, out, off1, off2
 .loop_e:
     movapd m1, m6
+%if cpuflag(avx2)
+    fnmaddpd m1, m0, m0, m1
+%else
     mulpd m2, m0, m0
     subpd m1, m2
+%endif
 %if cpuflag(avx2)
     vpermpd m2, m1, q0123
 %else
@@ -210,8 +219,12 @@ cglobal lpc_apply_welch_window, 3, 5, 8, data, len, out, off1, off2
 .loop_e_scalar:
     movapd xm1, xm6
+%if cpuflag(avx2)
+    fnmaddpd xm1, xm0, xm0, xm1
+%else
     mulpd xm2, xm0, xm0
     subpd xm1, xm2
+%endif
     cvtdq2pd xm3, [dataq + off1q]
     cvtdq2pd xm4, [dataq + off2q]
```