diff options
author | Lynne <dev@lynne.ee> | 2022-09-22 03:41:02 +0200 |
---|---|---|
committer | Lynne <dev@lynne.ee> | 2022-09-22 04:27:19 +0200 |
commit | b67776e12f90a11c0c976d0add8d213a2684064f (patch) | |
tree | 25dc819d2d8be9552b05835ad220e3cebf851fd9 /libavcodec/x86 | |
parent | dea944b838626b5576598b7f13cc34e6263ebbfe (diff) | |
download | ffmpeg-b67776e12f90a11c0c976d0add8d213a2684064f.tar.gz |
x86/lpc: fix even scalar loop overreads/writes
Passes checkasm with valgrind, tested to sizes of more than 4000 samples.
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/lpc.asm | 32 |
1 file changed, 19 insertions, 13 deletions
diff --git a/libavcodec/x86/lpc.asm b/libavcodec/x86/lpc.asm
index f5133a2950..ad74f1d8ac 100644
--- a/libavcodec/x86/lpc.asm
+++ b/libavcodec/x86/lpc.asm
@@ -38,6 +38,8 @@ SECTION .text
 cglobal lpc_apply_welch_window, 3, 5, 8, data, len, out, off1, off2
     cmp lenq, 0
     je .end
+    cmp lenq, 2
+    je .two
     cmp lenq, 1
     je .one

@@ -192,14 +194,13 @@ cglobal lpc_apply_welch_window, 3, 5, 8, data, len, out, off1, off2
     jge .loop_e

 .scalar_e:
-    subpd m0, m7
-    movapd m7, [dec_tab_scalar]
-    subpd m0, m7
-    subpd m0, m7
-    subpd m0, m7
+    subpd xm0, xm7
+    movapd xm7, [dec_tab_scalar]
+    subpd xm0, xm7

     add off1q, (mmsize/2)
-    sub off2q, (mmsize/2) - 4 - 8*cpuflag(avx2)
+    sub off2q, (mmsize/2) - 8*cpuflag(avx2)
+    add lenq, 6 + 4*cpuflag(avx2)

     addpd xm0, [sub_tab]

@@ -208,22 +209,27 @@ cglobal lpc_apply_welch_window, 3, 5, 8, data, len, out, off1, off2
     mulpd xm2, xm0, xm0
     subpd xm1, xm2

-    cvtdq2pd m3, [dataq + off1q - 4]
-    cvtdq2pd m4, [dataq + off2q - 4]
+    cvtdq2pd xm3, [dataq + off1q]
+    cvtdq2pd xm4, [dataq + off2q]

-    mulpd m3, m1
-    mulpd m4, m1
+    mulpd xm3, xm1
+    shufpd xm1, xm1, 00b
+    mulpd xm4, xm1

-    movhpd [outq + off1q*2], xm3
-    movhpd [outq + off2q*2], xm4
+    movlpd [outq + off1q*2], xm3
+    movhpd [outq + off2q*2 + 8], xm4

     subpd xm0, xm7
     add off2q, 4
     sub off1q, 4
-    jge .loop_e_scalar
+    sub lenq, 2
+    jg .loop_e_scalar

     RET

+.two:
+    xorpd xm0, xm0
+    movhpd [outq + 8], xm0
 .one:
     xorpd xm0, xm0
     movhpd [outq], xm0 |