diff options
author | James Almer <jamrial@gmail.com> | 2023-11-22 16:04:02 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2023-11-25 21:50:56 -0300 |
commit | d8b1a34433ecf0c2c9fb50754e98954f5ab67d4a (patch) | |
tree | d82b6917cd1c3a5e7e80abcbca151f65a8b823f1 | |
parent | 2d9ed64859c9887d0504cd71dbd5b2c15e14251a (diff) | |
download | ffmpeg-d8b1a34433ecf0c2c9fb50754e98954f5ab67d4a.tar.gz |
x86/ac3dsp: reduce instruction count inside the float_to_fixed24 loop
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavcodec/x86/ac3dsp.asm | 46 |
1 file changed, 23 insertions, 23 deletions
```diff
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index a95d359d95..42c8310462 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -77,16 +77,20 @@ AC3_EXPONENT_MIN
 INIT_XMM sse2
 cglobal float_to_fixed24, 3, 3, 9, dst, src, len
     movaps     m0, [pf_1_24]
+    shl      lenq, 2
+    add      srcq, lenq
+    add      dstq, lenq
+    neg      lenq
 .loop:
-    movaps     m1, [srcq    ]
-    movaps     m2, [srcq+16 ]
-    movaps     m3, [srcq+32 ]
-    movaps     m4, [srcq+48 ]
+    movaps     m1, [srcq+lenq    ]
+    movaps     m2, [srcq+lenq+16 ]
+    movaps     m3, [srcq+lenq+32 ]
+    movaps     m4, [srcq+lenq+48 ]
 %ifdef m8
-    movaps     m5, [srcq+64 ]
-    movaps     m6, [srcq+80 ]
-    movaps     m7, [srcq+96 ]
-    movaps     m8, [srcq+112]
+    movaps     m5, [srcq+lenq+64 ]
+    movaps     m6, [srcq+lenq+80 ]
+    movaps     m7, [srcq+lenq+96 ]
+    movaps     m8, [srcq+lenq+112]
 %endif
     mulps      m1, m0
     mulps      m2, m0
@@ -108,24 +112,20 @@ cglobal float_to_fixed24, 3, 3, 9, dst, src, len
     cvtps2dq   m7, m7
     cvtps2dq   m8, m8
 %endif
-    movdqa  [dstq    ], m1
-    movdqa  [dstq+16 ], m2
-    movdqa  [dstq+32 ], m3
-    movdqa  [dstq+48 ], m4
+    movdqa  [dstq+lenq    ], m1
+    movdqa  [dstq+lenq+16 ], m2
+    movdqa  [dstq+lenq+32 ], m3
+    movdqa  [dstq+lenq+48 ], m4
 %ifdef m8
-    movdqa  [dstq+64 ], m5
-    movdqa  [dstq+80 ], m6
-    movdqa  [dstq+96 ], m7
-    movdqa  [dstq+112], m8
-    add      srcq, 128
-    add      dstq, 128
-    sub      lenq, 32
+    movdqa  [dstq+lenq+64 ], m5
+    movdqa  [dstq+lenq+80 ], m6
+    movdqa  [dstq+lenq+96 ], m7
+    movdqa  [dstq+lenq+112], m8
+    add      lenq, 128
 %else
-    add      srcq, 64
-    add      dstq, 64
-    sub      lenq, 16
+    add      lenq, 64
 %endif
-    ja .loop
+    jl .loop
     RET
 
 ;------------------------------------------------------------------------------
```