about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2023-11-22 16:04:02 -0300
committer: James Almer <jamrial@gmail.com> 2023-11-25 21:50:56 -0300
commit: d8b1a34433ecf0c2c9fb50754e98954f5ab67d4a (patch)
tree: d82b6917cd1c3a5e7e80abcbca151f65a8b823f1
parent: 2d9ed64859c9887d0504cd71dbd5b2c15e14251a (diff)
download: ffmpeg-d8b1a34433ecf0c2c9fb50754e98954f5ab67d4a.tar.gz
x86/ac3dsp: reduce instruction count inside the float_to_fixed24 loop
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- libavcodec/x86/ac3dsp.asm | 46
1 files changed, 23 insertions, 23 deletions
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index a95d359d95..42c8310462 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -77,16 +77,20 @@ AC3_EXPONENT_MIN
INIT_XMM sse2
cglobal float_to_fixed24, 3, 3, 9, dst, src, len
movaps m0, [pf_1_24]
+ shl lenq, 2
+ add srcq, lenq
+ add dstq, lenq
+ neg lenq
.loop:
- movaps m1, [srcq ]
- movaps m2, [srcq+16 ]
- movaps m3, [srcq+32 ]
- movaps m4, [srcq+48 ]
+ movaps m1, [srcq+lenq ]
+ movaps m2, [srcq+lenq+16 ]
+ movaps m3, [srcq+lenq+32 ]
+ movaps m4, [srcq+lenq+48 ]
%ifdef m8
- movaps m5, [srcq+64 ]
- movaps m6, [srcq+80 ]
- movaps m7, [srcq+96 ]
- movaps m8, [srcq+112]
+ movaps m5, [srcq+lenq+64 ]
+ movaps m6, [srcq+lenq+80 ]
+ movaps m7, [srcq+lenq+96 ]
+ movaps m8, [srcq+lenq+112]
%endif
mulps m1, m0
mulps m2, m0
@@ -108,24 +112,20 @@ cglobal float_to_fixed24, 3, 3, 9, dst, src, len
cvtps2dq m7, m7
cvtps2dq m8, m8
%endif
- movdqa [dstq ], m1
- movdqa [dstq+16 ], m2
- movdqa [dstq+32 ], m3
- movdqa [dstq+48 ], m4
+ movdqa [dstq+lenq ], m1
+ movdqa [dstq+lenq+16 ], m2
+ movdqa [dstq+lenq+32 ], m3
+ movdqa [dstq+lenq+48 ], m4
%ifdef m8
- movdqa [dstq+64 ], m5
- movdqa [dstq+80 ], m6
- movdqa [dstq+96 ], m7
- movdqa [dstq+112], m8
- add srcq, 128
- add dstq, 128
- sub lenq, 32
+ movdqa [dstq+lenq+64 ], m5
+ movdqa [dstq+lenq+80 ], m6
+ movdqa [dstq+lenq+96 ], m7
+ movdqa [dstq+lenq+112], m8
+ add lenq, 128
%else
- add srcq, 64
- add dstq, 64
- sub lenq, 16
+ add lenq, 64
%endif
- ja .loop
+ jl .loop
RET
;------------------------------------------------------------------------------