diff options
author | James Almer <jamrial@gmail.com> | 2017-06-15 23:20:05 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-06-15 23:20:05 -0300 |
commit | b3446862bfdbfc8c500c052e0aa48674c1d9ca9f (patch) | |
tree | 4fe4203451596c1f61dfc1d8b3ebb9c608051e49 | |
parent | c0607d88ee1fbd5a5272abe415ee83cc10310021 (diff) | |
download | ffmpeg-b3446862bfdbfc8c500c052e0aa48674c1d9ca9f.tar.gz |
x86/vorbisdsp: optimize ff_vorbis_inverse_coupling_sse
About 7% faster.
-rw-r--r-- | libavcodec/x86/vorbisdsp.asm | 19 |
1 files changed, 11 insertions, 8 deletions
diff --git a/libavcodec/x86/vorbisdsp.asm b/libavcodec/x86/vorbisdsp.asm
index b25d838868..d952296716 100644
--- a/libavcodec/x86/vorbisdsp.asm
+++ b/libavcodec/x86/vorbisdsp.asm
@@ -57,13 +57,17 @@ cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
 %endif
 
 INIT_XMM sse
-cglobal vorbis_inverse_coupling, 3, 4, 6, mag, ang, block_size, cntr
+cglobal vorbis_inverse_coupling, 3, 3, 6, mag, ang, block_size
     mova                     m5, [pdw_80000000]
-    xor                   cntrq, cntrq
+    shl             block_sized, 2
+    add                    magq, block_sizeq
+    add                    angq, block_sizeq
+    neg             block_sizeq
+
 align 16
 .loop:
-    mova                     m0, [magq+cntrq*4]
-    mova                     m1, [angq+cntrq*4]
+    mova                     m0, [magq+block_sizeq]
+    mova                     m1, [angq+block_sizeq]
     xorps                    m2, m2
     xorps                    m3, m3
     cmpleps                  m2, m0     ; m <= 0.0
@@ -75,9 +79,8 @@ align 16
     andnps                   m4, m1
     addps                    m3, m0     ; a = m + ((a < 0) & (a ^ sign(m)))
     subps                    m0, m4     ; m = m + ((a > 0) & (a ^ sign(m)))
-    mova         [angq+cntrq*4], m3
-    mova         [magq+cntrq*4], m0
-    add                   cntrq, 4
-    cmp                   cntrq, block_sizeq
+    mova     [angq+block_sizeq], m3
+    mova     [magq+block_sizeq], m0
+    add             block_sizeq, mmsize
     jl .loop
     RET