diff options
author | James Almer <jamrial@gmail.com> | 2017-06-02 19:17:28 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-06-03 12:39:43 -0300 |
commit | be3809a521fecfd3a61db99d660f243bd32b30bb (patch) | |
tree | 33ec3093c87b67dd4fffbbb4ecad6eb25c23b165 | |
parent | 2ba896fef7edf6e83ef12dd82d067469cadbaf8f (diff) | |
download | ffmpeg-be3809a521fecfd3a61db99d660f243bd32b30bb.tar.gz |
x86/aacpsdsp: optimize ff_ps_stereo_interpolate_sse3
Move the unpacking outside of the loop. 5% to 10% faster.
Suggested-by: ubitux
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavcodec/x86/aacpsdsp.asm | 11 |
1 file changed, 6 insertions, 5 deletions
diff --git a/libavcodec/x86/aacpsdsp.asm b/libavcodec/x86/aacpsdsp.asm
index bb8a7f5df0..4548bb4257 100644
--- a/libavcodec/x86/aacpsdsp.asm
+++ b/libavcodec/x86/aacpsdsp.asm
@@ -93,6 +93,10 @@ cglobal ps_stereo_interpolate, 5, 5, 6, l, r, h, h_step, n
     movaps   m1, [h_stepq]
     cmp      nd, 0
     jle .ret
+    unpcklps m4, m0, m0
+    unpckhps m0, m0
+    unpcklps m5, m1, m1
+    unpckhps m1, m1
     shl      nd, 3
     add      lq, nq
     add      rq, nq
@@ -100,15 +104,12 @@ cglobal ps_stereo_interpolate, 5, 5, 6, l, r, h, h_step, n
 
 align 16
 .loop:
+    addps    m4, m5
     addps    m0, m1
     movddup  m2, [lq+nq]
     movddup  m3, [rq+nq]
-    movaps   m4, m0
-    movaps   m5, m0
-    unpcklps m4, m4
-    unpckhps m5, m5
     mulps    m2, m4
-    mulps    m3, m5
+    mulps    m3, m0
     addps    m2, m3
     movsd    [lq+nq], m2
     movhps   [rq+nq], m2