aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2017-06-02 19:17:28 -0300
committer: James Almer <jamrial@gmail.com> 2017-06-03 12:39:43 -0300
commit: be3809a521fecfd3a61db99d660f243bd32b30bb (patch)
tree: 33ec3093c87b67dd4fffbbb4ecad6eb25c23b165
parent: 2ba896fef7edf6e83ef12dd82d067469cadbaf8f (diff)
download: ffmpeg-be3809a521fecfd3a61db99d660f243bd32b30bb.tar.gz
x86/aacpsdsp: optimize ff_ps_stereo_interpolate_sse3
Move the unpacking outside of the loop. 5% to 10% faster.

Suggested-by: ubitux
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- libavcodec/x86/aacpsdsp.asm | 11
1 file changed, 6 insertions, 5 deletions
diff --git a/libavcodec/x86/aacpsdsp.asm b/libavcodec/x86/aacpsdsp.asm
index bb8a7f5df0..4548bb4257 100644
--- a/libavcodec/x86/aacpsdsp.asm
+++ b/libavcodec/x86/aacpsdsp.asm
@@ -93,6 +93,10 @@ cglobal ps_stereo_interpolate, 5, 5, 6, l, r, h, h_step, n
movaps m1, [h_stepq]
cmp nd, 0
jle .ret
+ unpcklps m4, m0, m0
+ unpckhps m0, m0
+ unpcklps m5, m1, m1
+ unpckhps m1, m1
shl nd, 3
add lq, nq
add rq, nq
@@ -100,15 +104,12 @@ cglobal ps_stereo_interpolate, 5, 5, 6, l, r, h, h_step, n
align 16
.loop:
+ addps m4, m5
addps m0, m1
movddup m2, [lq+nq]
movddup m3, [rq+nq]
- movaps m4, m0
- movaps m5, m0
- unpcklps m4, m4
- unpckhps m5, m5
mulps m2, m4
- mulps m3, m5
+ mulps m3, m0
addps m2, m3
movsd [lq+nq], m2
movhps [rq+nq], m2