about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec
diff options
context:
space:
mode:
author: Reimar Döffinger <Reimar.Doeffinger@gmx.de> 2012-03-07 21:35:13 +0100
committer: Ronald S. Bultje <rsbultje@gmail.com> 2012-03-07 13:50:13 -0800
commit: 6eda85e15b38863a627fd0602098aa3250174698 (patch)
tree: 6af8c9e85828150cd9f3db2934273bcd7127110b /libavcodec
parent: 3416d0805ec224ac814158162c065ceacfd7c2fa (diff)
download: ffmpeg-6eda85e15b38863a627fd0602098aa3250174698.tar.gz
sbrdsp.asm: convert all instructions to float/SSE ones.
Since the values are floats, using the float operations makes sense, improves performance on some CPUs, and makes the code SSE-compatible instead of requiring SSE2.
Based on a suggestion by Jason.
Signed-off-by: Reimar Döffinger <Reimar.Doeffinger@gmx.de>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/x86/sbrdsp.asm 16
1 files changed, 8 insertions, 8 deletions
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index c3b559bb15..31a1c8b76f 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -82,14 +82,14 @@ cglobal sbr_hf_g_filt, 5, 6, 5
lea r0, [r0 + r3*8]
neg r3
.loop4:
- movq m0, [r2 + 4*r3 + 0]
- movq m1, [r2 + 4*r3 + 8]
- movq m2, [r1 + 0*STEP]
- movq m3, [r1 + 2*STEP]
+ movlps m0, [r2 + 4*r3 + 0]
+ movlps m1, [r2 + 4*r3 + 8]
+ movlps m2, [r1 + 0*STEP]
+ movlps m3, [r1 + 2*STEP]
movhps m2, [r1 + 1*STEP]
movhps m3, [r1 + 3*STEP]
- punpckldq m0, m0
- punpckldq m1, m1
+ unpcklps m0, m0
+ unpcklps m1, m1
mulps m0, m2
mulps m1, m3
movu [r0 + 8*r3 + 0], m0
@@ -101,8 +101,8 @@ cglobal sbr_hf_g_filt, 5, 6, 5
jz .end
.loop1: ; element 0 and 1 can be computed at the same time
movss m0, [r2]
- movq m2, [r1]
- punpckldq m0, m0
+ movlps m2, [r1]
+ unpcklps m0, m0
mulps m2, m0
movlps [r0], m2
add r0, 8