aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/x86util.asm
diff options
context:
space:
mode:
author Loren Merritt <lorenm@u.washington.edu> 2011-05-21 23:36:23 +0200
committer Reinhard Tartler <siretart@tauware.de> 2011-05-22 19:27:18 +0200
commit 422b2362fc83ed3a75532ea68a6d167c52f447ec (patch)
tree 6e960264a1be0f40765c1f761c6b574d5e2e7b90 /libavcodec/x86/x86util.asm
parent 165c7c420d611bfa16d999f2033619c542961926 (diff)
download ffmpeg-422b2362fc83ed3a75532ea68a6d167c52f447ec.tar.gz
dct32_sse: eliminate some spills
125->104 cycles on penryn (x86_64 only)
Diffstat (limited to 'libavcodec/x86/x86util.asm')
-rw-r--r-- libavcodec/x86/x86util.asm 20
1 files changed, 20 insertions, 0 deletions
diff --git a/libavcodec/x86/x86util.asm b/libavcodec/x86/x86util.asm
index 7bd985a33b..141e96000c 100644
--- a/libavcodec/x86/x86util.asm
+++ b/libavcodec/x86/x86util.asm
@@ -41,6 +41,13 @@
SWAP %2, %4, %3
%endmacro
+%macro SBUTTERFLYPS 3 ; interleave packed floats: %1 = a, %2 = b, %3 = scratch (elements listed lowest first below)
+ movaps m%3, m%1 ; copy a, because unpcklps overwrites m%1 and a's high half is still needed
+ unpcklps m%1, m%2 ; m%1 = a0 b0 a1 b1 (low halves of a and b interleaved)
+ unpckhps m%3, m%2 ; m%3 = a2 b2 a3 b3 (high halves of a and b interleaved)
+ SWAP %2, %3 ; SWAP is defined outside this view; presumably it exchanges the register names so the high result becomes m%2 and m%3 names the old b register - confirm
+%endmacro
+
%macro TRANSPOSE4x4B 5
SBUTTERFLY bw, %1, %2, %5
SBUTTERFLY bw, %3, %4, %5
@@ -74,6 +81,19 @@
SWAP %2, %3
%endmacro
+; identical behavior to TRANSPOSE4x4D, but using SSE1 float ops
+%macro TRANSPOSE4x4PS 5 ; %1-%4 = rows a, b, c, d of a 4x4 float matrix, %5 = scratch (overwritten); elements listed lowest first below
+ SBUTTERFLYPS %1, %2, %5 ; m%1 = a0 b0 a1 b1, m%2 = a2 b2 a3 b3 (assuming SWAP inside renames registers)
+ SBUTTERFLYPS %3, %4, %5 ; m%3 = c0 d0 c1 d1, m%4 = c2 d2 c3 d3
+ movaps m%5, m%1 ; save a0 b0 a1 b1, because movlhps below overwrites the high half of m%1
+ movlhps m%1, m%3 ; m%1 = a0 b0 c0 d0 (column 0)
+ movhlps m%3, m%5 ; m%3 = a1 b1 c1 d1 (column 1): low half taken from the saved high half
+ movaps m%5, m%2 ; save a2 b2 a3 b3 for the same reason
+ movlhps m%2, m%4 ; m%2 = a2 b2 c2 d2 (column 2)
+ movhlps m%4, m%5 ; m%4 = a3 b3 c3 d3 (column 3)
+ SWAP %2, %3 ; columns 1 and 2 sit under each other's name; exchanging them leaves m%1..m%4 = columns 0..3 (SWAP defined outside this view - presumably a rename only)
+%endmacro
+
%macro TRANSPOSE8x8W 9-11
%ifdef ARCH_X86_64
SBUTTERFLY wd, %1, %2, %9