diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2011-05-21 23:36:23 +0200 |
---|---|---|
committer | Reinhard Tartler <siretart@tauware.de> | 2011-05-22 19:27:18 +0200 |
commit | 422b2362fc83ed3a75532ea68a6d167c52f447ec (patch) | |
tree | 6e960264a1be0f40765c1f761c6b574d5e2e7b90 /libavcodec/x86/x86util.asm | |
parent | 165c7c420d611bfa16d999f2033619c542961926 (diff) | |
download | ffmpeg-422b2362fc83ed3a75532ea68a6d167c52f447ec.tar.gz |
dct32_sse: eliminate some spills
125->104 cycles on penryn (x86_64 only)
Diffstat (limited to 'libavcodec/x86/x86util.asm')
-rw-r--r-- | libavcodec/x86/x86util.asm | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/libavcodec/x86/x86util.asm b/libavcodec/x86/x86util.asm
index 7bd985a33b..141e96000c 100644
--- a/libavcodec/x86/x86util.asm
+++ b/libavcodec/x86/x86util.asm
@@ -41,6 +41,13 @@
     SWAP %2, %4, %3
 %endmacro
 
+%macro SBUTTERFLYPS 3
+    movaps   m%3, m%1
+    unpcklps m%1, m%2
+    unpckhps m%3, m%2
+    SWAP %2, %3
+%endmacro
+
 %macro TRANSPOSE4x4B 5
     SBUTTERFLY bw, %1, %2, %5
     SBUTTERFLY bw, %3, %4, %5
@@ -74,6 +81,19 @@
     SWAP %2, %3
 %endmacro
 
+; identical behavior to TRANSPOSE4x4D, but using SSE1 float ops
+%macro TRANSPOSE4x4PS 5
+    SBUTTERFLYPS %1, %2, %5
+    SBUTTERFLYPS %3, %4, %5
+    movaps  m%5, m%1
+    movlhps m%1, m%3
+    movhlps m%3, m%5
+    movaps  m%5, m%2
+    movlhps m%2, m%4
+    movhlps m%4, m%5
+    SWAP %2, %3
+%endmacro
+
 %macro TRANSPOSE8x8W 9-11
 %ifdef ARCH_X86_64
     SBUTTERFLY wd, %1, %2, %9