aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJames Darnley <jdarnley@obe.tv>2017-02-11 13:25:09 +0100
committerJames Darnley <jdarnley@obe.tv>2017-02-18 20:26:51 +0100
commit7627df15d411a69f236b4650e88b1ab911f38efc (patch)
tree5c96771296d82fc23b52e8959e364a55cd368ba1
parente18bc2114f3deb8ef1ab9ddaef282c8d9678669d (diff)
downloadffmpeg-7627df15d411a69f236b4650e88b1ab911f38efc.tar.gz
x86util: import MOVHL macro
Originally committed to x264 in 1637239a by Henrik Gramner who has agreed to re-license it as LGPL. Original commit message follows. x86: Avoid some bypass delays and false dependencies A bypass delay of 1-3 clock cycles may occur on some CPUs when transitioning between int and float domains, so try to avoid that if possible.
-rw-r--r--libavutil/x86/x86util.asm12
1 files changed, 12 insertions, 0 deletions
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index c063436e0a..1408f0a176 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -876,3 +876,15 @@
psrlq %1, 8*(%2)
%endif
%endmacro
+
+%macro MOVHL 2 ; dst, src: copy the high qword of src into the low qword of dst; the high qword of dst differs between branches, so rely only on the low half
+%ifidn %1, %2
+ punpckhqdq %1, %2 ; dst and src are the same register: unpacking it against itself leaves its high qword in both halves
+%elif cpuflag(avx)
+ punpckhqdq %1, %2, %2 ; three-operand form: dst is written without being read, and the op is an integer one
+%elif cpuflag(sse4)
+ pshufd %1, %2, q3232 ; pshufd is slow on some older CPUs, so only use it on more modern ones (q3232 is defined outside this hunk; presumably the immediate selecting dwords 3,2,3,2)
+%else
+ movhlps %1, %2 ; fallback: may cause an int/float domain transition and has a dependency on dst (its high qword is left unchanged)
+%endif
+%endmacro