aboutsummaryrefslogtreecommitdiffstats
path: root/libavutil/x86
diff options
context:
space:
mode:
author James Almer <jamrial@gmail.com> 2022-11-14 02:32:33 -0300
committer James Almer <jamrial@gmail.com> 2024-07-10 13:25:44 -0300
commit 4a04cca69af807ccf831da977a94350611967c4c (patch)
tree 827ec417bfa53c97317296f2808dfc8d1b410139 /libavutil/x86
parent 34b4ca8696de64ca756e7aed7bdefa9ff6bb5fac (diff)
download ffmpeg-4a04cca69af807ccf831da977a94350611967c4c.tar.gz
x86/intreadwrite: use intrinsics instead of inline asm for AV_ZERO128
When called inside a loop, the inline asm version results in one pxor being unnecessarily emitted per iteration, as the contents of the __asm__() block are opaque to the compiler's instruction scheduler. This is not the case with intrinsics, where pxor will be emitted once with any half-decent compiler. This also has the benefit of removing any SSE -> AVX penalty that may happen when the compiler emits VEX-encoded instructions. Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavutil/x86')
-rw-r--r--libavutil/x86/intreadwrite.h15
1 files changed, 7 insertions, 8 deletions
diff --git a/libavutil/x86/intreadwrite.h b/libavutil/x86/intreadwrite.h
index 5e57d6a8cd..9bbef00dba 100644
--- a/libavutil/x86/intreadwrite.h
+++ b/libavutil/x86/intreadwrite.h
@@ -22,6 +22,9 @@
#define AVUTIL_X86_INTREADWRITE_H
#include <stdint.h>
+#if HAVE_INTRINSICS_SSE2
+#include <emmintrin.h>
+#endif
#include "config.h"
#include "libavutil/attributes.h"
@@ -43,20 +46,16 @@ static av_always_inline void AV_COPY128(void *d, const void *s)
#endif /* __SSE__ */
-#ifdef __SSE2__
+#if HAVE_INTRINSICS_SSE2
#define AV_ZERO128 AV_ZERO128
static av_always_inline void AV_ZERO128(void *d)
{
- struct v {uint64_t v[2];};
-
- __asm__("pxor %%xmm0, %%xmm0 \n\t"
- "movdqa %%xmm0, %0 \n\t"
- : "=m"(*(struct v*)d)
- :: "xmm0");
+ __m128i zero = _mm_setzero_si128();
+ _mm_store_si128(d, zero);
}
-#endif /* __SSE2__ */
+#endif /* HAVE_INTRINSICS_SSE2 */
#endif /* HAVE_MMX */