diff options
author | James Almer <jamrial@gmail.com> | 2014-02-22 02:47:01 -0300 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-02-22 17:25:52 +0100 |
commit | a2af8eddab75f1eac712411e4dde89823c0845e8 (patch) | |
tree | f2ba681511f1d83bcac6fb75c45d187f81fde690 /libavutil | |
parent | f98821dddb4abb3764fc2f22a6afe3eb11292d3c (diff) | |
download | ffmpeg-a2af8eddab75f1eac712411e4dde89823c0845e8.tar.gz |
x86: add detection for FMA3 instruction set
Based on x264 code
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/cpu.c | 4 | ||||
-rw-r--r-- | libavutil/cpu.h | 1 | ||||
-rw-r--r-- | libavutil/x86/cpu.c | 5 | ||||
-rw-r--r-- | libavutil/x86/cpu.h | 3 |
4 files changed, 12 insertions, 1 deletion
diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 9ac599826d..74de61e0b7 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -91,6 +91,7 @@ int av_parse_cpu_flags(const char *s) #define CPUFLAG_SSE42 (AV_CPU_FLAG_SSE42 | CPUFLAG_SSE4) #define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42) #define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX) +#define CPUFLAG_FMA3 (AV_CPU_FLAG_FMA3 | CPUFLAG_AVX) #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX) #define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX) static const AVOption cpuflags_opts[] = { @@ -111,6 +112,7 @@ int av_parse_cpu_flags(const char *s) { "sse4.2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_SSE42 }, .unit = "flags" }, { "avx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX }, .unit = "flags" }, { "xop" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP }, .unit = "flags" }, + { "fma3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA3 }, .unit = "flags" }, { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4 }, .unit = "flags" }, { "avx2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX2 }, .unit = "flags" }, { "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOW }, .unit = "flags" }, @@ -166,6 +168,7 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) { "sse4.2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_SSE42 }, .unit = "flags" }, { "avx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX }, .unit = "flags" }, { "xop" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_XOP }, .unit = "flags" }, + { "fma3" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_FMA3 }, .unit = "flags" }, { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_FMA4 }, .unit = "flags" }, { "avx2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_AVX2 }, .unit = "flags" }, { "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_3DNOW }, .unit = "flags" }, @@ -279,6 +282,7 @@ static const struct { { AV_CPU_FLAG_SSE42, "sse4.2" }, { AV_CPU_FLAG_AVX, "avx" }, { 
AV_CPU_FLAG_XOP, "xop" }, + { AV_CPU_FLAG_FMA3, "fma3" }, { AV_CPU_FLAG_FMA4, "fma4" }, { AV_CPU_FLAG_3DNOW, "3dnow" }, { AV_CPU_FLAG_3DNOWEXT, "3dnowext" }, diff --git a/libavutil/cpu.h b/libavutil/cpu.h index 55c3ec9a06..1d0293fed3 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -51,6 +51,7 @@ // #define AV_CPU_FLAG_CMOV 0x1000 ///< supports cmov instruction // #endif #define AV_CPU_FLAG_AVX2 0x8000 ///< AVX2 functions: requires OS support even if YMM registers aren't used +#define AV_CPU_FLAG_FMA3 0x10000 ///< Haswell FMA3 functions #define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 18049eaead..333b0f805f 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -131,8 +131,11 @@ int ff_get_cpu_flags_x86(void) if ((ecx & 0x18000000) == 0x18000000) { /* Check for OS support */ xgetbv(0, eax, edx); - if ((eax & 0x6) == 0x6) + if ((eax & 0x6) == 0x6) { rval |= AV_CPU_FLAG_AVX; + if (ecx&0x00001000) + rval |= AV_CPU_FLAG_FMA3; + } } #if HAVE_AVX2 if (max_std_level >= 7) { diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h index a151c887d3..bc64b1b3bd 100644 --- a/libavutil/x86/cpu.h +++ b/libavutil/x86/cpu.h @@ -38,6 +38,7 @@ #define X86_SSE42(flags) CPUEXT(flags, SSE42) #define X86_AVX(flags) CPUEXT(flags, AVX) #define X86_XOP(flags) CPUEXT(flags, XOP) +#define X86_FMA3(flags) CPUEXT(flags, FMA3) #define X86_FMA4(flags) CPUEXT(flags, FMA4) #define X86_AVX2(flags) CPUEXT(flags, AVX2) @@ -53,6 +54,7 @@ #define EXTERNAL_SSE42(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42) #define EXTERNAL_AVX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX) #define EXTERNAL_XOP(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, XOP) +#define EXTERNAL_FMA3(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA3) #define EXTERNAL_FMA4(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4) #define EXTERNAL_AVX2(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX2) @@ -68,6 +70,7 @@ #define INLINE_SSE42(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE42) #define 
INLINE_AVX(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX) #define INLINE_XOP(flags) CPUEXT_SUFFIX(flags, _INLINE, XOP) +#define INLINE_FMA3(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA3) #define INLINE_FMA4(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA4) #define INLINE_AVX2(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX2) |