diff options
author | Kieran Kunhya <kierank@ob-encoder.com> | 2013-10-20 10:28:38 -0500 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-10-26 02:34:22 +0200 |
commit | 865b70bc5d1cf37ec6d6cb729a69dda2cca28bd5 (patch) | |
tree | c8b62155b3715ac4b6bf82ea250d6d442ccaf5e8 | |
parent | 780669ef7c23c00836a24921fcc6b03be2b8ca4a (diff) | |
download | ffmpeg-865b70bc5d1cf37ec6d6cb729a69dda2cca28bd5.tar.gz |
Add AVX2-capable CPU detection. Patch based on x264's AVX2 detection.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rwxr-xr-x | configure | 3 | ||||
-rw-r--r-- | libavutil/cpu.c | 3 | ||||
-rw-r--r-- | libavutil/cpu.h | 2 | ||||
-rw-r--r-- | libavutil/x86/cpu.c | 7 | ||||
-rw-r--r-- | libavutil/x86/cpu.h | 3 |
5 files changed, 18 insertions, 0 deletions
@@ -315,6 +315,7 @@ Optimization options (experts only): --disable-sse42 disable SSE4.2 optimizations --disable-avx disable AVX optimizations --disable-fma4 disable FMA4 optimizations + --disable-avx2 disable AVX2 optimizations --disable-armv5te disable armv5te optimizations --disable-armv6 disable armv6 optimizations --disable-armv6t2 disable armv6t2 optimizations @@ -1344,6 +1345,7 @@ ARCH_EXT_LIST_X86=' amd3dnow amd3dnowext avx + avx2 fma4 i686 mmx @@ -1705,6 +1707,7 @@ sse4_deps="ssse3" sse42_deps="sse4" avx_deps="sse42" fma4_deps="avx" +avx2_deps="avx" mmx_external_deps="yasm" mmx_inline_deps="inline_asm" diff --git a/libavutil/cpu.c b/libavutil/cpu.c index a31e195b69..deb2a873d5 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -85,6 +85,7 @@ int av_parse_cpu_flags(const char *s) #define CPUFLAG_AVX (AV_CPU_FLAG_AVX | CPUFLAG_SSE42) #define CPUFLAG_XOP (AV_CPU_FLAG_XOP | CPUFLAG_AVX) #define CPUFLAG_FMA4 (AV_CPU_FLAG_FMA4 | CPUFLAG_AVX) +#define CPUFLAG_AVX2 (AV_CPU_FLAG_AVX2 | CPUFLAG_AVX) static const AVOption cpuflags_opts[] = { { "flags" , NULL, 0, AV_OPT_TYPE_FLAGS, { .i64 = 0 }, INT64_MIN, INT64_MAX, .unit = "flags" }, #if ARCH_PPC @@ -104,6 +105,7 @@ int av_parse_cpu_flags(const char *s) { "avx" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX }, .unit = "flags" }, { "xop" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_XOP }, .unit = "flags" }, { "fma4" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_FMA4 }, .unit = "flags" }, + { "avx2" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_AVX2 }, .unit = "flags" }, { "3dnow" , NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOW }, .unit = "flags" }, { "3dnowext", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = CPUFLAG_3DNOWEXT }, .unit = "flags" }, { "cmov", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_CMOV }, .unit = "flags" }, @@ -267,6 +269,7 @@ static const struct { { AV_CPU_FLAG_3DNOW, "3dnow" }, { AV_CPU_FLAG_3DNOWEXT, "3dnowext" }, { AV_CPU_FLAG_CMOV, "cmov" }, + { AV_CPU_FLAG_AVX2, "avx2" }, #endif { 0 
} }; diff --git a/libavutil/cpu.h b/libavutil/cpu.h index 50bdea7918..e9185f12f4 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -50,6 +50,8 @@ // #else // #define AV_CPU_FLAG_CMOV 0x1000 ///< supports cmov instruction // #endif +#define AV_CPU_FLAG_AVX2 0x8000 ///< AVX2 functions: requires OS support even if YMM registers aren't used + #define AV_CPU_FLAG_ALTIVEC 0x0001 ///< standard diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c index 81bb15a0a4..174515f68d 100644 --- a/libavutil/x86/cpu.c +++ b/libavutil/x86/cpu.c @@ -134,6 +134,13 @@ int ff_get_cpu_flags_x86(void) if ((eax & 0x6) == 0x6) rval |= AV_CPU_FLAG_AVX; } + if (HAVE_AVX2 && max_std_level >= 7) + { + cpuid(7, eax, ebx, ecx, edx); + if (ebx&0x00000020) + rval |= AV_CPU_FLAG_AVX2; + /* TODO: BMI1/2 */ + } #endif /* HAVE_AVX */ #endif /* HAVE_SSE */ } diff --git a/libavutil/x86/cpu.h b/libavutil/x86/cpu.h index 738c1ec48f..3724357e74 100644 --- a/libavutil/x86/cpu.h +++ b/libavutil/x86/cpu.h @@ -38,6 +38,7 @@ #define X86_SSE42(flags) CPUEXT(flags, SSE42) #define X86_AVX(flags) CPUEXT(flags, AVX) #define X86_FMA4(flags) CPUEXT(flags, FMA4) +#define X86_AVX2(flags) CPUEXT(flags, AVX2) #define EXTERNAL_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOW) #define EXTERNAL_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AMD3DNOWEXT) @@ -51,6 +52,7 @@ #define EXTERNAL_SSE42(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, SSE42) #define EXTERNAL_AVX(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX) #define EXTERNAL_FMA4(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, FMA4) +#define EXTERNAL_AVX2(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, AVX2) #define INLINE_AMD3DNOW(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOW) #define INLINE_AMD3DNOWEXT(flags) CPUEXT_SUFFIX(flags, _INLINE, AMD3DNOWEXT) @@ -64,6 +66,7 @@ #define INLINE_SSE42(flags) CPUEXT_SUFFIX(flags, _INLINE, SSE42) #define INLINE_AVX(flags) CPUEXT_SUFFIX(flags, _INLINE, AVX) #define INLINE_FMA4(flags) CPUEXT_SUFFIX(flags, _INLINE, FMA4) +#define INLINE_AVX2(flags) 
CPUEXT_SUFFIX(flags, _INLINE, AVX2) void ff_cpu_cpuid(int index, int *eax, int *ebx, int *ecx, int *edx); void ff_cpu_xgetbv(int op, int *eax, int *edx); |