diff options
author | James Almer <jamrial@gmail.com> | 2015-05-26 14:29:06 -0300 |
---|---|---|
committer | Luca Barbato <lu_zero@gentoo.org> | 2015-05-31 12:07:11 +0200 |
commit | f7cafb5d02aa3f26c185f6f9851413ad77a73872 (patch) | |
tree | 1541c3f4034cdf43a6775131c2eb4a5ba1e7d0ac /libavutil/x86 | |
parent | d0bf20a4f25ac5de021c860a0c8ad05638ee2078 (diff) | |
download | ffmpeg-f7cafb5d02aa3f26c185f6f9851413ad77a73872.tar.gz |
x86: add AV_CPU_FLAG_AVXSLOW flag
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
Diffstat (limited to 'libavutil/x86')
-rw-r--r-- | libavutil/x86/cpu.c | 17 |
1 files changed, 14 insertions, 3 deletions
```diff
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 8be6d94742..098ccf7004 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -167,6 +167,7 @@ int ff_get_cpu_flags_x86(void)
         if (ext_caps & (1 << 22))
             rval |= AV_CPU_FLAG_MMXEXT;
 
+        if (!strncmp(vendor.c, "AuthenticAMD", 12)) {
         /* Allow for selectively disabling SSE2 functions on AMD processors
            with SSE2 support but not SSE4a. This includes Athlon64, some
            Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
@@ -174,9 +175,19 @@ int ff_get_cpu_flags_x86(void)
            AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
            so that SSE2 is used unless explicitly disabled by checking
            AV_CPU_FLAG_SSE2SLOW. */
-        if (!strncmp(vendor.c, "AuthenticAMD", 12) &&
-            rval & AV_CPU_FLAG_SSE2 && !(ecx & 0x00000040)) {
-            rval |= AV_CPU_FLAG_SSE2SLOW;
+            if (rval & AV_CPU_FLAG_SSE2 && !(ecx & 0x00000040))
+                rval |= AV_CPU_FLAG_SSE2SLOW;
+
+        /* Similar to the above but for AVX functions on AMD processors.
+           This is necessary only for functions using YMM registers on Bulldozer
+           based CPUs as they lack 256-bits execution units. SSE/AVX functions
+           using XMM registers are always faster on them.
+           AV_CPU_FLAG_AVX and AV_CPU_FLAG_AVXSLOW are both set so that AVX is
+           used unless explicitly disabled by checking AV_CPU_FLAG_AVXSLOW.
+           TODO: Confirm if Excavator is affected or not by this once it's
+           released, and update the check if necessary. Same for btver2. */
+            if (family == 0x15 && (rval & AV_CPU_FLAG_AVX))
+                rval |= AV_CPU_FLAG_AVXSLOW;
         }
 
         /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
```