diff options
author | James Almer <jamrial@gmail.com> | 2017-09-27 23:10:09 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-09-27 23:10:09 -0300 |
commit | 3b345d389be2d67017f904caa21713f53a8e8c90 (patch) | |
tree | 44d50b299738c39f3631855bf5a67464ec0d4540 | |
parent | 522f87708653af3badcdc33be983bcc6009de49b (diff) | |
download | ffmpeg-3b345d389be2d67017f904caa21713f53a8e8c90.tar.gz |
avutil/cpu: split flag checks per arch in av_cpu_max_align()
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavutil/aarch64/cpu.c | 10 | ||||
-rw-r--r-- | libavutil/arm/cpu.c | 10 | ||||
-rw-r--r-- | libavutil/cpu.c | 39 | ||||
-rw-r--r-- | libavutil/cpu_internal.h | 5 | ||||
-rw-r--r-- | libavutil/ppc/cpu.c | 12 | ||||
-rw-r--r-- | libavutil/x86/cpu.c | 27 |
6 files changed, 72 insertions, 31 deletions
```diff
diff --git a/libavutil/aarch64/cpu.c b/libavutil/aarch64/cpu.c
index 8ef077aaea..cc641da576 100644
--- a/libavutil/aarch64/cpu.c
+++ b/libavutil/aarch64/cpu.c
@@ -26,3 +26,13 @@ int ff_get_cpu_flags_aarch64(void)
            AV_CPU_FLAG_NEON * HAVE_NEON |
            AV_CPU_FLAG_VFP * HAVE_VFP;
 }
+
+size_t ff_get_cpu_max_align_aarch64(void)
+{
+    int flags = av_get_cpu_flags();
+
+    if (flags & AV_CPU_FLAG_NEON)
+        return 16;
+
+    return 8;
+}
diff --git a/libavutil/arm/cpu.c b/libavutil/arm/cpu.c
index 3889ef011c..81e85e2525 100644
--- a/libavutil/arm/cpu.c
+++ b/libavutil/arm/cpu.c
@@ -158,3 +158,13 @@ int ff_get_cpu_flags_arm(void)
 }
 
 #endif
+
+size_t ff_get_cpu_max_align_arm(void)
+{
+    int flags = av_get_cpu_flags();
+
+    if (flags & AV_CPU_FLAG_NEON)
+        return 16;
+
+    return 8;
+}
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index ab04494acf..c8401b8258 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -304,37 +304,14 @@ int av_cpu_count(void)
 
 size_t av_cpu_max_align(void)
 {
-    int av_unused flags = av_get_cpu_flags();
-
-#if ARCH_ARM || ARCH_AARCH64
-    if (flags & AV_CPU_FLAG_NEON)
-        return 16;
-#elif ARCH_PPC
-    if (flags & (AV_CPU_FLAG_ALTIVEC |
-                 AV_CPU_FLAG_VSX |
-                 AV_CPU_FLAG_POWER8))
-        return 16;
-#elif ARCH_X86
-    if (flags & (AV_CPU_FLAG_AVX2 |
-                 AV_CPU_FLAG_AVX |
-                 AV_CPU_FLAG_XOP |
-                 AV_CPU_FLAG_FMA4 |
-                 AV_CPU_FLAG_FMA3 |
-                 AV_CPU_FLAG_AVXSLOW))
-        return 32;
-    if (flags & (AV_CPU_FLAG_AESNI |
-                 AV_CPU_FLAG_SSE42 |
-                 AV_CPU_FLAG_SSE4 |
-                 AV_CPU_FLAG_SSSE3 |
-                 AV_CPU_FLAG_SSE3 |
-                 AV_CPU_FLAG_SSE2 |
-                 AV_CPU_FLAG_SSE |
-                 AV_CPU_FLAG_ATOM |
-                 AV_CPU_FLAG_SSSE3SLOW |
-                 AV_CPU_FLAG_SSE3SLOW |
-                 AV_CPU_FLAG_SSE2SLOW))
-        return 16;
-#endif
+    if (ARCH_AARCH64)
+        return ff_get_cpu_max_align_aarch64();
+    if (ARCH_ARM)
+        return ff_get_cpu_max_align_arm();
+    if (ARCH_PPC)
+        return ff_get_cpu_max_align_ppc();
+    if (ARCH_X86)
+        return ff_get_cpu_max_align_x86();
 
     return 8;
 }
diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 6c352abe1b..b8bf1e5396 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -44,4 +44,9 @@ int ff_get_cpu_flags_arm(void);
 int ff_get_cpu_flags_ppc(void);
 int ff_get_cpu_flags_x86(void);
 
+size_t ff_get_cpu_max_align_aarch64(void);
+size_t ff_get_cpu_max_align_arm(void);
+size_t ff_get_cpu_max_align_ppc(void);
+size_t ff_get_cpu_max_align_x86(void);
+
 #endif /* AVUTIL_CPU_INTERNAL_H */
diff --git a/libavutil/ppc/cpu.c b/libavutil/ppc/cpu.c
index 0f1e982624..7bb7cd813c 100644
--- a/libavutil/ppc/cpu.c
+++ b/libavutil/ppc/cpu.c
@@ -148,3 +148,15 @@ out:
 #endif /* HAVE_ALTIVEC */
     return 0;
 }
+
+size_t ff_get_cpu_max_align_ppc(void)
+{
+    int flags = av_get_cpu_flags();
+
+    if (flags & (AV_CPU_FLAG_ALTIVEC |
+                 AV_CPU_FLAG_VSX |
+                 AV_CPU_FLAG_POWER8))
+        return 16;
+
+    return 8;
+}
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 3800a11ad8..f33088c8c7 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -233,3 +233,30 @@ int ff_get_cpu_flags_x86(void)
 
     return rval;
 }
+
+size_t ff_get_cpu_max_align_x86(void)
+{
+    int flags = av_get_cpu_flags();
+
+    if (flags & (AV_CPU_FLAG_AVX2 |
+                 AV_CPU_FLAG_AVX |
+                 AV_CPU_FLAG_XOP |
+                 AV_CPU_FLAG_FMA4 |
+                 AV_CPU_FLAG_FMA3 |
+                 AV_CPU_FLAG_AVXSLOW))
+        return 32;
+    if (flags & (AV_CPU_FLAG_AESNI |
+                 AV_CPU_FLAG_SSE42 |
+                 AV_CPU_FLAG_SSE4 |
+                 AV_CPU_FLAG_SSSE3 |
+                 AV_CPU_FLAG_SSE3 |
+                 AV_CPU_FLAG_SSE2 |
+                 AV_CPU_FLAG_SSE |
+                 AV_CPU_FLAG_ATOM |
+                 AV_CPU_FLAG_SSSE3SLOW |
+                 AV_CPU_FLAG_SSE3SLOW |
+                 AV_CPU_FLAG_SSE2SLOW))
+        return 16;
+
+    return 8;
+}
```