diff options
author | Martin Storsjö <martin@martin.st> | 2023-05-25 14:57:37 +0300 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2023-06-06 12:40:26 +0300 |
commit | fb1b88af77cd39034cef4b6d08af79496cd75ed8 (patch) | |
tree | f11f26af022b7c0a11d7f58bd53f8302051892a2 | |
parent | fa11c4c7fa3961b0101daaa0032bb26a7a1a9c0f (diff) | |
download | ffmpeg-fb1b88af77cd39034cef4b6d08af79496cd75ed8.tar.gz |
configure: aarch64: Support assembling the dotprod and i8mm arch extensions
These are mandatory since ARMv8.4-a and ARMv8.6-a respectively,
but may optionally be implemented from ARMv8.2-a onwards.
Check if ".arch armv8.2-a" and ".arch_extension {dotprod,i8mm}" are
supported, and check if the instructions can be assembled.
Current clang versions fail to support the dotprod and i8mm
features in the .arch_extension directive, but do support them
if enabled with -march=armv8.4-a on the command line. (Curiously,
lowering the arch level with ".arch armv8.2-a" doesn't make the
extensions unavailable if they were enabled with -march; if that
changes, Clang should also learn to support these extensions via
.arch_extension for them to remain usable here.)
Signed-off-by: Martin Storsjö <martin@martin.st>
-rwxr-xr-x | configure | 81 | ||||
-rw-r--r-- | libavutil/aarch64/asm.S | 11 |
2 files changed, 91 insertions, 1 deletions
@@ -454,6 +454,8 @@ Optimization options (experts only):
   --disable-armv6t2 disable armv6t2 optimizations
   --disable-vfp disable VFP optimizations
   --disable-neon disable NEON optimizations
+  --disable-dotprod disable DOTPROD optimizations
+  --disable-i8mm disable I8MM optimizations
   --disable-inline-asm disable use of inline assembly
   --disable-x86asm disable use of standalone x86 assembly
   --disable-mipsdsp disable MIPS DSP ASE R1 optimizations
@@ -1154,6 +1156,43 @@ check_insn(){
     check_as ${1}_external "$2"
 }
 
+check_arch_level(){
+    log check_arch_level "$@"
+    level="$1"
+    check_as tested_arch_level ".arch $level"
+    enabled tested_arch_level && as_arch_level="$level"
+}
+
+check_archext_insn(){
+    log check_archext_insn "$@"
+    feature="$1"
+    instr="$2"
+    # Check if the assembly is accepted in inline assembly.
+    check_inline_asm ${feature}_inline "\"$instr\""
+    # We don't check if the instruction is supported out of the box by the
+    # external assembler (we don't try to set ${feature}_external) as we don't
+    # need to use these instructions in non-runtime detected codepaths.
+
+    disable $feature
+
+    enabled as_arch_directive && arch_directive=".arch $as_arch_level" || arch_directive=""
+
+    # Test if the assembler supports the .arch_extension $feature directive.
+    arch_extension_directive=".arch_extension $feature"
+    test_as <<EOF && enable as_archext_${feature}_directive || arch_extension_directive=""
+$arch_directive
+$arch_extension_directive
+EOF
+
+    # Test if we can assemble the instruction after potential .arch and
+    # .arch_extension directives.
+    test_as <<EOF && enable ${feature}
+$arch_directive
+$arch_extension_directive
+$instr
+EOF
+}
+
 check_x86asm(){
     log check_x86asm "$@"
     name=$1
@@ -2059,6 +2098,8 @@ ARCH_EXT_LIST_ARM="
     armv6
     armv6t2
     armv8
+    dotprod
+    i8mm
     neon
     vfp
     vfpv3
@@ -2322,6 +2363,8 @@ SYSTEM_LIBRARIES="
 
 TOOLCHAIN_FEATURES="
     as_arch_directive
+    as_archext_dotprod_directive
+    as_archext_i8mm_directive
     as_dn_directive
     as_fpu_directive
     as_func
@@ -2622,6 +2665,8 @@ intrinsics_neon_deps="neon"
 vfp_deps_any="aarch64 arm"
 vfpv3_deps="vfp"
 setend_deps="arm"
+dotprod_deps="aarch64 neon"
+i8mm_deps="aarch64 neon"
 
 map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
 
@@ -5988,12 +6033,27 @@ check_inline_asm inline_asm_labels '"1:\n"'
 check_inline_asm inline_asm_nonlocal_labels '"Label:\n"'
 
 if enabled aarch64; then
+    as_arch_level="armv8-a"
+    check_as as_arch_directive ".arch $as_arch_level"
+    enabled as_arch_directive && check_arch_level armv8.2-a
+
     enabled armv8 && check_insn armv8 'prfm pldl1strm, [x0]'
     # internal assembler in clang 3.3 does not support this instruction
     enabled neon && check_insn neon 'ext v0.8B, v0.8B, v1.8B, #1'
     enabled vfp && check_insn vfp 'fmadd d0, d0, d1, d2'
 
-    map 'enabled_any ${v}_external ${v}_inline || disable $v' $ARCH_EXT_LIST_ARM
+    archext_list="dotprod i8mm"
+    enabled dotprod && check_archext_insn dotprod 'udot v0.4s, v0.16b, v0.16b'
+    enabled i8mm && check_archext_insn i8mm 'usdot v0.4s, v0.16b, v0.16b'
+
+    # Disable the main feature (e.g. HAVE_NEON) if neither inline nor external
+    # assembly support the feature out of the box. Skip this for the features
+    # checked with check_archext_insn above, as that function takes care of
+    # updating all the variables as necessary.
+    for v in $ARCH_EXT_LIST_ARM; do
+        is_in $v $archext_list && continue
+        enabled_any ${v}_external ${v}_inline || disable $v
+    done
 
 elif enabled alpha; then
 
@@ -6022,6 +6082,12 @@ EOF
         warn "Compiler does not indicate floating-point ABI, guessing $fpabi."
     fi
 
+    # Test for various instruction sets, testing support both in inline and
+    # external assembly. This sets the ${v}_inline or ${v}_external flags
+    # if the instruction can be used unconditionally in either inline or
+    # external assembly. This means that if the ${v}_external feature is set,
+    # that feature can be used unconditionally in various support macros
+    # anywhere in external assembly, in any function.
     enabled armv5te && check_insn armv5te 'qadd r0, r0, r0'
     enabled armv6 && check_insn armv6 'sadd16 r0, r0, r0'
     enabled armv6t2 && check_insn armv6t2 'movt r0, #0'
@@ -6030,6 +6096,14 @@ EOF
     enabled vfpv3 && check_insn vfpv3 'vmov.f32 s0, #1.0'
     enabled setend && check_insn setend 'setend be'
 
+    # If neither inline nor external assembly can use the feature by default,
+    # disable the main unsuffixed feature (e.g. HAVE_NEON).
+    #
+    # For targets that support runtime CPU feature detection, don't disable
+    # the main feature flag - there we assume that all supported toolchains
+    # can assemble code for all instruction set features (e.g. NEON) with
+    # suitable assembly flags (such as ".fpu neon"); we don't check
+    # specifically that they really do.
     [ $target_os = linux ] || [ $target_os = android ] ||
         map 'enabled_any ${v}_external ${v}_inline || disable $v' \
             $ARCH_EXT_LIST_ARM
@@ -7610,6 +7684,8 @@ fi
 if enabled aarch64; then
     echo "NEON enabled ${neon-no}"
     echo "VFP enabled ${vfp-no}"
+    echo "DOTPROD enabled ${dotprod-no}"
+    echo "I8MM enabled ${i8mm-no}"
 fi
 if enabled arm; then
     echo "ARMv5TE enabled ${armv5te-no}"
@@ -7900,6 +7976,9 @@ test -n "$assert_level" &&
 test -n "$malloc_prefix" &&
     echo "#define MALLOC_PREFIX $malloc_prefix" >>$TMPH
 
+enabled aarch64 &&
+    echo "#define AS_ARCH_LEVEL $as_arch_level" >>$TMPH
+
 if enabled x86asm; then
     append config_files $TMPASM
     cat > $TMPASM <<EOF
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
index a7782415d7..8589cf74fc 100644
--- a/libavutil/aarch64/asm.S
+++ b/libavutil/aarch64/asm.S
@@ -36,6 +36,17 @@
 # define __has_feature(x) 0
 #endif
 
+#if HAVE_AS_ARCH_DIRECTIVE
+        .arch AS_ARCH_LEVEL
+#endif
+
+#if HAVE_AS_ARCHEXT_DOTPROD_DIRECTIVE
+        .arch_extension dotprod
+#endif
+#if HAVE_AS_ARCHEXT_I8MM_DIRECTIVE
+        .arch_extension i8mm
+#endif
+
 /* Support macros for
  * - Armv8.3-A Pointer Authentication and