diff options
author | toshiksvg <toshiksvg@yandex-team.com> | 2024-03-07 22:57:59 +0300 |
---|---|---|
committer | toshiksvg <toshiksvg@yandex-team.com> | 2024-03-07 23:15:23 +0300 |
commit | 7c1f60188c1081eafec304ac8ca36dcbfd5b8f93 (patch) | |
tree | d35ba0b5079b8bf608fa7cfa6552376b740d9588 /build/scripts/link_exe.py | |
parent | acf76c30586f66e464c4ec86c3e78f1c62b2bf6b (diff) | |
download | ydb-7c1f60188c1081eafec304ac8ca36dcbfd5b8f93.tar.gz |
Added function to rename .ctors sections into .init_array in cuda libraries
Добавил функцию, чтобы все библиотеки из CUDA-пакетов, которые статически линкуются, гарантированно имели символы в секции `.init_array`, а не в `.ctors`, как происходит сейчас.
В противном случае некоторые библиотеки (например, `nvrtc`) не работают при статической линковке.
1362e42f94015ba083431caa04d7ae436fd6bf99
Diffstat (limited to 'build/scripts/link_exe.py')
-rw-r--r-- | build/scripts/link_exe.py | 91 |
1 files changed, 61 insertions, 30 deletions
diff --git a/build/scripts/link_exe.py b/build/scripts/link_exe.py index 710f3b77fd..9ad94f23bf 100644 --- a/build/scripts/link_exe.py +++ b/build/scripts/link_exe.py @@ -41,6 +41,9 @@ CUDA_LIBRARIES = { '-lnvinfer_plugin_static': '-lnvinfer_plugin', '-lnvonnxparser_static': '-lnvonnxparser', '-lnvparsers_static': '-lnvparsers', + '-lnvrtc_static': '-lnvrtc', + '-lnvrtc-builtins_static': '-lnvrtc-builtins', + '-lnvptxcompiler_static': '', } @@ -97,37 +100,25 @@ class CUDAManager: f.write(script) -def process_cuda_libraries(cmd, cuda_manager, build_root): - if not cuda_manager.has_cuda_fatbins(cmd): - return cmd - - def tmpdir_generator(prefix): - for idx in itertools.count(): - path = os.path.abspath(os.path.join(build_root, prefix + '_' + str(idx))) - os.makedirs(path) - yield path - - # add custom linker script - to_dirpath = next(tmpdir_generator('cuda_linker_script')) - script_path = os.path.join(to_dirpath, 'script') - with open(script_path, 'w') as f: - cuda_manager.write_linker_script(f) - flags_with_linker = list(cmd) + ['-Wl,--script={}'.format(script_path)] +def tmpdir_generator(base_path, prefix): + for idx in itertools.count(): + path = os.path.abspath(os.path.join(base_path, prefix + '_' + str(idx))) + os.makedirs(path) + yield path - if not cuda_manager.can_prune_libs: - return flags_with_linker - tmpdir_gen = tmpdir_generator('cuda_pruned_libs') +def process_cuda_library_by_external_tool(cmd, build_root, tool_name, callable_tool_executor, allowed_cuda_libs): + tmpdir_gen = tmpdir_generator(build_root, 'cuda_' + tool_name + '_libs') - flags_pruned = [] + new_flags = [] cuda_deps = set() # Because each directory flag only affects flags that follow it, # for correct pruning we need to process that in reversed order - for flag in reversed(flags_with_linker): - if flag in cuda_manager.fatbin_libs: + for flag in reversed(cmd): + if flag in allowed_cuda_libs: cuda_deps.add('lib' + flag[2:] + '.a') - flag += '_pruned' + flag += '_' + tool_name elif 
flag.startswith('-L') and os.path.exists(flag[2:]) and os.path.isdir(flag[2:]) and any(f in cuda_deps for f in os.listdir(flag[2:])): from_dirpath = flag[2:] from_deps = list(cuda_deps & set(os.listdir(from_dirpath))) @@ -137,19 +128,57 @@ def process_cuda_libraries(cmd, cuda_manager, build_root): for f in from_deps: from_path = os.path.join(from_dirpath, f) - to_path = os.path.join(to_dirpath, f[:-2] + '_pruned.a') - cuda_manager.prune_lib(from_path, to_path) + to_path = os.path.join(to_dirpath, f[:-2] + '_' + tool_name +'.a') + callable_tool_executor(from_path, to_path) cuda_deps.remove(f) # do not remove current directory # because it can contain other libraries we want link to - # instead we just add new directory with pruned libs - flags_pruned.append('-L' + to_dirpath) + # instead we just add new directory with processed by tool libs + new_flags.append('-L' + to_dirpath) - flags_pruned.append(flag) + new_flags.append(flag) assert not cuda_deps, ('Unresolved CUDA deps: ' + ','.join(cuda_deps)) - return reversed(flags_pruned) + return reversed(new_flags) + + +def process_cuda_libraries_by_objcopy(cmd, build_root, objcopy_exe): + if not objcopy_exe: + return cmd + + def run_objcopy(from_path, to_path): + rename_section_command = [objcopy_exe, "--rename-section", ".ctors=.init_array", from_path, to_path] + subprocess.check_call(rename_section_command) + + possible_libraries = set(CUDA_LIBRARIES.keys()) + possible_libraries.update([ + '-lcudadevrt', + '-lcufilt', + '-lculibos', + ]) + possible_libraries.update([ + lib_name + "_pruner" for lib_name in possible_libraries + ]) + + return process_cuda_library_by_external_tool(list(cmd), build_root, 'objcopy', run_objcopy, possible_libraries) + + +def process_cuda_libraries_by_nvprune(cmd, cuda_manager, build_root): + if not cuda_manager.has_cuda_fatbins(cmd): + return cmd + + # add custom linker script + to_dirpath = next(tmpdir_generator(build_root, 'cuda_linker_script')) + script_path = os.path.join(to_dirpath, 
'script') + with open(script_path, 'w') as f: + cuda_manager.write_linker_script(f) + flags_with_linker = list(cmd) + ['-Wl,--script={}'.format(script_path)] + + if not cuda_manager.can_prune_libs: + return flags_with_linker + + return process_cuda_library_by_external_tool(flags_with_linker, build_root, 'pruner', cuda_manager.prune_lib, cuda_manager.fatbin_libs) def remove_excessive_flags(cmd): @@ -264,6 +293,7 @@ def parse_args(): parser.add_option('--cuda-architectures', help='List of supported CUDA architectures, separated by ":" (e.g. "sm_52:compute_70:lto_90a"') parser.add_option('--nvprune-exe') + parser.add_option('--objcopy-exe') parser.add_option('--build-root') parser.add_option('--arch') parser.add_option('--linker-output') @@ -295,7 +325,8 @@ if __name__ == '__main__': cmd = fix_cmd_for_dynamic_cuda(cmd) else: cuda_manager = CUDAManager(opts.cuda_architectures, opts.nvprune_exe) - cmd = process_cuda_libraries(cmd, cuda_manager, opts.build_root) + cmd = process_cuda_libraries_by_nvprune(cmd, cuda_manager, opts.build_root) + cmd = process_cuda_libraries_by_objcopy(cmd, opts.build_root, opts.objcopy_exe) cmd = ProcessWholeArchiveOption(opts.arch, opts.whole_archive_peers, opts.whole_archive_libs).construct_cmd(cmd) if opts.custom_step: |