aboutsummaryrefslogtreecommitdiffstats
path: root/build/scripts/compile_cuda.py
diff options
context:
space:
mode:
authoralexv-smirnov <alex@ydb.tech>2023-06-13 11:05:01 +0300
committeralexv-smirnov <alex@ydb.tech>2023-06-13 11:05:01 +0300
commitbf0f13dd39ee3e65092ba3572bb5b1fcd125dcd0 (patch)
tree1d1df72c0541a59a81439842f46d95396d3e7189 /build/scripts/compile_cuda.py
parent8bfdfa9a9bd19bddbc58d888e180fbd1218681be (diff)
downloadydb-bf0f13dd39ee3e65092ba3572bb5b1fcd125dcd0.tar.gz
add ymake export to ydb
Diffstat (limited to 'build/scripts/compile_cuda.py')
-rw-r--r--build/scripts/compile_cuda.py168
1 files changed, 168 insertions, 0 deletions
diff --git a/build/scripts/compile_cuda.py b/build/scripts/compile_cuda.py
new file mode 100644
index 0000000000..f8e1fa2b6d
--- /dev/null
+++ b/build/scripts/compile_cuda.py
@@ -0,0 +1,168 @@
+import sys
+import subprocess
+import os
+import collections
+import re
+import tempfile
+
+
def is_clang(command):
    """Tell whether the nvcc command line selects clang as the host compiler.

    :param command: iterable of command-line words (strings).
    :return: True if any single word contains both ``--compiler-bindir`` and
        ``clang``; False otherwise (including for an empty command).
    """
    return any(
        '--compiler-bindir' in token and 'clang' in token
        for token in command
    )
+
+
def _sanitize_cflags(cflags, clang, skip_nocxxinc):
    """Return a copy of the host C++ flags with everything nvcc cannot take removed.

    :param cflags: list of host-compiler flags taken from the command line.
    :param clang: True when the host compiler is clang (see ``is_clang``).
    :param skip_nocxxinc: when True, ``-nostdinc++`` is dropped as well.
    :return: a new list; the input list is not modified.
    """
    cflags = list(cflags)

    if clang:
        # nvcc concatenates the sources for clang, and clang reports unused
        # things from .h files as if they were defined in a .cpp file.
        cflags += ['-Wno-unused-function', '-Wno-unused-parameter']
    elif '-fopenmp=libomp' in cflags:
        # For a non-clang host compiler use the plain -fopenmp spelling.
        cflags = [flag for flag in cflags if flag != '-fopenmp=libomp']
        cflags.append('-fopenmp')

    skipped = {
        '-gline-tables-only',
        # clang coverage
        '-fprofile-instr-generate',
        '-fcoverage-mapping',
        '/Zc:inline',  # disable unreferenced functions (kernel registrators) remove
        '-Wno-c++17-extensions',
        '-flto',
        '-faligned-allocation',
        '-fsized-deallocation',
        # While it might be reasonable to compile host part of .cu sources with these optimizations enabled,
        # nvcc passes these options down towards cicc which lacks x86_64 extensions support.
        '-msse2',
        '-msse3',
        '-mssse3',
        '-msse4.1',
        '-msse4.2',
    }
    if skip_nocxxinc:
        skipped.add('-nostdinc++')

    skipped_prefixes = (
        '-fsanitize=',
        '-fsanitize-coverage=',
        '-fsanitize-blacklist=',
        '--system-header-prefix',
    )
    if not clang:
        # --target= is only passed through when the host compiler is clang.
        skipped_prefixes += ('--target=',)

    result = []
    for flag in cflags:
        # Every occurrence of an unsupported flag is dropped, not only the first.
        if flag in skipped or flag.startswith(skipped_prefixes):
            continue
        if flag.startswith('-fopenmp-version='):
            # Clang 11 only supports OpenMP 4.5, but the default is 5.0, so we need to forcefully redefine it.
            flag = '-fopenmp-version=45'
        result.append(flag)
    return result


def _split_cflags(cflags, vc_root):
    """Split host flags into preprocessor args for nvcc and host-compiler options.

    :param cflags: iterable of already sanitized host-compiler flags.
    :param vc_root: value of ``$Y_VC_Root`` or None; when set, any
        ``Y_MSVC_INCLUDE`` define is redirected to ``<vc_root>/include``.
    :return: tuple ``(cpp_args, compiler_args)``: ``-I``/``-D`` arguments given
        to nvcc directly, and the remaining flags that go to the host compiler.
    :raises IndexError: if ``-mllvm``, ``-I``, ``/I`` or ``-B`` is the last flag
        and lacks its value.
    """
    cpp_args = []
    compiler_args = []

    queue = collections.deque(cflags)
    while queue:
        arg = queue.popleft()

        if arg == '-mllvm':
            # -mllvm takes its value as the next argument; keep the pair together.
            compiler_args.append(arg)
            compiler_args.append(queue.popleft())
            continue

        # NOTE(review): the prefix test is case-insensitive, so lowercase
        # spellings such as -isystem or -b... also land here and are dropped
        # without being forwarded anywhere - confirm this is intended.
        if arg[:2].upper() in ('-I', '/I', '-B'):
            value = arg[2:]
            if not value:
                value = queue.popleft()
            if arg[1] == 'I':
                cpp_args.append('-I{}'.format(value))
            elif arg[1] == 'B':  # todo: delete "B" flag check when cuda stop to use gcc
                pass
            continue

        match = re.match(r'[-/]D(.*)', arg)
        if match:
            define = match.group(1)
            # We have C++ flags configured for the regular C++ build.
            # There is Y_MSVC_INCLUDE define with a path to the VC header files.
            # We need to change the path accordingly when using a separate MSVC for CUDA.
            if vc_root and define.startswith('Y_MSVC_INCLUDE'):
                define = os.path.expandvars('Y_MSVC_INCLUDE={}/include'.format(vc_root))
            cpp_args.append('-D' + define.replace('\\', '/'))
            continue

        compiler_args.append(arg)

    return cpp_args, compiler_args


def main():
    """Run nvcc with the host C++ flags translated into a form it accepts.

    Expected ``sys.argv`` layout::

        script [--mtime VALUE] NVCC [nvcc args...] --cflags [host flags...]

    ``--y_skip_nocxxinc`` may appear anywhere and additionally drops
    ``-nostdinc++``; ``--y_dump_args`` among the nvcc args prints the final
    command line to stdout instead of executing it.

    Exits with status 1 (message on stderr) when ``--cflags`` is missing, the
    command is empty, or the nvcc executable does not exist; otherwise exits
    with nvcc's return code unless ``--y_dump_args`` was given.
    """
    try:
        sys.argv.remove('--y_skip_nocxxinc')
        skip_nocxxinc = True
    except ValueError:
        skip_nocxxinc = False

    try:
        spl = sys.argv.index('--cflags')
    except ValueError:
        sys.stderr.write('--cflags separator not found in arguments\n')
        sys.exit(1)

    cmd = 1
    mtime0 = None
    if sys.argv[1] == '--mtime':
        mtime0 = sys.argv[2]
        cmd = 3
    command = sys.argv[cmd:spl]
    cflags = sys.argv[spl + 1:]

    dump_args = False
    if '--y_dump_args' in command:
        command.remove('--y_dump_args')
        dump_args = True

    if not command:
        sys.stderr.write('compiler command is empty\n')
        sys.exit(1)

    executable = command[0]
    if not os.path.exists(executable):
        # sys.stderr.write works on both Python 2 and 3, unlike `print >>`.
        sys.stderr.write('{} not found\n'.format(executable))
        sys.exit(1)

    cflags = _sanitize_cflags(cflags, is_clang(command), skip_nocxxinc)

    # NVCC requires particular MSVC versions which may differ from the version
    # used to compile regular C++ code. We have a separate MSVC in Arcadia for
    # the CUDA builds and pass its root in $Y_VC_Root.
    # The separate MSVC for CUDA may be absent in Yandex Open Source builds.
    vc_root = os.environ.get('Y_VC_Root')

    cpp_args, compiler_args = _split_cflags(cflags, vc_root)

    command += cpp_args
    if compiler_args:
        command += ['--compiler-options', ','.join(compiler_args)]

    # --keep is necessary to prevent nvcc from embedding nvcc pid in generated
    # symbols.  It makes nvcc use the original file name as the prefix in the
    # generated files (otherwise it also prepends tmpxft_{pid}_00000000-5), and
    # cicc derives the module name from its {input}.cpp1.ii file name.
    command += ['--keep', '--keep-dir', tempfile.mkdtemp(prefix='compile_cuda.py.')]
    # nvcc generates symbols like __fatbinwrap_{len}_{basename}_{hash} where
    # {basename} is {input}.cpp1.ii with non-C chars translated to _, {len} is
    # {basename} length, and {hash} is the hash of first exported symbol in
    # {input}.cpp1.ii if there is one, otherwise it is based on its modification
    # time (converted to string in the local timezone) and the current working
    # directory.  To stabilize the names of these symbols we need to fix mtime,
    # timezone, and cwd.
    if mtime0:
        # The --mtime value is handed to the dynamic loader as LD_PRELOAD
        # (presumably a shared library that pins mtime - confirm with the caller).
        os.environ['LD_PRELOAD'] = mtime0
        os.environ['TZ'] = 'UTC0'  # POSIX fixed offset format.
        os.environ['TZDIR'] = '/var/empty'  # Against counterfeit /usr/share/zoneinfo/$TZ.

    if dump_args:
        sys.stdout.write('\n'.join(command))
    else:
        # nvcc's stdout is redirected to our stderr; cwd is fixed to '/' (see above).
        sys.exit(subprocess.Popen(command, stdout=sys.stderr, stderr=sys.stderr, cwd='/').wait())
+
+
if __name__ == '__main__':
    # Run the wrapper only when executed as a script, not when imported.
    main()