import sys
import subprocess
import os
import collections
import re
import tempfile
def is_clang(command):
    """Tell whether the nvcc command line selects clang as the host compiler.

    An argument counts only when the same word contains both the
    ``--compiler-bindir`` option text and the substring ``clang`` (for
    example ``--compiler-bindir=/usr/bin/clang++``).

    Returns True if such an argument exists, False otherwise.
    """
    return any(
        '--compiler-bindir' in argument and 'clang' in argument
        for argument in command
    )
# Flags that are removed from the C++ flags before they are handed to nvcc.
_SKIP_FLAGS = (
    '-gline-tables-only',
    # clang coverage
    '-fprofile-instr-generate',
    '-fcoverage-mapping',
    '/Zc:inline',  # disable unreferenced functions (kernel registrators) remove
    '-Wno-c++17-extensions',
    '-flto',
    '-faligned-allocation',
    '-fsized-deallocation',
    # While it might be reasonable to compile host part of .cu sources with these optimizations enabled,
    # nvcc passes these options down towards cicc which lacks x86_64 extensions support.
    '-msse2',
    '-msse3',
    '-mssse3',
    '-msse4.1',
    '-msse4.2',
)

# Any flag starting with one of these prefixes is removed as well.
_SKIP_PREFIXES = (
    '-fsanitize=',
    '-fsanitize-coverage=',
    '-fsanitize-blacklist=',
    '--system-header-prefix',
)


def _filter_cflags(cflags, clang, skip_nocxxinc):
    """Return a copy of *cflags* adjusted for nvcc, with unsupported flags removed.

    cflags -- list of C++ compiler flags.
    clang -- True when the host compiler is clang.
    skip_nocxxinc -- True to also drop ``-nostdinc++``.
    """
    cflags = list(cflags)
    if clang:
        # nvcc concatenates the sources for clang, and clang reports unused
        # things from .h files as if they were defined in a .cpp file.
        cflags += ['-Wno-unused-function', '-Wno-unused-parameter']
    elif '-fopenmp=libomp' in cflags:
        # Replace every occurrence with a single plain -fopenmp at the end.
        cflags = [flag for flag in cflags if flag != '-fopenmp=libomp']
        cflags.append('-fopenmp')

    skipped = set(_SKIP_FLAGS)
    if skip_nocxxinc:
        skipped.add('-nostdinc++')

    result = []
    for flag in cflags:
        # Drop all occurrences, not just the first one, of an unwanted flag.
        if flag in skipped or flag.startswith(_SKIP_PREFIXES):
            continue
        if flag.startswith('-fopenmp-version='):
            # Clang 11 only supports OpenMP 4.5, but the default is 5.0, so we need to forcefully redefine it.
            result.append('-fopenmp-version=45')
        elif clang or not flag.startswith('--target='):
            # --target= is kept only when the host compiler is clang.
            result.append(flag)
    return result


def _split_cflags(cflags, vc_root):
    """Split *cflags* into (cpp_args, compiler_args).

    cpp_args receives the include (-I) and define (-D) flags that are passed
    to nvcc directly; compiler_args receives everything else, to be forwarded
    via --compiler-options.

    vc_root -- value of $Y_VC_Root, or None/empty when it is not set.

    Raises IndexError if ``-mllvm`` or a bare include flag is the last flag
    and therefore has no value.
    """
    cpp_args = []
    compiler_args = []
    queue = collections.deque(cflags)
    while queue:
        arg = queue.popleft()
        if arg == '-mllvm':
            # -mllvm takes its value from the next argument; keep them together.
            compiler_args.append(arg)
            compiler_args.append(queue.popleft())
            continue
        # NOTE: the prefix test is case-insensitive, but only an uppercase 'I'
        # is forwarded; every other match (-B and lowercase spellings) is
        # dropped together with its value.
        if arg[:2].upper() in ('-I', '/I', '-B'):
            value = arg[2:]
            if not value:
                value = queue.popleft()
            if arg[1] == 'I':
                cpp_args.append('-I{}'.format(value))
            # todo: delete "B" flag check when cuda stop to use gcc
            continue
        match = re.match(r'[-/]D(.*)', arg)
        if match:
            define = match.group(1)
            # We have C++ flags configured for the regular C++ build.
            # There is Y_MSVC_INCLUDE define with a path to the VC header files.
            # We need to change the path accordingly when using a separate MSVC for CUDA.
            if vc_root and define.startswith('Y_MSVC_INCLUDE'):
                define = os.path.expandvars('Y_MSVC_INCLUDE={}/include'.format(vc_root))
            cpp_args.append('-D' + define.replace('\\', '/'))
            continue
        compiler_args.append(arg)
    return cpp_args, compiler_args


def main():
    """Build an nvcc command line from ``sys.argv`` and run (or print) it.

    Expected argument layout::

        [--mtime <value>] <nvcc> <nvcc args...> --cflags <C++ flags...>

    ``--mtime <value>``, if present as the first argument, exports <value>
    as LD_PRELOAD for the child process. ``--y_skip_nocxxinc`` (anywhere)
    additionally drops ``-nostdinc++`` from the C++ flags. ``--y_dump_args``
    (among the nvcc args) prints the final command, one argument per line,
    instead of running it.

    Exits with status 1 when no command is given or the executable does not
    exist; otherwise exits with the child's exit status. Raises ValueError if
    ``--cflags`` is missing.
    """
    import shutil

    skip_nocxxinc = '--y_skip_nocxxinc' in sys.argv
    if skip_nocxxinc:
        sys.argv.remove('--y_skip_nocxxinc')

    spl = sys.argv.index('--cflags')
    cmd = 1
    mtime0 = None
    if sys.argv[1] == '--mtime':
        mtime0 = sys.argv[2]
        cmd = 3
    command = sys.argv[cmd:spl]
    cflags = sys.argv[spl + 1:]

    dump_args = '--y_dump_args' in command
    if dump_args:
        command.remove('--y_dump_args')

    if not command:
        sys.stderr.write('no compiler command given before --cflags\n')
        sys.exit(1)

    executable = command[0]
    if not os.path.exists(executable):
        # sys.stderr.write works on both Python 2 and Python 3.
        sys.stderr.write('{} not found\n'.format(executable))
        sys.exit(1)

    cflags = _filter_cflags(cflags, is_clang(command), skip_nocxxinc)

    # NVCC requires particular MSVC versions which may differ from the version
    # used to compile regular C++ code. We have a separate MSVC in Arcadia for
    # the CUDA builds and pass its root in $Y_VC_Root.
    # The separate MSVC for CUDA may be absent in Yandex Open Source builds.
    vc_root = os.environ.get('Y_VC_Root')
    cpp_args, compiler_args = _split_cflags(cflags, vc_root)

    command += cpp_args
    if compiler_args:
        command += ['--compiler-options', ','.join(compiler_args)]

    # --keep is necessary to prevent nvcc from embedding nvcc pid in generated
    # symbols. It makes nvcc use the original file name as the prefix in the
    # generated files (otherwise it also prepends tmpxft_{pid}_00000000-5), and
    # cicc derives the module name from its {input}.cpp1.ii file name.
    keep_dir = tempfile.mkdtemp(prefix='compile_cuda.py.')
    command += ['--keep', '--keep-dir', keep_dir]

    # nvcc generates symbols like __fatbinwrap_{len}_{basename}_{hash} where
    # {basename} is {input}.cpp1.ii with non-C chars translated to _, {len} is
    # {basename} length, and {hash} is the hash of first exported symbol in
    # {input}.cpp1.ii if there is one, otherwise it is based on its modification
    # time (converted to string in the local timezone) and the current working
    # directory. To stabilize the names of these symbols we need to fix mtime,
    # timezone, and cwd.
    if mtime0:
        os.environ['LD_PRELOAD'] = mtime0
    os.environ['TZ'] = 'UTC0'  # POSIX fixed offset format.
    os.environ['TZDIR'] = '/var/empty'  # Against counterfeit /usr/share/zoneinfo/$TZ.

    if dump_args:
        # The printed command refers to keep_dir, so it is left in place here.
        sys.stdout.write('\n'.join(command))
        return

    try:
        returncode = subprocess.Popen(command, stdout=sys.stderr, stderr=sys.stderr, cwd='/').wait()
    finally:
        # The kept intermediates are not needed once nvcc has finished.
        shutil.rmtree(keep_dir, ignore_errors=True)
    sys.exit(returncode)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()