# ---------------------------------------------------------------------------
# CUDA toolchain selection.
#
# CUDA_VERSION stays at 0 unless one of the CUDA11 / CUDA12 switches is on;
# each switch pins the CUDA, cuDNN and TensorRT versions together with the
# C++ standard number handed to nvcc (NVCC_STD_VER).
# MOST_USED_CUDA is a shortcut that turns CUDA12 on.
# ---------------------------------------------------------------------------
when ($MOST_USED_CUDA) {
    CUDA12=yes
}

CUDA_VERSION=0
TENSORRT_VERSION=10
FULL_TENSORRT_VERSION=10.13.2

when ($CUDA11) {
    CUDA_VERSION=11.4
    CUDA_ARCHITECTURES=
    CUDNN_VERSION=8.0.5
    TENSORRT_VERSION=7
    NVCC_STD_VER=17
}

when ($CUDA12) {
    CUDA_VERSION=12.9
    CUDA_ARCHITECTURES=
    CUDNN_VERSION=9.10.2
    TENSORRT_VERSION=10
    NVCC_STD_VER=20
}

when ($TENSORFLOW_WITH_CUDA) {
    CUDA_REQUIRED=yes
}

# Spell the language-standard switch the way the host toolchain expects it:
# "/std:c++NN" when MSVC is "yes", "-std=c++NN" otherwise.
when ($MSVC == "yes") {
    NVCC_STD=/std:c++${NVCC_STD_VER}
}
otherwise {
    NVCC_STD=-std=c++${NVCC_STD_VER}
}

# tag:flags
### @usage: CUDA_NVCC_FLAGS(compiler flags)
### Append the given flags to the compile line of .cu files.
macro CUDA_NVCC_FLAGS(Flags...) {
    SET_APPEND(CUDA_NVCC_FLAGS $Flags)
}

# Extra options passed to build/scripts/compile_cuda.py; each one points at a
# helper built from the tools/ tree.
# NOTE(review): what the wrapper does with these helpers is not visible here.
MTIME=--mtime ${tool:"tools/mtime0"}
CUSTOM_PID=--custom-pid ${tool:"tools/custom_pid"}

# --y_sanitize is passed to the wrapper only when CUDA_SANITIZE is set.
when ($CUDA_SANITIZE) {
    _CUDA_SANITIZE_FLAG=--y_sanitize
}
otherwise {
    _CUDA_SANITIZE_FLAG=
}

# The root-relative source path is forwarded to cicc for every configuration
# except CUDA 11.4 / CUDA11.
_CUDA_CUBIN_EXTRA_FLAGS=
when ($CUDA_VERSION != "11.4" && $CUDA11 != "yes") {
    _CUDA_CUBIN_EXTRA_FLAGS+=-Xcicc=--orig_src_path_name=${rootrel;input:SRC}
}

# Device-side compilation of SRC for a single architecture ARCH: produces
# <SRC without extension>.<ARCH>.cubin through the compile_cuda.py wrapper.
_CUDA_CUBIN_CMD=$YMAKE_PYTHON3 ${input:"build/scripts/compile_cuda.py"} \
    $MTIME $CUSTOM_PID \
    $NVCC $NVCC_STD $NVCC_FLAGS \
    $_CUDA_SANITIZE_FLAG \
    -gencode arch=compute_${ARCH},code=sm_${ARCH} \
    --cubin \
    -c ${input:SRC} \
    -o ${noauto;output;suf=.${ARCH}.cubin;noext:SRC} \
    -Xcicc=--orig_src_file_name=${input:SRC} \
    $_CUDA_CUBIN_EXTRA_FLAGS \
    ${pre=-I:_C__INCLUDE} \
    --cflags $C_FLAGS_PLATFORM $CXXFLAGS $NVCC_STD $SRCFLAGS -D__CUDA_ARCH_LIST__=${ARCH_LIST} \
    ${hide;input:"build/internal/platform/cuda/cuda_runtime_include.h"} \
    $NVCC_ENV $CUDA_HOST_COMPILER_ENV \
    ${hide;kv:"p CU"} ${hide;kv:"pc light-green"}

# Runs the cubin command, then copies the .ptx, .cudafe1.stub.c and .module_id
# files found in BINDIR to per-architecture outputs named <SRC>.<ARCH>.<ext>.
# NOTE(review): those three files are presumably left behind by the nvcc run
# above - confirm against compile_cuda.py.
macro _CUDA_COMPILE_DEVICE(SRC, ARCH, ARCH_LIST) {
    .CMD=$_CUDA_CUBIN_CMD && ${cwd:BINDIR} $COPY_CMD ${suf=.ptx;nopath;noext:SRC} ${noauto;output;suf=.${ARCH}.ptx;noext:SRC} && ${cwd:BINDIR} $COPY_CMD ${suf=.cudafe1.stub.c;nopath;noext:SRC} ${noauto;output;suf=.${ARCH}.cudafe1.stub.c;noext:SRC} && ${cwd:BINDIR} $COPY_CMD ${suf=.module_id;nopath;noext:SRC} ${noauto;output;suf=.${ARCH}.module_id;noext:SRC}
}

# Host-side step 1: preprocess SRC into a temporary .ii file with
# __CUDA_ARCH__ undefined and __CUDA_ARCH_LIST__ set to ARCH_LIST.
# (The variable name is spelled "PREPROCES"; it is kept unchanged because it
# may be referenced from other configuration files.)
_CUDA_PREPROCES_CMD=$YMAKE_PYTHON3 ${input:"build/scripts/compile_cuda.py"} \
    $MTIME $CUSTOM_PID \
    $NVCC $NVCC_STD $NVCC_FLAGS \
    $_CUDA_SANITIZE_FLAG \
    --preprocess \
    -c ${input:SRC} \
    -o ${tmp;suf=.ii;noext:SRC} \
    ${pre=-I:_C__INCLUDE} \
    --cflags $C_FLAGS_PLATFORM $CXXFLAGS $NVCC_STD $SRCFLAGS -U__CUDA_ARCH__ -D__CUDA_ARCH_LIST__=${ARCH_LIST} \
    ${hide;input:"build/internal/platform/cuda/cuda_runtime_include.h"} \
    $NVCC_ENV $CUDA_HOST_COMPILER_ENV \
    ${hide;kv:"p CU"} ${hide;kv:"pc light-green"}

# Host-side step 2: run $CUDAFE over the temporary .ii file and emit
# <SRC>.cudafe1.cpp. The stub and module-id files of STUB_ARCH are referenced
# by name, and <SRC>.fatbin.c is declared as a hidden output include.
_CUDA_FRONTEND_CMD=$CUDAFE \
    --c++${NVCC_STD_VER} --clang --clang_version=${COMPILER_VERSION}0000 \
    -w --display_error_number \
    --orig_src_file_name ${rootrel;input:SRC} \
    --unicode_source_kind=UTF-8 \
    --allow_managed --extended-lambda --relaxed_constexpr --m64 --parse_templates \
    --gen_c_file_name ${noauto;output;suf=.cudafe1.cpp;noext:SRC} \
    --stub_file_name ${output_include;suf=.${STUB_ARCH}.cudafe1.stub.c;nopath;noext:SRC} \
    --module_id_file_name ${input;suf=.${STUB_ARCH}.module_id;noext:SRC} \
    ${hide;output_include;suf=.fatbin.c;nopath;noext:SRC} \
    ${tmp;suf=.ii;noext:SRC}

# Preprocess, then run the CUDA front end (the two commands defined above).
macro _CUDA_COMPILE_HOST(SRC, STUB_ARCH, ARCH_LIST) {
    .CMD=$_CUDA_PREPROCES_CMD && $_CUDA_FRONTEND_CMD
}

# Pack the IMAGES inputs into <SRC>.fatbin.c by running $FATBINARY through
# build/scripts/fatbinary_wrapper.py.
_CUDA_FATBIN_CMD=$YMAKE_PYTHON3 ${input:"build/scripts/fatbinary_wrapper.py"} \
    $FATBINARY \
    -64 -compress-all \
    --ident=${input:SRC} \
    ${input:IMAGES} \
    --embedded-fatbin=${noauto;output;suf=.fatbin.c;noext:SRC} \
    ${hide;kv:"p CU"} ${hide;kv:"pc light-green"}

macro _CUDA_FATBIN(SRC, IMAGES...) {
    .CMD=$_CUDA_FATBIN_CMD
}

# Handler for *.cu sources. The build command and the peerdir come from
# _SRC_CU_CMD / _SRC_CU_PEERDIR, which are defined outside this fragment.
# .SEM lists the source under target_cuda_sources, sets CMAKE_CUDA_STANDARD
# from NVCC_STD_VER, and emits target_cuda_flags / target_cuda_cflags entries.
# tag:src-processing
macro _SRC("cu", SRC, SRCFLAGS...) {
    .CMD=$_SRC_CU_CMD
    .SEM=target_options-privates-ITEM && target_options-privates-option target_cuda_sources && target_options-privates-args ${input:SRC} ${hide;output;suf=${OBJ_SUF}.o:SRC} && platform_vars-CMAKE_CUDA_STANDARD ${quo:NVCC_STD_VER} && target_macroses-ITEM && target_macroses-macro target_cuda_flags && target_macroses-args $CUDA_NVCC_FLAGS $NVCC_GENCODE_FLAGS && target_macroses-ITEM && target_macroses-macro target_cuda_cflags && target_macroses-args $USER_CXXFLAGS $SRCFLAGS $_SEM_EXTRA_CXX_FLAGS
    .PEERDIR=$_SRC_CU_PEERDIR
}

# Device-link step: nvcc -dlink over the objects collected in
# _NVCC_DEVICE_SRCS writes the "devlink" object, after which the regular
# $LINK_LIB command runs.
CUDA_DEVICE_LINK_LIBRARY_CMD=$NVCC_OLD $NVCC_FLAGS $NVCC_GENCODE_FLAGS -o ${output;suf=${OBJ_SUF}${NVCC_OBJ_EXT}:"devlink"} -dlink ${input:_NVCC_DEVICE_SRCS} -I$CUDA_TARGET_ROOT/include --compiler-options ${join= :C_FLAGS_PLATFORM} $NVCC_ENV ${hide;kv:"p DL"} ${hide;kv:"pc light-blue"} && $LINK_LIB

### @usage: CUDA_DEVICE_LINK_LIBRARY()
###
### A LIBRARY() module with an extra CUDA device-link step.
### List the sources that take part in the device link with the [NVCC_DEVICE_LINK](#macro_NVCC_DEVICE_LINK) macro.
module CUDA_DEVICE_LINK_LIBRARY: LIBRARY {
    SET(_LD_LINK_LIB_EXTRA_INPUT ${output;suf=${OBJ_SUF}${NVCC_OBJ_EXT}:"devlink"})
    .CMD=$CUDA_DEVICE_LINK_LIBRARY_CMD
}

# tag:flags
### @usage: NVCC_DEVICE_LINK(file.cu...)
### Run nvcc --device-link on the objects compiled from srcs with --device-c.
### The result is a stub object, devlink.o, which supplies the pieces the host
### linker needs to link relocatable device objects into the final executable.
### This macro is valid only inside a [CUDA_DEVICE_LINK_LIBRARY](#module_CUDA_DEVICE_LINK_LIBRARY) module.
macro NVCC_DEVICE_LINK(Srcs...) {
    SET_APPEND(_NVCC_DEVICE_SRCS ${suf=${OBJ_SUF}${NVCC_OBJ_EXT}:Srcs})
    .PEERDIR=build/internal/platform/cuda
}

# nvprune is enabled by default; NO_CUDA_NVPRUNE() switches it off.
CUDA_NVPRUNE=yes
### @usage: NO_CUDA_NVPRUNE()
### Disable nvprune for a PROGRAM.
macro NO_CUDA_NVPRUNE() {
    DISABLE(CUDA_NVPRUNE)
}