blob: bd7e86e175b818f0932268cf5cdbddf52e6a5d9f (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
|
# Requesting MOST_USED_CUDA switches on the CUDA12 configuration.
when ($MOST_USED_CUDA) {
    CUDA12=yes
}

# Default versions; the CUDA11 / CUDA12 sections of this file override
# CUDA_VERSION and TENSORRT_VERSION when one of them is enabled.
TENSORRT_VERSION=10
FULL_TENSORRT_VERSION=10.13.2
CUDA_VERSION=0
# Versions and C++ standard level used when CUDA11 is enabled.
# CUDA_ARCHITECTURES is reset to an empty value here.
when ($CUDA11) {
    CUDA_VERSION=11.4
    CUDNN_VERSION=8.0.5
    TENSORRT_VERSION=7
    NVCC_STD_VER=17
    CUDA_ARCHITECTURES=
}
# Versions and C++ standard level used when CUDA12 is enabled.
# CUDA_ARCHITECTURES is reset to an empty value here.
when ($CUDA12) {
    CUDA_VERSION=12.9
    CUDNN_VERSION=9.10.2
    TENSORRT_VERSION=10
    NVCC_STD_VER=20
    CUDA_ARCHITECTURES=
}
# Building TensorFlow with CUDA support marks CUDA as required.
when ($TENSORFLOW_WITH_CUDA) {
    CUDA_REQUIRED=yes
}
# Build the language-standard option from NVCC_STD_VER:
# the /std: spelling when MSVC is "yes", the -std= spelling otherwise.
when ($MSVC == "yes") {
    NVCC_STD=/std:c++${NVCC_STD_VER}
}
otherwise {
    NVCC_STD=-std=c++${NVCC_STD_VER}
}
# tag:flags
### @usage: CUDA_NVCC_FLAGS(compiler flags)
### Add the specified flags to the compile command line for .cu files.
macro CUDA_NVCC_FLAGS(Flags...) {
    SET_APPEND(CUDA_NVCC_FLAGS $Flags)
}
# Option/tool pairs handed to build/scripts/compile_cuda.py by the CUDA
# cubin and preprocess command templates defined in this file.
MTIME=--mtime ${tool:"tools/mtime0"}
CUSTOM_PID=--custom-pid ${tool:"tools/custom_pid"}
# _CUDA_SANITIZE_FLAG is --y_sanitize when CUDA_SANITIZE is set and empty
# otherwise; the cubin and preprocess command templates put it on their
# command lines.
when ($CUDA_SANITIZE) {
    _CUDA_SANITIZE_FLAG=--y_sanitize
}
otherwise {
    _CUDA_SANITIZE_FLAG=
}
# Extra options for the cubin compile command. The cicc --orig_src_path_name
# option is added only when CUDA_VERSION is not 11.4 and CUDA11 is not "yes".
_CUDA_CUBIN_EXTRA_FLAGS=
when ($CUDA_VERSION != "11.4" && $CUDA11 != "yes") {
    _CUDA_CUBIN_EXTRA_FLAGS+=-Xcicc=--orig_src_path_name=${rootrel;input:SRC}
}
# Command template: runs nvcc through build/scripts/compile_cuda.py with
# --cubin to compile SRC for a single architecture (ARCH), writing
# <SRC without extension>.<ARCH>.cubin. SRC, ARCH and ARCH_LIST are not
# defined here; they come from the macro that expands this template
# (_CUDA_COMPILE_DEVICE).
_CUDA_CUBIN_CMD=$YMAKE_PYTHON3 ${input:"build/scripts/compile_cuda.py"} \
$MTIME $CUSTOM_PID \
$NVCC $NVCC_STD $NVCC_FLAGS \
$_CUDA_SANITIZE_FLAG \
-gencode arch=compute_${ARCH},code=sm_${ARCH} \
--cubin \
-c ${input:SRC} \
-o ${noauto;output;suf=.${ARCH}.cubin;noext:SRC} \
-Xcicc=--orig_src_file_name=${input:SRC} \
$_CUDA_CUBIN_EXTRA_FLAGS \
${pre=-I:_C__INCLUDE} \
--cflags $C_FLAGS_PLATFORM $CXXFLAGS $NVCC_STD $SRCFLAGS -D__CUDA_ARCH_LIST__=${ARCH_LIST} \
${hide;input:"build/internal/platform/cuda/cuda_runtime_include.h"} \
$NVCC_ENV $CUDA_HOST_COMPILER_ENV \
${hide;kv:"p CU"} ${hide;kv:"pc light-green"}
# Device-side compilation of SRC for architecture ARCH: runs the cubin
# command, then (with BINDIR as the working directory) copies the .ptx,
# .cudafe1.stub.c and .module_id files named after SRC to outputs whose
# names carry the .<ARCH> infix.
# ARCH_LIST is passed to the compiler as -D__CUDA_ARCH_LIST__ by the cubin command.
macro _CUDA_COMPILE_DEVICE(SRC, ARCH, ARCH_LIST) {
.CMD=$_CUDA_CUBIN_CMD && ${cwd:BINDIR} $COPY_CMD ${suf=.ptx;nopath;noext:SRC} ${noauto;output;suf=.${ARCH}.ptx;noext:SRC} && ${cwd:BINDIR} $COPY_CMD ${suf=.cudafe1.stub.c;nopath;noext:SRC} ${noauto;output;suf=.${ARCH}.cudafe1.stub.c;noext:SRC} && ${cwd:BINDIR} $COPY_CMD ${suf=.module_id;nopath;noext:SRC} ${noauto;output;suf=.${ARCH}.module_id;noext:SRC}
}
# Command template: runs nvcc through build/scripts/compile_cuda.py with
# --preprocess, writing the preprocessed SRC to a temporary .ii file.
# The --cflags for this pass include -U__CUDA_ARCH__ and
# -D__CUDA_ARCH_LIST__=<ARCH_LIST>. SRC and ARCH_LIST come from the macro
# that expands this template (_CUDA_COMPILE_HOST).
_CUDA_PREPROCES_CMD=$YMAKE_PYTHON3 ${input:"build/scripts/compile_cuda.py"} \
$MTIME $CUSTOM_PID \
$NVCC $NVCC_STD $NVCC_FLAGS \
$_CUDA_SANITIZE_FLAG \
--preprocess \
-c ${input:SRC} \
-o ${tmp;suf=.ii;noext:SRC} \
${pre=-I:_C__INCLUDE} \
--cflags $C_FLAGS_PLATFORM $CXXFLAGS $NVCC_STD $SRCFLAGS -U__CUDA_ARCH__ -D__CUDA_ARCH_LIST__=${ARCH_LIST} \
${hide;input:"build/internal/platform/cuda/cuda_runtime_include.h"} \
$NVCC_ENV $CUDA_HOST_COMPILER_ENV \
${hide;kv:"p CU"} ${hide;kv:"pc light-green"}
# Command template: runs $CUDAFE on the temporary .ii file produced by the
# preprocess command and writes <SRC without extension>.cudafe1.cpp.
# The stub and module_id file names use the STUB_ARCH infix; the .fatbin.c
# file is registered as a hidden output include. SRC and STUB_ARCH come from
# the macro that expands this template (_CUDA_COMPILE_HOST).
_CUDA_FRONTEND_CMD=$CUDAFE \
--c++${NVCC_STD_VER} --clang --clang_version=${COMPILER_VERSION}0000 \
-w --display_error_number \
--orig_src_file_name ${rootrel;input:SRC} \
--unicode_source_kind=UTF-8 \
--allow_managed --extended-lambda --relaxed_constexpr --m64 --parse_templates \
--gen_c_file_name ${noauto;output;suf=.cudafe1.cpp;noext:SRC} \
--stub_file_name ${output_include;suf=.${STUB_ARCH}.cudafe1.stub.c;nopath;noext:SRC} \
--module_id_file_name ${input;suf=.${STUB_ARCH}.module_id;noext:SRC} \
${hide;output_include;suf=.fatbin.c;nopath;noext:SRC} \
${tmp;suf=.ii;noext:SRC}
# Host-side pass for SRC: run the preprocess command, then the CUDA front-end
# command. STUB_ARCH selects which per-architecture stub / module_id files the
# front end refers to; ARCH_LIST is forwarded as -D__CUDA_ARCH_LIST__.
macro _CUDA_COMPILE_HOST(SRC, STUB_ARCH, ARCH_LIST) {
    .CMD=$_CUDA_PREPROCES_CMD && $_CUDA_FRONTEND_CMD
}
# Command template: runs $FATBINARY through build/scripts/fatbinary_wrapper.py
# over the IMAGES inputs and writes the embedded fatbin to
# <SRC without extension>.fatbin.c. SRC and IMAGES come from the macro that
# expands this template (_CUDA_FATBIN).
_CUDA_FATBIN_CMD=$YMAKE_PYTHON3 ${input:"build/scripts/fatbinary_wrapper.py"} \
$FATBINARY \
-64 -compress-all \
--ident=${input:SRC} \
${input:IMAGES} \
--embedded-fatbin=${noauto;output;suf=.fatbin.c;noext:SRC} \
${hide;kv:"p CU"} ${hide;kv:"pc light-green"}
# Produce the embedded fatbin file for SRC from the given IMAGES
# by running the fatbin command template.
macro _CUDA_FATBIN(SRC, IMAGES...) {
    .CMD=$_CUDA_FATBIN_CMD
}
# tag:src-processing
# Source handler for files with the "cu" extension.
#   .CMD     - compiles the source with $_SRC_CU_CMD (defined outside this section).
#   .SEM     - semantics for this source: registers it under target_cuda_sources,
#              sets CMAKE_CUDA_STANDARD from NVCC_STD_VER, and passes
#              CUDA_NVCC_FLAGS / NVCC_GENCODE_FLAGS as target_cuda_flags and
#              USER_CXXFLAGS / SRCFLAGS / _SEM_EXTRA_CXX_FLAGS as target_cuda_cflags.
#   .PEERDIR - adds $_SRC_CU_PEERDIR (defined outside this section).
macro _SRC("cu", SRC, SRCFLAGS...) {
.CMD=$_SRC_CU_CMD
.SEM=target_options-privates-ITEM && target_options-privates-option target_cuda_sources && target_options-privates-args ${input:SRC} ${hide;output;suf=${OBJ_SUF}.o:SRC} && platform_vars-CMAKE_CUDA_STANDARD ${quo:NVCC_STD_VER} && target_macroses-ITEM && target_macroses-macro target_cuda_flags && target_macroses-args $CUDA_NVCC_FLAGS $NVCC_GENCODE_FLAGS && target_macroses-ITEM && target_macroses-macro target_cuda_cflags && target_macroses-args $USER_CXXFLAGS $SRCFLAGS $_SEM_EXTRA_CXX_FLAGS
.PEERDIR=$_SRC_CU_PEERDIR
}
# Command for CUDA_DEVICE_LINK_LIBRARY: runs $NVCC_OLD with -dlink over the
# objects listed in _NVCC_DEVICE_SRCS to produce the "devlink" object, then
# runs $LINK_LIB.
CUDA_DEVICE_LINK_LIBRARY_CMD=$NVCC_OLD $NVCC_FLAGS $NVCC_GENCODE_FLAGS -o ${output;suf=${OBJ_SUF}${NVCC_OBJ_EXT}:"devlink"} -dlink ${input:_NVCC_DEVICE_SRCS} -I$CUDA_TARGET_ROOT/include --compiler-options ${join= :C_FLAGS_PLATFORM} $NVCC_ENV ${hide;kv:"p DL"} ${hide;kv:"pc light-blue"} && $LINK_LIB
### @usage: CUDA_DEVICE_LINK_LIBRARY()
###
### A LIBRARY() module that performs an additional CUDA device-link step.
### List the sources to device-link with the [NVCC_DEVICE_LINK](#macro_NVCC_DEVICE_LINK) macro.
module CUDA_DEVICE_LINK_LIBRARY: LIBRARY {
    SET(_LD_LINK_LIB_EXTRA_INPUT ${output;suf=${OBJ_SUF}${NVCC_OBJ_EXT}:"devlink"})
    .CMD=$CUDA_DEVICE_LINK_LIBRARY_CMD
}
# tag:flags
### @usage: NVCC_DEVICE_LINK(file.cu...)
### Run nvcc --device-link on the objects compiled from the given sources with --device-c.
### The result is a stub object, devlink.o, which supplies the pieces the host linker
### needs in order to link relocatable device objects into the final executable.
### This macro may only be used inside the [CUDA_DEVICE_LINK_LIBRARY](#module_CUDA_DEVICE_LINK_LIBRARY) module.
macro NVCC_DEVICE_LINK(Srcs...) {
    SET_APPEND(_NVCC_DEVICE_SRCS ${suf=${OBJ_SUF}${NVCC_OBJ_EXT}:Srcs})
    .PEERDIR=build/internal/platform/cuda
}
# nvprune is enabled by default; NO_CUDA_NVPRUNE() turns it off.
CUDA_NVPRUNE=yes

### @usage: NO_CUDA_NVPRUNE()
### Disable nvprune for a PROGRAM.
macro NO_CUDA_NVPRUNE() {
    DISABLE(CUDA_NVPRUNE)
}
|