aboutsummaryrefslogtreecommitdiffstats
path: root/cmake/cuda.cmake
blob: 5764bb65c3aa630e6dbfaa0b93c21bf097d760e5 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
if (HAVE_CUDA)
  # Refuse to configure CUDA support with a CMake older than 3.17.
  if(CMAKE_VERSION VERSION_LESS "3.17.0")
    message(FATAL_ERROR "Build with CUDA requires at least cmake 3.17.0")
  endif()

  # Turn the CUDA language on before any of the flag handling below runs.
  enable_language(CUDA)

  # Helper modules looked up through CMAKE_MODULE_PATH.
  include(global_flags)
  include(common)

  # Wraps a value in double quotes when it contains at least one space.
  #
  # Arguments:
  #   OutVar - name of the variable in the caller's scope that receives the result
  #   Var    - the value to inspect
  #
  # A value without spaces is passed through unchanged.
  function(quote_if_contains_spaces OutVar Var)
    string(FIND "${Var}" " " firstSpacePos)
    if (firstSpacePos EQUAL -1)
      set(${OutVar} ${Var} PARENT_SCOPE)
    else()
      set(${OutVar} "\"${Var}\"" PARENT_SCOPE)
    endif()
  endfunction()

  # Translates a host C++ flag string into a flag string for nvcc.
  #
  # Arguments:
  #   OutCudaFlags - name of the variable in the caller's scope that receives the result (a string)
  #   CxxFlags     - host compiler flags as a single command-line string
  #
  # Flags listed in skipList or matching skipPrefixRegexp are dropped.
  # -D / -I flags (/D, /I under MSVC) are handed to nvcc itself, always spelled with a leading "-".
  # Every remaining flag is forwarded to the host compiler through a single comma-joined
  # `--compiler-options` argument; that argument is omitted entirely when nothing is left to forward.
  function(get_cuda_flags_from_cxx_flags OutCudaFlags CxxFlags)
    set(skipList
      -gline-tables-only
      # clang coverage
      -fprofile-instr-generate
      -fcoverage-mapping
      /Zc:inline # disable unreferenced functions (kernel registrators) remove
      -Wno-c++17-extensions
      -flto
      -faligned-allocation
      -fsized-deallocation
      # While it might be reasonable to compile host part of .cu sources with these optimizations enabled,
      # nvcc passes these options down towards cicc which lacks x86_64 extensions support.
      -msse2
      -msse3
      -mssse3
      -msse4.1
      -msse4.2
    )

    set(skipPrefixRegexp
      "(-fsanitize=|-fsanitize-coverage=|-fsanitize-blacklist=|--system-header-prefix|(/|-)std(:|=)c\\+\\+).*"
    )

    # The host compiler counts as clang when "clang" occurs anywhere in its path.
    string(FIND "${CMAKE_CUDA_HOST_COMPILER}" clang hostCompilerIsClangPos)
    string(COMPARE NOTEQUAL ${hostCompilerIsClangPos} -1 isHostCompilerClang)


    # Splits a command-line string into a CMake list. Literal ';' characters are first replaced
    # with the $<SEMICOLON> placeholder so they are not mistaken for list separators.
    function(separate_arguments_with_special_symbols Output Src)
      string(REPLACE ";" "$<SEMICOLON>" LocalOutput "${Src}")
      separate_arguments(LocalOutput NATIVE_COMMAND "${LocalOutput}")
      set(${Output} ${LocalOutput} PARENT_SCOPE)
    endfunction()

    separate_arguments_with_special_symbols(Separated_CxxFlags "${CxxFlags}")

    if (MSVC)
      set(flagPrefixSymbol "/")
    else()
      set(flagPrefixSymbol "-")
    endif()

    set(localCudaCommonFlags "") # non host compiler options
    set(localCudaCompilerOptions "")

    while (Separated_CxxFlags)
      list(POP_FRONT Separated_CxxFlags cxxFlag)
      if ((cxxFlag IN_LIST skipList) OR (cxxFlag MATCHES ${skipPrefixRegexp}))
        continue()
      endif()
      # -fopenmp=libomp is a clang spelling; other host compilers get plain -fopenmp.
      if ((cxxFlag STREQUAL -fopenmp=libomp) AND (NOT isHostCompilerClang))
        list(APPEND localCudaCompilerOptions -fopenmp)
        continue()
      endif()
      # --target= is only kept when the host compiler is clang.
      if ((NOT isHostCompilerClang) AND (cxxFlag MATCHES "^\-\-target=.*"))
        continue()
      endif()
      # -DNAME=VALUE: quote only the value part when it contains spaces.
      if (cxxFlag MATCHES "^${flagPrefixSymbol}(D[^ ]+)=(.+)")
        set(key ${CMAKE_MATCH_1})
        quote_if_contains_spaces(safeValue "${CMAKE_MATCH_2}")
        list(APPEND localCudaCommonFlags "-${key}=${safeValue}")
        continue()
      endif()
      # -DNAME / -IPATH, either attached ("-Ifoo") or as two separate arguments ("-I foo").
      if (cxxFlag MATCHES "^${flagPrefixSymbol}([DI])(.*)")
        set(key ${CMAKE_MATCH_1})
        # Compare against the empty string instead of testing truthiness: an attached value such as
        # "N", "NO", "OFF" or "0" is a CMake false constant and would otherwise be treated as missing,
        # which makes the following flag get swallowed as the value.
        set(value "${CMAKE_MATCH_2}")
        if (value STREQUAL "")
          list(POP_FRONT Separated_CxxFlags value)
          set(sep " ")
        else()
          set(sep "")
        endif()
        quote_if_contains_spaces(safeValue "${value}")
        list(APPEND localCudaCommonFlags "-${key}${sep}${safeValue}")
        continue()
      endif()
      list(APPEND localCudaCompilerOptions ${cxxFlag})
    endwhile()

    if (isHostCompilerClang)
      # nvcc concatenates the sources for clang, and clang reports unused
      # things from .h files as if they were defined in a .cpp file.
      list(APPEND localCudaCommonFlags -Wno-unused-function -Wno-unused-parameter)
      if (CMAKE_CXX_COMPILER_TARGET)
        list(APPEND localCudaCompilerOptions "--target=${CMAKE_CXX_COMPILER_TARGET}")
      endif()
    endif()

    if (CMAKE_SYSROOT)
      list(APPEND localCudaCompilerOptions "--sysroot=${CMAKE_SYSROOT}")
    endif()

    list(JOIN localCudaCommonFlags " " joinedLocalCudaCommonFlags)
    string(REPLACE "$<SEMICOLON>" ";" joinedLocalCudaCommonFlags "${joinedLocalCudaCommonFlags}")
    list(JOIN localCudaCompilerOptions , joinedLocalCudaCompilerOptions)

    # Emit --compiler-options only when there is something to forward. A dangling
    # "--compiler-options" would take whatever the caller appends next as its argument.
    set(localCudaFlags "${joinedLocalCudaCommonFlags}")
    if (NOT joinedLocalCudaCompilerOptions STREQUAL "")
      if (NOT localCudaFlags STREQUAL "")
        string(APPEND localCudaFlags " ")
      endif()
      string(APPEND localCudaFlags "--compiler-options ${joinedLocalCudaCompilerOptions}")
    endif()
    set(${OutCudaFlags} "${localCudaFlags}" PARENT_SCOPE)
  endfunction()

  # Seed the nvcc flags from the host C++ flags.
  get_cuda_flags_from_cxx_flags(CMAKE_CUDA_FLAGS "${CMAKE_CXX_FLAGS}")

  # Allow __host__, __device__ annotations in lambda declaration.
  string(APPEND CMAKE_CUDA_FLAGS " --expt-extended-lambda")
  # Allow host code to invoke __device__ constexpr functions and vice versa
  string(APPEND CMAKE_CUDA_FLAGS " --expt-relaxed-constexpr")

  # C++ standard handed to the host compiler for CUDA sources, spelled per toolchain.
  set(NVCC_STD_VER 14)
  if(NOT MSVC)
    set(NVCC_STD "-std=c++${NVCC_STD_VER}")
  else()
    set(NVCC_STD "/std:c++${NVCC_STD_VER}")
  endif()
  string(APPEND CMAKE_CUDA_FLAGS
    " --compiler-options ${NVCC_STD}"
    " -DTHRUST_IGNORE_CUB_VERSION_CHECK"
  )

  if(MSVC)
    # default CMake flags differ from our configuration
    set(CMAKE_CUDA_FLAGS_DEBUG "-D_DEBUG --compiler-options /Z7,/Ob0,/Od")
    set(CMAKE_CUDA_FLAGS_MINSIZEREL "-DNDEBUG --compiler-options /O1,/Ob1")
    set(CMAKE_CUDA_FLAGS_RELEASE "-DNDEBUG --compiler-options /Ox,/Ob2,/Oi")
    set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-DNDEBUG --compiler-options /Z7,/Ox,/Ob1")
  endif()

  # Use the versions from contrib: the standard libraries from the CUDA distribution
  # are incompatible with MSVC and libcxx.
  set(CUDA_EXTRA_INCLUDE_DIRECTORIES
    ${CMAKE_SOURCE_DIR}/contrib/libs/nvidia/thrust
    ${CMAKE_SOURCE_DIR}/contrib/libs/nvidia/cub
  )

  find_package(CUDAToolkit REQUIRED)

  # --threads is only passed to nvcc 11.2 and newer.
  if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL "11.2")
    string(APPEND CMAKE_CUDA_FLAGS " --threads 0")
  endif()

  message(VERBOSE "CMAKE_CUDA_FLAGS = \"${CMAKE_CUDA_FLAGS}\"")

  enable_language(CUDA)

  # Appends nvcc flags to a target's CUDA_FLAGS property.
  #
  # Arguments:
  #   Tgt  - the target whose property is extended
  #   ARGN - the flags to append, one per argument
  function(target_cuda_flags Tgt)
    set_property(TARGET ${Tgt} APPEND PROPERTY CUDA_FLAGS ${ARGN})
  endfunction()

  # Appends host-compiler flags to a target's CUDA_FLAGS property.
  #
  # Arguments:
  #   Tgt  - the target whose property is extended
  #   ARGN - host compiler flags; they are comma-joined and placed behind `--compiler-options`
  #
  # Does nothing when no flags are given.
  function(target_cuda_cflags Tgt)
    if ("${ARGN}" STREQUAL "")
      return()
    endif()

    string(JOIN "," hostCompilerFlags ${ARGN})
    set_property(TARGET ${Tgt} APPEND PROPERTY CUDA_FLAGS --compiler-options ${hostCompilerFlags})
  endfunction()

  # Adds CUDA source files to a target and gives each of them the target's CUDA_FLAGS as compile options.
  #
  # Arguments:
  #   Tgt   - the target that receives the sources
  #   Scope - the scope keyword forwarded to target_sources()
  #   ARGN  - the source files
  function(target_cuda_sources Tgt Scope)
    # Expands to the target's CUDA_FLAGS property, with any generator expressions stored in it
    # evaluated in the context of the target.
    set(perSourceCudaOptions
      "$<JOIN:$<TARGET_GENEX_EVAL:${Tgt},$<TARGET_PROPERTY:${Tgt},CUDA_FLAGS>>,;>"
    )

    # The include directories are added at the level of the calling CMakeLists file on purpose:
    # some non-CUDA source files may want to include calls to CUDA libs.
    include_directories(${CUDA_EXTRA_INCLUDE_DIRECTORIES})

    set_source_files_properties(${ARGN} PROPERTIES COMPILE_OPTIONS "${perSourceCudaOptions}")
    target_sources(${Tgt} ${Scope} ${ARGN})
  endfunction()

endif()