# ----------------------------------------------------------------------------
#  CUDA toolkit detection and NVCC configuration.
#
#  Outputs (set only when the toolkit is found):
#    HAVE_CUDA, HAVE_CUFFT, HAVE_CUBLAS, HAVE_NVCUVID
#    CUDA_ARCH_BIN / CUDA_ARCH_PTX   (cache) requested real / virtual archs
#    CUDA_ARCH_BIN_OR_PTX_10         1 if compute capability 1.0 was requested
#    OPENCV_CUDA_ARCH_BIN / _PTX / _FEATURES   values passed to the templates
#    CUDA_NVCC_FLAGS, OpenCV_CUDA_CC           -gencode flags for NVCC
#    ocv_cuda_compile(<out_var> <sources>...)  wrapper around cuda_compile()
#    CUDA_LIBS_PATH, CUDA_*_LIBRARY_ABS        library directories / full paths
# ----------------------------------------------------------------------------

if(WIN32 AND NOT MSVC)
  message(STATUS "CUDA compilation is disabled (due to only Visual Studio compiler supported on your platform).")
  return()
endif()

if(CMAKE_COMPILER_IS_GNUCXX AND NOT APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
  message(STATUS "CUDA compilation is disabled (due to Clang unsupported on your platform).")
  return()
endif()

# Temporarily put the project's own cmake/ directory first on the module path
# for the CUDA lookup; the entry is removed again right after the search.
set(CMAKE_MODULE_PATH "${OpenCV_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})

if(ANDROID)
  set(CUDA_TARGET_OS_VARIANT "Android")
endif()
find_host_package(CUDA "${MIN_VER_CUDA}" QUIET)

list(REMOVE_AT CMAKE_MODULE_PATH 0)

if(CUDA_FOUND)
  set(HAVE_CUDA 1)

  if(WITH_CUFFT)
    set(HAVE_CUFFT 1)
  endif()

  if(WITH_CUBLAS)
    set(HAVE_CUBLAS 1)
  endif()

  if(WITH_NVCUVID)
    find_cuda_helper_libs(nvcuvid)
    if(WIN32)
      find_cuda_helper_libs(nvcuvenc)
    endif()
    # NOTE(review): HAVE_NVCUVID is set regardless of whether the helper
    # libraries were actually located -- confirm this is intended.
    set(HAVE_NVCUVID 1)
  endif()

  message(STATUS "CUDA detected: " ${CUDA_VERSION})

  # ---- GPU generation selection --------------------------------------------
  # "Auto" runs a probe program on the build machine, so it is not offered
  # when cross-compiling.
  set(_generations "Fermi" "Kepler")
  if(NOT CMAKE_CROSSCOMPILING)
    list(APPEND _generations "Auto")
  endif()
  set(CUDA_GENERATION "" CACHE STRING "Build CUDA device code only for specific GPU architecture. Leave empty to build for all architectures.")
  if(CMAKE_VERSION VERSION_GREATER "2.8")
    set_property(CACHE CUDA_GENERATION PROPERTY STRINGS "" ${_generations})
  endif()

  if(CUDA_GENERATION)
    # Exact list lookup: user input is never interpreted as a regular expression.
    list(FIND _generations "${CUDA_GENERATION}" _generation_index)
    if(_generation_index EQUAL -1)
      string(REPLACE ";" ", " _generations_text "${_generations}")
      message(FATAL_ERROR "ERROR: Only ${_generations_text} generations are supported (got \"${CUDA_GENERATION}\").")
    endif()
    # An explicit generation overrides any previously cached architecture lists.
    unset(CUDA_ARCH_BIN CACHE)
    unset(CUDA_ARCH_PTX CACHE)
  endif()

  set(__cuda_arch_ptx "")
  if(CUDA_GENERATION STREQUAL "Fermi")
    set(__cuda_arch_bin "2.0 2.1(2.0)")
  elseif(CUDA_GENERATION STREQUAL "Kepler")
    if(CUDA_VERSION VERSION_LESS "5.0")
      set(__cuda_arch_bin "3.0")
    else()
      set(__cuda_arch_bin "3.0 3.5")
    endif()
  elseif(CUDA_GENERATION STREQUAL "Auto")
    # Compile and run the detection program; its stdout is used as the
    # architecture list.
    execute_process(
      COMMAND "${CUDA_NVCC_EXECUTABLE}" "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
      WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
      RESULT_VARIABLE _nvcc_res
      OUTPUT_VARIABLE _nvcc_out
      ERROR_QUIET
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    if(NOT _nvcc_res EQUAL 0)
      # __cuda_arch_bin stays undefined, so the defaults below are used.
      message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
    else()
      set(__cuda_arch_bin "${_nvcc_out}")
      # Binaries for 2.1 are generated from compute_20 PTX.
      string(REPLACE "2.1" "2.1(2.0)" __cuda_arch_bin "${__cuda_arch_bin}")
    endif()
  endif()

  # ---- Default architecture lists ------------------------------------------
  if(NOT DEFINED __cuda_arch_bin)
    if(ANDROID)
      if(ARM)
        set(__cuda_arch_bin "3.2")
        set(__cuda_arch_ptx "")
      elseif(AARCH64)
        set(__cuda_arch_bin "5.3")
        set(__cuda_arch_ptx "")
      endif()
    else()
      if(CUDA_VERSION VERSION_LESS "5.0")
        set(__cuda_arch_bin "1.1 1.2 1.3 2.0 2.1(2.0) 3.0")
      elseif(CUDA_VERSION VERSION_GREATER "6.5")
        set(__cuda_arch_bin "2.0 2.1(2.0) 3.0 3.5")
      else()
        set(__cuda_arch_bin "1.1 1.2 1.3 2.0 2.1(2.0) 3.0 3.5")
      endif()
      set(__cuda_arch_ptx "3.0")
    endif()
  endif()

  # Quoted so that an empty or undefined default still yields a single,
  # well-formed (possibly empty) value argument.
  set(CUDA_ARCH_BIN "${__cuda_arch_bin}" CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported")
  set(CUDA_ARCH_PTX "${__cuda_arch_ptx}" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")

  string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}")
  string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")

  # Check if the user specified compute capability 1.0: we don't support it.
  # The dot is escaped and the token must not be adjacent to other digits, so
  # values such as "11.0" are not mistaken for "1.0".
  set(HAS_ARCH_10 "")
  set(CUDA_ARCH_BIN_OR_PTX_10 0)
  if("${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}" MATCHES "(^|[^0-9])1\\.0([^0-9]|$)")
    set(HAS_ARCH_10 "1.0")
    set(CUDA_ARCH_BIN_OR_PTX_10 1)
  endif()

  # NVCC flags to be set
  set(NVCC_FLAGS_EXTRA "")

  # These vars will be passed into the templates
  set(OPENCV_CUDA_ARCH_BIN "")
  set(OPENCV_CUDA_ARCH_PTX "")
  set(OPENCV_CUDA_ARCH_FEATURES "")

  # Tell NVCC to add binaries for the specified GPUs
  string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_BIN_NO_POINTS}")
  foreach(ARCH IN LISTS ARCH_LIST)
    if(ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)")
      # User explicitly specified PTX for the concrete BIN: "BIN(PTX)"
      list(APPEND NVCC_FLAGS_EXTRA -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
      set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${CMAKE_MATCH_1}")
      set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${CMAKE_MATCH_2}")
    else()
      # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
      list(APPEND NVCC_FLAGS_EXTRA -gencode arch=compute_${ARCH},code=sm_${ARCH})
      set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${ARCH}")
      set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}")
    endif()
  endforeach()

  # Tell NVCC to add PTX intermediate code for the specified architectures
  string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}")
  foreach(ARCH IN LISTS ARCH_LIST)
    list(APPEND NVCC_FLAGS_EXTRA -gencode arch=compute_${ARCH},code=compute_${ARCH})
    set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}")
    set(OPENCV_CUDA_ARCH_FEATURES "${OPENCV_CUDA_ARCH_FEATURES} ${ARCH}")
  endforeach()

  # These vars will be processed in other scripts
  set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
  set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}")

  if(ANDROID)
    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xptxas;-dlcm=ca")
  endif()

  message(STATUS "CUDA NVCC target flags: ${CUDA_NVCC_FLAGS}")

  ocv_option(CUDA_FAST_MATH "Enable --use_fast_math for CUDA compiler " OFF)

  if(CUDA_FAST_MATH)
    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} --use_fast_math)
  endif()

  mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD CUDA_SDK_ROOT_DIR)

  # ocv_cuda_compile(<VAR> <args>...)
  #
  # Wrapper around cuda_compile(). It temporarily strips host-compiler flags
  # that are problematic for NVCC from CMAKE_CXX_FLAGS*, extends
  # CUDA_NVCC_FLAGS with the host options needed for the current build, calls
  # cuda_compile(<VAR> <args>...) and finally restores the CMAKE_CXX_FLAGS*.
  #
  # This is intentionally a macro: <VAR> and the CUDA_NVCC_FLAGS additions are
  # set in the caller's scope.
  macro(ocv_cuda_compile VAR)
    foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
      set(${var}_backup_in_cuda_compile_ "${${var}}")

      # Flags removed before invoking NVCC, in this order:
      #   /EHa                          generates warnings under windows
      #   -ggdb3                        leads to preprocessor errors when
      #                                 compiling CUDA files (CUDA 4.1)
      #   -Wsign-promo, -Wno-sign-promo generate warnings under linux
      #   -Wno-delete-non-virtual-dtor, -frtti
      #                                 meant for the C++ compiler, but NVCC
      #                                 uses the C compiler by default
      #   -fvisibility-inlines-hidden
      foreach(_ocv_cuda_dropped_flag
          "/EHa"
          "-ggdb3"
          "-Wsign-promo"
          "-Wno-sign-promo"
          "-Wno-delete-non-virtual-dtor"
          "-frtti"
          "-fvisibility-inlines-hidden")
        string(REPLACE "${_ocv_cuda_dropped_flag}" "" ${var} "${${var}}")
      endforeach()
    endforeach()

    if(BUILD_SHARED_LIBS)
      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -DCVAPI_EXPORTS)
    endif()

    if(UNIX OR APPLE)
      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC)
    endif()
    if(APPLE)
      set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fno-finite-math-only)
    endif()

    # disabled because of multiple warnings during building nvcc auto generated files
    if(CMAKE_COMPILER_IS_GNUCXX AND CMAKE_GCC_REGEX_VERSION VERSION_GREATER "4.6.0")
      ocv_warnings_disable(CMAKE_CXX_FLAGS -Wunused-but-set-variable)
    endif()

    cuda_compile(${VAR} ${ARGN})

    # Restore the host compiler flags saved at the top of the macro.
    foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
      set(${var} "${${var}_backup_in_cuda_compile_}")
      unset(${var}_backup_in_cuda_compile_)
    endforeach()
  endmacro()
else()
  # No toolkit: drop stale architecture selections from the cache.
  unset(CUDA_ARCH_BIN CACHE)
  unset(CUDA_ARCH_PTX CACHE)
endif()

if(HAVE_CUDA)
  # ---- Library directories and short library names -------------------------
  # Collect the directory of every CUDA library in use (toolkit + NPP always,
  # cuBLAS / cuFFT only when enabled), in that order.
  set(_cuda_used_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
  if(HAVE_CUBLAS)
    list(APPEND _cuda_used_libs ${CUDA_cublas_LIBRARY})
  endif()
  if(HAVE_CUFFT)
    list(APPEND _cuda_used_libs ${CUDA_cufft_LIBRARY})
  endif()

  set(CUDA_LIBS_PATH "")
  foreach(p ${_cuda_used_libs})
    get_filename_component(_tmp "${p}" PATH)
    # Entries without a directory part contribute nothing.
    if(NOT "${_tmp}" STREQUAL "")
      list(APPEND CUDA_LIBS_PATH "${_tmp}")
    endif()
  endforeach()
  unset(_cuda_used_libs)

  if(CUDA_LIBS_PATH)
    list(REMOVE_DUPLICATES CUDA_LIBS_PATH)
  endif()
  link_directories(${CUDA_LIBS_PATH})

  # Keep the original values in *_ABS and convert the public variables with
  # ocv_convert_to_lib_name().
  set(CUDA_LIBRARIES_ABS ${CUDA_LIBRARIES})
  ocv_convert_to_lib_name(CUDA_LIBRARIES ${CUDA_LIBRARIES})
  set(CUDA_npp_LIBRARY_ABS ${CUDA_npp_LIBRARY})
  ocv_convert_to_lib_name(CUDA_npp_LIBRARY ${CUDA_npp_LIBRARY})
  if(HAVE_CUBLAS)
    set(CUDA_cublas_LIBRARY_ABS ${CUDA_cublas_LIBRARY})
    ocv_convert_to_lib_name(CUDA_cublas_LIBRARY ${CUDA_cublas_LIBRARY})
  endif()

  if(HAVE_CUFFT)
    set(CUDA_cufft_LIBRARY_ABS ${CUDA_cufft_LIBRARY})
    ocv_convert_to_lib_name(CUDA_cufft_LIBRARY ${CUDA_cufft_LIBRARY})
  endif()
endif()