// Tests CUDA compilation pipeline construction in Driver.
//
// Every invocation below passes -### so the driver only prints the jobs it
// would run; nothing is actually compiled. The printed command lines are then
// matched by FileCheck. Each invocation selects the set of check prefixes
// that describes the pipeline expected for that particular option
// combination, so the check lines at the bottom of the file are shared by
// many invocations and their relative order matters.
//
// REQUIRES: clang-driver
// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target

// Simple compilation case. Compile device-side to PTX assembly and make sure
// we use it on the host side.
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
// RUN:   | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:     -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN:     -check-prefix NOLINK %s

// Typical compilation + link case.
// RUN: %clang -### -target x86_64-linux-gnu %s 2>&1 \
// RUN:   | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:     -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN:     -check-prefix LINK %s

// Verify that --cuda-host-only disables device-side compilation, but doesn't
// disable host-side compilation/linking.
// RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only %s 2>&1 \
// RUN:   | FileCheck -check-prefix NODEVICE -check-prefix HOST \
// RUN:     -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s

// Verify that --cuda-device-only disables host-side compilation and linking.
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only %s 2>&1 \
// RUN:   | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:     -check-prefix NOHOST -check-prefix NOLINK %s

// Check that the last of --cuda-compile-host-device, --cuda-host-only, and
// --cuda-device-only wins.

// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
// RUN:     --cuda-host-only %s 2>&1 \
// RUN:   | FileCheck -check-prefix NODEVICE -check-prefix HOST \
// RUN:     -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s

// RUN: %clang -### -target x86_64-linux-gnu --cuda-compile-host-device \
// RUN:     --cuda-host-only %s 2>&1 \
// RUN:   | FileCheck -check-prefix NODEVICE -check-prefix HOST \
// RUN:     -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s

// RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only \
// RUN:     --cuda-device-only %s 2>&1 \
// RUN:   | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:     -check-prefix NOHOST -check-prefix NOLINK %s

// RUN: %clang -### -target x86_64-linux-gnu --cuda-compile-host-device \
// RUN:     --cuda-device-only %s 2>&1 \
// RUN:   | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:     -check-prefix NOHOST -check-prefix NOLINK %s

// RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only \
// RUN:     --cuda-compile-host-device %s 2>&1 \
// RUN:   | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:     -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN:     -check-prefix LINK %s

// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
// RUN:     --cuda-compile-host-device %s 2>&1 \
// RUN:   | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:     -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN:     -check-prefix LINK %s

// Verify that --cuda-gpu-arch option passes the correct GPU architecture to
// device compilation.
// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_30 -c %s 2>&1 \
// RUN:   | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:     -check-prefix DEVICE-SM30 -check-prefix HOST \
// RUN:     -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s

// Verify that there is one device-side compilation per --cuda-gpu-arch arg
// and that all results are included on the host side.
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN:     --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \
// RUN:   | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
// RUN:     -check-prefixes DEVICE-SM30,DEVICE2-SM35 \
// RUN:     -check-prefixes INCLUDES-DEVICE,INCLUDES-DEVICE2 \
// RUN:     -check-prefixes HOST,HOST-NOSAVE,NOLINK %s

// Verify that device-side results are passed to the correct tool when
// -save-temps is used.
// RUN: %clang -### -target x86_64-linux-gnu -save-temps -c %s 2>&1 \
// RUN:   | FileCheck -check-prefix DEVICE -check-prefix DEVICE-SAVE \
// RUN:     -check-prefix HOST -check-prefix HOST-SAVE -check-prefix NOLINK %s

// Verify that device-side results are passed to the correct tool when
// -fno-integrated-as is used.
// RUN: %clang -### -target x86_64-linux-gnu -fno-integrated-as -c %s 2>&1 \
// RUN:   | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:     -check-prefix HOST -check-prefix HOST-NOSAVE \
// RUN:     -check-prefix HOST-AS -check-prefix NOLINK %s

// Verify that --[no-]cuda-gpu-arch arguments are handled correctly.
// a) --no-cuda-gpu-arch=X negates preceding --cuda-gpu-arch=X
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
// RUN:     --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
// RUN:     --no-cuda-gpu-arch=sm_35 \
// RUN:     -c %s 2>&1 \
// RUN:   | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s

// b) --no-cuda-gpu-arch=X negates more than one preceding --cuda-gpu-arch=X
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
// RUN:     --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
// RUN:     --no-cuda-gpu-arch=sm_35 \
// RUN:     -c %s 2>&1 \
// RUN:   | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s

// c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X
//    we default to sm_20 -- same as if no --cuda-gpu-arch were passed.
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
// RUN:     --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
// RUN:     --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
// RUN:     -c %s 2>&1 \
// RUN:   | FileCheck -check-prefixes ARCH-SM20,NOARCH-SM30,NOARCH-SM35 %s

// d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
// RUN:     --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
// RUN:     --no-cuda-gpu-arch=sm_50 \
// RUN:     -c %s 2>&1 \
// RUN:   | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s

// e) --no-cuda-gpu-arch=X does not affect following --cuda-gpu-arch=X
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
// RUN:     --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
// RUN:     --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
// RUN:     -c %s 2>&1 \
// RUN:   | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s

// f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
// RUN:     --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 \
// RUN:     --no-cuda-gpu-arch=all \
// RUN:     --cuda-gpu-arch=sm_35 \
// RUN:     -c %s 2>&1 \
// RUN:   | FileCheck -check-prefixes NOARCH-SM20,NOARCH-SM30,ARCH-SM35 %s

// g) There's no --cuda-gpu-arch=all
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
// RUN:     --cuda-gpu-arch=all \
// RUN:     -c %s 2>&1 \
// RUN:   | FileCheck -check-prefix ARCHALLERROR %s


// Verify that --[no-]cuda-include-ptx arguments are handled correctly.
// a) by default we're including PTX for all GPUs.
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN:     --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
// RUN:     -c %s 2>&1 \
// RUN:   | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s

// b) --no-cuda-include-ptx=all disables PTX inclusion for all GPUs
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN:     --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
// RUN:     --no-cuda-include-ptx=all \
// RUN:     -c %s 2>&1 \
// RUN:   | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,NOPTX-SM30 %s

// c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only.
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN:     --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
// RUN:     --no-cuda-include-ptx=sm_35 \
// RUN:     -c %s 2>&1 \
// RUN:   | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,PTX-SM30 %s
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN:     --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
// RUN:     --no-cuda-include-ptx=sm_30 \
// RUN:     -c %s 2>&1 \
// RUN:   | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,NOPTX-SM30 %s

// d) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=all
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN:     --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
// RUN:     --no-cuda-include-ptx=all --cuda-include-ptx=all \
// RUN:     -c %s 2>&1 \
// RUN:   | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s

// e) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=sm_XX
// RUN: %clang -### -target x86_64-linux-gnu \
// RUN:     --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
// RUN:     --no-cuda-include-ptx=sm_30 --cuda-include-ptx=all \
// RUN:     -c %s 2>&1 \
// RUN:   | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s


// Presence/absence of a device-side cc1 job for a given GPU architecture.
// ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20"
// NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20"
// ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30"
// NOARCH-SM30-NOT: "-cc1"{{.*}}"-target-cpu" "sm_30"
// ARCH-SM35: "-cc1"{{.*}}"-target-cpu" "sm_35"
// NOARCH-SM35-NOT: "-cc1"{{.*}}"-target-cpu" "sm_35"
// ARCHALLERROR: error: Unsupported CUDA gpu architecture: all

// Match device-side preprocessor and compiler phases with -save-temps.
// DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// DEVICE-SAVE-SAME: "-fcuda-is-device"
// DEVICE-SAVE-SAME: "-x" "cuda"

// DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// DEVICE-SAVE-SAME: "-fcuda-is-device"
// DEVICE-SAVE-SAME: "-x" "cuda-cpp-output"

// Match the job that produces PTX assembly.
// DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// DEVICE-SAME: "-fcuda-is-device"
// DEVICE-SM30-SAME: "-target-cpu" "sm_30"
// DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]"
// DEVICE-NOSAVE-SAME: "-x" "cuda"
// DEVICE-SAVE-SAME: "-x" "ir"

// Match the call to ptxas (which assembles PTX to SASS).
// DEVICE:ptxas
// DEVICE-SM30-DAG: "--gpu-name" "sm_30"
// DEVICE-DAG: "--output-file" "[[CUBINFILE:[^"]*]]"
// DEVICE-DAG: "[[PTXFILE]]"

// Match another device-side compilation.
// DEVICE2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// DEVICE2-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// DEVICE2-SAME: "-fcuda-is-device"
// DEVICE2-SM35-SAME: "-target-cpu" "sm_35"
// DEVICE2-SAME: "-o" "[[PTXFILE2:[^"]*]]"
// DEVICE2-SAME: "-x" "cuda"

// Match another call to ptxas.
// DEVICE2: ptxas
// DEVICE2-SM35-DAG: "--gpu-name" "sm_35"
// DEVICE2-DAG: "--output-file" "[[CUBINFILE2:[^"]*]]"
// DEVICE2-DAG: "[[PTXFILE2]]"

// Match no device-side compilation.
// NODEVICE-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// NODEVICE-NOT: "-fcuda-is-device"

// Match the fatbinary job that bundles the cubin and PTX outputs captured
// above into a single file.
// INCLUDES-DEVICE:fatbinary
// INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]"
// INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]"
// INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]"
// INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]"
// INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]"

// Match host-side preprocessor job with -save-temps.
// HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
// HOST-SAVE-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
// HOST-SAVE-NOT: "-fcuda-is-device"
// HOST-SAVE-SAME: "-x" "cuda"

// Match host-side compilation.
// HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
// HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
// HOST-NOT: "-fcuda-is-device"
// There is only one GPU binary after combining it with fatbinary, so no
// other GPU binary may be included ahead of it...
// INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
// INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
// ...and none may follow it either.
// INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
// HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
// HOST-NOSAVE-SAME: "-x" "cuda"
// HOST-SAVE-SAME: "-x" "cuda-cpp-output"

// Match external assembler that uses compilation output.
// HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]"

// Match no GPU code inclusion.
// NOINCLUDES-DEVICE-NOT: "-fcuda-include-gpubinary"

// Match no host compilation.
// NOHOST-NOT: "-cc1" "-triple"
// NOHOST-NOT: "-x" "cuda"

// Match linker.
// LINK: "{{.*}}{{ld|link}}{{(.exe)?}}"
// LINK-SAME: "[[HOSTOUTPUT]]"

// Match no linker.
// NOLINK-NOT: "{{.*}}{{ld|link}}{{(.exe)?}}"

// Match the fatbinary images produced for sm_30 and sm_35, with the PTX
// (compute_XX) image either required or forbidden per architecture.
// FATBIN-COMMON:fatbinary
// FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]"
// FATBIN-COMMON: "--image=profile=sm_30,file=
// PTX-SM30: "--image=profile=compute_30,file=
// NOPTX-SM30-NOT: "--image=profile=compute_30,file=
// FATBIN-COMMON: "--image=profile=sm_35,file=
// PTX-SM35: "--image=profile=compute_35,file=
// NOPTX-SM35-NOT: "--image=profile=compute_35,file=