1 // Tests CUDA compilation pipeline construction in Driver.
2 // REQUIRES: clang-driver
3 // REQUIRES: x86-registered-target
4 // REQUIRES: nvptx-registered-target
5 
6 // Simple compilation case. Compile device-side to PTX assembly and make sure
7 // we use it on the host side.
8 // RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
9 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
10 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
11 // RUN:    -check-prefix NOLINK %s
12 
13 // Typical compilation + link case.
14 // RUN: %clang -### -target x86_64-linux-gnu %s 2>&1 \
15 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
16 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
17 // RUN:    -check-prefix LINK %s
18 
19 // Verify that --cuda-host-only disables device-side compilation, but doesn't
20 // disable host-side compilation/linking.
21 // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only %s 2>&1 \
22 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
23 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
24 
25 // Verify that --cuda-device-only disables host-side compilation and linking.
26 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only %s 2>&1 \
27 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
28 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
29 
30 // Check that the last of --cuda-compile-host-device, --cuda-host-only, and
31 // --cuda-device-only wins.
32 
33 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
34 // RUN:    --cuda-host-only %s 2>&1 \
35 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
36 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
37 
38 // RUN: %clang -### -target x86_64-linux-gnu --cuda-compile-host-device \
39 // RUN:    --cuda-host-only %s 2>&1 \
40 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
41 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
42 
43 // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only \
44 // RUN:    --cuda-device-only %s 2>&1 \
45 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
46 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
47 
48 // RUN: %clang -### -target x86_64-linux-gnu --cuda-compile-host-device \
49 // RUN:    --cuda-device-only %s 2>&1 \
50 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
51 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
52 
53 // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only \
54 // RUN:   --cuda-compile-host-device %s 2>&1 \
55 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
56 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
57 // RUN:    -check-prefix LINK %s
58 
59 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
60 // RUN:   --cuda-compile-host-device %s 2>&1 \
61 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
62 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
63 // RUN:    -check-prefix LINK %s
64 
65 // Verify that --cuda-gpu-arch option passes the correct GPU architecture to
66 // device compilation.
67 // RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=sm_30 -c %s 2>&1 \
68 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
69 // RUN:    -check-prefix DEVICE-SM30 -check-prefix HOST \
70 // RUN:    -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
71 
72 // Verify that there is one device-side compilation per --cuda-gpu-arch
73 // argument and that all results are included on the host side.
74 // RUN: %clang -### -target x86_64-linux-gnu \
75 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 -c %s 2>&1 \
76 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
77 // RUN:             -check-prefixes DEVICE-SM30,DEVICE2-SM35 \
78 // RUN:             -check-prefixes INCLUDES-DEVICE,INCLUDES-DEVICE2 \
79 // RUN:             -check-prefixes HOST,HOST-NOSAVE,NOLINK %s
80 
81 // Verify that device-side results are passed to the correct tool when
82 // -save-temps is used.
83 // RUN: %clang -### -target x86_64-linux-gnu -save-temps -c %s 2>&1 \
84 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-SAVE \
85 // RUN:    -check-prefix HOST -check-prefix HOST-SAVE -check-prefix NOLINK %s
86 
87 // Verify that device-side results are passed to the correct tool when
88 // -fno-integrated-as is used.
89 // RUN: %clang -### -target x86_64-linux-gnu -fno-integrated-as -c %s 2>&1 \
90 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
91 // RUN:    -check-prefix HOST -check-prefix HOST-NOSAVE \
92 // RUN:    -check-prefix HOST-AS -check-prefix NOLINK %s
93 
94 // Verify that --[no-]cuda-gpu-arch arguments are handled correctly.
95 // a) --no-cuda-gpu-arch=X negates preceding --cuda-gpu-arch=X
96 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
97 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
98 // RUN:   --no-cuda-gpu-arch=sm_35 \
99 // RUN:   -c %s 2>&1 \
100 // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s
101 
102 // b) --no-cuda-gpu-arch=X negates more than one preceding --cuda-gpu-arch=X
103 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
104 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
105 // RUN:   --no-cuda-gpu-arch=sm_35 \
106 // RUN:   -c %s 2>&1 \
107 // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,NOARCH-SM35 %s
108 
109 // c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X
110 //    we default to sm_20 -- same as if no --cuda-gpu-arch were passed.
111 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
112 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
113 // RUN:   --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
114 // RUN:   -c %s 2>&1 \
115 // RUN: | FileCheck -check-prefixes ARCH-SM20,NOARCH-SM30,NOARCH-SM35 %s
116 
117 // d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
118 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
119 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
120 // RUN:   --no-cuda-gpu-arch=sm_50 \
121 // RUN:   -c %s 2>&1 \
122 // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s
123 
124 // e) --no-cuda-gpu-arch=X does not affect following --cuda-gpu-arch=X
125 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
126 // RUN:   --no-cuda-gpu-arch=sm_35 --no-cuda-gpu-arch=sm_30 \
127 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
128 // RUN:   -c %s 2>&1 \
129 // RUN: | FileCheck -check-prefixes NOARCH-SM20,ARCH-SM30,ARCH-SM35 %s
130 
131 // f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
132 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
133 // RUN:   --cuda-gpu-arch=sm_20 --cuda-gpu-arch=sm_30 \
134 // RUN:   --no-cuda-gpu-arch=all \
135 // RUN:   --cuda-gpu-arch=sm_35 \
136 // RUN:   -c %s 2>&1 \
137 // RUN: | FileCheck -check-prefixes NOARCH-SM20,NOARCH-SM30,ARCH-SM35 %s
138 
139 // g) There's no --cuda-gpu-arch=all
140 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
141 // RUN:   --cuda-gpu-arch=all \
142 // RUN:   -c %s 2>&1 \
143 // RUN: | FileCheck -check-prefix ARCHALLERROR %s
144 
145 
146 // Verify that --[no-]cuda-include-ptx arguments are handled correctly.
147 // a) by default we're including PTX for all GPUs.
148 // RUN: %clang -### -target x86_64-linux-gnu \
149 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
150 // RUN:   -c %s 2>&1 \
151 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
152 
153 // b) --no-cuda-include-ptx=all disables PTX inclusion for all GPUs
154 // RUN: %clang -### -target x86_64-linux-gnu \
155 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
156 // RUN:   --no-cuda-include-ptx=all \
157 // RUN:   -c %s 2>&1 \
158 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,NOPTX-SM30 %s
159 
160 // c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only.
161 // RUN: %clang -### -target x86_64-linux-gnu \
162 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
163 // RUN:   --no-cuda-include-ptx=sm_35 \
164 // RUN:   -c %s 2>&1 \
165 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM35,PTX-SM30 %s
166 // RUN: %clang -### -target x86_64-linux-gnu \
167 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
168 // RUN:   --no-cuda-include-ptx=sm_30 \
169 // RUN:   -c %s 2>&1 \
170 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,NOPTX-SM30 %s
171 
172 // d) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=all
173 // RUN: %clang -### -target x86_64-linux-gnu \
174 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
175 // RUN:   --no-cuda-include-ptx=all --cuda-include-ptx=all \
176 // RUN:   -c %s 2>&1 \
177 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
178 
179 // e) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=sm_XX
180 // RUN: %clang -### -target x86_64-linux-gnu \
181 // RUN:   --cuda-gpu-arch=sm_35 --cuda-gpu-arch=sm_30 \
182 // RUN:   --no-cuda-include-ptx=sm_30 --cuda-include-ptx=all \
183 // RUN:   -c %s 2>&1 \
184 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM35,PTX-SM30 %s
185 
186 
187 // ARCH-SM20: "-cc1"{{.*}}"-target-cpu" "sm_20"
188 // NOARCH-SM20-NOT: "-cc1"{{.*}}"-target-cpu" "sm_20"
189 // ARCH-SM30: "-cc1"{{.*}}"-target-cpu" "sm_30"
190 // NOARCH-SM30-NOT: "-cc1"{{.*}}"-target-cpu" "sm_30"
191 // ARCH-SM35: "-cc1"{{.*}}"-target-cpu" "sm_35"
192 // NOARCH-SM35-NOT: "-cc1"{{.*}}"-target-cpu" "sm_35"
193 // ARCHALLERROR: error: Unsupported CUDA gpu architecture: all
194 
195 // Match device-side preprocessor and compiler phases with -save-temps.
196 // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
197 // DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
198 // DEVICE-SAVE-SAME: "-fcuda-is-device"
199 // DEVICE-SAVE-SAME: "-x" "cuda"
200 
201 // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
202 // DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
203 // DEVICE-SAVE-SAME: "-fcuda-is-device"
204 // DEVICE-SAVE-SAME: "-x" "cuda-cpp-output"
205 
206 // Match the job that produces PTX assembly.
207 // DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
208 // DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
209 // DEVICE-SAME: "-fcuda-is-device"
210 // DEVICE-SM30-SAME: "-target-cpu" "sm_30"
211 // DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]"
212 // DEVICE-NOSAVE-SAME: "-x" "cuda"
213 // DEVICE-SAVE-SAME: "-x" "ir"
214 
215 // Match the call to ptxas (which assembles PTX to SASS).
216 // DEVICE: ptxas
217 // DEVICE-SM30-DAG: "--gpu-name" "sm_30"
218 // DEVICE-DAG: "--output-file" "[[CUBINFILE:[^"]*]]"
219 // DEVICE-DAG: "[[PTXFILE]]"
220 
221 // Match another device-side compilation.
222 // DEVICE2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
223 // DEVICE2-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
224 // DEVICE2-SAME: "-fcuda-is-device"
225 // DEVICE2-SM35-SAME: "-target-cpu" "sm_35"
226 // DEVICE2-SAME: "-o" "[[PTXFILE2:[^"]*]]"
227 // DEVICE2-SAME: "-x" "cuda"
228 
229 // Match another call to ptxas.
230 // DEVICE2: ptxas
231 // DEVICE2-SM35-DAG: "--gpu-name" "sm_35"
232 // DEVICE2-DAG: "--output-file" "[[CUBINFILE2:[^"]*]]"
233 // DEVICE2-DAG: "[[PTXFILE2]]"
234 
235 // Match no device-side compilation.
236 // NODEVICE-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
237 // NODEVICE-NOT: "-fcuda-is-device"
238 
239 // INCLUDES-DEVICE: fatbinary
240 // INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]"
241 // INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]"
242 // INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]"
243 // INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]"
244 // INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]"
245 
246 // Match host-side preprocessor job with -save-temps.
247 // HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
248 // HOST-SAVE-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
249 // HOST-SAVE-NOT: "-fcuda-is-device"
250 // HOST-SAVE-SAME: "-x" "cuda"
251 
252 // Match host-side compilation.
253 // HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
254 // HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
255 // HOST-NOT: "-fcuda-is-device"
256 // There is only one GPU binary after combining it with fatbinary, so no other
257 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
258 // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
259 // There is only one GPU binary after combining it with fatbinary, so none follows.
260 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
261 // HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
262 // HOST-NOSAVE-SAME: "-x" "cuda"
263 // HOST-SAVE-SAME: "-x" "cuda-cpp-output"
264 
265 // Match external assembler that uses compilation output.
266 // HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]"
267 
268 // Match no GPU code inclusion.
269 // NOINCLUDES-DEVICE-NOT: "-fcuda-include-gpubinary"
270 
271 // Match no host compilation.
272 // NOHOST-NOT: "-cc1" "-triple"
273 // NOHOST-NOT: "-x" "cuda"
274 
275 // Match linker.
276 // LINK: "{{.*}}{{ld|link}}{{(.exe)?}}"
277 // LINK-SAME: "[[HOSTOUTPUT]]"
278 
279 // Match no linker.
280 // NOLINK-NOT: "{{.*}}{{ld|link}}{{(.exe)?}}"
281 
282 // FATBIN-COMMON: fatbinary
283 // FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]"
284 // FATBIN-COMMON: "--image=profile=sm_30,file=
285 // PTX-SM30: "--image=profile=compute_30,file=
286 // NOPTX-SM30-NOT: "--image=profile=compute_30,file=
287 // FATBIN-COMMON: "--image=profile=sm_35,file=
288 // PTX-SM35: "--image=profile=compute_35,file=
289 // NOPTX-SM35-NOT: "--image=profile=compute_35,file=
290