1 // Tests the phases generated for a CUDA offloading target for different 2 // combinations of: 3 // - Number of gpu architectures; 4 // - Host/device-only compilation; 5 // - User-requested final phase - binary or assembly. 6 7 // REQUIRES: clang-driver 8 // REQUIRES: powerpc-registered-target 9 // REQUIRES: nvptx-registered-target 10 // 11 // Test single gpu architecture with complete compilation. 12 // 13 // Test CUDA NVPTX phases. 14 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ 15 // RUN: --cuda-gpu-arch=sm_30 %s 2>&1 \ 16 // RUN: | FileCheck -check-prefixes=BIN %s 17 // 18 // BIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) 19 // BIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) 20 // BIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) 21 // BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH:sm_30]]) 22 // BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) 23 // BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]]) 24 // BIN-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]]) 25 // BIN-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]]) 26 // BIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P7]]}, object 27 // BIN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH]])" {[[P6]]}, assembler 28 // BIN-DAG: [[P10:[0-9]+]]: linker, {[[P8]], [[P9]]}, cuda-fatbin, (device-[[T]]) 29 // BIN-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P10]]}, ir 30 // BIN-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]]) 31 // BIN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]]) 32 // BIN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]]) 33 34 // 35 // Test single gpu architecture up to the assemble phase. 36 // 37 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ 38 // RUN: --cuda-gpu-arch=sm_30 %s -S 2>&1 \ 39 // RUN: | FileCheck -check-prefixes=ASM %s 40 // ASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) 41 // ASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) 42 // ASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) 43 // ASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) 44 // ASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P3]]}, assembler 45 // ASM-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]]) 46 // ASM-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (host-[[T]]) 47 // ASM-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (host-[[T]]) 48 // ASM-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (host-[[T]]) 49 50 // 51 // Test two gpu architectures with complete compilation. 52 // 53 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ 54 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s 2>&1 \ 55 // RUN: | FileCheck -check-prefixes=BIN2 %s 56 // BIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) 57 // BIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) 58 // BIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) 59 // BIN2-DAG: [[P3:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH1:sm_30]]) 60 // BIN2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) 61 // BIN2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]]) 62 // BIN2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]]) 63 // BIN2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]]) 64 // BIN2-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH1]])" {[[P7]]}, object 65 // BIN2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH1]])" {[[P6]]}, assembler 66 // BIN2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]]) 67 // BIN2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) 68 // BIN2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]]) 69 // BIN2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]]) 70 // BIN2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]]) 71 // BIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P14]]}, object 72 // BIN2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P13]]}, assembler 73 // BIN2-DAG: [[P17:[0-9]+]]: linker, {[[P8]], [[P9]], [[P15]], [[P16]]}, cuda-fatbin, (device-[[T]]) 74 // BIN2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (powerpc64le-ibm-linux-gnu)" {[[P2]]}, "device-[[T]] ([[TRIPLE]])" {[[P17]]}, ir 75 // BIN2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]]) 76 // BIN2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) 77 // BIN2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]]) 78 79 // 80 // Test two gpu architecturess up to the assemble phase. 81 // 82 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ 83 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \ 84 // RUN: | FileCheck -check-prefixes=ASM2 %s 85 // ASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH1:sm_30]]) 86 // ASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) 87 // ASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH1]]) 88 // ASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH1]]) 89 // ASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH1]])" {[[P3]]}, assembler 90 // ASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]]) 91 // ASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) 92 // ASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) 93 // ASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]]) 94 // ASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler 95 // ASM2-DAG: [[P10:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (host-[[T]]) 96 // ASM2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, [[T]]-cpp-output, (host-[[T]]) 97 // ASM2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (host-[[T]]) 98 // ASM2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (host-[[T]]) 99 100 // 101 // Test single gpu architecture with complete compilation in host-only 102 // compilation mode. 103 // 104 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ 105 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only 2>&1 \ 106 // RUN: | FileCheck -check-prefixes=HBIN %s 107 // HBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) 108 // HBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) 109 // HBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) 110 // HBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) 111 // HBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]]) 112 // HBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]]) 113 // HBIN-NOT: device 114 // 115 // Test single gpu architecture up to the assemble phase in host-only 116 // compilation mode. 117 // 118 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ 119 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-host-only -S 2>&1 \ 120 // RUN: | FileCheck -check-prefixes=HASM %s 121 // HASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) 122 // HASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) 123 // HASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) 124 // HASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) 125 // HASM-NOT: device 126 127 // 128 // Test two gpu architectures with complete compilation in host-only 129 // compilation mode. 130 // 131 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ 132 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only 2>&1 \ 133 // RUN: | FileCheck -check-prefixes=HBIN2 %s 134 // HBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) 135 // HBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) 136 // HBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) 137 // HBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) 138 // HBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (host-[[T]]) 139 // HBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (host-[[T]]) 140 // HBIN2-NOT: device 141 142 // 143 // Test two gpu architectures up to the assemble phase in host-only 144 // compilation mode. 145 // 146 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ 147 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-host-only -S \ 148 // RUN: 2>&1 | FileCheck -check-prefixes=HASM2 %s 149 // HASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (host-[[T]]) 150 // HASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (host-[[T]]) 151 // HASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (host-[[T]]) 152 // HASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (host-[[T]]) 153 // HASM2-NOT: device 154 155 // 156 // Test single gpu architecture with complete compilation in device-only 157 // compilation mode. 158 // 159 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ 160 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only 2>&1 \ 161 // RUN: | FileCheck -check-prefixes=DBIN %s 162 // DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) 163 // DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) 164 // DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) 165 // DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) 166 // DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) 167 // DBIN-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] (nvptx64-nvidia-cuda:[[ARCH]])" {[[P4]]}, object 168 // DBIN-NOT: host 169 // 170 // Test single gpu architecture up to the assemble phase in device-only 171 // compilation mode. 172 // 173 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ 174 // RUN: --cuda-gpu-arch=sm_30 %s --cuda-device-only -S 2>&1 \ 175 // RUN: | FileCheck -check-prefixes=DASM %s 176 // DASM-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) 177 // DASM-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) 178 // DASM-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) 179 // DASM-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) 180 // DASM-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P3]]}, assembler 181 // DASM-NOT: host 182 183 // 184 // Test two gpu architectures with complete compilation in device-only 185 // compilation mode. 186 // 187 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ 188 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \ 189 // RUN: | FileCheck -check-prefixes=DBIN2 %s 190 // DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) 191 // DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) 192 // DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) 193 // DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) 194 // DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) 195 // DBIN2-DAG: [[P5:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P4]]}, object 196 // DBIN2-DAG: [[P6:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]]) 197 // DBIN2-DAG: [[P7:[0-9]+]]: preprocessor, {[[P6]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) 198 // DBIN2-DAG: [[P8:[0-9]+]]: compiler, {[[P7]]}, ir, (device-[[T]], [[ARCH2]]) 199 // DBIN2-DAG: [[P9:[0-9]+]]: backend, {[[P8]]}, assembler, (device-[[T]], [[ARCH2]]) 200 // DBIN2-DAG: [[P10:[0-9]+]]: assembler, {[[P9]]}, object, (device-[[T]], [[ARCH2]]) 201 // DBIN2-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P10]]}, object 202 // DBIN2-NOT: host 203 // 204 // Test two gpu architectures up to the assemble phase in device-only 205 // compilation mode. 206 // 207 // RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-phases \ 208 // RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S \ 209 // RUN: 2>&1 | FileCheck -check-prefixes=DASM2 %s 210 // DASM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T:cuda]], (device-[[T]], [[ARCH:sm_30]]) 211 // DASM2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) 212 // DASM2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) 213 // DASM2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) 214 // DASM2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE:nvptx64-nvidia-cuda]]:[[ARCH]])" {[[P3]]}, assembler 215 // DASM2-DAG: [[P5:[0-9]+]]: input, "{{.*}}cuda-phases.cu", [[T]], (device-[[T]], [[ARCH2:sm_35]]) 216 // DASM2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) 217 // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) 218 // DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]]) 219 // DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] ([[TRIPLE]]:[[ARCH2]])" {[[P8]]}, assembler 220 // DASM2-NOT: host 221