1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
22
23 using namespace clang;
24 using namespace clang::targets;
25
26 namespace clang {
27 namespace targets {
28
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
31
// LLVM data layout description installed by the AMDGPUTargetInfo constructor
// (via resetDataLayout) when the triple is not amdgcn. The "p:32:32" entry
// gives 32-bit default pointers, and "A5" names address space 5 for allocas;
// the constructor asserts that the alloca address space equals Private.
static const char *const DataLayoutStringR600 =
    "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
35
// LLVM data layout description installed by the AMDGPUTargetInfo constructor
// (via resetDataLayout) when the triple is amdgcn. Default pointers are
// 64-bit ("p:64:64"); the "p1".."p6" entries give per-address-space pointer
// sizes, and "A5" names address space 5 for allocas (asserted to equal
// Private in the constructor).
// NOTE(review): "ni:7" presumably marks address space 7 as non-integral, per
// the LLVM data layout syntax - confirm against the LangRef.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
    "-ni:7";
41
// Language address space -> AMDGPU address space table selected by
// setAddressSpaceMap() when the default address space is NOT private.
// Entries are positional: the trailing comment on each line names the
// language address space that the slot corresponds to. The only slot that
// differs from AMDGPUDefIsPrivMap is the first one (Default -> Generic).
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64
};
58
// Language address space -> AMDGPU address space table selected by
// setAddressSpaceMap() when the default address space IS private.
// Entries are positional: the trailing comment on each line names the
// language address space that the slot corresponds to. The only slot that
// differs from AMDGPUDefIsGenMap is the first one (Default -> Private).
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64
};
76 } // namespace targets
77 } // namespace clang
78
// Table of builtin descriptors, produced by expanding every BUILTIN and
// TARGET_BUILTIN entry of BuiltinsAMDGPU.def into one initializer each.
// BUILTIN entries leave the final field null; TARGET_BUILTIN entries store
// the FEATURE argument from the .def entry there. getTargetBuiltins() exposes
// this array to callers.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
86
// Register-name table returned by getGCCRegNames(). The names appear in this
// order: "v0".."v255", "s0".."s127", the named registers ("exec", "vcc",
// "scc", "m0", "flat_scratch" and the "_lo"/"_hi" halves of exec, vcc and
// flat_scratch), and finally "a0".."a255".
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
    "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
    "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
    "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
    "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
    "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
    "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
    "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
    "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
    "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
    "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
    "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
    "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
    "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
    "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
    "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
    "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
    "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
    "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
    "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
    "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
    "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
    "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
    "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
    "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
    "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
    "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
    "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
    "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
    "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
    "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
    "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
    "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
    "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
    "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
    "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
    "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
    "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
    "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
    "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
    "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
    "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
    "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
    "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
    "flat_scratch_lo", "flat_scratch_hi",
    "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
    "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
    "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
    "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
    "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
    "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
    "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
    "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
    "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
    "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
    "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
    "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
    "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
    "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
    "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
    "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
    "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
    "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
    "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
    "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
    "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
    "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
    "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
    "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
    "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
    "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
    "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
    "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
    "a252", "a253", "a254", "a255"
};
163
getGCCRegNames() const164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
165 return llvm::makeArrayRef(GCCRegNames);
166 }
167
initFeatureMap(llvm::StringMap<bool> & Features,DiagnosticsEngine & Diags,StringRef CPU,const std::vector<std::string> & FeatureVec) const168 bool AMDGPUTargetInfo::initFeatureMap(
169 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
170 const std::vector<std::string> &FeatureVec) const {
171
172 using namespace llvm::AMDGPU;
173
174 // XXX - What does the member GPU mean if device name string passed here?
175 if (isAMDGCN(getTriple())) {
176 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
177 case GK_GFX1033:
178 case GK_GFX1032:
179 case GK_GFX1031:
180 case GK_GFX1030:
181 Features["ci-insts"] = true;
182 Features["dot1-insts"] = true;
183 Features["dot2-insts"] = true;
184 Features["dot5-insts"] = true;
185 Features["dot6-insts"] = true;
186 Features["dl-insts"] = true;
187 Features["flat-address-space"] = true;
188 Features["16-bit-insts"] = true;
189 Features["dpp"] = true;
190 Features["gfx8-insts"] = true;
191 Features["gfx9-insts"] = true;
192 Features["gfx10-insts"] = true;
193 Features["gfx10-3-insts"] = true;
194 Features["s-memrealtime"] = true;
195 break;
196 case GK_GFX1012:
197 case GK_GFX1011:
198 Features["dot1-insts"] = true;
199 Features["dot2-insts"] = true;
200 Features["dot5-insts"] = true;
201 Features["dot6-insts"] = true;
202 LLVM_FALLTHROUGH;
203 case GK_GFX1010:
204 Features["dl-insts"] = true;
205 Features["ci-insts"] = true;
206 Features["flat-address-space"] = true;
207 Features["16-bit-insts"] = true;
208 Features["dpp"] = true;
209 Features["gfx8-insts"] = true;
210 Features["gfx9-insts"] = true;
211 Features["gfx10-insts"] = true;
212 Features["s-memrealtime"] = true;
213 break;
214 case GK_GFX908:
215 Features["dot3-insts"] = true;
216 Features["dot4-insts"] = true;
217 Features["dot5-insts"] = true;
218 Features["dot6-insts"] = true;
219 Features["mai-insts"] = true;
220 LLVM_FALLTHROUGH;
221 case GK_GFX906:
222 Features["dl-insts"] = true;
223 Features["dot1-insts"] = true;
224 Features["dot2-insts"] = true;
225 LLVM_FALLTHROUGH;
226 case GK_GFX90C:
227 case GK_GFX909:
228 case GK_GFX904:
229 case GK_GFX902:
230 case GK_GFX900:
231 Features["gfx9-insts"] = true;
232 LLVM_FALLTHROUGH;
233 case GK_GFX810:
234 case GK_GFX805:
235 case GK_GFX803:
236 case GK_GFX802:
237 case GK_GFX801:
238 Features["gfx8-insts"] = true;
239 Features["16-bit-insts"] = true;
240 Features["dpp"] = true;
241 Features["s-memrealtime"] = true;
242 LLVM_FALLTHROUGH;
243 case GK_GFX705:
244 case GK_GFX704:
245 case GK_GFX703:
246 case GK_GFX702:
247 case GK_GFX701:
248 case GK_GFX700:
249 Features["ci-insts"] = true;
250 Features["flat-address-space"] = true;
251 LLVM_FALLTHROUGH;
252 case GK_GFX602:
253 case GK_GFX601:
254 case GK_GFX600:
255 break;
256 case GK_NONE:
257 break;
258 default:
259 llvm_unreachable("Unhandled GPU!");
260 }
261 } else {
262 if (CPU.empty())
263 CPU = "r600";
264
265 switch (llvm::AMDGPU::parseArchR600(CPU)) {
266 case GK_CAYMAN:
267 case GK_CYPRESS:
268 case GK_RV770:
269 case GK_RV670:
270 // TODO: Add fp64 when implemented.
271 break;
272 case GK_TURKS:
273 case GK_CAICOS:
274 case GK_BARTS:
275 case GK_SUMO:
276 case GK_REDWOOD:
277 case GK_JUNIPER:
278 case GK_CEDAR:
279 case GK_RV730:
280 case GK_RV710:
281 case GK_RS880:
282 case GK_R630:
283 case GK_R600:
284 break;
285 default:
286 llvm_unreachable("Unhandled GPU!");
287 }
288 }
289
290 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
291 }
292
fillValidCPUList(SmallVectorImpl<StringRef> & Values) const293 void AMDGPUTargetInfo::fillValidCPUList(
294 SmallVectorImpl<StringRef> &Values) const {
295 if (isAMDGCN(getTriple()))
296 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
297 else
298 llvm::AMDGPU::fillValidArchListR600(Values);
299 }
300
setAddressSpaceMap(bool DefaultIsPrivate)301 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
302 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
303 }
304
AMDGPUTargetInfo(const llvm::Triple & Triple,const TargetOptions & Opts)305 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
306 const TargetOptions &Opts)
307 : TargetInfo(Triple),
308 GPUKind(isAMDGCN(Triple) ?
309 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
310 llvm::AMDGPU::parseArchR600(Opts.CPU)),
311 GPUFeatures(isAMDGCN(Triple) ?
312 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
313 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
314 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
315 : DataLayoutStringR600);
316 assert(DataLayout->getAllocaAddrSpace() == Private);
317 GridValues = llvm::omp::AMDGPUGpuGridValues;
318
319 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
320 !isAMDGCN(Triple));
321 UseAddrSpaceMapMangling = true;
322
323 HasLegalHalfType = true;
324 HasFloat16 = true;
325 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
326 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
327
328 // Set pointer width and alignment for target address space 0.
329 PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
330 if (getMaxPointerWidth() == 64) {
331 LongWidth = LongAlign = 64;
332 SizeType = UnsignedLong;
333 PtrDiffType = SignedLong;
334 IntPtrType = SignedLong;
335 }
336
337 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
338 }
339
adjust(LangOptions & Opts)340 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
341 TargetInfo::adjust(Opts);
342 // ToDo: There are still a few places using default address space as private
343 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
344 // can be removed from the following line.
345 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
346 !isAMDGCN(getTriple()));
347 }
348
getTargetBuiltins() const349 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
350 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
351 Builtin::FirstTSBuiltin);
352 }
353
getTargetDefines(const LangOptions & Opts,MacroBuilder & Builder) const354 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
355 MacroBuilder &Builder) const {
356 Builder.defineMacro("__AMD__");
357 Builder.defineMacro("__AMDGPU__");
358
359 if (isAMDGCN(getTriple()))
360 Builder.defineMacro("__AMDGCN__");
361 else
362 Builder.defineMacro("__R600__");
363
364 if (GPUKind != llvm::AMDGPU::GK_NONE) {
365 StringRef CanonName = isAMDGCN(getTriple()) ?
366 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
367 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
368 if (isAMDGCN(getTriple())) {
369 Builder.defineMacro("__amdgcn_processor__",
370 Twine("\"") + Twine(CanonName) + Twine("\""));
371 Builder.defineMacro("__amdgcn_target_id__",
372 Twine("\"") + Twine(getTargetID().getValue()) +
373 Twine("\""));
374 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
375 auto Loc = OffloadArchFeatures.find(F);
376 if (Loc != OffloadArchFeatures.end()) {
377 std::string NewF = F.str();
378 std::replace(NewF.begin(), NewF.end(), '-', '_');
379 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
380 Twine("__"),
381 Loc->second ? "1" : "0");
382 }
383 }
384 }
385 }
386
387 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
388 // removed in the near future.
389 if (hasFMAF())
390 Builder.defineMacro("__HAS_FMAF__");
391 if (hasFastFMAF())
392 Builder.defineMacro("FP_FAST_FMAF");
393 if (hasLDEXPF())
394 Builder.defineMacro("__HAS_LDEXPF__");
395 if (hasFP64())
396 Builder.defineMacro("__HAS_FP64__");
397 if (hasFastFMA())
398 Builder.defineMacro("FP_FAST_FMA");
399
400 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
401 }
402
setAuxTarget(const TargetInfo * Aux)403 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
404 assert(HalfFormat == Aux->HalfFormat);
405 assert(FloatFormat == Aux->FloatFormat);
406 assert(DoubleFormat == Aux->DoubleFormat);
407
408 // On x86_64 long double is 80-bit extended precision format, which is
409 // not supported by AMDGPU. 128-bit floating point format is also not
410 // supported by AMDGPU. Therefore keep its own format for these two types.
411 auto SaveLongDoubleFormat = LongDoubleFormat;
412 auto SaveFloat128Format = Float128Format;
413 copyAuxTarget(Aux);
414 LongDoubleFormat = SaveLongDoubleFormat;
415 Float128Format = SaveFloat128Format;
416 // For certain builtin types support on the host target, claim they are
417 // support to pass the compilation of the host code during the device-side
418 // compilation.
419 // FIXME: As the side effect, we also accept `__float128` uses in the device
420 // code. To rejct these builtin types supported in the host target but not in
421 // the device target, one approach would support `device_builtin` attribute
422 // so that we could tell the device builtin types from the host ones. The
423 // also solves the different representations of the same builtin type, such
424 // as `size_t` in the MSVC environment.
425 if (Aux->hasFloat128Type()) {
426 HasFloat128 = true;
427 Float128Format = DoubleFormat;
428 }
429 }
430