1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
22 
23 using namespace clang;
24 using namespace clang::targets;
25 
26 namespace clang {
27 namespace targets {
28 
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
31 
// LLVM data layout description used for R600 triples. The literal is split
// by component group purely for readability; adjacent pieces concatenate
// into a single string.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"                                     // little-endian, 32-bit pointers
    "-i64:64"                                       // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256" // vector alignments
    "-v256:256-v512:512-v1024:1024-v2048:2048"      // vector alignments (cont.)
    "-n32:64-S32-A5-G1"; // native int widths, stack align, alloca/global AS
35 
// LLVM data layout description used for AMDGCN triples. The literal is split
// by component group purely for readability; adjacent pieces concatenate
// into a single string.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"                                     // little-endian, default pointers
    "-p1:64:64-p2:32:32-p3:32:32"                   // per-address-space pointers
    "-p4:64:64-p5:32:32-p6:32:32"                   // per-address-space pointers (cont.)
    "-i64:64"                                       // i64 alignment
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256" // vector alignments
    "-v256:256-v512:512-v1024:1024-v2048:2048"      // vector alignments (cont.)
    "-n32:64-S32-A5-G1" // native int widths, stack align, alloca/global AS
    "-ni:7";            // non-integral pointer address space
41 
// Language-to-target address space table that setAddressSpaceMap() installs
// when the default address space is NOT private. There is one entry per
// language address space, named by the trailing comment on each line. The
// table is identical to AMDGPUDefIsPrivMap except that Default maps to
// Generic here.
// NOTE(review): the entry order presumably has to track clang's language
// address space enumeration -- confirm before adding or reordering entries.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    Generic,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64
};
58 
// Language-to-target address space table that setAddressSpaceMap() installs
// when the default address space IS private. There is one entry per language
// address space, named by the trailing comment on each line. The table is
// identical to AMDGPUDefIsGenMap except that Default maps to Private here.
// NOTE(review): the entry order presumably has to track clang's language
// address space enumeration -- confirm before adding or reordering entries.
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    Private,  // Default
    Global,   // opencl_global
    Local,    // opencl_local
    Constant, // opencl_constant
    Private,  // opencl_private
    Generic,  // opencl_generic
    Global,   // opencl_global_device
    Global,   // opencl_global_host
    Global,   // cuda_device
    Constant, // cuda_constant
    Local,    // cuda_shared
    Generic,  // ptr32_sptr
    Generic,  // ptr32_uptr
    Generic   // ptr64

};
76 } // namespace targets
77 } // namespace clang
78 
// Table of AMDGPU target builtins, produced by expanding every entry of
// BuiltinsAMDGPU.def through the two macros defined below. BUILTIN entries
// leave the final (feature) field null; TARGET_BUILTIN entries store FEATURE
// there. getTargetBuiltins() hands this array out to callers.
const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
86 
// Register name table returned by getGCCRegNames(). Entries appear in this
// order: v0-v255, s0-s127, the named registers exec, vcc, scc, m0 and
// flat_scratch together with the _lo/_hi variants of exec, vcc and
// flat_scratch, and finally a0-a255.
// NOTE(review): presumably these are the register names accepted in inline
// assembly -- confirm against the TargetInfo interface.
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};
163 
getGCCRegNames() const164 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
165   return llvm::makeArrayRef(GCCRegNames);
166 }
167 
initFeatureMap(llvm::StringMap<bool> & Features,DiagnosticsEngine & Diags,StringRef CPU,const std::vector<std::string> & FeatureVec) const168 bool AMDGPUTargetInfo::initFeatureMap(
169     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
170     const std::vector<std::string> &FeatureVec) const {
171 
172   using namespace llvm::AMDGPU;
173 
174   // XXX - What does the member GPU mean if device name string passed here?
175   if (isAMDGCN(getTriple())) {
176     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
177     case GK_GFX1033:
178     case GK_GFX1032:
179     case GK_GFX1031:
180     case GK_GFX1030:
181       Features["ci-insts"] = true;
182       Features["dot1-insts"] = true;
183       Features["dot2-insts"] = true;
184       Features["dot5-insts"] = true;
185       Features["dot6-insts"] = true;
186       Features["dl-insts"] = true;
187       Features["flat-address-space"] = true;
188       Features["16-bit-insts"] = true;
189       Features["dpp"] = true;
190       Features["gfx8-insts"] = true;
191       Features["gfx9-insts"] = true;
192       Features["gfx10-insts"] = true;
193       Features["gfx10-3-insts"] = true;
194       Features["s-memrealtime"] = true;
195       break;
196     case GK_GFX1012:
197     case GK_GFX1011:
198       Features["dot1-insts"] = true;
199       Features["dot2-insts"] = true;
200       Features["dot5-insts"] = true;
201       Features["dot6-insts"] = true;
202       LLVM_FALLTHROUGH;
203     case GK_GFX1010:
204       Features["dl-insts"] = true;
205       Features["ci-insts"] = true;
206       Features["flat-address-space"] = true;
207       Features["16-bit-insts"] = true;
208       Features["dpp"] = true;
209       Features["gfx8-insts"] = true;
210       Features["gfx9-insts"] = true;
211       Features["gfx10-insts"] = true;
212       Features["s-memrealtime"] = true;
213       break;
214     case GK_GFX908:
215       Features["dot3-insts"] = true;
216       Features["dot4-insts"] = true;
217       Features["dot5-insts"] = true;
218       Features["dot6-insts"] = true;
219       Features["mai-insts"] = true;
220       LLVM_FALLTHROUGH;
221     case GK_GFX906:
222       Features["dl-insts"] = true;
223       Features["dot1-insts"] = true;
224       Features["dot2-insts"] = true;
225       LLVM_FALLTHROUGH;
226     case GK_GFX90C:
227     case GK_GFX909:
228     case GK_GFX904:
229     case GK_GFX902:
230     case GK_GFX900:
231       Features["gfx9-insts"] = true;
232       LLVM_FALLTHROUGH;
233     case GK_GFX810:
234     case GK_GFX805:
235     case GK_GFX803:
236     case GK_GFX802:
237     case GK_GFX801:
238       Features["gfx8-insts"] = true;
239       Features["16-bit-insts"] = true;
240       Features["dpp"] = true;
241       Features["s-memrealtime"] = true;
242       LLVM_FALLTHROUGH;
243     case GK_GFX705:
244     case GK_GFX704:
245     case GK_GFX703:
246     case GK_GFX702:
247     case GK_GFX701:
248     case GK_GFX700:
249       Features["ci-insts"] = true;
250       Features["flat-address-space"] = true;
251       LLVM_FALLTHROUGH;
252     case GK_GFX602:
253     case GK_GFX601:
254     case GK_GFX600:
255       break;
256     case GK_NONE:
257       break;
258     default:
259       llvm_unreachable("Unhandled GPU!");
260     }
261   } else {
262     if (CPU.empty())
263       CPU = "r600";
264 
265     switch (llvm::AMDGPU::parseArchR600(CPU)) {
266     case GK_CAYMAN:
267     case GK_CYPRESS:
268     case GK_RV770:
269     case GK_RV670:
270       // TODO: Add fp64 when implemented.
271       break;
272     case GK_TURKS:
273     case GK_CAICOS:
274     case GK_BARTS:
275     case GK_SUMO:
276     case GK_REDWOOD:
277     case GK_JUNIPER:
278     case GK_CEDAR:
279     case GK_RV730:
280     case GK_RV710:
281     case GK_RS880:
282     case GK_R630:
283     case GK_R600:
284       break;
285     default:
286       llvm_unreachable("Unhandled GPU!");
287     }
288   }
289 
290   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
291 }
292 
fillValidCPUList(SmallVectorImpl<StringRef> & Values) const293 void AMDGPUTargetInfo::fillValidCPUList(
294     SmallVectorImpl<StringRef> &Values) const {
295   if (isAMDGCN(getTriple()))
296     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
297   else
298     llvm::AMDGPU::fillValidArchListR600(Values);
299 }
300 
setAddressSpaceMap(bool DefaultIsPrivate)301 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
302   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
303 }
304 
AMDGPUTargetInfo(const llvm::Triple & Triple,const TargetOptions & Opts)305 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
306                                    const TargetOptions &Opts)
307     : TargetInfo(Triple),
308       GPUKind(isAMDGCN(Triple) ?
309               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
310               llvm::AMDGPU::parseArchR600(Opts.CPU)),
311       GPUFeatures(isAMDGCN(Triple) ?
312                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
313                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
314   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
315                                         : DataLayoutStringR600);
316   assert(DataLayout->getAllocaAddrSpace() == Private);
317   GridValues = llvm::omp::AMDGPUGpuGridValues;
318 
319   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
320                      !isAMDGCN(Triple));
321   UseAddrSpaceMapMangling = true;
322 
323   HasLegalHalfType = true;
324   HasFloat16 = true;
325   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
326   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
327 
328   // Set pointer width and alignment for target address space 0.
329   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
330   if (getMaxPointerWidth() == 64) {
331     LongWidth = LongAlign = 64;
332     SizeType = UnsignedLong;
333     PtrDiffType = SignedLong;
334     IntPtrType = SignedLong;
335   }
336 
337   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
338 }
339 
adjust(LangOptions & Opts)340 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
341   TargetInfo::adjust(Opts);
342   // ToDo: There are still a few places using default address space as private
343   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
344   // can be removed from the following line.
345   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
346                      !isAMDGCN(getTriple()));
347 }
348 
getTargetBuiltins() const349 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
350   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
351                                              Builtin::FirstTSBuiltin);
352 }
353 
getTargetDefines(const LangOptions & Opts,MacroBuilder & Builder) const354 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
355                                         MacroBuilder &Builder) const {
356   Builder.defineMacro("__AMD__");
357   Builder.defineMacro("__AMDGPU__");
358 
359   if (isAMDGCN(getTriple()))
360     Builder.defineMacro("__AMDGCN__");
361   else
362     Builder.defineMacro("__R600__");
363 
364   if (GPUKind != llvm::AMDGPU::GK_NONE) {
365     StringRef CanonName = isAMDGCN(getTriple()) ?
366       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
367     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
368     if (isAMDGCN(getTriple())) {
369       Builder.defineMacro("__amdgcn_processor__",
370                           Twine("\"") + Twine(CanonName) + Twine("\""));
371       Builder.defineMacro("__amdgcn_target_id__",
372                           Twine("\"") + Twine(getTargetID().getValue()) +
373                               Twine("\""));
374       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
375         auto Loc = OffloadArchFeatures.find(F);
376         if (Loc != OffloadArchFeatures.end()) {
377           std::string NewF = F.str();
378           std::replace(NewF.begin(), NewF.end(), '-', '_');
379           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
380                                   Twine("__"),
381                               Loc->second ? "1" : "0");
382         }
383       }
384     }
385   }
386 
387   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
388   // removed in the near future.
389   if (hasFMAF())
390     Builder.defineMacro("__HAS_FMAF__");
391   if (hasFastFMAF())
392     Builder.defineMacro("FP_FAST_FMAF");
393   if (hasLDEXPF())
394     Builder.defineMacro("__HAS_LDEXPF__");
395   if (hasFP64())
396     Builder.defineMacro("__HAS_FP64__");
397   if (hasFastFMA())
398     Builder.defineMacro("FP_FAST_FMA");
399 
400   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
401 }
402 
setAuxTarget(const TargetInfo * Aux)403 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
404   assert(HalfFormat == Aux->HalfFormat);
405   assert(FloatFormat == Aux->FloatFormat);
406   assert(DoubleFormat == Aux->DoubleFormat);
407 
408   // On x86_64 long double is 80-bit extended precision format, which is
409   // not supported by AMDGPU. 128-bit floating point format is also not
410   // supported by AMDGPU. Therefore keep its own format for these two types.
411   auto SaveLongDoubleFormat = LongDoubleFormat;
412   auto SaveFloat128Format = Float128Format;
413   copyAuxTarget(Aux);
414   LongDoubleFormat = SaveLongDoubleFormat;
415   Float128Format = SaveFloat128Format;
416   // For certain builtin types support on the host target, claim they are
417   // support to pass the compilation of the host code during the device-side
418   // compilation.
419   // FIXME: As the side effect, we also accept `__float128` uses in the device
420   // code. To rejct these builtin types supported in the host target but not in
421   // the device target, one approach would support `device_builtin` attribute
422   // so that we could tell the device builtin types from the host ones. The
423   // also solves the different representations of the same builtin type, such
424   // as `size_t` in the MSVC environment.
425   if (Aux->hasFloat128Type()) {
426     HasFloat128 = true;
427     Float128Format = DoubleFormat;
428   }
429 }
430