1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// AMDGPU HSA Metadata Streamer.
12 ///
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUHSAMetadataStreamer.h"
17 #include "AMDGPU.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
26 
27 namespace llvm {
28 
29 static cl::opt<bool> DumpHSAMetadata(
30     "amdgpu-dump-hsa-metadata",
31     cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt<bool> VerifyHSAMetadata(
33     "amdgpu-verify-hsa-metadata",
34     cl::desc("Verify AMDGPU HSA Metadata"));
35 
36 namespace AMDGPU {
37 namespace HSAMD {
38 
dump(StringRef HSAMetadataString) const39 void MetadataStreamer::dump(StringRef HSAMetadataString) const {
40   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
41 }
42 
verify(StringRef HSAMetadataString) const43 void MetadataStreamer::verify(StringRef HSAMetadataString) const {
44   errs() << "AMDGPU HSA Metadata Parser Test: ";
45 
46   HSAMD::Metadata FromHSAMetadataString;
47   if (fromString(HSAMetadataString, FromHSAMetadataString)) {
48     errs() << "FAIL\n";
49     return;
50   }
51 
52   std::string ToHSAMetadataString;
53   if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
54     errs() << "FAIL\n";
55     return;
56   }
57 
58   errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
59          << '\n';
60   if (HSAMetadataString != ToHSAMetadataString) {
61     errs() << "Original input: " << HSAMetadataString << '\n'
62            << "Produced output: " << ToHSAMetadataString << '\n';
63   }
64 }
65 
getAccessQualifier(StringRef AccQual) const66 AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const {
67   if (AccQual.empty())
68     return AccessQualifier::Unknown;
69 
70   return StringSwitch<AccessQualifier>(AccQual)
71              .Case("read_only",  AccessQualifier::ReadOnly)
72              .Case("write_only", AccessQualifier::WriteOnly)
73              .Case("read_write", AccessQualifier::ReadWrite)
74              .Default(AccessQualifier::Default);
75 }
76 
getAddressSpaceQualifer(unsigned AddressSpace) const77 AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer(
78     unsigned AddressSpace) const {
79   if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS)
80     return AddressSpaceQualifier::Private;
81   if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS)
82     return AddressSpaceQualifier::Global;
83   if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS)
84     return AddressSpaceQualifier::Constant;
85   if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS)
86     return AddressSpaceQualifier::Local;
87   if (AddressSpace == AMDGPUASI.FLAT_ADDRESS)
88     return AddressSpaceQualifier::Generic;
89   if (AddressSpace == AMDGPUASI.REGION_ADDRESS)
90     return AddressSpaceQualifier::Region;
91 
92   llvm_unreachable("Unknown address space qualifier");
93 }
94 
getValueKind(Type * Ty,StringRef TypeQual,StringRef BaseTypeName) const95 ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual,
96                                          StringRef BaseTypeName) const {
97   if (TypeQual.find("pipe") != StringRef::npos)
98     return ValueKind::Pipe;
99 
100   return StringSwitch<ValueKind>(BaseTypeName)
101              .Case("image1d_t", ValueKind::Image)
102              .Case("image1d_array_t", ValueKind::Image)
103              .Case("image1d_buffer_t", ValueKind::Image)
104              .Case("image2d_t", ValueKind::Image)
105              .Case("image2d_array_t", ValueKind::Image)
106              .Case("image2d_array_depth_t", ValueKind::Image)
107              .Case("image2d_array_msaa_t", ValueKind::Image)
108              .Case("image2d_array_msaa_depth_t", ValueKind::Image)
109              .Case("image2d_depth_t", ValueKind::Image)
110              .Case("image2d_msaa_t", ValueKind::Image)
111              .Case("image2d_msaa_depth_t", ValueKind::Image)
112              .Case("image3d_t", ValueKind::Image)
113              .Case("sampler_t", ValueKind::Sampler)
114              .Case("queue_t", ValueKind::Queue)
115              .Default(isa<PointerType>(Ty) ?
116                           (Ty->getPointerAddressSpace() ==
117                            AMDGPUASI.LOCAL_ADDRESS ?
118                            ValueKind::DynamicSharedPointer :
119                            ValueKind::GlobalBuffer) :
120                       ValueKind::ByValue);
121 }
122 
getValueType(Type * Ty,StringRef TypeName) const123 ValueType MetadataStreamer::getValueType(Type *Ty, StringRef TypeName) const {
124   switch (Ty->getTypeID()) {
125   case Type::IntegerTyID: {
126     auto Signed = !TypeName.startswith("u");
127     switch (Ty->getIntegerBitWidth()) {
128     case 8:
129       return Signed ? ValueType::I8 : ValueType::U8;
130     case 16:
131       return Signed ? ValueType::I16 : ValueType::U16;
132     case 32:
133       return Signed ? ValueType::I32 : ValueType::U32;
134     case 64:
135       return Signed ? ValueType::I64 : ValueType::U64;
136     default:
137       return ValueType::Struct;
138     }
139   }
140   case Type::HalfTyID:
141     return ValueType::F16;
142   case Type::FloatTyID:
143     return ValueType::F32;
144   case Type::DoubleTyID:
145     return ValueType::F64;
146   case Type::PointerTyID:
147     return getValueType(Ty->getPointerElementType(), TypeName);
148   case Type::VectorTyID:
149     return getValueType(Ty->getVectorElementType(), TypeName);
150   default:
151     return ValueType::Struct;
152   }
153 }
154 
getTypeName(Type * Ty,bool Signed) const155 std::string MetadataStreamer::getTypeName(Type *Ty, bool Signed) const {
156   switch (Ty->getTypeID()) {
157   case Type::IntegerTyID: {
158     if (!Signed)
159       return (Twine('u') + getTypeName(Ty, true)).str();
160 
161     auto BitWidth = Ty->getIntegerBitWidth();
162     switch (BitWidth) {
163     case 8:
164       return "char";
165     case 16:
166       return "short";
167     case 32:
168       return "int";
169     case 64:
170       return "long";
171     default:
172       return (Twine('i') + Twine(BitWidth)).str();
173     }
174   }
175   case Type::HalfTyID:
176     return "half";
177   case Type::FloatTyID:
178     return "float";
179   case Type::DoubleTyID:
180     return "double";
181   case Type::VectorTyID: {
182     auto VecTy = cast<VectorType>(Ty);
183     auto ElTy = VecTy->getElementType();
184     auto NumElements = VecTy->getVectorNumElements();
185     return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
186   }
187   default:
188     return "unknown";
189   }
190 }
191 
getWorkGroupDimensions(MDNode * Node) const192 std::vector<uint32_t> MetadataStreamer::getWorkGroupDimensions(
193     MDNode *Node) const {
194   std::vector<uint32_t> Dims;
195   if (Node->getNumOperands() != 3)
196     return Dims;
197 
198   for (auto &Op : Node->operands())
199     Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
200   return Dims;
201 }
202 
getHSACodeProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const203 Kernel::CodeProps::Metadata MetadataStreamer::getHSACodeProps(
204     const MachineFunction &MF,
205     const SIProgramInfo &ProgramInfo) const {
206   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
207   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
208   HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
209   const Function &F = MF.getFunction();
210 
211   assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
212          F.getCallingConv() == CallingConv::SPIR_KERNEL);
213 
214   unsigned MaxKernArgAlign;
215   HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
216                                                                MaxKernArgAlign);
217   HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
218   HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
219   HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u);
220   HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
221   HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
222   HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
223   HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
224   HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
225   HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
226   HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
227   HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
228 
229   return HSACodeProps;
230 }
231 
getHSADebugProps(const MachineFunction & MF,const SIProgramInfo & ProgramInfo) const232 Kernel::DebugProps::Metadata MetadataStreamer::getHSADebugProps(
233     const MachineFunction &MF,
234     const SIProgramInfo &ProgramInfo) const {
235   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
236   HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
237 
238   if (!STM.debuggerSupported())
239     return HSADebugProps;
240 
241   HSADebugProps.mDebuggerABIVersion.push_back(1);
242   HSADebugProps.mDebuggerABIVersion.push_back(0);
243 
244   if (STM.debuggerEmitPrologue()) {
245     HSADebugProps.mPrivateSegmentBufferSGPR =
246         ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
247     HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
248         ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
249   }
250 
251   return HSADebugProps;
252 }
253 
emitVersion()254 void MetadataStreamer::emitVersion() {
255   auto &Version = HSAMetadata.mVersion;
256 
257   Version.push_back(VersionMajor);
258   Version.push_back(VersionMinor);
259 }
260 
emitPrintf(const Module & Mod)261 void MetadataStreamer::emitPrintf(const Module &Mod) {
262   auto &Printf = HSAMetadata.mPrintf;
263 
264   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
265   if (!Node)
266     return;
267 
268   for (auto Op : Node->operands())
269     if (Op->getNumOperands())
270       Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
271 }
272 
emitKernelLanguage(const Function & Func)273 void MetadataStreamer::emitKernelLanguage(const Function &Func) {
274   auto &Kernel = HSAMetadata.mKernels.back();
275 
276   // TODO: What about other languages?
277   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
278   if (!Node || !Node->getNumOperands())
279     return;
280   auto Op0 = Node->getOperand(0);
281   if (Op0->getNumOperands() <= 1)
282     return;
283 
284   Kernel.mLanguage = "OpenCL C";
285   Kernel.mLanguageVersion.push_back(
286       mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
287   Kernel.mLanguageVersion.push_back(
288       mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
289 }
290 
emitKernelAttrs(const Function & Func)291 void MetadataStreamer::emitKernelAttrs(const Function &Func) {
292   auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
293 
294   if (auto Node = Func.getMetadata("reqd_work_group_size"))
295     Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
296   if (auto Node = Func.getMetadata("work_group_size_hint"))
297     Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
298   if (auto Node = Func.getMetadata("vec_type_hint")) {
299     Attrs.mVecTypeHint = getTypeName(
300         cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
301         mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
302   }
303   if (Func.hasFnAttribute("runtime-handle")) {
304     Attrs.mRuntimeHandle =
305         Func.getFnAttribute("runtime-handle").getValueAsString().str();
306   }
307 }
308 
emitKernelArgs(const Function & Func)309 void MetadataStreamer::emitKernelArgs(const Function &Func) {
310   for (auto &Arg : Func.args())
311     emitKernelArg(Arg);
312 
313   emitHiddenKernelArgs(Func);
314 }
315 
emitKernelArg(const Argument & Arg)316 void MetadataStreamer::emitKernelArg(const Argument &Arg) {
317   auto Func = Arg.getParent();
318   auto ArgNo = Arg.getArgNo();
319   const MDNode *Node;
320 
321   StringRef Name;
322   Node = Func->getMetadata("kernel_arg_name");
323   if (Node && ArgNo < Node->getNumOperands())
324     Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
325   else if (Arg.hasName())
326     Name = Arg.getName();
327 
328   StringRef TypeName;
329   Node = Func->getMetadata("kernel_arg_type");
330   if (Node && ArgNo < Node->getNumOperands())
331     TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
332 
333   StringRef BaseTypeName;
334   Node = Func->getMetadata("kernel_arg_base_type");
335   if (Node && ArgNo < Node->getNumOperands())
336     BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
337 
338   StringRef AccQual;
339   if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
340       Arg.hasNoAliasAttr()) {
341     AccQual = "read_only";
342   } else {
343     Node = Func->getMetadata("kernel_arg_access_qual");
344     if (Node && ArgNo < Node->getNumOperands())
345       AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
346   }
347 
348   StringRef TypeQual;
349   Node = Func->getMetadata("kernel_arg_type_qual");
350   if (Node && ArgNo < Node->getNumOperands())
351     TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
352 
353   Type *Ty = Arg.getType();
354   const DataLayout &DL = Func->getParent()->getDataLayout();
355 
356   unsigned PointeeAlign = 0;
357   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
358     if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) {
359       PointeeAlign = Arg.getParamAlignment();
360       if (PointeeAlign == 0)
361         PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
362     }
363   }
364 
365   emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
366                 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
367 }
368 
emitKernelArg(const DataLayout & DL,Type * Ty,ValueKind ValueKind,unsigned PointeeAlign,StringRef Name,StringRef TypeName,StringRef BaseTypeName,StringRef AccQual,StringRef TypeQual)369 void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty,
370                                      ValueKind ValueKind,
371                                      unsigned PointeeAlign,
372                                      StringRef Name,
373                                      StringRef TypeName, StringRef BaseTypeName,
374                                      StringRef AccQual, StringRef TypeQual) {
375   HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
376   auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
377 
378   Arg.mName = Name;
379   Arg.mTypeName = TypeName;
380   Arg.mSize = DL.getTypeAllocSize(Ty);
381   Arg.mAlign = DL.getABITypeAlignment(Ty);
382   Arg.mValueKind = ValueKind;
383   Arg.mValueType = getValueType(Ty, BaseTypeName);
384   Arg.mPointeeAlign = PointeeAlign;
385 
386   if (auto PtrTy = dyn_cast<PointerType>(Ty))
387     Arg.mAddrSpaceQual = getAddressSpaceQualifer(PtrTy->getAddressSpace());
388 
389   Arg.mAccQual = getAccessQualifier(AccQual);
390 
391   // TODO: Emit Arg.mActualAccQual.
392 
393   SmallVector<StringRef, 1> SplitTypeQuals;
394   TypeQual.split(SplitTypeQuals, " ", -1, false);
395   for (StringRef Key : SplitTypeQuals) {
396     auto P = StringSwitch<bool*>(Key)
397                  .Case("const",    &Arg.mIsConst)
398                  .Case("restrict", &Arg.mIsRestrict)
399                  .Case("volatile", &Arg.mIsVolatile)
400                  .Case("pipe",     &Arg.mIsPipe)
401                  .Default(nullptr);
402     if (P)
403       *P = true;
404   }
405 }
406 
emitHiddenKernelArgs(const Function & Func)407 void MetadataStreamer::emitHiddenKernelArgs(const Function &Func) {
408   int HiddenArgNumBytes =
409       getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
410 
411   if (!HiddenArgNumBytes)
412     return;
413 
414   auto &DL = Func.getParent()->getDataLayout();
415   auto Int64Ty = Type::getInt64Ty(Func.getContext());
416 
417   if (HiddenArgNumBytes >= 8)
418     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
419   if (HiddenArgNumBytes >= 16)
420     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
421   if (HiddenArgNumBytes >= 24)
422     emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
423 
424   auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
425                                       AMDGPUASI.GLOBAL_ADDRESS);
426 
427   // Emit "printf buffer" argument if printf is used, otherwise emit dummy
428   // "none" argument.
429   if (HiddenArgNumBytes >= 32) {
430     if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
431       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
432     else
433       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
434   }
435 
436   // Emit "default queue" and "completion action" arguments if enqueue kernel is
437   // used, otherwise emit dummy "none" arguments.
438   if (HiddenArgNumBytes >= 48) {
439     if (Func.hasFnAttribute("calls-enqueue-kernel")) {
440       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
441       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
442     } else {
443       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
444       emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
445     }
446   }
447 }
448 
begin(const Module & Mod)449 void MetadataStreamer::begin(const Module &Mod) {
450   AMDGPUASI = getAMDGPUAS(Mod);
451   emitVersion();
452   emitPrintf(Mod);
453 }
454 
end()455 void MetadataStreamer::end() {
456   std::string HSAMetadataString;
457   if (toString(HSAMetadata, HSAMetadataString))
458     return;
459 
460   if (DumpHSAMetadata)
461     dump(HSAMetadataString);
462   if (VerifyHSAMetadata)
463     verify(HSAMetadataString);
464 }
465 
emitKernel(const MachineFunction & MF,const SIProgramInfo & ProgramInfo)466 void MetadataStreamer::emitKernel(const MachineFunction &MF, const SIProgramInfo &ProgramInfo) {
467   auto &Func = MF.getFunction();
468   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
469     return;
470 
471   auto CodeProps = getHSACodeProps(MF, ProgramInfo);
472   auto DebugProps = getHSADebugProps(MF, ProgramInfo);
473 
474   HSAMetadata.mKernels.push_back(Kernel::Metadata());
475   auto &Kernel = HSAMetadata.mKernels.back();
476 
477   Kernel.mName = Func.getName();
478   Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
479   emitKernelLanguage(Func);
480   emitKernelAttrs(Func);
481   emitKernelArgs(Func);
482   HSAMetadata.mKernels.back().mCodeProps = CodeProps;
483   HSAMetadata.mKernels.back().mDebugProps = DebugProps;
484 }
485 
486 } // end namespace HSAMD
487 } // end namespace AMDGPU
488 } // end namespace llvm
489