1 //===- ConvertToROCDLIR.cpp - MLIR to LLVM IR conversion ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a translation between the MLIR LLVM + ROCDL dialects and
10 // LLVM IR with ROCDL intrinsics and metadata.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "mlir/Target/ROCDLIR.h"
15 
16 #include "mlir/Dialect/GPU/GPUDialect.h"
17 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
18 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
19 #include "mlir/IR/BuiltinOps.h"
20 #include "mlir/Target/LLVMIR/ModuleTranslation.h"
21 #include "mlir/Translation.h"
22 
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/IR/IntrinsicsAMDGPU.h"
25 #include "llvm/IR/Module.h"
26 #include "llvm/Support/ToolOutputFile.h"
27 
28 using namespace mlir;
29 
30 // Create a call to llvm intrinsic
createIntrinsicCall(llvm::IRBuilder<> & builder,llvm::Intrinsic::ID intrinsic,ArrayRef<llvm::Value * > args={},ArrayRef<llvm::Type * > tys={})31 static llvm::Value *createIntrinsicCall(llvm::IRBuilder<> &builder,
32                                         llvm::Intrinsic::ID intrinsic,
33                                         ArrayRef<llvm::Value *> args = {},
34                                         ArrayRef<llvm::Type *> tys = {}) {
35   llvm::Module *module = builder.GetInsertBlock()->getModule();
36   llvm::Function *fn = llvm::Intrinsic::getDeclaration(module, intrinsic, tys);
37   return builder.CreateCall(fn, args);
38 }
39 
40 // Create a call to ROCm-Device-Library function
41 //   Currently this routine will work only for calling ROCDL functions that
42 // take a single int32 argument. It is likely that the interface of this
43 // function will change to make it more generic.
createDeviceFunctionCall(llvm::IRBuilder<> & builder,StringRef fn_name,int parameter)44 static llvm::Value *createDeviceFunctionCall(llvm::IRBuilder<> &builder,
45                                              StringRef fn_name, int parameter) {
46   llvm::Module *module = builder.GetInsertBlock()->getModule();
47   llvm::FunctionType *function_type = llvm::FunctionType::get(
48       llvm::Type::getInt64Ty(module->getContext()), // return type.
49       llvm::Type::getInt32Ty(module->getContext()), // parameter type.
50       false);                                       // no variadic arguments.
51   llvm::Function *fn = dyn_cast<llvm::Function>(
52       module->getOrInsertFunction(fn_name, function_type).getCallee());
53   llvm::Value *fn_op0 = llvm::ConstantInt::get(
54       llvm::Type::getInt32Ty(module->getContext()), parameter);
55   return builder.CreateCall(fn, ArrayRef<llvm::Value *>(fn_op0));
56 }
57 
58 namespace {
59 class ModuleTranslation : public LLVM::ModuleTranslation {
60 public:
61   using LLVM::ModuleTranslation::ModuleTranslation;
62 
63 protected:
convertOperation(Operation & opInst,llvm::IRBuilder<> & builder)64   LogicalResult convertOperation(Operation &opInst,
65                                  llvm::IRBuilder<> &builder) override {
66 
67 #include "mlir/Dialect/LLVMIR/ROCDLConversions.inc"
68 
69     return LLVM::ModuleTranslation::convertOperation(opInst, builder);
70   }
71 
72   /// Allow access to the constructor.
73   friend LLVM::ModuleTranslation;
74 };
75 } // namespace
76 
77 std::unique_ptr<llvm::Module>
translateModuleToROCDLIR(Operation * m,llvm::LLVMContext & llvmContext,StringRef name)78 mlir::translateModuleToROCDLIR(Operation *m, llvm::LLVMContext &llvmContext,
79                                StringRef name) {
80   // lower MLIR (with RODL Dialect) to LLVM IR (with ROCDL intrinsics)
81   auto llvmModule = LLVM::ModuleTranslation::translateModule<ModuleTranslation>(
82       m, llvmContext, name);
83 
84   // foreach GPU kernel
85   // 1. Insert AMDGPU_KERNEL calling convention.
86   // 2. Insert amdgpu-flat-workgroup-size(1, 1024) attribute.
87   for (auto func :
88        ModuleTranslation::getModuleBody(m).getOps<LLVM::LLVMFuncOp>()) {
89     if (!func->getAttrOfType<UnitAttr>(
90             gpu::GPUDialect::getKernelFuncAttrName()))
91       continue;
92 
93     auto *llvmFunc = llvmModule->getFunction(func.getName());
94 
95     llvmFunc->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
96 
97     llvmFunc->addFnAttr("amdgpu-flat-work-group-size", "1, 1024");
98   }
99 
100   return llvmModule;
101 }
102 
103 namespace mlir {
registerToROCDLIRTranslation()104 void registerToROCDLIRTranslation() {
105   TranslateFromMLIRRegistration registration(
106       "mlir-to-rocdlir",
107       [](ModuleOp module, raw_ostream &output) {
108         llvm::LLVMContext llvmContext;
109         auto llvmModule = mlir::translateModuleToROCDLIR(module, llvmContext);
110         if (!llvmModule)
111           return failure();
112 
113         llvmModule->print(output, nullptr);
114         return success();
115       },
116       [](DialectRegistry &registry) {
117         registry.insert<ROCDL::ROCDLDialect, LLVM::LLVMDialect>();
118       });
119 }
120 } // namespace mlir
121