1 //===- ConvertKernelFuncToBlob.cpp - MLIR GPU lowering passes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements a pass to convert gpu kernel functions into a
10 // corresponding binary blob that can be executed on a GPU. Currently
11 // only translates the function itself but no dependencies.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
16 
17 #include "mlir/Dialect/GPU/GPUDialect.h"
18 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
19 #include "mlir/IR/Attributes.h"
20 #include "mlir/IR/Builders.h"
21 #include "mlir/IR/BuiltinOps.h"
22 #include "mlir/Pass/Pass.h"
23 #include "mlir/Pass/PassRegistry.h"
24 #include "mlir/Support/LogicalResult.h"
25 
26 #include "llvm/ADT/Optional.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/LegacyPassManager.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/Support/Mutex.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/TargetSelect.h"
35 #include "llvm/Target/TargetMachine.h"
36 
37 using namespace mlir;
38 
39 namespace {
40 
41 /// A pass converting tagged kernel modules to a blob with target instructions.
42 ///
43 /// If tagged as a kernel module, each contained function is translated to
44 /// user-specified IR. A user provided BlobGenerator then compiles the IR to
45 /// GPU binary code, which is then attached as an attribute to the function.
46 /// The function body is erased.
47 class GpuKernelToBlobPass
48     : public PassWrapper<GpuKernelToBlobPass, OperationPass<gpu::GPUModuleOp>> {
49 public:
GpuKernelToBlobPass(LoweringCallback loweringCallback,BlobGenerator blobGenerator,StringRef triple,StringRef targetChip,StringRef features,StringRef gpuBinaryAnnotation)50   GpuKernelToBlobPass(LoweringCallback loweringCallback,
51                       BlobGenerator blobGenerator, StringRef triple,
52                       StringRef targetChip, StringRef features,
53                       StringRef gpuBinaryAnnotation)
54       : loweringCallback(loweringCallback), blobGenerator(blobGenerator),
55         triple(triple), targetChip(targetChip), features(features),
56         blobAnnotation(gpuBinaryAnnotation) {}
57 
runOnOperation()58   void runOnOperation() override {
59     gpu::GPUModuleOp module = getOperation();
60 
61     // Lower the module to an LLVM IR module using a separate context to enable
62     // multi-threaded processing.
63     llvm::LLVMContext llvmContext;
64     std::unique_ptr<llvm::Module> llvmModule =
65         loweringCallback(module, llvmContext, "LLVMDialectModule");
66     if (!llvmModule)
67       return signalPassFailure();
68 
69     // Translate the llvm module to a target blob and attach the result as
70     // attribute to the module.
71     if (auto blobAttr = translateGPUModuleToBinaryAnnotation(
72             *llvmModule, module.getLoc(), module.getName()))
73       module.setAttr(blobAnnotation, blobAttr);
74     else
75       signalPassFailure();
76   }
77 
78 private:
79   std::string translateModuleToISA(llvm::Module &module,
80                                    llvm::TargetMachine &targetMachine);
81 
82   /// Converts llvmModule to a blob with target instructions using the
83   /// user-provided generator. Location is used for error reporting and name is
84   /// forwarded to the blob generator to use in its logging mechanisms.
85   OwnedBlob convertModuleToBlob(llvm::Module &llvmModule, Location loc,
86                                 StringRef name);
87 
88   /// Translates llvmModule to a blob with target instructions and returns the
89   /// result as attribute.
90   StringAttr translateGPUModuleToBinaryAnnotation(llvm::Module &llvmModule,
91                                                   Location loc, StringRef name);
92 
93   LoweringCallback loweringCallback;
94   BlobGenerator blobGenerator;
95   llvm::Triple triple;
96   StringRef targetChip;
97   StringRef features;
98   StringRef blobAnnotation;
99 };
100 
101 } // anonymous namespace
102 
103 std::string
translateModuleToISA(llvm::Module & module,llvm::TargetMachine & targetMachine)104 GpuKernelToBlobPass::translateModuleToISA(llvm::Module &module,
105                                           llvm::TargetMachine &targetMachine) {
106   std::string targetISA;
107   {
108     llvm::raw_string_ostream stream(targetISA);
109     llvm::buffer_ostream pstream(stream);
110     llvm::legacy::PassManager codegenPasses;
111     targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
112                                       llvm::CGFT_AssemblyFile);
113     codegenPasses.run(module);
114   }
115 
116   return targetISA;
117 }
118 
convertModuleToBlob(llvm::Module & llvmModule,Location loc,StringRef name)119 OwnedBlob GpuKernelToBlobPass::convertModuleToBlob(llvm::Module &llvmModule,
120                                                    Location loc,
121                                                    StringRef name) {
122   std::unique_ptr<llvm::TargetMachine> targetMachine;
123   {
124     std::string error;
125     const llvm::Target *target =
126         llvm::TargetRegistry::lookupTarget("", triple, error);
127     if (target == nullptr) {
128       emitError(loc, "cannot initialize target triple");
129       return {};
130     }
131     targetMachine.reset(target->createTargetMachine(triple.str(), targetChip,
132                                                     features, {}, {}));
133     if (targetMachine == nullptr) {
134       emitError(loc, "connot initialize target machine");
135       return {};
136     }
137   }
138 
139   llvmModule.setDataLayout(targetMachine->createDataLayout());
140 
141   auto targetISA = translateModuleToISA(llvmModule, *targetMachine);
142 
143   return blobGenerator(targetISA, loc, name);
144 }
145 
translateGPUModuleToBinaryAnnotation(llvm::Module & llvmModule,Location loc,StringRef name)146 StringAttr GpuKernelToBlobPass::translateGPUModuleToBinaryAnnotation(
147     llvm::Module &llvmModule, Location loc, StringRef name) {
148   auto blob = convertModuleToBlob(llvmModule, loc, name);
149   if (!blob)
150     return {};
151   return StringAttr::get({blob->data(), blob->size()}, loc->getContext());
152 }
153 
154 std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback,BlobGenerator blobGenerator,StringRef triple,StringRef targetChip,StringRef features,StringRef gpuBinaryAnnotation)155 mlir::createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback,
156                                        BlobGenerator blobGenerator,
157                                        StringRef triple, StringRef targetChip,
158                                        StringRef features,
159                                        StringRef gpuBinaryAnnotation) {
160   return std::make_unique<GpuKernelToBlobPass>(loweringCallback, blobGenerator,
161                                                triple, targetChip, features,
162                                                gpuBinaryAnnotation);
163 }
164