1 //===-- clang-offload-wrapper/ClangOffloadWrapper.cpp -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
/// Implementation of the offload wrapper tool. It takes offload target binaries
/// as input and creates a wrapper bitcode file containing the target binaries
/// packaged as data. The wrapper bitcode also includes initialization code
/// which registers the target binaries with the offloading runtime at program
/// startup.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #include "clang/Basic/Version.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/Bitcode/BitcodeWriter.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/GlobalVariable.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/IR/LLVMContext.h"
25 #include "llvm/IR/Module.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Errc.h"
28 #include "llvm/Support/Error.h"
29 #include "llvm/Support/ErrorOr.h"
30 #include "llvm/Support/MemoryBuffer.h"
31 #include "llvm/Support/Signals.h"
32 #include "llvm/Support/ToolOutputFile.h"
33 #include "llvm/Support/WithColor.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/Transforms/Utils/ModuleUtils.h"
36 #include <cassert>
37 #include <cstdint>
38 
39 using namespace llvm;
40 
41 static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
42 
43 // Mark all our options with this category, everything else (except for -version
44 // and -help) will be hidden.
45 static cl::OptionCategory
46     ClangOffloadWrapperCategory("clang-offload-wrapper options");
47 
48 static cl::opt<std::string> Output("o", cl::Required,
49                                    cl::desc("Output filename"),
50                                    cl::value_desc("filename"),
51                                    cl::cat(ClangOffloadWrapperCategory));
52 
53 static cl::list<std::string> Inputs(cl::Positional, cl::OneOrMore,
54                                     cl::desc("<input files>"),
55                                     cl::cat(ClangOffloadWrapperCategory));
56 
57 static cl::opt<std::string>
58     Target("target", cl::Required,
59            cl::desc("Target triple for the output module"),
60            cl::value_desc("triple"), cl::cat(ClangOffloadWrapperCategory));
61 
62 namespace {
63 
64 class BinaryWrapper {
65   LLVMContext C;
66   Module M;
67 
68   StructType *EntryTy = nullptr;
69   StructType *ImageTy = nullptr;
70   StructType *DescTy = nullptr;
71 
72 private:
getSizeTTy()73   IntegerType *getSizeTTy() {
74     switch (M.getDataLayout().getPointerTypeSize(Type::getInt8PtrTy(C))) {
75     case 4u:
76       return Type::getInt32Ty(C);
77     case 8u:
78       return Type::getInt64Ty(C);
79     }
80     llvm_unreachable("unsupported pointer type size");
81   }
82 
83   // struct __tgt_offload_entry {
84   //   void *addr;
85   //   char *name;
86   //   size_t size;
87   //   int32_t flags;
88   //   int32_t reserved;
89   // };
getEntryTy()90   StructType *getEntryTy() {
91     if (!EntryTy)
92       EntryTy = StructType::create("__tgt_offload_entry", Type::getInt8PtrTy(C),
93                                    Type::getInt8PtrTy(C), getSizeTTy(),
94                                    Type::getInt32Ty(C), Type::getInt32Ty(C));
95     return EntryTy;
96   }
97 
getEntryPtrTy()98   PointerType *getEntryPtrTy() { return PointerType::getUnqual(getEntryTy()); }
99 
100   // struct __tgt_device_image {
101   //   void *ImageStart;
102   //   void *ImageEnd;
103   //   __tgt_offload_entry *EntriesBegin;
104   //   __tgt_offload_entry *EntriesEnd;
105   // };
getDeviceImageTy()106   StructType *getDeviceImageTy() {
107     if (!ImageTy)
108       ImageTy = StructType::create("__tgt_device_image", Type::getInt8PtrTy(C),
109                                    Type::getInt8PtrTy(C), getEntryPtrTy(),
110                                    getEntryPtrTy());
111     return ImageTy;
112   }
113 
getDeviceImagePtrTy()114   PointerType *getDeviceImagePtrTy() {
115     return PointerType::getUnqual(getDeviceImageTy());
116   }
117 
118   // struct __tgt_bin_desc {
119   //   int32_t NumDeviceImages;
120   //   __tgt_device_image *DeviceImages;
121   //   __tgt_offload_entry *HostEntriesBegin;
122   //   __tgt_offload_entry *HostEntriesEnd;
123   // };
getBinDescTy()124   StructType *getBinDescTy() {
125     if (!DescTy)
126       DescTy = StructType::create("__tgt_bin_desc", Type::getInt32Ty(C),
127                                   getDeviceImagePtrTy(), getEntryPtrTy(),
128                                   getEntryPtrTy());
129     return DescTy;
130   }
131 
getBinDescPtrTy()132   PointerType *getBinDescPtrTy() {
133     return PointerType::getUnqual(getBinDescTy());
134   }
135 
136   /// Creates binary descriptor for the given device images. Binary descriptor
137   /// is an object that is passed to the offloading runtime at program startup
138   /// and it describes all device images available in the executable or shared
139   /// library. It is defined as follows
140   ///
141   /// __attribute__((visibility("hidden")))
142   /// extern __tgt_offload_entry *__start_omp_offloading_entries;
143   /// __attribute__((visibility("hidden")))
144   /// extern __tgt_offload_entry *__stop_omp_offloading_entries;
145   ///
146   /// static const char Image0[] = { <Bufs.front() contents> };
147   ///  ...
148   /// static const char ImageN[] = { <Bufs.back() contents> };
149   ///
150   /// static const __tgt_device_image Images[] = {
151   ///   {
152   ///     Image0,                            /*ImageStart*/
153   ///     Image0 + sizeof(Image0),           /*ImageEnd*/
154   ///     __start_omp_offloading_entries,    /*EntriesBegin*/
155   ///     __stop_omp_offloading_entries      /*EntriesEnd*/
156   ///   },
157   ///   ...
158   ///   {
159   ///     ImageN,                            /*ImageStart*/
160   ///     ImageN + sizeof(ImageN),           /*ImageEnd*/
161   ///     __start_omp_offloading_entries,    /*EntriesBegin*/
162   ///     __stop_omp_offloading_entries      /*EntriesEnd*/
163   ///   }
164   /// };
165   ///
166   /// static const __tgt_bin_desc BinDesc = {
167   ///   sizeof(Images) / sizeof(Images[0]),  /*NumDeviceImages*/
168   ///   Images,                              /*DeviceImages*/
169   ///   __start_omp_offloading_entries,      /*HostEntriesBegin*/
170   ///   __stop_omp_offloading_entries        /*HostEntriesEnd*/
171   /// };
172   ///
173   /// Global variable that represents BinDesc is returned.
createBinDesc(ArrayRef<ArrayRef<char>> Bufs)174   GlobalVariable *createBinDesc(ArrayRef<ArrayRef<char>> Bufs) {
175     // Create external begin/end symbols for the offload entries table.
176     auto *EntriesB = new GlobalVariable(
177         M, getEntryTy(), /*isConstant*/ true, GlobalValue::ExternalLinkage,
178         /*Initializer*/ nullptr, "__start_omp_offloading_entries");
179     EntriesB->setVisibility(GlobalValue::HiddenVisibility);
180     auto *EntriesE = new GlobalVariable(
181         M, getEntryTy(), /*isConstant*/ true, GlobalValue::ExternalLinkage,
182         /*Initializer*/ nullptr, "__stop_omp_offloading_entries");
183     EntriesE->setVisibility(GlobalValue::HiddenVisibility);
184 
185     // We assume that external begin/end symbols that we have created above will
186     // be defined by the linker. But linker will do that only if linker inputs
187     // have section with "omp_offloading_entries" name which is not guaranteed.
188     // So, we just create dummy zero sized object in the offload entries section
189     // to force linker to define those symbols.
190     auto *DummyInit =
191         ConstantAggregateZero::get(ArrayType::get(getEntryTy(), 0u));
192     auto *DummyEntry = new GlobalVariable(
193         M, DummyInit->getType(), true, GlobalVariable::ExternalLinkage,
194         DummyInit, "__dummy.omp_offloading.entry");
195     DummyEntry->setSection("omp_offloading_entries");
196     DummyEntry->setVisibility(GlobalValue::HiddenVisibility);
197 
198     auto *Zero = ConstantInt::get(getSizeTTy(), 0u);
199     Constant *ZeroZero[] = {Zero, Zero};
200 
201     // Create initializer for the images array.
202     SmallVector<Constant *, 4u> ImagesInits;
203     ImagesInits.reserve(Bufs.size());
204     for (ArrayRef<char> Buf : Bufs) {
205       auto *Data = ConstantDataArray::get(C, Buf);
206       auto *Image = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
207                                        GlobalVariable::InternalLinkage, Data,
208                                        ".omp_offloading.device_image");
209       Image->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
210 
211       auto *Size = ConstantInt::get(getSizeTTy(), Buf.size());
212       Constant *ZeroSize[] = {Zero, Size};
213 
214       auto *ImageB = ConstantExpr::getGetElementPtr(Image->getValueType(),
215                                                     Image, ZeroZero);
216       auto *ImageE = ConstantExpr::getGetElementPtr(Image->getValueType(),
217                                                     Image, ZeroSize);
218 
219       ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(), ImageB,
220                                                 ImageE, EntriesB, EntriesE));
221     }
222 
223     // Then create images array.
224     auto *ImagesData = ConstantArray::get(
225         ArrayType::get(getDeviceImageTy(), ImagesInits.size()), ImagesInits);
226 
227     auto *Images =
228         new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
229                            GlobalValue::InternalLinkage, ImagesData,
230                            ".omp_offloading.device_images");
231     Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
232 
233     auto *ImagesB = ConstantExpr::getGetElementPtr(Images->getValueType(),
234                                                    Images, ZeroZero);
235 
236     // And finally create the binary descriptor object.
237     auto *DescInit = ConstantStruct::get(
238         getBinDescTy(),
239         ConstantInt::get(Type::getInt32Ty(C), ImagesInits.size()), ImagesB,
240         EntriesB, EntriesE);
241 
242     return new GlobalVariable(M, DescInit->getType(), /*isConstant*/ true,
243                               GlobalValue::InternalLinkage, DescInit,
244                               ".omp_offloading.descriptor");
245   }
246 
createRegisterFunction(GlobalVariable * BinDesc)247   void createRegisterFunction(GlobalVariable *BinDesc) {
248     auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
249     auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
250                                   ".omp_offloading.descriptor_reg", &M);
251     Func->setSection(".text.startup");
252 
253     // Get __tgt_register_lib function declaration.
254     auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(),
255                                         /*isVarArg*/ false);
256     FunctionCallee RegFuncC =
257         M.getOrInsertFunction("__tgt_register_lib", RegFuncTy);
258 
259     // Construct function body
260     IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
261     Builder.CreateCall(RegFuncC, BinDesc);
262     Builder.CreateRetVoid();
263 
264     // Add this function to constructors.
265     // Set priority to 1 so that __tgt_register_lib is executed AFTER
266     // __tgt_register_requires (we want to know what requirements have been
267     // asked for before we load a libomptarget plugin so that by the time the
268     // plugin is loaded it can report how many devices there are which can
269     // satisfy these requirements).
270     appendToGlobalCtors(M, Func, /*Priority*/ 1);
271   }
272 
createUnregisterFunction(GlobalVariable * BinDesc)273   void createUnregisterFunction(GlobalVariable *BinDesc) {
274     auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
275     auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
276                                   ".omp_offloading.descriptor_unreg", &M);
277     Func->setSection(".text.startup");
278 
279     // Get __tgt_unregister_lib function declaration.
280     auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(),
281                                           /*isVarArg*/ false);
282     FunctionCallee UnRegFuncC =
283         M.getOrInsertFunction("__tgt_unregister_lib", UnRegFuncTy);
284 
285     // Construct function body
286     IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
287     Builder.CreateCall(UnRegFuncC, BinDesc);
288     Builder.CreateRetVoid();
289 
290     // Add this function to global destructors.
291     // Match priority of __tgt_register_lib
292     appendToGlobalDtors(M, Func, /*Priority*/ 1);
293   }
294 
295 public:
BinaryWrapper(StringRef Target)296   BinaryWrapper(StringRef Target) : M("offload.wrapper.object", C) {
297     M.setTargetTriple(Target);
298   }
299 
wrapBinaries(ArrayRef<ArrayRef<char>> Binaries)300   const Module &wrapBinaries(ArrayRef<ArrayRef<char>> Binaries) {
301     GlobalVariable *Desc = createBinDesc(Binaries);
302     assert(Desc && "no binary descriptor");
303     createRegisterFunction(Desc);
304     createUnregisterFunction(Desc);
305     return M;
306   }
307 };
308 
309 } // anonymous namespace
310 
main(int argc,const char ** argv)311 int main(int argc, const char **argv) {
312   sys::PrintStackTraceOnErrorSignal(argv[0]);
313 
314   cl::HideUnrelatedOptions(ClangOffloadWrapperCategory);
315   cl::SetVersionPrinter([](raw_ostream &OS) {
316     OS << clang::getClangToolFullVersion("clang-offload-wrapper") << '\n';
317   });
318   cl::ParseCommandLineOptions(
319       argc, argv,
320       "A tool to create a wrapper bitcode for offload target binaries. Takes "
321       "offload\ntarget binaries as input and produces bitcode file containing "
322       "target binaries packaged\nas data and initialization code which "
323       "registers target binaries in offload runtime.\n");
324 
325   if (Help) {
326     cl::PrintHelpMessage();
327     return 0;
328   }
329 
330   auto reportError = [argv](Error E) {
331     logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
332   };
333 
334   if (Triple(Target).getArch() == Triple::UnknownArch) {
335     reportError(createStringError(
336         errc::invalid_argument, "'" + Target + "': unsupported target triple"));
337     return 1;
338   }
339 
340   // Read device binaries.
341   SmallVector<std::unique_ptr<MemoryBuffer>, 4u> Buffers;
342   SmallVector<ArrayRef<char>, 4u> Images;
343   Buffers.reserve(Inputs.size());
344   Images.reserve(Inputs.size());
345   for (const std::string &File : Inputs) {
346     ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
347         MemoryBuffer::getFileOrSTDIN(File);
348     if (!BufOrErr) {
349       reportError(createFileError(File, BufOrErr.getError()));
350       return 1;
351     }
352     const std::unique_ptr<MemoryBuffer> &Buf =
353         Buffers.emplace_back(std::move(*BufOrErr));
354     Images.emplace_back(Buf->getBufferStart(), Buf->getBufferSize());
355   }
356 
357   // Create the output file to write the resulting bitcode to.
358   std::error_code EC;
359   ToolOutputFile Out(Output, EC, sys::fs::OF_None);
360   if (EC) {
361     reportError(createFileError(Output, EC));
362     return 1;
363   }
364 
365   // Create a wrapper for device binaries and write its bitcode to the file.
366   WriteBitcodeToFile(BinaryWrapper(Target).wrapBinaries(
367                          makeArrayRef(Images.data(), Images.size())),
368                      Out.os());
369   if (Out.os().has_error()) {
370     reportError(createFileError(Output, Out.os().error()));
371     return 1;
372   }
373 
374   // Success.
375   Out.keep();
376   return 0;
377 }
378