1 //===-- clang-offload-wrapper/ClangOffloadWrapper.cpp -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
/// Implementation of the offload wrapper tool. It takes offload target binaries
/// as input and creates a wrapper bitcode file containing the target binaries
/// packaged as data. The wrapper bitcode also includes initialization code that
/// registers the target binaries with the offloading runtime at program
/// startup.
14 ///
15 //===----------------------------------------------------------------------===//
16
17 #include "clang/Basic/Version.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/Bitcode/BitcodeWriter.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/GlobalVariable.h"
23 #include "llvm/IR/IRBuilder.h"
24 #include "llvm/IR/LLVMContext.h"
25 #include "llvm/IR/Module.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/Support/Errc.h"
28 #include "llvm/Support/Error.h"
29 #include "llvm/Support/ErrorOr.h"
30 #include "llvm/Support/MemoryBuffer.h"
31 #include "llvm/Support/Signals.h"
32 #include "llvm/Support/ToolOutputFile.h"
33 #include "llvm/Support/WithColor.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/Transforms/Utils/ModuleUtils.h"
36 #include <cassert>
37 #include <cstdint>
38
39 using namespace llvm;
40
41 static cl::opt<bool> Help("h", cl::desc("Alias for -help"), cl::Hidden);
42
43 // Mark all our options with this category, everything else (except for -version
44 // and -help) will be hidden.
45 static cl::OptionCategory
46 ClangOffloadWrapperCategory("clang-offload-wrapper options");
47
48 static cl::opt<std::string> Output("o", cl::Required,
49 cl::desc("Output filename"),
50 cl::value_desc("filename"),
51 cl::cat(ClangOffloadWrapperCategory));
52
53 static cl::list<std::string> Inputs(cl::Positional, cl::OneOrMore,
54 cl::desc("<input files>"),
55 cl::cat(ClangOffloadWrapperCategory));
56
57 static cl::opt<std::string>
58 Target("target", cl::Required,
59 cl::desc("Target triple for the output module"),
60 cl::value_desc("triple"), cl::cat(ClangOffloadWrapperCategory));
61
62 namespace {
63
64 class BinaryWrapper {
65 LLVMContext C;
66 Module M;
67
68 StructType *EntryTy = nullptr;
69 StructType *ImageTy = nullptr;
70 StructType *DescTy = nullptr;
71
72 private:
getSizeTTy()73 IntegerType *getSizeTTy() {
74 switch (M.getDataLayout().getPointerTypeSize(Type::getInt8PtrTy(C))) {
75 case 4u:
76 return Type::getInt32Ty(C);
77 case 8u:
78 return Type::getInt64Ty(C);
79 }
80 llvm_unreachable("unsupported pointer type size");
81 }
82
83 // struct __tgt_offload_entry {
84 // void *addr;
85 // char *name;
86 // size_t size;
87 // int32_t flags;
88 // int32_t reserved;
89 // };
getEntryTy()90 StructType *getEntryTy() {
91 if (!EntryTy)
92 EntryTy = StructType::create("__tgt_offload_entry", Type::getInt8PtrTy(C),
93 Type::getInt8PtrTy(C), getSizeTTy(),
94 Type::getInt32Ty(C), Type::getInt32Ty(C));
95 return EntryTy;
96 }
97
getEntryPtrTy()98 PointerType *getEntryPtrTy() { return PointerType::getUnqual(getEntryTy()); }
99
100 // struct __tgt_device_image {
101 // void *ImageStart;
102 // void *ImageEnd;
103 // __tgt_offload_entry *EntriesBegin;
104 // __tgt_offload_entry *EntriesEnd;
105 // };
getDeviceImageTy()106 StructType *getDeviceImageTy() {
107 if (!ImageTy)
108 ImageTy = StructType::create("__tgt_device_image", Type::getInt8PtrTy(C),
109 Type::getInt8PtrTy(C), getEntryPtrTy(),
110 getEntryPtrTy());
111 return ImageTy;
112 }
113
getDeviceImagePtrTy()114 PointerType *getDeviceImagePtrTy() {
115 return PointerType::getUnqual(getDeviceImageTy());
116 }
117
118 // struct __tgt_bin_desc {
119 // int32_t NumDeviceImages;
120 // __tgt_device_image *DeviceImages;
121 // __tgt_offload_entry *HostEntriesBegin;
122 // __tgt_offload_entry *HostEntriesEnd;
123 // };
getBinDescTy()124 StructType *getBinDescTy() {
125 if (!DescTy)
126 DescTy = StructType::create("__tgt_bin_desc", Type::getInt32Ty(C),
127 getDeviceImagePtrTy(), getEntryPtrTy(),
128 getEntryPtrTy());
129 return DescTy;
130 }
131
getBinDescPtrTy()132 PointerType *getBinDescPtrTy() {
133 return PointerType::getUnqual(getBinDescTy());
134 }
135
136 /// Creates binary descriptor for the given device images. Binary descriptor
137 /// is an object that is passed to the offloading runtime at program startup
138 /// and it describes all device images available in the executable or shared
139 /// library. It is defined as follows
140 ///
141 /// __attribute__((visibility("hidden")))
142 /// extern __tgt_offload_entry *__start_omp_offloading_entries;
143 /// __attribute__((visibility("hidden")))
144 /// extern __tgt_offload_entry *__stop_omp_offloading_entries;
145 ///
146 /// static const char Image0[] = { <Bufs.front() contents> };
147 /// ...
148 /// static const char ImageN[] = { <Bufs.back() contents> };
149 ///
150 /// static const __tgt_device_image Images[] = {
151 /// {
152 /// Image0, /*ImageStart*/
153 /// Image0 + sizeof(Image0), /*ImageEnd*/
154 /// __start_omp_offloading_entries, /*EntriesBegin*/
155 /// __stop_omp_offloading_entries /*EntriesEnd*/
156 /// },
157 /// ...
158 /// {
159 /// ImageN, /*ImageStart*/
160 /// ImageN + sizeof(ImageN), /*ImageEnd*/
161 /// __start_omp_offloading_entries, /*EntriesBegin*/
162 /// __stop_omp_offloading_entries /*EntriesEnd*/
163 /// }
164 /// };
165 ///
166 /// static const __tgt_bin_desc BinDesc = {
167 /// sizeof(Images) / sizeof(Images[0]), /*NumDeviceImages*/
168 /// Images, /*DeviceImages*/
169 /// __start_omp_offloading_entries, /*HostEntriesBegin*/
170 /// __stop_omp_offloading_entries /*HostEntriesEnd*/
171 /// };
172 ///
173 /// Global variable that represents BinDesc is returned.
createBinDesc(ArrayRef<ArrayRef<char>> Bufs)174 GlobalVariable *createBinDesc(ArrayRef<ArrayRef<char>> Bufs) {
175 // Create external begin/end symbols for the offload entries table.
176 auto *EntriesB = new GlobalVariable(
177 M, getEntryTy(), /*isConstant*/ true, GlobalValue::ExternalLinkage,
178 /*Initializer*/ nullptr, "__start_omp_offloading_entries");
179 EntriesB->setVisibility(GlobalValue::HiddenVisibility);
180 auto *EntriesE = new GlobalVariable(
181 M, getEntryTy(), /*isConstant*/ true, GlobalValue::ExternalLinkage,
182 /*Initializer*/ nullptr, "__stop_omp_offloading_entries");
183 EntriesE->setVisibility(GlobalValue::HiddenVisibility);
184
185 // We assume that external begin/end symbols that we have created above will
186 // be defined by the linker. But linker will do that only if linker inputs
187 // have section with "omp_offloading_entries" name which is not guaranteed.
188 // So, we just create dummy zero sized object in the offload entries section
189 // to force linker to define those symbols.
190 auto *DummyInit =
191 ConstantAggregateZero::get(ArrayType::get(getEntryTy(), 0u));
192 auto *DummyEntry = new GlobalVariable(
193 M, DummyInit->getType(), true, GlobalVariable::ExternalLinkage,
194 DummyInit, "__dummy.omp_offloading.entry");
195 DummyEntry->setSection("omp_offloading_entries");
196 DummyEntry->setVisibility(GlobalValue::HiddenVisibility);
197
198 auto *Zero = ConstantInt::get(getSizeTTy(), 0u);
199 Constant *ZeroZero[] = {Zero, Zero};
200
201 // Create initializer for the images array.
202 SmallVector<Constant *, 4u> ImagesInits;
203 ImagesInits.reserve(Bufs.size());
204 for (ArrayRef<char> Buf : Bufs) {
205 auto *Data = ConstantDataArray::get(C, Buf);
206 auto *Image = new GlobalVariable(M, Data->getType(), /*isConstant*/ true,
207 GlobalVariable::InternalLinkage, Data,
208 ".omp_offloading.device_image");
209 Image->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
210
211 auto *Size = ConstantInt::get(getSizeTTy(), Buf.size());
212 Constant *ZeroSize[] = {Zero, Size};
213
214 auto *ImageB = ConstantExpr::getGetElementPtr(Image->getValueType(),
215 Image, ZeroZero);
216 auto *ImageE = ConstantExpr::getGetElementPtr(Image->getValueType(),
217 Image, ZeroSize);
218
219 ImagesInits.push_back(ConstantStruct::get(getDeviceImageTy(), ImageB,
220 ImageE, EntriesB, EntriesE));
221 }
222
223 // Then create images array.
224 auto *ImagesData = ConstantArray::get(
225 ArrayType::get(getDeviceImageTy(), ImagesInits.size()), ImagesInits);
226
227 auto *Images =
228 new GlobalVariable(M, ImagesData->getType(), /*isConstant*/ true,
229 GlobalValue::InternalLinkage, ImagesData,
230 ".omp_offloading.device_images");
231 Images->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
232
233 auto *ImagesB = ConstantExpr::getGetElementPtr(Images->getValueType(),
234 Images, ZeroZero);
235
236 // And finally create the binary descriptor object.
237 auto *DescInit = ConstantStruct::get(
238 getBinDescTy(),
239 ConstantInt::get(Type::getInt32Ty(C), ImagesInits.size()), ImagesB,
240 EntriesB, EntriesE);
241
242 return new GlobalVariable(M, DescInit->getType(), /*isConstant*/ true,
243 GlobalValue::InternalLinkage, DescInit,
244 ".omp_offloading.descriptor");
245 }
246
createRegisterFunction(GlobalVariable * BinDesc)247 void createRegisterFunction(GlobalVariable *BinDesc) {
248 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
249 auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
250 ".omp_offloading.descriptor_reg", &M);
251 Func->setSection(".text.startup");
252
253 // Get __tgt_register_lib function declaration.
254 auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(),
255 /*isVarArg*/ false);
256 FunctionCallee RegFuncC =
257 M.getOrInsertFunction("__tgt_register_lib", RegFuncTy);
258
259 // Construct function body
260 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
261 Builder.CreateCall(RegFuncC, BinDesc);
262 Builder.CreateRetVoid();
263
264 // Add this function to constructors.
265 // Set priority to 1 so that __tgt_register_lib is executed AFTER
266 // __tgt_register_requires (we want to know what requirements have been
267 // asked for before we load a libomptarget plugin so that by the time the
268 // plugin is loaded it can report how many devices there are which can
269 // satisfy these requirements).
270 appendToGlobalCtors(M, Func, /*Priority*/ 1);
271 }
272
createUnregisterFunction(GlobalVariable * BinDesc)273 void createUnregisterFunction(GlobalVariable *BinDesc) {
274 auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
275 auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
276 ".omp_offloading.descriptor_unreg", &M);
277 Func->setSection(".text.startup");
278
279 // Get __tgt_unregister_lib function declaration.
280 auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(),
281 /*isVarArg*/ false);
282 FunctionCallee UnRegFuncC =
283 M.getOrInsertFunction("__tgt_unregister_lib", UnRegFuncTy);
284
285 // Construct function body
286 IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
287 Builder.CreateCall(UnRegFuncC, BinDesc);
288 Builder.CreateRetVoid();
289
290 // Add this function to global destructors.
291 // Match priority of __tgt_register_lib
292 appendToGlobalDtors(M, Func, /*Priority*/ 1);
293 }
294
295 public:
BinaryWrapper(StringRef Target)296 BinaryWrapper(StringRef Target) : M("offload.wrapper.object", C) {
297 M.setTargetTriple(Target);
298 }
299
wrapBinaries(ArrayRef<ArrayRef<char>> Binaries)300 const Module &wrapBinaries(ArrayRef<ArrayRef<char>> Binaries) {
301 GlobalVariable *Desc = createBinDesc(Binaries);
302 assert(Desc && "no binary descriptor");
303 createRegisterFunction(Desc);
304 createUnregisterFunction(Desc);
305 return M;
306 }
307 };
308
309 } // anonymous namespace
310
main(int argc,const char ** argv)311 int main(int argc, const char **argv) {
312 sys::PrintStackTraceOnErrorSignal(argv[0]);
313
314 cl::HideUnrelatedOptions(ClangOffloadWrapperCategory);
315 cl::SetVersionPrinter([](raw_ostream &OS) {
316 OS << clang::getClangToolFullVersion("clang-offload-wrapper") << '\n';
317 });
318 cl::ParseCommandLineOptions(
319 argc, argv,
320 "A tool to create a wrapper bitcode for offload target binaries. Takes "
321 "offload\ntarget binaries as input and produces bitcode file containing "
322 "target binaries packaged\nas data and initialization code which "
323 "registers target binaries in offload runtime.\n");
324
325 if (Help) {
326 cl::PrintHelpMessage();
327 return 0;
328 }
329
330 auto reportError = [argv](Error E) {
331 logAllUnhandledErrors(std::move(E), WithColor::error(errs(), argv[0]));
332 };
333
334 if (Triple(Target).getArch() == Triple::UnknownArch) {
335 reportError(createStringError(
336 errc::invalid_argument, "'" + Target + "': unsupported target triple"));
337 return 1;
338 }
339
340 // Read device binaries.
341 SmallVector<std::unique_ptr<MemoryBuffer>, 4u> Buffers;
342 SmallVector<ArrayRef<char>, 4u> Images;
343 Buffers.reserve(Inputs.size());
344 Images.reserve(Inputs.size());
345 for (const std::string &File : Inputs) {
346 ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
347 MemoryBuffer::getFileOrSTDIN(File);
348 if (!BufOrErr) {
349 reportError(createFileError(File, BufOrErr.getError()));
350 return 1;
351 }
352 const std::unique_ptr<MemoryBuffer> &Buf =
353 Buffers.emplace_back(std::move(*BufOrErr));
354 Images.emplace_back(Buf->getBufferStart(), Buf->getBufferSize());
355 }
356
357 // Create the output file to write the resulting bitcode to.
358 std::error_code EC;
359 ToolOutputFile Out(Output, EC, sys::fs::OF_None);
360 if (EC) {
361 reportError(createFileError(Output, EC));
362 return 1;
363 }
364
365 // Create a wrapper for device binaries and write its bitcode to the file.
366 WriteBitcodeToFile(BinaryWrapper(Target).wrapBinaries(
367 makeArrayRef(Images.data(), Images.size())),
368 Out.os());
369 if (Out.os().has_error()) {
370 reportError(createFileError(Output, Out.os().error()));
371 return 1;
372 }
373
374 // Success.
375 Out.keep();
376 return 0;
377 }
378