1 //===-ThinLTOCodeGenerator.cpp - LLVM Link Time Optimizer -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Thin Link Time Optimization library. This library is
11 // intended to be used by linker to optimize code at link time.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/LTO/legacy/ThinLTOCodeGenerator.h"
16 
17 #ifdef HAVE_LLVM_REVISION
18 #include "LLVMLTORevision.h"
19 #endif
20 
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include "llvm/Analysis/ModuleSummaryAnalysis.h"
24 #include "llvm/Analysis/TargetLibraryInfo.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/Bitcode/BitcodeWriterPass.h"
27 #include "llvm/Bitcode/ReaderWriter.h"
28 #include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
29 #include "llvm/IR/DiagnosticPrinter.h"
30 #include "llvm/IR/LLVMContext.h"
31 #include "llvm/IR/LegacyPassManager.h"
32 #include "llvm/IR/Mangler.h"
33 #include "llvm/IRReader/IRReader.h"
34 #include "llvm/LTO/LTO.h"
35 #include "llvm/Linker/Linker.h"
36 #include "llvm/MC/SubtargetFeature.h"
37 #include "llvm/Object/IRObjectFile.h"
38 #include "llvm/Object/ModuleSummaryIndexObjectFile.h"
39 #include "llvm/Support/CachePruning.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/Path.h"
42 #include "llvm/Support/SHA1.h"
43 #include "llvm/Support/TargetRegistry.h"
44 #include "llvm/Support/ThreadPool.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Transforms/IPO.h"
47 #include "llvm/Transforms/IPO/FunctionImport.h"
48 #include "llvm/Transforms/IPO/Internalize.h"
49 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
50 #include "llvm/Transforms/ObjCARC.h"
51 #include "llvm/Transforms/Utils/FunctionImportUtils.h"
52 
53 #include <numeric>
54 
55 using namespace llvm;
56 
57 #define DEBUG_TYPE "thinlto"
58 
59 namespace llvm {
60 // Flags -discard-value-names, defined in LTOCodeGenerator.cpp
61 extern cl::opt<bool> LTODiscardValueNames;
62 }
63 
64 namespace {
65 
66 static cl::opt<int> ThreadCount("threads",
67                                 cl::init(std::thread::hardware_concurrency()));
68 
diagnosticHandler(const DiagnosticInfo & DI)69 static void diagnosticHandler(const DiagnosticInfo &DI) {
70   DiagnosticPrinterRawOStream DP(errs());
71   DI.print(DP);
72   errs() << '\n';
73 }
74 
75 // Simple helper to save temporary files for debug.
saveTempBitcode(const Module & TheModule,StringRef TempDir,unsigned count,StringRef Suffix)76 static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
77                             unsigned count, StringRef Suffix) {
78   if (TempDir.empty())
79     return;
80   // User asked to save temps, let dump the bitcode file after import.
81   auto SaveTempPath = TempDir + llvm::utostr(count) + Suffix;
82   std::error_code EC;
83   raw_fd_ostream OS(SaveTempPath.str(), EC, sys::fs::F_None);
84   if (EC)
85     report_fatal_error(Twine("Failed to open ") + SaveTempPath +
86                        " to save optimized bitcode\n");
87   WriteBitcodeToFile(&TheModule, OS, /* ShouldPreserveUseListOrder */ true);
88 }
89 
90 static const GlobalValueSummary *
getFirstDefinitionForLinker(const GlobalValueSummaryList & GVSummaryList)91 getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) {
92   // If there is any strong definition anywhere, get it.
93   auto StrongDefForLinker = llvm::find_if(
94       GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
95         auto Linkage = Summary->linkage();
96         return !GlobalValue::isAvailableExternallyLinkage(Linkage) &&
97                !GlobalValue::isWeakForLinker(Linkage);
98       });
99   if (StrongDefForLinker != GVSummaryList.end())
100     return StrongDefForLinker->get();
101   // Get the first *linker visible* definition for this global in the summary
102   // list.
103   auto FirstDefForLinker = llvm::find_if(
104       GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
105         auto Linkage = Summary->linkage();
106         return !GlobalValue::isAvailableExternallyLinkage(Linkage);
107       });
108   // Extern templates can be emitted as available_externally.
109   if (FirstDefForLinker == GVSummaryList.end())
110     return nullptr;
111   return FirstDefForLinker->get();
112 }
113 
114 // Populate map of GUID to the prevailing copy for any multiply defined
115 // symbols. Currently assume first copy is prevailing, or any strong
116 // definition. Can be refined with Linker information in the future.
computePrevailingCopies(const ModuleSummaryIndex & Index,DenseMap<GlobalValue::GUID,const GlobalValueSummary * > & PrevailingCopy)117 static void computePrevailingCopies(
118     const ModuleSummaryIndex &Index,
119     DenseMap<GlobalValue::GUID, const GlobalValueSummary *> &PrevailingCopy) {
120   auto HasMultipleCopies = [&](const GlobalValueSummaryList &GVSummaryList) {
121     return GVSummaryList.size() > 1;
122   };
123 
124   for (auto &I : Index) {
125     if (HasMultipleCopies(I.second))
126       PrevailingCopy[I.first] = getFirstDefinitionForLinker(I.second);
127   }
128 }
129 
130 static StringMap<MemoryBufferRef>
generateModuleMap(const std::vector<MemoryBufferRef> & Modules)131 generateModuleMap(const std::vector<MemoryBufferRef> &Modules) {
132   StringMap<MemoryBufferRef> ModuleMap;
133   for (auto &ModuleBuffer : Modules) {
134     assert(ModuleMap.find(ModuleBuffer.getBufferIdentifier()) ==
135                ModuleMap.end() &&
136            "Expect unique Buffer Identifier");
137     ModuleMap[ModuleBuffer.getBufferIdentifier()] = ModuleBuffer;
138   }
139   return ModuleMap;
140 }
141 
promoteModule(Module & TheModule,const ModuleSummaryIndex & Index)142 static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) {
143   if (renameModuleForThinLTO(TheModule, Index))
144     report_fatal_error("renameModuleForThinLTO failed");
145 }
146 
147 static void
crossImportIntoModule(Module & TheModule,const ModuleSummaryIndex & Index,StringMap<MemoryBufferRef> & ModuleMap,const FunctionImporter::ImportMapTy & ImportList)148 crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
149                       StringMap<MemoryBufferRef> &ModuleMap,
150                       const FunctionImporter::ImportMapTy &ImportList) {
151   ModuleLoader Loader(TheModule.getContext(), ModuleMap);
152   FunctionImporter Importer(Index, Loader);
153   Importer.importFunctions(TheModule, ImportList);
154 }
155 
optimizeModule(Module & TheModule,TargetMachine & TM)156 static void optimizeModule(Module &TheModule, TargetMachine &TM) {
157   // Populate the PassManager
158   PassManagerBuilder PMB;
159   PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
160   PMB.Inliner = createFunctionInliningPass();
161   // FIXME: should get it from the bitcode?
162   PMB.OptLevel = 3;
163   PMB.LoopVectorize = true;
164   PMB.SLPVectorize = true;
165   PMB.VerifyInput = true;
166   PMB.VerifyOutput = false;
167 
168   legacy::PassManager PM;
169 
170   // Add the TTI (required to inform the vectorizer about register size for
171   // instance)
172   PM.add(createTargetTransformInfoWrapperPass(TM.getTargetIRAnalysis()));
173 
174   // Add optimizations
175   PMB.populateThinLTOPassManager(PM);
176 
177   PM.run(TheModule);
178 }
179 
180 // Convert the PreservedSymbols map from "Name" based to "GUID" based.
181 static DenseSet<GlobalValue::GUID>
computeGUIDPreservedSymbols(const StringSet<> & PreservedSymbols,const Triple & TheTriple)182 computeGUIDPreservedSymbols(const StringSet<> &PreservedSymbols,
183                             const Triple &TheTriple) {
184   DenseSet<GlobalValue::GUID> GUIDPreservedSymbols(PreservedSymbols.size());
185   for (auto &Entry : PreservedSymbols) {
186     StringRef Name = Entry.first();
187     if (TheTriple.isOSBinFormatMachO() && Name.size() > 0 && Name[0] == '_')
188       Name = Name.drop_front();
189     GUIDPreservedSymbols.insert(GlobalValue::getGUID(Name));
190   }
191   return GUIDPreservedSymbols;
192 }
193 
codegenModule(Module & TheModule,TargetMachine & TM)194 std::unique_ptr<MemoryBuffer> codegenModule(Module &TheModule,
195                                             TargetMachine &TM) {
196   SmallVector<char, 128> OutputBuffer;
197 
198   // CodeGen
199   {
200     raw_svector_ostream OS(OutputBuffer);
201     legacy::PassManager PM;
202 
203     // If the bitcode files contain ARC code and were compiled with optimization,
204     // the ObjCARCContractPass must be run, so do it unconditionally here.
205     PM.add(createObjCARCContractPass());
206 
207     // Setup the codegen now.
208     if (TM.addPassesToEmitFile(PM, OS, TargetMachine::CGFT_ObjectFile,
209                                /* DisableVerify */ true))
210       report_fatal_error("Failed to setup codegen");
211 
212     // Run codegen now. resulting binary is in OutputBuffer.
213     PM.run(TheModule);
214   }
215   return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
216 }
217 
218 /// Manage caching for a single Module.
219 class ModuleCacheEntry {
220   SmallString<128> EntryPath;
221 
222 public:
223   // Create a cache entry. This compute a unique hash for the Module considering
224   // the current list of export/import, and offer an interface to query to
225   // access the content in the cache.
ModuleCacheEntry(StringRef CachePath,const ModuleSummaryIndex & Index,StringRef ModuleID,const FunctionImporter::ImportMapTy & ImportList,const FunctionImporter::ExportSetTy & ExportList,const std::map<GlobalValue::GUID,GlobalValue::LinkageTypes> & ResolvedODR,const GVSummaryMapTy & DefinedFunctions,const DenseSet<GlobalValue::GUID> & PreservedSymbols)226   ModuleCacheEntry(
227       StringRef CachePath, const ModuleSummaryIndex &Index, StringRef ModuleID,
228       const FunctionImporter::ImportMapTy &ImportList,
229       const FunctionImporter::ExportSetTy &ExportList,
230       const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
231       const GVSummaryMapTy &DefinedFunctions,
232       const DenseSet<GlobalValue::GUID> &PreservedSymbols) {
233     if (CachePath.empty())
234       return;
235 
236     // Compute the unique hash for this entry
237     // This is based on the current compiler version, the module itself, the
238     // export list, the hash for every single module in the import list, the
239     // list of ResolvedODR for the module, and the list of preserved symbols.
240 
241     SHA1 Hasher;
242 
243     // Start with the compiler revision
244     Hasher.update(LLVM_VERSION_STRING);
245 #ifdef HAVE_LLVM_REVISION
246     Hasher.update(LLVM_REVISION);
247 #endif
248 
249     // Include the hash for the current module
250     auto ModHash = Index.getModuleHash(ModuleID);
251     Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
252     for (auto F : ExportList)
253       // The export list can impact the internalization, be conservative here
254       Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
255 
256     // Include the hash for every module we import functions from
257     for (auto &Entry : ImportList) {
258       auto ModHash = Index.getModuleHash(Entry.first());
259       Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
260     }
261 
262     // Include the hash for the resolved ODR.
263     for (auto &Entry : ResolvedODR) {
264       Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
265                                       sizeof(GlobalValue::GUID)));
266       Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
267                                       sizeof(GlobalValue::LinkageTypes)));
268     }
269 
270     // Include the hash for the preserved symbols.
271     for (auto &Entry : PreservedSymbols) {
272       if (DefinedFunctions.count(Entry))
273         Hasher.update(
274             ArrayRef<uint8_t>((const uint8_t *)&Entry, sizeof(GlobalValue::GUID)));
275     }
276 
277     sys::path::append(EntryPath, CachePath, toHex(Hasher.result()));
278   }
279 
280   // Access the path to this entry in the cache.
getEntryPath()281   StringRef getEntryPath() { return EntryPath; }
282 
283   // Try loading the buffer for this cache entry.
tryLoadingBuffer()284   ErrorOr<std::unique_ptr<MemoryBuffer>> tryLoadingBuffer() {
285     if (EntryPath.empty())
286       return std::error_code();
287     return MemoryBuffer::getFile(EntryPath);
288   }
289 
290   // Cache the Produced object file
291   std::unique_ptr<MemoryBuffer>
write(std::unique_ptr<MemoryBuffer> OutputBuffer)292   write(std::unique_ptr<MemoryBuffer> OutputBuffer) {
293     if (EntryPath.empty())
294       return OutputBuffer;
295 
296     // Write to a temporary to avoid race condition
297     SmallString<128> TempFilename;
298     int TempFD;
299     std::error_code EC =
300         sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
301     if (EC) {
302       errs() << "Error: " << EC.message() << "\n";
303       report_fatal_error("ThinLTO: Can't get a temporary file");
304     }
305     {
306       raw_fd_ostream OS(TempFD, /* ShouldClose */ true);
307       OS << OutputBuffer->getBuffer();
308     }
309     // Rename to final destination (hopefully race condition won't matter here)
310     EC = sys::fs::rename(TempFilename, EntryPath);
311     if (EC) {
312       sys::fs::remove(TempFilename);
313       raw_fd_ostream OS(EntryPath, EC, sys::fs::F_None);
314       if (EC)
315         report_fatal_error(Twine("Failed to open ") + EntryPath +
316                            " to save cached entry\n");
317       OS << OutputBuffer->getBuffer();
318     }
319     auto ReloadedBufferOrErr = MemoryBuffer::getFile(EntryPath);
320     if (auto EC = ReloadedBufferOrErr.getError()) {
321       // FIXME diagnose
322       errs() << "error: can't reload cached file '" << EntryPath
323              << "': " << EC.message() << "\n";
324       return OutputBuffer;
325     }
326     return std::move(*ReloadedBufferOrErr);
327   }
328 };
329 
330 static std::unique_ptr<MemoryBuffer>
ProcessThinLTOModule(Module & TheModule,ModuleSummaryIndex & Index,StringMap<MemoryBufferRef> & ModuleMap,TargetMachine & TM,const FunctionImporter::ImportMapTy & ImportList,const FunctionImporter::ExportSetTy & ExportList,const DenseSet<GlobalValue::GUID> & GUIDPreservedSymbols,const GVSummaryMapTy & DefinedGlobals,const ThinLTOCodeGenerator::CachingOptions & CacheOptions,bool DisableCodeGen,StringRef SaveTempsDir,unsigned count)331 ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
332                      StringMap<MemoryBufferRef> &ModuleMap, TargetMachine &TM,
333                      const FunctionImporter::ImportMapTy &ImportList,
334                      const FunctionImporter::ExportSetTy &ExportList,
335                      const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols,
336                      const GVSummaryMapTy &DefinedGlobals,
337                      const ThinLTOCodeGenerator::CachingOptions &CacheOptions,
338                      bool DisableCodeGen, StringRef SaveTempsDir,
339                      unsigned count) {
340 
341   // "Benchmark"-like optimization: single-source case
342   bool SingleModule = (ModuleMap.size() == 1);
343 
344   if (!SingleModule) {
345     promoteModule(TheModule, Index);
346 
347     // Apply summary-based LinkOnce/Weak resolution decisions.
348     thinLTOResolveWeakForLinkerModule(TheModule, DefinedGlobals);
349 
350     // Save temps: after promotion.
351     saveTempBitcode(TheModule, SaveTempsDir, count, ".1.promoted.bc");
352   }
353 
354   // Be friendly and don't nuke totally the module when the client didn't
355   // supply anything to preserve.
356   if (!ExportList.empty() || !GUIDPreservedSymbols.empty()) {
357     // Apply summary-based internalization decisions.
358     thinLTOInternalizeModule(TheModule, DefinedGlobals);
359   }
360 
361   // Save internalized bitcode
362   saveTempBitcode(TheModule, SaveTempsDir, count, ".2.internalized.bc");
363 
364   if (!SingleModule) {
365     crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
366 
367     // Save temps: after cross-module import.
368     saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
369   }
370 
371   optimizeModule(TheModule, TM);
372 
373   saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc");
374 
375   if (DisableCodeGen) {
376     // Configured to stop before CodeGen, serialize the bitcode and return.
377     SmallVector<char, 128> OutputBuffer;
378     {
379       raw_svector_ostream OS(OutputBuffer);
380       ModuleSummaryIndexBuilder IndexBuilder(&TheModule);
381       WriteBitcodeToFile(&TheModule, OS, true, &IndexBuilder.getIndex());
382     }
383     return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
384   }
385 
386   return codegenModule(TheModule, TM);
387 }
388 
389 /// Resolve LinkOnce/Weak symbols. Record resolutions in the \p ResolvedODR map
390 /// for caching, and in the \p Index for application during the ThinLTO
391 /// backends. This is needed for correctness for exported symbols (ensure
392 /// at least one copy kept) and a compile-time optimization (to drop duplicate
393 /// copies when possible).
resolveWeakForLinkerInIndex(ModuleSummaryIndex & Index,StringMap<std::map<GlobalValue::GUID,GlobalValue::LinkageTypes>> & ResolvedODR)394 static void resolveWeakForLinkerInIndex(
395     ModuleSummaryIndex &Index,
396     StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>>
397         &ResolvedODR) {
398 
399   DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
400   computePrevailingCopies(Index, PrevailingCopy);
401 
402   auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
403     const auto &Prevailing = PrevailingCopy.find(GUID);
404     // Not in map means that there was only one copy, which must be prevailing.
405     if (Prevailing == PrevailingCopy.end())
406       return true;
407     return Prevailing->second == S;
408   };
409 
410   auto recordNewLinkage = [&](StringRef ModuleIdentifier,
411                               GlobalValue::GUID GUID,
412                               GlobalValue::LinkageTypes NewLinkage) {
413     ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
414   };
415 
416   thinLTOResolveWeakForLinkerInIndex(Index, isPrevailing, recordNewLinkage);
417 }
418 
419 // Initialize the TargetMachine builder for a given Triple
initTMBuilder(TargetMachineBuilder & TMBuilder,const Triple & TheTriple)420 static void initTMBuilder(TargetMachineBuilder &TMBuilder,
421                           const Triple &TheTriple) {
422   // Set a default CPU for Darwin triples (copied from LTOCodeGenerator).
423   // FIXME this looks pretty terrible...
424   if (TMBuilder.MCpu.empty() && TheTriple.isOSDarwin()) {
425     if (TheTriple.getArch() == llvm::Triple::x86_64)
426       TMBuilder.MCpu = "core2";
427     else if (TheTriple.getArch() == llvm::Triple::x86)
428       TMBuilder.MCpu = "yonah";
429     else if (TheTriple.getArch() == llvm::Triple::aarch64)
430       TMBuilder.MCpu = "cyclone";
431   }
432   TMBuilder.TheTriple = std::move(TheTriple);
433 }
434 
435 } // end anonymous namespace
436 
addModule(StringRef Identifier,StringRef Data)437 void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
438   MemoryBufferRef Buffer(Data, Identifier);
439   if (Modules.empty()) {
440     // First module added, so initialize the triple and some options
441     LLVMContext Context;
442     Triple TheTriple(getBitcodeTargetTriple(Buffer, Context));
443     initTMBuilder(TMBuilder, Triple(TheTriple));
444   }
445 #ifndef NDEBUG
446   else {
447     LLVMContext Context;
448     assert(TMBuilder.TheTriple.str() ==
449                getBitcodeTargetTriple(Buffer, Context) &&
450            "ThinLTO modules with different triple not supported");
451   }
452 #endif
453   Modules.push_back(Buffer);
454 }
455 
preserveSymbol(StringRef Name)456 void ThinLTOCodeGenerator::preserveSymbol(StringRef Name) {
457   PreservedSymbols.insert(Name);
458 }
459 
crossReferenceSymbol(StringRef Name)460 void ThinLTOCodeGenerator::crossReferenceSymbol(StringRef Name) {
461   // FIXME: At the moment, we don't take advantage of this extra information,
462   // we're conservatively considering cross-references as preserved.
463   //  CrossReferencedSymbols.insert(Name);
464   PreservedSymbols.insert(Name);
465 }
466 
467 // TargetMachine factory
create() const468 std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
469   std::string ErrMsg;
470   const Target *TheTarget =
471       TargetRegistry::lookupTarget(TheTriple.str(), ErrMsg);
472   if (!TheTarget) {
473     report_fatal_error("Can't load target for this Triple: " + ErrMsg);
474   }
475 
476   // Use MAttr as the default set of features.
477   SubtargetFeatures Features(MAttr);
478   Features.getDefaultSubtargetFeatures(TheTriple);
479   std::string FeatureStr = Features.getString();
480   return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
481       TheTriple.str(), MCpu, FeatureStr, Options, RelocModel,
482       CodeModel::Default, CGOptLevel));
483 }
484 
485 /**
486  * Produce the combined summary index from all the bitcode files:
487  * "thin-link".
488  */
linkCombinedIndex()489 std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
490   std::unique_ptr<ModuleSummaryIndex> CombinedIndex;
491   uint64_t NextModuleId = 0;
492   for (auto &ModuleBuffer : Modules) {
493     ErrorOr<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
494         object::ModuleSummaryIndexObjectFile::create(ModuleBuffer,
495                                                      diagnosticHandler);
496     if (std::error_code EC = ObjOrErr.getError()) {
497       // FIXME diagnose
498       errs() << "error: can't create ModuleSummaryIndexObjectFile for buffer: "
499              << EC.message() << "\n";
500       return nullptr;
501     }
502     auto Index = (*ObjOrErr)->takeIndex();
503     if (CombinedIndex) {
504       CombinedIndex->mergeFrom(std::move(Index), ++NextModuleId);
505     } else {
506       CombinedIndex = std::move(Index);
507     }
508   }
509   return CombinedIndex;
510 }
511 
512 /**
513  * Perform promotion and renaming of exported internal functions.
514  * Index is updated to reflect linkage changes from weak resolution.
515  */
promote(Module & TheModule,ModuleSummaryIndex & Index)516 void ThinLTOCodeGenerator::promote(Module &TheModule,
517                                    ModuleSummaryIndex &Index) {
518   auto ModuleCount = Index.modulePaths().size();
519   auto ModuleIdentifier = TheModule.getModuleIdentifier();
520   // Collect for each module the list of function it defines (GUID -> Summary).
521   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries;
522   Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
523 
524   // Generate import/export list
525   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
526   StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
527   ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
528                            ExportLists);
529 
530   // Resolve LinkOnce/Weak symbols.
531   StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
532   resolveWeakForLinkerInIndex(Index, ResolvedODR);
533 
534   thinLTOResolveWeakForLinkerModule(
535       TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
536 
537   promoteModule(TheModule, Index);
538 }
539 
540 /**
541  * Perform cross-module importing for the module identified by ModuleIdentifier.
542  */
crossModuleImport(Module & TheModule,ModuleSummaryIndex & Index)543 void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
544                                              ModuleSummaryIndex &Index) {
545   auto ModuleMap = generateModuleMap(Modules);
546   auto ModuleCount = Index.modulePaths().size();
547 
548   // Collect for each module the list of function it defines (GUID -> Summary).
549   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
550   Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
551 
552   // Generate import/export list
553   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
554   StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
555   ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
556                            ExportLists);
557   auto &ImportList = ImportLists[TheModule.getModuleIdentifier()];
558 
559   crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
560 }
561 
562 /**
563  * Compute the list of summaries needed for importing into module.
564  */
gatherImportedSummariesForModule(StringRef ModulePath,ModuleSummaryIndex & Index,std::map<std::string,GVSummaryMapTy> & ModuleToSummariesForIndex)565 void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
566     StringRef ModulePath, ModuleSummaryIndex &Index,
567     std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
568   auto ModuleCount = Index.modulePaths().size();
569 
570   // Collect for each module the list of function it defines (GUID -> Summary).
571   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
572   Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
573 
574   // Generate import/export list
575   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
576   StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
577   ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
578                            ExportLists);
579 
580   llvm::gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
581                                          ImportLists,
582                                          ModuleToSummariesForIndex);
583 }
584 
585 /**
586  * Emit the list of files needed for importing into module.
587  */
emitImports(StringRef ModulePath,StringRef OutputName,ModuleSummaryIndex & Index)588 void ThinLTOCodeGenerator::emitImports(StringRef ModulePath,
589                                        StringRef OutputName,
590                                        ModuleSummaryIndex &Index) {
591   auto ModuleCount = Index.modulePaths().size();
592 
593   // Collect for each module the list of function it defines (GUID -> Summary).
594   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
595   Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
596 
597   // Generate import/export list
598   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
599   StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
600   ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
601                            ExportLists);
602 
603   std::error_code EC;
604   if ((EC = EmitImportsFiles(ModulePath, OutputName, ImportLists)))
605     report_fatal_error(Twine("Failed to open ") + OutputName +
606                        " to save imports lists\n");
607 }
608 
609 /**
610  * Perform internalization. Index is updated to reflect linkage changes.
611  */
internalize(Module & TheModule,ModuleSummaryIndex & Index)612 void ThinLTOCodeGenerator::internalize(Module &TheModule,
613                                        ModuleSummaryIndex &Index) {
614   initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
615   auto ModuleCount = Index.modulePaths().size();
616   auto ModuleIdentifier = TheModule.getModuleIdentifier();
617 
618   // Convert the preserved symbols set from string to GUID
619   auto GUIDPreservedSymbols =
620       computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
621 
622   // Collect for each module the list of function it defines (GUID -> Summary).
623   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
624   Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
625 
626   // Generate import/export list
627   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
628   StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
629   ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
630                            ExportLists);
631   auto &ExportList = ExportLists[ModuleIdentifier];
632 
633   // Be friendly and don't nuke totally the module when the client didn't
634   // supply anything to preserve.
635   if (ExportList.empty() && GUIDPreservedSymbols.empty())
636     return;
637 
638   // Internalization
639   auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
640     const auto &ExportList = ExportLists.find(ModuleIdentifier);
641     return (ExportList != ExportLists.end() &&
642             ExportList->second.count(GUID)) ||
643            GUIDPreservedSymbols.count(GUID);
644   };
645   thinLTOInternalizeAndPromoteInIndex(Index, isExported);
646   thinLTOInternalizeModule(TheModule,
647                            ModuleToDefinedGVSummaries[ModuleIdentifier]);
648 }
649 
650 /**
651  * Perform post-importing ThinLTO optimizations.
652  */
optimize(Module & TheModule)653 void ThinLTOCodeGenerator::optimize(Module &TheModule) {
654   initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
655 
656   // Optimize now
657   optimizeModule(TheModule, *TMBuilder.create());
658 }
659 
660 /**
661  * Perform ThinLTO CodeGen.
662  */
codegen(Module & TheModule)663 std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
664   initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
665   return codegenModule(TheModule, *TMBuilder.create());
666 }
667 
668 // Main entry point for the ThinLTO processing
run()669 void ThinLTOCodeGenerator::run() {
670   if (CodeGenOnly) {
671     // Perform only parallel codegen and return.
672     ThreadPool Pool;
673     assert(ProducedBinaries.empty() && "The generator should not be reused");
674     ProducedBinaries.resize(Modules.size());
675     int count = 0;
676     for (auto &ModuleBuffer : Modules) {
677       Pool.async([&](int count) {
678         LLVMContext Context;
679         Context.setDiscardValueNames(LTODiscardValueNames);
680 
681         // Parse module now
682         auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
683 
684         // CodeGen
685         ProducedBinaries[count] = codegen(*TheModule);
686       }, count++);
687     }
688 
689     return;
690   }
691 
692   // Sequential linking phase
693   auto Index = linkCombinedIndex();
694 
695   // Save temps: index.
696   if (!SaveTempsDir.empty()) {
697     auto SaveTempPath = SaveTempsDir + "index.bc";
698     std::error_code EC;
699     raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
700     if (EC)
701       report_fatal_error(Twine("Failed to open ") + SaveTempPath +
702                          " to save optimized bitcode\n");
703     WriteIndexToFile(*Index, OS);
704   }
705 
706   // Prepare the resulting object vector
707   assert(ProducedBinaries.empty() && "The generator should not be reused");
708   ProducedBinaries.resize(Modules.size());
709 
710   // Prepare the module map.
711   auto ModuleMap = generateModuleMap(Modules);
712   auto ModuleCount = Modules.size();
713 
714   // Collect for each module the list of function it defines (GUID -> Summary).
715   StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries(ModuleCount);
716   Index->collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
717 
718   // Collect the import/export lists for all modules from the call-graph in the
719   // combined index.
720   StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
721   StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
722   ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists,
723                            ExportLists);
724 
725   // Convert the preserved symbols set from string to GUID, this is needed for
726   // computing the caching hash and the internalization.
727   auto GUIDPreservedSymbols =
728       computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
729 
730   // We use a std::map here to be able to have a defined ordering when
731   // producing a hash for the cache entry.
732   // FIXME: we should be able to compute the caching hash for the entry based
733   // on the index, and nuke this map.
734   StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
735 
736   // Resolve LinkOnce/Weak symbols, this has to be computed early because it
737   // impacts the caching.
738   resolveWeakForLinkerInIndex(*Index, ResolvedODR);
739 
740   auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
741     const auto &ExportList = ExportLists.find(ModuleIdentifier);
742     return (ExportList != ExportLists.end() &&
743             ExportList->second.count(GUID)) ||
744            GUIDPreservedSymbols.count(GUID);
745   };
746 
747   // Use global summary-based analysis to identify symbols that can be
748   // internalized (because they aren't exported or preserved as per callback).
749   // Changes are made in the index, consumed in the ThinLTO backends.
750   thinLTOInternalizeAndPromoteInIndex(*Index, isExported);
751 
752   // Make sure that every module has an entry in the ExportLists and
753   // ResolvedODR maps to enable threaded access to these maps below.
754   for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
755     ExportLists[DefinedGVSummaries.first()];
756     ResolvedODR[DefinedGVSummaries.first()];
757   }
758 
759   // Compute the ordering we will process the inputs: the rough heuristic here
760   // is to sort them per size so that the largest module get schedule as soon as
761   // possible. This is purely a compile-time optimization.
762   std::vector<int> ModulesOrdering;
763   ModulesOrdering.resize(Modules.size());
764   std::iota(ModulesOrdering.begin(), ModulesOrdering.end(), 0);
765   std::sort(ModulesOrdering.begin(), ModulesOrdering.end(),
766             [&](int LeftIndex, int RightIndex) {
767               auto LSize = Modules[LeftIndex].getBufferSize();
768               auto RSize = Modules[RightIndex].getBufferSize();
769               return LSize > RSize;
770             });
771 
772   // Parallel optimizer + codegen
773   {
774     ThreadPool Pool(ThreadCount);
775     for (auto IndexCount : ModulesOrdering) {
776       auto &ModuleBuffer = Modules[IndexCount];
777       Pool.async([&](int count) {
778         auto ModuleIdentifier = ModuleBuffer.getBufferIdentifier();
779         auto &ExportList = ExportLists[ModuleIdentifier];
780 
781         auto &DefinedFunctions = ModuleToDefinedGVSummaries[ModuleIdentifier];
782 
783         // The module may be cached, this helps handling it.
784         ModuleCacheEntry CacheEntry(CacheOptions.Path, *Index, ModuleIdentifier,
785                                     ImportLists[ModuleIdentifier], ExportList,
786                                     ResolvedODR[ModuleIdentifier],
787                                     DefinedFunctions, GUIDPreservedSymbols);
788 
789         {
790           auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
791           DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '"
792                        << CacheEntry.getEntryPath() << "' for buffer " << count
793                        << " " << ModuleIdentifier << "\n");
794 
795           if (ErrOrBuffer) {
796             // Cache Hit!
797             ProducedBinaries[count] = std::move(ErrOrBuffer.get());
798             return;
799           }
800         }
801 
802         LLVMContext Context;
803         Context.setDiscardValueNames(LTODiscardValueNames);
804         Context.enableDebugTypeODRUniquing();
805 
806         // Parse module now
807         auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
808 
809         // Save temps: original file.
810         saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
811 
812         auto &ImportList = ImportLists[ModuleIdentifier];
813         // Run the main process now, and generates a binary
814         auto OutputBuffer = ProcessThinLTOModule(
815             *TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList,
816             ExportList, GUIDPreservedSymbols,
817             ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions,
818             DisableCodeGen, SaveTempsDir, count);
819 
820         OutputBuffer = CacheEntry.write(std::move(OutputBuffer));
821         ProducedBinaries[count] = std::move(OutputBuffer);
822       }, IndexCount);
823     }
824   }
825 
826   CachePruning(CacheOptions.Path)
827       .setPruningInterval(CacheOptions.PruningInterval)
828       .setEntryExpiration(CacheOptions.Expiration)
829       .setMaxSize(CacheOptions.MaxPercentageOfAvailableSpace)
830       .prune();
831 
832   // If statistics were requested, print them out now.
833   if (llvm::AreStatisticsEnabled())
834     llvm::PrintStatistics();
835 }
836