1 //===- subzero/src/IceGlobalContext.cpp - Global context defs -------------===//
2 //
3 //                        The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Defines aspects of the compilation that persist across multiple
12 /// functions.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "IceGlobalContext.h"
17 
18 #include "IceCfg.h"
19 #include "IceCfgNode.h"
20 #include "IceClFlags.h"
21 #include "IceDefs.h"
22 #include "IceELFObjectWriter.h"
23 #include "IceGlobalInits.h"
24 #include "IceLiveness.h"
25 #include "IceOperand.h"
26 #include "IceRevision.h"
27 #include "IceTargetLowering.h"
28 #include "IceTimerTree.h"
29 #include "IceTypes.def"
30 #include "IceTypes.h"
31 
32 #ifdef __clang__
33 #pragma clang diagnostic push
34 #pragma clang diagnostic ignored "-Wunused-parameter"
35 #endif // __clang__
36 
37 #include "llvm/Support/Timer.h"
38 
39 #ifdef __clang__
40 #pragma clang diagnostic pop
41 #endif // __clang__
42 
43 #include <algorithm> // max()
44 
45 namespace std {
46 template <> struct hash<Ice::RelocatableTuple> {
operator ()std::hash47   size_t operator()(const Ice::RelocatableTuple &Key) const {
48     // Use the relocatable's name, plus the hash of a combination of the number
49     // of OffsetExprs and the known, fixed offset for the reloc. We left shift
50     // the known relocatable by 5 trying to minimize the interaction between the
51     // bits in OffsetExpr.size() and Key.Offset.
52     return hash<Ice::SizeT>()(Key.Name.getID()) +
53            hash<std::size_t>()(Key.OffsetExpr.size() + (Key.Offset << 5));
54   }
55 };
56 } // end of namespace std
57 
58 namespace Ice {
59 
60 namespace {
61 
62 // Define the key comparison function for the constant pool's unordered_map,
63 // but only for key types of interest: integer types, floating point types, and
64 // the special RelocatableTuple.
65 template <typename KeyType, class Enable = void> struct KeyCompare {};
66 
67 template <typename KeyType>
68 struct KeyCompare<KeyType,
69                   typename std::enable_if<
70                       std::is_integral<KeyType>::value ||
71                       std::is_same<KeyType, RelocatableTuple>::value>::type> {
operator ()Ice::__anon0790dc650111::KeyCompare72   bool operator()(const KeyType &Value1, const KeyType &Value2) const {
73     return Value1 == Value2;
74   }
75 };
76 template <typename KeyType>
77 struct KeyCompare<KeyType, typename std::enable_if<
78                                std::is_floating_point<KeyType>::value>::type> {
operator ()Ice::__anon0790dc650111::KeyCompare79   bool operator()(const KeyType &Value1, const KeyType &Value2) const {
80     return !memcmp(&Value1, &Value2, sizeof(KeyType));
81   }
82 };
83 
84 // Define a key comparison function for sorting the constant pool's values
85 // after they are dumped to a vector. This covers integer types, floating point
86 // types, and ConstantRelocatable values.
87 template <typename ValueType, class Enable = void> struct KeyCompareLess {};
88 
89 template <typename ValueType>
90 struct KeyCompareLess<ValueType,
91                       typename std::enable_if<std::is_floating_point<
92                           typename ValueType::PrimType>::value>::type> {
operator ()Ice::__anon0790dc650111::KeyCompareLess93   bool operator()(const Constant *Const1, const Constant *Const2) const {
94     using CompareType = uint64_t;
95     static_assert(sizeof(typename ValueType::PrimType) <= sizeof(CompareType),
96                   "Expected floating-point type of width 64-bit or less");
97     typename ValueType::PrimType V1 = llvm::cast<ValueType>(Const1)->getValue();
98     typename ValueType::PrimType V2 = llvm::cast<ValueType>(Const2)->getValue();
99     // We avoid "V1<V2" because of NaN.
100     // We avoid "memcmp(&V1,&V2,sizeof(V1))<0" which depends on the
101     // endian-ness of the host system running Subzero.
102     // Instead, compare the result of bit_cast to uint64_t.
103     uint64_t I1 = 0, I2 = 0;
104     memcpy(&I1, &V1, sizeof(V1));
105     memcpy(&I2, &V2, sizeof(V2));
106     return I1 < I2;
107   }
108 };
109 template <typename ValueType>
110 struct KeyCompareLess<ValueType,
111                       typename std::enable_if<std::is_integral<
112                           typename ValueType::PrimType>::value>::type> {
operator ()Ice::__anon0790dc650111::KeyCompareLess113   bool operator()(const Constant *Const1, const Constant *Const2) const {
114     typename ValueType::PrimType V1 = llvm::cast<ValueType>(Const1)->getValue();
115     typename ValueType::PrimType V2 = llvm::cast<ValueType>(Const2)->getValue();
116     return V1 < V2;
117   }
118 };
119 template <typename ValueType>
120 struct KeyCompareLess<
121     ValueType, typename std::enable_if<
122                    std::is_same<ValueType, ConstantRelocatable>::value>::type> {
operator ()Ice::__anon0790dc650111::KeyCompareLess123   bool operator()(const Constant *Const1, const Constant *Const2) const {
124     auto *V1 = llvm::cast<ValueType>(Const1);
125     auto *V2 = llvm::cast<ValueType>(Const2);
126     if (V1->getName() == V2->getName())
127       return V1->getOffset() < V2->getOffset();
128     return V1->getName() < V2->getName();
129   }
130 };
131 
132 // TypePool maps constants of type KeyType (e.g. float) to pointers to
133 // type ValueType (e.g. ConstantFloat).
134 template <Type Ty, typename KeyType, typename ValueType> class TypePool {
135   TypePool(const TypePool &) = delete;
136   TypePool &operator=(const TypePool &) = delete;
137 
138 public:
139   TypePool() = default;
getOrAdd(GlobalContext * Ctx,KeyType Key)140   ValueType *getOrAdd(GlobalContext *Ctx, KeyType Key) {
141     auto Iter = Pool.find(Key);
142     if (Iter != Pool.end()) {
143       Iter->second->updateLookupCount();
144       return Iter->second;
145     }
146     auto *Result = ValueType::create(Ctx, Ty, Key);
147     Pool[Key] = Result;
148     Result->updateLookupCount();
149     return Result;
150   }
getConstantPool() const151   ConstantList getConstantPool() const {
152     ConstantList Constants;
153     Constants.reserve(Pool.size());
154     for (auto &I : Pool)
155       Constants.push_back(I.second);
156     // The sort (and its KeyCompareLess machinery) is not strictly necessary,
157     // but is desirable for producing output that is deterministic across
158     // unordered_map::iterator implementations.
159     std::sort(Constants.begin(), Constants.end(), KeyCompareLess<ValueType>());
160     return Constants;
161   }
size() const162   size_t size() const { return Pool.size(); }
163 
164 private:
165   // Use the default hash function, and a custom key comparison function. The
166   // key comparison function for floating point variables can't use the default
167   // == based implementation because of special C++ semantics regarding +0.0,
168   // -0.0, and NaN comparison. However, it's OK to use the default hash for
169   // floating point values because KeyCompare is the final source of truth - in
170   // the worst case a "false" collision must be resolved.
171   using ContainerType =
172       std::unordered_map<KeyType, ValueType *, std::hash<KeyType>,
173                          KeyCompare<KeyType>>;
174   ContainerType Pool;
175 };
176 
177 // UndefPool maps ICE types to the corresponding ConstantUndef values.
178 class UndefPool {
179   UndefPool(const UndefPool &) = delete;
180   UndefPool &operator=(const UndefPool &) = delete;
181 
182 public:
UndefPool()183   UndefPool() : Pool(IceType_NUM) {}
184 
getOrAdd(GlobalContext * Ctx,Type Ty)185   ConstantUndef *getOrAdd(GlobalContext *Ctx, Type Ty) {
186     if (Pool[Ty] == nullptr)
187       Pool[Ty] = ConstantUndef::create(Ctx, Ty);
188     return Pool[Ty];
189   }
190 
191 private:
192   std::vector<ConstantUndef *> Pool;
193 };
194 
195 } // end of anonymous namespace
196 
// The global constant pool bundles individual pools of each type of
// interest. It is non-copyable; GlobalContext allocates a single instance in
// its constructor.
class ConstantPool {
  ConstantPool(const ConstantPool &) = delete;
  ConstantPool &operator=(const ConstantPool &) = delete;

public:
  ConstantPool() = default;
  // Floating point constants, keyed by their value.
  TypePool<IceType_f32, float, ConstantFloat> Floats;
  TypePool<IceType_f64, double, ConstantDouble> Doubles;
  // Integer constants. i1, i8, i16 and i32 values are all represented as
  // ConstantInteger32; i1 and i8 both use an int8_t key.
  TypePool<IceType_i1, int8_t, ConstantInteger32> Integers1;
  TypePool<IceType_i8, int8_t, ConstantInteger32> Integers8;
  TypePool<IceType_i16, int16_t, ConstantInteger32> Integers16;
  TypePool<IceType_i32, int32_t, ConstantInteger32> Integers32;
  TypePool<IceType_i64, int64_t, ConstantInteger64> Integers64;
  // Relocatable (symbol + offset) constants, keyed by RelocatableTuple.
  TypePool<IceType_i32, RelocatableTuple, ConstantRelocatable> Relocatables;
  // Relocatables created through GlobalContext::getConstantExternSym().
  TypePool<IceType_i32, RelocatableTuple, ConstantRelocatable>
      ExternRelocatables;
  // One lazily created ConstantUndef per ICE type.
  UndefPool Undefs;
};
217 
waitForWorkerThreads()218 void GlobalContext::waitForWorkerThreads() {
219   if (WaitForWorkerThreadsCalled.exchange(true))
220     return;
221   optQueueNotifyEnd();
222   for (std::thread &Worker : TranslationThreads) {
223     Worker.join();
224   }
225   TranslationThreads.clear();
226 
227   // Only notify the emit queue to end after all the translation threads have
228   // ended.
229   emitQueueNotifyEnd();
230   for (std::thread &Worker : EmitterThreads) {
231     Worker.join();
232   }
233   EmitterThreads.clear();
234 
235   if (BuildDefs::timers()) {
236     auto Timers = getTimers();
237     for (ThreadContext *TLS : AllThreadContexts)
238       Timers->mergeFrom(TLS->Timers);
239   }
240   if (BuildDefs::dump()) {
241     // Do a separate loop over AllThreadContexts to avoid holding two locks at
242     // once.
243     auto Stats = getStatsCumulative();
244     for (ThreadContext *TLS : AllThreadContexts)
245       Stats->add(TLS->StatsCumulative);
246   }
247 }
248 
// Writes these statistics to Ctx's dump stream as a series of '|'-delimited
// lines, each prefixed with the function's name-and-size string (or "_FINAL_"
// when Func is null). Does nothing unless BuildDefs::dump() is enabled.
//
// Func: the function the stats belong to, or nullptr for the final summary.
// Ctx:  supplies the dump stream, its lock, and the constant pool sizes.
void GlobalContext::CodeStats::dump(const Cfg *Func, GlobalContext *Ctx) {
  if (!BuildDefs::dump())
    return;
  // Hold the output stream lock for the whole report.
  OstreamLocker _(Ctx);
  Ostream &Str = Ctx->getStrDump();
  const std::string Name =
      (Func == nullptr ? "_FINAL_" : Func->getFunctionNameAndSize());
// Emit one line per entry of CODESTATS_TABLE.
#define X(str, tag)                                                            \
  Str << "|" << Name << "|" str "|" << Stats[CS_##tag] << "\n";
  CODESTATS_TABLE
#undef X
  // Derived statistic: spills and fills combined.
  Str << "|" << Name << "|Spills+Fills|"
      << Stats[CS_NumSpills] + Stats[CS_NumFills] << "\n";
  Str << "|" << Name << "|Memory Usage     |";
  // A zero memory-used reading is reported as memory tracking being disabled.
  if (const auto MemUsed = static_cast<size_t>(
          llvm::TimeRecord::getCurrentTime(false).getMemUsed())) {
    static constexpr size_t _1MB = 1024 * 1024;
    Str << (MemUsed / _1MB) << " MB";
  } else {
    Str << "(requires '-track-memory')";
  }
  Str << "\n";
  Str << "|" << Name << "|CPool Sizes ";
  {
    // Pool is scoped so that it is released as soon as the sizes are printed.
    auto Pool = Ctx->getConstPool();
    Str << "|f32=" << Pool->Floats.size();
    Str << "|f64=" << Pool->Doubles.size();
    Str << "|i1=" << Pool->Integers1.size();
    Str << "|i8=" << Pool->Integers8.size();
    Str << "|i16=" << Pool->Integers16.size();
    Str << "|i32=" << Pool->Integers32.size();
    Str << "|i64=" << Pool->Integers64.size();
    Str << "|Rel=" << Pool->Relocatables.size();
    Str << "|ExtRel=" << Pool->ExternRelocatables.size();
  }
  Str << "\n";
  // Per-function memory figures are only available when a Cfg is supplied.
  if (Func != nullptr) {
    Str << "|" << Name << "|Cfg Memory       |" << Func->getTotalMemoryMB()
        << " MB\n";
    Str << "|" << Name << "|Liveness Memory  |" << Func->getLivenessMemoryMB()
        << " MB\n";
  }
}
292 
293 namespace {
294 
295 // By default, wake up the main parser thread when the OptQ gets half empty.
296 static constexpr size_t DefaultOptQWakeupSize = GlobalContext::MaxOptQSize >> 1;
297 
298 } // end of anonymous namespace
299 
// Constructs the global context: allocates the string and constant pools,
// records the output streams, sets up the optimization and emission work
// queues, and creates the target data lowering. The body then initializes
// thread-local state, creates the ThreadContext for the calling thread,
// optionally creates the ELF object writer, and pre-populates commonly used
// constants, runtime helper symbols, and (optionally) the revision global.
//
// OsDump, OsEmit and OsError must be non-null (asserted below).
// NOTE(review): ELFStr is dereferenced without a check when the output file
// type is FT_Elf, so it presumably must be non-null in that mode -- confirm
// against callers.
GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError,
                             ELFStreamer *ELFStr)
    : Strings(new StringPool()), ConstPool(new ConstantPool()), ErrorStatus(),
      StrDump(OsDump), StrEmit(OsEmit), StrError(OsError), ObjectWriter(),
      // The wakeup threshold is never smaller than the number of translation
      // threads.
      OptQWakeupSize(std::max(DefaultOptQWakeupSize,
                              size_t(getFlags().getNumTranslationThreads()))),
      // OptQ is bounded by MaxOptQSize when parsing in parallel, otherwise by
      // the number of translation threads.
      OptQ(/*Sequential=*/getFlags().isSequential(),
           /*MaxSize=*/
           getFlags().isParseParallel()
               ? MaxOptQSize
               : getFlags().getNumTranslationThreads()),
      // EmitQ is allowed unlimited size.
      EmitQ(/*Sequential=*/getFlags().isSequential()),
      DataLowering(TargetDataLowering::createLowering(this)) {
  assert(OsDump && "OsDump is not defined for GlobalContext");
  assert(OsEmit && "OsEmit is not defined for GlobalContext");
  assert(OsError && "OsError is not defined for GlobalContext");
  // Make sure thread_local fields are properly initialized before any
  // accesses are made.  Do this here instead of at the start of
  // main() so that all clients (e.g. unit tests) can benefit for
  // free.
  GlobalContext::TlsInit();
  Cfg::TlsInit();
  Liveness::TlsInit();
  // Create a new ThreadContext for the current thread.  No need to
  // lock AllThreadContexts at this point since no other threads have
  // access yet to this GlobalContext object.
  ThreadContext *MyTLS = new ThreadContext();
  AllThreadContexts.push_back(MyTLS);
  ICE_TLS_SET_FIELD(TLS, MyTLS);
  // Pre-register built-in stack names.
  if (BuildDefs::timers()) {
    // TODO(stichnot): There needs to be a strong relationship between
    // the newTimerStackID() return values and TSK_Default/TSK_Funcs.
    newTimerStackID("Total across all functions");
    newTimerStackID("Per-function summary");
  }
  Timers.initInto(MyTLS->Timers);
  // Only ELF output needs an object writer; the textual output types do not.
  switch (getFlags().getOutFileType()) {
  case FT_Elf:
    ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr));
    break;
  case FT_Asm:
  case FT_Iasm:
    break;
  }
// Cache up front common constants: one zero constant per entry of
// ICETYPE_TABLE (getConstantZeroInternal() yields nullptr for types it does
// not handle), plus the i1 constant "true".
#define X(tag, sizeLog2, align, elts, elty, str, rcstr)                        \
  ConstZeroForType[IceType_##tag] = getConstantZeroInternal(IceType_##tag);
  ICETYPE_TABLE;
#undef X
  ConstantTrue = getConstantInt1Internal(1);
// Define runtime helper functions: one extern symbol constant per entry of
// RUNTIME_HELPER_FUNCTIONS_TABLE, indexed by its RuntimeHelper enumerator.
#define X(Tag, Name)                                                           \
  RuntimeHelperFunc[static_cast<size_t>(RuntimeHelper::H_##Tag)] =             \
      getConstantExternSym(getGlobalString(Name));
  RUNTIME_HELPER_FUNCTIONS_TABLE
#undef X

  TargetLowering::staticInit(this);

  if (getFlags().getEmitRevision()) {
    // Embed the Subzero revision into the compiled binary by creating a special
    // global variable initialized with the revision string.
    auto *Revision = VariableDeclaration::create(&Globals, true);
    Revision->setName(this, "__Sz_revision");
    Revision->setIsConstant(true);
    const char *RevisionString = getSubzeroRevision();
    // The initializer length includes the terminating NUL byte.
    Revision->addInitializer(VariableDeclaration::DataInitializer::create(
        &Globals, RevisionString, 1 + strlen(RevisionString)));
    Globals.push_back(Revision);
  }
}
373 
translateFunctionsWrapper(ThreadContext * MyTLS)374 void GlobalContext::translateFunctionsWrapper(ThreadContext *MyTLS) {
375   ICE_TLS_SET_FIELD(TLS, MyTLS);
376   translateFunctions();
377 }
378 
// Translation loop: repeatedly pops work items from the optimization queue
// until the pop yields a null item, translates each parsed Cfg, and pushes
// exactly one EmitterWorkItem per Cfg onto the emit queue (a placeholder item
// when translation is skipped or fails).
void GlobalContext::translateFunctions() {
  TimerMarker Timer(TimerStack::TT_translateFunctions, this);
  while (std::unique_ptr<OptWorkItem> OptItem = optQueueBlockingPop()) {
    std::unique_ptr<EmitterWorkItem> Item;
    auto Func = OptItem->getParsedCfg();
    // Install Func in TLS for Cfg-specific container allocators.
    CfgLocalAllocatorScope _(Func.get());
    // Reset per-function stats being accumulated in TLS.
    resetStats();
    // Set verbose level to none if the current function does NOT match the
    // -verbose-focus command-line option.
    if (!getFlags().matchVerboseFocusOn(Func->getFunctionName(),
                                        Func->getSequenceNumber()))
      Func->setVerbose(IceV_None);
    // Disable translation if -notranslate is specified, or if the current
    // function does not match the -translate-only option.  If translation is
    // disabled, just dump the high-level IR and continue.
    if (getFlags().getDisableTranslation() ||
        !getFlags().matchTranslateOnly(Func->getFunctionName(),
                                       Func->getSequenceNumber())) {
      Func->dump();
      // Add a dummy work item as a placeholder.  This maintains sequence
      // numbers so that the emitter thread will emit subsequent functions.
      Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber());
      emitQueueBlockingPush(std::move(Item));
      continue; // Func goes out of scope and gets deleted
    }

    Func->translate();
    if (Func->hasError()) {
      // Record the failure, report it on the error stream, and still queue a
      // placeholder item carrying this function's sequence number.
      getErrorStatus()->assign(EC_Translation);
      OstreamLocker L(this);
      getStrError() << "ICE translation error: " << Func->getFunctionName()
                    << ": " << Func->getError() << ": "
                    << Func->getFunctionNameAndSize() << "\n";
      Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber());
    } else {
      Func->getAssembler<>()->setInternal(Func->getInternal());
      switch (getFlags().getOutFileType()) {
      case FT_Elf:
      case FT_Iasm: {
        Func->emitIAS();
        // The Cfg has already emitted into the assembly buffer, so
        // stats have been fully collected into this thread's TLS.
        // Dump them before TLS is reset for the next Cfg.
        if (BuildDefs::dump())
          dumpStats(Func.get());
        // Only the assembler is handed to the emitter; the Cfg stays here.
        auto Asm = Func->releaseAssembler();
        // Copy relevant fields into Asm before Func is deleted.
        Asm->setFunctionName(Func->getFunctionName());
        Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber(),
                                           std::move(Asm));
        Item->setGlobalInits(Func->getGlobalInits());
      } break;
      case FT_Asm:
        // The Cfg has not been emitted yet, so stats are not ready
        // to be dumped.
        // Fetch the global initializers first: Func is moved into the work
        // item on the next statement and must not be used afterwards.
        std::unique_ptr<VariableDeclarationList> GlobalInits =
            Func->getGlobalInits();
        Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber(),
                                           std::move(Func));
        Item->setGlobalInits(std::move(GlobalInits));
        break;
      }
    }
    assert(Item != nullptr);
    emitQueueBlockingPush(std::move(Item));
    // The Cfg now gets deleted as Func goes out of scope.
  }
}
449 
450 namespace {
451 
452 // Ensure Pending is large enough that Pending[Index] is valid.
resizePending(std::vector<std::unique_ptr<EmitterWorkItem>> * Pending,uint32_t Index)453 void resizePending(std::vector<std::unique_ptr<EmitterWorkItem>> *Pending,
454                    uint32_t Index) {
455   if (Index >= Pending->size())
456     Utils::reserveAndResize(*Pending, Index + 1);
457 }
458 
459 } // end of anonymous namespace
460 
461 // static
TlsInit()462 void GlobalContext::TlsInit() { ICE_TLS_INIT_FIELD(TLS); }
463 
emitFileHeader()464 void GlobalContext::emitFileHeader() {
465   TimerMarker T1(Ice::TimerStack::TT_emitAsm, this);
466   if (getFlags().getOutFileType() == FT_Elf) {
467     getObjectWriter()->writeInitialELFHeader();
468   } else {
469     if (!BuildDefs::dump()) {
470       getStrError() << "emitFileHeader for non-ELF";
471       getErrorStatus()->assign(EC_Translation);
472     }
473     TargetHeaderLowering::createLowering(this)->lower();
474   }
475 }
476 
lowerConstants()477 void GlobalContext::lowerConstants() { DataLowering->lowerConstants(); }
478 
lowerJumpTables()479 void GlobalContext::lowerJumpTables() { DataLowering->lowerJumpTables(); }
480 
emitTargetRODataSections()481 void GlobalContext::emitTargetRODataSections() {
482   DataLowering->emitTargetRODataSections();
483 }
484 
lowerGlobals(const std::string & SectionSuffix)485 void GlobalContext::lowerGlobals(const std::string &SectionSuffix) {
486   TimerMarker T(TimerStack::TT_emitGlobalInitializers, this);
487   const bool DumpGlobalVariables =
488       BuildDefs::dump() && (getFlags().getVerbose() & IceV_GlobalInit) &&
489       getFlags().matchVerboseFocusOn("", 0);
490   if (DumpGlobalVariables) {
491     OstreamLocker L(this);
492     Ostream &Stream = getStrDump();
493     for (const Ice::VariableDeclaration *Global : Globals) {
494       Global->dump(Stream);
495     }
496   }
497   if (getFlags().getDisableTranslation())
498     return;
499 
500   if (!BuildDefs::minimal() && Instrumentor)
501     Instrumentor->instrumentGlobals(Globals);
502 
503   DataLowering->lowerGlobals(Globals, SectionSuffix);
504   if (DisposeGlobalVariablesAfterLowering) {
505     Globals.clearAndPurge();
506   } else {
507     Globals.clear();
508   }
509 }
510 
emitterWrapper(ThreadContext * MyTLS)511 void GlobalContext::emitterWrapper(ThreadContext *MyTLS) {
512   ICE_TLS_SET_FIELD(TLS, MyTLS);
513   emitItems();
514 }
515 
// Emission loop: pops EmitterWorkItems from the emit queue until the queue
// signals its end with a null item, and emits them in sequence-number order.
// In threaded mode, items that arrive ahead of the one currently expected are
// parked in Pending until their turn comes.
void GlobalContext::emitItems() {
  const bool Threaded = !getFlags().isSequential();
  // Pending is a vector containing the reassembled, ordered list of
  // work items.  When we're ready for the next item, we first check
  // whether it's in the Pending list.  If not, we take an item from
  // the work queue, and if it's not the item we're waiting for, we
  // insert it into Pending and repeat.  The work item is deleted
  // after it is processed.
  std::vector<std::unique_ptr<EmitterWorkItem>> Pending;
  uint32_t DesiredSequenceNumber = getFirstSequenceNumber();
  // [ShuffleStartIndex, ShuffleEndIndex) is the range of Pending entries that
  // are ready to be emitted in the current iteration.
  uint32_t ShuffleStartIndex = DesiredSequenceNumber;
  uint32_t ShuffleEndIndex = DesiredSequenceNumber;
  bool EmitQueueEmpty = false;

  while (!EmitQueueEmpty) {
    resizePending(&Pending, DesiredSequenceNumber);
    // See if Pending contains DesiredSequenceNumber.
    if (Pending[DesiredSequenceNumber] == nullptr) {
      // We need to fetch an EmitterWorkItem from the queue.
      auto RawItem = emitQueueBlockingPop();
      if (RawItem == nullptr) {
        // This is the notifier for an empty queue.
        EmitQueueEmpty = true;
      } else {
        // We get an EmitterWorkItem, we need to add it to Pending.
        uint32_t ItemSeq = RawItem->getSequenceNumber();
        if (Threaded && ItemSeq != DesiredSequenceNumber) {
          // Not the desired one, add it to Pending but do not increase
          // DesiredSequenceNumber. Continue the loop, do not emit the item.
          resizePending(&Pending, ItemSeq);
          Pending[ItemSeq] = std::move(RawItem);
          continue;
        }
        // ItemSeq == DesiredSequenceNumber, we need to check if we should
        // emit it or not. If !Threaded, we're OK with ItemSeq !=
        // DesiredSequenceNumber.
        Pending[DesiredSequenceNumber] = std::move(RawItem);
      }
    }

    // We have the desired EmitterWorkItem or nullptr as the end notifier.
    // If the emitter queue is not empty, increase DesiredSequenceNumber and
    // ShuffleEndIndex.
    if (!EmitQueueEmpty) {
      DesiredSequenceNumber++;
      ShuffleEndIndex++;
    }

    // Emit the items from ShuffleStartIndex up to (not including)
    // ShuffleEndIndex. The range is empty once the end notifier was seen.
    for (uint32_t I = ShuffleStartIndex; I < ShuffleEndIndex; I++) {
      // Taking ownership here means the item is destroyed at the end of this
      // iteration.
      std::unique_ptr<EmitterWorkItem> Item = std::move(Pending[I]);

      switch (Item->getKind()) {
      case EmitterWorkItem::WI_Nop:
        // Placeholder item: nothing to emit.
        break;
      case EmitterWorkItem::WI_GlobalInits: {
        accumulateGlobals(Item->getGlobalInits());
      } break;
      case EmitterWorkItem::WI_Asm: {
        lowerGlobalsIfNoCodeHasBeenSeen();
        accumulateGlobals(Item->getGlobalInits());

        std::unique_ptr<Assembler> Asm = Item->getAsm();
        Asm->alignFunction();
        GlobalString Name = Asm->getFunctionName();
        switch (getFlags().getOutFileType()) {
        case FT_Elf:
          getObjectWriter()->writeFunctionCode(Name, Asm->getInternal(),
                                               Asm.get());
          break;
        case FT_Iasm: {
          OstreamLocker L(this);
          Cfg::emitTextHeader(Name, this, Asm.get());
          Asm->emitIASBytes(this);
        } break;
        case FT_Asm:
          // FT_Asm output is carried by WI_Cfg items, never by WI_Asm items.
          llvm::report_fatal_error("Unexpected FT_Asm");
          break;
        }
      } break;
      case EmitterWorkItem::WI_Cfg: {
        if (!BuildDefs::dump())
          llvm::report_fatal_error("WI_Cfg work item created inappropriately");
        lowerGlobalsIfNoCodeHasBeenSeen();
        accumulateGlobals(Item->getGlobalInits());

        assert(getFlags().getOutFileType() == FT_Asm);
        std::unique_ptr<Cfg> Func = Item->getCfg();
        // Unfortunately, we have to temporarily install the Cfg in TLS
        // because Variable::asType() uses the allocator to create the
        // differently-typed copy.
        CfgLocalAllocatorScope _(Func.get());
        Func->emit();
        dumpStats(Func.get());
      } break;
      }
    }
    // Update the start index for the next batch of items.
    ShuffleStartIndex = ShuffleEndIndex;
  }

  // In case there is no code to be generated, we invoke the conditional
  // lowerGlobals again -- this is a no-op if code has been emitted.
  lowerGlobalsIfNoCodeHasBeenSeen();
}
621 
~GlobalContext()622 GlobalContext::~GlobalContext() {
623   llvm::DeleteContainerPointers(AllThreadContexts);
624   LockedPtr<DestructorArray> Dtors = getDestructors();
625   // Destructors are invoked in the opposite object construction order.
626   for (const auto &Dtor : reverse_range(*Dtors))
627     Dtor();
628 }
629 
dumpStrings()630 void GlobalContext::dumpStrings() {
631   if (!getFlags().getDumpStrings())
632     return;
633   OstreamLocker _(this);
634   Ostream &Str = getStrDump();
635   Str << "GlobalContext strings:\n";
636   getStrings()->dump(Str);
637 }
638 
dumpConstantLookupCounts()639 void GlobalContext::dumpConstantLookupCounts() {
640   if (!BuildDefs::dump())
641     return;
642   const bool DumpCounts = (getFlags().getVerbose() & IceV_ConstPoolStats) &&
643                           getFlags().matchVerboseFocusOn("", 0);
644   if (!DumpCounts)
645     return;
646 
647   OstreamLocker _(this);
648   Ostream &Str = getStrDump();
649   Str << "Constant pool use stats: count+value+type\n";
650 #define X(WhichPool)                                                           \
651   for (auto *C : getConstPool()->WhichPool.getConstantPool()) {                \
652     Str << C->getLookupCount() << " ";                                         \
653     C->dump(Str);                                                              \
654     Str << " " << C->getType() << "\n";                                        \
655   }
656   X(Integers1);
657   X(Integers8);
658   X(Integers16);
659   X(Integers32);
660   X(Integers64);
661   X(Floats);
662   X(Doubles);
663   X(Relocatables);
664   X(ExternRelocatables);
665 #undef X
666 }
667 
668 // TODO(stichnot): Consider adding thread-local caches of constant pool entries
669 // to reduce contention.
670 
671 // All locking is done by the getConstantInt[0-9]+() target function.
getConstantInt(Type Ty,int64_t Value)672 Constant *GlobalContext::getConstantInt(Type Ty, int64_t Value) {
673   switch (Ty) {
674   case IceType_i1:
675     return getConstantInt1(Value);
676   case IceType_i8:
677     return getConstantInt8(Value);
678   case IceType_i16:
679     return getConstantInt16(Value);
680   case IceType_i32:
681     return getConstantInt32(Value);
682   case IceType_i64:
683     return getConstantInt64(Value);
684   default:
685     llvm_unreachable("Bad integer type for getConstant");
686   }
687   return nullptr;
688 }
689 
getConstantInt1Internal(int8_t ConstantInt1)690 Constant *GlobalContext::getConstantInt1Internal(int8_t ConstantInt1) {
691   ConstantInt1 &= INT8_C(1);
692   return getConstPool()->Integers1.getOrAdd(this, ConstantInt1);
693 }
694 
getConstantInt8Internal(int8_t ConstantInt8)695 Constant *GlobalContext::getConstantInt8Internal(int8_t ConstantInt8) {
696   return getConstPool()->Integers8.getOrAdd(this, ConstantInt8);
697 }
698 
getConstantInt16Internal(int16_t ConstantInt16)699 Constant *GlobalContext::getConstantInt16Internal(int16_t ConstantInt16) {
700   return getConstPool()->Integers16.getOrAdd(this, ConstantInt16);
701 }
702 
getConstantInt32Internal(int32_t ConstantInt32)703 Constant *GlobalContext::getConstantInt32Internal(int32_t ConstantInt32) {
704   return getConstPool()->Integers32.getOrAdd(this, ConstantInt32);
705 }
706 
getConstantInt64Internal(int64_t ConstantInt64)707 Constant *GlobalContext::getConstantInt64Internal(int64_t ConstantInt64) {
708   return getConstPool()->Integers64.getOrAdd(this, ConstantInt64);
709 }
710 
getConstantFloat(float ConstantFloat)711 Constant *GlobalContext::getConstantFloat(float ConstantFloat) {
712   return getConstPool()->Floats.getOrAdd(this, ConstantFloat);
713 }
714 
getConstantDouble(double ConstantDouble)715 Constant *GlobalContext::getConstantDouble(double ConstantDouble) {
716   return getConstPool()->Doubles.getOrAdd(this, ConstantDouble);
717 }
718 
getConstantSymWithEmitString(const RelocOffsetT Offset,const RelocOffsetArray & OffsetExpr,GlobalString Name,const std::string & EmitString)719 Constant *GlobalContext::getConstantSymWithEmitString(
720     const RelocOffsetT Offset, const RelocOffsetArray &OffsetExpr,
721     GlobalString Name, const std::string &EmitString) {
722   return getConstPool()->Relocatables.getOrAdd(
723       this, RelocatableTuple(Offset, OffsetExpr, Name, EmitString));
724 }
725 
getConstantSym(RelocOffsetT Offset,GlobalString Name)726 Constant *GlobalContext::getConstantSym(RelocOffsetT Offset,
727                                         GlobalString Name) {
728   constexpr char EmptyEmitString[] = "";
729   return getConstantSymWithEmitString(Offset, {}, Name, EmptyEmitString);
730 }
731 
getConstantExternSym(GlobalString Name)732 Constant *GlobalContext::getConstantExternSym(GlobalString Name) {
733   constexpr RelocOffsetT Offset = 0;
734   return getConstPool()->ExternRelocatables.getOrAdd(
735       this, RelocatableTuple(Offset, {}, Name));
736 }
737 
getConstantUndef(Type Ty)738 Constant *GlobalContext::getConstantUndef(Type Ty) {
739   return getConstPool()->Undefs.getOrAdd(this, Ty);
740 }
741 
getConstantZero(Type Ty)742 Constant *GlobalContext::getConstantZero(Type Ty) {
743   Constant *Zero = ConstZeroForType[Ty];
744   if (Zero == nullptr)
745     llvm::report_fatal_error("Unsupported constant type: " + typeStdString(Ty));
746   return Zero;
747 }
748 
749 // All locking is done by the getConstant*() target function.
getConstantZeroInternal(Type Ty)750 Constant *GlobalContext::getConstantZeroInternal(Type Ty) {
751   switch (Ty) {
752   case IceType_i1:
753     return getConstantInt1Internal(0);
754   case IceType_i8:
755     return getConstantInt8Internal(0);
756   case IceType_i16:
757     return getConstantInt16Internal(0);
758   case IceType_i32:
759     return getConstantInt32Internal(0);
760   case IceType_i64:
761     return getConstantInt64Internal(0);
762   case IceType_f32:
763     return getConstantFloat(0);
764   case IceType_f64:
765     return getConstantDouble(0);
766   default:
767     return nullptr;
768   }
769 }
770 
getConstantPool(Type Ty)771 ConstantList GlobalContext::getConstantPool(Type Ty) {
772   switch (Ty) {
773   case IceType_i1:
774   case IceType_i8:
775     return getConstPool()->Integers8.getConstantPool();
776   case IceType_i16:
777     return getConstPool()->Integers16.getConstantPool();
778   case IceType_i32:
779     return getConstPool()->Integers32.getConstantPool();
780   case IceType_i64:
781     return getConstPool()->Integers64.getConstantPool();
782   case IceType_f32:
783     return getConstPool()->Floats.getConstantPool();
784   case IceType_f64:
785     return getConstPool()->Doubles.getConstantPool();
786   case IceType_v4i1:
787   case IceType_v8i1:
788   case IceType_v16i1:
789   case IceType_v16i8:
790   case IceType_v8i16:
791   case IceType_v4i32:
792   case IceType_v4f32:
793     llvm::report_fatal_error("Unsupported constant type: " + typeStdString(Ty));
794     break;
795   case IceType_void:
796   case IceType_NUM:
797     break;
798   }
799   llvm_unreachable("Unknown type");
800 }
801 
getConstantExternSyms()802 ConstantList GlobalContext::getConstantExternSyms() {
803   return getConstPool()->ExternRelocatables.getConstantPool();
804 }
805 
getGlobalString(const std::string & Name)806 GlobalString GlobalContext::getGlobalString(const std::string &Name) {
807   return GlobalString::createWithString(this, Name);
808 }
809 
getJumpTables()810 JumpTableDataList GlobalContext::getJumpTables() {
811   JumpTableDataList JumpTables(*getJumpTableList());
812   // Make order deterministic by sorting into functions and then ID of the jump
813   // table within that function.
814   std::sort(JumpTables.begin(), JumpTables.end(),
815             [](const JumpTableData &A, const JumpTableData &B) {
816               if (A.getFunctionName() != B.getFunctionName())
817                 return A.getFunctionName() < B.getFunctionName();
818               return A.getId() < B.getId();
819             });
820 
821   return JumpTables;
822 }
823 
addJumpTableData(JumpTableData JumpTable)824 void GlobalContext::addJumpTableData(JumpTableData JumpTable) {
825   getJumpTableList()->emplace_back(std::move(JumpTable));
826 }
827 
newTimerStackID(const std::string & Name)828 TimerStackIdT GlobalContext::newTimerStackID(const std::string &Name) {
829   if (!BuildDefs::timers())
830     return 0;
831   auto Timers = getTimers();
832   TimerStackIdT NewID = Timers->size();
833   Timers->push_back(TimerStack(Name));
834   return NewID;
835 }
836 
getTimerID(TimerStackIdT StackID,const std::string & Name)837 TimerIdT GlobalContext::getTimerID(TimerStackIdT StackID,
838                                    const std::string &Name) {
839   auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
840   assert(StackID < Timers->size());
841   return Timers->at(StackID).getTimerID(Name);
842 }
843 
pushTimer(TimerIdT ID,TimerStackIdT StackID)844 void GlobalContext::pushTimer(TimerIdT ID, TimerStackIdT StackID) {
845   auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
846   assert(StackID < Timers->size());
847   Timers->at(StackID).push(ID);
848 }
849 
popTimer(TimerIdT ID,TimerStackIdT StackID)850 void GlobalContext::popTimer(TimerIdT ID, TimerStackIdT StackID) {
851   auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
852   assert(StackID < Timers->size());
853   Timers->at(StackID).pop(ID);
854 }
855 
resetTimer(TimerStackIdT StackID)856 void GlobalContext::resetTimer(TimerStackIdT StackID) {
857   auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
858   assert(StackID < Timers->size());
859   Timers->at(StackID).reset();
860 }
861 
getTimerName(TimerStackIdT StackID)862 std::string GlobalContext::getTimerName(TimerStackIdT StackID) {
863   auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
864   assert(StackID < Timers->size());
865   return Timers->at(StackID).getName();
866 }
867 
setTimerName(TimerStackIdT StackID,const std::string & NewName)868 void GlobalContext::setTimerName(TimerStackIdT StackID,
869                                  const std::string &NewName) {
870   auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
871   assert(StackID < Timers->size());
872   Timers->at(StackID).setName(NewName);
873 }
874 
875 // Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr at the
876 // interface to take and transfer ownership, but they internally store the raw
877 // Cfg pointer in the work queue. This allows e.g. future queue optimizations
878 // such as the use of atomics to modify queue elements.
optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item)879 void GlobalContext::optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item) {
880   assert(Item);
881   {
882     TimerMarker _(TimerStack::TT_qTransPush, this);
883     OptQ.blockingPush(std::move(Item));
884   }
885   if (getFlags().isSequential())
886     translateFunctions();
887 }
888 
optQueueBlockingPop()889 std::unique_ptr<OptWorkItem> GlobalContext::optQueueBlockingPop() {
890   TimerMarker _(TimerStack::TT_qTransPop, this);
891   return OptQ.blockingPop(OptQWakeupSize);
892 }
893 
emitQueueBlockingPush(std::unique_ptr<EmitterWorkItem> Item)894 void GlobalContext::emitQueueBlockingPush(
895     std::unique_ptr<EmitterWorkItem> Item) {
896   assert(Item);
897   {
898     TimerMarker _(TimerStack::TT_qEmitPush, this);
899     EmitQ.blockingPush(std::move(Item));
900   }
901   if (getFlags().isSequential())
902     emitItems();
903 }
904 
emitQueueBlockingPop()905 std::unique_ptr<EmitterWorkItem> GlobalContext::emitQueueBlockingPop() {
906   TimerMarker _(TimerStack::TT_qEmitPop, this);
907   return EmitQ.blockingPop();
908 }
909 
initParserThread()910 void GlobalContext::initParserThread() {
911   ThreadContext *Tls = new ThreadContext();
912   auto Timers = getTimers();
913   Timers->initInto(Tls->Timers);
914   AllThreadContexts.push_back(Tls);
915   ICE_TLS_SET_FIELD(TLS, Tls);
916 }
917 
startWorkerThreads()918 void GlobalContext::startWorkerThreads() {
919   size_t NumWorkers = getFlags().getNumTranslationThreads();
920   auto Timers = getTimers();
921   for (size_t i = 0; i < NumWorkers; ++i) {
922     ThreadContext *WorkerTLS = new ThreadContext();
923     Timers->initInto(WorkerTLS->Timers);
924     AllThreadContexts.push_back(WorkerTLS);
925     TranslationThreads.push_back(std::thread(
926         &GlobalContext::translateFunctionsWrapper, this, WorkerTLS));
927   }
928   if (NumWorkers) {
929     ThreadContext *WorkerTLS = new ThreadContext();
930     Timers->initInto(WorkerTLS->Timers);
931     AllThreadContexts.push_back(WorkerTLS);
932     EmitterThreads.push_back(
933         std::thread(&GlobalContext::emitterWrapper, this, WorkerTLS));
934   }
935 }
936 
resetStats()937 void GlobalContext::resetStats() {
938   if (BuildDefs::dump())
939     ICE_TLS_GET_FIELD(TLS)->StatsFunction.reset();
940 }
941 
dumpStats(const Cfg * Func)942 void GlobalContext::dumpStats(const Cfg *Func) {
943   if (!getFlags().getDumpStats())
944     return;
945   if (Func == nullptr) {
946     getStatsCumulative()->dump(Func, this);
947   } else {
948     ICE_TLS_GET_FIELD(TLS)->StatsFunction.dump(Func, this);
949   }
950 }
951 
statsUpdateEmitted(uint32_t InstCount)952 void GlobalContext::statsUpdateEmitted(uint32_t InstCount) {
953   if (!getFlags().getDumpStats())
954     return;
955   ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
956   Tls->StatsFunction.update(CodeStats::CS_InstCount, InstCount);
957   Tls->StatsCumulative.update(CodeStats::CS_InstCount, InstCount);
958 }
959 
statsUpdateRegistersSaved(uint32_t Num)960 void GlobalContext::statsUpdateRegistersSaved(uint32_t Num) {
961   if (!getFlags().getDumpStats())
962     return;
963   ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
964   Tls->StatsFunction.update(CodeStats::CS_RegsSaved, Num);
965   Tls->StatsCumulative.update(CodeStats::CS_RegsSaved, Num);
966 }
967 
statsUpdateFrameBytes(uint32_t Bytes)968 void GlobalContext::statsUpdateFrameBytes(uint32_t Bytes) {
969   if (!getFlags().getDumpStats())
970     return;
971   ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
972   Tls->StatsFunction.update(CodeStats::CS_FrameByte, Bytes);
973   Tls->StatsCumulative.update(CodeStats::CS_FrameByte, Bytes);
974 }
975 
statsUpdateSpills()976 void GlobalContext::statsUpdateSpills() {
977   if (!getFlags().getDumpStats())
978     return;
979   ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
980   Tls->StatsFunction.update(CodeStats::CS_NumSpills);
981   Tls->StatsCumulative.update(CodeStats::CS_NumSpills);
982 }
983 
statsUpdateFills()984 void GlobalContext::statsUpdateFills() {
985   if (!getFlags().getDumpStats())
986     return;
987   ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
988   Tls->StatsFunction.update(CodeStats::CS_NumFills);
989   Tls->StatsCumulative.update(CodeStats::CS_NumFills);
990 }
991 
statsUpdateRPImms()992 void GlobalContext::statsUpdateRPImms() {
993   if (!getFlags().getDumpStats())
994     return;
995   ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
996   Tls->StatsFunction.update(CodeStats::CS_NumRPImms);
997   Tls->StatsCumulative.update(CodeStats::CS_NumRPImms);
998 }
999 
dumpTimers(TimerStackIdT StackID,bool DumpCumulative)1000 void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) {
1001   if (!BuildDefs::timers())
1002     return;
1003   auto Timers = getTimers();
1004   assert(Timers->size() > StackID);
1005   OstreamLocker L(this);
1006   Timers->at(StackID).dump(getStrDump(), DumpCumulative);
1007 }
1008 
dumpLocalTimers(const std::string & TimerNameOverride,TimerStackIdT StackID,bool DumpCumulative)1009 void GlobalContext::dumpLocalTimers(const std::string &TimerNameOverride,
1010                                     TimerStackIdT StackID,
1011                                     bool DumpCumulative) {
1012   if (!BuildDefs::timers())
1013     return;
1014   auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
1015   assert(Timers->size() > StackID);
1016   // Temporarily override the thread-local timer name with the given name.
1017   // Don't do it permanently because the final timer merge at the end expects
1018   // the thread-local timer names to be the same as the global timer name.
1019   auto OrigName = getTimerName(StackID);
1020   setTimerName(StackID, TimerNameOverride);
1021   {
1022     OstreamLocker _(this);
1023     Timers->at(StackID).dump(getStrDump(), DumpCumulative);
1024   }
1025   setTimerName(StackID, OrigName);
1026 }
1027 
1028 LockedPtr<StringPool>
getStrings(const GlobalContext * PoolOwner)1029 GlobalStringPoolTraits::getStrings(const GlobalContext *PoolOwner) {
1030   return PoolOwner->getStrings();
1031 }
1032 
getTimerIdFromFuncName(GlobalContext * Ctx,const std::string & FuncName)1033 TimerIdT TimerMarker::getTimerIdFromFuncName(GlobalContext *Ctx,
1034                                              const std::string &FuncName) {
1035   if (!BuildDefs::timers())
1036     return 0;
1037   if (!getFlags().getTimeEachFunction())
1038     return 0;
1039   return Ctx->getTimerID(GlobalContext::TSK_Funcs, FuncName);
1040 }
1041 
push()1042 void TimerMarker::push() {
1043   switch (StackID) {
1044   case GlobalContext::TSK_Default:
1045     Active = getFlags().getSubzeroTimingEnabled() ||
1046              !getFlags().getTimingFocusOnString().empty();
1047     break;
1048   case GlobalContext::TSK_Funcs:
1049     Active = getFlags().getTimeEachFunction();
1050     break;
1051   default:
1052     break;
1053   }
1054   if (Active)
1055     Ctx->pushTimer(ID, StackID);
1056 }
1057 
pushCfg(const Cfg * Func)1058 void TimerMarker::pushCfg(const Cfg *Func) {
1059   Ctx = Func->getContext();
1060   Active = Func->getFocusedTiming() || getFlags().getSubzeroTimingEnabled();
1061   if (Active)
1062     Ctx->pushTimer(ID, StackID);
1063 }
1064 
1065 ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS);
1066 
1067 } // end of namespace Ice
1068