1 //===- subzero/src/IceGlobalContext.cpp - Global context defs -------------===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Defines aspects of the compilation that persist across multiple
12 /// functions.
13 ///
14 //===----------------------------------------------------------------------===//
15
16 #include "IceGlobalContext.h"
17
18 #include "IceCfg.h"
19 #include "IceCfgNode.h"
20 #include "IceClFlags.h"
21 #include "IceDefs.h"
22 #include "IceELFObjectWriter.h"
23 #include "IceGlobalInits.h"
24 #include "IceLiveness.h"
25 #include "IceOperand.h"
26 #include "IceRevision.h"
27 #include "IceTargetLowering.h"
28 #include "IceTimerTree.h"
29 #include "IceTypes.def"
30 #include "IceTypes.h"
31
32 #ifdef __clang__
33 #pragma clang diagnostic push
34 #pragma clang diagnostic ignored "-Wunused-parameter"
35 #endif // __clang__
36
37 #include "llvm/Support/Timer.h"
38
39 #ifdef __clang__
40 #pragma clang diagnostic pop
41 #endif // __clang__
42
43 #include <algorithm> // max()
44
45 namespace std {
46 template <> struct hash<Ice::RelocatableTuple> {
operator ()std::hash47 size_t operator()(const Ice::RelocatableTuple &Key) const {
48 // Use the relocatable's name, plus the hash of a combination of the number
49 // of OffsetExprs and the known, fixed offset for the reloc. We left shift
50 // the known relocatable by 5 trying to minimize the interaction between the
51 // bits in OffsetExpr.size() and Key.Offset.
52 return hash<Ice::SizeT>()(Key.Name.getID()) +
53 hash<std::size_t>()(Key.OffsetExpr.size() + (Key.Offset << 5));
54 }
55 };
56 } // end of namespace std
57
58 namespace Ice {
59
60 namespace {
61
62 // Define the key comparison function for the constant pool's unordered_map,
63 // but only for key types of interest: integer types, floating point types, and
64 // the special RelocatableTuple.
65 template <typename KeyType, class Enable = void> struct KeyCompare {};
66
67 template <typename KeyType>
68 struct KeyCompare<KeyType,
69 typename std::enable_if<
70 std::is_integral<KeyType>::value ||
71 std::is_same<KeyType, RelocatableTuple>::value>::type> {
operator ()Ice::__anon0790dc650111::KeyCompare72 bool operator()(const KeyType &Value1, const KeyType &Value2) const {
73 return Value1 == Value2;
74 }
75 };
76 template <typename KeyType>
77 struct KeyCompare<KeyType, typename std::enable_if<
78 std::is_floating_point<KeyType>::value>::type> {
operator ()Ice::__anon0790dc650111::KeyCompare79 bool operator()(const KeyType &Value1, const KeyType &Value2) const {
80 return !memcmp(&Value1, &Value2, sizeof(KeyType));
81 }
82 };
83
84 // Define a key comparison function for sorting the constant pool's values
85 // after they are dumped to a vector. This covers integer types, floating point
86 // types, and ConstantRelocatable values.
87 template <typename ValueType, class Enable = void> struct KeyCompareLess {};
88
89 template <typename ValueType>
90 struct KeyCompareLess<ValueType,
91 typename std::enable_if<std::is_floating_point<
92 typename ValueType::PrimType>::value>::type> {
operator ()Ice::__anon0790dc650111::KeyCompareLess93 bool operator()(const Constant *Const1, const Constant *Const2) const {
94 using CompareType = uint64_t;
95 static_assert(sizeof(typename ValueType::PrimType) <= sizeof(CompareType),
96 "Expected floating-point type of width 64-bit or less");
97 typename ValueType::PrimType V1 = llvm::cast<ValueType>(Const1)->getValue();
98 typename ValueType::PrimType V2 = llvm::cast<ValueType>(Const2)->getValue();
99 // We avoid "V1<V2" because of NaN.
100 // We avoid "memcmp(&V1,&V2,sizeof(V1))<0" which depends on the
101 // endian-ness of the host system running Subzero.
102 // Instead, compare the result of bit_cast to uint64_t.
103 uint64_t I1 = 0, I2 = 0;
104 memcpy(&I1, &V1, sizeof(V1));
105 memcpy(&I2, &V2, sizeof(V2));
106 return I1 < I2;
107 }
108 };
109 template <typename ValueType>
110 struct KeyCompareLess<ValueType,
111 typename std::enable_if<std::is_integral<
112 typename ValueType::PrimType>::value>::type> {
operator ()Ice::__anon0790dc650111::KeyCompareLess113 bool operator()(const Constant *Const1, const Constant *Const2) const {
114 typename ValueType::PrimType V1 = llvm::cast<ValueType>(Const1)->getValue();
115 typename ValueType::PrimType V2 = llvm::cast<ValueType>(Const2)->getValue();
116 return V1 < V2;
117 }
118 };
119 template <typename ValueType>
120 struct KeyCompareLess<
121 ValueType, typename std::enable_if<
122 std::is_same<ValueType, ConstantRelocatable>::value>::type> {
operator ()Ice::__anon0790dc650111::KeyCompareLess123 bool operator()(const Constant *Const1, const Constant *Const2) const {
124 auto *V1 = llvm::cast<ValueType>(Const1);
125 auto *V2 = llvm::cast<ValueType>(Const2);
126 if (V1->getName() == V2->getName())
127 return V1->getOffset() < V2->getOffset();
128 return V1->getName() < V2->getName();
129 }
130 };
131
132 // TypePool maps constants of type KeyType (e.g. float) to pointers to
133 // type ValueType (e.g. ConstantFloat).
134 template <Type Ty, typename KeyType, typename ValueType> class TypePool {
135 TypePool(const TypePool &) = delete;
136 TypePool &operator=(const TypePool &) = delete;
137
138 public:
139 TypePool() = default;
getOrAdd(GlobalContext * Ctx,KeyType Key)140 ValueType *getOrAdd(GlobalContext *Ctx, KeyType Key) {
141 auto Iter = Pool.find(Key);
142 if (Iter != Pool.end()) {
143 Iter->second->updateLookupCount();
144 return Iter->second;
145 }
146 auto *Result = ValueType::create(Ctx, Ty, Key);
147 Pool[Key] = Result;
148 Result->updateLookupCount();
149 return Result;
150 }
getConstantPool() const151 ConstantList getConstantPool() const {
152 ConstantList Constants;
153 Constants.reserve(Pool.size());
154 for (auto &I : Pool)
155 Constants.push_back(I.second);
156 // The sort (and its KeyCompareLess machinery) is not strictly necessary,
157 // but is desirable for producing output that is deterministic across
158 // unordered_map::iterator implementations.
159 std::sort(Constants.begin(), Constants.end(), KeyCompareLess<ValueType>());
160 return Constants;
161 }
size() const162 size_t size() const { return Pool.size(); }
163
164 private:
165 // Use the default hash function, and a custom key comparison function. The
166 // key comparison function for floating point variables can't use the default
167 // == based implementation because of special C++ semantics regarding +0.0,
168 // -0.0, and NaN comparison. However, it's OK to use the default hash for
169 // floating point values because KeyCompare is the final source of truth - in
170 // the worst case a "false" collision must be resolved.
171 using ContainerType =
172 std::unordered_map<KeyType, ValueType *, std::hash<KeyType>,
173 KeyCompare<KeyType>>;
174 ContainerType Pool;
175 };
176
177 // UndefPool maps ICE types to the corresponding ConstantUndef values.
178 class UndefPool {
179 UndefPool(const UndefPool &) = delete;
180 UndefPool &operator=(const UndefPool &) = delete;
181
182 public:
UndefPool()183 UndefPool() : Pool(IceType_NUM) {}
184
getOrAdd(GlobalContext * Ctx,Type Ty)185 ConstantUndef *getOrAdd(GlobalContext *Ctx, Type Ty) {
186 if (Pool[Ty] == nullptr)
187 Pool[Ty] = ConstantUndef::create(Ctx, Ty);
188 return Pool[Ty];
189 }
190
191 private:
192 std::vector<ConstantUndef *> Pool;
193 };
194
195 } // end of anonymous namespace
196
// The global constant pool bundles individual pools of each type of
// interest. It is non-copyable and exposes its member pools directly.
class ConstantPool {
  ConstantPool(const ConstantPool &) = delete;
  ConstantPool &operator=(const ConstantPool &) = delete;

public:
  ConstantPool() = default;
  // Floating point pools, keyed by the host float/double value.
  TypePool<IceType_f32, float, ConstantFloat> Floats;
  TypePool<IceType_f64, double, ConstantDouble> Doubles;
  // Integer pools. i1, i8, i16 and i32 constants are all represented as
  // ConstantInteger32; only i64 uses ConstantInteger64. Note that i1 is keyed
  // by int8_t, like i8.
  TypePool<IceType_i1, int8_t, ConstantInteger32> Integers1;
  TypePool<IceType_i8, int8_t, ConstantInteger32> Integers8;
  TypePool<IceType_i16, int16_t, ConstantInteger32> Integers16;
  TypePool<IceType_i32, int32_t, ConstantInteger32> Integers32;
  TypePool<IceType_i64, int64_t, ConstantInteger64> Integers64;
  // Relocatable symbol pools, keyed by RelocatableTuple and created with type
  // IceType_i32. Extern symbols are kept in a pool of their own.
  TypePool<IceType_i32, RelocatableTuple, ConstantRelocatable> Relocatables;
  TypePool<IceType_i32, RelocatableTuple, ConstantRelocatable>
      ExternRelocatables;
  // One undef constant per ICE type.
  UndefPool Undefs;
};
217
waitForWorkerThreads()218 void GlobalContext::waitForWorkerThreads() {
219 if (WaitForWorkerThreadsCalled.exchange(true))
220 return;
221 optQueueNotifyEnd();
222 for (std::thread &Worker : TranslationThreads) {
223 Worker.join();
224 }
225 TranslationThreads.clear();
226
227 // Only notify the emit queue to end after all the translation threads have
228 // ended.
229 emitQueueNotifyEnd();
230 for (std::thread &Worker : EmitterThreads) {
231 Worker.join();
232 }
233 EmitterThreads.clear();
234
235 if (BuildDefs::timers()) {
236 auto Timers = getTimers();
237 for (ThreadContext *TLS : AllThreadContexts)
238 Timers->mergeFrom(TLS->Timers);
239 }
240 if (BuildDefs::dump()) {
241 // Do a separate loop over AllThreadContexts to avoid holding two locks at
242 // once.
243 auto Stats = getStatsCumulative();
244 for (ThreadContext *TLS : AllThreadContexts)
245 Stats->add(TLS->StatsCumulative);
246 }
247 }
248
dump(const Cfg * Func,GlobalContext * Ctx)249 void GlobalContext::CodeStats::dump(const Cfg *Func, GlobalContext *Ctx) {
250 if (!BuildDefs::dump())
251 return;
252 OstreamLocker _(Ctx);
253 Ostream &Str = Ctx->getStrDump();
254 const std::string Name =
255 (Func == nullptr ? "_FINAL_" : Func->getFunctionNameAndSize());
256 #define X(str, tag) \
257 Str << "|" << Name << "|" str "|" << Stats[CS_##tag] << "\n";
258 CODESTATS_TABLE
259 #undef X
260 Str << "|" << Name << "|Spills+Fills|"
261 << Stats[CS_NumSpills] + Stats[CS_NumFills] << "\n";
262 Str << "|" << Name << "|Memory Usage |";
263 if (const auto MemUsed = static_cast<size_t>(
264 llvm::TimeRecord::getCurrentTime(false).getMemUsed())) {
265 static constexpr size_t _1MB = 1024 * 1024;
266 Str << (MemUsed / _1MB) << " MB";
267 } else {
268 Str << "(requires '-track-memory')";
269 }
270 Str << "\n";
271 Str << "|" << Name << "|CPool Sizes ";
272 {
273 auto Pool = Ctx->getConstPool();
274 Str << "|f32=" << Pool->Floats.size();
275 Str << "|f64=" << Pool->Doubles.size();
276 Str << "|i1=" << Pool->Integers1.size();
277 Str << "|i8=" << Pool->Integers8.size();
278 Str << "|i16=" << Pool->Integers16.size();
279 Str << "|i32=" << Pool->Integers32.size();
280 Str << "|i64=" << Pool->Integers64.size();
281 Str << "|Rel=" << Pool->Relocatables.size();
282 Str << "|ExtRel=" << Pool->ExternRelocatables.size();
283 }
284 Str << "\n";
285 if (Func != nullptr) {
286 Str << "|" << Name << "|Cfg Memory |" << Func->getTotalMemoryMB()
287 << " MB\n";
288 Str << "|" << Name << "|Liveness Memory |" << Func->getLivenessMemoryMB()
289 << " MB\n";
290 }
291 }
292
293 namespace {
294
295 // By default, wake up the main parser thread when the OptQ gets half empty.
296 static constexpr size_t DefaultOptQWakeupSize = GlobalContext::MaxOptQSize >> 1;
297
298 } // end of anonymous namespace
299
/// Constructs the global context: sets up the string and constant pools, the
/// work queues, thread-local state for the calling thread, cached constants,
/// runtime helper symbols, and (optionally) the revision global.
/// \param OsDump stream used for dump output; asserted to be non-null.
/// \param OsEmit stream used for emitted output; asserted to be non-null.
/// \param OsError stream used for error output; asserted to be non-null.
/// \param ELFStr ELF streamer; dereferenced only when the output file type is
/// FT_Elf.
GlobalContext::GlobalContext(Ostream *OsDump, Ostream *OsEmit, Ostream *OsError,
                             ELFStreamer *ELFStr)
    : Strings(new StringPool()), ConstPool(new ConstantPool()), ErrorStatus(),
      StrDump(OsDump), StrEmit(OsEmit), StrError(OsError), IntrinsicsInfo(this),
      ObjectWriter(),
      // The wakeup size is never smaller than the number of translation
      // threads.
      OptQWakeupSize(std::max(DefaultOptQWakeupSize,
                              size_t(getFlags().getNumTranslationThreads()))),
      // With parallel parsing the OptQ may grow to MaxOptQSize; otherwise it
      // is capped at the number of translation threads.
      OptQ(/*Sequential=*/getFlags().isSequential(),
           /*MaxSize=*/
           getFlags().isParseParallel()
               ? MaxOptQSize
               : getFlags().getNumTranslationThreads()),
      // EmitQ is allowed unlimited size.
      EmitQ(/*Sequential=*/getFlags().isSequential()),
      DataLowering(TargetDataLowering::createLowering(this)) {
  assert(OsDump && "OsDump is not defined for GlobalContext");
  assert(OsEmit && "OsEmit is not defined for GlobalContext");
  assert(OsError && "OsError is not defined for GlobalContext");
  // Make sure thread_local fields are properly initialized before any
  // accesses are made. Do this here instead of at the start of
  // main() so that all clients (e.g. unit tests) can benefit for
  // free.
  GlobalContext::TlsInit();
  Cfg::TlsInit();
  Liveness::TlsInit();
  // Create a new ThreadContext for the current thread. No need to
  // lock AllThreadContexts at this point since no other threads have
  // access yet to this GlobalContext object.
  ThreadContext *MyTLS = new ThreadContext();
  AllThreadContexts.push_back(MyTLS);
  ICE_TLS_SET_FIELD(TLS, MyTLS);
  // Pre-register built-in stack names.
  if (BuildDefs::timers()) {
    // TODO(stichnot): There needs to be a strong relationship between
    // the newTimerStackID() return values and TSK_Default/TSK_Funcs.
    newTimerStackID("Total across all functions");
    newTimerStackID("Per-function summary");
  }
  Timers.initInto(MyTLS->Timers);
  // An object writer is only needed for ELF output; the textual output types
  // leave ObjectWriter empty.
  switch (getFlags().getOutFileType()) {
  case FT_Elf:
    ObjectWriter.reset(new ELFObjectWriter(*this, *ELFStr));
    break;
  case FT_Asm:
  case FT_Iasm:
    break;
  }
// Cache up front common constants.
#define X(tag, sizeLog2, align, elts, elty, str, rcstr)                        \
  ConstZeroForType[IceType_##tag] = getConstantZeroInternal(IceType_##tag);
  ICETYPE_TABLE;
#undef X
  ConstantTrue = getConstantInt1Internal(1);
// Define runtime helper functions.
#define X(Tag, Name)                                                           \
  RuntimeHelperFunc[static_cast<size_t>(RuntimeHelper::H_##Tag)] =             \
      getConstantExternSym(getGlobalString(Name));
  RUNTIME_HELPER_FUNCTIONS_TABLE
#undef X

  TargetLowering::staticInit(this);

  if (getFlags().getEmitRevision()) {
    // Embed the Subzero revision into the compiled binary by creating a special
    // global variable initialized with the revision string.
    auto *Revision = VariableDeclaration::create(&Globals, true);
    Revision->setName(this, "__Sz_revision");
    Revision->setIsConstant(true);
    const char *RevisionString = getSubzeroRevision();
    // The "1 +" includes the string's terminating NUL in the initializer.
    Revision->addInitializer(VariableDeclaration::DataInitializer::create(
        &Globals, RevisionString, 1 + strlen(RevisionString)));
    Globals.push_back(Revision);
  }
}
374
/// Installs MyTLS as the calling thread's TLS field and then runs
/// translateFunctions().
void GlobalContext::translateFunctionsWrapper(ThreadContext *MyTLS) {
  ICE_TLS_SET_FIELD(TLS, MyTLS);
  translateFunctions();
}
379
/// Translation loop: repeatedly pops work items from the optimization queue
/// until optQueueBlockingPop() yields a null item. Each parsed Cfg is
/// translated, and exactly one EmitterWorkItem carrying the Cfg's sequence
/// number is pushed onto the emit queue for it, even when translation is
/// skipped or fails.
void GlobalContext::translateFunctions() {
  TimerMarker Timer(TimerStack::TT_translateFunctions, this);
  while (std::unique_ptr<OptWorkItem> OptItem = optQueueBlockingPop()) {
    std::unique_ptr<EmitterWorkItem> Item;
    auto Func = OptItem->getParsedCfg();
    // Install Func in TLS for Cfg-specific container allocators.
    CfgLocalAllocatorScope _(Func.get());
    // Reset per-function stats being accumulated in TLS.
    resetStats();
    // Set verbose level to none if the current function does NOT match the
    // -verbose-focus command-line option.
    if (!getFlags().matchVerboseFocusOn(Func->getFunctionName(),
                                        Func->getSequenceNumber()))
      Func->setVerbose(IceV_None);
    // Disable translation if -notranslate is specified, or if the current
    // function does not match the -translate-only option. If translation is
    // disabled, just dump the high-level IR and continue.
    if (getFlags().getDisableTranslation() ||
        !getFlags().matchTranslateOnly(Func->getFunctionName(),
                                       Func->getSequenceNumber())) {
      Func->dump();
      // Add a dummy work item as a placeholder. This maintains sequence
      // numbers so that the emitter thread will emit subsequent functions.
      Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber());
      emitQueueBlockingPush(std::move(Item));
      continue; // Func goes out of scope and gets deleted
    }

    Func->translate();
    if (Func->hasError()) {
      // Record the failure, report it on the error stream, and still push a
      // placeholder item carrying only the sequence number.
      getErrorStatus()->assign(EC_Translation);
      OstreamLocker L(this);
      getStrError() << "ICE translation error: " << Func->getFunctionName()
                    << ": " << Func->getError() << ": "
                    << Func->getFunctionNameAndSize() << "\n";
      Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber());
    } else {
      Func->getAssembler<>()->setInternal(Func->getInternal());
      switch (getFlags().getOutFileType()) {
      case FT_Elf:
      case FT_Iasm: {
        Func->emitIAS();
        // The Cfg has already emitted into the assembly buffer, so
        // stats have been fully collected into this thread's TLS.
        // Dump them before TLS is reset for the next Cfg.
        if (BuildDefs::dump())
          dumpStats(Func.get());
        auto Asm = Func->releaseAssembler();
        // Copy relevant fields into Asm before Func is deleted.
        Asm->setFunctionName(Func->getFunctionName());
        Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber(),
                                           std::move(Asm));
        Item->setGlobalInits(Func->getGlobalInits());
      } break;
      case FT_Asm:
        // The Cfg has not been emitted yet, so stats are not ready
        // to be dumped.
        // The global initializers are fetched before Func is moved into the
        // work item, since Func must not be used after the move.
        std::unique_ptr<VariableDeclarationList> GlobalInits =
            Func->getGlobalInits();
        Item = makeUnique<EmitterWorkItem>(Func->getSequenceNumber(),
                                           std::move(Func));
        Item->setGlobalInits(std::move(GlobalInits));
        break;
      }
    }
    assert(Item != nullptr);
    emitQueueBlockingPush(std::move(Item));
    // The Cfg now gets deleted as Func goes out of scope.
  }
}
450
451 namespace {
452
453 // Ensure Pending is large enough that Pending[Index] is valid.
resizePending(std::vector<std::unique_ptr<EmitterWorkItem>> * Pending,uint32_t Index)454 void resizePending(std::vector<std::unique_ptr<EmitterWorkItem>> *Pending,
455 uint32_t Index) {
456 if (Index >= Pending->size())
457 Utils::reserveAndResize(*Pending, Index + 1);
458 }
459
460 } // end of anonymous namespace
461
// static
/// Initializes the TLS thread-local field via ICE_TLS_INIT_FIELD.
void GlobalContext::TlsInit() { ICE_TLS_INIT_FIELD(TLS); }
464
emitFileHeader()465 void GlobalContext::emitFileHeader() {
466 TimerMarker T1(Ice::TimerStack::TT_emitAsm, this);
467 if (getFlags().getOutFileType() == FT_Elf) {
468 getObjectWriter()->writeInitialELFHeader();
469 } else {
470 if (!BuildDefs::dump()) {
471 getStrError() << "emitFileHeader for non-ELF";
472 getErrorStatus()->assign(EC_Translation);
473 }
474 TargetHeaderLowering::createLowering(this)->lower();
475 }
476 }
477
/// Forwards to the target data lowering's lowerConstants().
void GlobalContext::lowerConstants() { DataLowering->lowerConstants(); }
479
/// Forwards to the target data lowering's lowerJumpTables().
void GlobalContext::lowerJumpTables() { DataLowering->lowerJumpTables(); }
481
/// Forwards to the target data lowering's emitTargetRODataSections().
void GlobalContext::emitTargetRODataSections() {
  DataLowering->emitTargetRODataSections();
}
485
saveBlockInfoPtrs()486 void GlobalContext::saveBlockInfoPtrs() {
487 for (VariableDeclaration *Global : Globals) {
488 if (Cfg::isProfileGlobal(*Global)) {
489 ProfileBlockInfos.push_back(Global);
490 }
491 }
492 }
493
lowerGlobals(const std::string & SectionSuffix)494 void GlobalContext::lowerGlobals(const std::string &SectionSuffix) {
495 TimerMarker T(TimerStack::TT_emitGlobalInitializers, this);
496 const bool DumpGlobalVariables =
497 BuildDefs::dump() && (getFlags().getVerbose() & IceV_GlobalInit) &&
498 getFlags().matchVerboseFocusOn("", 0);
499 if (DumpGlobalVariables) {
500 OstreamLocker L(this);
501 Ostream &Stream = getStrDump();
502 for (const Ice::VariableDeclaration *Global : Globals) {
503 Global->dump(Stream);
504 }
505 }
506 if (getFlags().getDisableTranslation())
507 return;
508
509 saveBlockInfoPtrs();
510 // If we need to shuffle the layout of global variables, shuffle them now.
511 if (getFlags().getReorderGlobalVariables()) {
512 // Create a random number generator for global variable reordering.
513 RandomNumberGenerator RNG(getFlags().getRandomSeed(),
514 RPE_GlobalVariableReordering);
515 RandomShuffle(Globals.begin(), Globals.end(),
516 [&RNG](int N) { return (uint32_t)RNG.next(N); });
517 }
518
519 if (!BuildDefs::minimal() && Instrumentor)
520 Instrumentor->instrumentGlobals(Globals);
521
522 DataLowering->lowerGlobals(Globals, SectionSuffix);
523 if (ProfileBlockInfos.empty() && DisposeGlobalVariablesAfterLowering) {
524 Globals.clearAndPurge();
525 } else {
526 Globals.clear();
527 }
528 }
529
lowerProfileData()530 void GlobalContext::lowerProfileData() {
531 // ProfileBlockInfoVarDecl is initialized in the constructor, and will only
532 // ever be nullptr after this method completes. This assertion is a convoluted
533 // way of ensuring lowerProfileData is invoked a single time.
534 assert(ProfileBlockInfoVarDecl == nullptr);
535
536 auto GlobalVariablePool = getInitializerAllocator();
537 ProfileBlockInfoVarDecl =
538 VariableDeclaration::createExternal(GlobalVariablePool.get());
539 ProfileBlockInfoVarDecl->setAlignment(typeWidthInBytes(IceType_i64));
540 ProfileBlockInfoVarDecl->setIsConstant(true);
541
542 // Note: if you change this symbol, make sure to update
543 // runtime/szrt_profiler.c as well.
544 ProfileBlockInfoVarDecl->setName(this, "__Sz_block_profile_info");
545
546 for (const VariableDeclaration *PBI : ProfileBlockInfos) {
547 if (Cfg::isProfileGlobal(*PBI)) {
548 constexpr RelocOffsetT BlockExecutionCounterOffset = 0;
549 ProfileBlockInfoVarDecl->addInitializer(
550 VariableDeclaration::RelocInitializer::create(
551 GlobalVariablePool.get(), PBI,
552 {RelocOffset::create(this, BlockExecutionCounterOffset)}));
553 }
554 }
555
556 // This adds a 64-bit sentinel entry to the end of our array. For 32-bit
557 // architectures this will waste 4 bytes.
558 const SizeT Sizeof64BitNullPtr = typeWidthInBytes(IceType_i64);
559 ProfileBlockInfoVarDecl->addInitializer(
560 VariableDeclaration::ZeroInitializer::create(GlobalVariablePool.get(),
561 Sizeof64BitNullPtr));
562 Globals.push_back(ProfileBlockInfoVarDecl);
563 constexpr char ProfileDataSection[] = "$sz_profiler$";
564 lowerGlobals(ProfileDataSection);
565 }
566
/// Installs MyTLS as the calling thread's TLS field and then runs emitItems().
void GlobalContext::emitterWrapper(ThreadContext *MyTLS) {
  ICE_TLS_SET_FIELD(TLS, MyTLS);
  emitItems();
}
571
/// Emitter loop: pops EmitterWorkItems from the emit queue until the queue
/// signals its end with a null item. In threaded mode, items are reassembled
/// into sequence-number order via the Pending vector. When function
/// reordering is enabled, they are shuffled within a window before being
/// emitted. Each item is then handled according to its kind.
void GlobalContext::emitItems() {
  const bool Threaded = !getFlags().isSequential();
  // Pending is a vector containing the reassembled, ordered list of
  // work items.  When we're ready for the next item, we first check
  // whether it's in the Pending list.  If not, we take an item from
  // the work queue, and if it's not the item we're waiting for, we
  // insert it into Pending and repeat.  The work item is deleted
  // after it is processed.
  std::vector<std::unique_ptr<EmitterWorkItem>> Pending;
  uint32_t DesiredSequenceNumber = getFirstSequenceNumber();
  // [ShuffleStartIndex, ShuffleEndIndex) is the range of Pending entries that
  // have been collected but not yet emitted.
  uint32_t ShuffleStartIndex = DesiredSequenceNumber;
  uint32_t ShuffleEndIndex = DesiredSequenceNumber;
  bool EmitQueueEmpty = false;
  // The window size is clamped to at least 1.
  const uint32_t ShuffleWindowSize =
      std::max(1u, getFlags().getReorderFunctionsWindowSize());
  // Function reordering is only performed in threaded mode.
  bool Shuffle = Threaded && getFlags().getReorderFunctions();
  // Create a random number generator for function reordering.
  RandomNumberGenerator RNG(getFlags().getRandomSeed(), RPE_FunctionReordering);

  while (!EmitQueueEmpty) {
    resizePending(&Pending, DesiredSequenceNumber);
    // See if Pending contains DesiredSequenceNumber.
    if (Pending[DesiredSequenceNumber] == nullptr) {
      // We need to fetch an EmitterWorkItem from the queue.
      auto RawItem = emitQueueBlockingPop();
      if (RawItem == nullptr) {
        // This is the notifier for an empty queue.
        EmitQueueEmpty = true;
      } else {
        // We get an EmitterWorkItem, we need to add it to Pending.
        uint32_t ItemSeq = RawItem->getSequenceNumber();
        if (Threaded && ItemSeq != DesiredSequenceNumber) {
          // Not the desired one, add it to Pending but do not increase
          // DesiredSequenceNumber. Continue the loop, do not emit the item.
          resizePending(&Pending, ItemSeq);
          Pending[ItemSeq] = std::move(RawItem);
          continue;
        }
        // ItemSeq == DesiredSequenceNumber, we need to check if we should
        // emit it or not. If !Threaded, we're OK with ItemSeq !=
        // DesiredSequenceNumber.
        Pending[DesiredSequenceNumber] = std::move(RawItem);
      }
    }
    // CurrentWorkItem is null when the queue has just signalled its end; it is
    // only dereferenced below when EmitQueueEmpty is false.
    const auto *CurrentWorkItem = Pending[DesiredSequenceNumber].get();

    // We have the desired EmitterWorkItem or nullptr as the end notifier.
    // If the emitter queue is not empty, increase DesiredSequenceNumber and
    // ShuffleEndIndex.
    if (!EmitQueueEmpty) {
      DesiredSequenceNumber++;
      ShuffleEndIndex++;
    }

    if (Shuffle) {
      // Continue fetching EmitterWorkItem if function reordering is turned on,
      // and emit queue is not empty, and the number of consecutive pending
      // items is smaller than the window size, and RawItem is not a
      // WI_GlobalInits kind. Emit WI_GlobalInits kind block first to avoid
      // holding an arbitrarily large GlobalDeclarationList.
      if (!EmitQueueEmpty &&
          ShuffleEndIndex - ShuffleStartIndex < ShuffleWindowSize &&
          CurrentWorkItem->getKind() != EmitterWorkItem::WI_GlobalInits)
        continue;

      // Emit the EmitterWorkItem between Pending[ShuffleStartIndex] to
      // Pending[ShuffleEndIndex]. If function reordering turned on, shuffle the
      // pending items from Pending[ShuffleStartIndex] to
      // Pending[ShuffleEndIndex].
      RandomShuffle(Pending.begin() + ShuffleStartIndex,
                    Pending.begin() + ShuffleEndIndex,
                    [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); });
    }

    // Emit the item from ShuffleStartIndex to ShuffleEndIndex.
    for (uint32_t I = ShuffleStartIndex; I < ShuffleEndIndex; I++) {
      // Take ownership of the item, leaving a null entry behind in Pending.
      std::unique_ptr<EmitterWorkItem> Item = std::move(Pending[I]);

      switch (Item->getKind()) {
      case EmitterWorkItem::WI_Nop:
        break;
      case EmitterWorkItem::WI_GlobalInits: {
        accumulateGlobals(Item->getGlobalInits());
      } break;
      case EmitterWorkItem::WI_Asm: {
        lowerGlobalsIfNoCodeHasBeenSeen();
        accumulateGlobals(Item->getGlobalInits());

        std::unique_ptr<Assembler> Asm = Item->getAsm();
        Asm->alignFunction();
        GlobalString Name = Asm->getFunctionName();
        switch (getFlags().getOutFileType()) {
        case FT_Elf:
          getObjectWriter()->writeFunctionCode(Name, Asm->getInternal(),
                                               Asm.get());
          break;
        case FT_Iasm: {
          OstreamLocker L(this);
          Cfg::emitTextHeader(Name, this, Asm.get());
          Asm->emitIASBytes(this);
        } break;
        case FT_Asm:
          llvm::report_fatal_error("Unexpected FT_Asm");
          break;
        }
      } break;
      case EmitterWorkItem::WI_Cfg: {
        if (!BuildDefs::dump())
          llvm::report_fatal_error("WI_Cfg work item created inappropriately");
        lowerGlobalsIfNoCodeHasBeenSeen();
        accumulateGlobals(Item->getGlobalInits());

        assert(getFlags().getOutFileType() == FT_Asm);
        std::unique_ptr<Cfg> Func = Item->getCfg();
        // Unfortunately, we have to temporarily install the Cfg in TLS
        // because Variable::asType() uses the allocator to create the
        // differently-typed copy.
        CfgLocalAllocatorScope _(Func.get());
        Func->emit();
        dumpStats(Func.get());
      } break;
      }
    }
    // Update the start index for next shuffling queue
    ShuffleStartIndex = ShuffleEndIndex;
  }

  // In case there is no code to be generated, we invoke the conditional
  // lowerGlobals again -- this is a no-op if code has been emitted.
  lowerGlobalsIfNoCodeHasBeenSeen();
}
703
/// Deletes every ThreadContext recorded in AllThreadContexts, then runs the
/// registered destructor callbacks in reverse order.
GlobalContext::~GlobalContext() {
  llvm::DeleteContainerPointers(AllThreadContexts);
  LockedPtr<DestructorArray> Dtors = getDestructors();
  // Destructors are invoked in the opposite object construction order.
  for (const auto &Dtor : reverse_range(*Dtors))
    Dtor();
}
711
dumpStrings()712 void GlobalContext::dumpStrings() {
713 if (!getFlags().getDumpStrings())
714 return;
715 OstreamLocker _(this);
716 Ostream &Str = getStrDump();
717 Str << "GlobalContext strings:\n";
718 getStrings()->dump(Str);
719 }
720
dumpConstantLookupCounts()721 void GlobalContext::dumpConstantLookupCounts() {
722 if (!BuildDefs::dump())
723 return;
724 const bool DumpCounts = (getFlags().getVerbose() & IceV_ConstPoolStats) &&
725 getFlags().matchVerboseFocusOn("", 0);
726 if (!DumpCounts)
727 return;
728
729 OstreamLocker _(this);
730 Ostream &Str = getStrDump();
731 Str << "Constant pool use stats: count+value+type\n";
732 #define X(WhichPool) \
733 for (auto *C : getConstPool()->WhichPool.getConstantPool()) { \
734 Str << C->getLookupCount() << " "; \
735 C->dump(Str); \
736 Str << " " << C->getType() << "\n"; \
737 }
738 X(Integers1);
739 X(Integers8);
740 X(Integers16);
741 X(Integers32);
742 X(Integers64);
743 X(Floats);
744 X(Doubles);
745 X(Relocatables);
746 X(ExternRelocatables);
747 #undef X
748 }
749
750 // TODO(stichnot): Consider adding thread-local caches of constant pool entries
751 // to reduce contention.
752
753 // All locking is done by the getConstantInt[0-9]+() target function.
getConstantInt(Type Ty,int64_t Value)754 Constant *GlobalContext::getConstantInt(Type Ty, int64_t Value) {
755 switch (Ty) {
756 case IceType_i1:
757 return getConstantInt1(Value);
758 case IceType_i8:
759 return getConstantInt8(Value);
760 case IceType_i16:
761 return getConstantInt16(Value);
762 case IceType_i32:
763 return getConstantInt32(Value);
764 case IceType_i64:
765 return getConstantInt64(Value);
766 default:
767 llvm_unreachable("Bad integer type for getConstant");
768 }
769 return nullptr;
770 }
771
getConstantInt1Internal(int8_t ConstantInt1)772 Constant *GlobalContext::getConstantInt1Internal(int8_t ConstantInt1) {
773 ConstantInt1 &= INT8_C(1);
774 return getConstPool()->Integers1.getOrAdd(this, ConstantInt1);
775 }
776
/// Returns the pooled i8 constant for ConstantInt8, creating it on first use.
Constant *GlobalContext::getConstantInt8Internal(int8_t ConstantInt8) {
  return getConstPool()->Integers8.getOrAdd(this, ConstantInt8);
}
780
/// Returns the pooled i16 constant for ConstantInt16, creating it on first
/// use.
Constant *GlobalContext::getConstantInt16Internal(int16_t ConstantInt16) {
  return getConstPool()->Integers16.getOrAdd(this, ConstantInt16);
}
784
/// Returns the pooled i32 constant for ConstantInt32, creating it on first
/// use.
Constant *GlobalContext::getConstantInt32Internal(int32_t ConstantInt32) {
  return getConstPool()->Integers32.getOrAdd(this, ConstantInt32);
}
788
/// Returns the pooled i64 constant for ConstantInt64, creating it on first
/// use.
Constant *GlobalContext::getConstantInt64Internal(int64_t ConstantInt64) {
  return getConstPool()->Integers64.getOrAdd(this, ConstantInt64);
}
792
/// Returns the pooled f32 constant for ConstantFloat, creating it on first
/// use.
Constant *GlobalContext::getConstantFloat(float ConstantFloat) {
  return getConstPool()->Floats.getOrAdd(this, ConstantFloat);
}
796
/// Returns the pooled f64 constant for ConstantDouble, creating it on first
/// use.
Constant *GlobalContext::getConstantDouble(double ConstantDouble) {
  return getConstPool()->Doubles.getOrAdd(this, ConstantDouble);
}
800
/// Returns the pooled relocatable constant keyed by the tuple (Offset,
/// OffsetExpr, Name, EmitString), creating it in the Relocatables pool on
/// first use.
Constant *GlobalContext::getConstantSymWithEmitString(
    const RelocOffsetT Offset, const RelocOffsetArray &OffsetExpr,
    GlobalString Name, const std::string &EmitString) {
  return getConstPool()->Relocatables.getOrAdd(
      this, RelocatableTuple(Offset, OffsetExpr, Name, EmitString));
}
807
/// Convenience overload of getConstantSymWithEmitString() that passes an empty
/// offset expression list and an empty emit string.
Constant *GlobalContext::getConstantSym(RelocOffsetT Offset,
                                        GlobalString Name) {
  constexpr char EmptyEmitString[] = "";
  return getConstantSymWithEmitString(Offset, {}, Name, EmptyEmitString);
}
813
/// Returns the pooled constant for the external symbol Name, with offset 0 and
/// no offset expressions, creating it in the ExternRelocatables pool on first
/// use.
Constant *GlobalContext::getConstantExternSym(GlobalString Name) {
  constexpr RelocOffsetT Offset = 0;
  return getConstPool()->ExternRelocatables.getOrAdd(
      this, RelocatableTuple(Offset, {}, Name));
}
819
getConstantUndef(Type Ty)820 Constant *GlobalContext::getConstantUndef(Type Ty) {
821 return getConstPool()->Undefs.getOrAdd(this, Ty);
822 }
823
getConstantZero(Type Ty)824 Constant *GlobalContext::getConstantZero(Type Ty) {
825 Constant *Zero = ConstZeroForType[Ty];
826 if (Zero == nullptr)
827 llvm::report_fatal_error("Unsupported constant type: " + typeStdString(Ty));
828 return Zero;
829 }
830
831 // All locking is done by the getConstant*() target function.
getConstantZeroInternal(Type Ty)832 Constant *GlobalContext::getConstantZeroInternal(Type Ty) {
833 switch (Ty) {
834 case IceType_i1:
835 return getConstantInt1Internal(0);
836 case IceType_i8:
837 return getConstantInt8Internal(0);
838 case IceType_i16:
839 return getConstantInt16Internal(0);
840 case IceType_i32:
841 return getConstantInt32Internal(0);
842 case IceType_i64:
843 return getConstantInt64Internal(0);
844 case IceType_f32:
845 return getConstantFloat(0);
846 case IceType_f64:
847 return getConstantDouble(0);
848 default:
849 return nullptr;
850 }
851 }
852
getConstantPool(Type Ty)853 ConstantList GlobalContext::getConstantPool(Type Ty) {
854 switch (Ty) {
855 case IceType_i1:
856 case IceType_i8:
857 return getConstPool()->Integers8.getConstantPool();
858 case IceType_i16:
859 return getConstPool()->Integers16.getConstantPool();
860 case IceType_i32:
861 return getConstPool()->Integers32.getConstantPool();
862 case IceType_i64:
863 return getConstPool()->Integers64.getConstantPool();
864 case IceType_f32:
865 return getConstPool()->Floats.getConstantPool();
866 case IceType_f64:
867 return getConstPool()->Doubles.getConstantPool();
868 case IceType_v4i1:
869 case IceType_v8i1:
870 case IceType_v16i1:
871 case IceType_v16i8:
872 case IceType_v8i16:
873 case IceType_v4i32:
874 case IceType_v4f32:
875 llvm::report_fatal_error("Unsupported constant type: " + typeStdString(Ty));
876 break;
877 case IceType_void:
878 case IceType_NUM:
879 break;
880 }
881 llvm_unreachable("Unknown type");
882 }
883
getConstantExternSyms()884 ConstantList GlobalContext::getConstantExternSyms() {
885 return getConstPool()->ExternRelocatables.getConstantPool();
886 }
887
getGlobalString(const std::string & Name)888 GlobalString GlobalContext::getGlobalString(const std::string &Name) {
889 return GlobalString::createWithString(this, Name);
890 }
891
getJumpTables()892 JumpTableDataList GlobalContext::getJumpTables() {
893 JumpTableDataList JumpTables(*getJumpTableList());
894 // Make order deterministic by sorting into functions and then ID of the jump
895 // table within that function.
896 std::sort(JumpTables.begin(), JumpTables.end(),
897 [](const JumpTableData &A, const JumpTableData &B) {
898 if (A.getFunctionName() != B.getFunctionName())
899 return A.getFunctionName() < B.getFunctionName();
900 return A.getId() < B.getId();
901 });
902
903 if (getFlags().getReorderPooledConstants()) {
904 // If reorder-pooled-constants option is set to true, we also shuffle the
905 // jump tables before emitting them.
906
907 // Create a random number generator for jump tables reordering, considering
908 // jump tables as pooled constants.
909 RandomNumberGenerator RNG(getFlags().getRandomSeed(),
910 RPE_PooledConstantReordering);
911 RandomShuffle(JumpTables.begin(), JumpTables.end(),
912 [&RNG](uint64_t N) { return (uint32_t)RNG.next(N); });
913 }
914 return JumpTables;
915 }
916
addJumpTableData(JumpTableData JumpTable)917 void GlobalContext::addJumpTableData(JumpTableData JumpTable) {
918 getJumpTableList()->emplace_back(std::move(JumpTable));
919 }
920
newTimerStackID(const std::string & Name)921 TimerStackIdT GlobalContext::newTimerStackID(const std::string &Name) {
922 if (!BuildDefs::timers())
923 return 0;
924 auto Timers = getTimers();
925 TimerStackIdT NewID = Timers->size();
926 Timers->push_back(TimerStack(Name));
927 return NewID;
928 }
929
getTimerID(TimerStackIdT StackID,const std::string & Name)930 TimerIdT GlobalContext::getTimerID(TimerStackIdT StackID,
931 const std::string &Name) {
932 auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
933 assert(StackID < Timers->size());
934 return Timers->at(StackID).getTimerID(Name);
935 }
936
pushTimer(TimerIdT ID,TimerStackIdT StackID)937 void GlobalContext::pushTimer(TimerIdT ID, TimerStackIdT StackID) {
938 auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
939 assert(StackID < Timers->size());
940 Timers->at(StackID).push(ID);
941 }
942
popTimer(TimerIdT ID,TimerStackIdT StackID)943 void GlobalContext::popTimer(TimerIdT ID, TimerStackIdT StackID) {
944 auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
945 assert(StackID < Timers->size());
946 Timers->at(StackID).pop(ID);
947 }
948
resetTimer(TimerStackIdT StackID)949 void GlobalContext::resetTimer(TimerStackIdT StackID) {
950 auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
951 assert(StackID < Timers->size());
952 Timers->at(StackID).reset();
953 }
954
getTimerName(TimerStackIdT StackID)955 std::string GlobalContext::getTimerName(TimerStackIdT StackID) {
956 auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
957 assert(StackID < Timers->size());
958 return Timers->at(StackID).getName();
959 }
960
setTimerName(TimerStackIdT StackID,const std::string & NewName)961 void GlobalContext::setTimerName(TimerStackIdT StackID,
962 const std::string &NewName) {
963 auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
964 assert(StackID < Timers->size());
965 Timers->at(StackID).setName(NewName);
966 }
967
968 // Note: optQueueBlockingPush and optQueueBlockingPop use unique_ptr at the
969 // interface to take and transfer ownership, but they internally store the raw
970 // Cfg pointer in the work queue. This allows e.g. future queue optimizations
971 // such as the use of atomics to modify queue elements.
optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item)972 void GlobalContext::optQueueBlockingPush(std::unique_ptr<OptWorkItem> Item) {
973 assert(Item);
974 {
975 TimerMarker _(TimerStack::TT_qTransPush, this);
976 OptQ.blockingPush(std::move(Item));
977 }
978 if (getFlags().isSequential())
979 translateFunctions();
980 }
981
optQueueBlockingPop()982 std::unique_ptr<OptWorkItem> GlobalContext::optQueueBlockingPop() {
983 TimerMarker _(TimerStack::TT_qTransPop, this);
984 return OptQ.blockingPop(OptQWakeupSize);
985 }
986
emitQueueBlockingPush(std::unique_ptr<EmitterWorkItem> Item)987 void GlobalContext::emitQueueBlockingPush(
988 std::unique_ptr<EmitterWorkItem> Item) {
989 assert(Item);
990 {
991 TimerMarker _(TimerStack::TT_qEmitPush, this);
992 EmitQ.blockingPush(std::move(Item));
993 }
994 if (getFlags().isSequential())
995 emitItems();
996 }
997
emitQueueBlockingPop()998 std::unique_ptr<EmitterWorkItem> GlobalContext::emitQueueBlockingPop() {
999 TimerMarker _(TimerStack::TT_qEmitPop, this);
1000 return EmitQ.blockingPop();
1001 }
1002
initParserThread()1003 void GlobalContext::initParserThread() {
1004 ThreadContext *Tls = new ThreadContext();
1005 auto Timers = getTimers();
1006 Timers->initInto(Tls->Timers);
1007 AllThreadContexts.push_back(Tls);
1008 ICE_TLS_SET_FIELD(TLS, Tls);
1009 }
1010
startWorkerThreads()1011 void GlobalContext::startWorkerThreads() {
1012 size_t NumWorkers = getFlags().getNumTranslationThreads();
1013 auto Timers = getTimers();
1014 for (size_t i = 0; i < NumWorkers; ++i) {
1015 ThreadContext *WorkerTLS = new ThreadContext();
1016 Timers->initInto(WorkerTLS->Timers);
1017 AllThreadContexts.push_back(WorkerTLS);
1018 TranslationThreads.push_back(std::thread(
1019 &GlobalContext::translateFunctionsWrapper, this, WorkerTLS));
1020 }
1021 if (NumWorkers) {
1022 ThreadContext *WorkerTLS = new ThreadContext();
1023 Timers->initInto(WorkerTLS->Timers);
1024 AllThreadContexts.push_back(WorkerTLS);
1025 EmitterThreads.push_back(
1026 std::thread(&GlobalContext::emitterWrapper, this, WorkerTLS));
1027 }
1028 }
1029
resetStats()1030 void GlobalContext::resetStats() {
1031 if (BuildDefs::dump())
1032 ICE_TLS_GET_FIELD(TLS)->StatsFunction.reset();
1033 }
1034
dumpStats(const Cfg * Func)1035 void GlobalContext::dumpStats(const Cfg *Func) {
1036 if (!getFlags().getDumpStats())
1037 return;
1038 if (Func == nullptr) {
1039 getStatsCumulative()->dump(Func, this);
1040 } else {
1041 ICE_TLS_GET_FIELD(TLS)->StatsFunction.dump(Func, this);
1042 }
1043 }
1044
statsUpdateEmitted(uint32_t InstCount)1045 void GlobalContext::statsUpdateEmitted(uint32_t InstCount) {
1046 if (!getFlags().getDumpStats())
1047 return;
1048 ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
1049 Tls->StatsFunction.update(CodeStats::CS_InstCount, InstCount);
1050 Tls->StatsCumulative.update(CodeStats::CS_InstCount, InstCount);
1051 }
1052
statsUpdateRegistersSaved(uint32_t Num)1053 void GlobalContext::statsUpdateRegistersSaved(uint32_t Num) {
1054 if (!getFlags().getDumpStats())
1055 return;
1056 ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
1057 Tls->StatsFunction.update(CodeStats::CS_RegsSaved, Num);
1058 Tls->StatsCumulative.update(CodeStats::CS_RegsSaved, Num);
1059 }
1060
statsUpdateFrameBytes(uint32_t Bytes)1061 void GlobalContext::statsUpdateFrameBytes(uint32_t Bytes) {
1062 if (!getFlags().getDumpStats())
1063 return;
1064 ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
1065 Tls->StatsFunction.update(CodeStats::CS_FrameByte, Bytes);
1066 Tls->StatsCumulative.update(CodeStats::CS_FrameByte, Bytes);
1067 }
1068
statsUpdateSpills()1069 void GlobalContext::statsUpdateSpills() {
1070 if (!getFlags().getDumpStats())
1071 return;
1072 ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
1073 Tls->StatsFunction.update(CodeStats::CS_NumSpills);
1074 Tls->StatsCumulative.update(CodeStats::CS_NumSpills);
1075 }
1076
statsUpdateFills()1077 void GlobalContext::statsUpdateFills() {
1078 if (!getFlags().getDumpStats())
1079 return;
1080 ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
1081 Tls->StatsFunction.update(CodeStats::CS_NumFills);
1082 Tls->StatsCumulative.update(CodeStats::CS_NumFills);
1083 }
1084
statsUpdateRPImms()1085 void GlobalContext::statsUpdateRPImms() {
1086 if (!getFlags().getDumpStats())
1087 return;
1088 ThreadContext *Tls = ICE_TLS_GET_FIELD(TLS);
1089 Tls->StatsFunction.update(CodeStats::CS_NumRPImms);
1090 Tls->StatsCumulative.update(CodeStats::CS_NumRPImms);
1091 }
1092
dumpTimers(TimerStackIdT StackID,bool DumpCumulative)1093 void GlobalContext::dumpTimers(TimerStackIdT StackID, bool DumpCumulative) {
1094 if (!BuildDefs::timers())
1095 return;
1096 auto Timers = getTimers();
1097 assert(Timers->size() > StackID);
1098 OstreamLocker L(this);
1099 Timers->at(StackID).dump(getStrDump(), DumpCumulative);
1100 }
1101
dumpLocalTimers(const std::string & TimerNameOverride,TimerStackIdT StackID,bool DumpCumulative)1102 void GlobalContext::dumpLocalTimers(const std::string &TimerNameOverride,
1103 TimerStackIdT StackID,
1104 bool DumpCumulative) {
1105 if (!BuildDefs::timers())
1106 return;
1107 auto *Timers = &ICE_TLS_GET_FIELD(TLS)->Timers;
1108 assert(Timers->size() > StackID);
1109 // Temporarily override the thread-local timer name with the given name.
1110 // Don't do it permanently because the final timer merge at the end expects
1111 // the thread-local timer names to be the same as the global timer name.
1112 auto OrigName = getTimerName(StackID);
1113 setTimerName(StackID, TimerNameOverride);
1114 {
1115 OstreamLocker _(this);
1116 Timers->at(StackID).dump(getStrDump(), DumpCumulative);
1117 }
1118 setTimerName(StackID, OrigName);
1119 }
1120
1121 LockedPtr<StringPool>
getStrings(const GlobalContext * PoolOwner)1122 GlobalStringPoolTraits::getStrings(const GlobalContext *PoolOwner) {
1123 return PoolOwner->getStrings();
1124 }
1125
getTimerIdFromFuncName(GlobalContext * Ctx,const std::string & FuncName)1126 TimerIdT TimerMarker::getTimerIdFromFuncName(GlobalContext *Ctx,
1127 const std::string &FuncName) {
1128 if (!BuildDefs::timers())
1129 return 0;
1130 if (!getFlags().getTimeEachFunction())
1131 return 0;
1132 return Ctx->getTimerID(GlobalContext::TSK_Funcs, FuncName);
1133 }
1134
push()1135 void TimerMarker::push() {
1136 switch (StackID) {
1137 case GlobalContext::TSK_Default:
1138 Active = getFlags().getSubzeroTimingEnabled() ||
1139 !getFlags().getTimingFocusOnString().empty();
1140 break;
1141 case GlobalContext::TSK_Funcs:
1142 Active = getFlags().getTimeEachFunction();
1143 break;
1144 default:
1145 break;
1146 }
1147 if (Active)
1148 Ctx->pushTimer(ID, StackID);
1149 }
1150
pushCfg(const Cfg * Func)1151 void TimerMarker::pushCfg(const Cfg *Func) {
1152 Ctx = Func->getContext();
1153 Active = Func->getFocusedTiming() || getFlags().getSubzeroTimingEnabled();
1154 if (Active)
1155 Ctx->pushTimer(ID, StackID);
1156 }
1157
// Out-of-class definition of the GlobalContext::TLS field, a pointer to
// GlobalContext::ThreadContext. The rest of this file reads and writes it
// through ICE_TLS_GET_FIELD / ICE_TLS_SET_FIELD.
// NOTE(review): presumably thread-local storage, going by the macro name --
// confirm against the ICE_TLS_* macro definitions.
ICE_TLS_DEFINE_FIELD(GlobalContext::ThreadContext *, GlobalContext, TLS);
1159
1160 } // end of namespace Ice
1161