1 //===- subzero/src/IceRegAlloc.cpp - Linear-scan implementation -----------===//
2 //
3 //                        The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the LinearScan class, which performs the linear-scan
12 /// register allocation after liveness analysis has been performed.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "IceRegAlloc.h"
17 
18 #include "IceBitVector.h"
19 #include "IceCfg.h"
20 #include "IceCfgNode.h"
21 #include "IceInst.h"
22 #include "IceInstVarIter.h"
23 #include "IceOperand.h"
24 #include "IceTargetLowering.h"
25 
26 #include "llvm/Support/Format.h"
27 
28 namespace Ice {
29 
30 namespace {
31 
32 // Returns true if Var has any definitions within Item's live range.
33 // TODO(stichnot): Consider trimming the Definitions list similar to how the
34 // live ranges are trimmed, since all the overlapsDefs() tests are whether some
35 // variable's definitions overlap Cur, and trimming is with respect Cur.start.
36 // Initial tests show no measurable performance difference, so we'll keep the
37 // code simple for now.
overlapsDefs(const Cfg * Func,const Variable * Item,const Variable * Var)38 bool overlapsDefs(const Cfg *Func, const Variable *Item, const Variable *Var) {
39   constexpr bool UseTrimmed = true;
40   VariablesMetadata *VMetadata = Func->getVMetadata();
41   if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var))
42     if (Item->getLiveRange().overlapsInst(FirstDef->getNumber(), UseTrimmed))
43       return true;
44   for (const Inst *Def : VMetadata->getLatterDefinitions(Var)) {
45     if (Item->getLiveRange().overlapsInst(Def->getNumber(), UseTrimmed))
46       return true;
47   }
48   return false;
49 }
50 
dumpDisableOverlap(const Cfg * Func,const Variable * Var,const char * Reason)51 void dumpDisableOverlap(const Cfg *Func, const Variable *Var,
52                         const char *Reason) {
53   if (!BuildDefs::dump())
54     return;
55   if (!Func->isVerbose(IceV_LinearScan))
56     return;
57 
58   VariablesMetadata *VMetadata = Func->getVMetadata();
59   Ostream &Str = Func->getContext()->getStrDump();
60   Str << "Disabling Overlap due to " << Reason << " " << *Var
61       << " LIVE=" << Var->getLiveRange() << " Defs=";
62   if (const Inst *FirstDef = VMetadata->getFirstDefinition(Var))
63     Str << FirstDef->getNumber();
64   const InstDefList &Defs = VMetadata->getLatterDefinitions(Var);
65   for (size_t i = 0; i < Defs.size(); ++i) {
66     Str << "," << Defs[i]->getNumber();
67   }
68   Str << "\n";
69 }
70 
dumpLiveRange(const Variable * Var,const Cfg * Func)71 void dumpLiveRange(const Variable *Var, const Cfg *Func) {
72   if (!BuildDefs::dump())
73     return;
74   Ostream &Str = Func->getContext()->getStrDump();
75   Str << "R=";
76   if (Var->hasRegTmp()) {
77     Str << llvm::format("%2d", int(Var->getRegNumTmp()));
78   } else {
79     Str << "NA";
80   }
81   Str << "  V=";
82   Var->dump(Func);
83   Str << "  Range=" << Var->getLiveRange();
84 }
85 
findMinWeightIndex(const SmallBitVector & RegMask,const llvm::SmallVector<RegWeight,LinearScan::REGS_SIZE> & Weights)86 int32_t findMinWeightIndex(
87     const SmallBitVector &RegMask,
88     const llvm::SmallVector<RegWeight, LinearScan::REGS_SIZE> &Weights) {
89   int MinWeightIndex = -1;
90   for (RegNumT i : RegNumBVIter(RegMask)) {
91     if (MinWeightIndex < 0 || Weights[i] < Weights[MinWeightIndex])
92       MinWeightIndex = i;
93   }
94   assert(getFlags().getRegAllocReserve() || MinWeightIndex >= 0);
95   return MinWeightIndex;
96 }
97 
98 } // end of anonymous namespace
99 
LinearScan(Cfg * Func)100 LinearScan::LinearScan(Cfg *Func)
101     : Func(Func), Ctx(Func->getContext()), Target(Func->getTarget()),
102       Verbose(BuildDefs::dump() && Func->isVerbose(IceV_LinearScan)),
103       UseReserve(getFlags().getRegAllocReserve()) {}
104 
105 // Prepare for full register allocation of all variables. We depend on liveness
106 // analysis to have calculated live ranges.
initForGlobal()107 void LinearScan::initForGlobal() {
108   TimerMarker T(TimerStack::TT_initUnhandled, Func);
109   FindPreference = true;
110   // For full register allocation, normally we want to enable FindOverlap
111   // (meaning we look for opportunities for two overlapping live ranges to
112   // safely share the same register). However, we disable it for phi-lowering
113   // register allocation since no overlap opportunities should be available and
114   // it's more expensive to look for opportunities.
115   FindOverlap = (Kind != RAK_Phi);
116   Unhandled.reserve(Vars.size());
117   UnhandledPrecolored.reserve(Vars.size());
118   // Gather the live ranges of all variables and add them to the Unhandled set.
119   for (Variable *Var : Vars) {
120     // Don't consider rematerializable variables.
121     if (Var->isRematerializable())
122       continue;
123     // Explicitly don't consider zero-weight variables, which are meant to be
124     // spill slots.
125     if (Var->mustNotHaveReg())
126       continue;
127     // Don't bother if the variable has a null live range, which means it was
128     // never referenced.
129     if (Var->getLiveRange().isEmpty())
130       continue;
131     Var->untrimLiveRange();
132     Unhandled.push_back(Var);
133     if (Var->hasReg()) {
134       Var->setRegNumTmp(Var->getRegNum());
135       Var->setMustHaveReg();
136       UnhandledPrecolored.push_back(Var);
137     }
138   }
139 
140   // Build the (ordered) list of FakeKill instruction numbers.
141   Kills.clear();
142   // Phi lowering should not be creating new call instructions, so there should
143   // be no infinite-weight not-yet-colored live ranges that span a call
144   // instruction, hence no need to construct the Kills list.
145   if (Kind == RAK_Phi)
146     return;
147   for (CfgNode *Node : Func->getNodes()) {
148     for (Inst &I : Node->getInsts()) {
149       if (auto *Kill = llvm::dyn_cast<InstFakeKill>(&I)) {
150         if (!Kill->isDeleted() && !Kill->getLinked()->isDeleted())
151           Kills.push_back(I.getNumber());
152       }
153     }
154   }
155 }
156 
157 // Validate the integrity of the live ranges.  If there are any errors, it
158 // prints details and returns false.  On success, it returns true.
livenessValidateIntervals(const DefUseErrorList & DefsWithoutUses,const DefUseErrorList & UsesBeforeDefs,const CfgVector<InstNumberT> & LRBegin,const CfgVector<InstNumberT> & LREnd) const159 bool LinearScan::livenessValidateIntervals(
160     const DefUseErrorList &DefsWithoutUses,
161     const DefUseErrorList &UsesBeforeDefs,
162     const CfgVector<InstNumberT> &LRBegin,
163     const CfgVector<InstNumberT> &LREnd) const {
164   if (DefsWithoutUses.empty() && UsesBeforeDefs.empty())
165     return true;
166 
167   if (!BuildDefs::dump())
168     return false;
169 
170   OstreamLocker L(Ctx);
171   Ostream &Str = Ctx->getStrDump();
172   for (SizeT VarNum : DefsWithoutUses) {
173     Variable *Var = Vars[VarNum];
174     Str << "LR def without use, instruction " << LRBegin[VarNum]
175         << ", variable " << Var->getName() << "\n";
176   }
177   for (SizeT VarNum : UsesBeforeDefs) {
178     Variable *Var = Vars[VarNum];
179     Str << "LR use before def, instruction " << LREnd[VarNum] << ", variable "
180         << Var->getName() << "\n";
181   }
182   return false;
183 }
184 
185 // Prepare for very simple register allocation of only infinite-weight Variables
186 // while respecting pre-colored Variables. Some properties we take advantage of:
187 //
188 // * Live ranges of interest consist of a single segment.
189 //
190 // * Live ranges of interest never span a call instruction.
191 //
192 // * Phi instructions are not considered because either phis have already been
193 //   lowered, or they don't contain any pre-colored or infinite-weight
194 //   Variables.
195 //
196 // * We don't need to renumber instructions before computing live ranges because
197 //   all the high-level ICE instructions are deleted prior to lowering, and the
198 //   low-level instructions are added in monotonically increasing order.
199 //
200 // * There are no opportunities for register preference or allowing overlap.
201 //
202 // Some properties we aren't (yet) taking advantage of:
203 //
204 // * Because live ranges are a single segment, the Inactive set will always be
205 //   empty, and the live range trimming operation is unnecessary.
206 //
207 // * Calculating overlap of single-segment live ranges could be optimized a bit.
initForInfOnly()208 void LinearScan::initForInfOnly() {
209   TimerMarker T(TimerStack::TT_initUnhandled, Func);
210   FindPreference = false;
211   FindOverlap = false;
212   SizeT NumVars = 0;
213 
214   // Iterate across all instructions and record the begin and end of the live
215   // range for each variable that is pre-colored or infinite weight.
216   CfgVector<InstNumberT> LRBegin(Vars.size(), Inst::NumberSentinel);
217   CfgVector<InstNumberT> LREnd(Vars.size(), Inst::NumberSentinel);
218   DefUseErrorList DefsWithoutUses, UsesBeforeDefs;
219   for (CfgNode *Node : Func->getNodes()) {
220     for (Inst &Instr : Node->getInsts()) {
221       if (Instr.isDeleted())
222         continue;
223       FOREACH_VAR_IN_INST(Var, Instr) {
224         if (Var->getIgnoreLiveness())
225           continue;
226         if (Var->hasReg() || Var->mustHaveReg()) {
227           SizeT VarNum = Var->getIndex();
228           LREnd[VarNum] = Instr.getNumber();
229           if (!Var->getIsArg() && LRBegin[VarNum] == Inst::NumberSentinel)
230             UsesBeforeDefs.push_back(VarNum);
231         }
232       }
233       if (const Variable *Var = Instr.getDest()) {
234         if (!Var->getIgnoreLiveness() &&
235             (Var->hasReg() || Var->mustHaveReg())) {
236           if (LRBegin[Var->getIndex()] == Inst::NumberSentinel) {
237             LRBegin[Var->getIndex()] = Instr.getNumber();
238             ++NumVars;
239           }
240         }
241       }
242     }
243   }
244 
245   Unhandled.reserve(NumVars);
246   UnhandledPrecolored.reserve(NumVars);
247   for (SizeT i = 0; i < Vars.size(); ++i) {
248     Variable *Var = Vars[i];
249     if (Var->isRematerializable())
250       continue;
251     if (LRBegin[i] != Inst::NumberSentinel) {
252       if (LREnd[i] == Inst::NumberSentinel) {
253         DefsWithoutUses.push_back(i);
254         continue;
255       }
256       Unhandled.push_back(Var);
257       Var->resetLiveRange();
258       Var->addLiveRange(LRBegin[i], LREnd[i]);
259       Var->untrimLiveRange();
260       if (Var->hasReg()) {
261         Var->setRegNumTmp(Var->getRegNum());
262         Var->setMustHaveReg();
263         UnhandledPrecolored.push_back(Var);
264       }
265       --NumVars;
266     }
267   }
268 
269   if (!livenessValidateIntervals(DefsWithoutUses, UsesBeforeDefs, LRBegin,
270                                  LREnd)) {
271     llvm::report_fatal_error("initForInfOnly: Liveness error");
272     return;
273   }
274 
275   if (!DefsWithoutUses.empty() || !UsesBeforeDefs.empty()) {
276     if (BuildDefs::dump()) {
277       OstreamLocker L(Ctx);
278       Ostream &Str = Ctx->getStrDump();
279       for (SizeT VarNum : DefsWithoutUses) {
280         Variable *Var = Vars[VarNum];
281         Str << "LR def without use, instruction " << LRBegin[VarNum]
282             << ", variable " << Var->getName() << "\n";
283       }
284       for (SizeT VarNum : UsesBeforeDefs) {
285         Variable *Var = Vars[VarNum];
286         Str << "LR use before def, instruction " << LREnd[VarNum]
287             << ", variable " << Var->getName() << "\n";
288       }
289     }
290     llvm::report_fatal_error("initForInfOnly: Liveness error");
291   }
292   // This isn't actually a fatal condition, but it would be nice to know if we
293   // somehow pre-calculated Unhandled's size wrong.
294   assert(NumVars == 0);
295 
296   // Don't build up the list of Kills because we know that no infinite-weight
297   // Variable has a live range spanning a call.
298   Kills.clear();
299 }
300 
initForSecondChance()301 void LinearScan::initForSecondChance() {
302   TimerMarker T(TimerStack::TT_initUnhandled, Func);
303   FindPreference = true;
304   FindOverlap = true;
305   const VarList &Vars = Func->getVariables();
306   Unhandled.reserve(Vars.size());
307   UnhandledPrecolored.reserve(Vars.size());
308   for (Variable *Var : Vars) {
309     if (Var->isRematerializable())
310       continue;
311     if (Var->hasReg()) {
312       Var->untrimLiveRange();
313       Var->setRegNumTmp(Var->getRegNum());
314       Var->setMustHaveReg();
315       UnhandledPrecolored.push_back(Var);
316       Unhandled.push_back(Var);
317     }
318   }
319   for (Variable *Var : Evicted) {
320     Var->untrimLiveRange();
321     Unhandled.push_back(Var);
322   }
323 }
324 
init(RegAllocKind Kind,CfgSet<Variable * > ExcludeVars)325 void LinearScan::init(RegAllocKind Kind, CfgSet<Variable *> ExcludeVars) {
326   this->Kind = Kind;
327   Unhandled.clear();
328   UnhandledPrecolored.clear();
329   Handled.clear();
330   Inactive.clear();
331   Active.clear();
332   Vars.clear();
333   Vars.reserve(Func->getVariables().size() - ExcludeVars.size());
334   for (auto *Var : Func->getVariables()) {
335     if (ExcludeVars.find(Var) == ExcludeVars.end())
336       Vars.emplace_back(Var);
337   }
338 
339   SizeT NumRegs = Target->getNumRegisters();
340   RegAliases.resize(NumRegs);
341   for (SizeT Reg = 0; Reg < NumRegs; ++Reg) {
342     RegAliases[Reg] = &Target->getAliasesForRegister(RegNumT::fromInt(Reg));
343   }
344 
345   switch (Kind) {
346   case RAK_Unknown:
347     llvm::report_fatal_error("Invalid RAK_Unknown");
348     break;
349   case RAK_Global:
350   case RAK_Phi:
351     initForGlobal();
352     break;
353   case RAK_InfOnly:
354     initForInfOnly();
355     break;
356   case RAK_SecondChance:
357     initForSecondChance();
358     break;
359   }
360 
361   Evicted.clear();
362 
363   auto CompareRanges = [](const Variable *L, const Variable *R) {
364     InstNumberT Lstart = L->getLiveRange().getStart();
365     InstNumberT Rstart = R->getLiveRange().getStart();
366     if (Lstart == Rstart)
367       return L->getIndex() < R->getIndex();
368     return Lstart < Rstart;
369   };
370   // Do a reverse sort so that erasing elements (from the end) is fast.
371   std::sort(Unhandled.rbegin(), Unhandled.rend(), CompareRanges);
372   std::sort(UnhandledPrecolored.rbegin(), UnhandledPrecolored.rend(),
373             CompareRanges);
374 
375   Handled.reserve(Unhandled.size());
376   Inactive.reserve(Unhandled.size());
377   Active.reserve(Unhandled.size());
378   Evicted.reserve(Unhandled.size());
379 }
380 
381 // This is called when Cur must be allocated a register but no registers are
382 // available across Cur's live range. To handle this, we find a register that is
383 // not explicitly used during Cur's live range, spill that register to a stack
384 // location right before Cur's live range begins, and fill (reload) the register
385 // from the stack location right after Cur's live range ends.
addSpillFill(IterationState & Iter)386 void LinearScan::addSpillFill(IterationState &Iter) {
387   // Identify the actual instructions that begin and end Iter.Cur's live range.
388   // Iterate through Iter.Cur's node's instruction list until we find the actual
389   // instructions with instruction numbers corresponding to Iter.Cur's recorded
390   // live range endpoints.  This sounds inefficient but shouldn't be a problem
391   // in practice because:
392   // (1) This function is almost never called in practice.
393   // (2) Since this register over-subscription problem happens only for
394   //     phi-lowered instructions, the number of instructions in the node is
395   //     proportional to the number of phi instructions in the original node,
396   //     which is never very large in practice.
397   // (3) We still have to iterate through all instructions of Iter.Cur's live
398   //     range to find all explicitly used registers (though the live range is
399   //     usually only 2-3 instructions), so the main cost that could be avoided
400   //     would be finding the instruction that begin's Iter.Cur's live range.
401   assert(!Iter.Cur->getLiveRange().isEmpty());
402   InstNumberT Start = Iter.Cur->getLiveRange().getStart();
403   InstNumberT End = Iter.Cur->getLiveRange().getEnd();
404   CfgNode *Node = Func->getVMetadata()->getLocalUseNode(Iter.Cur);
405   assert(Node);
406   InstList &Insts = Node->getInsts();
407   InstList::iterator SpillPoint = Insts.end();
408   InstList::iterator FillPoint = Insts.end();
409   // Stop searching after we have found both the SpillPoint and the FillPoint.
410   for (auto I = Insts.begin(), E = Insts.end();
411        I != E && (SpillPoint == E || FillPoint == E); ++I) {
412     if (I->getNumber() == Start)
413       SpillPoint = I;
414     if (I->getNumber() == End)
415       FillPoint = I;
416     if (SpillPoint != E) {
417       // Remove from RegMask any physical registers referenced during Cur's live
418       // range. Start looking after SpillPoint gets set, i.e. once Cur's live
419       // range begins.
420       FOREACH_VAR_IN_INST(Var, *I) {
421         if (!Var->hasRegTmp())
422           continue;
423         const auto &Aliases = *RegAliases[Var->getRegNumTmp()];
424         for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
425           Iter.RegMask[RegAlias] = false;
426         }
427       }
428     }
429   }
430   assert(SpillPoint != Insts.end());
431   assert(FillPoint != Insts.end());
432   ++FillPoint;
433   // TODO(stichnot): Randomize instead of *.begin() which maps to find_first().
434   const RegNumT RegNum = *RegNumBVIter(Iter.RegMask).begin();
435   Iter.Cur->setRegNumTmp(RegNum);
436   Variable *Preg = Target->getPhysicalRegister(RegNum, Iter.Cur->getType());
437   // TODO(stichnot): Add SpillLoc to VariablesMetadata tracking so that SpillLoc
438   // is correctly identified as !isMultiBlock(), reducing stack frame size.
439   Variable *SpillLoc = Func->makeVariable(Iter.Cur->getType());
440   // Add "reg=FakeDef;spill=reg" before SpillPoint
441   Target->lowerInst(Node, SpillPoint, InstFakeDef::create(Func, Preg));
442   Target->lowerInst(Node, SpillPoint, InstAssign::create(Func, SpillLoc, Preg));
443   // add "reg=spill;FakeUse(reg)" before FillPoint
444   Target->lowerInst(Node, FillPoint, InstAssign::create(Func, Preg, SpillLoc));
445   Target->lowerInst(Node, FillPoint, InstFakeUse::create(Func, Preg));
446 }
447 
handleActiveRangeExpiredOrInactive(const Variable * Cur)448 void LinearScan::handleActiveRangeExpiredOrInactive(const Variable *Cur) {
449   for (SizeT I = Active.size(); I > 0; --I) {
450     const SizeT Index = I - 1;
451     Variable *Item = Active[Index];
452     Item->trimLiveRange(Cur->getLiveRange().getStart());
453     bool Moved = false;
454     if (Item->rangeEndsBefore(Cur)) {
455       // Move Item from Active to Handled list.
456       dumpLiveRangeTrace("Expiring     ", Item);
457       moveItem(Active, Index, Handled);
458       Moved = true;
459     } else if (!Item->rangeOverlapsStart(Cur)) {
460       // Move Item from Active to Inactive list.
461       dumpLiveRangeTrace("Inactivating ", Item);
462       moveItem(Active, Index, Inactive);
463       Moved = true;
464     }
465     if (Moved) {
466       // Decrement Item from RegUses[].
467       assert(Item->hasRegTmp());
468       const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
469       for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
470         --RegUses[RegAlias];
471         assert(RegUses[RegAlias] >= 0);
472       }
473     }
474   }
475 }
476 
handleInactiveRangeExpiredOrReactivated(const Variable * Cur)477 void LinearScan::handleInactiveRangeExpiredOrReactivated(const Variable *Cur) {
478   for (SizeT I = Inactive.size(); I > 0; --I) {
479     const SizeT Index = I - 1;
480     Variable *Item = Inactive[Index];
481     Item->trimLiveRange(Cur->getLiveRange().getStart());
482     if (Item->rangeEndsBefore(Cur)) {
483       // Move Item from Inactive to Handled list.
484       dumpLiveRangeTrace("Expiring     ", Item);
485       moveItem(Inactive, Index, Handled);
486     } else if (Item->rangeOverlapsStart(Cur)) {
487       // Move Item from Inactive to Active list.
488       dumpLiveRangeTrace("Reactivating ", Item);
489       moveItem(Inactive, Index, Active);
490       // Increment Item in RegUses[].
491       assert(Item->hasRegTmp());
492       const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
493       for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
494         assert(RegUses[RegAlias] >= 0);
495         ++RegUses[RegAlias];
496       }
497     }
498   }
499 }
500 
501 // Infer register preference and allowable overlap. Only form a preference when
502 // the current Variable has an unambiguous "first" definition. The preference is
503 // some source Variable of the defining instruction that either is assigned a
504 // register that is currently free, or that is assigned a register that is not
505 // free but overlap is allowed. Overlap is allowed when the Variable under
506 // consideration is single-definition, and its definition is a simple assignment
507 // - i.e., the register gets copied/aliased but is never modified.  Furthermore,
508 // overlap is only allowed when preferred Variable definition instructions do
509 // not appear within the current Variable's live range.
findRegisterPreference(IterationState & Iter)510 void LinearScan::findRegisterPreference(IterationState &Iter) {
511   Iter.Prefer = nullptr;
512   Iter.PreferReg = RegNumT();
513   Iter.AllowOverlap = false;
514 
515   if (!FindPreference)
516     return;
517 
518   VariablesMetadata *VMetadata = Func->getVMetadata();
519   const Inst *DefInst = VMetadata->getFirstDefinitionSingleBlock(Iter.Cur);
520   if (DefInst == nullptr)
521     return;
522 
523   assert(DefInst->getDest() == Iter.Cur);
524   const bool IsSingleDefAssign =
525       DefInst->isVarAssign() && !VMetadata->isMultiDef(Iter.Cur);
526   FOREACH_VAR_IN_INST(SrcVar, *DefInst) {
527     // Only consider source variables that have (so far) been assigned a
528     // register.
529     if (!SrcVar->hasRegTmp())
530       continue;
531 
532     // That register must be one in the RegMask set, e.g. don't try to prefer
533     // the stack pointer as a result of the stacksave intrinsic.
534     const auto &Aliases = *RegAliases[SrcVar->getRegNumTmp()];
535     const int SrcReg = (Iter.RegMask & Aliases).find_first();
536     if (SrcReg == -1)
537       continue;
538 
539     if (FindOverlap && IsSingleDefAssign && !Iter.Free[SrcReg]) {
540       // Don't bother trying to enable AllowOverlap if the register is already
541       // free (hence the test on Iter.Free[SrcReg]).
542       Iter.AllowOverlap = !overlapsDefs(Func, Iter.Cur, SrcVar);
543     }
544     if (Iter.AllowOverlap || Iter.Free[SrcReg]) {
545       Iter.Prefer = SrcVar;
546       Iter.PreferReg = RegNumT::fromInt(SrcReg);
547       // Stop looking for a preference after the first valid preference is
548       // found.  One might think that we should look at all instruction
549       // variables to find the best <Prefer,AllowOverlap> combination, but note
550       // that AllowOverlap can only be true for a simple assignment statement
551       // which can have only one source operand, so it's not possible for
552       // AllowOverlap to be true beyond the first source operand.
553       FOREACH_VAR_IN_INST_BREAK;
554     }
555   }
556   if (BuildDefs::dump() && Verbose && Iter.Prefer) {
557     Ostream &Str = Ctx->getStrDump();
558     Str << "Initial Iter.Prefer=";
559     Iter.Prefer->dump(Func);
560     Str << " R=" << Iter.PreferReg << " LIVE=" << Iter.Prefer->getLiveRange()
561         << " Overlap=" << Iter.AllowOverlap << "\n";
562   }
563 }
564 
565 // Remove registers from the Iter.Free[] list where an Inactive range overlaps
566 // with the current range.
filterFreeWithInactiveRanges(IterationState & Iter)567 void LinearScan::filterFreeWithInactiveRanges(IterationState &Iter) {
568   for (const Variable *Item : Inactive) {
569     if (!Item->rangeOverlaps(Iter.Cur))
570       continue;
571     const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
572     for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
573       // Don't assert(Iter.Free[RegAlias]) because in theory (though probably
574       // never in practice) there could be two inactive variables that were
575       // marked with AllowOverlap.
576       Iter.Free[RegAlias] = false;
577       Iter.FreeUnfiltered[RegAlias] = false;
578       // Disable AllowOverlap if an Inactive variable, which is not Prefer,
579       // shares Prefer's register, and has a definition within Cur's live range.
580       if (Iter.AllowOverlap && Item != Iter.Prefer &&
581           RegAlias == Iter.PreferReg && overlapsDefs(Func, Iter.Cur, Item)) {
582         Iter.AllowOverlap = false;
583         dumpDisableOverlap(Func, Item, "Inactive");
584       }
585     }
586   }
587 }
588 
589 // Remove registers from the Iter.Free[] list where an Unhandled pre-colored
590 // range overlaps with the current range, and set those registers to infinite
591 // weight so that they aren't candidates for eviction.
592 // Cur->rangeEndsBefore(Item) is an early exit check that turns a guaranteed
593 // O(N^2) algorithm into expected linear complexity.
filterFreeWithPrecoloredRanges(IterationState & Iter)594 void LinearScan::filterFreeWithPrecoloredRanges(IterationState &Iter) {
595   // TODO(stichnot): Partition UnhandledPrecolored according to register class,
596   // to restrict the number of overlap comparisons needed.
597   for (Variable *Item : reverse_range(UnhandledPrecolored)) {
598     assert(Item->hasReg());
599     if (Iter.Cur->rangeEndsBefore(Item))
600       break;
601     if (!Item->rangeOverlaps(Iter.Cur))
602       continue;
603     const auto &Aliases =
604         *RegAliases[Item->getRegNum()]; // Note: not getRegNumTmp()
605     for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
606       Iter.Weights[RegAlias].setWeight(RegWeight::Inf);
607       Iter.Free[RegAlias] = false;
608       Iter.FreeUnfiltered[RegAlias] = false;
609       Iter.PrecoloredUnhandledMask[RegAlias] = true;
610       // Disable Iter.AllowOverlap if the preferred register is one of these
611       // pre-colored unhandled overlapping ranges.
612       if (Iter.AllowOverlap && RegAlias == Iter.PreferReg) {
613         Iter.AllowOverlap = false;
614         dumpDisableOverlap(Func, Item, "PrecoloredUnhandled");
615       }
616     }
617   }
618 }
619 
allocatePrecoloredRegister(Variable * Cur)620 void LinearScan::allocatePrecoloredRegister(Variable *Cur) {
621   const auto RegNum = Cur->getRegNum();
622   // RegNumTmp should have already been set above.
623   assert(Cur->getRegNumTmp() == RegNum);
624   dumpLiveRangeTrace("Precoloring  ", Cur);
625   Active.push_back(Cur);
626   const auto &Aliases = *RegAliases[RegNum];
627   for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
628     assert(RegUses[RegAlias] >= 0);
629     ++RegUses[RegAlias];
630   }
631   assert(!UnhandledPrecolored.empty());
632   assert(UnhandledPrecolored.back() == Cur);
633   UnhandledPrecolored.pop_back();
634 }
635 
allocatePreferredRegister(IterationState & Iter)636 void LinearScan::allocatePreferredRegister(IterationState &Iter) {
637   Iter.Cur->setRegNumTmp(Iter.PreferReg);
638   dumpLiveRangeTrace("Preferring   ", Iter.Cur);
639   const auto &Aliases = *RegAliases[Iter.PreferReg];
640   for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
641     assert(RegUses[RegAlias] >= 0);
642     ++RegUses[RegAlias];
643   }
644   Active.push_back(Iter.Cur);
645 }
646 
allocateFreeRegister(IterationState & Iter,bool Filtered)647 void LinearScan::allocateFreeRegister(IterationState &Iter, bool Filtered) {
648   const RegNumT RegNum =
649       *RegNumBVIter(Filtered ? Iter.Free : Iter.FreeUnfiltered).begin();
650   Iter.Cur->setRegNumTmp(RegNum);
651   if (Filtered)
652     dumpLiveRangeTrace("Allocating Y ", Iter.Cur);
653   else
654     dumpLiveRangeTrace("Allocating X ", Iter.Cur);
655   const auto &Aliases = *RegAliases[RegNum];
656   for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
657     assert(RegUses[RegAlias] >= 0);
658     ++RegUses[RegAlias];
659   }
660   Active.push_back(Iter.Cur);
661 }
662 
handleNoFreeRegisters(IterationState & Iter)663 void LinearScan::handleNoFreeRegisters(IterationState &Iter) {
664   // Check Active ranges.
665   for (const Variable *Item : Active) {
666     assert(Item->rangeOverlaps(Iter.Cur));
667     assert(Item->hasRegTmp());
668     const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
669     // We add the Item's weight to each alias/subregister to represent that,
670     // should we decide to pick any of them, then we would incur that many
671     // memory accesses.
672     RegWeight W = Item->getWeight(Func);
673     for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
674       Iter.Weights[RegAlias].addWeight(W);
675     }
676   }
677   // Same as above, but check Inactive ranges instead of Active.
678   for (const Variable *Item : Inactive) {
679     if (!Item->rangeOverlaps(Iter.Cur))
680       continue;
681     assert(Item->hasRegTmp());
682     const auto &Aliases = *RegAliases[Item->getRegNumTmp()];
683     RegWeight W = Item->getWeight(Func);
684     for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
685       Iter.Weights[RegAlias].addWeight(W);
686     }
687   }
688 
689   // All the weights are now calculated. Find the register with smallest weight.
690   int32_t MinWeightIndex = findMinWeightIndex(Iter.RegMask, Iter.Weights);
691 
692   if (MinWeightIndex < 0 ||
693       Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
694     if (!Iter.Cur->mustHaveReg()) {
695       // Iter.Cur doesn't have priority over any other live ranges, so don't
696       // allocate any register to it, and move it to the Handled state.
697       Handled.push_back(Iter.Cur);
698       return;
699     }
700     if (Kind == RAK_Phi) {
701       // Iter.Cur is infinite-weight but all physical registers are already
702       // taken, so we need to force one to be temporarily available.
703       addSpillFill(Iter);
704       Handled.push_back(Iter.Cur);
705       return;
706     }
707     // The remaining portion of the enclosing "if" block should only be
708     // reachable if we are manually limiting physical registers for testing.
709     if (UseReserve) {
710       if (Iter.FreeUnfiltered.any()) {
711         // There is some available physical register held in reserve, so use it.
712         constexpr bool NotFiltered = false;
713         allocateFreeRegister(Iter, NotFiltered);
714         // Iter.Cur is now on the Active list.
715         return;
716       }
717       // At this point, we need to find some reserve register that is already
718       // assigned to a non-infinite-weight variable.  This could happen if some
719       // variable was previously assigned an alias of such a register.
720       MinWeightIndex = findMinWeightIndex(Iter.RegMaskUnfiltered, Iter.Weights);
721     }
722     if (Iter.Cur->getWeight(Func) <= Iter.Weights[MinWeightIndex]) {
723       dumpLiveRangeTrace("Failing      ", Iter.Cur);
724       Func->setError("Unable to find a physical register for an "
725                      "infinite-weight live range "
726                      "(consider using -reg-reserve): " +
727                      Iter.Cur->getName());
728       Handled.push_back(Iter.Cur);
729       return;
730     }
731     // At this point, MinWeightIndex points to a valid reserve register to
732     // reallocate to Iter.Cur, so drop into the eviction code.
733   }
734 
735   // Evict all live ranges in Active that register number MinWeightIndex is
736   // assigned to.
737   const auto &Aliases = *RegAliases[MinWeightIndex];
738   for (SizeT I = Active.size(); I > 0; --I) {
739     const SizeT Index = I - 1;
740     Variable *Item = Active[Index];
741     const auto RegNum = Item->getRegNumTmp();
742     if (Aliases[RegNum]) {
743       dumpLiveRangeTrace("Evicting A   ", Item);
744       const auto &Aliases = *RegAliases[RegNum];
745       for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
746         --RegUses[RegAlias];
747         assert(RegUses[RegAlias] >= 0);
748       }
749       Item->setRegNumTmp(RegNumT());
750       moveItem(Active, Index, Handled);
751       Evicted.push_back(Item);
752     }
753   }
754   // Do the same for Inactive.
755   for (SizeT I = Inactive.size(); I > 0; --I) {
756     const SizeT Index = I - 1;
757     Variable *Item = Inactive[Index];
758     // Note: The Item->rangeOverlaps(Cur) clause is not part of the description
759     // of AssignMemLoc() in the original paper. But there doesn't seem to be any
760     // need to evict an inactive live range that doesn't overlap with the live
761     // range currently being considered. It's especially bad if we would end up
762     // evicting an infinite-weight but currently-inactive live range. The most
763     // common situation for this would be a scratch register kill set for call
764     // instructions.
765     if (Aliases[Item->getRegNumTmp()] && Item->rangeOverlaps(Iter.Cur)) {
766       dumpLiveRangeTrace("Evicting I   ", Item);
767       Item->setRegNumTmp(RegNumT());
768       moveItem(Inactive, Index, Handled);
769       Evicted.push_back(Item);
770     }
771   }
772   // Assign the register to Cur.
773   Iter.Cur->setRegNumTmp(RegNumT::fromInt(MinWeightIndex));
774   for (RegNumT RegAlias : RegNumBVIter(Aliases)) {
775     assert(RegUses[RegAlias] >= 0);
776     ++RegUses[RegAlias];
777   }
778   Active.push_back(Iter.Cur);
779   dumpLiveRangeTrace("Allocating Z ", Iter.Cur);
780 }
781 
assignFinalRegisters(const SmallBitVector & RegMaskFull,const SmallBitVector & PreDefinedRegisters,bool Randomized)782 void LinearScan::assignFinalRegisters(const SmallBitVector &RegMaskFull,
783                                       const SmallBitVector &PreDefinedRegisters,
784                                       bool Randomized) {
785   const size_t NumRegisters = RegMaskFull.size();
786   llvm::SmallVector<RegNumT, REGS_SIZE> Permutation(NumRegisters);
787   if (Randomized) {
788     // Create a random number generator for regalloc randomization. Merge
789     // function's sequence and Kind value as the Salt. Because regAlloc() is
790     // called twice under O2, the second time with RAK_Phi, we check Kind ==
791     // RAK_Phi to determine the lowest-order bit to make sure the Salt is
792     // different.
793     uint64_t Salt =
794         (Func->getSequenceNumber() << 1) ^ (Kind == RAK_Phi ? 0u : 1u);
795     Target->makeRandomRegisterPermutation(
796         Permutation, PreDefinedRegisters | ~RegMaskFull, Salt);
797   }
798 
799   // Finish up by setting RegNum = RegNumTmp (or a random permutation thereof)
800   // for each Variable.
801   for (Variable *Item : Handled) {
802     const auto RegNum = Item->getRegNumTmp();
803     auto AssignedRegNum = RegNum;
804 
805     if (Randomized && Item->hasRegTmp() && !Item->hasReg()) {
806       AssignedRegNum = Permutation[RegNum];
807     }
808     if (BuildDefs::dump() && Verbose) {
809       Ostream &Str = Ctx->getStrDump();
810       if (!Item->hasRegTmp()) {
811         Str << "Not assigning ";
812         Item->dump(Func);
813         Str << "\n";
814       } else {
815         Str << (AssignedRegNum == Item->getRegNum() ? "Reassigning "
816                                                     : "Assigning ")
817             << Target->getRegName(AssignedRegNum, Item->getType()) << "(r"
818             << AssignedRegNum << ") to ";
819         Item->dump(Func);
820         Str << "\n";
821       }
822     }
823     Item->setRegNum(AssignedRegNum);
824   }
825 }
826 
827 // Implements the linear-scan algorithm. Based on "Linear Scan Register
828 // Allocation in the Context of SSA Form and Register Constraints" by Hanspeter
829 // Mössenböck and Michael Pfeiffer,
830 // ftp://ftp.ssw.uni-linz.ac.at/pub/Papers/Moe02.PDF. This implementation is
831 // modified to take affinity into account and allow two interfering variables to
832 // share the same register in certain cases.
833 //
834 // Requires running Cfg::liveness(Liveness_Intervals) in preparation. Results
835 // are assigned to Variable::RegNum for each Variable.
scan(const SmallBitVector & RegMaskFull,bool Randomized)836 void LinearScan::scan(const SmallBitVector &RegMaskFull, bool Randomized) {
837   TimerMarker T(TimerStack::TT_linearScan, Func);
838   assert(RegMaskFull.any()); // Sanity check
839   if (Verbose)
840     Ctx->lockStr();
841   Func->resetCurrentNode();
842   const size_t NumRegisters = RegMaskFull.size();
843   SmallBitVector PreDefinedRegisters(NumRegisters);
844   if (Randomized) {
845     for (Variable *Var : UnhandledPrecolored) {
846       PreDefinedRegisters[Var->getRegNum()] = true;
847     }
848   }
849 
850   // Build a LiveRange representing the Kills list.
851   LiveRange KillsRange(Kills);
852   KillsRange.untrim();
853 
854   // Reset the register use count.
855   RegUses.resize(NumRegisters);
856   std::fill(RegUses.begin(), RegUses.end(), 0);
857 
858   // Unhandled is already set to all ranges in increasing order of start points.
859   assert(Active.empty());
860   assert(Inactive.empty());
861   assert(Handled.empty());
862   const TargetLowering::RegSetMask RegsInclude =
863       TargetLowering::RegSet_CallerSave;
864   const TargetLowering::RegSetMask RegsExclude = TargetLowering::RegSet_None;
865   const SmallBitVector KillsMask =
866       Target->getRegisterSet(RegsInclude, RegsExclude);
867 
868   // Allocate memory once outside the loop.
869   IterationState Iter;
870   Iter.Weights.reserve(NumRegisters);
871   Iter.PrecoloredUnhandledMask.reserve(NumRegisters);
872 
873   while (!Unhandled.empty()) {
874     Iter.Cur = Unhandled.back();
875     Unhandled.pop_back();
876     dumpLiveRangeTrace("\nConsidering  ", Iter.Cur);
877     if (UseReserve)
878       assert(Target->getAllRegistersForVariable(Iter.Cur).any());
879     else
880       assert(Target->getRegistersForVariable(Iter.Cur).any());
881     Iter.RegMask = RegMaskFull & Target->getRegistersForVariable(Iter.Cur);
882     Iter.RegMaskUnfiltered =
883         RegMaskFull & Target->getAllRegistersForVariable(Iter.Cur);
884     KillsRange.trim(Iter.Cur->getLiveRange().getStart());
885 
886     // Check for pre-colored ranges. If Cur is pre-colored, it definitely gets
887     // that register. Previously processed live ranges would have avoided that
888     // register due to it being pre-colored. Future processed live ranges won't
889     // evict that register because the live range has infinite weight.
890     if (Iter.Cur->hasReg()) {
891       allocatePrecoloredRegister(Iter.Cur);
892       continue;
893     }
894 
895     handleActiveRangeExpiredOrInactive(Iter.Cur);
896     handleInactiveRangeExpiredOrReactivated(Iter.Cur);
897 
898     // Calculate available registers into Iter.Free[] and Iter.FreeUnfiltered[].
899     Iter.Free = Iter.RegMask;
900     Iter.FreeUnfiltered = Iter.RegMaskUnfiltered;
901     for (SizeT i = 0; i < Iter.RegMask.size(); ++i) {
902       if (RegUses[i] > 0) {
903         Iter.Free[i] = false;
904         Iter.FreeUnfiltered[i] = false;
905       }
906     }
907 
908     findRegisterPreference(Iter);
909     filterFreeWithInactiveRanges(Iter);
910 
911     // Disable AllowOverlap if an Active variable, which is not Prefer, shares
912     // Prefer's register, and has a definition within Cur's live range.
913     if (Iter.AllowOverlap) {
914       const auto &Aliases = *RegAliases[Iter.PreferReg];
915       for (const Variable *Item : Active) {
916         const RegNumT RegNum = Item->getRegNumTmp();
917         if (Item != Iter.Prefer && Aliases[RegNum] &&
918             overlapsDefs(Func, Iter.Cur, Item)) {
919           Iter.AllowOverlap = false;
920           dumpDisableOverlap(Func, Item, "Active");
921         }
922       }
923     }
924 
925     Iter.Weights.resize(Iter.RegMask.size());
926     std::fill(Iter.Weights.begin(), Iter.Weights.end(), RegWeight());
927 
928     Iter.PrecoloredUnhandledMask.resize(Iter.RegMask.size());
929     Iter.PrecoloredUnhandledMask.reset();
930 
931     filterFreeWithPrecoloredRanges(Iter);
932 
933     // Remove scratch registers from the Iter.Free[] list, and mark their
934     // Iter.Weights[] as infinite, if KillsRange overlaps Cur's live range.
935     constexpr bool UseTrimmed = true;
936     if (Iter.Cur->getLiveRange().overlaps(KillsRange, UseTrimmed)) {
937       Iter.Free.reset(KillsMask);
938       Iter.FreeUnfiltered.reset(KillsMask);
939       for (RegNumT i : RegNumBVIter(KillsMask)) {
940         Iter.Weights[i].setWeight(RegWeight::Inf);
941         if (Iter.PreferReg == i)
942           Iter.AllowOverlap = false;
943       }
944     }
945 
946     // Print info about physical register availability.
947     if (BuildDefs::dump() && Verbose) {
948       Ostream &Str = Ctx->getStrDump();
949       for (RegNumT i : RegNumBVIter(Iter.RegMaskUnfiltered)) {
950         Str << Target->getRegName(i, Iter.Cur->getType()) << "(U=" << RegUses[i]
951             << ",F=" << Iter.Free[i] << ",P=" << Iter.PrecoloredUnhandledMask[i]
952             << ") ";
953       }
954       Str << "\n";
955     }
956 
957     if (Iter.Prefer && (Iter.AllowOverlap || Iter.Free[Iter.PreferReg])) {
958       // First choice: a preferred register that is either free or is allowed to
959       // overlap with its linked variable.
960       allocatePreferredRegister(Iter);
961     } else if (Iter.Free.any()) {
962       // Second choice: any free register.
963       constexpr bool Filtered = true;
964       allocateFreeRegister(Iter, Filtered);
965     } else {
966       // Fallback: there are no free registers, so we look for the lowest-weight
967       // register and see if Cur has higher weight.
968       handleNoFreeRegisters(Iter);
969     }
970     dump(Func);
971   }
972 
973   // Move anything Active or Inactive to Handled for easier handling.
974   Handled.insert(Handled.end(), Active.begin(), Active.end());
975   Active.clear();
976   Handled.insert(Handled.end(), Inactive.begin(), Inactive.end());
977   Inactive.clear();
978   dump(Func);
979 
980   assignFinalRegisters(RegMaskFull, PreDefinedRegisters, Randomized);
981 
982   if (Verbose)
983     Ctx->unlockStr();
984 }
985 
986 // ======================== Dump routines ======================== //
987 
dumpLiveRangeTrace(const char * Label,const Variable * Item)988 void LinearScan::dumpLiveRangeTrace(const char *Label, const Variable *Item) {
989   if (!BuildDefs::dump())
990     return;
991 
992   if (Verbose) {
993     Ostream &Str = Ctx->getStrDump();
994     Str << Label;
995     dumpLiveRange(Item, Func);
996     Str << "\n";
997   }
998 }
999 
dump(Cfg * Func) const1000 void LinearScan::dump(Cfg *Func) const {
1001   if (!BuildDefs::dump())
1002     return;
1003   if (!Verbose)
1004     return;
1005   Ostream &Str = Func->getContext()->getStrDump();
1006   Func->resetCurrentNode();
1007   Str << "**** Current regalloc state:\n";
1008   Str << "++++++ Handled:\n";
1009   for (const Variable *Item : Handled) {
1010     dumpLiveRange(Item, Func);
1011     Str << "\n";
1012   }
1013   Str << "++++++ Unhandled:\n";
1014   for (const Variable *Item : reverse_range(Unhandled)) {
1015     dumpLiveRange(Item, Func);
1016     Str << "\n";
1017   }
1018   Str << "++++++ Active:\n";
1019   for (const Variable *Item : Active) {
1020     dumpLiveRange(Item, Func);
1021     Str << "\n";
1022   }
1023   Str << "++++++ Inactive:\n";
1024   for (const Variable *Item : Inactive) {
1025     dumpLiveRange(Item, Func);
1026     Str << "\n";
1027   }
1028 }
1029 
1030 } // end of namespace Ice
1031