//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the RAGreedy function pass for register allocation in
// optimized builds.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
#include "LiveDebugVariables.h"
#include "RegAllocBase.h"
#include "SpillPlacement.h"
#include "Spiller.h"
#include "SplitKit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/EdgeBundles.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/PassAnalysisSupport.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <queue>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits,  "Number of split local live ranges");
STATISTIC(NumEvicted,      "Number of interferences evicted");

static cl::opt<SplitEditor::ComplementSpillMode>
SplitSpillMode("split-spill-mode", cl::Hidden,
  cl::desc("Spill mode for splitting live ranges"),
  cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
             clEnumValN(SplitEditor::SM_Size,  "size",  "Optimize for size"),
             clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
             clEnumValEnd),
  cl::init(SplitEditor::SM_Partition));

static cl::opt<unsigned>
LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden,
                             cl::desc("Last chance recoloring max depth"),
                             cl::init(5));

static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
    "lcr-max-interf", cl::Hidden,
    cl::desc("Last chance recoloring maximum number of considered"
             " interference at a time"),
    cl::init(8));

static cl::opt<bool>
ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
                 cl::desc("Exhaustive Search for registers bypassing the depth "
                          "and interference cutoffs of last chance recoloring"));

static cl::opt<bool> EnableLocalReassignment(
    "enable-local-reassign", cl::Hidden,
    cl::desc("Local reassignment can yield better allocation decisions, but "
             "may be compile time intensive"),
    cl::init(false));

// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
              cl::desc("Cost for first time use of callee-saved register."),
              cl::init(0), cl::Hidden);

static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
                                       createGreedyRegisterAllocator);

namespace {
class RAGreedy : public MachineFunctionPass,
                 public RegAllocBase,
                 private LiveRangeEdit::Delegate {
  // Convenient shortcuts.
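  // PQueue holds (priority, ~vreg-number) pairs built by enqueue(); dequeue()
  // recovers the register with ~top().second.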
  typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue;
  typedef SmallPtrSet<LiveInterval *, 4> SmallLISet;
  typedef SmallSet<unsigned, 16> SmallVirtRegSet;

  // context
  MachineFunction *MF;

  // Shortcuts to some useful interfaces.
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  RegisterClassInfo RCI;

  // analyses
  SlotIndexes *Indexes;
  MachineBlockFrequencyInfo *MBFI;
  MachineDominatorTree *DomTree;
  MachineLoopInfo *Loops;
  EdgeBundles *Bundles;
  SpillPlacement *SpillPlacer;
  LiveDebugVariables *DebugVars;

  // state
  std::unique_ptr<Spiller> SpillerInstance;
  PQueue Queue;
  unsigned NextCascade;

  // Live ranges pass through a number of stages as we try to allocate them.
  // Some of the stages may also create new live ranges:
  //
  // - Region splitting.
  // - Per-block splitting.
  // - Local splitting.
  // - Spilling.
  //
  // Ranges produced by one of the stages skip the previous stages when they are
  // dequeued. This improves performance because we can skip interference checks
  // that are unlikely to give any results. It also guarantees that the live
  // range splitting algorithm terminates, something that is otherwise hard to
  // ensure.
  enum LiveRangeStage {
    /// Newly created live range that has never been queued.
    RS_New,

    /// Only attempt assignment and eviction. Then requeue as RS_Split.
    RS_Assign,

    /// Attempt live range splitting if assignment is impossible.
    RS_Split,

    /// Attempt more aggressive live range splitting that is guaranteed to make
    /// progress.  This is used for split products that may not be making
    /// progress.
    RS_Split2,

    /// Live range will be spilled.  No more splitting will be attempted.
    RS_Spill,

    /// There is nothing more we can do to this live range.  Abort compilation
    /// if it can't be assigned.
    RS_Done
  };

  // The CutOffStage enum keeps track of whether register allocation failed
  // because of the cutoffs encountered in last chance recoloring.
  // Note: This is used as a bitmask; each new value should be the next power
  // of 2.
  enum CutOffStage {
    // No cutoffs encountered
    CO_None = 0,

    // lcr-max-depth cutoff encountered
    CO_Depth = 1,

    // lcr-max-interf cutoff encountered
    CO_Interf = 2
  };

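  // Accumulated CutOffStage bits, e.g. (CO_Depth | CO_Interf) when both
  // cutoffs were hit.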
  uint8_t CutOffInfo;

#ifndef NDEBUG
  static const char *const StageName[];
#endif

  // RegInfo - Keep additional information about each live range.
  struct RegInfo {
    LiveRangeStage Stage;

    // Cascade - Eviction loop prevention. See canEvictInterference().
    unsigned Cascade;

    RegInfo() : Stage(RS_New), Cascade(0) {}
  };

  IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;

  LiveRangeStage getStage(const LiveInterval &VirtReg) const {
    return ExtraRegInfo[VirtReg.reg].Stage;
  }

  void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
    ExtraRegInfo.resize(MRI->getNumVirtRegs());
    ExtraRegInfo[VirtReg.reg].Stage = Stage;
  }

  template<typename Iterator>
  void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
    ExtraRegInfo.resize(MRI->getNumVirtRegs());
    for (;Begin != End; ++Begin) {
      unsigned Reg = *Begin;
      if (ExtraRegInfo[Reg].Stage == RS_New)
        ExtraRegInfo[Reg].Stage = NewStage;
    }
  }

  /// Cost of evicting interference.
  struct EvictionCost {
    unsigned BrokenHints; ///< Total number of broken hints.
    float MaxWeight;      ///< Maximum spill weight evicted.

    EvictionCost(): BrokenHints(0), MaxWeight(0) {}

    bool isMax() const { return BrokenHints == ~0u; }

    void setMax() { BrokenHints = ~0u; }

    void setBrokenHints(unsigned NHints) { BrokenHints = NHints; }

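    // Lexicographic order: fewer broken hints wins first, then lower maximum
    // spill weight.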
    bool operator<(const EvictionCost &O) const {
      return std::tie(BrokenHints, MaxWeight) <
             std::tie(O.BrokenHints, O.MaxWeight);
    }
  };

  // splitting state.
  std::unique_ptr<SplitAnalysis> SA;
  std::unique_ptr<SplitEditor> SE;

  /// Cached per-block interference maps.
  InterferenceCache IntfCache;

  /// All basic blocks where the current register has uses.
  SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;

  /// Global live range splitting candidate info.
  struct GlobalSplitCandidate {
    // Register intended for assignment, or 0.
    unsigned PhysReg;

    // SplitKit interval index for this candidate.
    unsigned IntvIdx;

    // Interference for PhysReg.
    InterferenceCache::Cursor Intf;

    // Bundles where this candidate should be live.
    BitVector LiveBundles;
    SmallVector<unsigned, 8> ActiveBlocks;

    void reset(InterferenceCache &Cache, unsigned Reg) {
      PhysReg = Reg;
      IntvIdx = 0;
      Intf.setPhysReg(Cache, Reg);
      LiveBundles.clear();
      ActiveBlocks.clear();
    }

    // Set B[i] = C for every live bundle where B[i] was NoCand.
    unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) {
      unsigned Count = 0;
      for (int i = LiveBundles.find_first(); i >= 0;
           i = LiveBundles.find_next(i))
        if (B[i] == NoCand) {
          B[i] = C;
          Count++;
        }
      return Count;
    }
  };

  /// Candidate info for each PhysReg in AllocationOrder.
  /// This vector never shrinks, but grows to the size of the largest register
  /// class.
  SmallVector<GlobalSplitCandidate, 32> GlobalCand;

  enum : unsigned { NoCand = ~0u };

  /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to
  /// NoCand which indicates the stack interval.
  SmallVector<unsigned, 32> BundleCand;

  /// Callee-save register cost, calculated once per machine function.
  BlockFrequency CSRCost;

  /// Whether to run the local reassignment heuristic. This information is
  /// obtained from the TargetSubtargetInfo.
  bool EnableLocalReassign;

  /// Set of broken hints that may be reconciled later because of eviction.
  SmallSetVector<LiveInterval *, 8> SetOfBrokenHints;

public:
  RAGreedy();

  /// Return the pass name.
  const char* getPassName() const override {
    return "Greedy Register Allocator";
  }

  /// RAGreedy analysis usage.
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  void releaseMemory() override;
  Spiller &spiller() override { return *SpillerInstance; }
  void enqueue(LiveInterval *LI) override;
  LiveInterval *dequeue() override;
  unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override;
  void aboutToRemoveInterval(LiveInterval &) override;

  /// Perform register allocation.
  bool runOnMachineFunction(MachineFunction &mf) override;

  static char ID;

private:
  unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &,
                             SmallVirtRegSet &, unsigned = 0);

  bool LRE_CanEraseVirtReg(unsigned) override;
  void LRE_WillShrinkVirtReg(unsigned) override;
  void LRE_DidCloneVirtReg(unsigned, unsigned) override;
  void enqueue(PQueue &CurQueue, LiveInterval *LI);
  LiveInterval *dequeue(PQueue &CurQueue);

  BlockFrequency calcSpillCost();
  bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&);
  void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
  void growRegion(GlobalSplitCandidate &Cand);
  BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&);
  bool calcCompactRegion(GlobalSplitCandidate&);
  void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>);
  void calcGapWeights(unsigned, SmallVectorImpl<float>&);
  unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg);
  bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
  bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
  void evictInterference(LiveInterval&, unsigned,
                         SmallVectorImpl<unsigned>&);
  bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
                                  SmallLISet &RecoloringCandidates,
                                  const SmallVirtRegSet &FixedRegisters);

  unsigned tryAssign(LiveInterval&, AllocationOrder&,
                     SmallVectorImpl<unsigned>&);
  unsigned tryEvict(LiveInterval&, AllocationOrder&,
                    SmallVectorImpl<unsigned>&, unsigned = ~0u);
  unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
                          SmallVectorImpl<unsigned>&);
  /// Calculate cost of region splitting.
  unsigned calculateRegionSplitCost(LiveInterval &VirtReg,
                                    AllocationOrder &Order,
                                    BlockFrequency &BestCost,
                                    unsigned &NumCands, bool IgnoreCSR);
  /// Perform region splitting.
  unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
                         bool HasCompact,
                         SmallVectorImpl<unsigned> &NewVRegs);
  /// Check other options before using a callee-saved register for the first
  /// time.
  unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
                                 unsigned PhysReg, unsigned &CostPerUseLimit,
                                 SmallVectorImpl<unsigned> &NewVRegs);
  void initializeCSRCost();
  unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
                         SmallVectorImpl<unsigned>&);
  unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
                               SmallVectorImpl<unsigned>&);
  unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
                         SmallVectorImpl<unsigned>&);
  unsigned trySplit(LiveInterval&, AllocationOrder&,
                    SmallVectorImpl<unsigned>&);
  unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
                                   SmallVectorImpl<unsigned> &,
                                   SmallVirtRegSet &, unsigned);
  bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &,
                               SmallVirtRegSet &, unsigned);
  void tryHintRecoloring(LiveInterval &);
  void tryHintsRecoloring();

  /// Model the information carried by one end of a copy.
  struct HintInfo {
    /// The frequency of the copy.
    BlockFrequency Freq;
    /// The virtual register or physical register.
    unsigned Reg;
    /// Its currently assigned register.
    /// In case of a physical register Reg == PhysReg.
    unsigned PhysReg;
    HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg)
        : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {}
  };
  typedef SmallVector<HintInfo, 4> HintsInfo;
  BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned);
  void collectHintInfo(unsigned, HintsInfo &);
};
} // end anonymous namespace

char RAGreedy::ID = 0;

#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
    "RS_New",
    "RS_Assign",
    "RS_Split",
    "RS_Split2",
    "RS_Spill",
    "RS_Done"
};
#endif

// Hysteresis to use when comparing floats.
// This helps stabilize decisions based on float comparisons.
const float Hysteresis = (2007 / 2048.0f); // 0.97998046875

FunctionPass* llvm::createGreedyRegisterAllocator() {
  return new RAGreedy();
}

RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
  initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
  initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
  initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
  initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
  initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
  initializeLiveStacksPass(*PassRegistry::getPassRegistry());
  initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
  initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
  initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
  initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
  initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
  initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
}

void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<MachineBlockFrequencyInfo>();
  AU.addPreserved<MachineBlockFrequencyInfo>();
  AU.addRequired<AliasAnalysis>();
  AU.addPreserved<AliasAnalysis>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  AU.addRequired<SlotIndexes>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<LiveDebugVariables>();
  AU.addPreserved<LiveDebugVariables>();
  AU.addRequired<LiveStacks>();
  AU.addPreserved<LiveStacks>();
  AU.addRequired<MachineDominatorTree>();
  AU.addPreserved<MachineDominatorTree>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addRequired<VirtRegMap>();
  AU.addPreserved<VirtRegMap>();
  AU.addRequired<LiveRegMatrix>();
  AU.addPreserved<LiveRegMatrix>();
  AU.addRequired<EdgeBundles>();
  AU.addRequired<SpillPlacement>();
  MachineFunctionPass::getAnalysisUsage(AU);
}


//===----------------------------------------------------------------------===//
//                     LiveRangeEdit delegate methods
//===----------------------------------------------------------------------===//

bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) {
  if (VRM->hasPhys(VirtReg)) {
    LiveInterval &LI = LIS->getInterval(VirtReg);
    Matrix->unassign(LI);
    aboutToRemoveInterval(LI);
    return true;
  }
  // Unassigned virtreg is probably in the priority queue.
  // RegAllocBase will erase it after dequeueing.
  return false;
}

void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) {
  if (!VRM->hasPhys(VirtReg))
    return;

  // Register is assigned, put it back on the queue for reassignment.
  LiveInterval &LI = LIS->getInterval(VirtReg);
  Matrix->unassign(LI);
  enqueue(&LI);
}

void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
  // Cloning a register we haven't even heard about yet?  Just ignore it.
  if (!ExtraRegInfo.inBounds(Old))
    return;

  // LRE may clone a virtual register because dead code elimination causes it to
  // be split into connected components. The new components are much smaller
  // than the original, so they should get a new chance at being assigned.
  ExtraRegInfo[Old].Stage = RS_Assign;
  ExtraRegInfo.grow(New);
  ExtraRegInfo[New] = ExtraRegInfo[Old];
}

void RAGreedy::releaseMemory() {
  SpillerInstance.reset();
  ExtraRegInfo.clear();
  GlobalCand.clear();
}

void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); }

void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
  // Prioritize live ranges by size, assigning larger ranges first.
  // The queue holds (size, reg) pairs.
  const unsigned Size = LI->getSize();
  const unsigned Reg = LI->reg;
  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
         "Can only enqueue virtual registers");
  unsigned Prio;
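  // Prio is composed below, roughly as follows (high bit to low):
  //   bit 31      set for everything except deferred RS_Split ranges
  //   bit 30      set when the register has a known preference (hint)
  //   bit 29      set for global ranges, prioritizing them above local ones
  //   bits 24-28  the register class AllocationPriority, for local ranges
  //   low bits    the range size, or its instruction distance for local ranges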

  ExtraRegInfo.grow(Reg);
  if (ExtraRegInfo[Reg].Stage == RS_New)
    ExtraRegInfo[Reg].Stage = RS_Assign;

  if (ExtraRegInfo[Reg].Stage == RS_Split) {
    // Unsplit ranges that couldn't be allocated immediately are deferred until
    // everything else has been allocated.
    Prio = Size;
  } else {
    // Giant live ranges fall back to the global assignment heuristic, which
    // prevents excessive spilling in pathological cases.
    bool ReverseLocal = TRI->reverseLocalAssignment();
    const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
    bool ForceGlobal = !ReverseLocal &&
      (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs());
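    // Size / SlotIndex::InstrDist approximates the number of instructions the
    // range spans, so a range counts as "giant" when it covers more than twice
    // as many instructions as the class has registers.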

    if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() &&
        LIS->intervalIsInOneMBB(*LI)) {
      // Allocate original local ranges in linear instruction order. Since they
      // are singly defined, this produces optimal coloring in the absence of
      // global interference and other constraints.
      if (!ReverseLocal)
        Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex());
      else {
        // Allocating bottom up may allow many short live ranges to be assigned
        // first to one of the cheap registers. This could be much faster for
        // very large blocks on targets with many physical registers.
        Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex());
      }
      Prio |= RC.AllocationPriority << 24;
    } else {
      // Allocate global and split ranges in long->short order. Long ranges that
      // don't fit should be spilled (or split) ASAP so they don't create
      // interference.  Mark a bit to prioritize global above local ranges.
      Prio = (1u << 29) + Size;
    }
    // Mark a higher bit to prioritize global and local above RS_Split.
    Prio |= (1u << 31);

    // Boost ranges that have a physical register hint.
    if (VRM->hasKnownPreference(Reg))
      Prio |= (1u << 30);
  }
  // The virtual register number is a tie breaker for same-sized ranges.
  // Give lower vreg numbers higher priority to assign them first.
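  // The bitwise complement makes smaller vreg numbers compare greater in the
  // max-heap; dequeue() recovers the register with ~top().second.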
  CurQueue.push(std::make_pair(Prio, ~Reg));
}

LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }

LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
  if (CurQueue.empty())
    return nullptr;
  LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
  CurQueue.pop();
  return LI;
}


//===----------------------------------------------------------------------===//
//                            Direct Assignment
//===----------------------------------------------------------------------===//

/// tryAssign - Try to assign VirtReg to an available register.
unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
                             AllocationOrder &Order,
                             SmallVectorImpl<unsigned> &NewVRegs) {
  Order.rewind();
  unsigned PhysReg;
  while ((PhysReg = Order.next()))
    if (!Matrix->checkInterference(VirtReg, PhysReg))
      break;
  if (!PhysReg || Order.isHint())
    return PhysReg;

  // PhysReg is available, but there may be a better choice.

  // If we missed a simple hint, try to cheaply evict interference from the
  // preferred register.
  if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
    if (Order.isHint(Hint)) {
      DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
      EvictionCost MaxCost;
      MaxCost.setBrokenHints(1);
      if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
        evictInterference(VirtReg, Hint, NewVRegs);
        return Hint;
      }
    }

  // Try to evict interference from a cheaper alternative.
  unsigned Cost = TRI->getCostPerUse(PhysReg);

  // Most registers have 0 additional cost.
  if (!Cost)
    return PhysReg;

  DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost
               << '\n');
  unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
  return CheapReg ? CheapReg : PhysReg;
}


//===----------------------------------------------------------------------===//
//                         Interference eviction
//===----------------------------------------------------------------------===//

unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) {
  AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
  unsigned PhysReg;
  while ((PhysReg = Order.next())) {
    if (PhysReg == PrevReg)
      continue;

    MCRegUnitIterator Units(PhysReg, TRI);
    for (; Units.isValid(); ++Units) {
      // Instantiate a "subquery", not to be confused with the Queries array.
      LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]);
      if (subQ.checkInterference())
        break;
    }
    // If no units have interference, break out with the current PhysReg.
    if (!Units.isValid())
      break;
  }
  if (PhysReg)
    DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
          << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI)
          << '\n');
  return PhysReg;
}

/// shouldEvict - determine if A should evict the assigned live range B. The
/// eviction policy defined by this function together with the allocation order
/// defined by enqueue() decides which registers ultimately end up being split
/// and spilled.
///
/// Cascade numbers are used to prevent infinite loops in case the eviction
/// relation defined by this function turns out to be cyclic.
///
/// @param A          The live range to be assigned.
/// @param IsHint     True when A is about to be assigned to its preferred
///                   register.
/// @param B          The live range to be evicted.
/// @param BreaksHint True when B is already assigned to its preferred register.
bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
                           LiveInterval &B, bool BreaksHint) {
  bool CanSplit = getStage(B) < RS_Spill;

  // Be fairly aggressive about following hints as long as the evictee can be
  // split.
  if (CanSplit && IsHint && !BreaksHint)
    return true;

  if (A.weight > B.weight) {
    DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n');
    return true;
  }
  return false;
}

/// canEvictInterference - Return true if all interferences between VirtReg and
/// PhysReg can be evicted.
///
/// @param VirtReg Live range that is about to be assigned.
/// @param PhysReg Desired register for assignment.
/// @param IsHint  True when PhysReg is VirtReg's preferred register.
/// @param MaxCost Only look for cheaper candidates and update with new cost
///                when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                    bool IsHint, EvictionCost &MaxCost) {
  // It is only possible to evict virtual register interference.
  if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
    return false;

  bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);

  // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
  // involved in an eviction before. If a cascade number was assigned, deny
  // evicting anything with the same or a newer cascade number. This prevents
  // infinite eviction loops.
  //
  // This works out so a register without a cascade number is allowed to evict
  // anything, and it can be evicted by anything.
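  //
  // Example: if %a with cascade 3 evicts %b, then %b is assigned cascade 3 as
  // well, and from then on only a range with a cascade number greater than 3
  // may evict %b again (urgent evictions excepted).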
  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
  if (!Cascade)
    Cascade = NextCascade;

  EvictionCost Cost;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there are 10 or more interferences, chances are one is heavier.
    if (Q.collectInterferingVRegs(10) >= 10)
      return false;

    // Check if any interfering live range is heavier than MaxWeight.
    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
      assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
             "Only expecting virtual register interference from query");
      // Never evict spill products. They cannot split or spill.
      if (getStage(*Intf) == RS_Done)
        return false;
      // Once a live range becomes small enough, it is urgent that we find a
      // register for it. This is indicated by an infinite spill weight. These
      // urgent live ranges get to evict almost anything.
      //
      // Also allow urgent evictions of unspillable ranges from a strictly
      // larger allocation order.
      bool Urgent = !VirtReg.isSpillable() &&
        (Intf->isSpillable() ||
         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) <
         RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg)));
      // Only evict older cascades or live ranges without a cascade.
      unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
      if (Cascade <= IntfCascade) {
        if (!Urgent)
          return false;
        // We permit breaking cascades for urgent evictions. It should be the
        // last resort, though, so make it really expensive.
        Cost.BrokenHints += 10;
      }
      // Would this break a satisfied hint?
      bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
      // Update eviction cost.
      Cost.BrokenHints += BreaksHint;
      Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
      // Abort if this would be too expensive.
      if (!(Cost < MaxCost))
        return false;
      if (Urgent)
        continue;
      // Apply the eviction policy for non-urgent evictions.
      if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
        return false;
      // If !MaxCost.isMax(), then we're just looking for a cheap register.
      // Evicting another local live range in this case could lead to suboptimal
      // coloring.
      if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) &&
          (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) {
        return false;
      }
    }
  }
  MaxCost = Cost;
  return true;
}

/// evictInterference - Evict any interfering registers that prevent VirtReg
/// from being assigned to PhysReg. This assumes that canEvictInterference
/// returned true.
void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  // Make sure that VirtReg has a cascade number, and assign that cascade
  // number to every evicted register. These live ranges can then only be
  // evicted by a newer cascade, preventing infinite loops.
  unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
  if (!Cascade)
    Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;

  DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
               << " interference: Cascade " << Cascade << '\n');

  // Collect all interfering virtregs first.
  SmallVector<LiveInterval*, 8> Intfs;
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    assert(Q.seenAllInterferences() && "Didn't check all interferences.");
    ArrayRef<LiveInterval*> IVR = Q.interferingVRegs();
    Intfs.append(IVR.begin(), IVR.end());
  }

  // Evict them second. This will invalidate the queries.
  for (unsigned i = 0, e = Intfs.size(); i != e; ++i) {
    LiveInterval *Intf = Intfs[i];
    // The same VirtReg may be present in multiple RegUnits. Skip duplicates.
    if (!VRM->hasPhys(Intf->reg))
      continue;
    Matrix->unassign(*Intf);
    assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
            VirtReg.isSpillable() < Intf->isSpillable()) &&
           "Cannot decrease cascade number, illegal eviction");
    ExtraRegInfo[Intf->reg].Cascade = Cascade;
    ++NumEvicted;
    NewVRegs.push_back(Intf->reg);
  }
}

/// tryEvict - Try to evict all interferences for a physreg.
/// @param  VirtReg Currently unassigned virtual register.
/// @param  Order   Physregs to try.
/// @return         Physreg to assign VirtReg, or 0.
unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
                            AllocationOrder &Order,
                            SmallVectorImpl<unsigned> &NewVRegs,
                            unsigned CostPerUseLimit) {
  NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);

  // Keep track of the cheapest interference seen so far.
  EvictionCost BestCost;
  BestCost.setMax();
  unsigned BestPhys = 0;
  unsigned OrderLimit = Order.getOrder().size();

  // When we are just looking for a reduced cost per use, don't break any
  // hints, and only evict smaller spill weights.
  if (CostPerUseLimit < ~0u) {
    BestCost.BrokenHints = 0;
    BestCost.MaxWeight = VirtReg.weight;

    // Check if any registers in RC are below CostPerUseLimit.
    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg);
    unsigned MinCost = RegClassInfo.getMinCost(RC);
    if (MinCost >= CostPerUseLimit) {
      DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost
                   << ", no cheaper registers to be found.\n");
      return 0;
    }

    // It is normal for register classes to have a long tail of registers with
    // the same cost. We don't need to look at them if they're too expensive.
    if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) {
      OrderLimit = RegClassInfo.getLastCostChange(RC);
      DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n");
    }
  }

  Order.rewind();
  while (unsigned PhysReg = Order.next(OrderLimit)) {
    if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
      continue;
    // The first use of a callee-saved register in a function has cost 1.
    // Don't start using a CSR when the CostPerUseLimit is low.
    if (CostPerUseLimit == 1)
      if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
        if (!MRI->isPhysRegUsed(CSR)) {
          DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
                       << PrintReg(CSR, TRI) << '\n');
          continue;
        }

    if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
      continue;

    // Best so far.
    BestPhys = PhysReg;

    // Stop if the hint can be used.
    if (Order.isHint())
      break;
  }

  if (!BestPhys)
    return 0;

  evictInterference(VirtReg, BestPhys, NewVRegs);
  return BestPhys;
}


//===----------------------------------------------------------------------===//
//                              Region Splitting
//===----------------------------------------------------------------------===//

/// addSplitConstraints - Fill out the SplitConstraints vector based on the
/// interference pattern in PhysReg and its aliases. Add the constraints to
/// SpillPlacement and return the static cost of this split in Cost, assuming
/// that all preferences in SplitConstraints are met.
/// Return false if there are no bundles with positive bias.
bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf,
                                   BlockFrequency &Cost) {
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();

  // Reset interference dependent info.
  SplitConstraints.resize(UseBlocks.size());
  BlockFrequency StaticCost = 0;
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];

    BC.Number = BI.MBB->getNumber();
    Intf.moveToBlock(BC.Number);
    BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare;
    BC.ChangesValue = BI.FirstDef.isValid();

    if (!Intf.hasInterference())
      continue;

    // Number of spill code instructions to insert.
    unsigned Ins = 0;

    // Interference for the live-in value.
    if (BI.LiveIn) {
      if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number))
        BC.Entry = SpillPlacement::MustSpill, ++Ins;
      else if (Intf.first() < BI.FirstInstr)
        BC.Entry = SpillPlacement::PrefSpill, ++Ins;
      else if (Intf.first() < BI.LastInstr)
        ++Ins;
    }

    // Interference for the live-out value.
    if (BI.LiveOut) {
      if (Intf.last() >= SA->getLastSplitPoint(BC.Number))
        BC.Exit = SpillPlacement::MustSpill, ++Ins;
      else if (Intf.last() > BI.LastInstr)
        BC.Exit = SpillPlacement::PrefSpill, ++Ins;
      else if (Intf.last() > BI.FirstInstr)
        ++Ins;
    }

    // Accumulate the total frequency of inserted spill code.
    while (Ins--)
      StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
  }
  Cost = StaticCost;

  // Add constraints for use-blocks. Note that these are the only constraints
  // that may add a positive bias, it is downhill from here.
  SpillPlacer->addConstraints(SplitConstraints);
  return SpillPlacer->scanActiveBundles();
}


/// addThroughConstraints - Add constraints and links to SpillPlacer from the
/// live-through blocks in Blocks.
void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
                                     ArrayRef<unsigned> Blocks) {
  const unsigned GroupSize = 8;
  SpillPlacement::BlockConstraint BCS[GroupSize];
  unsigned TBS[GroupSize];
  unsigned B = 0, T = 0;

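  // Constraints and links are buffered in the fixed-size BCS/TBS arrays and
  // handed to SpillPlacer in batches of GroupSize rather than one at a time.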
  for (unsigned i = 0; i != Blocks.size(); ++i) {
    unsigned Number = Blocks[i];
    Intf.moveToBlock(Number);

    if (!Intf.hasInterference()) {
      assert(T < GroupSize && "Array overflow");
      TBS[T] = Number;
      if (++T == GroupSize) {
        SpillPlacer->addLinks(makeArrayRef(TBS, T));
        T = 0;
      }
      continue;
    }

    assert(B < GroupSize && "Array overflow");
    BCS[B].Number = Number;

    // Interference for the live-in value.
    if (Intf.first() <= Indexes->getMBBStartIdx(Number))
      BCS[B].Entry = SpillPlacement::MustSpill;
    else
      BCS[B].Entry = SpillPlacement::PrefSpill;

    // Interference for the live-out value.
    if (Intf.last() >= SA->getLastSplitPoint(Number))
      BCS[B].Exit = SpillPlacement::MustSpill;
    else
      BCS[B].Exit = SpillPlacement::PrefSpill;

    if (++B == GroupSize) {
      SpillPlacer->addConstraints(makeArrayRef(BCS, B));
      B = 0;
    }
  }

  SpillPlacer->addConstraints(makeArrayRef(BCS, B));
  SpillPlacer->addLinks(makeArrayRef(TBS, T));
}

void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
  // Keep track of through blocks that have not been added to SpillPlacer.
  BitVector Todo = SA->getThroughBlocks();
  SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
  unsigned AddedTo = 0;
#ifndef NDEBUG
  unsigned Visited = 0;
#endif

  for (;;) {
    ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
    // Find new through blocks in the periphery of PrefRegBundles.
    for (int i = 0, e = NewBundles.size(); i != e; ++i) {
      unsigned Bundle = NewBundles[i];
      // Look at all blocks connected to Bundle in the full graph.
      ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle);
      for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end();
           I != E; ++I) {
        unsigned Block = *I;
        if (!Todo.test(Block))
          continue;
        Todo.reset(Block);
        // This is a new through block. Add it to SpillPlacer later.
        ActiveBlocks.push_back(Block);
#ifndef NDEBUG
        ++Visited;
#endif
      }
    }
    // Any new blocks to add?
    if (ActiveBlocks.size() == AddedTo)
      break;

    // Compute through constraints from the interference, or assume that all
    // through blocks prefer spilling when forming compact regions.
    auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo);
    if (Cand.PhysReg)
      addThroughConstraints(Cand.Intf, NewBlocks);
    else
      // Provide a strong negative bias on through blocks to prevent unwanted
      // liveness on loop backedges.
      SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true);
    AddedTo = ActiveBlocks.size();

    // Perhaps iterating can enable more bundles?
    SpillPlacer->iterate();
  }
  DEBUG(dbgs() << ", v=" << Visited);
}

/// calcCompactRegion - Compute the set of edge bundles that should be live
/// when splitting the current live range into compact regions.  Compact
/// regions can be computed without looking at interference.  They are the
/// regions formed by removing all the live-through blocks from the live range.
///
/// Returns false if the current live range is already compact, or if the
/// compact regions would form single block regions anyway.
bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) {
  // Without any through blocks, the live range is already compact.
  if (!SA->getNumThroughBlocks())
    return false;

  // Compact regions don't correspond to any physreg.
  Cand.reset(IntfCache, 0);

  DEBUG(dbgs() << "Compact region bundles");

  // Use the spill placer to determine the live bundles. GrowRegion pretends
  // that all the through blocks have interference when PhysReg is unset.
  SpillPlacer->prepare(Cand.LiveBundles);

  // The static split cost will be zero since Cand.Intf reports no interference.
  BlockFrequency Cost;
  if (!addSplitConstraints(Cand.Intf, Cost)) {
    DEBUG(dbgs() << ", none.\n");
    return false;
  }

  growRegion(Cand);
  SpillPlacer->finish();

  if (!Cand.LiveBundles.any()) {
    DEBUG(dbgs() << ", none.\n");
    return false;
  }

  DEBUG({
    for (int i = Cand.LiveBundles.find_first(); i>=0;
         i = Cand.LiveBundles.find_next(i))
      dbgs() << " EB#" << i;
    dbgs() << ".\n";
  });
  return true;
}

/// calcSpillCost - Compute how expensive it would be to split the live range in
/// SA around all use blocks instead of forming bundle regions.
BlockFrequency RAGreedy::calcSpillCost() {
  BlockFrequency Cost = 0;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    unsigned Number = BI.MBB->getNumber();
    // We normally only need one spill instruction - a load or a store.
    Cost += SpillPlacer->getBlockFrequency(Number);

    // Unless the value is redefined in the block.
    if (BI.LiveIn && BI.LiveOut && BI.FirstDef)
      Cost += SpillPlacer->getBlockFrequency(Number);
  }
  return Cost;
}

/// calcGlobalSplitCost - Return the global split cost of following the split
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
  BlockFrequency GlobalCost = 0;
  const BitVector &LiveBundles = Cand.LiveBundles;
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
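    // getBundle(Number, 0) is the bundle entering the block, (Number, 1) the
    // one leaving it, so RegIn/RegOut tell whether this candidate's register
    // is live on entry and on exit.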
    bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, 0)];
    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
    unsigned Ins = 0;

    if (BI.LiveIn)
      Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
    if (BI.LiveOut)
      Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
    while (Ins--)
      GlobalCost += SpillPlacer->getBlockFrequency(BC.Number);
  }

  for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) {
    unsigned Number = Cand.ActiveBlocks[i];
    bool RegIn  = LiveBundles[Bundles->getBundle(Number, 0)];
    bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
    if (!RegIn && !RegOut)
      continue;
    if (RegIn && RegOut) {
      // We need double spill code if this block has interference.
      Cand.Intf.moveToBlock(Number);
      if (Cand.Intf.hasInterference()) {
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
        GlobalCost += SpillPlacer->getBlockFrequency(Number);
      }
      continue;
    }
    // live-in / stack-out or stack-in / live-out.
    GlobalCost += SpillPlacer->getBlockFrequency(Number);
  }
  return GlobalCost;
}

/// splitAroundRegion - Split the current live range around the regions
/// determined by BundleCand and GlobalCand.
///
/// Before calling this function, GlobalCand and BundleCand must be initialized
/// so each bundle is assigned to a valid candidate, or NoCand for the
/// stack-bound bundles.  The shared SA/SE SplitAnalysis and SplitEditor
/// objects must be initialized for the current live range, and intervals
/// created for the used candidates.
///
/// @param LREdit    The LiveRangeEdit object handling the current split.
/// @param UsedCands List of used GlobalCand entries. Every BundleCand value
///                  must appear in this list.
void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit,
                                 ArrayRef<unsigned> UsedCands) {
  // These are the intervals created for new global ranges. We may create more
  // intervals for local ranges.
  const unsigned NumGlobalIntvs = LREdit.size();
  DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n");
  assert(NumGlobalIntvs && "No global intervals configured");

  // Isolate even single instructions when dealing with a proper sub-class.
  // That guarantees register class inflation for the stack interval because it
  // is all copies.
  unsigned Reg = SA->getParent().reg;
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));

  // First handle all the blocks with uses.
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    unsigned Number = BI.MBB->getNumber();
    unsigned IntvIn = 0, IntvOut = 0;
    SlotIndex IntfIn, IntfOut;
    if (BI.LiveIn) {
      unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }
    }
    if (BI.LiveOut) {
      unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
    }

    // Create separate intervals for isolated blocks with multiple uses.
    if (!IntvIn && !IntvOut) {
      DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
      if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
        SE->splitSingleBlock(BI);
      continue;
    }

    if (IntvIn && IntvOut)
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    else if (IntvIn)
      SE->splitRegInBlock(BI, IntvIn, IntfIn);
    else
      SE->splitRegOutBlock(BI, IntvOut, IntfOut);
  }

  // Handle live-through blocks. The relevant live-through blocks are stored in
  // the ActiveBlocks list with each candidate. We need to filter out
  // duplicates.
  BitVector Todo = SA->getThroughBlocks();
  for (unsigned c = 0; c != UsedCands.size(); ++c) {
    ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks;
    for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
      unsigned Number = Blocks[i];
      if (!Todo.test(Number))
        continue;
      Todo.reset(Number);

      unsigned IntvIn = 0, IntvOut = 0;
      SlotIndex IntfIn, IntfOut;

      unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)];
      if (CandIn != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandIn];
        IntvIn = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfIn = Cand.Intf.first();
      }

      unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)];
      if (CandOut != NoCand) {
        GlobalSplitCandidate &Cand = GlobalCand[CandOut];
        IntvOut = Cand.IntvIdx;
        Cand.Intf.moveToBlock(Number);
        IntfOut = Cand.Intf.last();
      }
      if (!IntvIn && !IntvOut)
        continue;
      SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut);
    }
  }

  ++NumGlobalSplits;

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  ExtraRegInfo.resize(MRI->getNumVirtRegs());
  unsigned OrigBlocks = SA->getNumLiveBlocks();

  // Sort out the new intervals created by splitting. We get four kinds:
  // - Remainder intervals should not be split again.
  // - Candidate intervals can be assigned to Cand.PhysReg.
  // - Block-local splits are candidates for local splitting.
  // - DCE leftovers should go back on the queue.
  for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
    LiveInterval &Reg = LIS->getInterval(LREdit.get(i));

    // Ignore old intervals from DCE.
    if (getStage(Reg) != RS_New)
      continue;

    // Remainder interval. Don't try splitting again, spill if it doesn't
    // allocate.
    if (IntvMap[i] == 0) {
      setStage(Reg, RS_Spill);
      continue;
    }

    // Global intervals. Allow repeated splitting as long as the number of live
    // blocks is strictly decreasing.
    if (IntvMap[i] < NumGlobalIntvs) {
      if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
        DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
                     << " blocks as original.\n");
        // Don't allow repeated splitting as a safeguard against looping.
        setStage(Reg, RS_Split2);
      }
      continue;
    }

    // Other intervals are treated as new. This includes local intervals created
    // for blocks with multiple uses, and anything created by DCE.
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around region");
}

unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                  SmallVectorImpl<unsigned> &NewVRegs) {
  unsigned NumCands = 0;
  BlockFrequency BestCost;

  // Check if we can split this live range around a compact region.
  bool HasCompact = calcCompactRegion(GlobalCand.front());
  if (HasCompact) {
    // Yes, keep GlobalCand[0] as the compact region candidate.
    NumCands = 1;
    BestCost = BlockFrequency::getMaxFrequency();
  } else {
    // No benefit from the compact region, our fallback will be per-block
    // splitting. Make sure we find a solution that is cheaper than spilling.
    BestCost = calcSpillCost();
    DEBUG(dbgs() << "Cost of isolating all blocks = ";
                 MBFI->printBlockFreq(dbgs(), BestCost) << '\n');
  }

  unsigned BestCand =
      calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands,
                               false/*IgnoreCSR*/);

  // No solutions found, fall back to single block splitting.
  if (!HasCompact && BestCand == NoCand)
    return 0;

  return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs);
}

unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg,
                                            AllocationOrder &Order,
                                            BlockFrequency &BestCost,
                                            unsigned &NumCands,
                                            bool IgnoreCSR) {
  unsigned BestCand = NoCand;
  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
      if (IgnoreCSR && !MRI->isPhysRegUsed(CSR))
        continue;

    // Discard bad candidates before we run out of interference cache cursors.
    // This will only affect register classes with a lot of registers (>32).
    if (NumCands == IntfCache.getMaxCursors()) {
      unsigned WorstCount = ~0u;
      unsigned Worst = 0;
      for (unsigned i = 0; i != NumCands; ++i) {
        if (i == BestCand || !GlobalCand[i].PhysReg)
          continue;
        unsigned Count = GlobalCand[i].LiveBundles.count();
        if (Count < WorstCount)
          Worst = i, WorstCount = Count;
      }
      --NumCands;
      GlobalCand[Worst] = GlobalCand[NumCands];
      if (BestCand == NumCands)
        BestCand = Worst;
    }

    if (GlobalCand.size() <= NumCands)
      GlobalCand.resize(NumCands+1);
    GlobalSplitCandidate &Cand = GlobalCand[NumCands];
    Cand.reset(IntfCache, PhysReg);

    SpillPlacer->prepare(Cand.LiveBundles);
    BlockFrequency Cost;
    if (!addSplitConstraints(Cand.Intf, Cost)) {
      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
      continue;
    }
    DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = ";
                 MBFI->printBlockFreq(dbgs(), Cost));
    if (Cost >= BestCost) {
      DEBUG({
        if (BestCand == NoCand)
          dbgs() << " worse than no bundles\n";
        else
          dbgs() << " worse than "
                 << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n';
      });
      continue;
    }
    growRegion(Cand);

    SpillPlacer->finish();

    // No live bundles, defer to splitSingleBlocks().
    if (!Cand.LiveBundles.any()) {
      DEBUG(dbgs() << " no bundles.\n");
      continue;
    }

    Cost += calcGlobalSplitCost(Cand);
    DEBUG({
      dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost)
                                << " with bundles";
      for (int i = Cand.LiveBundles.find_first(); i>=0;
           i = Cand.LiveBundles.find_next(i))
        dbgs() << " EB#" << i;
      dbgs() << ".\n";
    });
    if (Cost < BestCost) {
      BestCand = NumCands;
      BestCost = Cost;
    }
    ++NumCands;
  }
  return BestCand;
}

unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand,
                                 bool HasCompact,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  SmallVector<unsigned, 8> UsedCands;
  // Prepare split editor.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
  SE->reset(LREdit, SplitSpillMode);

  // Assign all edge bundles to the preferred candidate, or NoCand.
  BundleCand.assign(Bundles->getNumBundles(), NoCand);

  // Assign bundles for the best candidate region.
  if (BestCand != NoCand) {
    GlobalSplitCandidate &Cand = GlobalCand[BestCand];
    if (unsigned B = Cand.getBundles(BundleCand, BestCand)) {
      UsedCands.push_back(BestCand);
      Cand.IntvIdx = SE->openIntv();
      DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in "
                   << B << " bundles, intv " << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  // Assign bundles for the compact region.
  if (HasCompact) {
    GlobalSplitCandidate &Cand = GlobalCand.front();
    assert(!Cand.PhysReg && "Compact region has no physreg");
    if (unsigned B = Cand.getBundles(BundleCand, 0)) {
      UsedCands.push_back(0);
      Cand.IntvIdx = SE->openIntv();
      DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv "
                   << Cand.IntvIdx << ".\n");
      (void)B;
    }
  }

  splitAroundRegion(LREdit, UsedCands);
  return 0;
}


//===----------------------------------------------------------------------===//
//                            Per-Block Splitting
//===----------------------------------------------------------------------===//

/// tryBlockSplit - Split a global live range around every block with uses.
/// This creates a lot of local live ranges that will be split by tryLocalSplit
/// if they don't allocate.
unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed");
  unsigned Reg = VirtReg.reg;
  bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg));
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
  SE->reset(LREdit, SplitSpillMode);
  ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
  for (unsigned i = 0; i != UseBlocks.size(); ++i) {
    const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
    if (SA->shouldSplitSingleBlock(BI, SingleInstrs))
      SE->splitSingleBlock(BI);
  }
  // No blocks were split.
  if (LREdit.empty())
    return 0;

  // We did split for some blocks.
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);

  // Tell LiveDebugVariables about the new ranges.
  DebugVars->splitRegister(Reg, LREdit.regs(), *LIS);

  ExtraRegInfo.resize(MRI->getNumVirtRegs());

  // Sort out the new intervals created by splitting. The remainder interval
  // goes straight to spilling, the new local ranges get to stay RS_New.
  for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
    LiveInterval &LI = LIS->getInterval(LREdit.get(i));
    if (getStage(LI) == RS_New && IntvMap[i] == 0)
      setStage(LI, RS_Spill);
  }

  if (VerifyEnabled)
    MF->verify(this, "After splitting live range around basic blocks");
  return 0;
}


//===----------------------------------------------------------------------===//
//                         Per-Instruction Splitting
//===----------------------------------------------------------------------===//

/// Get the number of allocatable registers that match the constraints of \p Reg
/// on \p MI and that are also in \p SuperRC.
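/// For illustration only (hypothetical register classes and counts): if \p MI
/// ties \p Reg down from a 16-register super-class to a 4-register sub-class,
/// this returns 4; if the constraint is unsatisfiable, it returns 0.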
static unsigned getNumAllocatableRegsForConstraints(
    const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC,
    const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
    const RegisterClassInfo &RCI) {
  assert(SuperRC && "Invalid register class");

  const TargetRegisterClass *ConstrainedRC =
      MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI,
                                             /* ExploreBundle */ true);
  if (!ConstrainedRC)
    return 0;
  return RCI.getNumAllocatableRegs(ConstrainedRC);
}

/// tryInstructionSplit - Split a live range around individual instructions.
/// This is normally not worthwhile since the spiller is doing essentially the
/// same thing. However, when the live range is in a constrained register
/// class, it may help to insert copies such that parts of the live range can
/// be moved to a larger register class.
///
/// This is similar to spilling to a larger register class.
unsigned
RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                              SmallVectorImpl<unsigned> &NewVRegs) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);
  // There is no point to this if there are no larger sub-classes.
  if (!RegClassInfo.isProperSubClass(CurRC))
    return 0;

  // Always enable split spill mode, since we're effectively spilling to a
  // register.
  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
  SE->reset(LREdit, SplitEditor::SM_Size);

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 1)
    return 0;

  DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n");

  const TargetRegisterClass *SuperRC =
      TRI->getLargestLegalSuperClass(CurRC, *MF);
  unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC);
  // Split around every non-copy instruction if this split will relax
  // the constraints on the virtual register.
  // Otherwise, splitting just inserts uncoalescable copies that do not help
  // the allocation.
  for (unsigned i = 0; i != Uses.size(); ++i) {
    if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]))
      if (MI->isFullCopy() ||
          SuperRCNumAllocatableRegs ==
              getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII,
                                                  TRI, RCI)) {
        DEBUG(dbgs() << "    skip:\t" << Uses[i] << '\t' << *MI);
        continue;
      }
    SE->openIntv();
    SlotIndex SegStart = SE->enterIntvBefore(Uses[i]);
    SlotIndex SegStop  = SE->leaveIntvAfter(Uses[i]);
    SE->useIntv(SegStart, SegStop);
  }

  if (LREdit.empty()) {
    DEBUG(dbgs() << "All uses were copies.\n");
    return 0;
  }

  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);
  ExtraRegInfo.resize(MRI->getNumVirtRegs());

  // Assign all new registers to RS_Spill. This was the last chance.
  setStage(LREdit.begin(), LREdit.end(), RS_Spill);
  return 0;
}


//===----------------------------------------------------------------------===//
//                             Local Splitting
//===----------------------------------------------------------------------===//


/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
/// in order to use PhysReg between two entries in SA->UseSlots.
///
/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
///
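/// For illustration only (assumed slot numbers): with UseSlots = {4, 8, 12}
/// there are two gaps; GapWeight[0] covers slots 4-8 and GapWeight[1] covers
/// slots 8-12. Interference overlapping the instruction at slot 8 is counted
/// in both gaps.
///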
void RAGreedy::calcGapWeights(unsigned PhysReg,
                              SmallVectorImpl<float> &GapWeight) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();
  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  const unsigned NumGaps = Uses.size()-1;

  // Start and end points for the interference check.
  SlotIndex StartIdx =
    BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr;
  SlotIndex StopIdx =
    BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr;

  GapWeight.assign(NumGaps, 0.0f);

  // Add interference from each overlapping register.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
          .checkInterference())
      continue;

    // We know that VirtReg is a continuous interval from FirstInstr to
    // LastInstr, so we don't need InterferenceQuery.
    //
    // Interference that overlaps an instruction is counted in both gaps
    // surrounding the instruction. The exception is interference before
    // StartIdx and after StopIdx.
    //
    LiveIntervalUnion::SegmentIter IntI =
      Matrix->getLiveUnions()[*Units].find(StartIdx);
    for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
      // Skip the gaps before IntI.
      while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      // Update the gaps covered by IntI.
      const float weight = IntI.value()->weight;
      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = std::max(GapWeight[Gap], weight);
        if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }

  // Add fixed interference.
  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    const LiveRange &LR = LIS->getRegUnit(*Units);
    LiveRange::const_iterator I = LR.find(StartIdx);
    LiveRange::const_iterator E = LR.end();

    // Same loop as above. Mark any overlapped gaps as HUGE_VALF.
    for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) {
      while (Uses[Gap+1].getBoundaryIndex() < I->start)
        if (++Gap == NumGaps)
          break;
      if (Gap == NumGaps)
        break;

      for (; Gap != NumGaps; ++Gap) {
        GapWeight[Gap] = llvm::huge_valf;
        if (Uses[Gap+1].getBaseIndex() >= I->end)
          break;
      }
      if (Gap == NumGaps)
        break;
    }
  }
}

/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
/// basic block.
///
unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  assert(SA->getUseBlocks().size() == 1 && "Not a local interval");
  const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front();

  // Note that it is possible to have an interval that is live-in or live-out
  // while only covering a single block: a phi-def can use undef values from
  // predecessors, and the block could be a single-block loop.
  // We don't bother doing anything clever about such a case; we simply assume
  // that the interval is continuous from FirstInstr to LastInstr. We should
  // make sure that we don't do anything illegal to such an interval, though.

  ArrayRef<SlotIndex> Uses = SA->getUseSlots();
  if (Uses.size() <= 2)
    return 0;
  const unsigned NumGaps = Uses.size()-1;

  DEBUG({
    dbgs() << "tryLocalSplit: ";
    for (unsigned i = 0, e = Uses.size(); i != e; ++i)
      dbgs() << ' ' << Uses[i];
    dbgs() << '\n';
  });

  // If VirtReg is live across any register mask operands, compute a list of
  // gaps with register masks.
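  // Illustrative only (assumed slot numbers): a call with a regmask at slot
  // 20, falling between Uses[2] = 16 and Uses[3] = 24, would record gap 2 in
  // RegMaskGaps below.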
  SmallVector<unsigned, 8> RegMaskGaps;
  if (Matrix->checkRegMaskInterference(VirtReg)) {
    // Get regmask slots for the whole block.
    ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
    DEBUG(dbgs() << RMS.size() << " regmasks in block:");
    // Constrain to VirtReg's live range.
    unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
                                   Uses.front().getRegSlot()) - RMS.begin();
    unsigned re = RMS.size();
    for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
      // Look for Uses[i] <= RMS <= Uses[i+1].
      assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i]));
      if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri]))
        continue;
      // Skip a regmask on the same instruction as the last use. It doesn't
      // overlap the live range.
      if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps)
        break;
      DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]);
      RegMaskGaps.push_back(i);
      // Advance ri to the next gap. A regmask on one of the uses counts in
      // both gaps.
      while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1]))
        ++ri;
    }
    DEBUG(dbgs() << '\n');
  }

  // Since we allow local split results to be split again, there is a risk of
  // creating infinite loops. It is tempting to require that the new live
  // ranges have fewer instructions than the original. That would guarantee
  // convergence, but it is too strict. A live range with 3 instructions can be
  // split 2+3 (including the COPY), and we want to allow that.
  //
  // Instead we use these rules:
  //
  // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the
  //    noop split, of course).
  // 2. Require progress be made for ranges with getStage() == RS_Split2. All
  //    the new ranges must have fewer instructions than before the split.
  // 3. New ranges with the same number of instructions are marked RS_Split2,
  //    smaller ranges are marked RS_New.
  //
  // These rules allow a 3 -> 2+3 split once, which we need. They also prevent
  // excessive splitting and infinite loops.
  //
  bool ProgressRequired = getStage(VirtReg) >= RS_Split2;

  // Best split candidate.
  unsigned BestBefore = NumGaps;
  unsigned BestAfter = 0;
  float BestDiff = 0;

  const float blockFreq =
    SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
    (1.0f / MBFI->getEntryFreq());
  SmallVector<float, 8> GapWeight;

  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    // Keep track of the largest spill weight that would need to be evicted in
    // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
    calcGapWeights(PhysReg, GapWeight);

    // Remove any gaps with regmask clobbers.
    if (Matrix->checkRegMaskInterference(VirtReg, PhysReg))
      for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i)
        GapWeight[RegMaskGaps[i]] = llvm::huge_valf;

    // Try to find the best sequence of gaps to close.
    // The new spill weight must be larger than any gap interference.

    // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
    unsigned SplitBefore = 0, SplitAfter = 1;

    // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
    // It is the spill weight that needs to be evicted.
    float MaxGap = GapWeight[0];

    for (;;) {
      // Live before/after split?
      const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
      const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;

      DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
                   << Uses[SplitBefore] << '-' << Uses[SplitAfter]
                   << " i=" << MaxGap);

      // Stop before the interval gets so big we wouldn't be making progress.
      if (!LiveBefore && !LiveAfter) {
        DEBUG(dbgs() << " all\n");
        break;
      }
      // Should the interval be extended or shrunk?
      bool Shrink = true;

      // How many gaps would the new range have?
      unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter;
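      // Illustrative arithmetic (assumed values): with SplitBefore = 1,
      // SplitAfter = 3, LiveBefore = true, and LiveAfter = false, the new
      // range would have 1 + (3 - 1) + 0 = 3 gaps.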

      // Legally, without causing looping?
      bool Legal = !ProgressRequired || NewGaps < NumGaps;

      if (Legal && MaxGap < llvm::huge_valf) {
        // Estimate the new spill weight. Each instruction reads or writes the
        // register. Conservatively assume there are no read-modify-write
        // instructions.
        //
        // Try to guess the size of the new interval.
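        // The estimate scales the block frequency by the number of
        // instructions the prospective range would contain (NewGaps + 1) and
        // normalizes by its projected size in slot units.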
        const float EstWeight = normalizeSpillWeight(
            blockFreq * (NewGaps + 1),
            Uses[SplitBefore].distance(Uses[SplitAfter]) +
                (LiveBefore + LiveAfter) * SlotIndex::InstrDist,
            1);
        // Would this split be possible to allocate?
        // Never allocate all gaps; we wouldn't be making progress.
        DEBUG(dbgs() << " w=" << EstWeight);
        if (EstWeight * Hysteresis >= MaxGap) {
          Shrink = false;
          float Diff = EstWeight - MaxGap;
          if (Diff > BestDiff) {
            DEBUG(dbgs() << " (best)");
            BestDiff = Hysteresis * Diff;
            BestBefore = SplitBefore;
            BestAfter = SplitAfter;
          }
        }
      }

      // Try to shrink.
      if (Shrink) {
        if (++SplitBefore < SplitAfter) {
          DEBUG(dbgs() << " shrink\n");
          // Recompute the max when necessary.
          if (GapWeight[SplitBefore - 1] >= MaxGap) {
            MaxGap = GapWeight[SplitBefore];
            for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
              MaxGap = std::max(MaxGap, GapWeight[i]);
          }
          continue;
        }
        MaxGap = 0;
      }

      // Try to extend the interval.
      if (SplitAfter >= NumGaps) {
        DEBUG(dbgs() << " end\n");
        break;
      }

      DEBUG(dbgs() << " extend\n");
      MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]);
    }
  }

  // Didn't find any candidates?
  if (BestBefore == NumGaps)
    return 0;

  DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
               << '-' << Uses[BestAfter] << ", " << BestDiff
               << ", " << (BestAfter - BestBefore + 1) << " instrs\n");

  LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
  SE->reset(LREdit);

  SE->openIntv();
  SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]);
  SlotIndex SegStop  = SE->leaveIntvAfter(Uses[BestAfter]);
  SE->useIntv(SegStart, SegStop);
  SmallVector<unsigned, 8> IntvMap;
  SE->finish(&IntvMap);
  DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS);

  // If the new range has the same number of instructions as before, mark it as
  // RS_Split2 so the next split will be forced to make progress. Otherwise,
  // leave the new intervals as RS_New so they can compete.
  bool LiveBefore = BestBefore != 0 || BI.LiveIn;
  bool LiveAfter = BestAfter != NumGaps || BI.LiveOut;
  unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter;
  if (NewGaps >= NumGaps) {
    DEBUG(dbgs() << "Tagging non-progress ranges: ");
    assert(!ProgressRequired && "Didn't make progress when it was required.");
    for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
      if (IntvMap[i] == 1) {
        setStage(LIS->getInterval(LREdit.get(i)), RS_Split2);
        DEBUG(dbgs() << PrintReg(LREdit.get(i)));
      }
    DEBUG(dbgs() << '\n');
  }
  ++NumLocalSplits;

  return 0;
}

//===----------------------------------------------------------------------===//
//                          Live Range Splitting
//===----------------------------------------------------------------------===//

/// trySplit - Try to split VirtReg or one of its interferences, making it
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
                            SmallVectorImpl<unsigned> &NewVRegs) {
  // Ranges must be Split2 or less.
  if (getStage(VirtReg) >= RS_Spill)
    return 0;

  // Local intervals are handled separately.
  if (LIS->intervalIsInOneMBB(VirtReg)) {
    NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
    SA->analyze(&VirtReg);
    unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
    return tryInstructionSplit(VirtReg, Order, NewVRegs);
  }

  NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);

  SA->analyze(&VirtReg);

  // FIXME: SplitAnalysis may repair broken live ranges coming from the
  // coalescer. That may cause the range to become allocatable which means that
  // tryRegionSplit won't be making progress. This check should be replaced with
  // an assertion when the coalescer is fixed.
  if (SA->didRepairRange()) {
    // VirtReg has changed, so all cached queries are invalid.
    Matrix->invalidateVirtRegs();
    if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
      return PhysReg;
  }

  // First try to split around a region spanning multiple blocks. RS_Split2
  // ranges already made dubious progress with region splitting, so they go
  // straight to single block splitting.
  if (getStage(VirtReg) < RS_Split2) {
    unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
    if (PhysReg || !NewVRegs.empty())
      return PhysReg;
  }

  // Then isolate blocks.
  return tryBlockSplit(VirtReg, Order, NewVRegs);
}

//===----------------------------------------------------------------------===//
//                          Last Chance Recoloring
//===----------------------------------------------------------------------===//

/// mayRecolorAllInterferences - Check if the virtual registers that
/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be
/// recolored to free \p PhysReg.
/// When true is returned, \p RecoloringCandidates has been augmented with all
/// the live intervals that need to be recolored in order to free \p PhysReg
/// for \p VirtReg.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
bool
RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
                                     SmallLISet &RecoloringCandidates,
                                     const SmallVirtRegSet &FixedRegisters) {
  const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg);

  for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
    LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
    // If there is LastChanceRecoloringMaxInterference or more interferences,
    // chances are one would not be recolorable.
    if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
        LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
      DEBUG(dbgs() << "Early abort: too many interferences.\n");
      CutOffInfo |= CO_Interf;
      return false;
    }
    for (unsigned i = Q.interferingVRegs().size(); i; --i) {
      LiveInterval *Intf = Q.interferingVRegs()[i - 1];
      // If Intf is done and sits on the same register class as VirtReg,
      // it would not be recolorable as it is in the same state as VirtReg.
      if ((getStage(*Intf) == RS_Done &&
           MRI->getRegClass(Intf->reg) == CurRC) ||
          FixedRegisters.count(Intf->reg)) {
        DEBUG(dbgs() << "Early abort: the interference is not recolorable.\n");
        return false;
      }
      RecoloringCandidates.insert(Intf);
    }
  }
  return true;
}

/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring
/// its interferences.
/// Last chance recoloring chooses a color for \p VirtReg and recolors every
/// virtual register that was using it. The recoloring process may recursively
/// use the last chance recoloring. Therefore, when a virtual register has been
/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot
/// be last-chance-recolored again during this recoloring "session".
/// E.g.,
/// Let
/// vA can use {R1, R2    }
/// vB can use {    R2, R3}
/// vC can use {R1        }
/// Where vA, vB, and vC cannot be split anymore (they are reloads for
/// instance) and they all interfere.
///
/// vA is assigned R1
/// vB is assigned R2
/// vC tries to evict vA but vA is already done.
/// Regular register allocation fails.
///
/// Last chance recoloring kicks in:
/// vC does as if vA was evicted => vC uses R1.
/// vC is marked as fixed.
/// vA needs to find a color.
/// None are available.
/// vA cannot evict vC: vC is a fixed virtual register now.
/// vA does as if vB was evicted => vA uses R2.
/// vB needs to find a color.
/// R3 is available.
/// Recoloring => vC = R1, vA = R2, vB = R3
///
/// \p Order defines the preferred allocation order for \p VirtReg.
/// \p NewRegs will contain any new virtual registers that have been created
/// (split, spill) during the process and that must be assigned.
/// \p FixedRegisters contains all the virtual registers that cannot be
/// recolored.
/// \p Depth gives the current depth of the last chance recoloring.
/// \return a physical register that can be used for VirtReg or ~0u if none
/// exists.
unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
                                           AllocationOrder &Order,
                                           SmallVectorImpl<unsigned> &NewVRegs,
                                           SmallVirtRegSet &FixedRegisters,
                                           unsigned Depth) {
  DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n');
  // Ranges must be Done.
  assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) &&
         "Last chance recoloring should really be last chance");
  // Set the max depth to LastChanceRecoloringMaxDepth.
  // We may want to reconsider that if we end up with a too large search space
  // for targets with hundreds of registers.
  // Indeed, in that case we may want to cut the search space earlier.
  if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
    DEBUG(dbgs() << "Abort because max depth has been reached.\n");
    CutOffInfo |= CO_Depth;
    return ~0u;
  }

  // Set of Live intervals that will need to be recolored.
  SmallLISet RecoloringCandidates;
  // Record the original mapping from virtual register to physical register in
  // case the recoloring fails.
  DenseMap<unsigned, unsigned> VirtRegToPhysReg;
  // Mark VirtReg as fixed, i.e., it will not be recolored past this point in
  // this recoloring "session".
  FixedRegisters.insert(VirtReg.reg);

  Order.rewind();
  while (unsigned PhysReg = Order.next()) {
    DEBUG(dbgs() << "Try to assign: " << VirtReg << " to "
                 << PrintReg(PhysReg, TRI) << '\n');
    RecoloringCandidates.clear();
    VirtRegToPhysReg.clear();

    // It is only possible to recolor virtual register interference.
    if (Matrix->checkInterference(VirtReg, PhysReg) >
        LiveRegMatrix::IK_VirtReg) {
      DEBUG(dbgs() << "Some interferences are not with virtual registers.\n");

      continue;
    }

    // Give up early on this PhysReg if it is obvious we cannot recolor all
    // the interferences.
    if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates,
                                    FixedRegisters)) {
      DEBUG(dbgs() << "Some interferences cannot be recolored.\n");
      continue;
    }

    // RecoloringCandidates contains all the virtual registers that interfere
    // with VirtReg on PhysReg (or one of its aliases).
    // Enqueue them for recoloring and perform the actual recoloring.
    PQueue RecoloringQueue;
    for (SmallLISet::iterator It = RecoloringCandidates.begin(),
                              EndIt = RecoloringCandidates.end();
         It != EndIt; ++It) {
      unsigned ItVirtReg = (*It)->reg;
      enqueue(RecoloringQueue, *It);
      assert(VRM->hasPhys(ItVirtReg) &&
             "Interferences are supposed to be with allocated variables");

      // Record the current allocation.
      VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg);
      // Unset the related struct.
      Matrix->unassign(**It);
    }

    // Do as if VirtReg was assigned to PhysReg so that the underlying
    // recoloring has the right information about the interferences and
    // available colors.
    Matrix->assign(VirtReg, PhysReg);

    // Save the current recoloring state.
    // If we cannot recolor all the interferences, we will have to start again
    // at this point for the next physical register.
    SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
    if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters,
                                Depth)) {
      // Do not mess up the global assignment process.
      // I.e., VirtReg must be unassigned.
      Matrix->unassign(VirtReg);
      return PhysReg;
    }

    DEBUG(dbgs() << "Failed to assign: " << VirtReg << " to "
                 << PrintReg(PhysReg, TRI) << '\n');

    // The recoloring attempt failed, undo the changes.
    FixedRegisters = SaveFixedRegisters;
    Matrix->unassign(VirtReg);

    for (SmallLISet::iterator It = RecoloringCandidates.begin(),
                              EndIt = RecoloringCandidates.end();
         It != EndIt; ++It) {
      unsigned ItVirtReg = (*It)->reg;
      if (VRM->hasPhys(ItVirtReg))
        Matrix->unassign(**It);
      Matrix->assign(**It, VirtRegToPhysReg[ItVirtReg]);
    }
  }

  // Last chance recoloring did not work either; give up.
  return ~0u;
}

/// tryRecoloringCandidates - Try to assign a new color to every register
/// in \p RecoloringQueue.
/// \p NewRegs will contain any new virtual register created during the
/// recoloring process.
/// \p FixedRegisters[in/out] contains all the registers that have been
/// recolored.
/// \return true if all virtual registers in RecoloringQueue were successfully
/// recolored, false otherwise.
bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
                                       SmallVectorImpl<unsigned> &NewVRegs,
                                       SmallVirtRegSet &FixedRegisters,
                                       unsigned Depth) {
  while (!RecoloringQueue.empty()) {
    LiveInterval *LI = dequeue(RecoloringQueue);
    DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
    unsigned PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters,
                                         Depth + 1);
    if (PhysReg == ~0u || !PhysReg)
      return false;
    DEBUG(dbgs() << "Recoloring of " << *LI
                 << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
    Matrix->assign(*LI, PhysReg);
    FixedRegisters.insert(LI->reg);
  }
  return true;
}

//===----------------------------------------------------------------------===//
//                            Main Entry Point
//===----------------------------------------------------------------------===//

unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
                                 SmallVectorImpl<unsigned> &NewVRegs) {
  CutOffInfo = CO_None;
  LLVMContext &Ctx = MF->getFunction()->getContext();
  SmallVirtRegSet FixedRegisters;
  unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
  if (Reg == ~0U && (CutOffInfo != CO_None)) {
    uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
    if (CutOffEncountered == CO_Depth)
      Ctx.emitError("register allocation failed: maximum depth for recoloring "
                    "reached. Use -fexhaustive-register-search to skip "
                    "cutoffs");
    else if (CutOffEncountered == CO_Interf)
      Ctx.emitError("register allocation failed: maximum interference for "
                    "recoloring reached. Use -fexhaustive-register-search "
                    "to skip cutoffs");
    else if (CutOffEncountered == (CO_Depth | CO_Interf))
      Ctx.emitError("register allocation failed: maximum interference and "
                    "depth for recoloring reached. Use "
                    "-fexhaustive-register-search to skip cutoffs");
  }
  return Reg;
}

/// Using a CSR for the first time has a cost because it causes push|pop
/// to be added to prologue|epilogue. Splitting a cold section of the live
/// range can have lower cost than using the CSR for the first time;
/// spilling a live range in the cold path can have lower cost than using
/// the CSR for the first time. Returns the physical register if we decide
/// to use the CSR; otherwise return 0.
unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
                                         AllocationOrder &Order,
                                         unsigned PhysReg,
                                         unsigned &CostPerUseLimit,
                                         SmallVectorImpl<unsigned> &NewVRegs) {
  if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
    // We choose spill over using the CSR for the first time if the spill cost
    // is lower than CSRCost.
    SA->analyze(&VirtReg);
    if (calcSpillCost() >= CSRCost)
      return PhysReg;

    // We are going to spill, set CostPerUseLimit to 1 to make sure that
    // we will not use a callee-saved register in tryEvict.
    CostPerUseLimit = 1;
    return 0;
  }
  if (getStage(VirtReg) < RS_Split) {
    // We choose pre-splitting over using the CSR for the first time if
    // the cost of splitting is lower than CSRCost.
    SA->analyze(&VirtReg);
    unsigned NumCands = 0;
    BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
    unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
                                                 NumCands, true /*IgnoreCSR*/);
    if (BestCand == NoCand)
      // Use the CSR if we can't find a region split below CSRCost.
      return PhysReg;

    // Perform the actual pre-splitting.
    doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs);
    return 0;
  }
  return PhysReg;
}

void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) {
  // Do not keep invalid information around.
  SetOfBrokenHints.remove(&LI);
}

void RAGreedy::initializeCSRCost() {
  // We use the larger of the command-line option and the value reported
  // by TRI.
  CSRCost = BlockFrequency(
      std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
  if (!CSRCost.getFrequency())
    return;

  // Raw cost is relative to Entry == 2^14; scale it appropriately.
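  // Illustrative only (assumed entry frequencies): if the function's entry
  // frequency is 2^13, the configured cost is effectively halved below; if it
  // is 2^15, it is doubled.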
  uint64_t ActualEntry = MBFI->getEntryFreq();
  if (!ActualEntry) {
    CSRCost = 0;
    return;
  }
  uint64_t FixedEntry = 1 << 14;
  if (ActualEntry < FixedEntry)
    CSRCost *= BranchProbability(ActualEntry, FixedEntry);
  else if (ActualEntry <= UINT32_MAX)
    // Invert the fraction and divide.
    CSRCost /= BranchProbability(FixedEntry, ActualEntry);
  else
    // Can't use BranchProbability in general, since it takes 32-bit numbers.
    CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
}

/// \brief Collect the hint info for \p Reg.
/// The results are stored into \p Out.
/// \p Out is not cleared before being populated.
void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
  for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
    if (!Instr.isFullCopy())
      continue;
    // Look for the other end of the copy.
    unsigned OtherReg = Instr.getOperand(0).getReg();
    if (OtherReg == Reg) {
      OtherReg = Instr.getOperand(1).getReg();
      if (OtherReg == Reg)
        continue;
    }
    // Get the current assignment.
    unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
                                ? OtherReg
                                : VRM->getPhys(OtherReg);
    // Push the collected information.
    Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg,
                           OtherPhysReg));
  }
}

/// \brief Using the given \p List, compute the cost of the broken hints if
/// \p PhysReg was used.
/// \return The cost of \p List for \p PhysReg.
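/// For illustration only (assumed frequencies): with List = {(freq 10, R1),
/// (freq 5, R2)} and \p PhysReg == R1, only the second hint would be broken,
/// so the returned cost is 5.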
BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List,
                                           unsigned PhysReg) {
  BlockFrequency Cost = 0;
  for (const HintInfo &Info : List) {
    if (Info.PhysReg != PhysReg)
      Cost += Info.Freq;
  }
  return Cost;
}

/// \brief Using the register assigned to \p VirtReg, try to recolor
/// all the live ranges that are copy-related with \p VirtReg.
/// The recoloring is then propagated to all the live-ranges that have
/// been recolored and so on, until no more copies can be coalesced or
/// it is not profitable.
/// For a given live range, profitability is determined by the sum of the
/// frequencies of the non-identity copies it would introduce with the old
/// and new register.
void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
  // We have a broken hint, check if it is possible to fix it by
  // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted
  // some register and PhysReg may be available for the other live-ranges.
  SmallSet<unsigned, 4> Visited;
  SmallVector<unsigned, 2> RecoloringCandidates;
  HintsInfo Info;
  unsigned Reg = VirtReg.reg;
  unsigned PhysReg = VRM->getPhys(Reg);
  // Start the recoloring algorithm from the input live-interval, then
  // it will propagate to the ones that are copy-related with it.
  Visited.insert(Reg);
  RecoloringCandidates.push_back(Reg);

  DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '('
               << PrintReg(PhysReg, TRI) << ")\n");

  do {
    Reg = RecoloringCandidates.pop_back_val();

    // We cannot recolor physical registers.
    if (TargetRegisterInfo::isPhysicalRegister(Reg))
      continue;

    assert(VRM->hasPhys(Reg) && "We have an unallocated variable!!");

    // Get the live interval mapped with this virtual register to be able
    // to check for the interference with the new color.
    LiveInterval &LI = LIS->getInterval(Reg);
    unsigned CurrPhys = VRM->getPhys(Reg);
    // Check that the new color matches the register class constraints and
    // that it is free for this live range.
    if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
                                Matrix->checkInterference(LI, PhysReg)))
      continue;

    DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI)
                 << ") is recolorable.\n");

    // Gather the hint info.
    Info.clear();
    collectHintInfo(Reg, Info);
    // Check if recoloring the live-range will increase the cost of the
    // non-identity copies.
    if (CurrPhys != PhysReg) {
      DEBUG(dbgs() << "Checking profitability:\n");
      BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys);
      BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg);
      DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency()
                   << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n');
      if (OldCopiesCost < NewCopiesCost) {
        DEBUG(dbgs() << "=> Not profitable.\n");
        continue;
      }
      // At this point, the cost is either cheaper or equal. If it is
      // equal, we consider it profitable because it may expose
      // more recoloring opportunities.
      DEBUG(dbgs() << "=> Profitable.\n");
      // Recolor the live-range.
      Matrix->unassign(LI);
      Matrix->assign(LI, PhysReg);
    }
    // Push all copy-related live-ranges to keep reconciling the broken
    // hints.
    for (const HintInfo &HI : Info) {
      if (Visited.insert(HI.Reg).second)
        RecoloringCandidates.push_back(HI.Reg);
    }
  } while (!RecoloringCandidates.empty());
}

/// \brief Try to recolor broken hints.
/// Broken hints may be repaired by recoloring when an evicted variable
/// freed up a register for a larger live-range.
/// Consider the following example:
/// BB1:
///   a =
///   b =
/// BB2:
///   ...
///   = b
///   = a
/// Let us assume b gets split:
/// BB1:
///   a =
///   b =
/// BB2:
///   c = b
///   ...
///   d = c
///   = d
///   = a
/// Because of how the allocation works, b, c, and d may be assigned different
/// colors. Now, if a gets evicted later:
/// BB1:
///   a =
///   st a, SpillSlot
///   b =
/// BB2:
///   c = b
///   ...
///   d = c
///   = d
///   e = ld SpillSlot
///   = e
/// It is likely that we can assign the same register for b, c, and d,
/// getting rid of 2 copies.
void RAGreedy::tryHintsRecoloring() {
  for (LiveInterval *LI : SetOfBrokenHints) {
    assert(TargetRegisterInfo::isVirtualRegister(LI->reg) &&
           "Recoloring is possible only for virtual registers");
    // Some dead defs may be around (e.g., because of debug uses).
    // Ignore those.
    if (!VRM->hasPhys(LI->reg))
      continue;
    tryHintRecoloring(*LI);
  }
}

unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
                                     SmallVectorImpl<unsigned> &NewVRegs,
                                     SmallVirtRegSet &FixedRegisters,
                                     unsigned Depth) {
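  // Overall strategy: first try a free register, then eviction of a less
  // worthy range, then deferral to a second round, then splitting, and
  // finally spilling (or last chance recoloring for unspillable ranges).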
  unsigned CostPerUseLimit = ~0u;
  // First try assigning a free register.
  AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo);
  if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
    // We check other options if we are using a CSR for the first time.
    bool CSRFirstUse = false;
    if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
      if (!MRI->isPhysRegUsed(CSR))
        CSRFirstUse = true;

    // When NewVRegs is not empty, we may have made decisions such as evicting
    // a virtual register; go with the earlier decisions and use the physical
    // register.
    if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) {
      unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
                                              CostPerUseLimit, NewVRegs);
      if (CSRReg || !NewVRegs.empty())
        // Return now if we decide to use a CSR or create new vregs due to
        // pre-splitting.
        return CSRReg;
    } else
      return PhysReg;
  }

  LiveRangeStage Stage = getStage(VirtReg);
  DEBUG(dbgs() << StageName[Stage]
               << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');

  // Try to evict a less worthy live range, but only for ranges from the primary
  // queue. The RS_Split ranges already failed to do this, and they should not
  // get a second chance until they have been split.
  if (Stage != RS_Split)
    if (unsigned PhysReg =
            tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) {
      unsigned Hint = MRI->getSimpleHint(VirtReg.reg);
      // If VirtReg has a hint and that hint is broken, record this
      // virtual register as a recoloring candidate for broken hint.
      // Indeed, since we evicted a variable in its neighborhood it is
      // likely we can at least partially recolor some of the
      // copy-related live-ranges.
      if (Hint && Hint != PhysReg)
        SetOfBrokenHints.insert(&VirtReg);
      return PhysReg;
    }

  assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");

  // The first time we see a live range, don't try to split or spill.
  // Wait until the second time, when all smaller ranges have been allocated.
  // This gives a better picture of the interference to split around.
  if (Stage < RS_Split) {
    setStage(VirtReg, RS_Split);
    DEBUG(dbgs() << "wait for second round\n");
    NewVRegs.push_back(VirtReg.reg);
    return 0;
  }

  // If we couldn't allocate a register from spilling, there is probably some
  // invalid inline assembly. The base class will report it.
  if (Stage >= RS_Done || !VirtReg.isSpillable())
    return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
                                   Depth);

  // Try splitting VirtReg or interferences.
  unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
  if (PhysReg || !NewVRegs.empty())
    return PhysReg;

  // Finally spill VirtReg itself.
  NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
  LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this);
  spiller().spill(LRE);
  setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);

  if (VerifyEnabled)
    MF->verify(this, "After spilling");

  // The live virtual register requesting allocation was spilled, so tell
  // the caller not to allocate anything during this round.
  return 0;
}

bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
  DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
               << "********** Function: " << mf.getName() << '\n');

  MF = &mf;
  TRI = MF->getSubtarget().getRegisterInfo();
  TII = MF->getSubtarget().getInstrInfo();
  RCI.runOnMachineFunction(mf);

  EnableLocalReassign = EnableLocalReassignment ||
                        MF->getSubtarget().enableRALocalReassignment(
                            MF->getTarget().getOptLevel());

  if (VerifyEnabled)
    MF->verify(this, "Before greedy register allocator");

  RegAllocBase::init(getAnalysis<VirtRegMap>(),
                     getAnalysis<LiveIntervals>(),
                     getAnalysis<LiveRegMatrix>());
  Indexes = &getAnalysis<SlotIndexes>();
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  DomTree = &getAnalysis<MachineDominatorTree>();
  SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
  Loops = &getAnalysis<MachineLoopInfo>();
  Bundles = &getAnalysis<EdgeBundles>();
  SpillPlacer = &getAnalysis<SpillPlacement>();
  DebugVars = &getAnalysis<LiveDebugVariables>();

  initializeCSRCost();

  calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI);

  DEBUG(LIS->dump());

  SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
  SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI));
  ExtraRegInfo.clear();
  ExtraRegInfo.resize(MRI->getNumVirtRegs());
  NextCascade = 1;
  IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI);
  GlobalCand.resize(32);  // This will grow as needed.
  SetOfBrokenHints.clear();

  allocatePhysRegs();
  tryHintsRecoloring();
  releaseMemory();
  return true;
}