1 //===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a pass that collect the Linker Optimization Hint (LOH).
11 // This pass should be run at the very end of the compilation flow, just before
12 // assembly printer.
13 // To be useful for the linker, the LOH must be printed into the assembly file.
14 //
15 // A LOH describes a sequence of instructions that may be optimized by the
16 // linker.
17 // This same sequence cannot be optimized by the compiler because some of
18 // the information will be known at link time.
19 // For instance, consider the following sequence:
20 //     L1: adrp xA, sym@PAGE
21 //     L2: add xB, xA, sym@PAGEOFF
22 //     L3: ldr xC, [xB, #imm]
23 // This sequence can be turned into:
24 // A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:
25 //     L3: ldr xC, sym+#imm
26 // It may also be turned into either the following more efficient
27 // code sequences:
28 // - If sym@PAGEOFF + #imm fits the encoding space of L3.
29 //     L1: adrp xA, sym@PAGE
30 //     L3: ldr xC, [xB, sym@PAGEOFF + #imm]
31 // - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:
32 //     L1: adr xA, sym
33 //     L3: ldr xC, [xB, #imm]
34 //
35 // To be valid a LOH must meet all the requirements needed by all the related
36 // possible linker transformations.
37 // For instance, using the running example, the constraints to emit
38 // ".loh AdrpAddLdr" are:
39 // - L1, L2, and L3 instructions are of the expected type, i.e.,
40 //   respectively ADRP, ADD (immediate), and LD.
41 // - The result of L1 is used only by L2.
42 // - The register argument (xA) used in the ADD instruction is defined
43 //   only by L1.
44 // - The result of L2 is used only by L3.
45 // - The base address (xB) in L3 is defined only L2.
46 // - The ADRP in L1 and the ADD in L2 must reference the same symbol using
47 //   @PAGE/@PAGEOFF with no additional constants
48 //
49 // Currently supported LOHs are:
50 // * So called non-ADRP-related:
51 //   - .loh AdrpAddLdr L1, L2, L3:
52 //     L1: adrp xA, sym@PAGE
53 //     L2: add xB, xA, sym@PAGEOFF
54 //     L3: ldr xC, [xB, #imm]
55 //   - .loh AdrpLdrGotLdr L1, L2, L3:
56 //     L1: adrp xA, sym@GOTPAGE
57 //     L2: ldr xB, [xA, sym@GOTPAGEOFF]
58 //     L3: ldr xC, [xB, #imm]
59 //   - .loh AdrpLdr L1, L3:
60 //     L1: adrp xA, sym@PAGE
61 //     L3: ldr xC, [xA, sym@PAGEOFF]
62 //   - .loh AdrpAddStr L1, L2, L3:
63 //     L1: adrp xA, sym@PAGE
64 //     L2: add xB, xA, sym@PAGEOFF
65 //     L3: str xC, [xB, #imm]
66 //   - .loh AdrpLdrGotStr L1, L2, L3:
67 //     L1: adrp xA, sym@GOTPAGE
68 //     L2: ldr xB, [xA, sym@GOTPAGEOFF]
69 //     L3: str xC, [xB, #imm]
70 //   - .loh AdrpAdd L1, L2:
71 //     L1: adrp xA, sym@PAGE
72 //     L2: add xB, xA, sym@PAGEOFF
73 //   For all these LOHs, L1, L2, L3 form a simple chain:
74 //   L1 result is used only by L2 and L2 result by L3.
75 //   L3 LOH-related argument is defined only by L2 and L2 LOH-related argument
76 //   by L1.
77 // All these LOHs aim at using more efficient load/store patterns by folding
78 // some instructions used to compute the address directly into the load/store.
79 //
80 // * So called ADRP-related:
81 //  - .loh AdrpAdrp L2, L1:
82 //    L2: ADRP xA, sym1@PAGE
83 //    L1: ADRP xA, sym2@PAGE
84 //    L2 dominates L1 and xA is not redifined between L2 and L1
85 // This LOH aims at getting rid of redundant ADRP instructions.
86 //
87 // The overall design for emitting the LOHs is:
88 // 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.
89 // 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
90 //     1. Associates them a label.
91 //     2. Emits them in a MCStreamer (EmitLOHDirective).
92 //         - The MCMachOStreamer records them into the MCAssembler.
93 //         - The MCAsmStreamer prints them.
94 //         - Other MCStreamers ignore them.
95 //     3. Closes the MCStreamer:
96 //         - The MachObjectWriter gets them from the MCAssembler and writes
97 //           them in the object file.
98 //         - Other ObjectWriters ignore them.
99 //===----------------------------------------------------------------------===//
100 
101 #include "AArch64.h"
102 #include "AArch64InstrInfo.h"
103 #include "AArch64MachineFunctionInfo.h"
104 #include "AArch64Subtarget.h"
105 #include "MCTargetDesc/AArch64AddressingModes.h"
106 #include "llvm/ADT/BitVector.h"
107 #include "llvm/ADT/DenseMap.h"
108 #include "llvm/ADT/MapVector.h"
109 #include "llvm/ADT/SetVector.h"
110 #include "llvm/ADT/SmallVector.h"
111 #include "llvm/ADT/Statistic.h"
112 #include "llvm/CodeGen/MachineBasicBlock.h"
113 #include "llvm/CodeGen/MachineDominators.h"
114 #include "llvm/CodeGen/MachineFunctionPass.h"
115 #include "llvm/CodeGen/MachineInstr.h"
116 #include "llvm/CodeGen/MachineInstrBuilder.h"
117 #include "llvm/Support/CommandLine.h"
118 #include "llvm/Support/Debug.h"
119 #include "llvm/Support/ErrorHandling.h"
120 #include "llvm/Support/raw_ostream.h"
121 #include "llvm/Target/TargetInstrInfo.h"
122 #include "llvm/Target/TargetMachine.h"
123 #include "llvm/Target/TargetRegisterInfo.h"
124 using namespace llvm;
125 
126 #define DEBUG_TYPE "aarch64-collect-loh"
127 
128 static cl::opt<bool>
129 PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden,
130                    cl::desc("Restrict analysis to registers invovled"
131                             " in LOHs"),
132                    cl::init(true));
133 
134 static cl::opt<bool>
135 BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
136                     cl::desc("Restrict analysis at basic block scope"),
137                     cl::init(true));
138 
139 STATISTIC(NumADRPSimpleCandidate,
140           "Number of simplifiable ADRP dominate by another");
141 STATISTIC(NumADRPComplexCandidate2,
142           "Number of simplifiable ADRP reachable by 2 defs");
143 STATISTIC(NumADRPComplexCandidate3,
144           "Number of simplifiable ADRP reachable by 3 defs");
145 STATISTIC(NumADRPComplexCandidateOther,
146           "Number of simplifiable ADRP reachable by 4 or more defs");
147 STATISTIC(NumADDToSTRWithImm,
148           "Number of simplifiable STR with imm reachable by ADD");
149 STATISTIC(NumLDRToSTRWithImm,
150           "Number of simplifiable STR with imm reachable by LDR");
151 STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
152 STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
153 STATISTIC(NumADDToLDRWithImm,
154           "Number of simplifiable LDR with imm reachable by ADD");
155 STATISTIC(NumLDRToLDRWithImm,
156           "Number of simplifiable LDR with imm reachable by LDR");
157 STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
158 STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
159 STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
160 STATISTIC(NumCplxLvl1, "Number of complex case of level 1");
161 STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1");
162 STATISTIC(NumCplxLvl2, "Number of complex case of level 2");
163 STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2");
164 STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
165 STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
166 
167 namespace llvm {
168 void initializeAArch64CollectLOHPass(PassRegistry &);
169 }
170 
171 #define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
172 
173 namespace {
174 struct AArch64CollectLOH : public MachineFunctionPass {
175   static char ID;
AArch64CollectLOH__anonfe85bc120111::AArch64CollectLOH176   AArch64CollectLOH() : MachineFunctionPass(ID) {
177     initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry());
178   }
179 
180   bool runOnMachineFunction(MachineFunction &MF) override;
181 
getRequiredProperties__anonfe85bc120111::AArch64CollectLOH182   MachineFunctionProperties getRequiredProperties() const override {
183     return MachineFunctionProperties().set(
184         MachineFunctionProperties::Property::AllVRegsAllocated);
185   }
186 
getPassName__anonfe85bc120111::AArch64CollectLOH187   const char *getPassName() const override {
188     return AARCH64_COLLECT_LOH_NAME;
189   }
190 
getAnalysisUsage__anonfe85bc120111::AArch64CollectLOH191   void getAnalysisUsage(AnalysisUsage &AU) const override {
192     AU.setPreservesAll();
193     MachineFunctionPass::getAnalysisUsage(AU);
194     AU.addRequired<MachineDominatorTree>();
195   }
196 
197 private:
198 };
199 
200 /// A set of MachineInstruction.
201 typedef SetVector<const MachineInstr *> SetOfMachineInstr;
202 /// Map a basic block to a set of instructions per register.
203 /// This is used to represent the exposed uses of a basic block
204 /// per register.
205 typedef MapVector<const MachineBasicBlock *,
206                   std::unique_ptr<SetOfMachineInstr[]>>
207 BlockToSetOfInstrsPerColor;
208 /// Map a basic block to an instruction per register.
209 /// This is used to represent the live-out definitions of a basic block
210 /// per register.
211 typedef MapVector<const MachineBasicBlock *,
212                   std::unique_ptr<const MachineInstr *[]>>
213 BlockToInstrPerColor;
214 /// Map an instruction to a set of instructions. Used to represent the
215 /// mapping def to reachable uses or use to definitions.
216 typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
217 /// Map a basic block to a BitVector.
218 /// This is used to record the kill registers per basic block.
219 typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
220 
221 /// Map a register to a dense id.
222 typedef DenseMap<unsigned, unsigned> MapRegToId;
223 /// Map a dense id to a register. Used for debug purposes.
224 typedef SmallVector<unsigned, 32> MapIdToReg;
225 } // end anonymous namespace.
226 
227 char AArch64CollectLOH::ID = 0;
228 
229 INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
230                       AARCH64_COLLECT_LOH_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)231 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
232 INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
233                     AARCH64_COLLECT_LOH_NAME, false, false)
234 
235 /// Given a couple (MBB, reg) get the corresponding set of instruction from
236 /// the given "sets".
237 /// If this couple does not reference any set, an empty set is added to "sets"
238 /// for this couple and returned.
239 /// \param nbRegs is used internally allocate some memory. It must be consistent
240 /// with the way sets is used.
241 static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
242                                  const MachineBasicBlock &MBB, unsigned reg,
243                                  unsigned nbRegs) {
244   SetOfMachineInstr *result;
245   BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
246   if (it != sets.end())
247     result = it->second.get();
248   else
249     result = (sets[&MBB] = make_unique<SetOfMachineInstr[]>(nbRegs)).get();
250 
251   return result[reg];
252 }
253 
254 /// Given a couple (reg, MI) get the corresponding set of instructions from the
255 /// the given "sets".
256 /// This is used to get the uses record in sets of a definition identified by
257 /// MI and reg, i.e., MI defines reg.
258 /// If the couple does not reference anything, an empty set is added to
259 /// "sets[reg]".
260 /// \pre set[reg] is valid.
getUses(InstrToInstrs * sets,unsigned reg,const MachineInstr & MI)261 static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
262                                   const MachineInstr &MI) {
263   return sets[reg][&MI];
264 }
265 
266 /// Same as getUses but does not modify the input map: sets.
267 /// \return NULL if the couple (reg, MI) is not in sets.
getUses(const InstrToInstrs * sets,unsigned reg,const MachineInstr & MI)268 static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
269                                         const MachineInstr &MI) {
270   InstrToInstrs::const_iterator Res = sets[reg].find(&MI);
271   if (Res != sets[reg].end())
272     return &(Res->second);
273   return nullptr;
274 }
275 
276 /// Initialize the reaching definition algorithm:
277 /// For each basic block BB in MF, record:
278 /// - its kill set.
279 /// - its reachable uses (uses that are exposed to BB's predecessors).
280 /// - its the generated definitions.
281 /// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
282 /// the list of uses of exposed defintions.
283 /// \param ADRPMode specifies to only consider ADRP instructions for generated
284 /// definition. It also consider definitions of ADRP instructions as uses and
285 /// ignore other uses. The ADRPMode is used to collect the information for LHO
286 /// that involve ADRP operation only.
initReachingDef(const MachineFunction & MF,InstrToInstrs * ColorOpToReachedUses,BlockToInstrPerColor & Gen,BlockToRegSet & Kill,BlockToSetOfInstrsPerColor & ReachableUses,const MapRegToId & RegToId,const MachineInstr * DummyOp,bool ADRPMode)287 static void initReachingDef(const MachineFunction &MF,
288                             InstrToInstrs *ColorOpToReachedUses,
289                             BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
290                             BlockToSetOfInstrsPerColor &ReachableUses,
291                             const MapRegToId &RegToId,
292                             const MachineInstr *DummyOp, bool ADRPMode) {
293   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
294   unsigned NbReg = RegToId.size();
295 
296   for (const MachineBasicBlock &MBB : MF) {
297     auto &BBGen = Gen[&MBB];
298     BBGen = make_unique<const MachineInstr *[]>(NbReg);
299     std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
300 
301     BitVector &BBKillSet = Kill[&MBB];
302     BBKillSet.resize(NbReg);
303     for (const MachineInstr &MI : MBB) {
304       bool IsADRP = MI.getOpcode() == AArch64::ADRP;
305 
306       // Process uses first.
307       if (IsADRP || !ADRPMode)
308         for (const MachineOperand &MO : MI.operands()) {
309           // Treat ADRP def as use, as the goal of the analysis is to find
310           // ADRP defs reached by other ADRP defs.
311           if (!MO.isReg() || (!ADRPMode && !MO.isUse()) ||
312               (ADRPMode && (!IsADRP || !MO.isDef())))
313             continue;
314           unsigned CurReg = MO.getReg();
315           MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
316           if (ItCurRegId == RegToId.end())
317             continue;
318           CurReg = ItCurRegId->second;
319 
320           // if CurReg has not been defined, this use is reachable.
321           if (!BBGen[CurReg] && !BBKillSet.test(CurReg))
322             getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI);
323           // current basic block definition for this color, if any, is in Gen.
324           if (BBGen[CurReg])
325             getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI);
326         }
327 
328       // Process clobbers.
329       for (const MachineOperand &MO : MI.operands()) {
330         if (!MO.isRegMask())
331           continue;
332         // Clobbers kill the related colors.
333         const uint32_t *PreservedRegs = MO.getRegMask();
334 
335         // Set generated regs.
336         for (const auto &Entry : RegToId) {
337           unsigned Reg = Entry.second;
338           // Use the global register ID when querying APIs external to this
339           // pass.
340           if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) {
341             // Do not register clobbered definition for no ADRP.
342             // This definition is not used anyway (otherwise register
343             // allocation is wrong).
344             BBGen[Reg] = ADRPMode ? &MI : nullptr;
345             BBKillSet.set(Reg);
346           }
347         }
348       }
349 
350       // Process register defs.
351       for (const MachineOperand &MO : MI.operands()) {
352         if (!MO.isReg() || !MO.isDef())
353           continue;
354         unsigned CurReg = MO.getReg();
355         MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
356         if (ItCurRegId == RegToId.end())
357           continue;
358 
359         for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
360           MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
361           // If this alias has not been recorded, then it is not interesting
362           // for the current analysis.
363           // We can end up in this situation because of tuple registers.
364           // E.g., Let say we are interested in S1. When we register
365           // S1, we will also register its aliases and in particular
366           // the tuple Q1_Q2.
367           // Now, when we encounter Q1_Q2, we will look through its aliases
368           // and will find that S2 is not registered.
369           if (ItRegId == RegToId.end())
370             continue;
371 
372           BBKillSet.set(ItRegId->second);
373           BBGen[ItRegId->second] = &MI;
374         }
375         BBGen[ItCurRegId->second] = &MI;
376       }
377     }
378 
379     // If we restrict our analysis to basic block scope, conservatively add a
380     // dummy
381     // use for each generated value.
382     if (!ADRPMode && DummyOp && !MBB.succ_empty())
383       for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
384         if (BBGen[CurReg])
385           getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp);
386   }
387 }
388 
389 /// Reaching def core algorithm:
390 /// while an Out has changed
391 ///    for each bb
392 ///       for each color
393 ///           In[bb][color] = U Out[bb.predecessors][color]
394 ///           insert reachableUses[bb][color] in each in[bb][color]
395 ///                 op.reachedUses
396 ///
397 ///           Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
reachingDefAlgorithm(const MachineFunction & MF,InstrToInstrs * ColorOpToReachedUses,BlockToSetOfInstrsPerColor & In,BlockToSetOfInstrsPerColor & Out,BlockToInstrPerColor & Gen,BlockToRegSet & Kill,BlockToSetOfInstrsPerColor & ReachableUses,unsigned NbReg)398 static void reachingDefAlgorithm(const MachineFunction &MF,
399                                  InstrToInstrs *ColorOpToReachedUses,
400                                  BlockToSetOfInstrsPerColor &In,
401                                  BlockToSetOfInstrsPerColor &Out,
402                                  BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
403                                  BlockToSetOfInstrsPerColor &ReachableUses,
404                                  unsigned NbReg) {
405   bool HasChanged;
406   do {
407     HasChanged = false;
408     for (const MachineBasicBlock &MBB : MF) {
409       unsigned CurReg;
410       for (CurReg = 0; CurReg < NbReg; ++CurReg) {
411         SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
412         SetOfMachineInstr &BBReachableUses =
413             getSet(ReachableUses, MBB, CurReg, NbReg);
414         SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
415         unsigned Size = BBOutSet.size();
416         //   In[bb][color] = U Out[bb.predecessors][color]
417         for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
418           SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
419           BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
420         }
421         //   insert reachableUses[bb][color] in each in[bb][color] op.reachedses
422         for (const MachineInstr *MI : BBInSet) {
423           SetOfMachineInstr &OpReachedUses =
424               getUses(ColorOpToReachedUses, CurReg, *MI);
425           OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
426         }
427         //           Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
428         if (!Kill[&MBB].test(CurReg))
429           BBOutSet.insert(BBInSet.begin(), BBInSet.end());
430         if (Gen[&MBB][CurReg])
431           BBOutSet.insert(Gen[&MBB][CurReg]);
432         HasChanged |= BBOutSet.size() != Size;
433       }
434     }
435   } while (HasChanged);
436 }
437 
438 /// Reaching definition algorithm.
439 /// \param MF function on which the algorithm will operate.
440 /// \param[out] ColorOpToReachedUses will contain the result of the reaching
441 /// def algorithm.
442 /// \param ADRPMode specify whether the reaching def algorithm should be tuned
443 /// for ADRP optimization. \see initReachingDef for more details.
444 /// \param DummyOp if not NULL, the algorithm will work at
445 /// basic block scope and will set for every exposed definition a use to
446 /// @p DummyOp.
447 /// \pre ColorOpToReachedUses is an array of at least number of registers of
448 /// InstrToInstrs.
reachingDef(const MachineFunction & MF,InstrToInstrs * ColorOpToReachedUses,const MapRegToId & RegToId,bool ADRPMode=false,const MachineInstr * DummyOp=nullptr)449 static void reachingDef(const MachineFunction &MF,
450                         InstrToInstrs *ColorOpToReachedUses,
451                         const MapRegToId &RegToId, bool ADRPMode = false,
452                         const MachineInstr *DummyOp = nullptr) {
453   // structures:
454   // For each basic block.
455   // Out: a set per color of definitions that reach the
456   //      out boundary of this block.
457   // In: Same as Out but for in boundary.
458   // Gen: generated color in this block (one operation per color).
459   // Kill: register set of killed color in this block.
460   // ReachableUses: a set per color of uses (operation) reachable
461   //                for "In" definitions.
462   BlockToSetOfInstrsPerColor Out, In, ReachableUses;
463   BlockToInstrPerColor Gen;
464   BlockToRegSet Kill;
465 
466   // Initialize Gen, kill and reachableUses.
467   initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId,
468                   DummyOp, ADRPMode);
469 
470   // Algo.
471   if (!DummyOp)
472     reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
473                          ReachableUses, RegToId.size());
474 }
475 
476 #ifndef NDEBUG
477 /// print the result of the reaching definition algorithm.
printReachingDef(const InstrToInstrs * ColorOpToReachedUses,unsigned NbReg,const TargetRegisterInfo * TRI,const MapIdToReg & IdToReg)478 static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
479                              unsigned NbReg, const TargetRegisterInfo *TRI,
480                              const MapIdToReg &IdToReg) {
481   unsigned CurReg;
482   for (CurReg = 0; CurReg < NbReg; ++CurReg) {
483     if (ColorOpToReachedUses[CurReg].empty())
484       continue;
485     DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
486 
487     for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
488       DEBUG(dbgs() << "Def:\n");
489       DEBUG(DefsIt.first->print(dbgs()));
490       DEBUG(dbgs() << "Reachable uses:\n");
491       for (const MachineInstr *MI : DefsIt.second) {
492         DEBUG(MI->print(dbgs()));
493       }
494     }
495   }
496 }
497 #endif // NDEBUG
498 
499 /// Answer the following question: Can Def be one of the definition
500 /// involved in a part of a LOH?
canDefBePartOfLOH(const MachineInstr * Def)501 static bool canDefBePartOfLOH(const MachineInstr *Def) {
502   unsigned Opc = Def->getOpcode();
503   // Accept ADRP, ADDLow and LOADGot.
504   switch (Opc) {
505   default:
506     return false;
507   case AArch64::ADRP:
508     return true;
509   case AArch64::ADDXri:
510     // Check immediate to see if the immediate is an address.
511     switch (Def->getOperand(2).getType()) {
512     default:
513       return false;
514     case MachineOperand::MO_GlobalAddress:
515     case MachineOperand::MO_JumpTableIndex:
516     case MachineOperand::MO_ConstantPoolIndex:
517     case MachineOperand::MO_BlockAddress:
518       return true;
519     }
520   case AArch64::LDRXui:
521     // Check immediate to see if the immediate is an address.
522     switch (Def->getOperand(2).getType()) {
523     default:
524       return false;
525     case MachineOperand::MO_GlobalAddress:
526       return true;
527     }
528   }
529   // Unreachable.
530   return false;
531 }
532 
533 /// Check whether the given instruction can the end of a LOH chain involving a
534 /// store.
isCandidateStore(const MachineInstr * Instr)535 static bool isCandidateStore(const MachineInstr *Instr) {
536   switch (Instr->getOpcode()) {
537   default:
538     return false;
539   case AArch64::STRBBui:
540   case AArch64::STRHHui:
541   case AArch64::STRBui:
542   case AArch64::STRHui:
543   case AArch64::STRWui:
544   case AArch64::STRXui:
545   case AArch64::STRSui:
546   case AArch64::STRDui:
547   case AArch64::STRQui:
548     // In case we have str xA, [xA, #imm], this is two different uses
549     // of xA and we cannot fold, otherwise the xA stored may be wrong,
550     // even if #imm == 0.
551     if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
552       return true;
553   }
554   return false;
555 }
556 
557 /// Given the result of a reaching definition algorithm in ColorOpToReachedUses,
558 /// Build the Use to Defs information and filter out obvious non-LOH candidates.
559 /// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
560 /// In non-ADRPMode, non-LOH candidates are "uses" with several definition,
561 /// i.e., no simple chain.
562 /// \param ADRPMode -- \see initReachingDef.
reachedUsesToDefs(InstrToInstrs & UseToReachingDefs,const InstrToInstrs * ColorOpToReachedUses,const MapRegToId & RegToId,bool ADRPMode=false)563 static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
564                               const InstrToInstrs *ColorOpToReachedUses,
565                               const MapRegToId &RegToId,
566                               bool ADRPMode = false) {
567 
568   SetOfMachineInstr NotCandidate;
569   unsigned NbReg = RegToId.size();
570   MapRegToId::const_iterator EndIt = RegToId.end();
571   for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
572     // If this color is never defined, continue.
573     if (ColorOpToReachedUses[CurReg].empty())
574       continue;
575 
576     for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
577       for (const MachineInstr *MI : DefsIt.second) {
578         const MachineInstr *Def = DefsIt.first;
579         MapRegToId::const_iterator It;
580         // if all the reaching defs are not adrp, this use will not be
581         // simplifiable.
582         if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) ||
583             (!ADRPMode && !canDefBePartOfLOH(Def)) ||
584             (!ADRPMode && isCandidateStore(MI) &&
585              // store are LOH candidate iff the end of the chain is used as
586              // base.
587              ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt ||
588               It->second != CurReg))) {
589           NotCandidate.insert(MI);
590           continue;
591         }
592         // Do not consider self reaching as a simplifiable case for ADRP.
593         if (!ADRPMode || MI != DefsIt.first) {
594           UseToReachingDefs[MI].insert(DefsIt.first);
595           // If UsesIt has several reaching definitions, it is not
596           // candidate for simplificaton in non-ADRPMode.
597           if (!ADRPMode && UseToReachingDefs[MI].size() > 1)
598             NotCandidate.insert(MI);
599         }
600       }
601     }
602   }
603   for (const MachineInstr *Elem : NotCandidate) {
604     DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n");
605     // It would have been better if we could just remove the entry
606     // from the map.  Because of that, we have to filter the garbage
607     // (second.empty) in the subsequence analysis.
608     UseToReachingDefs[Elem].clear();
609   }
610 }
611 
612 /// Based on the use to defs information (in ADRPMode), compute the
613 /// opportunities of LOH ADRP-related.
computeADRP(const InstrToInstrs & UseToDefs,AArch64FunctionInfo & AArch64FI,const MachineDominatorTree * MDT)614 static void computeADRP(const InstrToInstrs &UseToDefs,
615                         AArch64FunctionInfo &AArch64FI,
616                         const MachineDominatorTree *MDT) {
617   DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
618   for (const auto &Entry : UseToDefs) {
619     unsigned Size = Entry.second.size();
620     if (Size == 0)
621       continue;
622     if (Size == 1) {
623       const MachineInstr *L2 = *Entry.second.begin();
624       const MachineInstr *L1 = Entry.first;
625       if (!MDT->dominates(L2, L1)) {
626         DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
627                      << '\n');
628         continue;
629       }
630       DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
631       AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, {L2, L1});
632       ++NumADRPSimpleCandidate;
633     }
634 #ifdef DEBUG
635     else if (Size == 2)
636       ++NumADRPComplexCandidate2;
637     else if (Size == 3)
638       ++NumADRPComplexCandidate3;
639     else
640       ++NumADRPComplexCandidateOther;
641 #endif
642     // if Size < 1, the use should have been removed from the candidates
643     assert(Size >= 1 && "No reaching defs for that use!");
644   }
645 }
646 
647 /// Check whether the given instruction can be the end of a LOH chain
648 /// involving a load.
isCandidateLoad(const MachineInstr * Instr)649 static bool isCandidateLoad(const MachineInstr *Instr) {
650   switch (Instr->getOpcode()) {
651   default:
652     return false;
653   case AArch64::LDRSBWui:
654   case AArch64::LDRSBXui:
655   case AArch64::LDRSHWui:
656   case AArch64::LDRSHXui:
657   case AArch64::LDRSWui:
658   case AArch64::LDRBui:
659   case AArch64::LDRHui:
660   case AArch64::LDRWui:
661   case AArch64::LDRXui:
662   case AArch64::LDRSui:
663   case AArch64::LDRDui:
664   case AArch64::LDRQui:
665     if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)
666       return false;
667     return true;
668   }
669   // Unreachable.
670   return false;
671 }
672 
673 /// Check whether the given instruction can load a litteral.
supportLoadFromLiteral(const MachineInstr * Instr)674 static bool supportLoadFromLiteral(const MachineInstr *Instr) {
675   switch (Instr->getOpcode()) {
676   default:
677     return false;
678   case AArch64::LDRSWui:
679   case AArch64::LDRWui:
680   case AArch64::LDRXui:
681   case AArch64::LDRSui:
682   case AArch64::LDRDui:
683   case AArch64::LDRQui:
684     return true;
685   }
686   // Unreachable.
687   return false;
688 }
689 
690 /// Check whether the given instruction is a LOH candidate.
691 /// \param UseToDefs is used to check that Instr is at the end of LOH supported
692 /// chain.
693 /// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are
694 /// already been filtered out.
isCandidate(const MachineInstr * Instr,const InstrToInstrs & UseToDefs,const MachineDominatorTree * MDT)695 static bool isCandidate(const MachineInstr *Instr,
696                         const InstrToInstrs &UseToDefs,
697                         const MachineDominatorTree *MDT) {
698   if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
699     return false;
700 
701   const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
702   if (Def->getOpcode() != AArch64::ADRP) {
703     // At this point, Def is ADDXri or LDRXui of the right type of
704     // symbol, because we filtered out the uses that were not defined
705     // by these kind of instructions (+ ADRP).
706 
707     // Check if this forms a simple chain: each intermediate node must
708     // dominates the next one.
709     if (!MDT->dominates(Def, Instr))
710       return false;
711     // Move one node up in the simple chain.
712     if (UseToDefs.find(Def) ==
713             UseToDefs.end()
714             // The map may contain garbage we have to ignore.
715         ||
716         UseToDefs.find(Def)->second.empty())
717       return false;
718     Instr = Def;
719     Def = *UseToDefs.find(Def)->second.begin();
720   }
721   // Check if we reached the top of the simple chain:
722   // - top is ADRP.
723   // - check the simple chain property: each intermediate node must
724   // dominates the next one.
725   if (Def->getOpcode() == AArch64::ADRP)
726     return MDT->dominates(Def, Instr);
727   return false;
728 }
729 
registerADRCandidate(const MachineInstr & Use,const InstrToInstrs & UseToDefs,const InstrToInstrs * DefsPerColorToUses,AArch64FunctionInfo & AArch64FI,SetOfMachineInstr * InvolvedInLOHs,const MapRegToId & RegToId)730 static bool registerADRCandidate(const MachineInstr &Use,
731                                  const InstrToInstrs &UseToDefs,
732                                  const InstrToInstrs *DefsPerColorToUses,
733                                  AArch64FunctionInfo &AArch64FI,
734                                  SetOfMachineInstr *InvolvedInLOHs,
735                                  const MapRegToId &RegToId) {
736   // Look for opportunities to turn ADRP -> ADD or
737   // ADRP -> LDR GOTPAGEOFF into ADR.
738   // If ADRP has more than one use. Give up.
739   if (Use.getOpcode() != AArch64::ADDXri &&
740       (Use.getOpcode() != AArch64::LDRXui ||
741        !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
742     return false;
743   InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
744   // The map may contain garbage that we need to ignore.
745   if (It == UseToDefs.end() || It->second.empty())
746     return false;
747   const MachineInstr &Def = **It->second.begin();
748   if (Def.getOpcode() != AArch64::ADRP)
749     return false;
750   // Check the number of users of ADRP.
751   const SetOfMachineInstr *Users =
752       getUses(DefsPerColorToUses,
753               RegToId.find(Def.getOperand(0).getReg())->second, Def);
754   if (Users->size() > 1) {
755     ++NumADRComplexCandidate;
756     return false;
757   }
758   ++NumADRSimpleCandidate;
759   assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
760          "ADRP already involved in LOH.");
761   assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
762          "ADD already involved in LOH.");
763   DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');
764 
765   AArch64FI.addLOHDirective(
766       Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot,
767       {&Def, &Use});
768   return true;
769 }
770 
771 /// Based on the use to defs information (in non-ADRPMode), compute the
772 /// opportunities of LOH non-ADRP-related
computeOthers(const InstrToInstrs & UseToDefs,const InstrToInstrs * DefsPerColorToUses,AArch64FunctionInfo & AArch64FI,const MapRegToId & RegToId,const MachineDominatorTree * MDT)773 static void computeOthers(const InstrToInstrs &UseToDefs,
774                           const InstrToInstrs *DefsPerColorToUses,
775                           AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId,
776                           const MachineDominatorTree *MDT) {
777   SetOfMachineInstr *InvolvedInLOHs = nullptr;
778 #ifdef DEBUG
779   SetOfMachineInstr InvolvedInLOHsStorage;
780   InvolvedInLOHs = &InvolvedInLOHsStorage;
781 #endif // DEBUG
782   DEBUG(dbgs() << "*** Compute LOH for Others\n");
783   // ADRP -> ADD/LDR -> LDR/STR pattern.
784   // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
785 
786   // FIXME: When the statistics are not important,
787   // This initial filtering loop can be merged into the next loop.
788   // Currently, we didn't do it to have the same code for both DEBUG and
789   // NDEBUG builds. Indeed, the iterator of the second loop would need
790   // to be changed.
791   SetOfMachineInstr PotentialCandidates;
792   SetOfMachineInstr PotentialADROpportunities;
793   for (auto &Use : UseToDefs) {
794     // If no definition is available, this is a non candidate.
795     if (Use.second.empty())
796       continue;
797     // Keep only instructions that are load or store and at the end of
798     // a ADRP -> ADD/LDR/Nothing chain.
799     // We already filtered out the no-chain cases.
800     if (!isCandidate(Use.first, UseToDefs, MDT)) {
801       PotentialADROpportunities.insert(Use.first);
802       continue;
803     }
804     PotentialCandidates.insert(Use.first);
805   }
806 
807   // Make the following distinctions for statistics as the linker does
808   // know how to decode instructions:
809   // - ADD/LDR/Nothing make there different patterns.
810   // - LDR/STR make two different patterns.
811   // Hence, 6 - 1 base patterns.
812   // (because ADRP-> Nothing -> STR is not simplifiable)
813 
814   // The linker is only able to have a simple semantic, i.e., if pattern A
815   // do B.
816   // However, we want to see the opportunity we may miss if we were able to
817   // catch more complex cases.
818 
819   // PotentialCandidates are result of a chain ADRP -> ADD/LDR ->
820   // A potential candidate becomes a candidate, if its current immediate
821   // operand is zero and all nodes of the chain have respectively only one user
822 #ifdef DEBUG
823   SetOfMachineInstr DefsOfPotentialCandidates;
824 #endif
825   for (const MachineInstr *Candidate : PotentialCandidates) {
826     // Get the definition of the candidate i.e., ADD or LDR.
827     const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
828     // Record the elements of the chain.
829     const MachineInstr *L1 = Def;
830     const MachineInstr *L2 = nullptr;
831     unsigned ImmediateDefOpc = Def->getOpcode();
832     if (Def->getOpcode() != AArch64::ADRP) {
833       // Check the number of users of this node.
834       const SetOfMachineInstr *Users =
835           getUses(DefsPerColorToUses,
836                   RegToId.find(Def->getOperand(0).getReg())->second, *Def);
837       if (Users->size() > 1) {
838 #ifdef DEBUG
839         // if all the uses of this def are in potential candidate, this is
840         // a complex candidate of level 2.
841         bool IsLevel2 = true;
842         for (const MachineInstr *MI : *Users) {
843           if (!PotentialCandidates.count(MI)) {
844             ++NumTooCplxLvl2;
845             IsLevel2 = false;
846             break;
847           }
848         }
849         if (IsLevel2)
850           ++NumCplxLvl2;
851 #endif // DEBUG
852         PotentialADROpportunities.insert(Def);
853         continue;
854       }
855       L2 = Def;
856       Def = *UseToDefs.find(Def)->second.begin();
857       L1 = Def;
858     } // else the element in the middle of the chain is nothing, thus
859       // Def already contains the first element of the chain.
860 
861     // Check the number of users of the first node in the chain, i.e., ADRP
862     const SetOfMachineInstr *Users =
863         getUses(DefsPerColorToUses,
864                 RegToId.find(Def->getOperand(0).getReg())->second, *Def);
865     if (Users->size() > 1) {
866 #ifdef DEBUG
867       // if all the uses of this def are in the defs of the potential candidate,
868       // this is a complex candidate of level 1
869       if (DefsOfPotentialCandidates.empty()) {
870         // lazy init
871         DefsOfPotentialCandidates = PotentialCandidates;
872         for (const MachineInstr *Candidate : PotentialCandidates) {
873           if (!UseToDefs.find(Candidate)->second.empty())
874             DefsOfPotentialCandidates.insert(
875                 *UseToDefs.find(Candidate)->second.begin());
876         }
877       }
878       bool Found = false;
879       for (auto &Use : *Users) {
880         if (!DefsOfPotentialCandidates.count(Use)) {
881           ++NumTooCplxLvl1;
882           Found = true;
883           break;
884         }
885       }
886       if (!Found)
887         ++NumCplxLvl1;
888 #endif // DEBUG
889       continue;
890     }
891 
892     bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
893     // If the chain is three instructions long and ldr is the second element,
894     // then this ldr must load form GOT, otherwise this is not a correct chain.
895     if (L2 && !IsL2Add &&
896         !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT))
897       continue;
898     SmallVector<const MachineInstr *, 3> Args;
899     MCLOHType Kind;
900     if (isCandidateLoad(Candidate)) {
901       if (!L2) {
902         // At this point, the candidate LOH indicates that the ldr instruction
903         // may use a direct access to the symbol. There is not such encoding
904         // for loads of byte and half.
905         if (!supportLoadFromLiteral(Candidate))
906           continue;
907 
908         DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
909                      << '\n');
910         Kind = MCLOH_AdrpLdr;
911         Args.push_back(L1);
912         Args.push_back(Candidate);
913         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
914                "L1 already involved in LOH.");
915         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
916                "Candidate already involved in LOH.");
917         ++NumADRPToLDR;
918       } else {
919         DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
920                      << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
921                      << '\n');
922 
923         Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
924         Args.push_back(L1);
925         Args.push_back(L2);
926         Args.push_back(Candidate);
927 
928         PotentialADROpportunities.remove(L2);
929         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
930                "L1 already involved in LOH.");
931         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
932                "L2 already involved in LOH.");
933         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
934                "Candidate already involved in LOH.");
935 #ifdef DEBUG
936         // get the immediate of the load
937         if (Candidate->getOperand(2).getImm() == 0)
938           if (ImmediateDefOpc == AArch64::ADDXri)
939             ++NumADDToLDR;
940           else
941             ++NumLDRToLDR;
942         else if (ImmediateDefOpc == AArch64::ADDXri)
943           ++NumADDToLDRWithImm;
944         else
945           ++NumLDRToLDRWithImm;
946 #endif // DEBUG
947       }
948     } else {
949       if (ImmediateDefOpc == AArch64::ADRP)
950         continue;
951       else {
952 
953         DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
954                      << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
955                      << '\n');
956 
957         Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
958         Args.push_back(L1);
959         Args.push_back(L2);
960         Args.push_back(Candidate);
961 
962         PotentialADROpportunities.remove(L2);
963         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
964                "L1 already involved in LOH.");
965         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
966                "L2 already involved in LOH.");
967         assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
968                "Candidate already involved in LOH.");
969 #ifdef DEBUG
970         // get the immediate of the store
971         if (Candidate->getOperand(2).getImm() == 0)
972           if (ImmediateDefOpc == AArch64::ADDXri)
973             ++NumADDToSTR;
974           else
975             ++NumLDRToSTR;
976         else if (ImmediateDefOpc == AArch64::ADDXri)
977           ++NumADDToSTRWithImm;
978         else
979           ++NumLDRToSTRWithImm;
980 #endif // DEBUG
981       }
982     }
983     AArch64FI.addLOHDirective(Kind, Args);
984   }
985 
986   // Now, we grabbed all the big patterns, check ADR opportunities.
987   for (const MachineInstr *Candidate : PotentialADROpportunities)
988     registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
989                          InvolvedInLOHs, RegToId);
990 }
991 
992 /// Look for every register defined by potential LOHs candidates.
993 /// Map these registers with dense id in @p RegToId and vice-versa in
994 /// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
collectInvolvedReg(const MachineFunction & MF,MapRegToId & RegToId,MapIdToReg & IdToReg,const TargetRegisterInfo * TRI)995 static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
996                                MapIdToReg &IdToReg,
997                                const TargetRegisterInfo *TRI) {
998   unsigned CurRegId = 0;
999   if (!PreCollectRegister) {
1000     unsigned NbReg = TRI->getNumRegs();
1001     for (; CurRegId < NbReg; ++CurRegId) {
1002       RegToId[CurRegId] = CurRegId;
1003       DEBUG(IdToReg.push_back(CurRegId));
1004       DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches"));
1005     }
1006     return;
1007   }
1008 
1009   DEBUG(dbgs() << "** Collect Involved Register\n");
1010   for (const auto &MBB : MF) {
1011     for (const MachineInstr &MI : MBB) {
1012       if (!canDefBePartOfLOH(&MI) &&
1013           !isCandidateLoad(&MI) && !isCandidateStore(&MI))
1014         continue;
1015 
1016       // Process defs
1017       for (MachineInstr::const_mop_iterator IO = MI.operands_begin(),
1018                                             IOEnd = MI.operands_end();
1019            IO != IOEnd; ++IO) {
1020         if (!IO->isReg() || !IO->isDef())
1021           continue;
1022         unsigned CurReg = IO->getReg();
1023         for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI)
1024           if (RegToId.find(*AI) == RegToId.end()) {
1025             DEBUG(IdToReg.push_back(*AI);
1026                   assert(IdToReg[CurRegId] == *AI &&
1027                          "Reg index mismatches insertion index."));
1028             RegToId[*AI] = CurRegId++;
1029             DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n');
1030           }
1031       }
1032     }
1033   }
1034 }
1035 
runOnMachineFunction(MachineFunction & MF)1036 bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
1037   if (skipFunction(*MF.getFunction()))
1038     return false;
1039 
1040   const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1041   const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
1042 
1043   MapRegToId RegToId;
1044   MapIdToReg IdToReg;
1045   AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>();
1046   assert(AArch64FI && "No MachineFunctionInfo for this function!");
1047 
1048   DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n');
1049 
1050   collectInvolvedReg(MF, RegToId, IdToReg, TRI);
1051   if (RegToId.empty())
1052     return false;
1053 
1054   MachineInstr *DummyOp = nullptr;
1055   if (BasicBlockScopeOnly) {
1056     const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
1057     // For local analysis, create a dummy operation to record uses that are not
1058     // local.
1059     DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
1060   }
1061 
1062   unsigned NbReg = RegToId.size();
1063   bool Modified = false;
1064 
1065   // Start with ADRP.
1066   InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg];
1067 
1068   // Compute the reaching def in ADRP mode, meaning ADRP definitions
1069   // are first considered as uses.
1070   reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp);
1071   DEBUG(dbgs() << "ADRP reaching defs\n");
1072   DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1073 
1074   // Translate the definition to uses map into a use to definitions map to ease
1075   // statistic computation.
1076   InstrToInstrs ADRPToReachingDefs;
1077   reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
1078 
1079   // Compute LOH for ADRP.
1080   computeADRP(ADRPToReachingDefs, *AArch64FI, MDT);
1081   delete[] ColorOpToReachedUses;
1082 
1083   // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern.
1084   ColorOpToReachedUses = new InstrToInstrs[NbReg];
1085 
1086   // first perform a regular reaching def analysis.
1087   reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp);
1088   DEBUG(dbgs() << "All reaching defs\n");
1089   DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1090 
1091   // Turn that into a use to defs to ease statistic computation.
1092   InstrToInstrs UsesToReachingDefs;
1093   reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
1094 
1095   // Compute other than AdrpAdrp LOH.
1096   computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId,
1097                 MDT);
1098   delete[] ColorOpToReachedUses;
1099 
1100   if (BasicBlockScopeOnly)
1101     MF.DeleteMachineInstr(DummyOp);
1102 
1103   return Modified;
1104 }
1105 
1106 /// createAArch64CollectLOHPass - returns an instance of the Statistic for
1107 /// linker optimization pass.
createAArch64CollectLOHPass()1108 FunctionPass *llvm::createAArch64CollectLOHPass() {
1109   return new AArch64CollectLOH();
1110 }
1111