1 //===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a pass that collect the Linker Optimization Hint (LOH).
11 // This pass should be run at the very end of the compilation flow, just before
12 // assembly printer.
13 // To be useful for the linker, the LOH must be printed into the assembly file.
14 //
15 // A LOH describes a sequence of instructions that may be optimized by the
16 // linker.
17 // This same sequence cannot be optimized by the compiler because some of
18 // the information will be known at link time.
19 // For instance, consider the following sequence:
20 // L1: adrp xA, sym@PAGE
21 // L2: add xB, xA, sym@PAGEOFF
22 // L3: ldr xC, [xB, #imm]
23 // This sequence can be turned into:
24 // A literal load if sym@PAGE + sym@PAGEOFF + #imm - address(L3) is < 1MB:
25 // L3: ldr xC, sym+#imm
26 // It may also be turned into either the following more efficient
27 // code sequences:
28 // - If sym@PAGEOFF + #imm fits the encoding space of L3.
29 // L1: adrp xA, sym@PAGE
30 // L3: ldr xC, [xB, sym@PAGEOFF + #imm]
31 // - If sym@PAGE + sym@PAGEOFF - address(L1) < 1MB:
32 // L1: adr xA, sym
33 // L3: ldr xC, [xB, #imm]
34 //
35 // To be valid a LOH must meet all the requirements needed by all the related
36 // possible linker transformations.
37 // For instance, using the running example, the constraints to emit
38 // ".loh AdrpAddLdr" are:
39 // - L1, L2, and L3 instructions are of the expected type, i.e.,
40 // respectively ADRP, ADD (immediate), and LD.
41 // - The result of L1 is used only by L2.
42 // - The register argument (xA) used in the ADD instruction is defined
43 // only by L1.
44 // - The result of L2 is used only by L3.
45 // - The base address (xB) in L3 is defined only L2.
46 // - The ADRP in L1 and the ADD in L2 must reference the same symbol using
47 // @PAGE/@PAGEOFF with no additional constants
48 //
49 // Currently supported LOHs are:
50 // * So called non-ADRP-related:
51 // - .loh AdrpAddLdr L1, L2, L3:
52 // L1: adrp xA, sym@PAGE
53 // L2: add xB, xA, sym@PAGEOFF
54 // L3: ldr xC, [xB, #imm]
55 // - .loh AdrpLdrGotLdr L1, L2, L3:
56 // L1: adrp xA, sym@GOTPAGE
57 // L2: ldr xB, [xA, sym@GOTPAGEOFF]
58 // L3: ldr xC, [xB, #imm]
59 // - .loh AdrpLdr L1, L3:
60 // L1: adrp xA, sym@PAGE
61 // L3: ldr xC, [xA, sym@PAGEOFF]
62 // - .loh AdrpAddStr L1, L2, L3:
63 // L1: adrp xA, sym@PAGE
64 // L2: add xB, xA, sym@PAGEOFF
65 // L3: str xC, [xB, #imm]
66 // - .loh AdrpLdrGotStr L1, L2, L3:
67 // L1: adrp xA, sym@GOTPAGE
68 // L2: ldr xB, [xA, sym@GOTPAGEOFF]
69 // L3: str xC, [xB, #imm]
70 // - .loh AdrpAdd L1, L2:
71 // L1: adrp xA, sym@PAGE
72 // L2: add xB, xA, sym@PAGEOFF
73 // For all these LOHs, L1, L2, L3 form a simple chain:
74 // L1 result is used only by L2 and L2 result by L3.
75 // L3 LOH-related argument is defined only by L2 and L2 LOH-related argument
76 // by L1.
77 // All these LOHs aim at using more efficient load/store patterns by folding
78 // some instructions used to compute the address directly into the load/store.
79 //
80 // * So called ADRP-related:
81 // - .loh AdrpAdrp L2, L1:
82 // L2: ADRP xA, sym1@PAGE
83 // L1: ADRP xA, sym2@PAGE
84 // L2 dominates L1 and xA is not redifined between L2 and L1
85 // This LOH aims at getting rid of redundant ADRP instructions.
86 //
87 // The overall design for emitting the LOHs is:
88 // 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.
89 // 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
90 // 1. Associates them a label.
91 // 2. Emits them in a MCStreamer (EmitLOHDirective).
92 // - The MCMachOStreamer records them into the MCAssembler.
93 // - The MCAsmStreamer prints them.
94 // - Other MCStreamers ignore them.
95 // 3. Closes the MCStreamer:
96 // - The MachObjectWriter gets them from the MCAssembler and writes
97 // them in the object file.
98 // - Other ObjectWriters ignore them.
99 //===----------------------------------------------------------------------===//
100
101 #include "AArch64.h"
102 #include "AArch64InstrInfo.h"
103 #include "AArch64MachineFunctionInfo.h"
104 #include "AArch64Subtarget.h"
105 #include "MCTargetDesc/AArch64AddressingModes.h"
106 #include "llvm/ADT/BitVector.h"
107 #include "llvm/ADT/DenseMap.h"
108 #include "llvm/ADT/MapVector.h"
109 #include "llvm/ADT/SetVector.h"
110 #include "llvm/ADT/SmallVector.h"
111 #include "llvm/ADT/Statistic.h"
112 #include "llvm/CodeGen/MachineBasicBlock.h"
113 #include "llvm/CodeGen/MachineDominators.h"
114 #include "llvm/CodeGen/MachineFunctionPass.h"
115 #include "llvm/CodeGen/MachineInstr.h"
116 #include "llvm/CodeGen/MachineInstrBuilder.h"
117 #include "llvm/Support/CommandLine.h"
118 #include "llvm/Support/Debug.h"
119 #include "llvm/Support/ErrorHandling.h"
120 #include "llvm/Support/raw_ostream.h"
121 #include "llvm/Target/TargetInstrInfo.h"
122 #include "llvm/Target/TargetMachine.h"
123 #include "llvm/Target/TargetRegisterInfo.h"
124 using namespace llvm;
125
126 #define DEBUG_TYPE "aarch64-collect-loh"
127
128 static cl::opt<bool>
129 PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden,
130 cl::desc("Restrict analysis to registers invovled"
131 " in LOHs"),
132 cl::init(true));
133
134 static cl::opt<bool>
135 BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
136 cl::desc("Restrict analysis at basic block scope"),
137 cl::init(true));
138
139 STATISTIC(NumADRPSimpleCandidate,
140 "Number of simplifiable ADRP dominate by another");
141 STATISTIC(NumADRPComplexCandidate2,
142 "Number of simplifiable ADRP reachable by 2 defs");
143 STATISTIC(NumADRPComplexCandidate3,
144 "Number of simplifiable ADRP reachable by 3 defs");
145 STATISTIC(NumADRPComplexCandidateOther,
146 "Number of simplifiable ADRP reachable by 4 or more defs");
147 STATISTIC(NumADDToSTRWithImm,
148 "Number of simplifiable STR with imm reachable by ADD");
149 STATISTIC(NumLDRToSTRWithImm,
150 "Number of simplifiable STR with imm reachable by LDR");
151 STATISTIC(NumADDToSTR, "Number of simplifiable STR reachable by ADD");
152 STATISTIC(NumLDRToSTR, "Number of simplifiable STR reachable by LDR");
153 STATISTIC(NumADDToLDRWithImm,
154 "Number of simplifiable LDR with imm reachable by ADD");
155 STATISTIC(NumLDRToLDRWithImm,
156 "Number of simplifiable LDR with imm reachable by LDR");
157 STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
158 STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
159 STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
160 STATISTIC(NumCplxLvl1, "Number of complex case of level 1");
161 STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1");
162 STATISTIC(NumCplxLvl2, "Number of complex case of level 2");
163 STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2");
164 STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
165 STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
166
167 namespace llvm {
168 void initializeAArch64CollectLOHPass(PassRegistry &);
169 }
170
171 #define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
172
173 namespace {
174 struct AArch64CollectLOH : public MachineFunctionPass {
175 static char ID;
AArch64CollectLOH__anonfe85bc120111::AArch64CollectLOH176 AArch64CollectLOH() : MachineFunctionPass(ID) {
177 initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry());
178 }
179
180 bool runOnMachineFunction(MachineFunction &MF) override;
181
getRequiredProperties__anonfe85bc120111::AArch64CollectLOH182 MachineFunctionProperties getRequiredProperties() const override {
183 return MachineFunctionProperties().set(
184 MachineFunctionProperties::Property::AllVRegsAllocated);
185 }
186
getPassName__anonfe85bc120111::AArch64CollectLOH187 const char *getPassName() const override {
188 return AARCH64_COLLECT_LOH_NAME;
189 }
190
getAnalysisUsage__anonfe85bc120111::AArch64CollectLOH191 void getAnalysisUsage(AnalysisUsage &AU) const override {
192 AU.setPreservesAll();
193 MachineFunctionPass::getAnalysisUsage(AU);
194 AU.addRequired<MachineDominatorTree>();
195 }
196
197 private:
198 };
199
200 /// A set of MachineInstruction.
201 typedef SetVector<const MachineInstr *> SetOfMachineInstr;
202 /// Map a basic block to a set of instructions per register.
203 /// This is used to represent the exposed uses of a basic block
204 /// per register.
205 typedef MapVector<const MachineBasicBlock *,
206 std::unique_ptr<SetOfMachineInstr[]>>
207 BlockToSetOfInstrsPerColor;
208 /// Map a basic block to an instruction per register.
209 /// This is used to represent the live-out definitions of a basic block
210 /// per register.
211 typedef MapVector<const MachineBasicBlock *,
212 std::unique_ptr<const MachineInstr *[]>>
213 BlockToInstrPerColor;
214 /// Map an instruction to a set of instructions. Used to represent the
215 /// mapping def to reachable uses or use to definitions.
216 typedef MapVector<const MachineInstr *, SetOfMachineInstr> InstrToInstrs;
217 /// Map a basic block to a BitVector.
218 /// This is used to record the kill registers per basic block.
219 typedef MapVector<const MachineBasicBlock *, BitVector> BlockToRegSet;
220
221 /// Map a register to a dense id.
222 typedef DenseMap<unsigned, unsigned> MapRegToId;
223 /// Map a dense id to a register. Used for debug purposes.
224 typedef SmallVector<unsigned, 32> MapIdToReg;
225 } // end anonymous namespace.
226
227 char AArch64CollectLOH::ID = 0;
228
229 INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
230 AARCH64_COLLECT_LOH_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)231 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
232 INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
233 AARCH64_COLLECT_LOH_NAME, false, false)
234
235 /// Given a couple (MBB, reg) get the corresponding set of instruction from
236 /// the given "sets".
237 /// If this couple does not reference any set, an empty set is added to "sets"
238 /// for this couple and returned.
239 /// \param nbRegs is used internally allocate some memory. It must be consistent
240 /// with the way sets is used.
241 static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
242 const MachineBasicBlock &MBB, unsigned reg,
243 unsigned nbRegs) {
244 SetOfMachineInstr *result;
245 BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
246 if (it != sets.end())
247 result = it->second.get();
248 else
249 result = (sets[&MBB] = make_unique<SetOfMachineInstr[]>(nbRegs)).get();
250
251 return result[reg];
252 }
253
254 /// Given a couple (reg, MI) get the corresponding set of instructions from the
255 /// the given "sets".
256 /// This is used to get the uses record in sets of a definition identified by
257 /// MI and reg, i.e., MI defines reg.
258 /// If the couple does not reference anything, an empty set is added to
259 /// "sets[reg]".
260 /// \pre set[reg] is valid.
getUses(InstrToInstrs * sets,unsigned reg,const MachineInstr & MI)261 static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
262 const MachineInstr &MI) {
263 return sets[reg][&MI];
264 }
265
266 /// Same as getUses but does not modify the input map: sets.
267 /// \return NULL if the couple (reg, MI) is not in sets.
getUses(const InstrToInstrs * sets,unsigned reg,const MachineInstr & MI)268 static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
269 const MachineInstr &MI) {
270 InstrToInstrs::const_iterator Res = sets[reg].find(&MI);
271 if (Res != sets[reg].end())
272 return &(Res->second);
273 return nullptr;
274 }
275
276 /// Initialize the reaching definition algorithm:
277 /// For each basic block BB in MF, record:
278 /// - its kill set.
279 /// - its reachable uses (uses that are exposed to BB's predecessors).
280 /// - its the generated definitions.
281 /// \param DummyOp if not NULL, specifies a Dummy Operation to be added to
282 /// the list of uses of exposed defintions.
283 /// \param ADRPMode specifies to only consider ADRP instructions for generated
284 /// definition. It also consider definitions of ADRP instructions as uses and
285 /// ignore other uses. The ADRPMode is used to collect the information for LHO
286 /// that involve ADRP operation only.
initReachingDef(const MachineFunction & MF,InstrToInstrs * ColorOpToReachedUses,BlockToInstrPerColor & Gen,BlockToRegSet & Kill,BlockToSetOfInstrsPerColor & ReachableUses,const MapRegToId & RegToId,const MachineInstr * DummyOp,bool ADRPMode)287 static void initReachingDef(const MachineFunction &MF,
288 InstrToInstrs *ColorOpToReachedUses,
289 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
290 BlockToSetOfInstrsPerColor &ReachableUses,
291 const MapRegToId &RegToId,
292 const MachineInstr *DummyOp, bool ADRPMode) {
293 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
294 unsigned NbReg = RegToId.size();
295
296 for (const MachineBasicBlock &MBB : MF) {
297 auto &BBGen = Gen[&MBB];
298 BBGen = make_unique<const MachineInstr *[]>(NbReg);
299 std::fill(BBGen.get(), BBGen.get() + NbReg, nullptr);
300
301 BitVector &BBKillSet = Kill[&MBB];
302 BBKillSet.resize(NbReg);
303 for (const MachineInstr &MI : MBB) {
304 bool IsADRP = MI.getOpcode() == AArch64::ADRP;
305
306 // Process uses first.
307 if (IsADRP || !ADRPMode)
308 for (const MachineOperand &MO : MI.operands()) {
309 // Treat ADRP def as use, as the goal of the analysis is to find
310 // ADRP defs reached by other ADRP defs.
311 if (!MO.isReg() || (!ADRPMode && !MO.isUse()) ||
312 (ADRPMode && (!IsADRP || !MO.isDef())))
313 continue;
314 unsigned CurReg = MO.getReg();
315 MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
316 if (ItCurRegId == RegToId.end())
317 continue;
318 CurReg = ItCurRegId->second;
319
320 // if CurReg has not been defined, this use is reachable.
321 if (!BBGen[CurReg] && !BBKillSet.test(CurReg))
322 getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI);
323 // current basic block definition for this color, if any, is in Gen.
324 if (BBGen[CurReg])
325 getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI);
326 }
327
328 // Process clobbers.
329 for (const MachineOperand &MO : MI.operands()) {
330 if (!MO.isRegMask())
331 continue;
332 // Clobbers kill the related colors.
333 const uint32_t *PreservedRegs = MO.getRegMask();
334
335 // Set generated regs.
336 for (const auto &Entry : RegToId) {
337 unsigned Reg = Entry.second;
338 // Use the global register ID when querying APIs external to this
339 // pass.
340 if (MachineOperand::clobbersPhysReg(PreservedRegs, Entry.first)) {
341 // Do not register clobbered definition for no ADRP.
342 // This definition is not used anyway (otherwise register
343 // allocation is wrong).
344 BBGen[Reg] = ADRPMode ? &MI : nullptr;
345 BBKillSet.set(Reg);
346 }
347 }
348 }
349
350 // Process register defs.
351 for (const MachineOperand &MO : MI.operands()) {
352 if (!MO.isReg() || !MO.isDef())
353 continue;
354 unsigned CurReg = MO.getReg();
355 MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
356 if (ItCurRegId == RegToId.end())
357 continue;
358
359 for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI) {
360 MapRegToId::const_iterator ItRegId = RegToId.find(*AI);
361 // If this alias has not been recorded, then it is not interesting
362 // for the current analysis.
363 // We can end up in this situation because of tuple registers.
364 // E.g., Let say we are interested in S1. When we register
365 // S1, we will also register its aliases and in particular
366 // the tuple Q1_Q2.
367 // Now, when we encounter Q1_Q2, we will look through its aliases
368 // and will find that S2 is not registered.
369 if (ItRegId == RegToId.end())
370 continue;
371
372 BBKillSet.set(ItRegId->second);
373 BBGen[ItRegId->second] = &MI;
374 }
375 BBGen[ItCurRegId->second] = &MI;
376 }
377 }
378
379 // If we restrict our analysis to basic block scope, conservatively add a
380 // dummy
381 // use for each generated value.
382 if (!ADRPMode && DummyOp && !MBB.succ_empty())
383 for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
384 if (BBGen[CurReg])
385 getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp);
386 }
387 }
388
389 /// Reaching def core algorithm:
390 /// while an Out has changed
391 /// for each bb
392 /// for each color
393 /// In[bb][color] = U Out[bb.predecessors][color]
394 /// insert reachableUses[bb][color] in each in[bb][color]
395 /// op.reachedUses
396 ///
397 /// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
reachingDefAlgorithm(const MachineFunction & MF,InstrToInstrs * ColorOpToReachedUses,BlockToSetOfInstrsPerColor & In,BlockToSetOfInstrsPerColor & Out,BlockToInstrPerColor & Gen,BlockToRegSet & Kill,BlockToSetOfInstrsPerColor & ReachableUses,unsigned NbReg)398 static void reachingDefAlgorithm(const MachineFunction &MF,
399 InstrToInstrs *ColorOpToReachedUses,
400 BlockToSetOfInstrsPerColor &In,
401 BlockToSetOfInstrsPerColor &Out,
402 BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
403 BlockToSetOfInstrsPerColor &ReachableUses,
404 unsigned NbReg) {
405 bool HasChanged;
406 do {
407 HasChanged = false;
408 for (const MachineBasicBlock &MBB : MF) {
409 unsigned CurReg;
410 for (CurReg = 0; CurReg < NbReg; ++CurReg) {
411 SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
412 SetOfMachineInstr &BBReachableUses =
413 getSet(ReachableUses, MBB, CurReg, NbReg);
414 SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
415 unsigned Size = BBOutSet.size();
416 // In[bb][color] = U Out[bb.predecessors][color]
417 for (const MachineBasicBlock *PredMBB : MBB.predecessors()) {
418 SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
419 BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
420 }
421 // insert reachableUses[bb][color] in each in[bb][color] op.reachedses
422 for (const MachineInstr *MI : BBInSet) {
423 SetOfMachineInstr &OpReachedUses =
424 getUses(ColorOpToReachedUses, CurReg, *MI);
425 OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
426 }
427 // Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
428 if (!Kill[&MBB].test(CurReg))
429 BBOutSet.insert(BBInSet.begin(), BBInSet.end());
430 if (Gen[&MBB][CurReg])
431 BBOutSet.insert(Gen[&MBB][CurReg]);
432 HasChanged |= BBOutSet.size() != Size;
433 }
434 }
435 } while (HasChanged);
436 }
437
438 /// Reaching definition algorithm.
439 /// \param MF function on which the algorithm will operate.
440 /// \param[out] ColorOpToReachedUses will contain the result of the reaching
441 /// def algorithm.
442 /// \param ADRPMode specify whether the reaching def algorithm should be tuned
443 /// for ADRP optimization. \see initReachingDef for more details.
444 /// \param DummyOp if not NULL, the algorithm will work at
445 /// basic block scope and will set for every exposed definition a use to
446 /// @p DummyOp.
447 /// \pre ColorOpToReachedUses is an array of at least number of registers of
448 /// InstrToInstrs.
reachingDef(const MachineFunction & MF,InstrToInstrs * ColorOpToReachedUses,const MapRegToId & RegToId,bool ADRPMode=false,const MachineInstr * DummyOp=nullptr)449 static void reachingDef(const MachineFunction &MF,
450 InstrToInstrs *ColorOpToReachedUses,
451 const MapRegToId &RegToId, bool ADRPMode = false,
452 const MachineInstr *DummyOp = nullptr) {
453 // structures:
454 // For each basic block.
455 // Out: a set per color of definitions that reach the
456 // out boundary of this block.
457 // In: Same as Out but for in boundary.
458 // Gen: generated color in this block (one operation per color).
459 // Kill: register set of killed color in this block.
460 // ReachableUses: a set per color of uses (operation) reachable
461 // for "In" definitions.
462 BlockToSetOfInstrsPerColor Out, In, ReachableUses;
463 BlockToInstrPerColor Gen;
464 BlockToRegSet Kill;
465
466 // Initialize Gen, kill and reachableUses.
467 initReachingDef(MF, ColorOpToReachedUses, Gen, Kill, ReachableUses, RegToId,
468 DummyOp, ADRPMode);
469
470 // Algo.
471 if (!DummyOp)
472 reachingDefAlgorithm(MF, ColorOpToReachedUses, In, Out, Gen, Kill,
473 ReachableUses, RegToId.size());
474 }
475
476 #ifndef NDEBUG
477 /// print the result of the reaching definition algorithm.
printReachingDef(const InstrToInstrs * ColorOpToReachedUses,unsigned NbReg,const TargetRegisterInfo * TRI,const MapIdToReg & IdToReg)478 static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
479 unsigned NbReg, const TargetRegisterInfo *TRI,
480 const MapIdToReg &IdToReg) {
481 unsigned CurReg;
482 for (CurReg = 0; CurReg < NbReg; ++CurReg) {
483 if (ColorOpToReachedUses[CurReg].empty())
484 continue;
485 DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
486
487 for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
488 DEBUG(dbgs() << "Def:\n");
489 DEBUG(DefsIt.first->print(dbgs()));
490 DEBUG(dbgs() << "Reachable uses:\n");
491 for (const MachineInstr *MI : DefsIt.second) {
492 DEBUG(MI->print(dbgs()));
493 }
494 }
495 }
496 }
497 #endif // NDEBUG
498
499 /// Answer the following question: Can Def be one of the definition
500 /// involved in a part of a LOH?
canDefBePartOfLOH(const MachineInstr * Def)501 static bool canDefBePartOfLOH(const MachineInstr *Def) {
502 unsigned Opc = Def->getOpcode();
503 // Accept ADRP, ADDLow and LOADGot.
504 switch (Opc) {
505 default:
506 return false;
507 case AArch64::ADRP:
508 return true;
509 case AArch64::ADDXri:
510 // Check immediate to see if the immediate is an address.
511 switch (Def->getOperand(2).getType()) {
512 default:
513 return false;
514 case MachineOperand::MO_GlobalAddress:
515 case MachineOperand::MO_JumpTableIndex:
516 case MachineOperand::MO_ConstantPoolIndex:
517 case MachineOperand::MO_BlockAddress:
518 return true;
519 }
520 case AArch64::LDRXui:
521 // Check immediate to see if the immediate is an address.
522 switch (Def->getOperand(2).getType()) {
523 default:
524 return false;
525 case MachineOperand::MO_GlobalAddress:
526 return true;
527 }
528 }
529 // Unreachable.
530 return false;
531 }
532
533 /// Check whether the given instruction can the end of a LOH chain involving a
534 /// store.
isCandidateStore(const MachineInstr * Instr)535 static bool isCandidateStore(const MachineInstr *Instr) {
536 switch (Instr->getOpcode()) {
537 default:
538 return false;
539 case AArch64::STRBBui:
540 case AArch64::STRHHui:
541 case AArch64::STRBui:
542 case AArch64::STRHui:
543 case AArch64::STRWui:
544 case AArch64::STRXui:
545 case AArch64::STRSui:
546 case AArch64::STRDui:
547 case AArch64::STRQui:
548 // In case we have str xA, [xA, #imm], this is two different uses
549 // of xA and we cannot fold, otherwise the xA stored may be wrong,
550 // even if #imm == 0.
551 if (Instr->getOperand(0).getReg() != Instr->getOperand(1).getReg())
552 return true;
553 }
554 return false;
555 }
556
557 /// Given the result of a reaching definition algorithm in ColorOpToReachedUses,
558 /// Build the Use to Defs information and filter out obvious non-LOH candidates.
559 /// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
560 /// In non-ADRPMode, non-LOH candidates are "uses" with several definition,
561 /// i.e., no simple chain.
562 /// \param ADRPMode -- \see initReachingDef.
reachedUsesToDefs(InstrToInstrs & UseToReachingDefs,const InstrToInstrs * ColorOpToReachedUses,const MapRegToId & RegToId,bool ADRPMode=false)563 static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
564 const InstrToInstrs *ColorOpToReachedUses,
565 const MapRegToId &RegToId,
566 bool ADRPMode = false) {
567
568 SetOfMachineInstr NotCandidate;
569 unsigned NbReg = RegToId.size();
570 MapRegToId::const_iterator EndIt = RegToId.end();
571 for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg) {
572 // If this color is never defined, continue.
573 if (ColorOpToReachedUses[CurReg].empty())
574 continue;
575
576 for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
577 for (const MachineInstr *MI : DefsIt.second) {
578 const MachineInstr *Def = DefsIt.first;
579 MapRegToId::const_iterator It;
580 // if all the reaching defs are not adrp, this use will not be
581 // simplifiable.
582 if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) ||
583 (!ADRPMode && !canDefBePartOfLOH(Def)) ||
584 (!ADRPMode && isCandidateStore(MI) &&
585 // store are LOH candidate iff the end of the chain is used as
586 // base.
587 ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt ||
588 It->second != CurReg))) {
589 NotCandidate.insert(MI);
590 continue;
591 }
592 // Do not consider self reaching as a simplifiable case for ADRP.
593 if (!ADRPMode || MI != DefsIt.first) {
594 UseToReachingDefs[MI].insert(DefsIt.first);
595 // If UsesIt has several reaching definitions, it is not
596 // candidate for simplificaton in non-ADRPMode.
597 if (!ADRPMode && UseToReachingDefs[MI].size() > 1)
598 NotCandidate.insert(MI);
599 }
600 }
601 }
602 }
603 for (const MachineInstr *Elem : NotCandidate) {
604 DEBUG(dbgs() << "Too many reaching defs: " << *Elem << "\n");
605 // It would have been better if we could just remove the entry
606 // from the map. Because of that, we have to filter the garbage
607 // (second.empty) in the subsequence analysis.
608 UseToReachingDefs[Elem].clear();
609 }
610 }
611
612 /// Based on the use to defs information (in ADRPMode), compute the
613 /// opportunities of LOH ADRP-related.
computeADRP(const InstrToInstrs & UseToDefs,AArch64FunctionInfo & AArch64FI,const MachineDominatorTree * MDT)614 static void computeADRP(const InstrToInstrs &UseToDefs,
615 AArch64FunctionInfo &AArch64FI,
616 const MachineDominatorTree *MDT) {
617 DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
618 for (const auto &Entry : UseToDefs) {
619 unsigned Size = Entry.second.size();
620 if (Size == 0)
621 continue;
622 if (Size == 1) {
623 const MachineInstr *L2 = *Entry.second.begin();
624 const MachineInstr *L1 = Entry.first;
625 if (!MDT->dominates(L2, L1)) {
626 DEBUG(dbgs() << "Dominance check failed:\n" << *L2 << '\n' << *L1
627 << '\n');
628 continue;
629 }
630 DEBUG(dbgs() << "Record AdrpAdrp:\n" << *L2 << '\n' << *L1 << '\n');
631 AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, {L2, L1});
632 ++NumADRPSimpleCandidate;
633 }
634 #ifdef DEBUG
635 else if (Size == 2)
636 ++NumADRPComplexCandidate2;
637 else if (Size == 3)
638 ++NumADRPComplexCandidate3;
639 else
640 ++NumADRPComplexCandidateOther;
641 #endif
642 // if Size < 1, the use should have been removed from the candidates
643 assert(Size >= 1 && "No reaching defs for that use!");
644 }
645 }
646
647 /// Check whether the given instruction can be the end of a LOH chain
648 /// involving a load.
isCandidateLoad(const MachineInstr * Instr)649 static bool isCandidateLoad(const MachineInstr *Instr) {
650 switch (Instr->getOpcode()) {
651 default:
652 return false;
653 case AArch64::LDRSBWui:
654 case AArch64::LDRSBXui:
655 case AArch64::LDRSHWui:
656 case AArch64::LDRSHXui:
657 case AArch64::LDRSWui:
658 case AArch64::LDRBui:
659 case AArch64::LDRHui:
660 case AArch64::LDRWui:
661 case AArch64::LDRXui:
662 case AArch64::LDRSui:
663 case AArch64::LDRDui:
664 case AArch64::LDRQui:
665 if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)
666 return false;
667 return true;
668 }
669 // Unreachable.
670 return false;
671 }
672
673 /// Check whether the given instruction can load a litteral.
supportLoadFromLiteral(const MachineInstr * Instr)674 static bool supportLoadFromLiteral(const MachineInstr *Instr) {
675 switch (Instr->getOpcode()) {
676 default:
677 return false;
678 case AArch64::LDRSWui:
679 case AArch64::LDRWui:
680 case AArch64::LDRXui:
681 case AArch64::LDRSui:
682 case AArch64::LDRDui:
683 case AArch64::LDRQui:
684 return true;
685 }
686 // Unreachable.
687 return false;
688 }
689
690 /// Check whether the given instruction is a LOH candidate.
691 /// \param UseToDefs is used to check that Instr is at the end of LOH supported
692 /// chain.
693 /// \pre UseToDefs contains only on def per use, i.e., obvious non candidate are
694 /// already been filtered out.
isCandidate(const MachineInstr * Instr,const InstrToInstrs & UseToDefs,const MachineDominatorTree * MDT)695 static bool isCandidate(const MachineInstr *Instr,
696 const InstrToInstrs &UseToDefs,
697 const MachineDominatorTree *MDT) {
698 if (!isCandidateLoad(Instr) && !isCandidateStore(Instr))
699 return false;
700
701 const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
702 if (Def->getOpcode() != AArch64::ADRP) {
703 // At this point, Def is ADDXri or LDRXui of the right type of
704 // symbol, because we filtered out the uses that were not defined
705 // by these kind of instructions (+ ADRP).
706
707 // Check if this forms a simple chain: each intermediate node must
708 // dominates the next one.
709 if (!MDT->dominates(Def, Instr))
710 return false;
711 // Move one node up in the simple chain.
712 if (UseToDefs.find(Def) ==
713 UseToDefs.end()
714 // The map may contain garbage we have to ignore.
715 ||
716 UseToDefs.find(Def)->second.empty())
717 return false;
718 Instr = Def;
719 Def = *UseToDefs.find(Def)->second.begin();
720 }
721 // Check if we reached the top of the simple chain:
722 // - top is ADRP.
723 // - check the simple chain property: each intermediate node must
724 // dominates the next one.
725 if (Def->getOpcode() == AArch64::ADRP)
726 return MDT->dominates(Def, Instr);
727 return false;
728 }
729
registerADRCandidate(const MachineInstr & Use,const InstrToInstrs & UseToDefs,const InstrToInstrs * DefsPerColorToUses,AArch64FunctionInfo & AArch64FI,SetOfMachineInstr * InvolvedInLOHs,const MapRegToId & RegToId)730 static bool registerADRCandidate(const MachineInstr &Use,
731 const InstrToInstrs &UseToDefs,
732 const InstrToInstrs *DefsPerColorToUses,
733 AArch64FunctionInfo &AArch64FI,
734 SetOfMachineInstr *InvolvedInLOHs,
735 const MapRegToId &RegToId) {
736 // Look for opportunities to turn ADRP -> ADD or
737 // ADRP -> LDR GOTPAGEOFF into ADR.
738 // If ADRP has more than one use. Give up.
739 if (Use.getOpcode() != AArch64::ADDXri &&
740 (Use.getOpcode() != AArch64::LDRXui ||
741 !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
742 return false;
743 InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
744 // The map may contain garbage that we need to ignore.
745 if (It == UseToDefs.end() || It->second.empty())
746 return false;
747 const MachineInstr &Def = **It->second.begin();
748 if (Def.getOpcode() != AArch64::ADRP)
749 return false;
750 // Check the number of users of ADRP.
751 const SetOfMachineInstr *Users =
752 getUses(DefsPerColorToUses,
753 RegToId.find(Def.getOperand(0).getReg())->second, Def);
754 if (Users->size() > 1) {
755 ++NumADRComplexCandidate;
756 return false;
757 }
758 ++NumADRSimpleCandidate;
759 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
760 "ADRP already involved in LOH.");
761 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
762 "ADD already involved in LOH.");
763 DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');
764
765 AArch64FI.addLOHDirective(
766 Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd : MCLOH_AdrpLdrGot,
767 {&Def, &Use});
768 return true;
769 }
770
771 /// Based on the use to defs information (in non-ADRPMode), compute the
772 /// opportunities of LOH non-ADRP-related
computeOthers(const InstrToInstrs & UseToDefs,const InstrToInstrs * DefsPerColorToUses,AArch64FunctionInfo & AArch64FI,const MapRegToId & RegToId,const MachineDominatorTree * MDT)773 static void computeOthers(const InstrToInstrs &UseToDefs,
774 const InstrToInstrs *DefsPerColorToUses,
775 AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId,
776 const MachineDominatorTree *MDT) {
777 SetOfMachineInstr *InvolvedInLOHs = nullptr;
778 #ifdef DEBUG
779 SetOfMachineInstr InvolvedInLOHsStorage;
780 InvolvedInLOHs = &InvolvedInLOHsStorage;
781 #endif // DEBUG
782 DEBUG(dbgs() << "*** Compute LOH for Others\n");
783 // ADRP -> ADD/LDR -> LDR/STR pattern.
784 // Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
785
786 // FIXME: When the statistics are not important,
787 // This initial filtering loop can be merged into the next loop.
788 // Currently, we didn't do it to have the same code for both DEBUG and
789 // NDEBUG builds. Indeed, the iterator of the second loop would need
790 // to be changed.
791 SetOfMachineInstr PotentialCandidates;
792 SetOfMachineInstr PotentialADROpportunities;
793 for (auto &Use : UseToDefs) {
794 // If no definition is available, this is a non candidate.
795 if (Use.second.empty())
796 continue;
797 // Keep only instructions that are load or store and at the end of
798 // a ADRP -> ADD/LDR/Nothing chain.
799 // We already filtered out the no-chain cases.
800 if (!isCandidate(Use.first, UseToDefs, MDT)) {
801 PotentialADROpportunities.insert(Use.first);
802 continue;
803 }
804 PotentialCandidates.insert(Use.first);
805 }
806
807 // Make the following distinctions for statistics as the linker does
808 // know how to decode instructions:
809 // - ADD/LDR/Nothing make there different patterns.
810 // - LDR/STR make two different patterns.
811 // Hence, 6 - 1 base patterns.
812 // (because ADRP-> Nothing -> STR is not simplifiable)
813
814 // The linker is only able to have a simple semantic, i.e., if pattern A
815 // do B.
816 // However, we want to see the opportunity we may miss if we were able to
817 // catch more complex cases.
818
819 // PotentialCandidates are result of a chain ADRP -> ADD/LDR ->
820 // A potential candidate becomes a candidate, if its current immediate
821 // operand is zero and all nodes of the chain have respectively only one user
822 #ifdef DEBUG
823 SetOfMachineInstr DefsOfPotentialCandidates;
824 #endif
825 for (const MachineInstr *Candidate : PotentialCandidates) {
826 // Get the definition of the candidate i.e., ADD or LDR.
827 const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
828 // Record the elements of the chain.
829 const MachineInstr *L1 = Def;
830 const MachineInstr *L2 = nullptr;
831 unsigned ImmediateDefOpc = Def->getOpcode();
832 if (Def->getOpcode() != AArch64::ADRP) {
833 // Check the number of users of this node.
834 const SetOfMachineInstr *Users =
835 getUses(DefsPerColorToUses,
836 RegToId.find(Def->getOperand(0).getReg())->second, *Def);
837 if (Users->size() > 1) {
838 #ifdef DEBUG
839 // if all the uses of this def are in potential candidate, this is
840 // a complex candidate of level 2.
841 bool IsLevel2 = true;
842 for (const MachineInstr *MI : *Users) {
843 if (!PotentialCandidates.count(MI)) {
844 ++NumTooCplxLvl2;
845 IsLevel2 = false;
846 break;
847 }
848 }
849 if (IsLevel2)
850 ++NumCplxLvl2;
851 #endif // DEBUG
852 PotentialADROpportunities.insert(Def);
853 continue;
854 }
855 L2 = Def;
856 Def = *UseToDefs.find(Def)->second.begin();
857 L1 = Def;
858 } // else the element in the middle of the chain is nothing, thus
859 // Def already contains the first element of the chain.
860
861 // Check the number of users of the first node in the chain, i.e., ADRP
862 const SetOfMachineInstr *Users =
863 getUses(DefsPerColorToUses,
864 RegToId.find(Def->getOperand(0).getReg())->second, *Def);
865 if (Users->size() > 1) {
866 #ifdef DEBUG
867 // if all the uses of this def are in the defs of the potential candidate,
868 // this is a complex candidate of level 1
869 if (DefsOfPotentialCandidates.empty()) {
870 // lazy init
871 DefsOfPotentialCandidates = PotentialCandidates;
872 for (const MachineInstr *Candidate : PotentialCandidates) {
873 if (!UseToDefs.find(Candidate)->second.empty())
874 DefsOfPotentialCandidates.insert(
875 *UseToDefs.find(Candidate)->second.begin());
876 }
877 }
878 bool Found = false;
879 for (auto &Use : *Users) {
880 if (!DefsOfPotentialCandidates.count(Use)) {
881 ++NumTooCplxLvl1;
882 Found = true;
883 break;
884 }
885 }
886 if (!Found)
887 ++NumCplxLvl1;
888 #endif // DEBUG
889 continue;
890 }
891
892 bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
893 // If the chain is three instructions long and ldr is the second element,
894 // then this ldr must load form GOT, otherwise this is not a correct chain.
895 if (L2 && !IsL2Add &&
896 !(L2->getOperand(2).getTargetFlags() & AArch64II::MO_GOT))
897 continue;
898 SmallVector<const MachineInstr *, 3> Args;
899 MCLOHType Kind;
900 if (isCandidateLoad(Candidate)) {
901 if (!L2) {
902 // At this point, the candidate LOH indicates that the ldr instruction
903 // may use a direct access to the symbol. There is not such encoding
904 // for loads of byte and half.
905 if (!supportLoadFromLiteral(Candidate))
906 continue;
907
908 DEBUG(dbgs() << "Record AdrpLdr:\n" << *L1 << '\n' << *Candidate
909 << '\n');
910 Kind = MCLOH_AdrpLdr;
911 Args.push_back(L1);
912 Args.push_back(Candidate);
913 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
914 "L1 already involved in LOH.");
915 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
916 "Candidate already involved in LOH.");
917 ++NumADRPToLDR;
918 } else {
919 DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
920 << "Ldr:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
921 << '\n');
922
923 Kind = IsL2Add ? MCLOH_AdrpAddLdr : MCLOH_AdrpLdrGotLdr;
924 Args.push_back(L1);
925 Args.push_back(L2);
926 Args.push_back(Candidate);
927
928 PotentialADROpportunities.remove(L2);
929 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
930 "L1 already involved in LOH.");
931 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
932 "L2 already involved in LOH.");
933 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
934 "Candidate already involved in LOH.");
935 #ifdef DEBUG
936 // get the immediate of the load
937 if (Candidate->getOperand(2).getImm() == 0)
938 if (ImmediateDefOpc == AArch64::ADDXri)
939 ++NumADDToLDR;
940 else
941 ++NumLDRToLDR;
942 else if (ImmediateDefOpc == AArch64::ADDXri)
943 ++NumADDToLDRWithImm;
944 else
945 ++NumLDRToLDRWithImm;
946 #endif // DEBUG
947 }
948 } else {
949 if (ImmediateDefOpc == AArch64::ADRP)
950 continue;
951 else {
952
953 DEBUG(dbgs() << "Record Adrp" << (IsL2Add ? "Add" : "LdrGot")
954 << "Str:\n" << *L1 << '\n' << *L2 << '\n' << *Candidate
955 << '\n');
956
957 Kind = IsL2Add ? MCLOH_AdrpAddStr : MCLOH_AdrpLdrGotStr;
958 Args.push_back(L1);
959 Args.push_back(L2);
960 Args.push_back(Candidate);
961
962 PotentialADROpportunities.remove(L2);
963 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L1)) &&
964 "L1 already involved in LOH.");
965 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(L2)) &&
966 "L2 already involved in LOH.");
967 assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
968 "Candidate already involved in LOH.");
969 #ifdef DEBUG
970 // get the immediate of the store
971 if (Candidate->getOperand(2).getImm() == 0)
972 if (ImmediateDefOpc == AArch64::ADDXri)
973 ++NumADDToSTR;
974 else
975 ++NumLDRToSTR;
976 else if (ImmediateDefOpc == AArch64::ADDXri)
977 ++NumADDToSTRWithImm;
978 else
979 ++NumLDRToSTRWithImm;
980 #endif // DEBUG
981 }
982 }
983 AArch64FI.addLOHDirective(Kind, Args);
984 }
985
986 // Now, we grabbed all the big patterns, check ADR opportunities.
987 for (const MachineInstr *Candidate : PotentialADROpportunities)
988 registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
989 InvolvedInLOHs, RegToId);
990 }
991
992 /// Look for every register defined by potential LOHs candidates.
993 /// Map these registers with dense id in @p RegToId and vice-versa in
994 /// @p IdToReg. @p IdToReg is populated only in DEBUG mode.
collectInvolvedReg(const MachineFunction & MF,MapRegToId & RegToId,MapIdToReg & IdToReg,const TargetRegisterInfo * TRI)995 static void collectInvolvedReg(const MachineFunction &MF, MapRegToId &RegToId,
996 MapIdToReg &IdToReg,
997 const TargetRegisterInfo *TRI) {
998 unsigned CurRegId = 0;
999 if (!PreCollectRegister) {
1000 unsigned NbReg = TRI->getNumRegs();
1001 for (; CurRegId < NbReg; ++CurRegId) {
1002 RegToId[CurRegId] = CurRegId;
1003 DEBUG(IdToReg.push_back(CurRegId));
1004 DEBUG(assert(IdToReg[CurRegId] == CurRegId && "Reg index mismatches"));
1005 }
1006 return;
1007 }
1008
1009 DEBUG(dbgs() << "** Collect Involved Register\n");
1010 for (const auto &MBB : MF) {
1011 for (const MachineInstr &MI : MBB) {
1012 if (!canDefBePartOfLOH(&MI) &&
1013 !isCandidateLoad(&MI) && !isCandidateStore(&MI))
1014 continue;
1015
1016 // Process defs
1017 for (MachineInstr::const_mop_iterator IO = MI.operands_begin(),
1018 IOEnd = MI.operands_end();
1019 IO != IOEnd; ++IO) {
1020 if (!IO->isReg() || !IO->isDef())
1021 continue;
1022 unsigned CurReg = IO->getReg();
1023 for (MCRegAliasIterator AI(CurReg, TRI, true); AI.isValid(); ++AI)
1024 if (RegToId.find(*AI) == RegToId.end()) {
1025 DEBUG(IdToReg.push_back(*AI);
1026 assert(IdToReg[CurRegId] == *AI &&
1027 "Reg index mismatches insertion index."));
1028 RegToId[*AI] = CurRegId++;
1029 DEBUG(dbgs() << "Register: " << PrintReg(*AI, TRI) << '\n');
1030 }
1031 }
1032 }
1033 }
1034 }
1035
runOnMachineFunction(MachineFunction & MF)1036 bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
1037 if (skipFunction(*MF.getFunction()))
1038 return false;
1039
1040 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1041 const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
1042
1043 MapRegToId RegToId;
1044 MapIdToReg IdToReg;
1045 AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>();
1046 assert(AArch64FI && "No MachineFunctionInfo for this function!");
1047
1048 DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n');
1049
1050 collectInvolvedReg(MF, RegToId, IdToReg, TRI);
1051 if (RegToId.empty())
1052 return false;
1053
1054 MachineInstr *DummyOp = nullptr;
1055 if (BasicBlockScopeOnly) {
1056 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
1057 // For local analysis, create a dummy operation to record uses that are not
1058 // local.
1059 DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
1060 }
1061
1062 unsigned NbReg = RegToId.size();
1063 bool Modified = false;
1064
1065 // Start with ADRP.
1066 InstrToInstrs *ColorOpToReachedUses = new InstrToInstrs[NbReg];
1067
1068 // Compute the reaching def in ADRP mode, meaning ADRP definitions
1069 // are first considered as uses.
1070 reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp);
1071 DEBUG(dbgs() << "ADRP reaching defs\n");
1072 DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1073
1074 // Translate the definition to uses map into a use to definitions map to ease
1075 // statistic computation.
1076 InstrToInstrs ADRPToReachingDefs;
1077 reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
1078
1079 // Compute LOH for ADRP.
1080 computeADRP(ADRPToReachingDefs, *AArch64FI, MDT);
1081 delete[] ColorOpToReachedUses;
1082
1083 // Continue with general ADRP -> ADD/LDR -> LDR/STR pattern.
1084 ColorOpToReachedUses = new InstrToInstrs[NbReg];
1085
1086 // first perform a regular reaching def analysis.
1087 reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp);
1088 DEBUG(dbgs() << "All reaching defs\n");
1089 DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
1090
1091 // Turn that into a use to defs to ease statistic computation.
1092 InstrToInstrs UsesToReachingDefs;
1093 reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
1094
1095 // Compute other than AdrpAdrp LOH.
1096 computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId,
1097 MDT);
1098 delete[] ColorOpToReachedUses;
1099
1100 if (BasicBlockScopeOnly)
1101 MF.DeleteMachineInstr(DummyOp);
1102
1103 return Modified;
1104 }
1105
1106 /// createAArch64CollectLOHPass - returns an instance of the Statistic for
1107 /// linker optimization pass.
createAArch64CollectLOHPass()1108 FunctionPass *llvm::createAArch64CollectLOHPass() {
1109 return new AArch64CollectLOH();
1110 }
1111