1 //===-- AArch64BranchRelaxation.cpp - AArch64 branch relaxation -----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //===----------------------------------------------------------------------===//
11 
12 #include "AArch64.h"
13 #include "AArch64InstrInfo.h"
14 #include "AArch64MachineFunctionInfo.h"
15 #include "AArch64Subtarget.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Support/ErrorHandling.h"
23 #include "llvm/Support/Format.h"
24 #include "llvm/Support/raw_ostream.h"
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "aarch64-branch-relax"
28 
29 static cl::opt<bool>
30 BranchRelaxation("aarch64-branch-relax", cl::Hidden, cl::init(true),
31                  cl::desc("Relax out of range conditional branches"));
32 
33 static cl::opt<unsigned>
34 TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
35                     cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
36 
37 static cl::opt<unsigned>
38 CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
39                     cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
40 
41 static cl::opt<unsigned>
42 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
43                     cl::desc("Restrict range of Bcc instructions (DEBUG)"));
44 
45 STATISTIC(NumSplit, "Number of basic blocks split");
46 STATISTIC(NumRelaxed, "Number of conditional branches relaxed");
47 
48 namespace {
49 class AArch64BranchRelaxation : public MachineFunctionPass {
50   /// BasicBlockInfo - Information about the offset and size of a single
51   /// basic block.
52   struct BasicBlockInfo {
53     /// Offset - Distance from the beginning of the function to the beginning
54     /// of this basic block.
55     ///
56     /// The offset is always aligned as required by the basic block.
57     unsigned Offset;
58 
59     /// Size - Size of the basic block in bytes.  If the block contains
60     /// inline assembly, this is a worst case estimate.
61     ///
62     /// The size does not include any alignment padding whether from the
63     /// beginning of the block, or from an aligned jump table at the end.
64     unsigned Size;
65 
BasicBlockInfo__anondc10878e0111::AArch64BranchRelaxation::BasicBlockInfo66     BasicBlockInfo() : Offset(0), Size(0) {}
67 
68     /// Compute the offset immediately following this block.  If LogAlign is
69     /// specified, return the offset the successor block will get if it has
70     /// this alignment.
postOffset__anondc10878e0111::AArch64BranchRelaxation::BasicBlockInfo71     unsigned postOffset(unsigned LogAlign = 0) const {
72       unsigned PO = Offset + Size;
73       unsigned Align = 1 << LogAlign;
74       return (PO + Align - 1) / Align * Align;
75     }
76   };
77 
78   SmallVector<BasicBlockInfo, 16> BlockInfo;
79 
80   MachineFunction *MF;
81   const AArch64InstrInfo *TII;
82 
83   bool relaxBranchInstructions();
84   void scanFunction();
85   MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
86   void adjustBlockOffsets(MachineBasicBlock &MBB);
87   bool isBlockInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
88   bool fixupConditionalBranch(MachineInstr *MI);
89   void computeBlockSize(const MachineBasicBlock &MBB);
90   unsigned getInstrOffset(MachineInstr *MI) const;
91   void dumpBBs();
92   void verify();
93 
94 public:
95   static char ID;
AArch64BranchRelaxation()96   AArch64BranchRelaxation() : MachineFunctionPass(ID) {}
97 
98   bool runOnMachineFunction(MachineFunction &MF) override;
99 
getPassName() const100   const char *getPassName() const override {
101     return "AArch64 branch relaxation pass";
102   }
103 };
104 char AArch64BranchRelaxation::ID = 0;
105 }
106 
107 /// verify - check BBOffsets, BBSizes, alignment of islands
verify()108 void AArch64BranchRelaxation::verify() {
109 #ifndef NDEBUG
110   unsigned PrevNum = MF->begin()->getNumber();
111   for (MachineBasicBlock &MBB : *MF) {
112     unsigned Align = MBB.getAlignment();
113     unsigned Num = MBB.getNumber();
114     assert(BlockInfo[Num].Offset % (1u << Align) == 0);
115     assert(!Num || BlockInfo[PrevNum].postOffset() <= BlockInfo[Num].Offset);
116     PrevNum = Num;
117   }
118 #endif
119 }
120 
121 /// print block size and offset information - debugging
dumpBBs()122 void AArch64BranchRelaxation::dumpBBs() {
123   for (auto &MBB : *MF) {
124     const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
125     dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
126            << format("size=%#x\n", BBI.Size);
127   }
128 }
129 
130 /// BBHasFallthrough - Return true if the specified basic block can fallthrough
131 /// into the block immediately after it.
BBHasFallthrough(MachineBasicBlock * MBB)132 static bool BBHasFallthrough(MachineBasicBlock *MBB) {
133   // Get the next machine basic block in the function.
134   MachineFunction::iterator MBBI = MBB;
135   // Can't fall off end of function.
136   MachineBasicBlock *NextBB = std::next(MBBI);
137   if (NextBB == MBB->getParent()->end())
138     return false;
139 
140   for (MachineBasicBlock *S : MBB->successors())
141     if (S == NextBB)
142       return true;
143 
144   return false;
145 }
146 
147 /// scanFunction - Do the initial scan of the function, building up
148 /// information about each block.
scanFunction()149 void AArch64BranchRelaxation::scanFunction() {
150   BlockInfo.clear();
151   BlockInfo.resize(MF->getNumBlockIDs());
152 
153   // First thing, compute the size of all basic blocks, and see if the function
154   // has any inline assembly in it. If so, we have to be conservative about
155   // alignment assumptions, as we don't know for sure the size of any
156   // instructions in the inline assembly.
157   for (MachineBasicBlock &MBB : *MF)
158     computeBlockSize(MBB);
159 
160   // Compute block offsets and known bits.
161   adjustBlockOffsets(*MF->begin());
162 }
163 
164 /// computeBlockSize - Compute the size for MBB.
165 /// This function updates BlockInfo directly.
computeBlockSize(const MachineBasicBlock & MBB)166 void AArch64BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) {
167   unsigned Size = 0;
168   for (const MachineInstr &MI : MBB)
169     Size += TII->GetInstSizeInBytes(&MI);
170   BlockInfo[MBB.getNumber()].Size = Size;
171 }
172 
173 /// getInstrOffset - Return the current offset of the specified machine
174 /// instruction from the start of the function.  This offset changes as stuff is
175 /// moved around inside the function.
getInstrOffset(MachineInstr * MI) const176 unsigned AArch64BranchRelaxation::getInstrOffset(MachineInstr *MI) const {
177   MachineBasicBlock *MBB = MI->getParent();
178 
179   // The offset is composed of two things: the sum of the sizes of all MBB's
180   // before this instruction's block, and the offset from the start of the block
181   // it is in.
182   unsigned Offset = BlockInfo[MBB->getNumber()].Offset;
183 
184   // Sum instructions before MI in MBB.
185   for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
186     assert(I != MBB->end() && "Didn't find MI in its own basic block?");
187     Offset += TII->GetInstSizeInBytes(I);
188   }
189   return Offset;
190 }
191 
adjustBlockOffsets(MachineBasicBlock & Start)192 void AArch64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) {
193   unsigned PrevNum = Start.getNumber();
194   for (auto &MBB : make_range(MachineFunction::iterator(Start), MF->end())) {
195     unsigned Num = MBB.getNumber();
196     if (!Num) // block zero is never changed from offset zero.
197       continue;
198     // Get the offset and known bits at the end of the layout predecessor.
199     // Include the alignment of the current block.
200     unsigned LogAlign = MBB.getAlignment();
201     BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(LogAlign);
202     PrevNum = Num;
203   }
204 }
205 
206 /// Split the basic block containing MI into two blocks, which are joined by
207 /// an unconditional branch.  Update data structures and renumber blocks to
208 /// account for this change and returns the newly created block.
209 /// NOTE: Successor list of the original BB is out of date after this function,
210 /// and must be updated by the caller! Other transforms follow using this
211 /// utility function, so no point updating now rather than waiting.
212 MachineBasicBlock *
splitBlockBeforeInstr(MachineInstr * MI)213 AArch64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
214   MachineBasicBlock *OrigBB = MI->getParent();
215 
216   // Create a new MBB for the code after the OrigBB.
217   MachineBasicBlock *NewBB =
218       MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
219   MachineFunction::iterator MBBI = OrigBB;
220   ++MBBI;
221   MF->insert(MBBI, NewBB);
222 
223   // Splice the instructions starting with MI over to NewBB.
224   NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
225 
226   // Add an unconditional branch from OrigBB to NewBB.
227   // Note the new unconditional branch is not being recorded.
228   // There doesn't seem to be meaningful DebugInfo available; this doesn't
229   // correspond to anything in the source.
230   BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::B)).addMBB(NewBB);
231 
232   // Insert an entry into BlockInfo to align it properly with the block numbers.
233   BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
234 
235   // Figure out how large the OrigBB is.  As the first half of the original
236   // block, it cannot contain a tablejump.  The size includes
237   // the new jump we added.  (It should be possible to do this without
238   // recounting everything, but it's very confusing, and this is rarely
239   // executed.)
240   computeBlockSize(*OrigBB);
241 
242   // Figure out how large the NewMBB is.  As the second half of the original
243   // block, it may contain a tablejump.
244   computeBlockSize(*NewBB);
245 
246   // All BBOffsets following these blocks must be modified.
247   adjustBlockOffsets(*OrigBB);
248 
249   ++NumSplit;
250 
251   return NewBB;
252 }
253 
254 /// isBlockInRange - Returns true if the distance between specific MI and
255 /// specific BB can fit in MI's displacement field.
isBlockInRange(MachineInstr * MI,MachineBasicBlock * DestBB,unsigned Bits)256 bool AArch64BranchRelaxation::isBlockInRange(MachineInstr *MI,
257                                              MachineBasicBlock *DestBB,
258                                              unsigned Bits) {
259   unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2;
260   unsigned BrOffset = getInstrOffset(MI);
261   unsigned DestOffset = BlockInfo[DestBB->getNumber()].Offset;
262 
263   DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
264                << " from BB#" << MI->getParent()->getNumber()
265                << " max delta=" << MaxOffs << " from " << getInstrOffset(MI)
266                << " to " << DestOffset << " offset "
267                << int(DestOffset - BrOffset) << "\t" << *MI);
268 
269   // Branch before the Dest.
270   if (BrOffset <= DestOffset)
271     return (DestOffset - BrOffset <= MaxOffs);
272   return (BrOffset - DestOffset <= MaxOffs);
273 }
274 
isConditionalBranch(unsigned Opc)275 static bool isConditionalBranch(unsigned Opc) {
276   switch (Opc) {
277   default:
278     return false;
279   case AArch64::TBZW:
280   case AArch64::TBNZW:
281   case AArch64::TBZX:
282   case AArch64::TBNZX:
283   case AArch64::CBZW:
284   case AArch64::CBNZW:
285   case AArch64::CBZX:
286   case AArch64::CBNZX:
287   case AArch64::Bcc:
288     return true;
289   }
290 }
291 
getDestBlock(MachineInstr * MI)292 static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
293   switch (MI->getOpcode()) {
294   default:
295     llvm_unreachable("unexpected opcode!");
296   case AArch64::TBZW:
297   case AArch64::TBNZW:
298   case AArch64::TBZX:
299   case AArch64::TBNZX:
300     return MI->getOperand(2).getMBB();
301   case AArch64::CBZW:
302   case AArch64::CBNZW:
303   case AArch64::CBZX:
304   case AArch64::CBNZX:
305   case AArch64::Bcc:
306     return MI->getOperand(1).getMBB();
307   }
308 }
309 
getOppositeConditionOpcode(unsigned Opc)310 static unsigned getOppositeConditionOpcode(unsigned Opc) {
311   switch (Opc) {
312   default:
313     llvm_unreachable("unexpected opcode!");
314   case AArch64::TBNZW:   return AArch64::TBZW;
315   case AArch64::TBNZX:   return AArch64::TBZX;
316   case AArch64::TBZW:    return AArch64::TBNZW;
317   case AArch64::TBZX:    return AArch64::TBNZX;
318   case AArch64::CBNZW:   return AArch64::CBZW;
319   case AArch64::CBNZX:   return AArch64::CBZX;
320   case AArch64::CBZW:    return AArch64::CBNZW;
321   case AArch64::CBZX:    return AArch64::CBNZX;
322   case AArch64::Bcc:     return AArch64::Bcc; // Condition is an operand for Bcc.
323   }
324 }
325 
getBranchDisplacementBits(unsigned Opc)326 static unsigned getBranchDisplacementBits(unsigned Opc) {
327   switch (Opc) {
328   default:
329     llvm_unreachable("unexpected opcode!");
330   case AArch64::TBNZW:
331   case AArch64::TBZW:
332   case AArch64::TBNZX:
333   case AArch64::TBZX:
334     return TBZDisplacementBits;
335   case AArch64::CBNZW:
336   case AArch64::CBZW:
337   case AArch64::CBNZX:
338   case AArch64::CBZX:
339     return CBZDisplacementBits;
340   case AArch64::Bcc:
341     return BCCDisplacementBits;
342   }
343 }
344 
invertBccCondition(MachineInstr * MI)345 static inline void invertBccCondition(MachineInstr *MI) {
346   assert(MI->getOpcode() == AArch64::Bcc && "Unexpected opcode!");
347   AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(0).getImm();
348   CC = AArch64CC::getInvertedCondCode(CC);
349   MI->getOperand(0).setImm((int64_t)CC);
350 }
351 
352 /// fixupConditionalBranch - Fix up a conditional branch whose destination is
353 /// too far away to fit in its displacement field. It is converted to an inverse
354 /// conditional branch + an unconditional branch to the destination.
fixupConditionalBranch(MachineInstr * MI)355 bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
356   MachineBasicBlock *DestBB = getDestBlock(MI);
357 
358   // Add an unconditional branch to the destination and invert the branch
359   // condition to jump over it:
360   // tbz L1
361   // =>
362   // tbnz L2
363   // b   L1
364   // L2:
365 
366   // If the branch is at the end of its MBB and that has a fall-through block,
367   // direct the updated conditional branch to the fall-through block. Otherwise,
368   // split the MBB before the next instruction.
369   MachineBasicBlock *MBB = MI->getParent();
370   MachineInstr *BMI = &MBB->back();
371   bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
372 
373   if (BMI != MI) {
374     if (std::next(MachineBasicBlock::iterator(MI)) ==
375             std::prev(MBB->getLastNonDebugInstr()) &&
376         BMI->getOpcode() == AArch64::B) {
377       // Last MI in the BB is an unconditional branch. Can we simply invert the
378       // condition and swap destinations:
379       // beq L1
380       // b   L2
381       // =>
382       // bne L2
383       // b   L1
384       MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
385       if (isBlockInRange(MI, NewDest,
386                          getBranchDisplacementBits(MI->getOpcode()))) {
387         DEBUG(dbgs() << "  Invert condition and swap its destination with "
388                      << *BMI);
389         BMI->getOperand(0).setMBB(DestBB);
390         unsigned OpNum = (MI->getOpcode() == AArch64::TBZW ||
391                           MI->getOpcode() == AArch64::TBNZW ||
392                           MI->getOpcode() == AArch64::TBZX ||
393                           MI->getOpcode() == AArch64::TBNZX)
394                              ? 2
395                              : 1;
396         MI->getOperand(OpNum).setMBB(NewDest);
397         MI->setDesc(TII->get(getOppositeConditionOpcode(MI->getOpcode())));
398         if (MI->getOpcode() == AArch64::Bcc)
399           invertBccCondition(MI);
400         return true;
401       }
402     }
403   }
404 
405   if (NeedSplit) {
406     // Analyze the branch so we know how to update the successor lists.
407     MachineBasicBlock *TBB, *FBB;
408     SmallVector<MachineOperand, 2> Cond;
409     TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, false);
410 
411     MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI);
412     // No need for the branch to the next block. We're adding an unconditional
413     // branch to the destination.
414     int delta = TII->GetInstSizeInBytes(&MBB->back());
415     BlockInfo[MBB->getNumber()].Size -= delta;
416     MBB->back().eraseFromParent();
417     // BlockInfo[SplitBB].Offset is wrong temporarily, fixed below
418 
419     // Update the successor lists according to the transformation to follow.
420     // Do it here since if there's no split, no update is needed.
421     MBB->replaceSuccessor(FBB, NewBB);
422     NewBB->addSuccessor(FBB);
423   }
424   MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
425 
426   DEBUG(dbgs() << "  Insert B to BB#" << DestBB->getNumber()
427                << ", invert condition and change dest. to BB#"
428                << NextBB->getNumber() << "\n");
429 
430   // Insert a new conditional branch and a new unconditional branch.
431   MachineInstrBuilder MIB = BuildMI(
432       MBB, DebugLoc(), TII->get(getOppositeConditionOpcode(MI->getOpcode())))
433                                 .addOperand(MI->getOperand(0));
434   if (MI->getOpcode() == AArch64::TBZW || MI->getOpcode() == AArch64::TBNZW ||
435       MI->getOpcode() == AArch64::TBZX || MI->getOpcode() == AArch64::TBNZX)
436     MIB.addOperand(MI->getOperand(1));
437   if (MI->getOpcode() == AArch64::Bcc)
438     invertBccCondition(MIB);
439   MIB.addMBB(NextBB);
440   BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
441   BuildMI(MBB, DebugLoc(), TII->get(AArch64::B)).addMBB(DestBB);
442   BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
443 
444   // Remove the old conditional branch.  It may or may not still be in MBB.
445   BlockInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
446   MI->eraseFromParent();
447 
448   // Finally, keep the block offsets up to date.
449   adjustBlockOffsets(*MBB);
450   return true;
451 }
452 
relaxBranchInstructions()453 bool AArch64BranchRelaxation::relaxBranchInstructions() {
454   bool Changed = false;
455   // Relaxing branches involves creating new basic blocks, so re-eval
456   // end() for termination.
457   for (auto &MBB : *MF) {
458     MachineInstr *MI = MBB.getFirstTerminator();
459     if (isConditionalBranch(MI->getOpcode()) &&
460         !isBlockInRange(MI, getDestBlock(MI),
461                         getBranchDisplacementBits(MI->getOpcode()))) {
462       fixupConditionalBranch(MI);
463       ++NumRelaxed;
464       Changed = true;
465     }
466   }
467   return Changed;
468 }
469 
runOnMachineFunction(MachineFunction & mf)470 bool AArch64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
471   MF = &mf;
472 
473   // If the pass is disabled, just bail early.
474   if (!BranchRelaxation)
475     return false;
476 
477   DEBUG(dbgs() << "***** AArch64BranchRelaxation *****\n");
478 
479   TII = (const AArch64InstrInfo *)MF->getSubtarget().getInstrInfo();
480 
481   // Renumber all of the machine basic blocks in the function, guaranteeing that
482   // the numbers agree with the position of the block in the function.
483   MF->RenumberBlocks();
484 
485   // Do the initial scan of the function, building up information about the
486   // sizes of each block.
487   scanFunction();
488 
489   DEBUG(dbgs() << "  Basic blocks before relaxation\n");
490   DEBUG(dumpBBs());
491 
492   bool MadeChange = false;
493   while (relaxBranchInstructions())
494     MadeChange = true;
495 
496   // After a while, this might be made debug-only, but it is not expensive.
497   verify();
498 
499   DEBUG(dbgs() << "  Basic blocks after relaxation\n");
500   DEBUG(dbgs() << '\n'; dumpBBs());
501 
502   BlockInfo.clear();
503 
504   return MadeChange;
505 }
506 
507 /// createAArch64BranchRelaxation - returns an instance of the constpool
508 /// island pass.
createAArch64BranchRelaxation()509 FunctionPass *llvm::createAArch64BranchRelaxation() {
510   return new AArch64BranchRelaxation();
511 }
512