//===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===---------------------------------------------------------------------===//
//
// This pass performs peephole optimizations to clean up ugly code
// sequences at the MachineInstruction layer.  It runs at the end of
// the SSA phases, following VSX swap removal.  A pass of dead code
// elimination follows this one for quick clean-up of any dead
// instructions introduced here.  Although we could do this as callbacks
// from the generic peephole pass, this would have a couple of bad
// effects:  it might remove optimization opportunities for VSX swap
// removal, and it would miss cleanups made possible following VSX
// swap removal.
//
//===---------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "MCTargetDesc/PPCPredicates.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-mi-peepholes"

STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
STATISTIC(MultiTOCSaves,
          "Number of functions with multiple TOC saves that must be kept");
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instructions fed by LI");
STATISTIC(NumConvertedToImmediateForm,
          "Number of instructions converted to their immediate form");
STATISTIC(NumFunctionsEnteredInMIPeephole,
          "Number of functions entered in PPC MI Peepholes");
STATISTIC(NumFixedPointIterations,
          "Number of fixed-point iterations converting reg-reg instructions "
          "to reg-imm ones");

static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
                   cl::desc("Iterate to a fixed point when attempting to "
                            "convert reg-reg instructions to reg-imm"));

static cl::opt<bool>
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
              cl::desc("Convert eligible reg+reg instructions to reg+imm"));

static cl::opt<bool>
    EnableSExtElimination("ppc-eliminate-signext",
                          cl::desc("enable elimination of sign-extensions"),
                          cl::init(false), cl::Hidden);

static cl::opt<bool>
    EnableZExtElimination("ppc-eliminate-zeroext",
                          cl::desc("enable elimination of zero-extensions"),
                          cl::init(false), cl::Hidden);

namespace {

struct PPCMIPeephole : public MachineFunctionPass {

  static char ID;
  const PPCInstrInfo *TII;
  MachineFunction *MF;
  MachineRegisterInfo *MRI;

  PPCMIPeephole() : MachineFunctionPass(ID) {
    initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
  }

private:
  MachineDominatorTree *MDT;

  // Initialize class variables.
  void initialize(MachineFunction &MFParm);

  // Perform peepholes.
  bool simplifyCode(void);

  // Perform peepholes.
  bool eliminateRedundantCompare(void);
  bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
  void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
                      MachineInstr *MI);

public:

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Main entry point for this pass.
  bool runOnMachineFunction(MachineFunction &MF) override {
    if (skipFunction(MF.getFunction()))
      return false;
    initialize(MF);
    return simplifyCode();
  }
};

// Initialize class variables.
void PPCMIPeephole::initialize(MachineFunction &MFParm) {
  MF = &MFParm;
  MRI = &MF->getRegInfo();
  MDT = &getAnalysis<MachineDominatorTree>();
  TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
  LLVM_DEBUG(MF->dump());
}

static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
                                      MachineRegisterInfo *MRI) {
  assert(Op && "Invalid Operand!");
  if (!Op->isReg())
    return nullptr;

  unsigned Reg = Op->getReg();
  if (!TargetRegisterInfo::isVirtualRegister(Reg))
    return nullptr;

  return MRI->getVRegDef(Reg);
}

// This function returns the number of known zero bits in the output of MI,
// starting from the most significant bit.
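// For example, RLDICL (rotate left doubleword immediate then clear left) with
// a mask-begin value of 48 forces the upper 48 bits of its result to zero,
// which is why operand 3 is returned directly for the rotate-and-clear-left
// forms below.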
static unsigned
getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
  unsigned Opcode = MI->getOpcode();
  if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
      Opcode == PPC::RLDCL  || Opcode == PPC::RLDCLo)
    return MI->getOperand(3).getImm();

  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
       MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
    return MI->getOperand(3).getImm();

  if ((Opcode == PPC::RLWINM  || Opcode == PPC::RLWINMo ||
       Opcode == PPC::RLWNM   || Opcode == PPC::RLWNMo  ||
       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
       MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
    return 32 + MI->getOperand(3).getImm();

  if (Opcode == PPC::ANDIo) {
    uint16_t Imm = MI->getOperand(2).getImm();
    return 48 + countLeadingZeros(Imm);
  }
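  // E.g. for the andi. case above, an immediate of 0x00F0 has 8 leading zeros
  // in 16 bits, so 48 + 8 = 56 result bits are known to be zero.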

  if (Opcode == PPC::CNTLZW  || Opcode == PPC::CNTLZWo ||
      Opcode == PPC::CNTTZW  || Opcode == PPC::CNTTZWo ||
      Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
    // The result ranges from 0 to 32.
    return 58;

  if (Opcode == PPC::CNTLZD  || Opcode == PPC::CNTLZDo ||
      Opcode == PPC::CNTTZD  || Opcode == PPC::CNTTZDo)
    // The result ranges from 0 to 64.
    return 57;

  if (Opcode == PPC::LHZ   || Opcode == PPC::LHZX  ||
      Opcode == PPC::LHZ8  || Opcode == PPC::LHZX8 ||
      Opcode == PPC::LHZU  || Opcode == PPC::LHZUX ||
      Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
    return 48;

  if (Opcode == PPC::LBZ   || Opcode == PPC::LBZX  ||
      Opcode == PPC::LBZ8  || Opcode == PPC::LBZX8 ||
      Opcode == PPC::LBZU  || Opcode == PPC::LBZUX ||
      Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
    return 56;

  if (TII->isZeroExtended(*MI))
    return 32;

  return 0;
}

// This function maintains a map for the pairs <TOC Save Instr, Keep>.
// Each time a new TOC save is encountered, it checks if any of the existing
// ones are dominated by the new one. If so, it marks the existing one as
// redundant by setting its entry in the map to false. It then adds the new
// instruction to the map with either true or false depending on whether any
// existing instruction dominated the new one.
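// For example, a TOC save in the entry block typically dominates any save
// encountered later, so those later saves are inserted with Keep == false and
// erased afterwards by eliminateRedundantTOCSaves.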
void PPCMIPeephole::UpdateTOCSaves(
  std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
  assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
  bool Keep = true;
  for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++) {
    MachineInstr *CurrInst = It->first;
    // If the new instruction dominates an existing one, mark the existing
    // one as redundant.
    if (It->second && MDT->dominates(MI, CurrInst))
      It->second = false;
    // Check if the new instruction is redundant.
    if (MDT->dominates(CurrInst, MI)) {
      Keep = false;
      break;
    }
  }
  // Add new instruction to map.
  TOCSaves[MI] = Keep;
}

// Perform peephole optimizations.
bool PPCMIPeephole::simplifyCode(void) {
  bool Simplified = false;
  MachineInstr* ToErase = nullptr;
  std::map<MachineInstr *, bool> TOCSaves;
  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
  NumFunctionsEnteredInMIPeephole++;
  if (ConvertRegReg) {
    // Fixed-point conversion of reg/reg instructions fed by load-immediate
    // into reg/imm instructions. FIXME: This is expensive, control it with
    // an option.
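    // For instance, an ADD whose second source is defined by an LI can
    // normally be rewritten as an ADDI carrying that constant (the details
    // live in PPCInstrInfo::convertToImmediateForm).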
    bool SomethingChanged = false;
    do {
      NumFixedPointIterations++;
      SomethingChanged = false;
      for (MachineBasicBlock &MBB : *MF) {
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugInstr())
            continue;

          if (TII->convertToImmediateForm(MI)) {
            // We don't erase anything in case the def has other uses. Let DCE
            // remove it if it can be removed.
            LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
            LLVM_DEBUG(MI.dump());
            NumConvertedToImmediateForm++;
            SomethingChanged = true;
            Simplified = true;
            continue;
          }
        }
      }
    } while (SomethingChanged && FixedPointRegToImm);
  }

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {

      // If the previous instruction was marked for elimination,
      // remove it now.
      if (ToErase) {
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }

      // Ignore debug instructions.
      if (MI.isDebugInstr())
        continue;

      // Per-opcode peepholes.
      switch (MI.getOpcode()) {

      default:
        break;

      case PPC::STD: {
        MachineFrameInfo &MFI = MF->getFrameInfo();
        if (MFI.hasVarSizedObjects() ||
            !MF->getSubtarget<PPCSubtarget>().isELFv2ABI())
          break;
        // When encountering a TOC save instruction, call UpdateTOCSaves
        // to add it to the TOCSaves map and mark any existing TOC saves
        // it dominates as redundant.
        if (TII->isTOCSaveMI(MI))
          UpdateTOCSaves(TOCSaves, &MI);
        break;
      }
      case PPC::XXPERMDI: {
        // Perform simplifications of 2x64 vector swaps and splats.
        // A swap is identified by an immediate value of 2, and a splat
        // is identified by an immediate value of 0 or 3.
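        // With identical source registers s, xxpermdi selects doublewords as
        // follows: immediate 0 yields {s[0],s[0]} and 3 yields {s[1],s[1]}
        // (splats), while 2 yields {s[1],s[0]} (a swap); immediate 1 merely
        // reproduces the input, so it is not interesting here.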
        int Immed = MI.getOperand(3).getImm();

        if (Immed != 1) {

          // For each of these simplifications, we need the two source
          // regs to match.  Unfortunately, MachineCSE ignores COPY and
          // SUBREG_TO_REG, so for example we can see
          //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
          // We have to look through chains of COPY and SUBREG_TO_REG
          // to find the real source values for comparison.
          unsigned TrueReg1 =
            TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
          unsigned TrueReg2 =
            TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);

          if (TrueReg1 == TrueReg2
              && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
            MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
            unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;

            // If this is a splat fed by a splatting load, the splat is
            // redundant. Replace with a copy. This doesn't happen directly due
            // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
            // a load of a double to a vector of 64-bit integers.
            auto isConversionOfLoadAndSplat = [=]() -> bool {
              if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
                return false;
              unsigned DefReg =
                TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
              if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
                MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
                if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
                  return true;
              }
              return false;
            };
            if (DefMI && (Immed == 0 || Immed == 3)) {
              if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
                LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
                                     "to load-and-splat/copy: ");
                LLVM_DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(MI.getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }
            }

            // If this is a splat or a swap fed by another splat, we
            // can replace it with a copy.
            if (DefOpc == PPC::XXPERMDI) {
              unsigned FeedImmed = DefMI->getOperand(3).getImm();
              unsigned FeedReg1 =
                TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
              unsigned FeedReg2 =
                TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);

              if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
                LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
                                     "to splat/copy: ");
                LLVM_DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(MI.getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }

              // If this is a splat fed by a swap, we can modify the
              // splat to splat the other value from the swap's input
              // parameter.
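              // A swap exchanges the two doublewords, so splatting doubleword
              // 0 (immediate 0) of the swap's output equals splatting
              // doubleword 1 (immediate 3) of its input and vice versa; hence
              // the new immediate below is 3 - Immed.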
              else if ((Immed == 0 || Immed == 3)
                       && FeedImmed == 2 && FeedReg1 == FeedReg2) {
                LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
                LLVM_DEBUG(MI.dump());
                MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
                MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
                MI.getOperand(3).setImm(3 - Immed);
                Simplified = true;
              }

              // If this is a swap fed by a swap, we can replace it
              // with a copy from the first swap's input.
              else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
                LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
                LLVM_DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(DefMI->getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }
            } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
                       (DefMI->getOperand(2).getImm() == 0 ||
                        DefMI->getOperand(2).getImm() == 3)) {
              // Splat fed by another splat - switch the output of the first
              // and remove the second.
              DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
              ToErase = &MI;
              Simplified = true;
              LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
              LLVM_DEBUG(MI.dump());
            }
          }
        }
        break;
      }
      case PPC::VSPLTB:
      case PPC::VSPLTH:
      case PPC::XXSPLTW: {
        unsigned MyOpcode = MI.getOpcode();
        unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
        unsigned TrueReg =
          TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
        if (!DefMI)
          break;
        unsigned DefOpcode = DefMI->getOpcode();
        auto isConvertOfSplat = [=]() -> bool {
          if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
            return false;
          unsigned ConvReg = DefMI->getOperand(1).getReg();
          if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
            return false;
          MachineInstr *Splt = MRI->getVRegDef(ConvReg);
          return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
            Splt->getOpcode() == PPC::XXSPLTW);
        };
        bool AlreadySplat = (MyOpcode == DefOpcode) ||
          (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
          (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
          (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
        // If the instruction[s] that feed this splat have already splat
        // the value, this splat is redundant.
        if (AlreadySplat) {
          LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
          LLVM_DEBUG(MI.dump());
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                  MI.getOperand(0).getReg())
              .add(MI.getOperand(OpNo));
          ToErase = &MI;
          Simplified = true;
        }
        // Splat fed by a shift. This usually arises when we align the value
        // to splat into vector element zero.
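        // With both xxsldwi sources equal to s, output word i is
        // s[(i + ShiftImm) & 3], so splatting word SplatImm of the shift
        // result is the same as splatting word (SplatImm + ShiftImm) & 3 of s.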
        if (DefOpcode == PPC::XXSLDWI) {
          unsigned ShiftRes = DefMI->getOperand(0).getReg();
          unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
          unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
          unsigned ShiftImm = DefMI->getOperand(3).getImm();
          unsigned SplatImm = MI.getOperand(2).getImm();
          if (ShiftOp1 == ShiftOp2) {
            unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
            if (MRI->hasOneNonDBGUse(ShiftRes)) {
              LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
              LLVM_DEBUG(DefMI->dump());
              ToErase = DefMI;
            }
            Simplified = true;
            LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
                              << " to " << NewElem << " in instruction: ");
            LLVM_DEBUG(MI.dump());
            MI.getOperand(1).setReg(ShiftOp1);
            MI.getOperand(2).setImm(NewElem);
          }
        }
        break;
      }
      case PPC::XVCVDPSP: {
        // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
        unsigned TrueReg =
          TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);

        // This can occur when building a vector of single precision or integer
        // values.
        if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
          unsigned DefsReg1 =
            TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
          unsigned DefsReg2 =
            TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
          if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
              !TargetRegisterInfo::isVirtualRegister(DefsReg2))
            break;
          MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
          MachineInstr *P2 = MRI->getVRegDef(DefsReg2);

          if (!P1 || !P2)
            break;

          // Remove the passed FRSP instruction if it only feeds this MI and
          // set any uses of that FRSP (in this MI) to the source of the FRSP.
          auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
            if (RoundInstr->getOpcode() == PPC::FRSP &&
                MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
              Simplified = true;
              unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
              unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
              MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
              for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
                if (Use.getOperand(i).isReg() &&
                    Use.getOperand(i).getReg() == FRSPDefines)
                  Use.getOperand(i).setReg(ConvReg1);
              LLVM_DEBUG(dbgs() << "Removing redundant FRSP:\n");
              LLVM_DEBUG(RoundInstr->dump());
              LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
              LLVM_DEBUG(MI.dump());
              LLVM_DEBUG(dbgs() << "Through instruction:\n");
              LLVM_DEBUG(DefMI->dump());
              RoundInstr->eraseFromParent();
            }
          };

          // If the input to XVCVDPSP is a vector that was built (even
          // partially) out of FRSP's, the FRSP(s) can safely be removed
          // since this instruction performs the same operation.
          if (P1 != P2) {
            removeFRSPIfPossible(P1);
            removeFRSPIfPossible(P2);
            break;
          }
          removeFRSPIfPossible(P1);
        }
        break;
      }
      case PPC::EXTSH:
      case PPC::EXTSH8:
      case PPC::EXTSH8_32_64: {
        if (!EnableSExtElimination) break;
        unsigned NarrowReg = MI.getOperand(1).getReg();
        if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
        // If we've used a zero-extending load that we will sign-extend,
        // just do a sign-extending load.
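        // For example, an lhz feeding an extsh with no other uses can be
        // replaced by a single lha.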
        if (SrcMI->getOpcode() == PPC::LHZ ||
            SrcMI->getOpcode() == PPC::LHZX) {
          if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
            break;
          auto is64Bit = [] (unsigned Opcode) {
            return Opcode == PPC::EXTSH8;
          };
          auto isXForm = [] (unsigned Opcode) {
            return Opcode == PPC::LHZX;
          };
          auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
            if (is64Bit)
              if (isXForm) return PPC::LHAX8;
              else         return PPC::LHA8;
            else
              if (isXForm) return PPC::LHAX;
              else         return PPC::LHA;
          };
          unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
                                       isXForm(SrcMI->getOpcode()));
          LLVM_DEBUG(dbgs() << "Zero-extending load\n");
          LLVM_DEBUG(SrcMI->dump());
          LLVM_DEBUG(dbgs() << "and sign-extension\n");
          LLVM_DEBUG(MI.dump());
          LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
          SrcMI->setDesc(TII->get(Opc));
          SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        }
        break;
      }
      case PPC::EXTSW:
      case PPC::EXTSW_32:
      case PPC::EXTSW_32_64: {
        if (!EnableSExtElimination) break;
        unsigned NarrowReg = MI.getOperand(1).getReg();
        if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
        // If we've used a zero-extending load that we will sign-extend,
        // just do a sign-extending load.
        if (SrcMI->getOpcode() == PPC::LWZ ||
            SrcMI->getOpcode() == PPC::LWZX) {
          if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
            break;
          auto is64Bit = [] (unsigned Opcode) {
            return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64;
          };
          auto isXForm = [] (unsigned Opcode) {
            return Opcode == PPC::LWZX;
          };
          auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
            if (is64Bit)
              if (isXForm) return PPC::LWAX;
              else         return PPC::LWA;
            else
              if (isXForm) return PPC::LWAX_32;
              else         return PPC::LWA_32;
          };
          unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
                                       isXForm(SrcMI->getOpcode()));
          LLVM_DEBUG(dbgs() << "Zero-extending load\n");
          LLVM_DEBUG(SrcMI->dump());
          LLVM_DEBUG(dbgs() << "and sign-extension\n");
          LLVM_DEBUG(MI.dump());
          LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
          SrcMI->setDesc(TII->get(Opc));
          SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
                   TII->isSignExtended(*SrcMI)) {
          // We can eliminate EXTSW if the input is known to be already
          // sign-extended.
          LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
          unsigned TmpReg =
            MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
                  TmpReg);
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
                  MI.getOperand(0).getReg())
              .addReg(TmpReg)
              .addReg(NarrowReg)
              .addImm(PPC::sub_32);
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        }
        break;
      }
      case PPC::RLDICL: {
        // We can eliminate RLDICL (e.g. for zero-extension)
        // if all bits to clear are already zero in the input.
        // This code assumes the following code sequence for zero-extension.
        //   %6 = COPY %5:sub_32; (optional)
        //   %8 = IMPLICIT_DEF;
        //   %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
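        // For example, if %6 above comes from an lbz (56 known leading
        // zeros), a zero-extending RLDICL of %7 with SH = 0 and MB = 32
        // clears bits that are already zero and can be replaced by a COPY.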
        if (!EnableZExtElimination) break;

        if (MI.getOperand(2).getImm() != 0)
          break;

        unsigned SrcReg = MI.getOperand(1).getReg();
        if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
        if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
              SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
          break;

        MachineInstr *ImpDefMI, *SubRegMI;
        ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
        SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
        if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;

        SrcMI = SubRegMI;
        if (SubRegMI->getOpcode() == PPC::COPY) {
          unsigned CopyReg = SubRegMI->getOperand(1).getReg();
          if (TargetRegisterInfo::isVirtualRegister(CopyReg))
            SrcMI = MRI->getVRegDef(CopyReg);
        }

        unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII);
        if (MI.getOperand(3).getImm() <= KnownZeroCount) {
          LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                  MI.getOperand(0).getReg())
              .addReg(SrcReg);
          ToErase = &MI;
          Simplified = true;
          NumEliminatedZExt++;
        }
        break;
      }

      // TODO: Any instruction that has an immediate form fed only by a PHI
      // whose operands are all load immediate can be folded away. We currently
      // do this for ADD instructions, but should expand it to arithmetic and
      // binary instructions with immediate forms in the future.
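      // For example (with made-up vreg numbers): given %5 = LI8 8, %6 = LI8 9,
      // %7 = PHI %5, <%bb.1>, %6, <%bb.2> and %8 = ADD8 %7, %4, the LIs are
      // rewritten as ADDI8 %4, 8 and ADDI8 %4, 9 and the ADD8 itself becomes
      // a COPY of the PHI result.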
      case PPC::ADD4:
      case PPC::ADD8: {
        auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
          assert(PhiOp && "Invalid Operand!");
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);

          return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
                 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
        };

        auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
                                            MachineOperand *PhiOp) {
          assert(PhiOp && "Invalid Operand!");
          assert(DominatorOp && "Invalid Operand!");
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
          MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);

          // Note: the vregs only show up at the odd-numbered operand indices
          // of the PHI node; the even-numbered indices hold the BB info.
          for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
            MachineInstr *LiMI =
                getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
            if (!LiMI ||
                (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
                || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
                !MDT->dominates(DefDomMI, LiMI))
              return false;
          }

          return true;
        };

        MachineOperand Op1 = MI.getOperand(1);
        MachineOperand Op2 = MI.getOperand(2);
        if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
          std::swap(Op1, Op2);
        else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
          break; // We don't have an ADD fed by LI's that can be transformed

        // Now we know that Op1 is the PHI node and Op2 is the dominator
        unsigned DominatorReg = Op2.getReg();

        const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
                                             ? &PPC::G8RC_and_G8RC_NOX0RegClass
                                             : &PPC::GPRC_and_GPRC_NOR0RegClass;
        MRI->setRegClass(DominatorReg, TRC);

        // Replace LIs with ADDIs.
        MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
        for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
          MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
          LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
          LLVM_DEBUG(LiMI->dump());

          // There could be repeated registers in the PHI, e.g: %1 =
          // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
          // already replaced the def instruction, skip.
          if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
            continue;

          assert((LiMI->getOpcode() == PPC::LI ||
                  LiMI->getOpcode() == PPC::LI8) &&
                 "Invalid Opcode!");
          auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
          LiMI->RemoveOperand(1);                    // remove the imm of LI
          LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
                                                              : PPC::ADDI8));
          MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
              .addReg(DominatorReg)
              .addImm(LiImm); // restore the imm of LI
          LLVM_DEBUG(LiMI->dump());
        }

        // Replace the ADD with a COPY.
        LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
        LLVM_DEBUG(MI.dump());
        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                MI.getOperand(0).getReg())
            .add(Op1);
        ToErase = &MI;
        Simplified = true;
        NumOptADDLIs++;
        break;
      }
      }
    }

    // If the last instruction was marked for elimination,
    // remove it now.
    if (ToErase) {
      ToErase->eraseFromParent();
      ToErase = nullptr;
    }
  }

  // Eliminate all the TOC save instructions which are redundant.
  Simplified |= eliminateRedundantTOCSaves(TOCSaves);
  // Try to eliminate redundant compare instructions.
  Simplified |= eliminateRedundantCompare();

  return Simplified;
}

// Helper functions for eliminateRedundantCompare.
static bool isEqOrNe(MachineInstr *BI) {
  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
}

static bool isSupportedCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPLD  || opCode == PPC::CMPD  ||
          opCode == PPC::CMPLW  || opCode == PPC::CMPW  ||
          opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
          opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
}

static bool is64bitCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPLD  || opCode == PPC::CMPD ||
          opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
}

static bool isSignedCmpOp(unsigned opCode) {
  return (opCode == PPC::CMPD  || opCode == PPC::CMPW ||
          opCode == PPC::CMPDI || opCode == PPC::CMPWI);
}

static unsigned getSignedCmpOpCode(unsigned opCode) {
  if (opCode == PPC::CMPLD)  return PPC::CMPD;
  if (opCode == PPC::CMPLW)  return PPC::CMPW;
  if (opCode == PPC::CMPLDI) return PPC::CMPDI;
  if (opCode == PPC::CMPLWI) return PPC::CMPWI;
  return opCode;
}

// We can decrement immediate x in (GE x) by changing it to (GT x-1) or
// (LT x) to (LE x-1)
static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
  uint64_t Imm = CMPI->getOperand(2).getImm();
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
  if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
    return 0;

  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  unsigned PredHint = PPC::getPredicateHint(Pred);
  if (PredCond == PPC::PRED_GE)
    return PPC::getPredicate(PPC::PRED_GT, PredHint);
  if (PredCond == PPC::PRED_LT)
    return PPC::getPredicate(PPC::PRED_LE, PredHint);

  return 0;
}

// We can increment immediate x in (GT x) by changing it to (GE x+1) or
// (LE x) to (LT x+1)
static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
  uint64_t Imm = CMPI->getOperand(2).getImm();
  bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
  if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
    return 0;

  PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
  unsigned PredCond = PPC::getPredicateCondition(Pred);
  unsigned PredHint = PPC::getPredicateHint(Pred);
  if (PredCond == PPC::PRED_GT)
    return PPC::getPredicate(PPC::PRED_GE, PredHint);
  if (PredCond == PPC::PRED_LE)
    return PPC::getPredicate(PPC::PRED_LT, PredHint);

  return 0;
}

// This takes a Phi node and returns a register value for the specified BB.
static unsigned getIncomingRegForBlock(MachineInstr *Phi,
                                       MachineBasicBlock *MBB) {
  for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
    MachineOperand &MO = Phi->getOperand(I);
    if (MO.getMBB() == MBB)
      return Phi->getOperand(I-1).getReg();
  }
  llvm_unreachable("invalid src basic block for this Phi node\n");
  return 0;
}

// This function tracks the source of the register through register copies.
// If BB1 and BB2 are non-NULL, we also track the PHI instruction in BB2,
// assuming that control flows from BB1 into BB2.
static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
                           MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
  unsigned SrcReg = Reg;
  while (1) {
    unsigned NextReg = SrcReg;
    MachineInstr *Inst = MRI->getVRegDef(SrcReg);
    if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
      NextReg = getIncomingRegForBlock(Inst, BB1);
      // We track through a PHI only once to avoid an infinite loop.
      BB1 = nullptr;
    }
    else if (Inst->isFullCopy())
      NextReg = Inst->getOperand(1).getReg();
    if (NextReg == SrcReg || !TargetRegisterInfo::isVirtualRegister(NextReg))
      break;
    SrcReg = NextReg;
  }
  return SrcReg;
}

static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
                                          MachineBasicBlock *&PredMBB,
                                          MachineBasicBlock *&MBBtoMoveCmp,
                                          MachineRegisterInfo *MRI) {

  auto isEligibleBB = [&](MachineBasicBlock &BB) {
    auto BII = BB.getFirstInstrTerminator();
    // We optimize BBs ending with a conditional branch.
    // We check only for BCC here, not BCCLR, because BCCLR
    // will be formed only later in the pipeline.
    if (BB.succ_size() == 2 &&
        BII != BB.instr_end() &&
        (*BII).getOpcode() == PPC::BCC &&
        (*BII).getOperand(1).isReg()) {
      // We optimize only if the condition code is used only by one BCC.
      unsigned CndReg = (*BII).getOperand(1).getReg();
      if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
          !MRI->hasOneNonDBGUse(CndReg))
        return false;

      MachineInstr *CMPI = MRI->getVRegDef(CndReg);
      // We assume compare and branch are in the same BB for ease of analysis.
      if (CMPI->getParent() != &BB)
        return false;

      // We skip this BB if a physical register is used in the comparison.
      for (MachineOperand &MO : CMPI->operands())
        if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
          return false;

      return true;
    }
    return false;
  };

  // If this BB has more than one successor, we can create a new BB and
  // move the compare instruction into the new BB.
  // So far, we do not move a compare instruction to a BB having multiple
  // successors, to avoid potentially increasing code size.
  auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
    return BB.succ_size() == 1;
  };

  if (!isEligibleBB(MBB))
    return false;

  unsigned NumPredBBs = MBB.pred_size();
  if (NumPredBBs == 1) {
    MachineBasicBlock *TmpMBB = *MBB.pred_begin();
    if (isEligibleBB(*TmpMBB)) {
      PredMBB = TmpMBB;
      MBBtoMoveCmp = nullptr;
      return true;
    }
  }
  else if (NumPredBBs == 2) {
    // We check for the partially redundant case.
    // So far, we support cases with only two predecessors
    // to avoid increasing the number of instructions.
    MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
    MachineBasicBlock *Pred1MBB = *PI;
    MachineBasicBlock *Pred2MBB = *(PI+1);

    if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
      // We assume Pred1MBB is the BB containing the compare to be merged and
      // Pred2MBB is the BB to which we will append a compare instruction.
      // Hence we can proceed as is.
    }
    else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
      // We need to swap Pred1MBB and Pred2MBB to canonicalize.
      std::swap(Pred1MBB, Pred2MBB);
    }
    else return false;

    // Here, Pred2MBB is the BB to which we need to append a compare inst.
    // We cannot move the compare instruction if operands are not available
    // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
    MachineInstr *BI = &*MBB.getFirstInstrTerminator();
    MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
    for (int I = 1; I <= 2; I++)
      if (CMPI->getOperand(I).isReg()) {
        MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
        if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
          return false;
      }

    PredMBB = Pred1MBB;
    MBBtoMoveCmp = Pred2MBB;
    return true;
  }

  return false;
}

// This function iterates over the input map containing pairs of a TOC save
// instruction and a flag. The flag is set to false if the TOC save is proven
// redundant. This function erases from the basic block all the TOC saves
// marked as redundant.
bool PPCMIPeephole::eliminateRedundantTOCSaves(
    std::map<MachineInstr *, bool> &TOCSaves) {
  bool Simplified = false;
  int NumKept = 0;
  for (auto TOCSave : TOCSaves) {
    if (!TOCSave.second) {
      TOCSave.first->eraseFromParent();
      RemoveTOCSave++;
      Simplified = true;
    } else {
      NumKept++;
    }
  }

  if (NumKept > 1)
    MultiTOCSaves++;

  return Simplified;
}

// If multiple conditional branches are executed based on the (essentially)
// same comparison, we merge compare instructions into one and make multiple
// conditional branches on this comparison.
// For example,
//   if (a == 0) { ... }
//   else if (a < 0) { ... }
// can be executed by one compare and two conditional branches instead of
// two pairs of a compare and a conditional branch.
//
// This method merges two compare instructions in two MBBs and modifies the
// compare and conditional branch instructions if needed.
// For the above example, the input for this pass looks like:
//   cmplwi r3, 0
//   beq    0, .LBB0_3
//   cmpwi  r3, -1
//   bgt    0, .LBB0_4
// So, before merging two compares, we need to modify these instructions as
// follows:
//   cmpwi  r3, 0       ; cmplwi and cmpwi yield the same result for beq
//   beq    0, .LBB0_3
//   cmpwi  r3, 0       ; greater than -1 means greater than or equal to 0
//   bge    0, .LBB0_4

bool PPCMIPeephole::eliminateRedundantCompare(void) {
  bool Simplified = false;

  for (MachineBasicBlock &MBB2 : *MF) {
    MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;

    // For the fully redundant case, we select two basic blocks MBB1 and MBB2
    // as an optimization target if
    // - both MBBs end with a conditional branch,
    // - MBB1 is the only predecessor of MBB2, and
    // - the compares do not take a physical register as an operand in
    //   either MBB.
    // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
    //
    // As the partially redundant case, we additionally handle if MBB2 has one
    // additional predecessor, which has only one successor (MBB2).
    // In this case, we move the compare instruction originally in MBB2 into
    // MBBtoMoveCmp. This partially redundant case typically appears when
    // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
    //
    // Overview of CFG of related basic blocks
    // Fully redundant case        Partially redundant case
    //   --------                   ----------------  --------
    //   | MBB1 | (w/ 2 succ)       | MBBtoMoveCmp |  | MBB1 | (w/ 2 succ)
    //   --------                   ----------------  --------
    //      |    \                     (w/ 1 succ) \     |    \
    //      |     \                                 \    |     \
    //      |                                        \   |
    //   --------                                     --------
    //   | MBB2 | (w/ 1 pred                          | MBB2 | (w/ 2 pred
    //   -------- and 2 succ)                         -------- and 2 succ)
    //      |    \                                       |    \
    //      |     \                                      |     \
    //
    if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
      continue;

    MachineInstr *BI1   = &*MBB1->getFirstInstrTerminator();
    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());

    MachineInstr *BI2   = &*MBB2.getFirstInstrTerminator();
    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
    bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);

    // We cannot optimize an unsupported compare opcode or
    // a mix of 32-bit and 64-bit comparisons.
    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
        !isSupportedCmpOp(CMPI2->getOpcode()) ||
        is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
      continue;

    unsigned NewOpCode = 0;
    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
    bool SwapOperands = false;

    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
      // Typically, unsigned comparison is used for equality check, but
      // we replace it with a signed comparison if the comparison
      // to be merged is a signed comparison.
      // In other cases of opcode mismatch, we cannot optimize this.

      // We cannot change opcode when comparing against an immediate
      // if the most significant bit of the immediate is one
      // due to the difference in sign extension.
      auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
        if (!I->getOperand(2).isImm())
          return false;
        int16_t Imm = (int16_t)I->getOperand(2).getImm();
        return Imm < 0;
      };

      if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
          CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
        NewOpCode = CMPI1->getOpcode();
      else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
               getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
        NewOpCode = CMPI2->getOpcode();
      else continue;
    }

    if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
      // In case of comparisons between two registers, these two registers
      // must be the same to merge two comparisons.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
                                         MBB1, &MBB2, MRI);

      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
        // Same pair of registers in the same order; ready to merge as is.
      }
      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
        // Same pair of registers in different order.
        // We reverse the predicate to merge compare instructions.
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
        // In case of partial redundancy, we need to swap operands
        // in the other compare instruction.
        SwapOperands = true;
      }
      else continue;
    }
    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
      // In case of comparisons between a register and an immediate,
      // the operand register must be the same for the two compare
      // instructions.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      if (Cmp1Operand1 != Cmp2Operand1)
        continue;

      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();

      // If the immediates are not the same, we try to adjust by changing the
      // predicate; e.g. GT imm means GE (imm+1).
      if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
        int Diff = Imm1 - Imm2;
        if (Diff < -2 || Diff > 2)
          continue;

        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
        if (Diff == 2) {
          if (PredToInc2 && PredToDec1) {
            NewPredicate2 = PredToInc2;
            NewPredicate1 = PredToDec1;
            NewImm2++;
            NewImm1--;
          }
        }
        else if (Diff == 1) {
          if (PredToInc2) {
            NewImm2++;
            NewPredicate2 = PredToInc2;
          }
          else if (PredToDec1) {
            NewImm1--;
            NewPredicate1 = PredToDec1;
          }
        }
        else if (Diff == -1) {
          if (PredToDec2) {
            NewImm2--;
            NewPredicate2 = PredToDec2;
          }
          else if (PredToInc1) {
            NewImm1++;
            NewPredicate1 = PredToInc1;
          }
        }
        else if (Diff == -2) {
          if (PredToDec2 && PredToInc1) {
            NewPredicate2 = PredToDec2;
            NewPredicate1 = PredToInc1;
            NewImm2--;
            NewImm1++;
          }
        }
      }

      // We cannot merge two compares if the immediates are not the same.
      if (NewImm2 != NewImm1)
        continue;
    }

    LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(CMPI2->dump());
    LLVM_DEBUG(BI2->dump());

    // We adjust the opcode, predicates, and immediates as determined above.
    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
      CMPI1->setDesc(TII->get(NewOpCode));
    }
    if (NewPredicate1) {
      BI1->getOperand(0).setImm(NewPredicate1);
    }
    if (NewPredicate2) {
      BI2->getOperand(0).setImm(NewPredicate2);
    }
    if (NewImm1 != Imm1) {
      CMPI1->getOperand(2).setImm(NewImm1);
    }

    if (IsPartiallyRedundant) {
      // We touch up the compare instruction in MBB2 and move it to
      // a previous BB to handle the partially redundant case.
      if (SwapOperands) {
        unsigned Op1 = CMPI2->getOperand(1).getReg();
        unsigned Op2 = CMPI2->getOperand(2).getReg();
        CMPI2->getOperand(1).setReg(Op2);
        CMPI2->getOperand(2).setReg(Op1);
      }
      if (NewImm2 != Imm2)
        CMPI2->getOperand(2).setImm(NewImm2);

      for (int I = 1; I <= 2; I++) {
        if (CMPI2->getOperand(I).isReg()) {
          MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
          if (Inst->getParent() != &MBB2)
            continue;

          assert(Inst->getOpcode() == PPC::PHI &&
                 "We cannot support if an operand comes from this BB.");
          unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
          CMPI2->getOperand(I).setReg(SrcReg);
        }
      }
      auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
      MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));

      DebugLoc DL = CMPI2->getDebugLoc();
      unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
      BuildMI(MBB2, MBB2.begin(), DL,
              TII->get(PPC::PHI), NewVReg)
        .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
        .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
      BI2->getOperand(1).setReg(NewVReg);
    }
    else {
      // Finally, eliminate the compare instruction in MBB2.
      BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
      CMPI2->eraseFromParent();
    }
    BI2->getOperand(1).setIsKill(true);
    BI1->getOperand(1).setIsKill(false);

    LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(BI2->dump());
    if (IsPartiallyRedundant) {
      LLVM_DEBUG(dbgs() << "The following compare is moved into "
                        << printMBBReference(*MBBtoMoveCmp)
                        << " to handle partial redundancy.\n");
      LLVM_DEBUG(CMPI2->dump());
    }

    Simplified = true;
  }

  return Simplified;
}

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)

char PPCMIPeephole::ID = 0;
FunctionPass*
llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
