1 //===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===---------------------------------------------------------------------===//
9 //
10 // This pass performs peephole optimizations to clean up ugly code
11 // sequences at the MachineInstruction layer. It runs at the end of
12 // the SSA phases, following VSX swap removal. A pass of dead code
13 // elimination follows this one for quick clean-up of any dead
14 // instructions introduced here. Although we could do this as callbacks
15 // from the generic peephole pass, this would have a couple of bad
16 // effects: it might remove optimization opportunities for VSX swap
17 // removal, and it would miss cleanups made possible following VSX
18 // swap removal.
19 //
20 //===---------------------------------------------------------------------===//
21
22 #include "PPC.h"
23 #include "PPCInstrBuilder.h"
24 #include "PPCInstrInfo.h"
25 #include "PPCTargetMachine.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/CodeGen/MachineDominators.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/Support/Debug.h"
32 #include "MCTargetDesc/PPCPredicates.h"
33
34 using namespace llvm;
35
36 #define DEBUG_TYPE "ppc-mi-peepholes"
37
// Pass-level statistics, reported with -stats.
STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
STATISTIC(MultiTOCSaves,
          "Number of functions with multiple TOC saves that must be kept");
STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
STATISTIC(NumConvertedToImmediateForm,
          "Number of instructions converted to their immediate form");
STATISTIC(NumFunctionsEnteredInMIPeephole,
          "Number of functions entered in PPC MI Peepholes");
STATISTIC(NumFixedPointIterations,
          "Number of fixed-point iterations converting reg-reg instructions "
          "to reg-imm ones");

// Command-line knobs controlling individual peepholes.
// When false, the reg-reg -> reg-imm conversion runs a single sweep instead
// of iterating to a fixed point.
static cl::opt<bool>
FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
                   cl::desc("Iterate to a fixed point when attempting to "
                            "convert reg-reg instructions to reg-imm"));

static cl::opt<bool>
ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
              cl::desc("Convert eligible reg+reg instructions to reg+imm"));

// Sign-/zero-extension elimination are off by default.
static cl::opt<bool>
    EnableSExtElimination("ppc-eliminate-signext",
                          cl::desc("enable elimination of sign-extensions"),
                          cl::init(false), cl::Hidden);

static cl::opt<bool>
    EnableZExtElimination("ppc-eliminate-zeroext",
                          cl::desc("enable elimination of zero-extensions"),
                          cl::init(false), cl::Hidden);
70
71 namespace {
72
73 struct PPCMIPeephole : public MachineFunctionPass {
74
75 static char ID;
76 const PPCInstrInfo *TII;
77 MachineFunction *MF;
78 MachineRegisterInfo *MRI;
79
PPCMIPeephole__anon2bc1337a0111::PPCMIPeephole80 PPCMIPeephole() : MachineFunctionPass(ID) {
81 initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
82 }
83
84 private:
85 MachineDominatorTree *MDT;
86
87 // Initialize class variables.
88 void initialize(MachineFunction &MFParm);
89
90 // Perform peepholes.
91 bool simplifyCode(void);
92
93 // Perform peepholes.
94 bool eliminateRedundantCompare(void);
95 bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
96 void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
97 MachineInstr *MI);
98
99 public:
100
getAnalysisUsage__anon2bc1337a0111::PPCMIPeephole101 void getAnalysisUsage(AnalysisUsage &AU) const override {
102 AU.addRequired<MachineDominatorTree>();
103 AU.addPreserved<MachineDominatorTree>();
104 MachineFunctionPass::getAnalysisUsage(AU);
105 }
106
107 // Main entry point for this pass.
runOnMachineFunction__anon2bc1337a0111::PPCMIPeephole108 bool runOnMachineFunction(MachineFunction &MF) override {
109 if (skipFunction(MF.getFunction()))
110 return false;
111 initialize(MF);
112 return simplifyCode();
113 }
114 };
115
116 // Initialize class variables.
initialize(MachineFunction & MFParm)117 void PPCMIPeephole::initialize(MachineFunction &MFParm) {
118 MF = &MFParm;
119 MRI = &MF->getRegInfo();
120 MDT = &getAnalysis<MachineDominatorTree>();
121 TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
122 LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
123 LLVM_DEBUG(MF->dump());
124 }
125
getVRegDefOrNull(MachineOperand * Op,MachineRegisterInfo * MRI)126 static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
127 MachineRegisterInfo *MRI) {
128 assert(Op && "Invalid Operand!");
129 if (!Op->isReg())
130 return nullptr;
131
132 unsigned Reg = Op->getReg();
133 if (!TargetRegisterInfo::isVirtualRegister(Reg))
134 return nullptr;
135
136 return MRI->getVRegDef(Reg);
137 }
138
139 // This function returns number of known zero bits in output of MI
140 // starting from the most significant bit.
141 static unsigned
getKnownLeadingZeroCount(MachineInstr * MI,const PPCInstrInfo * TII)142 getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
143 unsigned Opcode = MI->getOpcode();
144 if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICLo ||
145 Opcode == PPC::RLDCL || Opcode == PPC::RLDCLo)
146 return MI->getOperand(3).getImm();
147
148 if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDICo) &&
149 MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
150 return MI->getOperand(3).getImm();
151
152 if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINMo ||
153 Opcode == PPC::RLWNM || Opcode == PPC::RLWNMo ||
154 Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
155 MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
156 return 32 + MI->getOperand(3).getImm();
157
158 if (Opcode == PPC::ANDIo) {
159 uint16_t Imm = MI->getOperand(2).getImm();
160 return 48 + countLeadingZeros(Imm);
161 }
162
163 if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZWo ||
164 Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZWo ||
165 Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
166 // The result ranges from 0 to 32.
167 return 58;
168
169 if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZDo ||
170 Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZDo)
171 // The result ranges from 0 to 64.
172 return 57;
173
174 if (Opcode == PPC::LHZ || Opcode == PPC::LHZX ||
175 Opcode == PPC::LHZ8 || Opcode == PPC::LHZX8 ||
176 Opcode == PPC::LHZU || Opcode == PPC::LHZUX ||
177 Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
178 return 48;
179
180 if (Opcode == PPC::LBZ || Opcode == PPC::LBZX ||
181 Opcode == PPC::LBZ8 || Opcode == PPC::LBZX8 ||
182 Opcode == PPC::LBZU || Opcode == PPC::LBZUX ||
183 Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
184 return 56;
185
186 if (TII->isZeroExtended(*MI))
187 return 32;
188
189 return 0;
190 }
191
192 // This function maintains a map for the pairs <TOC Save Instr, Keep>
193 // Each time a new TOC save is encountered, it checks if any of the existing
194 // ones are dominated by the new one. If so, it marks the existing one as
195 // redundant by setting it's entry in the map as false. It then adds the new
196 // instruction to the map with either true or false depending on if any
197 // existing instructions dominated the new one.
UpdateTOCSaves(std::map<MachineInstr *,bool> & TOCSaves,MachineInstr * MI)198 void PPCMIPeephole::UpdateTOCSaves(
199 std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
200 assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
201 bool Keep = true;
202 for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
203 MachineInstr *CurrInst = It->first;
204 // If new instruction dominates an existing one, mark existing one as
205 // redundant.
206 if (It->second && MDT->dominates(MI, CurrInst))
207 It->second = false;
208 // Check if the new instruction is redundant.
209 if (MDT->dominates(CurrInst, MI)) {
210 Keep = false;
211 break;
212 }
213 }
214 // Add new instruction to map.
215 TOCSaves[MI] = Keep;
216 }
217
218 // Perform peephole optimizations.
// Perform peephole optimizations over the whole function.
//
// Two phases: (1) an optional fixed-point sweep converting reg+reg
// instructions fed by load-immediates into their reg+imm forms, and
// (2) a per-opcode peephole pass over every instruction. Returns true
// if anything was changed. NOTE: ToErase holds an instruction whose
// deletion is deferred until the next loop iteration so iterators stay
// valid; it is flushed again after each basic block.
bool PPCMIPeephole::simplifyCode(void) {
  bool Simplified = false;
  MachineInstr* ToErase = nullptr;
  std::map<MachineInstr *, bool> TOCSaves;
  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
  NumFunctionsEnteredInMIPeephole++;
  if (ConvertRegReg) {
    // Fixed-point conversion of reg/reg instructions fed by load-immediate
    // into reg/imm instructions. FIXME: This is expensive, control it with
    // an option.
    bool SomethingChanged = false;
    do {
      NumFixedPointIterations++;
      SomethingChanged = false;
      for (MachineBasicBlock &MBB : *MF) {
        for (MachineInstr &MI : MBB) {
          if (MI.isDebugInstr())
            continue;

          if (TII->convertToImmediateForm(MI)) {
            // We don't erase anything in case the def has other uses. Let DCE
            // remove it if it can be removed.
            LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
            LLVM_DEBUG(MI.dump());
            NumConvertedToImmediateForm++;
            SomethingChanged = true;
            Simplified = true;
            continue;
          }
        }
      }
    } while (SomethingChanged && FixedPointRegToImm);
  }

  for (MachineBasicBlock &MBB : *MF) {
    for (MachineInstr &MI : MBB) {

      // If the previous instruction was marked for elimination,
      // remove it now.
      if (ToErase) {
        ToErase->eraseFromParent();
        ToErase = nullptr;
      }

      // Ignore debug instructions.
      if (MI.isDebugInstr())
        continue;

      // Per-opcode peepholes.
      switch (MI.getOpcode()) {

      default:
        break;

      case PPC::STD: {
        MachineFrameInfo &MFI = MF->getFrameInfo();
        // TOC-save elimination only applies under the ELFv2 ABI and when
        // frame layout is static.
        if (MFI.hasVarSizedObjects() ||
            !MF->getSubtarget<PPCSubtarget>().isELFv2ABI())
          break;
        // When encountering a TOC save instruction, call UpdateTOCSaves
        // to add it to the TOCSaves map and mark any existing TOC saves
        // it dominates as redundant.
        if (TII->isTOCSaveMI(MI))
          UpdateTOCSaves(TOCSaves, &MI);
        break;
      }
      case PPC::XXPERMDI: {
        // Perform simplifications of 2x64 vector swaps and splats.
        // A swap is identified by an immediate value of 2, and a splat
        // is identified by an immediate value of 0 or 3.
        int Immed = MI.getOperand(3).getImm();

        if (Immed != 1) {

          // For each of these simplifications, we need the two source
          // regs to match. Unfortunately, MachineCSE ignores COPY and
          // SUBREG_TO_REG, so for example we can see
          // XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
          // We have to look through chains of COPY and SUBREG_TO_REG
          // to find the real source values for comparison.
          unsigned TrueReg1 =
            TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
          unsigned TrueReg2 =
            TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);

          if (TrueReg1 == TrueReg2
              && TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
            MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
            unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;

            // If this is a splat fed by a splatting load, the splat is
            // redundant. Replace with a copy. This doesn't happen directly due
            // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
            // a load of a double to a vector of 64-bit integers.
            auto isConversionOfLoadAndSplat = [=]() -> bool {
              if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
                return false;
              unsigned DefReg =
                TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
              if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
                MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
                if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
                  return true;
              }
              return false;
            };
            if (DefMI && (Immed == 0 || Immed == 3)) {
              if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
                LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
                                     "to load-and-splat/copy: ");
                LLVM_DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(MI.getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }
            }

            // If this is a splat or a swap fed by another splat, we
            // can replace it with a copy.
            if (DefOpc == PPC::XXPERMDI) {
              unsigned FeedImmed = DefMI->getOperand(3).getImm();
              unsigned FeedReg1 =
                TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
              unsigned FeedReg2 =
                TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);

              if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
                LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
                                     "to splat/copy: ");
                LLVM_DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(MI.getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }

              // If this is a splat fed by a swap, we can simplify modify
              // the splat to splat the other value from the swap's input
              // parameter.
              else if ((Immed == 0 || Immed == 3)
                       && FeedImmed == 2 && FeedReg1 == FeedReg2) {
                LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
                LLVM_DEBUG(MI.dump());
                MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
                MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
                // Splat the opposite doubleword (0 <-> 3).
                MI.getOperand(3).setImm(3 - Immed);
                Simplified = true;
              }

              // If this is a swap fed by a swap, we can replace it
              // with a copy from the first swap's input.
              else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
                LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
                LLVM_DEBUG(MI.dump());
                BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                        MI.getOperand(0).getReg())
                    .add(DefMI->getOperand(1));
                ToErase = &MI;
                Simplified = true;
              }
            } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
                       (DefMI->getOperand(2).getImm() == 0 ||
                        DefMI->getOperand(2).getImm() == 3)) {
              // Splat fed by another splat - switch the output of the first
              // and remove the second.
              DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
              ToErase = &MI;
              Simplified = true;
              LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
              LLVM_DEBUG(MI.dump());
            }
          }
        }
        break;
      }
      case PPC::VSPLTB:
      case PPC::VSPLTH:
      case PPC::XXSPLTW: {
        unsigned MyOpcode = MI.getOpcode();
        // XXSPLTW takes its source in operand 1; VSPLTB/VSPLTH in operand 2.
        unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
        unsigned TrueReg =
          TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
        if (!DefMI)
          break;
        unsigned DefOpcode = DefMI->getOpcode();
        auto isConvertOfSplat = [=]() -> bool {
          if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
            return false;
          unsigned ConvReg = DefMI->getOperand(1).getReg();
          if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
            return false;
          MachineInstr *Splt = MRI->getVRegDef(ConvReg);
          return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
            Splt->getOpcode() == PPC::XXSPLTW);
        };
        bool AlreadySplat = (MyOpcode == DefOpcode) ||
          (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
          (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
          (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
        // If the instruction[s] that feed this splat have already splat
        // the value, this splat is redundant.
        if (AlreadySplat) {
          LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
          LLVM_DEBUG(MI.dump());
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                  MI.getOperand(0).getReg())
              .add(MI.getOperand(OpNo));
          ToErase = &MI;
          Simplified = true;
        }
        // Splat fed by a shift. Usually when we align value to splat into
        // vector element zero.
        if (DefOpcode == PPC::XXSLDWI) {
          unsigned ShiftRes = DefMI->getOperand(0).getReg();
          unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
          unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
          unsigned ShiftImm = DefMI->getOperand(3).getImm();
          unsigned SplatImm = MI.getOperand(2).getImm();
          if (ShiftOp1 == ShiftOp2) {
            // Fold the shift amount into the splat element index (mod 4).
            unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
            if (MRI->hasOneNonDBGUse(ShiftRes)) {
              LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
              LLVM_DEBUG(DefMI->dump());
              ToErase = DefMI;
            }
            Simplified = true;
            LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
                              << " to " << NewElem << " in instruction: ");
            LLVM_DEBUG(MI.dump());
            MI.getOperand(1).setReg(ShiftOp1);
            MI.getOperand(2).setImm(NewElem);
          }
        }
        break;
      }
      case PPC::XVCVDPSP: {
        // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
        unsigned TrueReg =
          TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
        if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
          break;
        MachineInstr *DefMI = MRI->getVRegDef(TrueReg);

        // This can occur when building a vector of single precision or integer
        // values.
        if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
          unsigned DefsReg1 =
            TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
          unsigned DefsReg2 =
            TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
          if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
              !TargetRegisterInfo::isVirtualRegister(DefsReg2))
            break;
          MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
          MachineInstr *P2 = MRI->getVRegDef(DefsReg2);

          if (!P1 || !P2)
            break;

          // Remove the passed FRSP instruction if it only feeds this MI and
          // set any uses of that FRSP (in this MI) to the source of the FRSP.
          auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
            if (RoundInstr->getOpcode() == PPC::FRSP &&
                MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
              Simplified = true;
              unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
              unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
              MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
              for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
                if (Use.getOperand(i).isReg() &&
                    Use.getOperand(i).getReg() == FRSPDefines)
                  Use.getOperand(i).setReg(ConvReg1);
              LLVM_DEBUG(dbgs() << "Removing redundant FRSP:\n");
              LLVM_DEBUG(RoundInstr->dump());
              LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
              LLVM_DEBUG(MI.dump());
              LLVM_DEBUG(dbgs() << "Through instruction:\n");
              LLVM_DEBUG(DefMI->dump());
              RoundInstr->eraseFromParent();
            }
          };

          // If the input to XVCVDPSP is a vector that was built (even
          // partially) out of FRSP's, the FRSP(s) can safely be removed
          // since this instruction performs the same operation.
          if (P1 != P2) {
            removeFRSPIfPossible(P1);
            removeFRSPIfPossible(P2);
            break;
          }
          removeFRSPIfPossible(P1);
        }
        break;
      }
      case PPC::EXTSH:
      case PPC::EXTSH8:
      case PPC::EXTSH8_32_64: {
        if (!EnableSExtElimination) break;
        unsigned NarrowReg = MI.getOperand(1).getReg();
        if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
        // If we've used a zero-extending load that we will sign-extend,
        // just do a sign-extending load.
        if (SrcMI->getOpcode() == PPC::LHZ ||
            SrcMI->getOpcode() == PPC::LHZX) {
          if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
            break;
          auto is64Bit = [] (unsigned Opcode) {
            return Opcode == PPC::EXTSH8;
          };
          auto isXForm = [] (unsigned Opcode) {
            return Opcode == PPC::LHZX;
          };
          // Select LHA/LHAX (32-bit) or LHA8/LHAX8 (64-bit) to match the
          // extend's width and the load's addressing form.
          auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
            if (is64Bit)
              if (isXForm) return PPC::LHAX8;
              else         return PPC::LHA8;
            else
              if (isXForm) return PPC::LHAX;
              else         return PPC::LHA;
          };
          unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
                                       isXForm(SrcMI->getOpcode()));
          LLVM_DEBUG(dbgs() << "Zero-extending load\n");
          LLVM_DEBUG(SrcMI->dump());
          LLVM_DEBUG(dbgs() << "and sign-extension\n");
          LLVM_DEBUG(MI.dump());
          LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
          SrcMI->setDesc(TII->get(Opc));
          SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        }
        break;
      }
      case PPC::EXTSW:
      case PPC::EXTSW_32:
      case PPC::EXTSW_32_64: {
        if (!EnableSExtElimination) break;
        unsigned NarrowReg = MI.getOperand(1).getReg();
        if (!TargetRegisterInfo::isVirtualRegister(NarrowReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
        // If we've used a zero-extending load that we will sign-extend,
        // just do a sign-extending load.
        if (SrcMI->getOpcode() == PPC::LWZ ||
            SrcMI->getOpcode() == PPC::LWZX) {
          if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
            break;
          auto is64Bit = [] (unsigned Opcode) {
            return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64;
          };
          auto isXForm = [] (unsigned Opcode) {
            return Opcode == PPC::LWZX;
          };
          // Select LWA_32/LWAX_32 (32-bit) or LWA/LWAX (64-bit) to match
          // the extend's width and the load's addressing form.
          auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
            if (is64Bit)
              if (isXForm) return PPC::LWAX;
              else         return PPC::LWA;
            else
              if (isXForm) return PPC::LWAX_32;
              else         return PPC::LWA_32;
          };
          unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
                                       isXForm(SrcMI->getOpcode()));
          LLVM_DEBUG(dbgs() << "Zero-extending load\n");
          LLVM_DEBUG(SrcMI->dump());
          LLVM_DEBUG(dbgs() << "and sign-extension\n");
          LLVM_DEBUG(MI.dump());
          LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
          SrcMI->setDesc(TII->get(Opc));
          SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
                   TII->isSignExtended(*SrcMI)) {
          // We can eliminate EXTSW if the input is known to be already
          // sign-extended. The 32->64 widening is re-expressed as an
          // INSERT_SUBREG into an implicit-def 64-bit register.
          LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
          unsigned TmpReg =
            MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
                  TmpReg);
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
                  MI.getOperand(0).getReg())
              .addReg(TmpReg)
              .addReg(NarrowReg)
              .addImm(PPC::sub_32);
          ToErase = &MI;
          Simplified = true;
          NumEliminatedSExt++;
        }
        break;
      }
      case PPC::RLDICL: {
        // We can eliminate RLDICL (e.g. for zero-extension)
        // if all bits to clear are already zero in the input.
        // This code assume following code sequence for zero-extension.
        //   %6 = COPY %5:sub_32; (optional)
        //   %8 = IMPLICIT_DEF;
        //   %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
        if (!EnableZExtElimination) break;

        // Only handle the pure-clear form (shift amount of zero).
        if (MI.getOperand(2).getImm() != 0)
          break;

        unsigned SrcReg = MI.getOperand(1).getReg();
        if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
          break;

        MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
        if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
              SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
          break;

        MachineInstr *ImpDefMI, *SubRegMI;
        ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
        SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
        if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;

        SrcMI = SubRegMI;
        if (SubRegMI->getOpcode() == PPC::COPY) {
          unsigned CopyReg = SubRegMI->getOperand(1).getReg();
          if (TargetRegisterInfo::isVirtualRegister(CopyReg))
            SrcMI = MRI->getVRegDef(CopyReg);
        }

        unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII);
        // If the mask clears no bits that could be nonzero, the RLDICL is
        // a no-op and can become a COPY.
        if (MI.getOperand(3).getImm() <= KnownZeroCount) {
          LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
          BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                  MI.getOperand(0).getReg())
              .addReg(SrcReg);
          ToErase = &MI;
          Simplified = true;
          NumEliminatedZExt++;
        }
        break;
      }

      // TODO: Any instruction that has an immediate form fed only by a PHI
      // whose operands are all load immediate can be folded away. We currently
      // do this for ADD instructions, but should expand it to arithmetic and
      // binary instructions with immediate forms in the future.
      case PPC::ADD4:
      case PPC::ADD8: {
        auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
          assert(PhiOp && "Invalid Operand!");
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);

          return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
                 MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
        };

        auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
                                            MachineOperand *PhiOp) {
          assert(PhiOp && "Invalid Operand!");
          assert(DominatorOp && "Invalid Operand!");
          MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
          MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);

          // Note: the vregs only show up at odd indices position of PHI Node,
          // the even indices position save the BB info.
          for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
            MachineInstr *LiMI =
              getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
            if (!LiMI ||
                (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
                || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
                !MDT->dominates(DefDomMI, LiMI))
              return false;
          }

          return true;
        };

        MachineOperand Op1 = MI.getOperand(1);
        MachineOperand Op2 = MI.getOperand(2);
        // Normalize so Op1 is the PHI of LIs and Op2 dominates all the LIs.
        if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
          std::swap(Op1, Op2);
        else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
          break; // We don't have an ADD fed by LI's that can be transformed

        // Now we know that Op1 is the PHI node and Op2 is the dominator
        unsigned DominatorReg = Op2.getReg();

        // The dominator becomes the base of the new ADDIs; it must avoid
        // R0/X0 (which reads as zero in that position).
        const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
                                             ? &PPC::G8RC_and_G8RC_NOX0RegClass
                                             : &PPC::GPRC_and_GPRC_NOR0RegClass;
        MRI->setRegClass(DominatorReg, TRC);

        // replace LIs with ADDIs
        MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
        for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
          MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
          LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
          LLVM_DEBUG(LiMI->dump());

          // There could be repeated registers in the PHI, e.g: %1 =
          // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
          // already replaced the def instruction, skip.
          if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
            continue;

          assert((LiMI->getOpcode() == PPC::LI ||
                  LiMI->getOpcode() == PPC::LI8) &&
                 "Invalid Opcode!");
          auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
          LiMI->RemoveOperand(1);                    // remove the imm of LI
          LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
                                                              : PPC::ADDI8));
          MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
              .addReg(DominatorReg)
              .addImm(LiImm); // restore the imm of LI
          LLVM_DEBUG(LiMI->dump());
        }

        // Replace ADD with COPY
        LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
        LLVM_DEBUG(MI.dump());
        BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
                MI.getOperand(0).getReg())
            .add(Op1);
        ToErase = &MI;
        Simplified = true;
        NumOptADDLIs++;
        break;
      }
      }
    }

    // If the last instruction was marked for elimination,
    // remove it now.
    if (ToErase) {
      ToErase->eraseFromParent();
      ToErase = nullptr;
    }
  }

  // Eliminate all the TOC save instructions which are redundant.
  Simplified |= eliminateRedundantTOCSaves(TOCSaves);
  // We try to eliminate redundant compare instruction.
  Simplified |= eliminateRedundantCompare();

  return Simplified;
}
779
780 // helper functions for eliminateRedundantCompare
isEqOrNe(MachineInstr * BI)781 static bool isEqOrNe(MachineInstr *BI) {
782 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
783 unsigned PredCond = PPC::getPredicateCondition(Pred);
784 return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
785 }
786
isSupportedCmpOp(unsigned opCode)787 static bool isSupportedCmpOp(unsigned opCode) {
788 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
789 opCode == PPC::CMPLW || opCode == PPC::CMPW ||
790 opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
791 opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
792 }
793
is64bitCmpOp(unsigned opCode)794 static bool is64bitCmpOp(unsigned opCode) {
795 return (opCode == PPC::CMPLD || opCode == PPC::CMPD ||
796 opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
797 }
798
isSignedCmpOp(unsigned opCode)799 static bool isSignedCmpOp(unsigned opCode) {
800 return (opCode == PPC::CMPD || opCode == PPC::CMPW ||
801 opCode == PPC::CMPDI || opCode == PPC::CMPWI);
802 }
803
getSignedCmpOpCode(unsigned opCode)804 static unsigned getSignedCmpOpCode(unsigned opCode) {
805 if (opCode == PPC::CMPLD) return PPC::CMPD;
806 if (opCode == PPC::CMPLW) return PPC::CMPW;
807 if (opCode == PPC::CMPLDI) return PPC::CMPDI;
808 if (opCode == PPC::CMPLWI) return PPC::CMPWI;
809 return opCode;
810 }
811
812 // We can decrement immediate x in (GE x) by changing it to (GT x-1) or
813 // (LT x) to (LE x-1)
getPredicateToDecImm(MachineInstr * BI,MachineInstr * CMPI)814 static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
815 uint64_t Imm = CMPI->getOperand(2).getImm();
816 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
817 if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
818 return 0;
819
820 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
821 unsigned PredCond = PPC::getPredicateCondition(Pred);
822 unsigned PredHint = PPC::getPredicateHint(Pred);
823 if (PredCond == PPC::PRED_GE)
824 return PPC::getPredicate(PPC::PRED_GT, PredHint);
825 if (PredCond == PPC::PRED_LT)
826 return PPC::getPredicate(PPC::PRED_LE, PredHint);
827
828 return 0;
829 }
830
831 // We can increment immediate x in (GT x) by changing it to (GE x+1) or
832 // (LE x) to (LT x+1)
getPredicateToIncImm(MachineInstr * BI,MachineInstr * CMPI)833 static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
834 uint64_t Imm = CMPI->getOperand(2).getImm();
835 bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
836 if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
837 return 0;
838
839 PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
840 unsigned PredCond = PPC::getPredicateCondition(Pred);
841 unsigned PredHint = PPC::getPredicateHint(Pred);
842 if (PredCond == PPC::PRED_GT)
843 return PPC::getPredicate(PPC::PRED_GE, PredHint);
844 if (PredCond == PPC::PRED_LE)
845 return PPC::getPredicate(PPC::PRED_LT, PredHint);
846
847 return 0;
848 }
849
850 // This takes a Phi node and returns a register value for the specified BB.
getIncomingRegForBlock(MachineInstr * Phi,MachineBasicBlock * MBB)851 static unsigned getIncomingRegForBlock(MachineInstr *Phi,
852 MachineBasicBlock *MBB) {
853 for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
854 MachineOperand &MO = Phi->getOperand(I);
855 if (MO.getMBB() == MBB)
856 return Phi->getOperand(I-1).getReg();
857 }
858 llvm_unreachable("invalid src basic block for this Phi node\n");
859 return 0;
860 }
861
862 // This function tracks the source of the register through register copy.
863 // If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
864 // assuming that the control comes from BB1 into BB2.
getSrcVReg(unsigned Reg,MachineBasicBlock * BB1,MachineBasicBlock * BB2,MachineRegisterInfo * MRI)865 static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
866 MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
867 unsigned SrcReg = Reg;
868 while (1) {
869 unsigned NextReg = SrcReg;
870 MachineInstr *Inst = MRI->getVRegDef(SrcReg);
871 if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
872 NextReg = getIncomingRegForBlock(Inst, BB1);
873 // We track through PHI only once to avoid infinite loop.
874 BB1 = nullptr;
875 }
876 else if (Inst->isFullCopy())
877 NextReg = Inst->getOperand(1).getReg();
878 if (NextReg == SrcReg || !TargetRegisterInfo::isVirtualRegister(NextReg))
879 break;
880 SrcReg = NextReg;
881 }
882 return SrcReg;
883 }
884
eligibleForCompareElimination(MachineBasicBlock & MBB,MachineBasicBlock * & PredMBB,MachineBasicBlock * & MBBtoMoveCmp,MachineRegisterInfo * MRI)885 static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
886 MachineBasicBlock *&PredMBB,
887 MachineBasicBlock *&MBBtoMoveCmp,
888 MachineRegisterInfo *MRI) {
889
890 auto isEligibleBB = [&](MachineBasicBlock &BB) {
891 auto BII = BB.getFirstInstrTerminator();
892 // We optimize BBs ending with a conditional branch.
893 // We check only for BCC here, not BCCLR, because BCCLR
894 // will be formed only later in the pipeline.
895 if (BB.succ_size() == 2 &&
896 BII != BB.instr_end() &&
897 (*BII).getOpcode() == PPC::BCC &&
898 (*BII).getOperand(1).isReg()) {
899 // We optimize only if the condition code is used only by one BCC.
900 unsigned CndReg = (*BII).getOperand(1).getReg();
901 if (!TargetRegisterInfo::isVirtualRegister(CndReg) ||
902 !MRI->hasOneNonDBGUse(CndReg))
903 return false;
904
905 MachineInstr *CMPI = MRI->getVRegDef(CndReg);
906 // We assume compare and branch are in the same BB for ease of analysis.
907 if (CMPI->getParent() != &BB)
908 return false;
909
910 // We skip this BB if a physical register is used in comparison.
911 for (MachineOperand &MO : CMPI->operands())
912 if (MO.isReg() && !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
913 return false;
914
915 return true;
916 }
917 return false;
918 };
919
920 // If this BB has more than one successor, we can create a new BB and
921 // move the compare instruction in the new BB.
922 // So far, we do not move compare instruction to a BB having multiple
923 // successors to avoid potentially increasing code size.
924 auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
925 return BB.succ_size() == 1;
926 };
927
928 if (!isEligibleBB(MBB))
929 return false;
930
931 unsigned NumPredBBs = MBB.pred_size();
932 if (NumPredBBs == 1) {
933 MachineBasicBlock *TmpMBB = *MBB.pred_begin();
934 if (isEligibleBB(*TmpMBB)) {
935 PredMBB = TmpMBB;
936 MBBtoMoveCmp = nullptr;
937 return true;
938 }
939 }
940 else if (NumPredBBs == 2) {
941 // We check for partially redundant case.
942 // So far, we support cases with only two predecessors
943 // to avoid increasing the number of instructions.
944 MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
945 MachineBasicBlock *Pred1MBB = *PI;
946 MachineBasicBlock *Pred2MBB = *(PI+1);
947
948 if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
949 // We assume Pred1MBB is the BB containing the compare to be merged and
950 // Pred2MBB is the BB to which we will append a compare instruction.
951 // Hence we can proceed as is.
952 }
953 else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
954 // We need to swap Pred1MBB and Pred2MBB to canonicalize.
955 std::swap(Pred1MBB, Pred2MBB);
956 }
957 else return false;
958
959 // Here, Pred2MBB is the BB to which we need to append a compare inst.
960 // We cannot move the compare instruction if operands are not available
961 // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
962 MachineInstr *BI = &*MBB.getFirstInstrTerminator();
963 MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
964 for (int I = 1; I <= 2; I++)
965 if (CMPI->getOperand(I).isReg()) {
966 MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
967 if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
968 return false;
969 }
970
971 PredMBB = Pred1MBB;
972 MBBtoMoveCmp = Pred2MBB;
973 return true;
974 }
975
976 return false;
977 }
978
979 // This function will iterate over the input map containing a pair of TOC save
980 // instruction and a flag. The flag will be set to false if the TOC save is
981 // proven redundant. This function will erase from the basic block all the TOC
982 // saves marked as redundant.
eliminateRedundantTOCSaves(std::map<MachineInstr *,bool> & TOCSaves)983 bool PPCMIPeephole::eliminateRedundantTOCSaves(
984 std::map<MachineInstr *, bool> &TOCSaves) {
985 bool Simplified = false;
986 int NumKept = 0;
987 for (auto TOCSave : TOCSaves) {
988 if (!TOCSave.second) {
989 TOCSave.first->eraseFromParent();
990 RemoveTOCSave++;
991 Simplified = true;
992 } else {
993 NumKept++;
994 }
995 }
996
997 if (NumKept > 1)
998 MultiTOCSaves++;
999
1000 return Simplified;
1001 }
1002
// If multiple conditional branches are executed based on the (essentially)
// same comparison, we merge compare instructions into one and make multiple
// conditional branches on this comparison.
// For example,
//   if (a == 0) { ... }
//   else if (a < 0) { ... }
// can be executed by one compare and two conditional branches instead of
// two pairs of a compare and a conditional branch.
//
// This method merges two compare instructions in two MBBs and modifies the
// compare and conditional branch instructions if needed.
// For the above example, the input for this pass looks like:
//   cmplwi r3, 0
//   beq    0, .LBB0_3
//   cmpwi  r3, -1
//   bgt    0, .LBB0_4
// So, before merging two compares, we need to modify these instructions as
//   cmpwi  r3, 0       ; cmplwi and cmpwi yield same result for beq
//   beq    0, .LBB0_3
//   cmpwi  r3, 0       ; greater than -1 means greater or equal to 0
//   bge    0, .LBB0_4
// Returns true if any compare/branch pair was rewritten.

bool PPCMIPeephole::eliminateRedundantCompare(void) {
  bool Simplified = false;

  for (MachineBasicBlock &MBB2 : *MF) {
    MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;

    // For fully redundant case, we select two basic blocks MBB1 and MBB2
    // as an optimization target if
    // - both MBBs end with a conditional branch,
    // - MBB1 is the only predecessor of MBB2, and
    // - compare does not take a physical register as an operand in both MBBs.
    // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
    //
    // As partially redundant case, we additionally handle if MBB2 has one
    // additional predecessor, which has only one successor (MBB2).
    // In this case, we move the compare instruction originally in MBB2 into
    // MBBtoMoveCmp. This partially redundant case typically appears when
    // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
    //
    // Overview of CFG of related basic blocks
    // Fully redundant case        Partially redundant case
    //   --------                   ----------------  --------
    //   | MBB1 | (w/ 2 succ)       | MBBtoMoveCmp |  | MBB1 | (w/ 2 succ)
    //   --------                   ----------------  --------
    //    |    \                     (w/ 1 succ) \     |  \
    //    |     \                                 \    |    \
    //    |                                        \   |
    //   --------                                 --------
    //   | MBB2 | (w/ 1 pred                      | MBB2 | (w/ 2 pred
    //   -------- and 2 succ)                     -------- and 2 succ)
    //    |    \                                   |   \
    //    |     \                                  |    \
    //
    if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
      continue;

    // The branch terminator and the compare feeding it, in each block.
    MachineInstr *BI1   = &*MBB1->getFirstInstrTerminator();
    MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());

    MachineInstr *BI2   = &*MBB2.getFirstInstrTerminator();
    MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
    bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);

    // We cannot optimize an unsupported compare opcode or
    // a mix of 32-bit and 64-bit comparisons
    if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
        !isSupportedCmpOp(CMPI2->getOpcode()) ||
        is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
      continue;

    unsigned NewOpCode = 0;
    unsigned NewPredicate1 = 0, NewPredicate2 = 0;
    int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
    bool SwapOperands = false;

    if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
      // Typically, unsigned comparison is used for equality check, but
      // we replace it with a signed comparison if the comparison
      // to be merged is a signed comparison.
      // In other cases of opcode mismatch, we cannot optimize this.

      // We cannot change opcode when comparing against an immediate
      // if the most significant bit of the immediate is one
      // due to the difference in sign extension.
      auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
        if (!I->getOperand(2).isImm())
          return false;
        int16_t Imm = (int16_t)I->getOperand(2).getImm();
        return Imm < 0;
      };

      if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
          CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
        NewOpCode = CMPI1->getOpcode();
      else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
               getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
        NewOpCode = CMPI2->getOpcode();
      else continue;
    }

    if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
      // In case of comparisons between two registers, these two registers
      // must be same to merge two comparisons.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
                                         MBB1, &MBB2, MRI);

      if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
        // Same pair of registers in the same order; ready to merge as is.
      }
      else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
        // Same pair of registers in different order.
        // We reverse the predicate to merge compare instructions.
        PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
        NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
        // In case of partial redundancy, we need to swap operands
        // in another compare instruction.
        SwapOperands = true;
      }
      else continue;
    }
    else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
      // In case of comparisons between a register and an immediate,
      // the operand register must be same for two compare instructions.
      unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
                                         nullptr, nullptr, MRI);
      unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
                                         MBB1, &MBB2, MRI);
      if (Cmp1Operand1 != Cmp2Operand1)
        continue;

      NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
      NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();

      // If immediate are not same, we try to adjust by changing predicate;
      // e.g. GT imm means GE (imm+1).
      if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
        int Diff = Imm1 - Imm2;
        // Only differences of at most 2 can be bridged by adjusting
        // one predicate on each side.
        if (Diff < -2 || Diff > 2)
          continue;

        unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
        unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
        unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
        unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
        if (Diff == 2) {
          if (PredToInc2 && PredToDec1) {
            NewPredicate2 = PredToInc2;
            NewPredicate1 = PredToDec1;
            NewImm2++;
            NewImm1--;
          }
        }
        else if (Diff == 1) {
          if (PredToInc2) {
            NewImm2++;
            NewPredicate2 = PredToInc2;
          }
          else if (PredToDec1) {
            NewImm1--;
            NewPredicate1 = PredToDec1;
          }
        }
        else if (Diff == -1) {
          if (PredToDec2) {
            NewImm2--;
            NewPredicate2 = PredToDec2;
          }
          else if (PredToInc1) {
            NewImm1++;
            NewPredicate1 = PredToInc1;
          }
        }
        else if (Diff == -2) {
          if (PredToDec2 && PredToInc1) {
            NewPredicate2 = PredToDec2;
            NewPredicate1 = PredToInc1;
            NewImm2--;
            NewImm1++;
          }
        }
      }

      // We cannot merge two compares if the immediates are not same.
      if (NewImm2 != NewImm1)
        continue;
    }

    LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(CMPI2->dump());
    LLVM_DEBUG(BI2->dump());

    // We adjust opcode, predicates and immediate as we determined above.
    if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
      CMPI1->setDesc(TII->get(NewOpCode));
    }
    if (NewPredicate1) {
      BI1->getOperand(0).setImm(NewPredicate1);
    }
    if (NewPredicate2) {
      BI2->getOperand(0).setImm(NewPredicate2);
    }
    if (NewImm1 != Imm1) {
      CMPI1->getOperand(2).setImm(NewImm1);
    }

    if (IsPartiallyRedundant) {
      // We touch up the compare instruction in MBB2 and move it to
      // a previous BB to handle partially redundant case.
      if (SwapOperands) {
        unsigned Op1 = CMPI2->getOperand(1).getReg();
        unsigned Op2 = CMPI2->getOperand(2).getReg();
        CMPI2->getOperand(1).setReg(Op2);
        CMPI2->getOperand(2).setReg(Op1);
      }
      if (NewImm2 != Imm2)
        CMPI2->getOperand(2).setImm(NewImm2);

      // Rewrite PHI-defined operands of the hoisted compare to use the
      // value incoming from MBBtoMoveCmp, since the compare will execute
      // there rather than in MBB2.
      for (int I = 1; I <= 2; I++) {
        if (CMPI2->getOperand(I).isReg()) {
          MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
          if (Inst->getParent() != &MBB2)
            continue;

          assert(Inst->getOpcode() == PPC::PHI &&
                 "We cannot support if an operand comes from this BB.");
          unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
          CMPI2->getOperand(I).setReg(SrcReg);
        }
      }
      // Move the compare into MBBtoMoveCmp, just before its terminator.
      auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
      MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));

      // MBB2 now merges two CR values: one from MBB1's compare and one
      // from the hoisted compare; a new PHI selects between them.
      DebugLoc DL = CMPI2->getDebugLoc();
      unsigned NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
      BuildMI(MBB2, MBB2.begin(), DL,
              TII->get(PPC::PHI), NewVReg)
        .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
        .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
      BI2->getOperand(1).setReg(NewVReg);
    }
    else {
      // We finally eliminate compare instruction in MBB2.
      BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
      CMPI2->eraseFromParent();
    }
    BI2->getOperand(1).setIsKill(true);
    BI1->getOperand(1).setIsKill(false);

    LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
    LLVM_DEBUG(CMPI1->dump());
    LLVM_DEBUG(BI1->dump());
    LLVM_DEBUG(BI2->dump());
    if (IsPartiallyRedundant) {
      LLVM_DEBUG(dbgs() << "The following compare is moved into "
                        << printMBBReference(*MBBtoMoveCmp)
                        << " to handle partial redundancy.\n");
      LLVM_DEBUG(CMPI2->dump());
    }

    Simplified = true;
  }

  return Simplified;
}
1277
} // end default namespace

// Register the pass with the PassRegistry so it is known by DEBUG_TYPE
// ("ppc-mi-peepholes") on the command line and in pass pipelines.
INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
                      "PowerPC MI Peephole Optimization", false, false)
INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
                    "PowerPC MI Peephole Optimization", false, false)

char PPCMIPeephole::ID = 0;
// Factory entry point used by the PowerPC target to add this pass to
// the codegen pipeline.
FunctionPass*
llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
1288
1289