1 //===------ LeonPasses.cpp - Define passes specific to LEON ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "LeonPasses.h"
14 #include "llvm/CodeGen/ISDOpcodes.h"
15 #include "llvm/CodeGen/MachineFunction.h"
16 #include "llvm/CodeGen/MachineInstr.h"
17 #include "llvm/CodeGen/MachineInstrBuilder.h"
18 #include "llvm/CodeGen/MachineRegisterInfo.h"
19 #include "llvm/IR/LLVMContext.h"
20 #include "llvm/Support/raw_ostream.h"
21 using namespace llvm;
22 
LEONMachineFunctionPass(TargetMachine & tm,char & ID)23 LEONMachineFunctionPass::LEONMachineFunctionPass(TargetMachine &tm, char &ID)
24     : MachineFunctionPass(ID) {}
25 
LEONMachineFunctionPass(char & ID)26 LEONMachineFunctionPass::LEONMachineFunctionPass(char &ID)
27     : MachineFunctionPass(ID) {}
28 
GetRegIndexForOperand(MachineInstr & MI,int OperandIndex)29 int LEONMachineFunctionPass::GetRegIndexForOperand(MachineInstr &MI,
30                                                    int OperandIndex) {
31   if (MI.getNumOperands() > 0) {
32     if (OperandIndex == LAST_OPERAND) {
33       OperandIndex = MI.getNumOperands() - 1;
34     }
35 
36     if (MI.getNumOperands() > (unsigned)OperandIndex &&
37         MI.getOperand(OperandIndex).isReg()) {
38       return (int)MI.getOperand(OperandIndex).getReg();
39     }
40   }
41 
42   static int NotFoundIndex = -10;
43   // Return a different number each time to avoid any comparisons between the
44   // values returned.
45   NotFoundIndex -= 10;
46   return NotFoundIndex;
47 }
48 
49 // finds a new free FP register
50 // checks also the AllocatedRegisters vector
getUnusedFPRegister(MachineRegisterInfo & MRI)51 int LEONMachineFunctionPass::getUnusedFPRegister(MachineRegisterInfo &MRI) {
52   for (int RegisterIndex = SP::F0; RegisterIndex <= SP::F31; ++RegisterIndex) {
53     if (!MRI.isPhysRegUsed(RegisterIndex) &&
54         !(std::find(UsedRegisters.begin(), UsedRegisters.end(),
55                     RegisterIndex) != UsedRegisters.end())) {
56       return RegisterIndex;
57     }
58   }
59 
60   return -1;
61 }
62 
63 //*****************************************************************************
64 //**** InsertNOPLoad pass
65 //*****************************************************************************
// This pass fixes the incorrectly working Load instructions that exist for
// some earlier versions of the LEON processor line. NOP instructions must
// be inserted after the load instruction to ensure that the Load instruction
// behaves as expected for these processors.
70 //
71 // This pass inserts a NOP after any LD or LDF instruction.
72 //
73 char InsertNOPLoad::ID = 0;
74 
InsertNOPLoad(TargetMachine & tm)75 InsertNOPLoad::InsertNOPLoad(TargetMachine &tm)
76     : LEONMachineFunctionPass(tm, ID) {}
77 
runOnMachineFunction(MachineFunction & MF)78 bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) {
79   Subtarget = &MF.getSubtarget<SparcSubtarget>();
80   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
81   DebugLoc DL = DebugLoc();
82 
83   bool Modified = false;
84   for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
85     MachineBasicBlock &MBB = *MFI;
86     for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
87       MachineInstr &MI = *MBBI;
88       unsigned Opcode = MI.getOpcode();
89       if (Opcode >= SP::LDDArr && Opcode <= SP::LDrr) {
90         MachineBasicBlock::iterator NMBBI = std::next(MBBI);
91         BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
92         Modified = true;
93       } else if (MI.isInlineAsm()) {
94         // Look for an inline ld or ldf instruction.
95         StringRef AsmString =
96             MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
97         if (AsmString.startswith_lower("ld")) {
98           MachineBasicBlock::iterator NMBBI = std::next(MBBI);
99           BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
100           Modified = true;
101         }
102       }
103     }
104   }
105 
106   return Modified;
107 }
108 
109 //*****************************************************************************
110 //**** FixFSMULD pass
111 //*****************************************************************************
112 // This pass fixes the incorrectly working FSMULD instruction that exists for
113 // some earlier versions of the LEON processor line.
114 //
115 // The pass should convert the FSMULD operands to double precision in scratch
116 // registers, then calculate the result with the FMULD instruction. Therefore,
117 // the pass should replace operations of the form:
118 // fsmuld %f20,%f21,%f8
119 // with the sequence:
120 // fstod %f20,%f0
121 // fstod %f21,%f2
122 // fmuld %f0,%f2,%f8
123 //
124 char FixFSMULD::ID = 0;
125 
FixFSMULD(TargetMachine & tm)126 FixFSMULD::FixFSMULD(TargetMachine &tm) : LEONMachineFunctionPass(tm, ID) {}
127 
runOnMachineFunction(MachineFunction & MF)128 bool FixFSMULD::runOnMachineFunction(MachineFunction &MF) {
129   Subtarget = &MF.getSubtarget<SparcSubtarget>();
130   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
131   DebugLoc DL = DebugLoc();
132 
133   bool Modified = false;
134   for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
135     MachineBasicBlock &MBB = *MFI;
136     for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
137 
138       MachineInstr &MI = *MBBI;
139       unsigned Opcode = MI.getOpcode();
140 
141       const int UNASSIGNED_INDEX = -1;
142       int Reg1Index = UNASSIGNED_INDEX;
143       int Reg2Index = UNASSIGNED_INDEX;
144       int Reg3Index = UNASSIGNED_INDEX;
145 
146       if (Opcode == SP::FSMULD && MI.getNumOperands() == 3) {
147         // take the registers from fsmuld %f20,%f21,%f8
148         Reg1Index = MI.getOperand(0).getReg();
149         Reg2Index = MI.getOperand(1).getReg();
150         Reg3Index = MI.getOperand(2).getReg();
151       } else if (MI.isInlineAsm()) {
152         StringRef AsmString =
153             MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
154         if (AsmString.startswith_lower("fsmuld")) {
155           // this is an inline FSMULD instruction
156 
157           unsigned StartOp = InlineAsm::MIOp_FirstOperand;
158 
159           // extracts the registers from the inline assembly instruction
160           for (unsigned i = StartOp, e = MI.getNumOperands(); i != e; ++i) {
161             const MachineOperand &MO = MI.getOperand(i);
162             if (MO.isReg()) {
163               if (Reg1Index == UNASSIGNED_INDEX)
164                 Reg1Index = MO.getReg();
165               else if (Reg2Index == UNASSIGNED_INDEX)
166                 Reg2Index = MO.getReg();
167               else if (Reg3Index == UNASSIGNED_INDEX)
168                 Reg3Index = MO.getReg();
169             }
170             if (Reg3Index != UNASSIGNED_INDEX)
171               break;
172           }
173         }
174       }
175 
176       if (Reg1Index != UNASSIGNED_INDEX && Reg2Index != UNASSIGNED_INDEX &&
177           Reg3Index != UNASSIGNED_INDEX) {
178         clearUsedRegisterList();
179         MachineBasicBlock::iterator NMBBI = std::next(MBBI);
180         // Whatever Reg3Index is hasn't been used yet, so we need to reserve it.
181         markRegisterUsed(Reg3Index);
182         const int ScratchReg1Index = getUnusedFPRegister(MF.getRegInfo());
183         markRegisterUsed(ScratchReg1Index);
184         const int ScratchReg2Index = getUnusedFPRegister(MF.getRegInfo());
185         markRegisterUsed(ScratchReg2Index);
186 
187         if (ScratchReg1Index == UNASSIGNED_INDEX ||
188             ScratchReg2Index == UNASSIGNED_INDEX) {
189           errs() << "Cannot allocate free scratch registers for the FixFSMULD "
190                     "pass."
191                  << "\n";
192         } else {
193           // create fstod %f20,%f0
194           BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
195               .addReg(ScratchReg1Index)
196               .addReg(Reg1Index);
197 
198           // create fstod %f21,%f2
199           BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
200               .addReg(ScratchReg2Index)
201               .addReg(Reg2Index);
202 
203           // create fmuld %f0,%f2,%f8
204           BuildMI(MBB, MBBI, DL, TII.get(SP::FMULD))
205               .addReg(Reg3Index)
206               .addReg(ScratchReg1Index)
207               .addReg(ScratchReg2Index);
208 
209           MI.eraseFromParent();
210           MBBI = NMBBI;
211 
212           Modified = true;
213         }
214       }
215     }
216   }
217 
218   return Modified;
219 }
220 
221 //*****************************************************************************
222 //**** ReplaceFMULS pass
223 //*****************************************************************************
224 // This pass fixes the incorrectly working FMULS instruction that exists for
225 // some earlier versions of the LEON processor line.
226 //
227 // This pass converts the FMULS operands to double precision in scratch
228 // registers, then calculates the result with the FMULD instruction.
229 // The pass should replace operations of the form:
230 // fmuls %f20,%f21,%f8
231 // with the sequence:
232 // fstod %f20,%f0
233 // fstod %f21,%f2
234 // fmuld %f0,%f2,%f8
235 //
236 char ReplaceFMULS::ID = 0;
237 
ReplaceFMULS(TargetMachine & tm)238 ReplaceFMULS::ReplaceFMULS(TargetMachine &tm)
239     : LEONMachineFunctionPass(tm, ID) {}
240 
runOnMachineFunction(MachineFunction & MF)241 bool ReplaceFMULS::runOnMachineFunction(MachineFunction &MF) {
242   Subtarget = &MF.getSubtarget<SparcSubtarget>();
243   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
244   DebugLoc DL = DebugLoc();
245 
246   bool Modified = false;
247   for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
248     MachineBasicBlock &MBB = *MFI;
249     for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
250       MachineInstr &MI = *MBBI;
251       unsigned Opcode = MI.getOpcode();
252 
253       const int UNASSIGNED_INDEX = -1;
254       int Reg1Index = UNASSIGNED_INDEX;
255       int Reg2Index = UNASSIGNED_INDEX;
256       int Reg3Index = UNASSIGNED_INDEX;
257 
258       if (Opcode == SP::FMULS && MI.getNumOperands() == 3) {
259         // take the registers from fmuls %f20,%f21,%f8
260         Reg1Index = MI.getOperand(0).getReg();
261         Reg2Index = MI.getOperand(1).getReg();
262         Reg3Index = MI.getOperand(2).getReg();
263       } else if (MI.isInlineAsm()) {
264         StringRef AsmString =
265             MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
266         if (AsmString.startswith_lower("fmuls")) {
267           // this is an inline FMULS instruction
268           unsigned StartOp = InlineAsm::MIOp_FirstOperand;
269 
270           // extracts the registers from the inline assembly instruction
271           for (unsigned i = StartOp, e = MI.getNumOperands(); i != e; ++i) {
272             const MachineOperand &MO = MI.getOperand(i);
273             if (MO.isReg()) {
274               if (Reg1Index == UNASSIGNED_INDEX)
275                 Reg1Index = MO.getReg();
276               else if (Reg2Index == UNASSIGNED_INDEX)
277                 Reg2Index = MO.getReg();
278               else if (Reg3Index == UNASSIGNED_INDEX)
279                 Reg3Index = MO.getReg();
280             }
281             if (Reg3Index != UNASSIGNED_INDEX)
282               break;
283           }
284         }
285       }
286 
287       if (Reg1Index != UNASSIGNED_INDEX && Reg2Index != UNASSIGNED_INDEX &&
288           Reg3Index != UNASSIGNED_INDEX) {
289         clearUsedRegisterList();
290         MachineBasicBlock::iterator NMBBI = std::next(MBBI);
291         // Whatever Reg3Index is hasn't been used yet, so we need to reserve it.
292         markRegisterUsed(Reg3Index);
293         const int ScratchReg1Index = getUnusedFPRegister(MF.getRegInfo());
294         markRegisterUsed(ScratchReg1Index);
295         const int ScratchReg2Index = getUnusedFPRegister(MF.getRegInfo());
296         markRegisterUsed(ScratchReg2Index);
297 
298         if (ScratchReg1Index == UNASSIGNED_INDEX ||
299             ScratchReg2Index == UNASSIGNED_INDEX) {
300           errs() << "Cannot allocate free scratch registers for the "
301                     "ReplaceFMULS pass."
302                  << "\n";
303         } else {
304           // create fstod %f20,%f0
305           BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
306               .addReg(ScratchReg1Index)
307               .addReg(Reg1Index);
308 
309           // create fstod %f21,%f2
310           BuildMI(MBB, MBBI, DL, TII.get(SP::FSTOD))
311               .addReg(ScratchReg2Index)
312               .addReg(Reg2Index);
313 
314           // create fmuld %f0,%f2,%f8
315           BuildMI(MBB, MBBI, DL, TII.get(SP::FMULD))
316               .addReg(Reg3Index)
317               .addReg(ScratchReg1Index)
318               .addReg(ScratchReg2Index);
319 
320           MI.eraseFromParent();
321           MBBI = NMBBI;
322 
323           Modified = true;
324         }
325       }
326     }
327   }
328 
329   return Modified;
330 }
331 
332 //*****************************************************************************
333 //**** FixAllFDIVSQRT pass
334 //*****************************************************************************
// This pass works around the incorrectly working FDIVx and FSQRTx instructions
// that exist for some earlier versions of the LEON processor line. NOP
// instructions are inserted around these instructions (the code below places
// five before and 28 after each one) to ensure the correct result is placed in
// the destination registers before they are used.
339 //
340 // This pass implements two fixes:
341 //  1) fixing the FSQRTS and FSQRTD instructions.
342 //  2) fixing the FDIVS and FDIVD instructions.
343 //
// FSQRTS and FDIVS are converted to FSQRTD and FDIVD respectively earlier in
// the pipeline when this option is enabled, so this pass only needs to deal
// with the changes that still need implementing for the "double" versions
// of these instructions.
348 //
349 char FixAllFDIVSQRT::ID = 0;
350 
FixAllFDIVSQRT(TargetMachine & tm)351 FixAllFDIVSQRT::FixAllFDIVSQRT(TargetMachine &tm)
352     : LEONMachineFunctionPass(tm, ID) {}
353 
runOnMachineFunction(MachineFunction & MF)354 bool FixAllFDIVSQRT::runOnMachineFunction(MachineFunction &MF) {
355   Subtarget = &MF.getSubtarget<SparcSubtarget>();
356   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
357   DebugLoc DL = DebugLoc();
358 
359   bool Modified = false;
360   for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
361     MachineBasicBlock &MBB = *MFI;
362     for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
363       MachineInstr &MI = *MBBI;
364       unsigned Opcode = MI.getOpcode();
365 
366       if (MI.isInlineAsm()) {
367         StringRef AsmString =
368             MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
369         if (AsmString.startswith_lower("fsqrtd")) {
370           // this is an inline fsqrts instruction
371           Opcode = SP::FSQRTD;
372         } else if (AsmString.startswith_lower("fdivd")) {
373           // this is an inline fsqrts instruction
374           Opcode = SP::FDIVD;
375         }
376       }
377 
378       // Note: FDIVS and FSQRTS cannot be generated when this erratum fix is
379       // switched on so we don't need to check for them here. They will
380       // already have been converted to FSQRTD or FDIVD earlier in the
381       // pipeline.
382       if (Opcode == SP::FSQRTD || Opcode == SP::FDIVD) {
383         // Insert 5 NOPs before FSQRTD,FDIVD.
384         for (int InsertedCount = 0; InsertedCount < 5; InsertedCount++)
385           BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
386 
387         MachineBasicBlock::iterator NMBBI = std::next(MBBI);
388         // ... and inserting 28 NOPs after FSQRTD,FDIVD.
389         for (int InsertedCount = 0; InsertedCount < 28; InsertedCount++)
390           BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
391 
392         Modified = true;
393       }
394     }
395   }
396 
397   return Modified;
398 }
399 
400 //*****************************************************************************
401 //**** ReplaceSDIV pass
402 //*****************************************************************************
// This pass fixes the incorrectly working SDIV instruction that
// exists for some earlier versions of the LEON processor line. The instruction
// is replaced with an SDIVcc instruction instead, which is working.
406 //
407 char ReplaceSDIV::ID = 0;
408 
ReplaceSDIV()409 ReplaceSDIV::ReplaceSDIV() : LEONMachineFunctionPass(ID) {}
410 
ReplaceSDIV(TargetMachine & tm)411 ReplaceSDIV::ReplaceSDIV(TargetMachine &tm) : LEONMachineFunctionPass(tm, ID) {}
412 
runOnMachineFunction(MachineFunction & MF)413 bool ReplaceSDIV::runOnMachineFunction(MachineFunction &MF) {
414   Subtarget = &MF.getSubtarget<SparcSubtarget>();
415   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
416 
417   bool Modified = false;
418   for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
419     MachineBasicBlock &MBB = *MFI;
420     for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
421       MachineInstr &MI = *MBBI;
422       unsigned Opcode = MI.getOpcode();
423       if (Opcode == SP::SDIVrr) {
424         MI.setDesc(TII.get(SP::SDIVCCrr));
425         Modified = true;
426       } else if (Opcode == SP::SDIVri) {
427         MI.setDesc(TII.get(SP::SDIVCCri));
428         Modified = true;
429       }
430     }
431   }
432 
433   return Modified;
434 }
435 
436 static RegisterPass<ReplaceSDIV> X("replace-sdiv", "Replase SDIV Pass", false,
437                                    false);
438 
439 //*****************************************************************************
440 //**** FixCALL pass
441 //*****************************************************************************
442 // This pass restricts the size of the immediate operand of the CALL
443 // instruction, which can cause problems on some earlier versions of the LEON
444 // processor, which can interpret some of the call address bits incorrectly.
445 //
446 char FixCALL::ID = 0;
447 
FixCALL(TargetMachine & tm)448 FixCALL::FixCALL(TargetMachine &tm) : LEONMachineFunctionPass(tm, ID) {}
449 
runOnMachineFunction(MachineFunction & MF)450 bool FixCALL::runOnMachineFunction(MachineFunction &MF) {
451   bool Modified = false;
452 
453   for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
454     MachineBasicBlock &MBB = *MFI;
455     for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
456       MachineInstr &MI = *MBBI;
457       MI.print(errs());
458       errs() << "\n";
459 
460       unsigned Opcode = MI.getOpcode();
461       if (Opcode == SP::CALL || Opcode == SP::CALLrr) {
462         unsigned NumOperands = MI.getNumOperands();
463         for (unsigned OperandIndex = 0; OperandIndex < NumOperands;
464              OperandIndex++) {
465           MachineOperand &MO = MI.getOperand(OperandIndex);
466           if (MO.isImm()) {
467             int64_t Value = MO.getImm();
468             MO.setImm(Value & 0x000fffffL);
469             Modified = true;
470             break;
471           }
472         }
473       } else if (MI.isInlineAsm()) // inline assembly immediate call
474       {
475         StringRef AsmString =
476             MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
477         if (AsmString.startswith_lower("call")) {
478           // this is an inline call instruction
479           unsigned StartOp = InlineAsm::MIOp_FirstOperand;
480 
481           // extracts the registers from the inline assembly instruction
482           for (unsigned i = StartOp, e = MI.getNumOperands(); i != e; ++i) {
483             MachineOperand &MO = MI.getOperand(i);
484             if (MO.isImm()) {
485               int64_t Value = MO.getImm();
486               MO.setImm(Value & 0x000fffffL);
487               Modified = true;
488             }
489           }
490         }
491       }
492     }
493   }
494 
495   return Modified;
496 }
497 
498 //*****************************************************************************
499 //**** IgnoreZeroFlag pass
500 //*****************************************************************************
501 // This erratum fix fixes the overflow behavior of SDIVCC and UDIVCC
502 // instructions that exists on some earlier LEON processors. Where these
503 // instructions are detected, they are replaced by a sequence that will
504 // explicitly write the overflow bit flag if this is required.
505 //
506 char IgnoreZeroFlag::ID = 0;
507 
IgnoreZeroFlag(TargetMachine & tm)508 IgnoreZeroFlag::IgnoreZeroFlag(TargetMachine &tm)
509     : LEONMachineFunctionPass(tm, ID) {}
510 
runOnMachineFunction(MachineFunction & MF)511 bool IgnoreZeroFlag::runOnMachineFunction(MachineFunction &MF) {
512   Subtarget = &MF.getSubtarget<SparcSubtarget>();
513   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
514   DebugLoc DL = DebugLoc();
515 
516   bool Modified = false;
517   for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
518     MachineBasicBlock &MBB = *MFI;
519     for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
520       MachineInstr &MI = *MBBI;
521       unsigned Opcode = MI.getOpcode();
522       if (Opcode == SP::SDIVCCrr || Opcode == SP::SDIVCCri ||
523           Opcode == SP::UDIVCCrr || Opcode == SP::UDIVCCri) {
524 
525         // split the current machine basic block - just after the sdivcc/udivcc
526         // instruction
527         // create a label that help us skip the zero flag update (of PSR -
528         // Processor Status Register)
529         // if conditions are not met
530         const BasicBlock *LLVM_BB = MBB.getBasicBlock();
531         MachineFunction::iterator It =
532             std::next(MachineFunction::iterator(MBB));
533 
534         MachineBasicBlock *dneBB = MF.CreateMachineBasicBlock(LLVM_BB);
535         MF.insert(It, dneBB);
536 
537         // Transfer the remainder of MBB and its successor edges to dneBB.
538         dneBB->splice(dneBB->begin(), &MBB,
539                       std::next(MachineBasicBlock::iterator(MI)), MBB.end());
540         dneBB->transferSuccessorsAndUpdatePHIs(&MBB);
541 
542         MBB.addSuccessor(dneBB);
543 
544         MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
545 
546         // bvc - branch if overflow flag not set
547         BuildMI(MBB, NextMBBI, DL, TII.get(SP::BCOND))
548             .addMBB(dneBB)
549             .addImm(SPCC::ICC_VS);
550 
551         // bnz - branch if not zero
552         BuildMI(MBB, NextMBBI, DL, TII.get(SP::BCOND))
553             .addMBB(dneBB)
554             .addImm(SPCC::ICC_NE);
555 
556         // use the WRPSR (Write Processor State Register) instruction to set the
557         // zeo flag to 1
558         // create wr %g0, 1, %psr
559         BuildMI(MBB, NextMBBI, DL, TII.get(SP::WRPSRri))
560             .addReg(SP::G0)
561             .addImm(1);
562 
563         BuildMI(MBB, NextMBBI, DL, TII.get(SP::NOP));
564 
565         Modified = true;
566       } else if (MI.isInlineAsm()) {
567         StringRef AsmString =
568             MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
569         if (AsmString.startswith_lower("sdivcc") ||
570             AsmString.startswith_lower("udivcc")) {
571           // this is an inline SDIVCC or UDIVCC instruction
572 
573           // split the current machine basic block - just after the
574           // sdivcc/udivcc instruction
575           // create a label that help us skip the zero flag update (of PSR -
576           // Processor Status Register)
577           // if conditions are not met
578           const BasicBlock *LLVM_BB = MBB.getBasicBlock();
579           MachineFunction::iterator It =
580               std::next(MachineFunction::iterator(MBB));
581 
582           MachineBasicBlock *dneBB = MF.CreateMachineBasicBlock(LLVM_BB);
583           MF.insert(It, dneBB);
584 
585           // Transfer the remainder of MBB and its successor edges to dneBB.
586           dneBB->splice(dneBB->begin(), &MBB,
587                         std::next(MachineBasicBlock::iterator(MI)), MBB.end());
588           dneBB->transferSuccessorsAndUpdatePHIs(&MBB);
589 
590           MBB.addSuccessor(dneBB);
591 
592           MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
593 
594           // bvc - branch if overflow flag not set
595           BuildMI(MBB, NextMBBI, DL, TII.get(SP::BCOND))
596               .addMBB(dneBB)
597               .addImm(SPCC::ICC_VS);
598 
599           // bnz - branch if not zero
600           BuildMI(MBB, NextMBBI, DL, TII.get(SP::BCOND))
601               .addMBB(dneBB)
602               .addImm(SPCC::ICC_NE);
603 
604           // use the WRPSR (Write Processor State Register) instruction to set
605           // the zeo flag to 1
606           // create wr %g0, 1, %psr
607           BuildMI(MBB, NextMBBI, DL, TII.get(SP::WRPSRri))
608               .addReg(SP::G0)
609               .addImm(1);
610 
611           BuildMI(MBB, NextMBBI, DL, TII.get(SP::NOP));
612 
613           Modified = true;
614         }
615       }
616     }
617   }
618 
619   return Modified;
620 }
621 
622 //*****************************************************************************
623 //**** InsertNOPDoublePrecision pass
624 //*****************************************************************************
625 // This erratum fix for some earlier LEON processors fixes a problem where a
626 // double precision load will not yield the correct result if used in FMUL,
627 // FDIV, FADD, FSUB or FSQRT instructions later. If this sequence is detected,
628 // inserting a NOP between the two instructions will fix the erratum.
629 // 1.scans the code after register allocation;
630 // 2.checks for the problem conditions as described in the AT697E erratum
631 // “Odd-Numbered FPU Register Dependency not Properly Checked in some
632 // Double-Precision FPU Operations”;
633 // 3.inserts NOPs if the problem exists.
634 //
635 char InsertNOPDoublePrecision::ID = 0;
636 
InsertNOPDoublePrecision(TargetMachine & tm)637 InsertNOPDoublePrecision::InsertNOPDoublePrecision(TargetMachine &tm)
638     : LEONMachineFunctionPass(tm, ID) {}
639 
runOnMachineFunction(MachineFunction & MF)640 bool InsertNOPDoublePrecision::runOnMachineFunction(MachineFunction &MF) {
641   Subtarget = &MF.getSubtarget<SparcSubtarget>();
642   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
643   DebugLoc DL = DebugLoc();
644 
645   bool Modified = false;
646   for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
647     MachineBasicBlock &MBB = *MFI;
648     for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
649       MachineInstr &MI = *MBBI;
650       unsigned Opcode = MI.getOpcode();
651       if (Opcode == SP::LDDFri || Opcode == SP::LDDFrr) {
652         MachineBasicBlock::iterator NMBBI = std::next(MBBI);
653         MachineInstr &NMI = *NMBBI;
654 
655         unsigned NextOpcode = NMI.getOpcode();
656         // NMI.print(errs());
657         if (NextOpcode == SP::FADDD || NextOpcode == SP::FSUBD ||
658             NextOpcode == SP::FMULD || NextOpcode == SP::FDIVD) {
659           int RegAIndex = GetRegIndexForOperand(MI, 0);
660           int RegBIndex = GetRegIndexForOperand(NMI, 0);
661           int RegCIndex =
662               GetRegIndexForOperand(NMI, 2); // Second source operand is index 2
663           int RegDIndex =
664               GetRegIndexForOperand(NMI, 1); // Destination operand is index 1
665 
666           if ((RegAIndex == RegBIndex + 1 && RegBIndex == RegDIndex) ||
667               (RegAIndex == RegCIndex + 1 && RegCIndex == RegDIndex) ||
668               (RegAIndex == RegBIndex + 1 && RegCIndex == RegDIndex) ||
669               (RegAIndex == RegCIndex + 1 && RegBIndex == RegDIndex)) {
670             // Insert NOP between the two instructions.
671             BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
672             Modified = true;
673           }
674 
675           // Check the errata patterns that only happen for FADDD and FMULD
676           if (Modified == false &&
677               (NextOpcode == SP::FADDD || NextOpcode == SP::FMULD)) {
678             RegAIndex = GetRegIndexForOperand(MI, 1);
679             if (RegAIndex == RegBIndex + 1 && RegBIndex == RegCIndex &&
680                 RegBIndex == RegDIndex) {
681               // Insert NOP between the two instructions.
682               BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
683               Modified = true;
684             }
685           }
686         } else if (NextOpcode == SP::FSQRTD) {
687           int RegAIndex = GetRegIndexForOperand(MI, 1);
688           int RegBIndex = GetRegIndexForOperand(NMI, 0);
689           int RegCIndex = GetRegIndexForOperand(NMI, 1);
690 
691           if (RegAIndex == RegBIndex + 1 && RegBIndex == RegCIndex) {
692             // Insert NOP between the two instructions.
693             BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
694             Modified = true;
695           }
696         }
697       }
698     }
699   }
700 
701   return Modified;
702 }
703 
704 //*****************************************************************************
705 //**** PreventRoundChange pass
706 //*****************************************************************************
707 // To prevent any explicit change of the default rounding mode, this pass
708 // detects any call of the fesetround function and removes this call from the
709 // list of generated operations.
710 //
711 char PreventRoundChange::ID = 0;
712 
PreventRoundChange(TargetMachine & tm)713 PreventRoundChange::PreventRoundChange(TargetMachine &tm)
714     : LEONMachineFunctionPass(tm, ID) {}
715 
runOnMachineFunction(MachineFunction & MF)716 bool PreventRoundChange::runOnMachineFunction(MachineFunction &MF) {
717   Subtarget = &MF.getSubtarget<SparcSubtarget>();
718 
719   bool Modified = false;
720   for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
721     MachineBasicBlock &MBB = *MFI;
722     for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
723       MachineInstr &MI = *MBBI;
724       unsigned Opcode = MI.getOpcode();
725       if (Opcode == SP::CALL && MI.getNumOperands() > 0) {
726         MachineOperand &MO = MI.getOperand(0);
727 
728         if (MO.isGlobal()) {
729           StringRef FuncName = MO.getGlobal()->getName();
730           if (FuncName.compare_lower("fesetround") == 0) {
731             MachineBasicBlock::iterator NMBBI = std::next(MBBI);
732             MI.eraseFromParent();
733             MBBI = NMBBI;
734             Modified = true;
735           }
736         }
737       }
738     }
739   }
740 
741   return Modified;
742 }
743 //*****************************************************************************
744 //**** FlushCacheLineSWAP pass
745 //*****************************************************************************
// This pass inserts a FLUSH followed by five NOPs just before any SWAP or
// LDSTUB atomic instruction.
747 //
748 char FlushCacheLineSWAP::ID = 0;
749 
FlushCacheLineSWAP(TargetMachine & tm)750 FlushCacheLineSWAP::FlushCacheLineSWAP(TargetMachine &tm)
751     : LEONMachineFunctionPass(tm, ID) {}
752 
runOnMachineFunction(MachineFunction & MF)753 bool FlushCacheLineSWAP::runOnMachineFunction(MachineFunction &MF) {
754   Subtarget = &MF.getSubtarget<SparcSubtarget>();
755   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
756   DebugLoc DL = DebugLoc();
757 
758   bool Modified = false;
759   for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
760     MachineBasicBlock &MBB = *MFI;
761     for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
762       MachineInstr &MI = *MBBI;
763       unsigned Opcode = MI.getOpcode();
764       if (Opcode == SP::SWAPrr || Opcode == SP::SWAPri ||
765           Opcode == SP::LDSTUBrr || Opcode == SP::LDSTUBri) {
766         // insert flush and 5 NOPs before the swap/ldstub instruction
767         BuildMI(MBB, MBBI, DL, TII.get(SP::FLUSH));
768         BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
769         BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
770         BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
771         BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
772         BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
773 
774         Modified = true;
775       } else if (MI.isInlineAsm()) {
776         StringRef AsmString =
777             MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
778         if (AsmString.startswith_lower("swap") ||
779             AsmString.startswith_lower("ldstub")) {
780           // this is an inline swap or ldstub instruction
781 
782           // insert flush and 5 NOPs before the swap/ldstub instruction
783           BuildMI(MBB, MBBI, DL, TII.get(SP::FLUSH));
784           BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
785           BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
786           BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
787           BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
788           BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
789 
790           Modified = true;
791         }
792       }
793     }
794   }
795 
796   return Modified;
797 }
798 
799 //*****************************************************************************
800 //**** InsertNOPsLoadStore pass
801 //*****************************************************************************
802 // This pass shall insert NOPs between floating point loads and stores when the
803 // following circumstances are present [5]:
804 // Pattern 1:
805 // 1. single-precision load or single-precision FPOP to register %fX, where X is
806 // the same register as the store being checked;
// 2. single-precision load or single-precision FPOP to register %fY, where Y
// is the opposite register in the same double-precision pair;
809 // 3. 0-3 instructions of any kind, except stores from %fX or %fY or operations
810 // with %fX as destination;
811 // 4. the store (from register %fX) being considered.
812 // Pattern 2:
813 // 1. double-precision FPOP;
// 2. any number of operations of any kind, provided they include no
// double-precision FPOP and at most one (less than two) single-precision or
// single-to-double FPOP;
816 // 3. the store (from register %fX) being considered.
817 //
818 char InsertNOPsLoadStore::ID = 0;
819 
InsertNOPsLoadStore(TargetMachine & tm)820 InsertNOPsLoadStore::InsertNOPsLoadStore(TargetMachine &tm)
821     : LEONMachineFunctionPass(tm, ID) {}
822 
runOnMachineFunction(MachineFunction & MF)823 bool InsertNOPsLoadStore::runOnMachineFunction(MachineFunction &MF) {
824   Subtarget = &MF.getSubtarget<SparcSubtarget>();
825   const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
826   DebugLoc DL = DebugLoc();
827 
828   MachineInstr *Pattern1FirstInstruction = NULL;
829   MachineInstr *Pattern2FirstInstruction = NULL;
830   unsigned int StoreInstructionsToCheck = 0;
831   int FxRegIndex, FyRegIndex;
832 
833   bool Modified = false;
834   for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
835     MachineBasicBlock &MBB = *MFI;
836     for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
837       MachineInstr &MI = *MBBI;
838 
839       if (StoreInstructionsToCheck > 0) {
840         if (((MI.getOpcode() == SP::STFrr || MI.getOpcode() == SP::STFri) &&
841              (GetRegIndexForOperand(MI, LAST_OPERAND) == FxRegIndex ||
842               GetRegIndexForOperand(MI, LAST_OPERAND) == FyRegIndex)) ||
843             GetRegIndexForOperand(MI, 0) == FxRegIndex) {
844           // Insert four NOPs
845           for (unsigned InsertedCount = 0; InsertedCount < 4; InsertedCount++) {
846             BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
847           }
848           Modified = true;
849         }
850         StoreInstructionsToCheck--;
851       }
852 
853       switch (MI.getOpcode()) {
854       // Watch for Pattern 1 FPop instructions
855       case SP::LDrr:
856       case SP::LDri:
857       case SP::LDFrr:
858       case SP::LDFri:
859       case SP::FADDS:
860       case SP::FSUBS:
861       case SP::FMULS:
862       case SP::FDIVS:
863       case SP::FSQRTS:
864       case SP::FCMPS:
865       case SP::FMOVS:
866       case SP::FNEGS:
867       case SP::FABSS:
868       case SP::FITOS:
869       case SP::FSTOI:
870       case SP::FITOD:
871       case SP::FDTOI:
872       case SP::FDTOS:
873         if (Pattern1FirstInstruction != NULL) {
874           FxRegIndex = GetRegIndexForOperand(*Pattern1FirstInstruction, 0);
875           FyRegIndex = GetRegIndexForOperand(MI, 0);
876 
877           // Check to see if these registers are part of the same double
878           // precision
879           // register pair.
880           int DoublePrecRegIndexForX = (FxRegIndex - SP::F0) / 2;
881           int DoublePrecRegIndexForY = (FyRegIndex - SP::F0) / 2;
882 
883           if (DoublePrecRegIndexForX == DoublePrecRegIndexForY)
884             StoreInstructionsToCheck = 4;
885         }
886 
887         Pattern1FirstInstruction = &MI;
888         break;
889       // End of Pattern 1
890 
891       // Search for Pattern 2
892       case SP::FADDD:
893       case SP::FSUBD:
894       case SP::FMULD:
895       case SP::FDIVD:
896       case SP::FSQRTD:
897       case SP::FCMPD:
898         Pattern2FirstInstruction = &MI;
899         Pattern1FirstInstruction = NULL;
900         break;
901 
902       case SP::STFrr:
903       case SP::STFri:
904       case SP::STDFrr:
905       case SP::STDFri:
906         if (Pattern2FirstInstruction != NULL) {
907           if (GetRegIndexForOperand(MI, LAST_OPERAND) ==
908               GetRegIndexForOperand(*Pattern2FirstInstruction, 0)) {
909             // Insert four NOPs
910             for (unsigned InsertedCount = 0; InsertedCount < 4;
911                  InsertedCount++) {
912               BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
913             }
914 
915             Pattern2FirstInstruction = NULL;
916           }
917         }
918         Pattern1FirstInstruction = NULL;
919         break;
920       // End of Pattern 2
921 
922       default:
923         // Ensure we don't count debug-only values while we're testing for the
924         // patterns.
925         if (!MI.isDebugValue())
926           Pattern1FirstInstruction = NULL;
927         break;
928       }
929     }
930   }
931 
932   return Modified;
933 }
934