1 //===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // R600 Implementation of TargetInstrInfo.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "R600InstrInfo.h"
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPUSubtarget.h"
17 #include "R600Defines.h"
18 #include "R600RegisterInfo.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "AMDILUtilityFunctions.h"
21 
22 #define GET_INSTRINFO_CTOR
23 #include "AMDGPUGenDFAPacketizer.inc"
24 
25 using namespace llvm;
26 
R600InstrInfo(AMDGPUTargetMachine & tm)27 R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
28   : AMDGPUInstrInfo(tm),
29     RI(tm, *this),
30     TM(tm)
31   { }
32 
getRegisterInfo() const33 const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
34 {
35   return RI;
36 }
37 
isTrig(const MachineInstr & MI) const38 bool R600InstrInfo::isTrig(const MachineInstr &MI) const
39 {
40   return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
41 }
42 
isVector(const MachineInstr & MI) const43 bool R600InstrInfo::isVector(const MachineInstr &MI) const
44 {
45   return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
46 }
47 
48 void
copyPhysReg(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,DebugLoc DL,unsigned DestReg,unsigned SrcReg,bool KillSrc) const49 R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
50                            MachineBasicBlock::iterator MI, DebugLoc DL,
51                            unsigned DestReg, unsigned SrcReg,
52                            bool KillSrc) const
53 {
54   if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
55       && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
56     for (unsigned I = 0; I < 4; I++) {
57       unsigned SubRegIndex = RI.getSubRegFromChannel(I);
58       BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
59               .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
60               .addReg(RI.getSubReg(SrcReg, SubRegIndex))
61               .addImm(0) // Flag
62               .addReg(0) // PREDICATE_BIT
63               .addReg(DestReg, RegState::Define | RegState::Implicit);
64     }
65   } else {
66 
67     /* We can't copy vec4 registers */
68     assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
69            && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
70 
71     BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
72       .addReg(SrcReg, getKillRegState(KillSrc))
73       .addImm(0) // Flag
74       .addReg(0); // PREDICATE_BIT
75   }
76 }
77 
getMovImmInstr(MachineFunction * MF,unsigned DstReg,int64_t Imm) const78 MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
79                                              unsigned DstReg, int64_t Imm) const
80 {
81   MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
82   MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
83   MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
84   MachineInstrBuilder(MI).addImm(Imm);
85   MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT
86 
87   return MI;
88 }
89 
getIEQOpcode() const90 unsigned R600InstrInfo::getIEQOpcode() const
91 {
92   return AMDGPU::SETE_INT;
93 }
94 
isMov(unsigned Opcode) const95 bool R600InstrInfo::isMov(unsigned Opcode) const
96 {
97 
98 
99   switch(Opcode) {
100   default: return false;
101   case AMDGPU::MOV:
102   case AMDGPU::MOV_IMM_F32:
103   case AMDGPU::MOV_IMM_I32:
104     return true;
105   }
106 }
107 
108 // Some instructions act as place holders to emulate operations that the GPU
109 // hardware does automatically. This function can be used to check if
110 // an opcode falls into this category.
isPlaceHolderOpcode(unsigned Opcode) const111 bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const
112 {
113   switch (Opcode) {
114   default: return false;
115   case AMDGPU::RETURN:
116   case AMDGPU::MASK_WRITE:
117   case AMDGPU::RESERVE_REG:
118     return true;
119   }
120 }
121 
isReductionOp(unsigned Opcode) const122 bool R600InstrInfo::isReductionOp(unsigned Opcode) const
123 {
124   switch(Opcode) {
125     default: return false;
126     case AMDGPU::DOT4_r600:
127     case AMDGPU::DOT4_eg:
128       return true;
129   }
130 }
131 
isCubeOp(unsigned Opcode) const132 bool R600InstrInfo::isCubeOp(unsigned Opcode) const
133 {
134   switch(Opcode) {
135     default: return false;
136     case AMDGPU::CUBE_r600_pseudo:
137     case AMDGPU::CUBE_r600_real:
138     case AMDGPU::CUBE_eg_pseudo:
139     case AMDGPU::CUBE_eg_real:
140       return true;
141   }
142 }
143 
CreateTargetScheduleState(const TargetMachine * TM,const ScheduleDAG * DAG) const144 DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
145     const ScheduleDAG *DAG) const
146 {
147   const InstrItineraryData *II = TM->getInstrItineraryData();
148   return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
149 }
150 
151 static bool
isPredicateSetter(unsigned Opcode)152 isPredicateSetter(unsigned Opcode)
153 {
154   switch (Opcode) {
155   case AMDGPU::PRED_X:
156     return true;
157   default:
158     return false;
159   }
160 }
161 
162 static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock & MBB,MachineBasicBlock::iterator I)163 findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
164                              MachineBasicBlock::iterator I)
165 {
166   while (I != MBB.begin()) {
167     --I;
168     MachineInstr *MI = I;
169     if (isPredicateSetter(MI->getOpcode()))
170       return MI;
171   }
172 
173   return NULL;
174 }
175 
// Analyze the terminators of MBB. Returns false when the branch structure
// was understood (filling in TBB, FBB and Cond as appropriate) and true
// when it could not be analyzed.
bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const
{
  // Most of the following comes from the ARM implementation of AnalyzeBranch

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  // Skip trailing debug values; they are not terminators.
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  // Anything other than a JUMP terminator is treated as fall-through.
  if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
    if (LastOpc == AMDGPU::JUMP) {
      if(!isPredicated(LastInst)) {
        // Single unpredicated JUMP: unconditional branch to TBB.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        // Single predicated JUMP: walk back to the predicate setter
        // (PRED_X) feeding it and expose its condition operands in Cond.
        MachineInstr *predSet = I;
        while (!isPredicateSetter(predSet->getOpcode())) {
          predSet = --I;
        }
        TBB = LastInst->getOperand(0).getMBB();
        // Cond layout: [setter operand 1, setter operand 2, PRED_SEL reg].
        Cond.push_back(predSet->getOperand(1));
        Cond.push_back(predSet->getOperand(2));
        Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
        return false;
      }
    }
    return true;  // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
  // That is: a predicated JUMP (conditional, to TBB) followed by an
  // unpredicated JUMP (to FBB).
  if (SecondLastOpc == AMDGPU::JUMP &&
      isPredicated(SecondLastInst) &&
      LastOpc == AMDGPU::JUMP &&
      !isPredicated(LastInst)) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}
249 
getBranchInstr(const MachineOperand & op) const250 int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
251   const MachineInstr *MI = op.getParent();
252 
253   switch (MI->getDesc().OpInfo->RegClass) {
254   default: // FIXME: fallthrough??
255   case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
256   case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
257   };
258 }
259 
260 unsigned
InsertBranch(MachineBasicBlock & MBB,MachineBasicBlock * TBB,MachineBasicBlock * FBB,const SmallVectorImpl<MachineOperand> & Cond,DebugLoc DL) const261 R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
262                             MachineBasicBlock *TBB,
263                             MachineBasicBlock *FBB,
264                             const SmallVectorImpl<MachineOperand> &Cond,
265                             DebugLoc DL) const
266 {
267   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
268 
269   if (FBB == 0) {
270     if (Cond.empty()) {
271       BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
272       return 1;
273     } else {
274       MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
275       assert(PredSet && "No previous predicate !");
276       addFlag(PredSet, 1, MO_FLAG_PUSH);
277       PredSet->getOperand(2).setImm(Cond[1].getImm());
278 
279       BuildMI(&MBB, DL, get(AMDGPU::JUMP))
280              .addMBB(TBB)
281              .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
282       return 1;
283     }
284   } else {
285     MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
286     assert(PredSet && "No previous predicate !");
287     addFlag(PredSet, 1, MO_FLAG_PUSH);
288     PredSet->getOperand(2).setImm(Cond[1].getImm());
289     BuildMI(&MBB, DL, get(AMDGPU::JUMP))
290             .addMBB(TBB)
291             .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
292     BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
293     return 2;
294   }
295 }
296 
297 unsigned
RemoveBranch(MachineBasicBlock & MBB) const298 R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
299 {
300 
301   // Note : we leave PRED* instructions there.
302   // They may be needed when predicating instructions.
303 
304   MachineBasicBlock::iterator I = MBB.end();
305 
306   if (I == MBB.begin()) {
307     return 0;
308   }
309   --I;
310   switch (I->getOpcode()) {
311   default:
312     return 0;
313   case AMDGPU::JUMP:
314     if (isPredicated(I)) {
315       MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
316       clearFlag(predSet, 1, MO_FLAG_PUSH);
317     }
318     I->eraseFromParent();
319     break;
320   }
321   I = MBB.end();
322 
323   if (I == MBB.begin()) {
324     return 1;
325   }
326   --I;
327   switch (I->getOpcode()) {
328     // FIXME: only one case??
329   default:
330     return 1;
331   case AMDGPU::JUMP:
332     if (isPredicated(I)) {
333       MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
334       clearFlag(predSet, 1, MO_FLAG_PUSH);
335     }
336     I->eraseFromParent();
337     break;
338   }
339   return 2;
340 }
341 
342 bool
isPredicated(const MachineInstr * MI) const343 R600InstrInfo::isPredicated(const MachineInstr *MI) const
344 {
345   int idx = MI->findFirstPredOperandIdx();
346   if (idx < 0)
347     return false;
348 
349   unsigned Reg = MI->getOperand(idx).getReg();
350   switch (Reg) {
351   default: return false;
352   case AMDGPU::PRED_SEL_ONE:
353   case AMDGPU::PRED_SEL_ZERO:
354   case AMDGPU::PREDICATE_BIT:
355     return true;
356   }
357 }
358 
359 bool
isPredicable(MachineInstr * MI) const360 R600InstrInfo::isPredicable(MachineInstr *MI) const
361 {
362   return AMDGPUInstrInfo::isPredicable(MI);
363 }
364 
365 
366 bool
isProfitableToIfCvt(MachineBasicBlock & MBB,unsigned NumCyles,unsigned ExtraPredCycles,const BranchProbability & Probability) const367 R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
368                                    unsigned NumCyles,
369                                    unsigned ExtraPredCycles,
370                                    const BranchProbability &Probability) const{
371   return true;
372 }
373 
374 bool
isProfitableToIfCvt(MachineBasicBlock & TMBB,unsigned NumTCycles,unsigned ExtraTCycles,MachineBasicBlock & FMBB,unsigned NumFCycles,unsigned ExtraFCycles,const BranchProbability & Probability) const375 R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
376                                    unsigned NumTCycles,
377                                    unsigned ExtraTCycles,
378                                    MachineBasicBlock &FMBB,
379                                    unsigned NumFCycles,
380                                    unsigned ExtraFCycles,
381                                    const BranchProbability &Probability) const
382 {
383   return true;
384 }
385 
386 bool
isProfitableToDupForIfCvt(MachineBasicBlock & MBB,unsigned NumCyles,const BranchProbability & Probability) const387 R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
388                                          unsigned NumCyles,
389                                          const BranchProbability &Probability)
390                                          const
391 {
392   return true;
393 }
394 
395 bool
isProfitableToUnpredicate(MachineBasicBlock & TMBB,MachineBasicBlock & FMBB) const396 R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
397                                          MachineBasicBlock &FMBB) const
398 {
399   return false;
400 }
401 
402 
403 bool
ReverseBranchCondition(SmallVectorImpl<MachineOperand> & Cond) const404 R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
405 {
406   MachineOperand &MO = Cond[1];
407   switch (MO.getImm()) {
408   case OPCODE_IS_ZERO_INT:
409     MO.setImm(OPCODE_IS_NOT_ZERO_INT);
410     break;
411   case OPCODE_IS_NOT_ZERO_INT:
412     MO.setImm(OPCODE_IS_ZERO_INT);
413     break;
414   case OPCODE_IS_ZERO:
415     MO.setImm(OPCODE_IS_NOT_ZERO);
416     break;
417   case OPCODE_IS_NOT_ZERO:
418     MO.setImm(OPCODE_IS_ZERO);
419     break;
420   default:
421     return true;
422   }
423 
424   MachineOperand &MO2 = Cond[2];
425   switch (MO2.getReg()) {
426   case AMDGPU::PRED_SEL_ZERO:
427     MO2.setReg(AMDGPU::PRED_SEL_ONE);
428     break;
429   case AMDGPU::PRED_SEL_ONE:
430     MO2.setReg(AMDGPU::PRED_SEL_ZERO);
431     break;
432   default:
433     return true;
434   }
435   return false;
436 }
437 
438 bool
DefinesPredicate(MachineInstr * MI,std::vector<MachineOperand> & Pred) const439 R600InstrInfo::DefinesPredicate(MachineInstr *MI,
440                                 std::vector<MachineOperand> &Pred) const
441 {
442   return isPredicateSetter(MI->getOpcode());
443 }
444 
445 
446 bool
SubsumesPredicate(const SmallVectorImpl<MachineOperand> & Pred1,const SmallVectorImpl<MachineOperand> & Pred2) const447 R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
448                        const SmallVectorImpl<MachineOperand> &Pred2) const
449 {
450   return false;
451 }
452 
453 
454 bool
PredicateInstruction(MachineInstr * MI,const SmallVectorImpl<MachineOperand> & Pred) const455 R600InstrInfo::PredicateInstruction(MachineInstr *MI,
456                       const SmallVectorImpl<MachineOperand> &Pred) const
457 {
458   int PIdx = MI->findFirstPredOperandIdx();
459 
460   if (PIdx != -1) {
461     MachineOperand &PMO = MI->getOperand(PIdx);
462     PMO.setReg(Pred[2].getReg());
463     MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
464     return true;
465   }
466 
467   return false;
468 }
469 
getInstrLatency(const InstrItineraryData * ItinData,const MachineInstr * MI,unsigned * PredCost) const470 int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
471                                    const MachineInstr *MI,
472                                    unsigned *PredCost) const
473 {
474   if (PredCost)
475     *PredCost = 2;
476   return 2;
477 }
478 
479 //===----------------------------------------------------------------------===//
480 // Instruction flag getters/setters
481 //===----------------------------------------------------------------------===//
482 
hasFlagOperand(const MachineInstr & MI) const483 bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const
484 {
485   return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
486 }
487 
getFlagOp(MachineInstr * MI) const488 MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI) const
489 {
490   unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags);
491   assert(FlagIndex != 0 &&
492          "Instruction flags not supported for this instruction");
493   MachineOperand &FlagOp = MI->getOperand(FlagIndex);
494   assert(FlagOp.isImm());
495   return FlagOp;
496 }
497 
addFlag(MachineInstr * MI,unsigned Operand,unsigned Flag) const498 void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
499                             unsigned Flag) const
500 {
501   MachineOperand &FlagOp = getFlagOp(MI);
502   FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
503 }
504 
clearFlag(MachineInstr * MI,unsigned Operand,unsigned Flag) const505 void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
506                               unsigned Flag) const
507 {
508   MachineOperand &FlagOp = getFlagOp(MI);
509   unsigned InstFlags = FlagOp.getImm();
510   InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
511   FlagOp.setImm(InstFlags);
512 }
513