1 //===-- R600InstrInfo.cpp - R600 Instruction Information ------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // R600 Implementation of TargetInstrInfo.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "R600InstrInfo.h"
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPUSubtarget.h"
17 #include "R600Defines.h"
18 #include "R600RegisterInfo.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "AMDILUtilityFunctions.h"
21
22 #define GET_INSTRINFO_CTOR
23 #include "AMDGPUGenDFAPacketizer.inc"
24
25 using namespace llvm;
26
R600InstrInfo(AMDGPUTargetMachine & tm)27 R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
28 : AMDGPUInstrInfo(tm),
29 RI(tm, *this),
30 TM(tm)
31 { }
32
getRegisterInfo() const33 const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const
34 {
35 return RI;
36 }
37
isTrig(const MachineInstr & MI) const38 bool R600InstrInfo::isTrig(const MachineInstr &MI) const
39 {
40 return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
41 }
42
isVector(const MachineInstr & MI) const43 bool R600InstrInfo::isVector(const MachineInstr &MI) const
44 {
45 return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
46 }
47
48 void
copyPhysReg(MachineBasicBlock & MBB,MachineBasicBlock::iterator MI,DebugLoc DL,unsigned DestReg,unsigned SrcReg,bool KillSrc) const49 R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
50 MachineBasicBlock::iterator MI, DebugLoc DL,
51 unsigned DestReg, unsigned SrcReg,
52 bool KillSrc) const
53 {
54 if (AMDGPU::R600_Reg128RegClass.contains(DestReg)
55 && AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
56 for (unsigned I = 0; I < 4; I++) {
57 unsigned SubRegIndex = RI.getSubRegFromChannel(I);
58 BuildMI(MBB, MI, DL, get(AMDGPU::MOV))
59 .addReg(RI.getSubReg(DestReg, SubRegIndex), RegState::Define)
60 .addReg(RI.getSubReg(SrcReg, SubRegIndex))
61 .addImm(0) // Flag
62 .addReg(0) // PREDICATE_BIT
63 .addReg(DestReg, RegState::Define | RegState::Implicit);
64 }
65 } else {
66
67 /* We can't copy vec4 registers */
68 assert(!AMDGPU::R600_Reg128RegClass.contains(DestReg)
69 && !AMDGPU::R600_Reg128RegClass.contains(SrcReg));
70
71 BuildMI(MBB, MI, DL, get(AMDGPU::MOV), DestReg)
72 .addReg(SrcReg, getKillRegState(KillSrc))
73 .addImm(0) // Flag
74 .addReg(0); // PREDICATE_BIT
75 }
76 }
77
getMovImmInstr(MachineFunction * MF,unsigned DstReg,int64_t Imm) const78 MachineInstr * R600InstrInfo::getMovImmInstr(MachineFunction *MF,
79 unsigned DstReg, int64_t Imm) const
80 {
81 MachineInstr * MI = MF->CreateMachineInstr(get(AMDGPU::MOV), DebugLoc());
82 MachineInstrBuilder(MI).addReg(DstReg, RegState::Define);
83 MachineInstrBuilder(MI).addReg(AMDGPU::ALU_LITERAL_X);
84 MachineInstrBuilder(MI).addImm(Imm);
85 MachineInstrBuilder(MI).addReg(0); // PREDICATE_BIT
86
87 return MI;
88 }
89
getIEQOpcode() const90 unsigned R600InstrInfo::getIEQOpcode() const
91 {
92 return AMDGPU::SETE_INT;
93 }
94
isMov(unsigned Opcode) const95 bool R600InstrInfo::isMov(unsigned Opcode) const
96 {
97
98
99 switch(Opcode) {
100 default: return false;
101 case AMDGPU::MOV:
102 case AMDGPU::MOV_IMM_F32:
103 case AMDGPU::MOV_IMM_I32:
104 return true;
105 }
106 }
107
108 // Some instructions act as place holders to emulate operations that the GPU
109 // hardware does automatically. This function can be used to check if
110 // an opcode falls into this category.
isPlaceHolderOpcode(unsigned Opcode) const111 bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const
112 {
113 switch (Opcode) {
114 default: return false;
115 case AMDGPU::RETURN:
116 case AMDGPU::MASK_WRITE:
117 case AMDGPU::RESERVE_REG:
118 return true;
119 }
120 }
121
isReductionOp(unsigned Opcode) const122 bool R600InstrInfo::isReductionOp(unsigned Opcode) const
123 {
124 switch(Opcode) {
125 default: return false;
126 case AMDGPU::DOT4_r600:
127 case AMDGPU::DOT4_eg:
128 return true;
129 }
130 }
131
isCubeOp(unsigned Opcode) const132 bool R600InstrInfo::isCubeOp(unsigned Opcode) const
133 {
134 switch(Opcode) {
135 default: return false;
136 case AMDGPU::CUBE_r600_pseudo:
137 case AMDGPU::CUBE_r600_real:
138 case AMDGPU::CUBE_eg_pseudo:
139 case AMDGPU::CUBE_eg_real:
140 return true;
141 }
142 }
143
CreateTargetScheduleState(const TargetMachine * TM,const ScheduleDAG * DAG) const144 DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
145 const ScheduleDAG *DAG) const
146 {
147 const InstrItineraryData *II = TM->getInstrItineraryData();
148 return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
149 }
150
151 static bool
isPredicateSetter(unsigned Opcode)152 isPredicateSetter(unsigned Opcode)
153 {
154 switch (Opcode) {
155 case AMDGPU::PRED_X:
156 return true;
157 default:
158 return false;
159 }
160 }
161
162 static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock & MBB,MachineBasicBlock::iterator I)163 findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
164 MachineBasicBlock::iterator I)
165 {
166 while (I != MBB.begin()) {
167 --I;
168 MachineInstr *MI = I;
169 if (isPredicateSetter(MI->getOpcode()))
170 return MI;
171 }
172
173 return NULL;
174 }
175
176 bool
AnalyzeBranch(MachineBasicBlock & MBB,MachineBasicBlock * & TBB,MachineBasicBlock * & FBB,SmallVectorImpl<MachineOperand> & Cond,bool AllowModify) const177 R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
178 MachineBasicBlock *&TBB,
179 MachineBasicBlock *&FBB,
180 SmallVectorImpl<MachineOperand> &Cond,
181 bool AllowModify) const
182 {
183 // Most of the following comes from the ARM implementation of AnalyzeBranch
184
185 // If the block has no terminators, it just falls into the block after it.
186 MachineBasicBlock::iterator I = MBB.end();
187 if (I == MBB.begin())
188 return false;
189 --I;
190 while (I->isDebugValue()) {
191 if (I == MBB.begin())
192 return false;
193 --I;
194 }
195 if (static_cast<MachineInstr *>(I)->getOpcode() != AMDGPU::JUMP) {
196 return false;
197 }
198
199 // Get the last instruction in the block.
200 MachineInstr *LastInst = I;
201
202 // If there is only one terminator instruction, process it.
203 unsigned LastOpc = LastInst->getOpcode();
204 if (I == MBB.begin() ||
205 static_cast<MachineInstr *>(--I)->getOpcode() != AMDGPU::JUMP) {
206 if (LastOpc == AMDGPU::JUMP) {
207 if(!isPredicated(LastInst)) {
208 TBB = LastInst->getOperand(0).getMBB();
209 return false;
210 } else {
211 MachineInstr *predSet = I;
212 while (!isPredicateSetter(predSet->getOpcode())) {
213 predSet = --I;
214 }
215 TBB = LastInst->getOperand(0).getMBB();
216 Cond.push_back(predSet->getOperand(1));
217 Cond.push_back(predSet->getOperand(2));
218 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
219 return false;
220 }
221 }
222 return true; // Can't handle indirect branch.
223 }
224
225 // Get the instruction before it if it is a terminator.
226 MachineInstr *SecondLastInst = I;
227 unsigned SecondLastOpc = SecondLastInst->getOpcode();
228
229 // If the block ends with a B and a Bcc, handle it.
230 if (SecondLastOpc == AMDGPU::JUMP &&
231 isPredicated(SecondLastInst) &&
232 LastOpc == AMDGPU::JUMP &&
233 !isPredicated(LastInst)) {
234 MachineInstr *predSet = --I;
235 while (!isPredicateSetter(predSet->getOpcode())) {
236 predSet = --I;
237 }
238 TBB = SecondLastInst->getOperand(0).getMBB();
239 FBB = LastInst->getOperand(0).getMBB();
240 Cond.push_back(predSet->getOperand(1));
241 Cond.push_back(predSet->getOperand(2));
242 Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
243 return false;
244 }
245
246 // Otherwise, can't handle this.
247 return true;
248 }
249
getBranchInstr(const MachineOperand & op) const250 int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
251 const MachineInstr *MI = op.getParent();
252
253 switch (MI->getDesc().OpInfo->RegClass) {
254 default: // FIXME: fallthrough??
255 case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
256 case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
257 };
258 }
259
260 unsigned
InsertBranch(MachineBasicBlock & MBB,MachineBasicBlock * TBB,MachineBasicBlock * FBB,const SmallVectorImpl<MachineOperand> & Cond,DebugLoc DL) const261 R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
262 MachineBasicBlock *TBB,
263 MachineBasicBlock *FBB,
264 const SmallVectorImpl<MachineOperand> &Cond,
265 DebugLoc DL) const
266 {
267 assert(TBB && "InsertBranch must not be told to insert a fallthrough");
268
269 if (FBB == 0) {
270 if (Cond.empty()) {
271 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB).addReg(0);
272 return 1;
273 } else {
274 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
275 assert(PredSet && "No previous predicate !");
276 addFlag(PredSet, 1, MO_FLAG_PUSH);
277 PredSet->getOperand(2).setImm(Cond[1].getImm());
278
279 BuildMI(&MBB, DL, get(AMDGPU::JUMP))
280 .addMBB(TBB)
281 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
282 return 1;
283 }
284 } else {
285 MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
286 assert(PredSet && "No previous predicate !");
287 addFlag(PredSet, 1, MO_FLAG_PUSH);
288 PredSet->getOperand(2).setImm(Cond[1].getImm());
289 BuildMI(&MBB, DL, get(AMDGPU::JUMP))
290 .addMBB(TBB)
291 .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
292 BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB).addReg(0);
293 return 2;
294 }
295 }
296
297 unsigned
RemoveBranch(MachineBasicBlock & MBB) const298 R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
299 {
300
301 // Note : we leave PRED* instructions there.
302 // They may be needed when predicating instructions.
303
304 MachineBasicBlock::iterator I = MBB.end();
305
306 if (I == MBB.begin()) {
307 return 0;
308 }
309 --I;
310 switch (I->getOpcode()) {
311 default:
312 return 0;
313 case AMDGPU::JUMP:
314 if (isPredicated(I)) {
315 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
316 clearFlag(predSet, 1, MO_FLAG_PUSH);
317 }
318 I->eraseFromParent();
319 break;
320 }
321 I = MBB.end();
322
323 if (I == MBB.begin()) {
324 return 1;
325 }
326 --I;
327 switch (I->getOpcode()) {
328 // FIXME: only one case??
329 default:
330 return 1;
331 case AMDGPU::JUMP:
332 if (isPredicated(I)) {
333 MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
334 clearFlag(predSet, 1, MO_FLAG_PUSH);
335 }
336 I->eraseFromParent();
337 break;
338 }
339 return 2;
340 }
341
342 bool
isPredicated(const MachineInstr * MI) const343 R600InstrInfo::isPredicated(const MachineInstr *MI) const
344 {
345 int idx = MI->findFirstPredOperandIdx();
346 if (idx < 0)
347 return false;
348
349 unsigned Reg = MI->getOperand(idx).getReg();
350 switch (Reg) {
351 default: return false;
352 case AMDGPU::PRED_SEL_ONE:
353 case AMDGPU::PRED_SEL_ZERO:
354 case AMDGPU::PREDICATE_BIT:
355 return true;
356 }
357 }
358
359 bool
isPredicable(MachineInstr * MI) const360 R600InstrInfo::isPredicable(MachineInstr *MI) const
361 {
362 return AMDGPUInstrInfo::isPredicable(MI);
363 }
364
365
366 bool
isProfitableToIfCvt(MachineBasicBlock & MBB,unsigned NumCyles,unsigned ExtraPredCycles,const BranchProbability & Probability) const367 R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
368 unsigned NumCyles,
369 unsigned ExtraPredCycles,
370 const BranchProbability &Probability) const{
371 return true;
372 }
373
374 bool
isProfitableToIfCvt(MachineBasicBlock & TMBB,unsigned NumTCycles,unsigned ExtraTCycles,MachineBasicBlock & FMBB,unsigned NumFCycles,unsigned ExtraFCycles,const BranchProbability & Probability) const375 R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
376 unsigned NumTCycles,
377 unsigned ExtraTCycles,
378 MachineBasicBlock &FMBB,
379 unsigned NumFCycles,
380 unsigned ExtraFCycles,
381 const BranchProbability &Probability) const
382 {
383 return true;
384 }
385
386 bool
isProfitableToDupForIfCvt(MachineBasicBlock & MBB,unsigned NumCyles,const BranchProbability & Probability) const387 R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
388 unsigned NumCyles,
389 const BranchProbability &Probability)
390 const
391 {
392 return true;
393 }
394
395 bool
isProfitableToUnpredicate(MachineBasicBlock & TMBB,MachineBasicBlock & FMBB) const396 R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
397 MachineBasicBlock &FMBB) const
398 {
399 return false;
400 }
401
402
403 bool
ReverseBranchCondition(SmallVectorImpl<MachineOperand> & Cond) const404 R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
405 {
406 MachineOperand &MO = Cond[1];
407 switch (MO.getImm()) {
408 case OPCODE_IS_ZERO_INT:
409 MO.setImm(OPCODE_IS_NOT_ZERO_INT);
410 break;
411 case OPCODE_IS_NOT_ZERO_INT:
412 MO.setImm(OPCODE_IS_ZERO_INT);
413 break;
414 case OPCODE_IS_ZERO:
415 MO.setImm(OPCODE_IS_NOT_ZERO);
416 break;
417 case OPCODE_IS_NOT_ZERO:
418 MO.setImm(OPCODE_IS_ZERO);
419 break;
420 default:
421 return true;
422 }
423
424 MachineOperand &MO2 = Cond[2];
425 switch (MO2.getReg()) {
426 case AMDGPU::PRED_SEL_ZERO:
427 MO2.setReg(AMDGPU::PRED_SEL_ONE);
428 break;
429 case AMDGPU::PRED_SEL_ONE:
430 MO2.setReg(AMDGPU::PRED_SEL_ZERO);
431 break;
432 default:
433 return true;
434 }
435 return false;
436 }
437
438 bool
DefinesPredicate(MachineInstr * MI,std::vector<MachineOperand> & Pred) const439 R600InstrInfo::DefinesPredicate(MachineInstr *MI,
440 std::vector<MachineOperand> &Pred) const
441 {
442 return isPredicateSetter(MI->getOpcode());
443 }
444
445
446 bool
SubsumesPredicate(const SmallVectorImpl<MachineOperand> & Pred1,const SmallVectorImpl<MachineOperand> & Pred2) const447 R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
448 const SmallVectorImpl<MachineOperand> &Pred2) const
449 {
450 return false;
451 }
452
453
454 bool
PredicateInstruction(MachineInstr * MI,const SmallVectorImpl<MachineOperand> & Pred) const455 R600InstrInfo::PredicateInstruction(MachineInstr *MI,
456 const SmallVectorImpl<MachineOperand> &Pred) const
457 {
458 int PIdx = MI->findFirstPredOperandIdx();
459
460 if (PIdx != -1) {
461 MachineOperand &PMO = MI->getOperand(PIdx);
462 PMO.setReg(Pred[2].getReg());
463 MachineInstrBuilder(MI).addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
464 return true;
465 }
466
467 return false;
468 }
469
getInstrLatency(const InstrItineraryData * ItinData,const MachineInstr * MI,unsigned * PredCost) const470 int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
471 const MachineInstr *MI,
472 unsigned *PredCost) const
473 {
474 if (PredCost)
475 *PredCost = 2;
476 return 2;
477 }
478
479 //===----------------------------------------------------------------------===//
480 // Instruction flag getters/setters
481 //===----------------------------------------------------------------------===//
482
hasFlagOperand(const MachineInstr & MI) const483 bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const
484 {
485 return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
486 }
487
getFlagOp(MachineInstr * MI) const488 MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI) const
489 {
490 unsigned FlagIndex = GET_FLAG_OPERAND_IDX(get(MI->getOpcode()).TSFlags);
491 assert(FlagIndex != 0 &&
492 "Instruction flags not supported for this instruction");
493 MachineOperand &FlagOp = MI->getOperand(FlagIndex);
494 assert(FlagOp.isImm());
495 return FlagOp;
496 }
497
addFlag(MachineInstr * MI,unsigned Operand,unsigned Flag) const498 void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
499 unsigned Flag) const
500 {
501 MachineOperand &FlagOp = getFlagOp(MI);
502 FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
503 }
504
clearFlag(MachineInstr * MI,unsigned Operand,unsigned Flag) const505 void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
506 unsigned Flag) const
507 {
508 MachineOperand &FlagOp = getFlagOp(MI);
509 unsigned InstFlags = FlagOp.getImm();
510 InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
511 FlagOp.setImm(InstFlags);
512 }
513