1 //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their addresses on the fly; it also sets the STACK_SIZE info.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600InstrInfo.h"
20 #include "R600MachineFunctionInfo.h"
21 #include "R600RegisterInfo.h"
22 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/CodeGen/MachineBasicBlock.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstr.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/IR/CallingConv.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/IR/Function.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/Compiler.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <algorithm>
41 #include <cassert>
42 #include <cstdint>
43 #include <set>
44 #include <utility>
45 #include <vector>
46
47 using namespace llvm;
48
49 #define DEBUG_TYPE "r600cf"
50
51 namespace {
52
53 struct CFStack {
54 enum StackItem {
55 ENTRY = 0,
56 SUB_ENTRY = 1,
57 FIRST_NON_WQM_PUSH = 2,
58 FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
59 };
60
61 const R600Subtarget *ST;
62 std::vector<StackItem> BranchStack;
63 std::vector<StackItem> LoopStack;
64 unsigned MaxStackSize;
65 unsigned CurrentEntries = 0;
66 unsigned CurrentSubEntries = 0;
67
CFStack__anon47c1fcb20111::CFStack68 CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
69 // We need to reserve a stack entry for CALL_FS in vertex shaders.
70 MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
71
72 unsigned getLoopDepth();
73 bool branchStackContains(CFStack::StackItem);
74 bool requiresWorkAroundForInst(unsigned Opcode);
75 unsigned getSubEntrySize(CFStack::StackItem Item);
76 void updateMaxStackSize();
77 void pushBranch(unsigned Opcode, bool isWQM = false);
78 void pushLoop();
79 void popBranch();
80 void popLoop();
81 };
82
getLoopDepth()83 unsigned CFStack::getLoopDepth() {
84 return LoopStack.size();
85 }
86
branchStackContains(CFStack::StackItem Item)87 bool CFStack::branchStackContains(CFStack::StackItem Item) {
88 for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
89 E = BranchStack.end(); I != E; ++I) {
90 if (*I == Item)
91 return true;
92 }
93 return false;
94 }
95
requiresWorkAroundForInst(unsigned Opcode)96 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
97 if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
98 getLoopDepth() > 1)
99 return true;
100
101 if (!ST->hasCFAluBug())
102 return false;
103
104 switch(Opcode) {
105 default: return false;
106 case R600::CF_ALU_PUSH_BEFORE:
107 case R600::CF_ALU_ELSE_AFTER:
108 case R600::CF_ALU_BREAK:
109 case R600::CF_ALU_CONTINUE:
110 if (CurrentSubEntries == 0)
111 return false;
112 if (ST->getWavefrontSize() == 64) {
113 // We are being conservative here. We only require this work-around if
114 // CurrentSubEntries > 3 &&
115 // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
116 //
117 // We have to be conservative, because we don't know for certain that
118 // our stack allocation algorithm for Evergreen/NI is correct. Applying this
119 // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
120 // resources without any problems.
121 return CurrentSubEntries > 3;
122 } else {
123 assert(ST->getWavefrontSize() == 32);
124 // We are being conservative here. We only require the work-around if
125 // CurrentSubEntries > 7 &&
126 // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
127 // See the comment on the wavefront size == 64 case for why we are
128 // being conservative.
129 return CurrentSubEntries > 7;
130 }
131 }
132 }
133
getSubEntrySize(CFStack::StackItem Item)134 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
135 switch(Item) {
136 default:
137 return 0;
138 case CFStack::FIRST_NON_WQM_PUSH:
139 assert(!ST->hasCaymanISA());
140 if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
141 // +1 For the push operation.
142 // +2 Extra space required.
143 return 3;
144 } else {
145 // Some documentation says that this is not necessary on Evergreen,
146 // but experimentation has show that we need to allocate 1 extra
147 // sub-entry for the first non-WQM push.
148 // +1 For the push operation.
149 // +1 Extra space required.
150 return 2;
151 }
152 case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
153 assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
154 // +1 For the push operation.
155 // +1 Extra space required.
156 return 2;
157 case CFStack::SUB_ENTRY:
158 return 1;
159 }
160 }
161
updateMaxStackSize()162 void CFStack::updateMaxStackSize() {
163 unsigned CurrentStackSize =
164 CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
165 MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
166 }
167
pushBranch(unsigned Opcode,bool isWQM)168 void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
169 CFStack::StackItem Item = CFStack::ENTRY;
170 switch(Opcode) {
171 case R600::CF_PUSH_EG:
172 case R600::CF_ALU_PUSH_BEFORE:
173 if (!isWQM) {
174 if (!ST->hasCaymanISA() &&
175 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
176 Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI
177 // See comment in
178 // CFStack::getSubEntrySize()
179 else if (CurrentEntries > 0 &&
180 ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
181 !ST->hasCaymanISA() &&
182 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
183 Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
184 else
185 Item = CFStack::SUB_ENTRY;
186 } else
187 Item = CFStack::ENTRY;
188 break;
189 }
190 BranchStack.push_back(Item);
191 if (Item == CFStack::ENTRY)
192 CurrentEntries++;
193 else
194 CurrentSubEntries += getSubEntrySize(Item);
195 updateMaxStackSize();
196 }
197
pushLoop()198 void CFStack::pushLoop() {
199 LoopStack.push_back(CFStack::ENTRY);
200 CurrentEntries++;
201 updateMaxStackSize();
202 }
203
popBranch()204 void CFStack::popBranch() {
205 CFStack::StackItem Top = BranchStack.back();
206 if (Top == CFStack::ENTRY)
207 CurrentEntries--;
208 else
209 CurrentSubEntries-= getSubEntrySize(Top);
210 BranchStack.pop_back();
211 }
212
popLoop()213 void CFStack::popLoop() {
214 CurrentEntries--;
215 LoopStack.pop_back();
216 }
217
218 class R600ControlFlowFinalizer : public MachineFunctionPass {
219 private:
220 using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
221
/// Generation-independent names for the native control-flow instructions
/// this pass emits. getHWInstrDesc() maps each one to a concrete opcode.
enum ControlFlowInstruction {
  CF_TC,            // Head of a fetch clause that uses the texture cache.
  CF_VC,            // Head of a fetch clause that uses the vertex cache.
  CF_CALL_FS,       // Emitted at the start of blocks of vertex shaders.
  CF_WHILE_LOOP,    // Replaces the WHILELOOP pseudo.
  CF_END_LOOP,      // Replaces the ENDLOOP pseudo.
  CF_LOOP_BREAK,    // Replaces the BREAK pseudo.
  CF_LOOP_CONTINUE, // Replaces the CONTINUE pseudo.
  CF_JUMP,          // Replaces the IF_PREDICATE_SET pseudo.
  CF_ELSE,          // Replaces the ELSE pseudo.
  CF_POP,           // Emitted for an ENDIF that cannot be folded into an ALU.
  CF_END            // Replaces the RETURN pseudo.
};
235
236 const R600InstrInfo *TII = nullptr;
237 const R600RegisterInfo *TRI = nullptr;
238 unsigned MaxFetchInst;
239 const R600Subtarget *ST = nullptr;
240
IsTrivialInst(MachineInstr & MI) const241 bool IsTrivialInst(MachineInstr &MI) const {
242 switch (MI.getOpcode()) {
243 case R600::KILL:
244 case R600::RETURN:
245 return true;
246 default:
247 return false;
248 }
249 }
250
getHWInstrDesc(ControlFlowInstruction CFI) const251 const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
252 unsigned Opcode = 0;
253 bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
254 switch (CFI) {
255 case CF_TC:
256 Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
257 break;
258 case CF_VC:
259 Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
260 break;
261 case CF_CALL_FS:
262 Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
263 break;
264 case CF_WHILE_LOOP:
265 Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
266 break;
267 case CF_END_LOOP:
268 Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
269 break;
270 case CF_LOOP_BREAK:
271 Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
272 break;
273 case CF_LOOP_CONTINUE:
274 Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
275 break;
276 case CF_JUMP:
277 Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
278 break;
279 case CF_ELSE:
280 Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
281 break;
282 case CF_POP:
283 Opcode = isEg ? R600::POP_EG : R600::POP_R600;
284 break;
285 case CF_END:
286 if (ST->hasCaymanISA()) {
287 Opcode = R600::CF_END_CM;
288 break;
289 }
290 Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
291 break;
292 }
293 assert (Opcode && "No opcode selected");
294 return TII->get(Opcode);
295 }
296
isCompatibleWithClause(const MachineInstr & MI,std::set<unsigned> & DstRegs) const297 bool isCompatibleWithClause(const MachineInstr &MI,
298 std::set<unsigned> &DstRegs) const {
299 unsigned DstMI, SrcMI;
300 for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
301 E = MI.operands_end();
302 I != E; ++I) {
303 const MachineOperand &MO = *I;
304 if (!MO.isReg())
305 continue;
306 if (MO.isDef()) {
307 unsigned Reg = MO.getReg();
308 if (R600::R600_Reg128RegClass.contains(Reg))
309 DstMI = Reg;
310 else
311 DstMI = TRI->getMatchingSuperReg(Reg,
312 AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
313 &R600::R600_Reg128RegClass);
314 }
315 if (MO.isUse()) {
316 unsigned Reg = MO.getReg();
317 if (R600::R600_Reg128RegClass.contains(Reg))
318 SrcMI = Reg;
319 else
320 SrcMI = TRI->getMatchingSuperReg(Reg,
321 AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
322 &R600::R600_Reg128RegClass);
323 }
324 }
325 if ((DstRegs.find(SrcMI) == DstRegs.end())) {
326 DstRegs.insert(DstMI);
327 return true;
328 } else
329 return false;
330 }
331
332 ClauseFile
MakeFetchClause(MachineBasicBlock & MBB,MachineBasicBlock::iterator & I) const333 MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
334 const {
335 MachineBasicBlock::iterator ClauseHead = I;
336 std::vector<MachineInstr *> ClauseContent;
337 unsigned AluInstCount = 0;
338 bool IsTex = TII->usesTextureCache(*ClauseHead);
339 std::set<unsigned> DstRegs;
340 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
341 if (IsTrivialInst(*I))
342 continue;
343 if (AluInstCount >= MaxFetchInst)
344 break;
345 if ((IsTex && !TII->usesTextureCache(*I)) ||
346 (!IsTex && !TII->usesVertexCache(*I)))
347 break;
348 if (!isCompatibleWithClause(*I, DstRegs))
349 break;
350 AluInstCount ++;
351 ClauseContent.push_back(&*I);
352 }
353 MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
354 getHWInstrDesc(IsTex?CF_TC:CF_VC))
355 .addImm(0) // ADDR
356 .addImm(AluInstCount - 1); // COUNT
357 return ClauseFile(MIb, std::move(ClauseContent));
358 }
359
getLiteral(MachineInstr & MI,std::vector<MachineOperand * > & Lits) const360 void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
361 static const unsigned LiteralRegs[] = {
362 R600::ALU_LITERAL_X,
363 R600::ALU_LITERAL_Y,
364 R600::ALU_LITERAL_Z,
365 R600::ALU_LITERAL_W
366 };
367 const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
368 TII->getSrcs(MI);
369 for (const auto &Src:Srcs) {
370 if (Src.first->getReg() != R600::ALU_LITERAL_X)
371 continue;
372 int64_t Imm = Src.second;
373 std::vector<MachineOperand *>::iterator It =
374 llvm::find_if(Lits, [&](MachineOperand *val) {
375 return val->isImm() && (val->getImm() == Imm);
376 });
377
378 // Get corresponding Operand
379 MachineOperand &Operand = MI.getOperand(
380 TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
381
382 if (It != Lits.end()) {
383 // Reuse existing literal reg
384 unsigned Index = It - Lits.begin();
385 Src.first->setReg(LiteralRegs[Index]);
386 } else {
387 // Allocate new literal reg
388 assert(Lits.size() < 4 && "Too many literals in Instruction Group");
389 Src.first->setReg(LiteralRegs[Lits.size()]);
390 Lits.push_back(&Operand);
391 }
392 }
393 }
394
insertLiterals(MachineBasicBlock::iterator InsertPos,const std::vector<unsigned> & Literals) const395 MachineBasicBlock::iterator insertLiterals(
396 MachineBasicBlock::iterator InsertPos,
397 const std::vector<unsigned> &Literals) const {
398 MachineBasicBlock *MBB = InsertPos->getParent();
399 for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
400 unsigned LiteralPair0 = Literals[i];
401 unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
402 InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
403 TII->get(R600::LITERALS))
404 .addImm(LiteralPair0)
405 .addImm(LiteralPair1);
406 }
407 return InsertPos;
408 }
409
410 ClauseFile
MakeALUClause(MachineBasicBlock & MBB,MachineBasicBlock::iterator & I) const411 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
412 const {
413 MachineInstr &ClauseHead = *I;
414 std::vector<MachineInstr *> ClauseContent;
415 I++;
416 for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
417 if (IsTrivialInst(*I)) {
418 ++I;
419 continue;
420 }
421 if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
422 break;
423 std::vector<MachineOperand *>Literals;
424 if (I->isBundle()) {
425 MachineInstr &DeleteMI = *I;
426 MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
427 while (++BI != E && BI->isBundledWithPred()) {
428 BI->unbundleFromPred();
429 for (MachineOperand &MO : BI->operands()) {
430 if (MO.isReg() && MO.isInternalRead())
431 MO.setIsInternalRead(false);
432 }
433 getLiteral(*BI, Literals);
434 ClauseContent.push_back(&*BI);
435 }
436 I = BI;
437 DeleteMI.eraseFromParent();
438 } else {
439 getLiteral(*I, Literals);
440 ClauseContent.push_back(&*I);
441 I++;
442 }
443 for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
444 MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
445 TII->get(R600::LITERALS));
446 if (Literals[i]->isImm()) {
447 MILit.addImm(Literals[i]->getImm());
448 } else {
449 MILit.addGlobalAddress(Literals[i]->getGlobal(),
450 Literals[i]->getOffset());
451 }
452 if (i + 1 < e) {
453 if (Literals[i + 1]->isImm()) {
454 MILit.addImm(Literals[i + 1]->getImm());
455 } else {
456 MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
457 Literals[i + 1]->getOffset());
458 }
459 } else
460 MILit.addImm(0);
461 ClauseContent.push_back(MILit);
462 }
463 }
464 assert(ClauseContent.size() < 128 && "ALU clause is too big");
465 ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
466 return ClauseFile(&ClauseHead, std::move(ClauseContent));
467 }
468
EmitFetchClause(MachineBasicBlock::iterator InsertPos,const DebugLoc & DL,ClauseFile & Clause,unsigned & CfCount)469 void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
470 const DebugLoc &DL, ClauseFile &Clause,
471 unsigned &CfCount) {
472 CounterPropagateAddr(*Clause.first, CfCount);
473 MachineBasicBlock *BB = Clause.first->getParent();
474 BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
475 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
476 BB->splice(InsertPos, BB, Clause.second[i]);
477 }
478 CfCount += 2 * Clause.second.size();
479 }
480
EmitALUClause(MachineBasicBlock::iterator InsertPos,const DebugLoc & DL,ClauseFile & Clause,unsigned & CfCount)481 void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
482 ClauseFile &Clause, unsigned &CfCount) {
483 Clause.first->getOperand(0).setImm(0);
484 CounterPropagateAddr(*Clause.first, CfCount);
485 MachineBasicBlock *BB = Clause.first->getParent();
486 BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
487 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
488 BB->splice(InsertPos, BB, Clause.second[i]);
489 }
490 CfCount += Clause.second.size();
491 }
492
CounterPropagateAddr(MachineInstr & MI,unsigned Addr) const493 void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
494 MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
495 }
CounterPropagateAddr(const std::set<MachineInstr * > & MIs,unsigned Addr) const496 void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
497 unsigned Addr) const {
498 for (MachineInstr *MI : MIs) {
499 CounterPropagateAddr(*MI, Addr);
500 }
501 }
502
503 public:
504 static char ID;
505
R600ControlFlowFinalizer()506 R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
507
runOnMachineFunction(MachineFunction & MF)508 bool runOnMachineFunction(MachineFunction &MF) override {
509 ST = &MF.getSubtarget<R600Subtarget>();
510 MaxFetchInst = ST->getTexVTXClauseSize();
511 TII = ST->getInstrInfo();
512 TRI = ST->getRegisterInfo();
513
514 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
515
516 CFStack CFStack(ST, MF.getFunction().getCallingConv());
517 for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
518 ++MB) {
519 MachineBasicBlock &MBB = *MB;
520 unsigned CfCount = 0;
521 std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
522 std::vector<MachineInstr * > IfThenElseStack;
523 if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
524 BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
525 getHWInstrDesc(CF_CALL_FS));
526 CfCount++;
527 }
528 std::vector<ClauseFile> FetchClauses, AluClauses;
529 std::vector<MachineInstr *> LastAlu(1);
530 std::vector<MachineInstr *> ToPopAfter;
531
532 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
533 I != E;) {
534 if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
535 LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
536 FetchClauses.push_back(MakeFetchClause(MBB, I));
537 CfCount++;
538 LastAlu.back() = nullptr;
539 continue;
540 }
541
542 MachineBasicBlock::iterator MI = I;
543 if (MI->getOpcode() != R600::ENDIF)
544 LastAlu.back() = nullptr;
545 if (MI->getOpcode() == R600::CF_ALU)
546 LastAlu.back() = &*MI;
547 I++;
548 bool RequiresWorkAround =
549 CFStack.requiresWorkAroundForInst(MI->getOpcode());
550 switch (MI->getOpcode()) {
551 case R600::CF_ALU_PUSH_BEFORE:
552 if (RequiresWorkAround) {
553 LLVM_DEBUG(dbgs()
554 << "Applying bug work-around for ALU_PUSH_BEFORE\n");
555 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
556 .addImm(CfCount + 1)
557 .addImm(1);
558 MI->setDesc(TII->get(R600::CF_ALU));
559 CfCount++;
560 CFStack.pushBranch(R600::CF_PUSH_EG);
561 } else
562 CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
563 LLVM_FALLTHROUGH;
564 case R600::CF_ALU:
565 I = MI;
566 AluClauses.push_back(MakeALUClause(MBB, I));
567 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
568 CfCount++;
569 break;
570 case R600::WHILELOOP: {
571 CFStack.pushLoop();
572 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
573 getHWInstrDesc(CF_WHILE_LOOP))
574 .addImm(1);
575 std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
576 std::set<MachineInstr *>());
577 Pair.second.insert(MIb);
578 LoopStack.push_back(std::move(Pair));
579 MI->eraseFromParent();
580 CfCount++;
581 break;
582 }
583 case R600::ENDLOOP: {
584 CFStack.popLoop();
585 std::pair<unsigned, std::set<MachineInstr *>> Pair =
586 std::move(LoopStack.back());
587 LoopStack.pop_back();
588 CounterPropagateAddr(Pair.second, CfCount);
589 BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
590 .addImm(Pair.first + 1);
591 MI->eraseFromParent();
592 CfCount++;
593 break;
594 }
595 case R600::IF_PREDICATE_SET: {
596 LastAlu.push_back(nullptr);
597 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
598 getHWInstrDesc(CF_JUMP))
599 .addImm(0)
600 .addImm(0);
601 IfThenElseStack.push_back(MIb);
602 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
603 MI->eraseFromParent();
604 CfCount++;
605 break;
606 }
607 case R600::ELSE: {
608 MachineInstr * JumpInst = IfThenElseStack.back();
609 IfThenElseStack.pop_back();
610 CounterPropagateAddr(*JumpInst, CfCount);
611 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
612 getHWInstrDesc(CF_ELSE))
613 .addImm(0)
614 .addImm(0);
615 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
616 IfThenElseStack.push_back(MIb);
617 MI->eraseFromParent();
618 CfCount++;
619 break;
620 }
621 case R600::ENDIF: {
622 CFStack.popBranch();
623 if (LastAlu.back()) {
624 ToPopAfter.push_back(LastAlu.back());
625 } else {
626 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
627 getHWInstrDesc(CF_POP))
628 .addImm(CfCount + 1)
629 .addImm(1);
630 (void)MIb;
631 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
632 CfCount++;
633 }
634
635 MachineInstr *IfOrElseInst = IfThenElseStack.back();
636 IfThenElseStack.pop_back();
637 CounterPropagateAddr(*IfOrElseInst, CfCount);
638 IfOrElseInst->getOperand(1).setImm(1);
639 LastAlu.pop_back();
640 MI->eraseFromParent();
641 break;
642 }
643 case R600::BREAK: {
644 CfCount ++;
645 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
646 getHWInstrDesc(CF_LOOP_BREAK))
647 .addImm(0);
648 LoopStack.back().second.insert(MIb);
649 MI->eraseFromParent();
650 break;
651 }
652 case R600::CONTINUE: {
653 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
654 getHWInstrDesc(CF_LOOP_CONTINUE))
655 .addImm(0);
656 LoopStack.back().second.insert(MIb);
657 MI->eraseFromParent();
658 CfCount++;
659 break;
660 }
661 case R600::RETURN: {
662 DebugLoc DL = MBB.findDebugLoc(MI);
663 BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
664 CfCount++;
665 if (CfCount % 2) {
666 BuildMI(MBB, I, DL, TII->get(R600::PAD));
667 CfCount++;
668 }
669 MI->eraseFromParent();
670 for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
671 EmitFetchClause(I, DL, FetchClauses[i], CfCount);
672 for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
673 EmitALUClause(I, DL, AluClauses[i], CfCount);
674 break;
675 }
676 default:
677 if (TII->isExport(MI->getOpcode())) {
678 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
679 CfCount++;
680 }
681 break;
682 }
683 }
684 for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
685 MachineInstr *Alu = ToPopAfter[i];
686 BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
687 TII->get(R600::CF_ALU_POP_AFTER))
688 .addImm(Alu->getOperand(0).getImm())
689 .addImm(Alu->getOperand(1).getImm())
690 .addImm(Alu->getOperand(2).getImm())
691 .addImm(Alu->getOperand(3).getImm())
692 .addImm(Alu->getOperand(4).getImm())
693 .addImm(Alu->getOperand(5).getImm())
694 .addImm(Alu->getOperand(6).getImm())
695 .addImm(Alu->getOperand(7).getImm())
696 .addImm(Alu->getOperand(8).getImm());
697 Alu->eraseFromParent();
698 }
699 MFI->CFStackSize = CFStack.MaxStackSize;
700 }
701
702 return false;
703 }
704
getPassName() const705 StringRef getPassName() const override {
706 return "R600 Control Flow Finalizer Pass";
707 }
708 };
709
710 } // end anonymous namespace
711
712 INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
713 "R600 Control Flow Finalizer", false, false)
714 INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
715 "R600 Control Flow Finalizer", false, false)
716
717 char R600ControlFlowFinalizer::ID = 0;
718
719 char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;
720
createR600ControlFlowFinalizer()721 FunctionPass *llvm::createR600ControlFlowFinalizer() {
722 return new R600ControlFlowFinalizer();
723 }
724