1 //===-- R600ExpandSpecialInstrs.cpp - Expand special instructions ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 // Vector, Reduction, and Cube instructions need to fill the entire instruction
10 // group to work correctly.  This pass expands these individual instructions
11 // into several instructions that will completely fill the instruction group.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "R600Defines.h"
16 #include "R600InstrInfo.h"
17 #include "R600RegisterInfo.h"
18 #include "llvm/CodeGen/MachineFunctionPass.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 
22 using namespace llvm;
23 
24 namespace {
25 
26 class R600ExpandSpecialInstrsPass : public MachineFunctionPass {
27 
28 private:
29   static char ID;
30   const R600InstrInfo *TII;
31 
32 public:
R600ExpandSpecialInstrsPass(TargetMachine & tm)33   R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
34     TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
35 
36   virtual bool runOnMachineFunction(MachineFunction &MF);
37 
getPassName() const38   const char *getPassName() const {
39     return "R600 Expand special instructions pass";
40   }
41 };
42 
43 } // End anonymous namespace
44 
45 char R600ExpandSpecialInstrsPass::ID = 0;
46 
createR600ExpandSpecialInstrsPass(TargetMachine & TM)47 FunctionPass *llvm::createR600ExpandSpecialInstrsPass(TargetMachine &TM) {
48   return new R600ExpandSpecialInstrsPass(TM);
49 }
50 
runOnMachineFunction(MachineFunction & MF)51 bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
52 
53   const R600RegisterInfo &TRI = TII->getRegisterInfo();
54 
55   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
56                                                   BB != BB_E; ++BB) {
57     MachineBasicBlock &MBB = *BB;
58     MachineBasicBlock::iterator I = MBB.begin();
59     while (I != MBB.end()) {
60       MachineInstr &MI = *I;
61       I = llvm::next(I);
62 
63       bool IsReduction = TII->isReductionOp(MI.getOpcode());
64       bool IsVector = TII->isVector(MI);
65 	    bool IsCube = TII->isCubeOp(MI.getOpcode());
66       if (!IsReduction && !IsVector && !IsCube) {
67         continue;
68       }
69 
70       // Expand the instruction
71       //
72       // Reduction instructions:
73       // T0_X = DP4 T1_XYZW, T2_XYZW
74       // becomes:
75       // TO_X = DP4 T1_X, T2_X
76       // TO_Y (write masked) = DP4 T1_Y, T2_Y
77       // TO_Z (write masked) = DP4 T1_Z, T2_Z
78       // TO_W (write masked) = DP4 T1_W, T2_W
79       //
80       // Vector instructions:
81       // T0_X = MULLO_INT T1_X, T2_X
82       // becomes:
83       // T0_X = MULLO_INT T1_X, T2_X
84       // T0_Y (write masked) = MULLO_INT T1_X, T2_X
85       // T0_Z (write masked) = MULLO_INT T1_X, T2_X
86       // T0_W (write masked) = MULLO_INT T1_X, T2_X
87       //
88       // Cube instructions:
89       // T0_XYZW = CUBE T1_XYZW
90       // becomes:
91       // TO_X = CUBE T1_Z, T1_Y
92       // T0_Y = CUBE T1_Z, T1_X
93       // T0_Z = CUBE T1_X, T1_Z
94       // T0_W = CUBE T1_Y, T1_Z
95       for (unsigned Chan = 0; Chan < 4; Chan++) {
96         unsigned DstReg = MI.getOperand(0).getReg();
97         unsigned Src0 = MI.getOperand(1).getReg();
98         unsigned Src1 = 0;
99 
100         // Determine the correct source registers
101         if (!IsCube) {
102           Src1 = MI.getOperand(2).getReg();
103         }
104         if (IsReduction) {
105           unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
106           Src0 = TRI.getSubReg(Src0, SubRegIndex);
107           Src1 = TRI.getSubReg(Src1, SubRegIndex);
108         } else if (IsCube) {
109           static const int CubeSrcSwz[] = {2, 2, 0, 1};
110           unsigned SubRegIndex0 = TRI.getSubRegFromChannel(CubeSrcSwz[Chan]);
111           unsigned SubRegIndex1 = TRI.getSubRegFromChannel(CubeSrcSwz[3 - Chan]);
112           Src1 = TRI.getSubReg(Src0, SubRegIndex1);
113           Src0 = TRI.getSubReg(Src0, SubRegIndex0);
114         }
115 
116         // Determine the correct destination registers;
117         unsigned Flags = 0;
118         if (IsCube) {
119           unsigned SubRegIndex = TRI.getSubRegFromChannel(Chan);
120           DstReg = TRI.getSubReg(DstReg, SubRegIndex);
121         } else {
122           // Mask the write if the original instruction does not write to
123           // the current Channel.
124           Flags |= (Chan != TRI.getHWRegChan(DstReg) ? MO_FLAG_MASK : 0);
125           unsigned DstBase = TRI.getHWRegIndex(DstReg);
126           DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
127         }
128 
129         // Set the IsLast bit
130         Flags |= (Chan != 3 ? MO_FLAG_NOT_LAST : 0);
131 
132         // Add the new instruction
133         unsigned Opcode;
134         if (IsCube) {
135           switch (MI.getOpcode()) {
136           case AMDGPU::CUBE_r600_pseudo:
137             Opcode = AMDGPU::CUBE_r600_real;
138             break;
139           case AMDGPU::CUBE_eg_pseudo:
140             Opcode = AMDGPU::CUBE_eg_real;
141             break;
142           default:
143             assert(!"Unknown CUBE instruction");
144             Opcode = 0;
145             break;
146           }
147         } else {
148           Opcode = MI.getOpcode();
149         }
150         MachineInstr *NewMI =
151           BuildMI(MBB, I, MBB.findDebugLoc(I), TII->get(Opcode), DstReg)
152                   .addReg(Src0)
153                   .addReg(Src1)
154                   .addImm(0); // Flag
155 
156         NewMI->setIsInsideBundle(Chan != 0);
157         TII->addFlag(NewMI, 0, Flags);
158       }
159       MI.eraseFromParent();
160     }
161   }
162   return false;
163 }
164