1 //=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 // This class implements a deterministic finite automaton (DFA) based
10 // packetizing mechanism for VLIW architectures. It provides APIs to
11 // determine whether there exists a legal mapping of instructions to
12 // functional unit assignments in a packet. The DFA is auto-generated from
13 // the target's Schedule.td file.
14 //
15 // A DFA consists of 3 major elements: states, inputs, and transitions. For
16 // the packetizing mechanism, the input is the set of instruction classes for
17 // a target. The state models all possible combinations of functional unit
18 // consumption for a given set of instructions in a packet. A transition
19 // models the addition of an instruction to a packet. In the DFA constructed
20 // by this class, if an instruction can be added to a packet, then a valid
21 // transition exists from the corresponding state. Invalid transitions
22 // indicate that the instruction cannot be added to the current packet.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "llvm/CodeGen/DFAPacketizer.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineInstrBundle.h"
30 #include "llvm/CodeGen/ScheduleDAG.h"
31 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
32 #include "llvm/CodeGen/TargetInstrInfo.h"
33 #include "llvm/CodeGen/TargetSubtargetInfo.h"
34 #include "llvm/MC/MCInstrDesc.h"
35 #include "llvm/MC/MCInstrItineraries.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include <algorithm>
40 #include <cassert>
41 #include <iterator>
42 #include <memory>
43 #include <vector>
44 
45 using namespace llvm;
46 
47 #define DEBUG_TYPE "packets"
48 
49 static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden,
50   cl::init(0), cl::desc("If present, stops packetizing after N instructions"));
51 
52 static unsigned InstrCount = 0;
53 
54 // --------------------------------------------------------------------
55 // Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
56 
addDFAFuncUnits(DFAInput Inp,unsigned FuncUnits)57 static DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
58   return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
59 }
60 
61 /// Return the DFAInput for an instruction class input vector.
62 /// This function is used in both DFAPacketizer.cpp and in
63 /// DFAPacketizerEmitter.cpp.
getDFAInsnInput(const std::vector<unsigned> & InsnClass)64 static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
65   DFAInput InsnInput = 0;
66   assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
67          "Exceeded maximum number of DFA terms");
68   for (auto U : InsnClass)
69     InsnInput = addDFAFuncUnits(InsnInput, U);
70   return InsnInput;
71 }
72 
73 // --------------------------------------------------------------------
74 
DFAPacketizer(const InstrItineraryData * I,const DFAStateInput (* SIT)[2],const unsigned * SET)75 DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
76                              const DFAStateInput (*SIT)[2],
77                              const unsigned *SET):
78   InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) {
79   // Make sure DFA types are large enough for the number of terms & resources.
80   static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
81                     (8 * sizeof(DFAInput)),
82                 "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
83   static_assert(
84       (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
85       "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
86 }
87 
88 // Read the DFA transition table and update CachedTable.
89 //
90 // Format of the transition tables:
91 // DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
92 //                           transitions
93 // DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
94 //                         for the ith state
95 //
ReadTable(unsigned int state)96 void DFAPacketizer::ReadTable(unsigned int state) {
97   unsigned ThisState = DFAStateEntryTable[state];
98   unsigned NextStateInTable = DFAStateEntryTable[state+1];
99   // Early exit in case CachedTable has already contains this
100   // state's transitions.
101   if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisState][0])))
102     return;
103 
104   for (unsigned i = ThisState; i < NextStateInTable; i++)
105     CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
106       DFAStateInputTable[i][1];
107 }
108 
109 // Return the DFAInput for an instruction class.
getInsnInput(unsigned InsnClass)110 DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
111   // Note: this logic must match that in DFAPacketizerDefs.h for input vectors.
112   DFAInput InsnInput = 0;
113   unsigned i = 0;
114   (void)i;
115   for (const InstrStage *IS = InstrItins->beginStage(InsnClass),
116        *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS) {
117     InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits());
118     assert((i++ < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs");
119   }
120   return InsnInput;
121 }
122 
123 // Return the DFAInput for an instruction class input vector.
getInsnInput(const std::vector<unsigned> & InsnClass)124 DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
125   return getDFAInsnInput(InsnClass);
126 }
127 
128 // Check if the resources occupied by a MCInstrDesc are available in the
129 // current state.
canReserveResources(const MCInstrDesc * MID)130 bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
131   unsigned InsnClass = MID->getSchedClass();
132   DFAInput InsnInput = getInsnInput(InsnClass);
133   UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
134   ReadTable(CurrentState);
135   return CachedTable.count(StateTrans) != 0;
136 }
137 
138 // Reserve the resources occupied by a MCInstrDesc and change the current
139 // state to reflect that change.
reserveResources(const MCInstrDesc * MID)140 void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
141   unsigned InsnClass = MID->getSchedClass();
142   DFAInput InsnInput = getInsnInput(InsnClass);
143   UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
144   ReadTable(CurrentState);
145   assert(CachedTable.count(StateTrans) != 0);
146   CurrentState = CachedTable[StateTrans];
147 }
148 
149 // Check if the resources occupied by a machine instruction are available
150 // in the current state.
canReserveResources(MachineInstr & MI)151 bool DFAPacketizer::canReserveResources(MachineInstr &MI) {
152   const MCInstrDesc &MID = MI.getDesc();
153   return canReserveResources(&MID);
154 }
155 
156 // Reserve the resources occupied by a machine instruction and change the
157 // current state to reflect that change.
reserveResources(MachineInstr & MI)158 void DFAPacketizer::reserveResources(MachineInstr &MI) {
159   const MCInstrDesc &MID = MI.getDesc();
160   reserveResources(&MID);
161 }
162 
163 namespace llvm {
164 
165 // This class extends ScheduleDAGInstrs and overrides the schedule method
166 // to build the dependence graph.
167 class DefaultVLIWScheduler : public ScheduleDAGInstrs {
168 private:
169   AliasAnalysis *AA;
170   /// Ordered list of DAG postprocessing steps.
171   std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
172 
173 public:
174   DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
175                        AliasAnalysis *AA);
176 
177   // Actual scheduling work.
178   void schedule() override;
179 
180   /// DefaultVLIWScheduler takes ownership of the Mutation object.
addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation)181   void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
182     Mutations.push_back(std::move(Mutation));
183   }
184 
185 protected:
186   void postprocessDAG();
187 };
188 
189 } // end namespace llvm
190 
DefaultVLIWScheduler(MachineFunction & MF,MachineLoopInfo & MLI,AliasAnalysis * AA)191 DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
192                                            MachineLoopInfo &MLI,
193                                            AliasAnalysis *AA)
194     : ScheduleDAGInstrs(MF, &MLI), AA(AA) {
195   CanHandleTerminators = true;
196 }
197 
198 /// Apply each ScheduleDAGMutation step in order.
postprocessDAG()199 void DefaultVLIWScheduler::postprocessDAG() {
200   for (auto &M : Mutations)
201     M->apply(this);
202 }
203 
schedule()204 void DefaultVLIWScheduler::schedule() {
205   // Build the scheduling graph.
206   buildSchedGraph(AA);
207   postprocessDAG();
208 }
209 
VLIWPacketizerList(MachineFunction & mf,MachineLoopInfo & mli,AliasAnalysis * aa)210 VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
211                                        MachineLoopInfo &mli, AliasAnalysis *aa)
212     : MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) {
213   ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
214   VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA);
215 }
216 
~VLIWPacketizerList()217 VLIWPacketizerList::~VLIWPacketizerList() {
218   delete VLIWScheduler;
219   delete ResourceTracker;
220 }
221 
222 // End the current packet, bundle packet instructions and reset DFA state.
endPacket(MachineBasicBlock * MBB,MachineBasicBlock::iterator MI)223 void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
224                                    MachineBasicBlock::iterator MI) {
225   LLVM_DEBUG({
226     if (!CurrentPacketMIs.empty()) {
227       dbgs() << "Finalizing packet:\n";
228       for (MachineInstr *MI : CurrentPacketMIs)
229         dbgs() << " * " << *MI;
230     }
231   });
232   if (CurrentPacketMIs.size() > 1) {
233     MachineInstr &MIFirst = *CurrentPacketMIs.front();
234     finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator());
235   }
236   CurrentPacketMIs.clear();
237   ResourceTracker->clearResources();
238   LLVM_DEBUG(dbgs() << "End packet\n");
239 }
240 
241 // Bundle machine instructions into packets.
PacketizeMIs(MachineBasicBlock * MBB,MachineBasicBlock::iterator BeginItr,MachineBasicBlock::iterator EndItr)242 void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
243                                       MachineBasicBlock::iterator BeginItr,
244                                       MachineBasicBlock::iterator EndItr) {
245   assert(VLIWScheduler && "VLIW Scheduler is not initialized!");
246   VLIWScheduler->startBlock(MBB);
247   VLIWScheduler->enterRegion(MBB, BeginItr, EndItr,
248                              std::distance(BeginItr, EndItr));
249   VLIWScheduler->schedule();
250 
251   LLVM_DEBUG({
252     dbgs() << "Scheduling DAG of the packetize region\n";
253     for (SUnit &SU : VLIWScheduler->SUnits)
254       SU.dumpAll(VLIWScheduler);
255   });
256 
257   // Generate MI -> SU map.
258   MIToSUnit.clear();
259   for (SUnit &SU : VLIWScheduler->SUnits)
260     MIToSUnit[SU.getInstr()] = &SU;
261 
262   bool LimitPresent = InstrLimit.getPosition();
263 
264   // The main packetizer loop.
265   for (; BeginItr != EndItr; ++BeginItr) {
266     if (LimitPresent) {
267       if (InstrCount >= InstrLimit) {
268         EndItr = BeginItr;
269         break;
270       }
271       InstrCount++;
272     }
273     MachineInstr &MI = *BeginItr;
274     initPacketizerState();
275 
276     // End the current packet if needed.
277     if (isSoloInstruction(MI)) {
278       endPacket(MBB, MI);
279       continue;
280     }
281 
282     // Ignore pseudo instructions.
283     if (ignorePseudoInstruction(MI, MBB))
284       continue;
285 
286     SUnit *SUI = MIToSUnit[&MI];
287     assert(SUI && "Missing SUnit Info!");
288 
289     // Ask DFA if machine resource is available for MI.
290     LLVM_DEBUG(dbgs() << "Checking resources for adding MI to packet " << MI);
291 
292     bool ResourceAvail = ResourceTracker->canReserveResources(MI);
293     LLVM_DEBUG({
294       if (ResourceAvail)
295         dbgs() << "  Resources are available for adding MI to packet\n";
296       else
297         dbgs() << "  Resources NOT available\n";
298     });
299     if (ResourceAvail && shouldAddToPacket(MI)) {
300       // Dependency check for MI with instructions in CurrentPacketMIs.
301       for (auto MJ : CurrentPacketMIs) {
302         SUnit *SUJ = MIToSUnit[MJ];
303         assert(SUJ && "Missing SUnit Info!");
304 
305         LLVM_DEBUG(dbgs() << "  Checking against MJ " << *MJ);
306         // Is it legal to packetize SUI and SUJ together.
307         if (!isLegalToPacketizeTogether(SUI, SUJ)) {
308           LLVM_DEBUG(dbgs() << "  Not legal to add MI, try to prune\n");
309           // Allow packetization if dependency can be pruned.
310           if (!isLegalToPruneDependencies(SUI, SUJ)) {
311             // End the packet if dependency cannot be pruned.
312             LLVM_DEBUG(dbgs()
313                        << "  Could not prune dependencies for adding MI\n");
314             endPacket(MBB, MI);
315             break;
316           }
317           LLVM_DEBUG(dbgs() << "  Pruned dependence for adding MI\n");
318         }
319       }
320     } else {
321       LLVM_DEBUG(if (ResourceAvail) dbgs()
322                  << "Resources are available, but instruction should not be "
323                     "added to packet\n  "
324                  << MI);
325       // End the packet if resource is not available, or if the instruction
326       // shoud not be added to the current packet.
327       endPacket(MBB, MI);
328     }
329 
330     // Add MI to the current packet.
331     LLVM_DEBUG(dbgs() << "* Adding MI to packet " << MI << '\n');
332     BeginItr = addToPacket(MI);
333   } // For all instructions in the packetization range.
334 
335   // End any packet left behind.
336   endPacket(MBB, EndItr);
337   VLIWScheduler->exitRegion();
338   VLIWScheduler->finishBlock();
339 }
340 
alias(const MachineMemOperand & Op1,const MachineMemOperand & Op2,bool UseTBAA) const341 bool VLIWPacketizerList::alias(const MachineMemOperand &Op1,
342                                const MachineMemOperand &Op2,
343                                bool UseTBAA) const {
344   if (!Op1.getValue() || !Op2.getValue())
345     return true;
346 
347   int64_t MinOffset = std::min(Op1.getOffset(), Op2.getOffset());
348   int64_t Overlapa = Op1.getSize() + Op1.getOffset() - MinOffset;
349   int64_t Overlapb = Op2.getSize() + Op2.getOffset() - MinOffset;
350 
351   AliasResult AAResult =
352       AA->alias(MemoryLocation(Op1.getValue(), Overlapa,
353                                UseTBAA ? Op1.getAAInfo() : AAMDNodes()),
354                 MemoryLocation(Op2.getValue(), Overlapb,
355                                UseTBAA ? Op2.getAAInfo() : AAMDNodes()));
356 
357   return AAResult != NoAlias;
358 }
359 
alias(const MachineInstr & MI1,const MachineInstr & MI2,bool UseTBAA) const360 bool VLIWPacketizerList::alias(const MachineInstr &MI1,
361                                const MachineInstr &MI2,
362                                bool UseTBAA) const {
363   if (MI1.memoperands_empty() || MI2.memoperands_empty())
364     return true;
365 
366   for (const MachineMemOperand *Op1 : MI1.memoperands())
367     for (const MachineMemOperand *Op2 : MI2.memoperands())
368       if (alias(*Op1, *Op2, UseTBAA))
369         return true;
370   return false;
371 }
372 
373 // Add a DAG mutation object to the ordered list.
addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation)374 void VLIWPacketizerList::addMutation(
375       std::unique_ptr<ScheduleDAGMutation> Mutation) {
376   VLIWScheduler->addMutation(std::move(Mutation));
377 }
378