• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  //===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===//
2  //
3  //                     The LLVM Compiler Infrastructure
4  //
5  // This file is distributed under the University of Illinois Open Source
6  // License. See LICENSE.TXT for details.
7  //
8  //===----------------------------------------------------------------------===//
9  //
10  // This file defines the interface for the MachineTraceMetrics analysis pass
11  // that estimates CPU resource usage and critical data dependency paths through
12  // preferred traces. This is useful for super-scalar CPUs where execution speed
13  // can be limited both by data dependencies and by limited execution resources.
14  //
15  // Out-of-order CPUs will often be executing instructions from multiple basic
16  // blocks at the same time. This makes it difficult to estimate the resource
17  // usage accurately in a single basic block. Resources can be estimated better
18  // by looking at a trace through the current basic block.
19  //
20  // For every block, the MachineTraceMetrics pass will pick a preferred trace
21  // that passes through the block. The trace is chosen based on loop structure,
22  // branch probabilities, and resource usage. The intention is to pick likely
23  // traces that would be the most affected by code transformations.
24  //
25  // It is expensive to compute a full arbitrary trace for every block, so to
26  // save some computations, traces are chosen to be convergent. This means that
27  // if the traces through basic blocks A and B ever cross when moving away from
28  // A and B, they never diverge again. This applies in both directions - If the
29  // traces meet above A and B, they won't diverge when going further back.
30  //
31  // Traces tend to align with loops. The trace through a block in an inner loop
32  // will begin at the loop entry block and end at a back edge. If there are
33  // nested loops, the trace may begin and end at those instead.
34  //
35  // For each trace, we compute the critical path length, which is the number of
36  // cycles required to execute the trace when execution is limited by data
37  // dependencies only. We also compute the resource height, which is the number
38  // of cycles required to execute all instructions in the trace when ignoring
39  // data dependencies.
40  //
41  // Every instruction in the current block has a slack - the number of cycles
42  // execution of the instruction can be delayed without extending the critical
43  // path.
44  //
45  //===----------------------------------------------------------------------===//
46  
47  #ifndef LLVM_CODEGEN_MACHINETRACEMETRICS_H
48  #define LLVM_CODEGEN_MACHINETRACEMETRICS_H
49  
50  #include "llvm/ADT/ArrayRef.h"
51  #include "llvm/ADT/DenseMap.h"
52  #include "llvm/CodeGen/MachineFunctionPass.h"
53  #include "llvm/CodeGen/MachineInstr.h"
54  #include "llvm/CodeGen/TargetSchedule.h"
55  
56  namespace llvm {
57  
58  class InstrItineraryData;
59  class MachineBasicBlock;
60  class MachineInstr;
61  class MachineLoop;
62  class MachineLoopInfo;
63  class MachineRegisterInfo;
64  class TargetInstrInfo;
65  class TargetRegisterInfo;
66  class raw_ostream;
67  
68  class MachineTraceMetrics : public MachineFunctionPass {
69    const MachineFunction *MF;
70    const TargetInstrInfo *TII;
71    const TargetRegisterInfo *TRI;
72    const MachineRegisterInfo *MRI;
73    const MachineLoopInfo *Loops;
74    TargetSchedModel SchedModel;
75  
76  public:
77    class Ensemble;
78    class Trace;
79    static char ID;
80    MachineTraceMetrics();
81    void getAnalysisUsage(AnalysisUsage&) const override;
82    bool runOnMachineFunction(MachineFunction&) override;
83    void releaseMemory() override;
84    void verifyAnalysis() const override;
85  
86    friend class Ensemble;
87    friend class Trace;
88  
89    /// Per-basic block information that doesn't depend on the trace through the
90    /// block.
91    struct FixedBlockInfo {
92      /// The number of non-trivial instructions in the block.
93      /// Doesn't count PHI and COPY instructions that are likely to be removed.
94      unsigned InstrCount;
95  
96      /// True when the block contains calls.
97      bool HasCalls;
98  
FixedBlockInfoFixedBlockInfo99      FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {}
100  
101      /// Returns true when resource information for this block has been computed.
hasResourcesFixedBlockInfo102      bool hasResources() const { return InstrCount != ~0u; }
103  
104      /// Invalidate resource information.
invalidateFixedBlockInfo105      void invalidate() { InstrCount = ~0u; }
106    };
107  
108    /// Get the fixed resource information about MBB. Compute it on demand.
109    const FixedBlockInfo *getResources(const MachineBasicBlock*);
110  
111    /// Get the scaled number of cycles used per processor resource in MBB.
112    /// This is an array with SchedModel.getNumProcResourceKinds() entries.
113    /// The getResources() function above must have been called first.
114    ///
115    /// These numbers have already been scaled by SchedModel.getResourceFactor().
116    ArrayRef<unsigned> getProcResourceCycles(unsigned MBBNum) const;
117  
118    /// A virtual register or regunit required by a basic block or its trace
119    /// successors.
120    struct LiveInReg {
121      /// The virtual register required, or a register unit.
122      unsigned Reg;
123  
124      /// For virtual registers: Minimum height of the defining instruction.
125      /// For regunits: Height of the highest user in the trace.
126      unsigned Height;
127  
RegLiveInReg128      LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {}
129    };
130  
131    /// Per-basic block information that relates to a specific trace through the
132    /// block. Convergent traces means that only one of these is required per
133    /// block in a trace ensemble.
134    struct TraceBlockInfo {
135      /// Trace predecessor, or NULL for the first block in the trace.
136      /// Valid when hasValidDepth().
137      const MachineBasicBlock *Pred;
138  
139      /// Trace successor, or NULL for the last block in the trace.
140      /// Valid when hasValidHeight().
141      const MachineBasicBlock *Succ;
142  
143      /// The block number of the head of the trace. (When hasValidDepth()).
144      unsigned Head;
145  
146      /// The block number of the tail of the trace. (When hasValidHeight()).
147      unsigned Tail;
148  
149      /// Accumulated number of instructions in the trace above this block.
150      /// Does not include instructions in this block.
151      unsigned InstrDepth;
152  
153      /// Accumulated number of instructions in the trace below this block.
154      /// Includes instructions in this block.
155      unsigned InstrHeight;
156  
TraceBlockInfoTraceBlockInfo157      TraceBlockInfo() :
158        Pred(nullptr), Succ(nullptr),
159        InstrDepth(~0u), InstrHeight(~0u),
160        HasValidInstrDepths(false), HasValidInstrHeights(false) {}
161  
162      /// Returns true if the depth resources have been computed from the trace
163      /// above this block.
hasValidDepthTraceBlockInfo164      bool hasValidDepth() const { return InstrDepth != ~0u; }
165  
166      /// Returns true if the height resources have been computed from the trace
167      /// below this block.
hasValidHeightTraceBlockInfo168      bool hasValidHeight() const { return InstrHeight != ~0u; }
169  
170      /// Invalidate depth resources when some block above this one has changed.
invalidateDepthTraceBlockInfo171      void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; }
172  
173      /// Invalidate height resources when a block below this one has changed.
invalidateHeightTraceBlockInfo174      void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; }
175  
176      /// Assuming that this is a dominator of TBI, determine if it contains
177      /// useful instruction depths. A dominating block can be above the current
178      /// trace head, and any dependencies from such a far away dominator are not
179      /// expected to affect the critical path.
180      ///
181      /// Also returns true when TBI == this.
isUsefulDominatorTraceBlockInfo182      bool isUsefulDominator(const TraceBlockInfo &TBI) const {
183        // The trace for TBI may not even be calculated yet.
184        if (!hasValidDepth() || !TBI.hasValidDepth())
185          return false;
186        // Instruction depths are only comparable if the traces share a head.
187        if (Head != TBI.Head)
188          return false;
189        // It is almost always the case that TBI belongs to the same trace as
190        // this block, but rare convoluted cases involving irreducible control
191        // flow, a dominator may share a trace head without actually being on the
192        // same trace as TBI. This is not a big problem as long as it doesn't
193        // increase the instruction depth.
194        return HasValidInstrDepths && InstrDepth <= TBI.InstrDepth;
195      }
196  
197      // Data-dependency-related information. Per-instruction depth and height
198      // are computed from data dependencies in the current trace, using
199      // itinerary data.
200  
201      /// Instruction depths have been computed. This implies hasValidDepth().
202      bool HasValidInstrDepths;
203  
204      /// Instruction heights have been computed. This implies hasValidHeight().
205      bool HasValidInstrHeights;
206  
207      /// Critical path length. This is the number of cycles in the longest data
208      /// dependency chain through the trace. This is only valid when both
209      /// HasValidInstrDepths and HasValidInstrHeights are set.
210      unsigned CriticalPath;
211  
212      /// Live-in registers. These registers are defined above the current block
213      /// and used by this block or a block below it.
214      /// This does not include PHI uses in the current block, but it does
215      /// include PHI uses in deeper blocks.
216      SmallVector<LiveInReg, 4> LiveIns;
217  
218      void print(raw_ostream&) const;
219    };
220  
221    /// InstrCycles represents the cycle height and depth of an instruction in a
222    /// trace.
223    struct InstrCycles {
224      /// Earliest issue cycle as determined by data dependencies and instruction
225      /// latencies from the beginning of the trace. Data dependencies from
226      /// before the trace are not included.
227      unsigned Depth;
228  
229      /// Minimum number of cycles from this instruction is issued to the of the
230      /// trace, as determined by data dependencies and instruction latencies.
231      unsigned Height;
232    };
233  
234    /// A trace represents a plausible sequence of executed basic blocks that
235    /// passes through the current basic block one. The Trace class serves as a
236    /// handle to internal cached data structures.
237    class Trace {
238      Ensemble &TE;
239      TraceBlockInfo &TBI;
240  
getBlockNum()241      unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; }
242  
243    public:
Trace(Ensemble & te,TraceBlockInfo & tbi)244      explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {}
245      void print(raw_ostream&) const;
246  
247      /// Compute the total number of instructions in the trace.
getInstrCount()248      unsigned getInstrCount() const {
249        return TBI.InstrDepth + TBI.InstrHeight;
250      }
251  
252      /// Return the resource depth of the top/bottom of the trace center block.
253      /// This is the number of cycles required to execute all instructions from
254      /// the trace head to the trace center block. The resource depth only
255      /// considers execution resources, it ignores data dependencies.
256      /// When Bottom is set, instructions in the trace center block are included.
257      unsigned getResourceDepth(bool Bottom) const;
258  
259      /// Return the resource length of the trace. This is the number of cycles
260      /// required to execute the instructions in the trace if they were all
261      /// independent, exposing the maximum instruction-level parallelism.
262      ///
263      /// Any blocks in Extrablocks are included as if they were part of the
264      /// trace. Likewise, extra resources required by the specified scheduling
265      /// classes are included. For the caller to account for extra machine
266      /// instructions, it must first resolve each instruction's scheduling class.
267      unsigned getResourceLength(
268          ArrayRef<const MachineBasicBlock *> Extrablocks = None,
269          ArrayRef<const MCSchedClassDesc *> ExtraInstrs = None,
270          ArrayRef<const MCSchedClassDesc *> RemoveInstrs = None) const;
271  
272      /// Return the length of the (data dependency) critical path through the
273      /// trace.
getCriticalPath()274      unsigned getCriticalPath() const { return TBI.CriticalPath; }
275  
276      /// Return the depth and height of MI. The depth is only valid for
277      /// instructions in or above the trace center block. The height is only
278      /// valid for instructions in or below the trace center block.
getInstrCycles(const MachineInstr & MI)279      InstrCycles getInstrCycles(const MachineInstr &MI) const {
280        return TE.Cycles.lookup(&MI);
281      }
282  
283      /// Return the slack of MI. This is the number of cycles MI can be delayed
284      /// before the critical path becomes longer.
285      /// MI must be an instruction in the trace center block.
286      unsigned getInstrSlack(const MachineInstr &MI) const;
287  
288      /// Return the Depth of a PHI instruction in a trace center block successor.
289      /// The PHI does not have to be part of the trace.
290      unsigned getPHIDepth(const MachineInstr &PHI) const;
291  
292      /// A dependence is useful if the basic block of the defining instruction
293      /// is part of the trace of the user instruction. It is assumed that DefMI
294      /// dominates UseMI (see also isUsefulDominator).
295      bool isDepInTrace(const MachineInstr &DefMI,
296                        const MachineInstr &UseMI) const;
297    };
298  
299    /// A trace ensemble is a collection of traces selected using the same
300    /// strategy, for example 'minimum resource height'. There is one trace for
301    /// every block in the function.
302    class Ensemble {
303      SmallVector<TraceBlockInfo, 4> BlockInfo;
304      DenseMap<const MachineInstr*, InstrCycles> Cycles;
305      SmallVector<unsigned, 0> ProcResourceDepths;
306      SmallVector<unsigned, 0> ProcResourceHeights;
307      friend class Trace;
308  
309      void computeTrace(const MachineBasicBlock*);
310      void computeDepthResources(const MachineBasicBlock*);
311      void computeHeightResources(const MachineBasicBlock*);
312      unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&);
313      void computeInstrDepths(const MachineBasicBlock*);
314      void computeInstrHeights(const MachineBasicBlock*);
315      void addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
316                      ArrayRef<const MachineBasicBlock*> Trace);
317  
318    protected:
319      MachineTraceMetrics &MTM;
320      virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0;
321      virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0;
322      explicit Ensemble(MachineTraceMetrics*);
323      const MachineLoop *getLoopFor(const MachineBasicBlock*) const;
324      const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const;
325      const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const;
326      ArrayRef<unsigned> getProcResourceDepths(unsigned MBBNum) const;
327      ArrayRef<unsigned> getProcResourceHeights(unsigned MBBNum) const;
328  
329    public:
330      virtual ~Ensemble();
331      virtual const char *getName() const =0;
332      void print(raw_ostream&) const;
333      void invalidate(const MachineBasicBlock *MBB);
334      void verify() const;
335  
336      /// Get the trace that passes through MBB.
337      /// The trace is computed on demand.
338      Trace getTrace(const MachineBasicBlock *MBB);
339    };
340  
341    /// Strategies for selecting traces.
342    enum Strategy {
343      /// Select the trace through a block that has the fewest instructions.
344      TS_MinInstrCount,
345  
346      TS_NumStrategies
347    };
348  
349    /// Get the trace ensemble representing the given trace selection strategy.
350    /// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
351    /// and valid for the lifetime of the analysis pass.
352    Ensemble *getEnsemble(Strategy);
353  
354    /// Invalidate cached information about MBB. This must be called *before* MBB
355    /// is erased, or the CFG is otherwise changed.
356    ///
357    /// This invalidates per-block information about resource usage for MBB only,
358    /// and it invalidates per-trace information for any trace that passes
359    /// through MBB.
360    ///
361    /// Call Ensemble::getTrace() again to update any trace handles.
362    void invalidate(const MachineBasicBlock *MBB);
363  
364  private:
365    // One entry per basic block, indexed by block number.
366    SmallVector<FixedBlockInfo, 4> BlockInfo;
367  
368    // Cycles consumed on each processor resource per block.
369    // The number of processor resource kinds is constant for a given subtarget,
370    // but it is not known at compile time. The number of cycles consumed by
371    // block B on processor resource R is at ProcResourceCycles[B*Kinds + R]
372    // where Kinds = SchedModel.getNumProcResourceKinds().
373    SmallVector<unsigned, 0> ProcResourceCycles;
374  
375    // One ensemble per strategy.
376    Ensemble* Ensembles[TS_NumStrategies];
377  
378    // Convert scaled resource usage to a cycle count that can be compared with
379    // latencies.
getCycles(unsigned Scaled)380    unsigned getCycles(unsigned Scaled) {
381      unsigned Factor = SchedModel.getLatencyFactor();
382      return (Scaled + Factor - 1) / Factor;
383    }
384  };
385  
386  inline raw_ostream &operator<<(raw_ostream &OS,
387                                 const MachineTraceMetrics::Trace &Tr) {
388    Tr.print(OS);
389    return OS;
390  }
391  
392  inline raw_ostream &operator<<(raw_ostream &OS,
393                                 const MachineTraceMetrics::Ensemble &En) {
394    En.print(OS);
395    return OS;
396  }
397  } // end namespace llvm
398  
399  #endif
400