1 //===--- PerfMonitor.h --- Monitor time spent in scops --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef PERF_MONITOR_H
10 #define PERF_MONITOR_H
11 
12 #include "polly/CodeGen/IRBuilder.h"
13 
14 namespace polly {
15 
16 class PerfMonitor {
17 public:
18   /// Create a new performance monitor.
19   ///
20   /// @param S The scop for which to generate fine-grained performance
21   ///          monitoring information.
22   /// @param M The module for which to generate the performance monitor.
23   PerfMonitor(const Scop &S, llvm::Module *M);
24 
25   /// Initialize the performance monitor.
26   ///
27   /// Ensure that all global variables, functions, and callbacks needed to
28   /// manage the performance monitor are initialized and registered.
29   void initialize();
30 
31   /// Mark the beginning of a timing region.
32   ///
33   /// @param InsertBefore The instruction before which the timing region starts.
34   void insertRegionStart(llvm::Instruction *InsertBefore);
35 
36   /// Mark the end of a timing region.
37   ///
38   /// @param InsertBefore The instruction before which the timing region starts.
39   void insertRegionEnd(llvm::Instruction *InsertBefore);
40 
41 private:
42   llvm::Module *M;
43   PollyIRBuilder Builder;
44 
45   // The scop to profile against.
46   const Scop &S;
47 
48   /// Indicates if performance profiling is supported on this architecture.
49   bool Supported;
50 
51   /// The cycle counter at the beginning of the program execution.
52   llvm::Value *CyclesTotalStartPtr;
53 
54   /// The total number of cycles spent in the current scop S.
55   llvm::Value *CyclesInCurrentScopPtr;
56 
57   /// The total number of times the current scop S is executed.
58   llvm::Value *TripCountForCurrentScopPtr;
59 
60   /// The total number of cycles spent within scops.
61   llvm::Value *CyclesInScopsPtr;
62 
63   /// The value of the cycle counter at the beginning of the last scop.
64   llvm::Value *CyclesInScopStartPtr;
65 
66   /// A global variable, that keeps track if the performance monitor
67   /// initialization has already been run.
68   llvm::Value *AlreadyInitializedPtr;
69 
70   llvm::Function *insertInitFunction(llvm::Function *FinalReporting);
71 
72   /// Add Function @p to list of global constructors
73   ///
74   /// If no global constructors are available in this current module, insert
75   /// a new list of global constructors containing @p Fn as only global
76   /// constructor. Otherwise, append @p Fn to the list of global constructors.
77   ///
78   /// All functions listed as global constructors are executed before the
79   /// main() function is called.
80   ///
81   /// @param Fn Function to add to global constructors
82   void addToGlobalConstructors(llvm::Function *Fn);
83 
84   /// Add global variables to module.
85   ///
86   /// Insert a set of global variables that are used to track performance,
87   /// into the module (or obtain references to them if they already exist).
88   void addGlobalVariables();
89 
90   /// Add per-scop tracking to module.
91   ///
92   /// Insert the global variable which is used to track the number of cycles
93   /// this scop runs.
94   void addScopCounter();
95 
96   /// Get a reference to the intrinsic "{ i64, i32 } @llvm.x86.rdtscp()".
97   ///
98   /// The rdtscp function returns the current value of the processor's
99   /// time-stamp counter as well as the current CPU identifier. On modern x86
100   /// systems, the returned value is independent of the dynamic clock frequency
101   /// and consistent across multiple cores. It can consequently be used to get
102   /// accurate and low-overhead timing information. Even though the counter is
103   /// wrapping, it can be reliably used even for measuring longer time
104   /// intervals, as on a 1 GHz processor the counter only wraps every 545 years.
105   ///
106   /// The RDTSCP instruction is "pseudo" serializing:
107   ///
108   /// "“The RDTSCP instruction waits until all previous instructions have been
109   /// executed before reading the counter. However, subsequent instructions may
110   /// begin execution before the read operation is performed.”
111   ///
112   /// To ensure that no later instructions are scheduled before the RDTSCP
113   /// instruction it is often recommended to schedule a cpuid call after the
114   /// RDTSCP instruction. We do not do this yet, trading some imprecision in
115   /// our timing for a reduced overhead in our timing.
116   ///
117   /// @returns A reference to the declaration of @llvm.x86.rdtscp.
118   llvm::Function *getRDTSCP();
119 
120   /// Get a reference to "int atexit(void (*function)(void))" function.
121   ///
122   /// This function allows to register function pointers that must be executed
123   /// when the program is terminated.
124   ///
125   /// @returns A reference to @atexit().
126   llvm::Function *getAtExit();
127 
128   /// Create function "__polly_perf_final_reporting".
129   ///
130   /// This function finalizes the performance measurements and prints the
131   /// results to stdout. It is expected to be registered with 'atexit()'.
132   llvm::Function *insertFinalReporting();
133 
134   /// Append Scop reporting data to "__polly_perf_final_reporting".
135   ///
136   /// This function appends the current scop (S)'s information to the final
137   /// printing function.
138   void AppendScopReporting();
139 };
140 } // namespace polly
141 
142 #endif
143