1 //===--- PerfMonitor.h --- Monitor time spent in scops --------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef PERF_MONITOR_H 10 #define PERF_MONITOR_H 11 12 #include "polly/CodeGen/IRBuilder.h" 13 14 namespace polly { 15 16 class PerfMonitor { 17 public: 18 /// Create a new performance monitor. 19 /// 20 /// @param S The scop for which to generate fine-grained performance 21 /// monitoring information. 22 /// @param M The module for which to generate the performance monitor. 23 PerfMonitor(const Scop &S, llvm::Module *M); 24 25 /// Initialize the performance monitor. 26 /// 27 /// Ensure that all global variables, functions, and callbacks needed to 28 /// manage the performance monitor are initialized and registered. 29 void initialize(); 30 31 /// Mark the beginning of a timing region. 32 /// 33 /// @param InsertBefore The instruction before which the timing region starts. 34 void insertRegionStart(llvm::Instruction *InsertBefore); 35 36 /// Mark the end of a timing region. 37 /// 38 /// @param InsertBefore The instruction before which the timing region starts. 39 void insertRegionEnd(llvm::Instruction *InsertBefore); 40 41 private: 42 llvm::Module *M; 43 PollyIRBuilder Builder; 44 45 // The scop to profile against. 46 const Scop &S; 47 48 /// Indicates if performance profiling is supported on this architecture. 49 bool Supported; 50 51 /// The cycle counter at the beginning of the program execution. 52 llvm::Value *CyclesTotalStartPtr; 53 54 /// The total number of cycles spent in the current scop S. 55 llvm::Value *CyclesInCurrentScopPtr; 56 57 /// The total number of times the current scop S is executed. 58 llvm::Value *TripCountForCurrentScopPtr; 59 60 /// The total number of cycles spent within scops. 61 llvm::Value *CyclesInScopsPtr; 62 63 /// The value of the cycle counter at the beginning of the last scop. 64 llvm::Value *CyclesInScopStartPtr; 65 66 /// A global variable, that keeps track if the performance monitor 67 /// initialization has already been run. 68 llvm::Value *AlreadyInitializedPtr; 69 70 llvm::Function *insertInitFunction(llvm::Function *FinalReporting); 71 72 /// Add Function @p to list of global constructors 73 /// 74 /// If no global constructors are available in this current module, insert 75 /// a new list of global constructors containing @p Fn as only global 76 /// constructor. Otherwise, append @p Fn to the list of global constructors. 77 /// 78 /// All functions listed as global constructors are executed before the 79 /// main() function is called. 80 /// 81 /// @param Fn Function to add to global constructors 82 void addToGlobalConstructors(llvm::Function *Fn); 83 84 /// Add global variables to module. 85 /// 86 /// Insert a set of global variables that are used to track performance, 87 /// into the module (or obtain references to them if they already exist). 88 void addGlobalVariables(); 89 90 /// Add per-scop tracking to module. 91 /// 92 /// Insert the global variable which is used to track the number of cycles 93 /// this scop runs. 94 void addScopCounter(); 95 96 /// Get a reference to the intrinsic "{ i64, i32 } @llvm.x86.rdtscp()". 97 /// 98 /// The rdtscp function returns the current value of the processor's 99 /// time-stamp counter as well as the current CPU identifier. On modern x86 100 /// systems, the returned value is independent of the dynamic clock frequency 101 /// and consistent across multiple cores. It can consequently be used to get 102 /// accurate and low-overhead timing information. Even though the counter is 103 /// wrapping, it can be reliably used even for measuring longer time 104 /// intervals, as on a 1 GHz processor the counter only wraps every 545 years. 105 /// 106 /// The RDTSCP instruction is "pseudo" serializing: 107 /// 108 /// "“The RDTSCP instruction waits until all previous instructions have been 109 /// executed before reading the counter. However, subsequent instructions may 110 /// begin execution before the read operation is performed.” 111 /// 112 /// To ensure that no later instructions are scheduled before the RDTSCP 113 /// instruction it is often recommended to schedule a cpuid call after the 114 /// RDTSCP instruction. We do not do this yet, trading some imprecision in 115 /// our timing for a reduced overhead in our timing. 116 /// 117 /// @returns A reference to the declaration of @llvm.x86.rdtscp. 118 llvm::Function *getRDTSCP(); 119 120 /// Get a reference to "int atexit(void (*function)(void))" function. 121 /// 122 /// This function allows to register function pointers that must be executed 123 /// when the program is terminated. 124 /// 125 /// @returns A reference to @atexit(). 126 llvm::Function *getAtExit(); 127 128 /// Create function "__polly_perf_final_reporting". 129 /// 130 /// This function finalizes the performance measurements and prints the 131 /// results to stdout. It is expected to be registered with 'atexit()'. 132 llvm::Function *insertFinalReporting(); 133 134 /// Append Scop reporting data to "__polly_perf_final_reporting". 135 /// 136 /// This function appends the current scop (S)'s information to the final 137 /// printing function. 138 void AppendScopReporting(); 139 }; 140 } // namespace polly 141 142 #endif 143