1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
11 #include "ErrorHandling.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/Support/CommandLine.h"
14 #include "llvm/Support/Regex.h"
15 #include <fstream>
16 #include <list>
17 #include <map>
18 #include <vector>
19 
20 using namespace llvm;
21 using namespace sampleprof;
22 
23 namespace llvm {
24 namespace sampleprof {
25 
26 // Stream based trace line iterator
27 class TraceStream {
28   std::string CurrentLine;
29   std::ifstream Fin;
30   bool IsAtEoF = false;
31   uint64_t LineNumber = 0;
32 
33 public:
TraceStream(StringRef Filename)34   TraceStream(StringRef Filename) : Fin(Filename.str()) {
35     if (!Fin.good())
36       exitWithError("Error read input perf script file", Filename);
37     advance();
38   }
39 
getCurrentLine()40   StringRef getCurrentLine() {
41     assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
42     return CurrentLine;
43   }
44 
getLineNumber()45   uint64_t getLineNumber() { return LineNumber; }
46 
isAtEoF()47   bool isAtEoF() { return IsAtEoF; }
48 
49   // Read the next line
advance()50   void advance() {
51     if (!std::getline(Fin, CurrentLine)) {
52       IsAtEoF = true;
53       return;
54     }
55     LineNumber++;
56   }
57 };
58 
59 // The type of perfscript
60 enum PerfScriptType {
61   PERF_INVILID = 0,
62   PERF_LBR = 1,       // Only LBR sample
63   PERF_LBR_STACK = 2, // Hybrid sample including call stack and LBR stack.
64 };
65 
66 // The parsed LBR sample entry.
67 struct LBREntry {
68   uint64_t Source = 0;
69   uint64_t Target = 0;
70   // An artificial branch stands for a series of consecutive branches starting
71   // from the current binary with a transition through external code and
72   // eventually landing back in the current binary.
73   bool IsArtificial = false;
LBREntryLBREntry74   LBREntry(uint64_t S, uint64_t T, bool I)
75       : Source(S), Target(T), IsArtificial(I) {}
76 };
77 
78 // The parsed hybrid sample including call stack and LBR stack.
79 struct HybridSample {
80   // Profiled binary that current frame address belongs to
81   ProfiledBinary *Binary;
82   // Call stack recorded in FILO(leaf to root) order
83   std::list<uint64_t> CallStack;
84   // LBR stack recorded in FIFO order
85   SmallVector<LBREntry, 16> LBRStack;
86 
87   // Used for sample aggregation
88   bool operator==(const HybridSample &Other) const {
89     if (Other.Binary != Binary)
90       return false;
91     const std::list<uint64_t> &OtherCallStack = Other.CallStack;
92     const SmallVector<LBREntry, 16> &OtherLBRStack = Other.LBRStack;
93 
94     if (CallStack.size() != OtherCallStack.size() ||
95         LBRStack.size() != OtherLBRStack.size())
96       return false;
97 
98     auto Iter = CallStack.begin();
99     for (auto Address : OtherCallStack) {
100       if (Address != *Iter++)
101         return false;
102     }
103 
104     for (size_t I = 0; I < OtherLBRStack.size(); I++) {
105       if (LBRStack[I].Source != OtherLBRStack[I].Source ||
106           LBRStack[I].Target != OtherLBRStack[I].Target)
107         return false;
108     }
109     return true;
110   }
111 };
112 
113 // The state for the unwinder, it doesn't hold the data but only keep the
114 // pointer/index of the data, While unwinding, the CallStack is changed
115 // dynamicially and will be recorded as the context of the sample
116 struct UnwindState {
117   // Profiled binary that current frame address belongs to
118   const ProfiledBinary *Binary;
119   // TODO: switch to use trie for call stack
120   std::list<uint64_t> CallStack;
121   // Used to fall through the LBR stack
122   uint32_t LBRIndex = 0;
123   // Reference to HybridSample.LBRStack
124   const SmallVector<LBREntry, 16> &LBRStack;
125   // Used to iterate the address range
126   InstructionPointer InstPtr;
UnwindStateUnwindState127   UnwindState(const HybridSample &Sample)
128       : Binary(Sample.Binary), CallStack(Sample.CallStack),
129         LBRStack(Sample.LBRStack),
130         InstPtr(Sample.Binary, Sample.CallStack.front()) {}
131 
validateInitialStateUnwindState132   bool validateInitialState() {
133     uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
134     uint64_t StackLeaf = CallStack.front();
135     // When we take a stack sample, ideally the sampling distance between the
136     // leaf IP of stack and the last LBR target shouldn't be very large.
137     // Use a heuristic size (0x100) to filter out broken records.
138     if (StackLeaf < LBRLeaf || StackLeaf >= LBRLeaf + 0x100) {
139       WithColor::warning() << "Bogus trace: stack tip = "
140                            << format("%#010x", StackLeaf)
141                            << ", LBR tip = " << format("%#010x\n", LBRLeaf);
142       return false;
143     }
144     return true;
145   }
146 
checkStateConsistencyUnwindState147   void checkStateConsistency() {
148     assert(InstPtr.Address == CallStack.front() &&
149            "IP should align with context leaf");
150   }
151 
getExpandedContextStrUnwindState152   std::string getExpandedContextStr() const {
153     return Binary->getExpandedContextStr(CallStack);
154   }
getBinaryUnwindState155   const ProfiledBinary *getBinary() const { return Binary; }
hasNextLBRUnwindState156   bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
getCurrentLBRSourceUnwindState157   uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
getCurrentLBRTargetUnwindState158   uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
getCurrentLBRUnwindState159   const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
advanceLBRUnwindState160   void advanceLBR() { LBRIndex++; }
161 };
162 
163 // The counter of branch samples for one function indexed by the branch,
164 // which is represented as the source and target offset pair.
165 using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
166 // The counter of range samples for one function indexed by the range,
167 // which is represented as the start and end offset pair.
168 using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
169 // Range sample counters indexed by the context string
170 using ContextRangeCounter = std::unordered_map<std::string, RangeSample>;
171 // Branch sample counters indexed by the context string
172 using ContextBranchCounter = std::unordered_map<std::string, BranchSample>;
173 
174 // For Hybrid sample counters
175 struct ContextSampleCounters {
176   ContextRangeCounter RangeCounter;
177   ContextBranchCounter BranchCounter;
178 
recordRangeCountContextSampleCounters179   void recordRangeCount(std::string &ContextId, uint64_t Start, uint64_t End,
180                         uint64_t Repeat) {
181     RangeCounter[ContextId][{Start, End}] += Repeat;
182   }
recordBranchCountContextSampleCounters183   void recordBranchCount(std::string &ContextId, uint64_t Source,
184                          uint64_t Target, uint64_t Repeat) {
185     BranchCounter[ContextId][{Source, Target}] += Repeat;
186   }
187 };
188 
189 struct HybridSampleHash {
hashCombineHybridSampleHash190   uint64_t hashCombine(uint64_t Hash, uint64_t Value) const {
191     // Simple DJB2 hash
192     return ((Hash << 5) + Hash) + Value;
193   }
194 
operatorHybridSampleHash195   uint64_t operator()(const HybridSample &Sample) const {
196     uint64_t Hash = 5381;
197     Hash = hashCombine(Hash, reinterpret_cast<uint64_t>(Sample.Binary));
198     for (const auto &Value : Sample.CallStack) {
199       Hash = hashCombine(Hash, Value);
200     }
201     for (const auto &Entry : Sample.LBRStack) {
202       Hash = hashCombine(Hash, Entry.Source);
203       Hash = hashCombine(Hash, Entry.Target);
204     }
205     return Hash;
206   }
207 };
208 
209 // After parsing the sample, we record the samples by aggregating them
210 // into this structure and the value is the sample counter.
211 using AggregationCounter =
212     std::unordered_map<HybridSample, uint64_t, HybridSampleHash>;
213 
214 /*
215 As in hybrid sample we have a group of LBRs and the most recent sampling call
216 stack, we can walk through those LBRs to infer more call stacks which would be
217 used as context for profile. VirtualUnwinder is the class to do the call stack
218 unwinding based on LBR state. Two types of unwinding are processd here:
219 1) LBR unwinding and 2) linear range unwinding.
220 Specifically, for each LBR entry(can be classified into call, return, regular
221 branch), LBR unwinding will replay the operation by pushing, popping or
222 switching leaf frame towards the call stack and since the initial call stack
223 is most recently sampled, the replay should be in anti-execution order, i.e. for
224 the regular case, pop the call stack when LBR is call, push frame on call stack
225 when LBR is return. After each LBR processed, it also needs to align with the
226 next LBR by going through instructions from previous LBR's target to current
227 LBR's source, which is the linear unwinding. As instruction from linear range
228 can come from different function by inlining, linear unwinding will do the range
229 splitting and record counters by the range with same inline context. Over those
230 unwinding process we will record each call stack as context id and LBR/linear
231 range as sample counter for further CS profile generation.
232 */
233 class VirtualUnwinder {
234 public:
VirtualUnwinder(ContextSampleCounters * Counters)235   VirtualUnwinder(ContextSampleCounters *Counters) : SampleCounters(Counters) {}
236 
isCallState(UnwindState & State)237   bool isCallState(UnwindState &State) const {
238     // The tail call frame is always missing here in stack sample, we will
239     // use a specific tail call tracker to infer it.
240     return State.getBinary()->addressIsCall(State.getCurrentLBRSource());
241   }
242 
isReturnState(UnwindState & State)243   bool isReturnState(UnwindState &State) const {
244     // Simply check addressIsReturn, as ret is always reliable, both for
245     // regular call and tail call.
246     return State.getBinary()->addressIsReturn(State.getCurrentLBRSource());
247   }
248 
249   void unwindCall(UnwindState &State);
250   void unwindLinear(UnwindState &State, uint64_t Repeat);
251   void unwindReturn(UnwindState &State);
252   void unwindBranchWithinFrame(UnwindState &State);
253   bool unwind(const HybridSample &Sample, uint64_t Repeat);
254   void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
255                         uint64_t Repeat);
256   void recordBranchCount(const LBREntry &Branch, UnwindState &State,
257                          uint64_t Repeat);
258 
259 private:
260   ContextSampleCounters *SampleCounters;
261 };
262 
263 // Filename to binary map
264 using BinaryMap = StringMap<ProfiledBinary>;
265 // Address to binary map for fast look-up
266 using AddressBinaryMap = std::map<uint64_t, ProfiledBinary *>;
267 // Binary to ContextSampleCounters Map to support multiple binary, we may have
268 // same binary loaded at different addresses, they should share the same sample
269 // counter
270 using BinarySampleCounterMap =
271     std::unordered_map<ProfiledBinary *, ContextSampleCounters>;
272 
273 // Load binaries and read perf trace to parse the events and samples
274 class PerfReader {
275 
276 public:
277   PerfReader(cl::list<std::string> &BinaryFilenames);
278 
279   // Hybrid sample(call stack + LBRs) profile traces are seprated by double line
280   // break, search for that within the first 4k charactors to avoid going
281   // through the whole file.
isHybridPerfScript(StringRef FileName)282   static bool isHybridPerfScript(StringRef FileName) {
283     auto BufOrError = MemoryBuffer::getFileOrSTDIN(FileName, 4000);
284     if (!BufOrError)
285       exitWithError(BufOrError.getError(), FileName);
286     auto Buffer = std::move(BufOrError.get());
287     if (Buffer->getBuffer().find("\n\n") == StringRef::npos)
288       return false;
289     return true;
290   }
291 
292   // The parsed MMap event
293   struct MMapEvent {
294     uint64_t PID = 0;
295     uint64_t BaseAddress = 0;
296     uint64_t Size = 0;
297     uint64_t Offset = 0;
298     StringRef BinaryPath;
299   };
300 
301   /// Load symbols and disassemble the code of a give binary.
302   /// Also register the binary in the binary table.
303   ///
304   ProfiledBinary &loadBinary(const StringRef BinaryPath,
305                              bool AllowNameConflict = true);
306   void updateBinaryAddress(const MMapEvent &Event);
getPerfScriptType()307   PerfScriptType getPerfScriptType() const { return PerfType; }
308   // Entry of the reader to parse multiple perf traces
309   void parsePerfTraces(cl::list<std::string> &PerfTraceFilenames);
getBinarySampleCounters()310   const BinarySampleCounterMap &getBinarySampleCounters() const {
311     return BinarySampleCounters;
312   }
313 
314 private:
315   /// Parse a single line of a PERF_RECORD_MMAP2 event looking for a
316   /// mapping between the binary name and its memory layout.
317   ///
318   void parseMMap2Event(TraceStream &TraceIt);
319   // Parse perf events/samples and do aggregation
320   void parseAndAggregateTrace(StringRef Filename);
321   // Parse either an MMAP event or a perf sample
322   void parseEventOrSample(TraceStream &TraceIt);
323   // Parse the hybrid sample including the call and LBR line
324   void parseHybridSample(TraceStream &TraceIt);
325   // Extract call stack from the perf trace lines
326   bool extractCallstack(TraceStream &TraceIt, std::list<uint64_t> &CallStack);
327   // Extract LBR stack from one perf trace line
328   bool extractLBRStack(TraceStream &TraceIt,
329                        SmallVector<LBREntry, 16> &LBRStack,
330                        ProfiledBinary *Binary);
331   void checkAndSetPerfType(cl::list<std::string> &PerfTraceFilenames);
332   // Post process the profile after trace aggregation, we will do simple range
333   // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
334   void generateRawProfile();
335   // Unwind the hybrid samples after aggregration
336   void unwindSamples();
337   void printUnwinderOutput();
338   // Helper function for looking up binary in AddressBinaryMap
339   ProfiledBinary *getBinary(uint64_t Address);
340 
341   BinaryMap BinaryTable;
342   AddressBinaryMap AddrToBinaryMap; // Used by address-based lookup.
343 
344 private:
345   BinarySampleCounterMap BinarySampleCounters;
346   // Samples with the repeating time generated by the perf reader
347   AggregationCounter AggregatedSamples;
348   PerfScriptType PerfType;
349 };
350 
351 } // end namespace sampleprof
352 } // end namespace llvm
353 
354 #endif
355