1 //===-- PerfReader.cpp - perfscript reader  ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "PerfReader.h"
9 
10 static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden,
11                                     cl::init(false), cl::ZeroOrMore,
12                                     cl::desc("Print binary load events."));
13 
14 static cl::opt<bool> ShowUnwinderOutput("show-unwinder-output",
15                                         cl::ReallyHidden, cl::init(false),
16                                         cl::ZeroOrMore,
17                                         cl::desc("Print unwinder output"));
18 
19 namespace llvm {
20 namespace sampleprof {
21 
unwindCall(UnwindState & State)22 void VirtualUnwinder::unwindCall(UnwindState &State) {
23   // The 2nd frame after leaf could be missing if stack sample is
24   // taken when IP is within prolog/epilog, as frame chain isn't
25   // setup yet. Fill in the missing frame in that case.
26   // TODO: Currently we just assume all the addr that can't match the
27   // 2nd frame is in prolog/epilog. In the future, we will switch to
28   // pro/epi tracker(Dwarf CFI) for the precise check.
29   uint64_t Source = State.getCurrentLBRSource();
30   auto Iter = State.CallStack.begin();
31   if (State.CallStack.size() == 1 || *(++Iter) != Source) {
32     State.CallStack.front() = Source;
33   } else {
34     State.CallStack.pop_front();
35   }
36   State.InstPtr.update(Source);
37 }
38 
unwindLinear(UnwindState & State,uint64_t Repeat)39 void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
40   InstructionPointer &IP = State.InstPtr;
41   uint64_t Target = State.getCurrentLBRTarget();
42   uint64_t End = IP.Address;
43   // Unwind linear execution part
44   while (IP.Address >= Target) {
45     uint64_t PrevIP = IP.Address;
46     IP.backward();
47     // Break into segments for implicit call/return due to inlining
48     bool SameInlinee =
49         State.getBinary()->inlineContextEqual(PrevIP, IP.Address);
50     if (!SameInlinee || PrevIP == Target) {
51       recordRangeCount(PrevIP, End, State, Repeat);
52       End = IP.Address;
53     }
54     State.CallStack.front() = IP.Address;
55   }
56 }
57 
unwindReturn(UnwindState & State)58 void VirtualUnwinder::unwindReturn(UnwindState &State) {
59   // Add extra frame as we unwind through the return
60   const LBREntry &LBR = State.getCurrentLBR();
61   uint64_t CallAddr = State.getBinary()->getCallAddrFromFrameAddr(LBR.Target);
62   State.CallStack.front() = CallAddr;
63   State.CallStack.push_front(LBR.Source);
64   State.InstPtr.update(LBR.Source);
65 }
66 
unwindBranchWithinFrame(UnwindState & State)67 void VirtualUnwinder::unwindBranchWithinFrame(UnwindState &State) {
68   // TODO: Tolerate tail call for now, as we may see tail call from libraries.
69   // This is only for intra function branches, excluding tail calls.
70   uint64_t Source = State.getCurrentLBRSource();
71   State.CallStack.front() = Source;
72   State.InstPtr.update(Source);
73 }
74 
recordRangeCount(uint64_t Start,uint64_t End,UnwindState & State,uint64_t Repeat)75 void VirtualUnwinder::recordRangeCount(uint64_t Start, uint64_t End,
76                                        UnwindState &State, uint64_t Repeat) {
77   std::string &&ContextId = State.getExpandedContextStr();
78   uint64_t StartOffset = State.getBinary()->virtualAddrToOffset(Start);
79   uint64_t EndOffset = State.getBinary()->virtualAddrToOffset(End);
80   SampleCounters->recordRangeCount(ContextId, StartOffset, EndOffset, Repeat);
81 }
82 
recordBranchCount(const LBREntry & Branch,UnwindState & State,uint64_t Repeat)83 void VirtualUnwinder::recordBranchCount(const LBREntry &Branch,
84                                         UnwindState &State, uint64_t Repeat) {
85   if (Branch.IsArtificial)
86     return;
87   std::string &&ContextId = State.getExpandedContextStr();
88   uint64_t SourceOffset = State.getBinary()->virtualAddrToOffset(Branch.Source);
89   uint64_t TargetOffset = State.getBinary()->virtualAddrToOffset(Branch.Target);
90   SampleCounters->recordBranchCount(ContextId, SourceOffset, TargetOffset,
91                                     Repeat);
92 }
93 
unwind(const HybridSample & Sample,uint64_t Repeat)94 bool VirtualUnwinder::unwind(const HybridSample &Sample, uint64_t Repeat) {
95   // Capture initial state as starting point for unwinding.
96   UnwindState State(Sample);
97 
98   // Sanity check - making sure leaf of LBR aligns with leaf of stack sample
99   // Stack sample sometimes can be unreliable, so filter out bogus ones.
100   if (!State.validateInitialState())
101     return false;
102 
103   // Also do not attempt linear unwind for the leaf range as it's incomplete.
104   bool IsLeaf = true;
105 
106   // Now process the LBR samples in parrallel with stack sample
107   // Note that we do not reverse the LBR entry order so we can
108   // unwind the sample stack as we walk through LBR entries.
109   while (State.hasNextLBR()) {
110     State.checkStateConsistency();
111 
112     // Unwind implicit calls/returns from inlining, along the linear path,
113     // break into smaller sub section each with its own calling context.
114     if (!IsLeaf) {
115       unwindLinear(State, Repeat);
116     }
117     IsLeaf = false;
118 
119     // Save the LBR branch before it gets unwound.
120     const LBREntry &Branch = State.getCurrentLBR();
121 
122     if (isCallState(State)) {
123       // Unwind calls - we know we encountered call if LBR overlaps with
124       // transition between leaf the 2nd frame. Note that for calls that
125       // were not in the original stack sample, we should have added the
126       // extra frame when processing the return paired with this call.
127       unwindCall(State);
128     } else if (isReturnState(State)) {
129       // Unwind returns - check whether the IP is indeed at a return instruction
130       unwindReturn(State);
131     } else {
132       // Unwind branches - for regular intra function branches, we only
133       // need to record branch with context.
134       unwindBranchWithinFrame(State);
135     }
136     State.advanceLBR();
137     // Record `branch` with calling context after unwinding.
138     recordBranchCount(Branch, State, Repeat);
139   }
140 
141   return true;
142 }
143 
PerfReader(cl::list<std::string> & BinaryFilenames)144 PerfReader::PerfReader(cl::list<std::string> &BinaryFilenames) {
145   // Load the binaries.
146   for (auto Filename : BinaryFilenames)
147     loadBinary(Filename, /*AllowNameConflict*/ false);
148 }
149 
loadBinary(const StringRef BinaryPath,bool AllowNameConflict)150 ProfiledBinary &PerfReader::loadBinary(const StringRef BinaryPath,
151                                        bool AllowNameConflict) {
152   // The binary table is currently indexed by the binary name not the full
153   // binary path. This is because the user-given path may not match the one
154   // that was actually executed.
155   StringRef BinaryName = llvm::sys::path::filename(BinaryPath);
156 
157   // Call to load the binary in the ctor of ProfiledBinary.
158   auto Ret = BinaryTable.insert({BinaryName, ProfiledBinary(BinaryPath)});
159 
160   if (!Ret.second && !AllowNameConflict) {
161     std::string ErrorMsg = "Binary name conflict: " + BinaryPath.str() +
162                            " and " + Ret.first->second.getPath().str() + " \n";
163     exitWithError(ErrorMsg);
164   }
165 
166   return Ret.first->second;
167 }
168 
updateBinaryAddress(const MMapEvent & Event)169 void PerfReader::updateBinaryAddress(const MMapEvent &Event) {
170   // Load the binary.
171   StringRef BinaryPath = Event.BinaryPath;
172   StringRef BinaryName = llvm::sys::path::filename(BinaryPath);
173 
174   auto I = BinaryTable.find(BinaryName);
175   // Drop the event which doesn't belong to user-provided binaries
176   // or if its image is loaded at the same address
177   if (I == BinaryTable.end() || Event.BaseAddress == I->second.getBaseAddress())
178     return;
179 
180   ProfiledBinary &Binary = I->second;
181 
182   // A binary image could be uploaded and then reloaded at different
183   // place, so update the address map here
184   AddrToBinaryMap.erase(Binary.getBaseAddress());
185   AddrToBinaryMap[Event.BaseAddress] = &Binary;
186 
187   // Update binary load address.
188   Binary.setBaseAddress(Event.BaseAddress);
189 }
190 
getBinary(uint64_t Address)191 ProfiledBinary *PerfReader::getBinary(uint64_t Address) {
192   auto Iter = AddrToBinaryMap.lower_bound(Address);
193   if (Iter == AddrToBinaryMap.end() || Iter->first != Address) {
194     if (Iter == AddrToBinaryMap.begin())
195       return nullptr;
196     Iter--;
197   }
198   return Iter->second;
199 }
200 
printSampleCounter(ContextRangeCounter & Counter)201 static void printSampleCounter(ContextRangeCounter &Counter) {
202   // Use ordered map to make the output deterministic
203   std::map<std::string, RangeSample> OrderedCounter(Counter.begin(),
204                                                     Counter.end());
205   for (auto Range : OrderedCounter) {
206     outs() << Range.first << "\n";
207     for (auto I : Range.second) {
208       outs() << "  (" << format("%" PRIx64, I.first.first) << ", "
209              << format("%" PRIx64, I.first.second) << "): " << I.second << "\n";
210     }
211   }
212 }
213 
printUnwinderOutput()214 void PerfReader::printUnwinderOutput() {
215   for (auto I : BinarySampleCounters) {
216     const ProfiledBinary *Binary = I.first;
217     outs() << "Binary(" << Binary->getName().str() << ")'s Range Counter:\n";
218     printSampleCounter(I.second.RangeCounter);
219     outs() << "\nBinary(" << Binary->getName().str() << ")'s Branch Counter:\n";
220     printSampleCounter(I.second.BranchCounter);
221   }
222 }
223 
unwindSamples()224 void PerfReader::unwindSamples() {
225   for (const auto &Item : AggregatedSamples) {
226     const HybridSample &Sample = Item.first;
227     VirtualUnwinder Unwinder(&BinarySampleCounters[Sample.Binary]);
228     Unwinder.unwind(Sample, Item.second);
229   }
230 
231   if (ShowUnwinderOutput)
232     printUnwinderOutput();
233 }
234 
extractLBRStack(TraceStream & TraceIt,SmallVector<LBREntry,16> & LBRStack,ProfiledBinary * Binary)235 bool PerfReader::extractLBRStack(TraceStream &TraceIt,
236                                  SmallVector<LBREntry, 16> &LBRStack,
237                                  ProfiledBinary *Binary) {
238   // The raw format of LBR stack is like:
239   // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
240   //                           ... 0x4005c8/0x4005dc/P/-/-/0
241   // It's in FIFO order and seperated by whitespace.
242   SmallVector<StringRef, 32> Records;
243   TraceIt.getCurrentLine().split(Records, " ");
244 
245   // Extract leading instruction pointer if present, use single
246   // list to pass out as reference.
247   size_t Index = 0;
248   if (!Records.empty() && Records[0].find('/') == StringRef::npos) {
249     Index = 1;
250   }
251   // Now extract LBR samples - note that we do not reverse the
252   // LBR entry order so we can unwind the sample stack as we walk
253   // through LBR entries.
254   uint64_t PrevTrDst = 0;
255 
256   while (Index < Records.size()) {
257     auto &Token = Records[Index++];
258     if (Token.size() == 0)
259       continue;
260 
261     SmallVector<StringRef, 8> Addresses;
262     Token.split(Addresses, "/");
263     uint64_t Src;
264     uint64_t Dst;
265     Addresses[0].substr(2).getAsInteger(16, Src);
266     Addresses[1].substr(2).getAsInteger(16, Dst);
267 
268     bool SrcIsInternal = Binary->addressIsCode(Src);
269     bool DstIsInternal = Binary->addressIsCode(Dst);
270     bool IsArtificial = false;
271     // Ignore branches outside the current binary.
272     if (!SrcIsInternal && !DstIsInternal)
273       continue;
274     if (!SrcIsInternal && DstIsInternal) {
275       // For transition from external code (such as dynamic libraries) to
276       // the current binary, keep track of the branch target which will be
277       // grouped with the Source of the last transition from the current
278       // binary.
279       PrevTrDst = Dst;
280       continue;
281     }
282     if (SrcIsInternal && !DstIsInternal) {
283       // For transition to external code, group the Source with the next
284       // availabe transition target.
285       if (!PrevTrDst)
286         continue;
287       Dst = PrevTrDst;
288       PrevTrDst = 0;
289       IsArtificial = true;
290     }
291     // TODO: filter out buggy duplicate branches on Skylake
292 
293     LBRStack.emplace_back(LBREntry(Src, Dst, IsArtificial));
294   }
295   TraceIt.advance();
296   return !LBRStack.empty();
297 }
298 
extractCallstack(TraceStream & TraceIt,std::list<uint64_t> & CallStack)299 bool PerfReader::extractCallstack(TraceStream &TraceIt,
300                                   std::list<uint64_t> &CallStack) {
301   // The raw format of call stack is like:
302   //            4005dc      # leaf frame
303   //	          400634
304   //	          400684      # root frame
305   // It's in bottom-up order with each frame in one line.
306 
307   // Extract stack frames from sample
308   ProfiledBinary *Binary = nullptr;
309   while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) {
310     StringRef FrameStr = TraceIt.getCurrentLine().ltrim();
311     // We might get an empty line at the beginning or comments, skip it
312     uint64_t FrameAddr = 0;
313     if (FrameStr.getAsInteger(16, FrameAddr)) {
314       TraceIt.advance();
315       break;
316     }
317     TraceIt.advance();
318     if (!Binary) {
319       Binary = getBinary(FrameAddr);
320       // we might have addr not match the MMAP, skip it
321       if (!Binary) {
322         if (AddrToBinaryMap.size() == 0)
323           WithColor::warning() << "No MMAP event in the perfscript, create it "
324                                   "with '--show-mmap-events'\n";
325         break;
326       }
327     }
328     // Currently intermixed frame from different binaries is not supported.
329     // Ignore bottom frames not from binary of interest.
330     if (!Binary->addressIsCode(FrameAddr))
331       break;
332 
333     // We need to translate return address to call address
334     // for non-leaf frames
335     if (!CallStack.empty()) {
336       FrameAddr = Binary->getCallAddrFromFrameAddr(FrameAddr);
337     }
338 
339     CallStack.emplace_back(FrameAddr);
340   }
341 
342   if (CallStack.empty())
343     return false;
344   // Skip other unrelated line, find the next valid LBR line
345   while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) {
346     TraceIt.advance();
347   }
348   // Filter out broken stack sample. We may not have complete frame info
349   // if sample end up in prolog/epilog, the result is dangling context not
350   // connected to entry point. This should be relatively rare thus not much
351   // impact on overall profile quality. However we do want to filter them
352   // out to reduce the number of different calling contexts. One instance
353   // of such case - when sample landed in prolog/epilog, somehow stack
354   // walking will be broken in an unexpected way that higher frames will be
355   // missing.
356   return !Binary->addressInPrologEpilog(CallStack.front());
357 }
358 
parseHybridSample(TraceStream & TraceIt)359 void PerfReader::parseHybridSample(TraceStream &TraceIt) {
360   // The raw hybird sample started with call stack in FILO order and followed
361   // intermediately by LBR sample
362   // e.g.
363   // 	          4005dc    # call stack leaf
364   //	          400634
365   //	          400684    # call stack root
366   // 0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
367   //          ... 0x4005c8/0x4005dc/P/-/-/0    # LBR Entries
368   //
369   HybridSample Sample;
370 
371   // Parsing call stack and populate into HybridSample.CallStack
372   if (!extractCallstack(TraceIt, Sample.CallStack)) {
373     // Skip the next LBR line matched current call stack
374     if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x"))
375       TraceIt.advance();
376     return;
377   }
378   // Set the binary current sample belongs to
379   Sample.Binary = getBinary(Sample.CallStack.front());
380 
381   if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) {
382     // Parsing LBR stack and populate into HybridSample.LBRStack
383     if (extractLBRStack(TraceIt, Sample.LBRStack, Sample.Binary)) {
384       // Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR
385       // ranges
386       Sample.CallStack.front() = Sample.LBRStack[0].Target;
387       // Record samples by aggregation
388       AggregatedSamples[Sample]++;
389     }
390   } else {
391     // LBR sample is encoded in single line after stack sample
392     exitWithError("'Hybrid perf sample is corrupted, No LBR sample line");
393   }
394 }
395 
parseMMap2Event(TraceStream & TraceIt)396 void PerfReader::parseMMap2Event(TraceStream &TraceIt) {
397   // Parse a line like:
398   //  PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
399   //  08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
400   constexpr static const char *const Pattern =
401       "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: "
402       "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
403       "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)";
404   // Field 0 - whole line
405   // Field 1 - PID
406   // Field 2 - base address
407   // Field 3 - mmapped size
408   // Field 4 - page offset
409   // Field 5 - binary path
410   enum EventIndex {
411     WHOLE_LINE = 0,
412     PID = 1,
413     BASE_ADDRESS = 2,
414     MMAPPED_SIZE = 3,
415     PAGE_OFFSET = 4,
416     BINARY_PATH = 5
417   };
418 
419   Regex RegMmap2(Pattern);
420   SmallVector<StringRef, 6> Fields;
421   bool R = RegMmap2.match(TraceIt.getCurrentLine(), &Fields);
422   if (!R) {
423     std::string ErrorMsg = "Cannot parse mmap event: Line" +
424                            Twine(TraceIt.getLineNumber()).str() + ": " +
425                            TraceIt.getCurrentLine().str() + " \n";
426     exitWithError(ErrorMsg);
427   }
428   MMapEvent Event;
429   Fields[PID].getAsInteger(10, Event.PID);
430   Fields[BASE_ADDRESS].getAsInteger(0, Event.BaseAddress);
431   Fields[MMAPPED_SIZE].getAsInteger(0, Event.Size);
432   Fields[PAGE_OFFSET].getAsInteger(0, Event.Offset);
433   Event.BinaryPath = Fields[BINARY_PATH];
434   updateBinaryAddress(Event);
435   if (ShowMmapEvents) {
436     outs() << "Mmap: Binary " << Event.BinaryPath << " loaded at "
437            << format("0x%" PRIx64 ":", Event.BaseAddress) << " \n";
438   }
439   TraceIt.advance();
440 }
441 
parseEventOrSample(TraceStream & TraceIt)442 void PerfReader::parseEventOrSample(TraceStream &TraceIt) {
443   if (TraceIt.getCurrentLine().startswith("PERF_RECORD_MMAP2"))
444     parseMMap2Event(TraceIt);
445   else if (getPerfScriptType() == PERF_LBR_STACK)
446     parseHybridSample(TraceIt);
447   else {
448     // TODO: parse other type sample
449     TraceIt.advance();
450   }
451 }
452 
parseAndAggregateTrace(StringRef Filename)453 void PerfReader::parseAndAggregateTrace(StringRef Filename) {
454   // Trace line iterator
455   TraceStream TraceIt(Filename);
456   while (!TraceIt.isAtEoF())
457     parseEventOrSample(TraceIt);
458 }
459 
checkAndSetPerfType(cl::list<std::string> & PerfTraceFilenames)460 void PerfReader::checkAndSetPerfType(
461     cl::list<std::string> &PerfTraceFilenames) {
462   bool HasHybridPerf = true;
463   for (auto FileName : PerfTraceFilenames) {
464     if (!isHybridPerfScript(FileName)) {
465       HasHybridPerf = false;
466       break;
467     }
468   }
469 
470   if (HasHybridPerf) {
471     // Set up ProfileIsCS to enable context-sensitive functionalities
472     // in SampleProf
473     FunctionSamples::ProfileIsCS = true;
474     PerfType = PERF_LBR_STACK;
475 
476   } else {
477     // TODO: Support other type of perf script
478     PerfType = PERF_INVILID;
479   }
480 
481   if (BinaryTable.size() > 1) {
482     // TODO: remove this if everything is ready to support multiple binaries.
483     exitWithError("Currently only support one input binary, multiple binaries' "
484                   "profile will be merged in one profile and make profile "
485                   "summary info inaccurate. Please use `perfdata` to merge "
486                   "profiles from multiple binaries.");
487   }
488 }
489 
generateRawProfile()490 void PerfReader::generateRawProfile() {
491   if (getPerfScriptType() == PERF_LBR_STACK) {
492     // Unwind samples if it's hybird sample
493     unwindSamples();
494   } else if (getPerfScriptType() == PERF_LBR) {
495     // TODO: range overlap computation for regular AutoFDO
496   }
497 }
498 
parsePerfTraces(cl::list<std::string> & PerfTraceFilenames)499 void PerfReader::parsePerfTraces(cl::list<std::string> &PerfTraceFilenames) {
500   // Check and set current perfscript type
501   checkAndSetPerfType(PerfTraceFilenames);
502   // Parse perf traces and do aggregation.
503   for (auto Filename : PerfTraceFilenames)
504     parseAndAggregateTrace(Filename);
505 
506   generateRawProfile();
507 }
508 
509 } // end namespace sampleprof
510 } // end namespace llvm
511