1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SIMPLE_PERF_SAMPLE_TREE_H_
18 #define SIMPLE_PERF_SAMPLE_TREE_H_
19 
20 #include <unordered_map>
21 
22 #include "OfflineUnwinder.h"
23 #include "SampleComparator.h"
24 #include "SampleDisplayer.h"
25 #include "callchain.h"
26 #include "perf_regs.h"
27 #include "record.h"
28 #include "thread_tree.h"
29 
30 namespace simpleperf {
31 
32 // A SampleTree is a collection of samples. A profiling report is mainly about
33 // constructing a SampleTree and display it. There are three steps involved:
34 // build the tree, sort the tree, and display it. For example, if we want to
35 // show how many cpu-cycles are spent in different functions, we should do as
36 // follows:
37 // 1. Build a SampleTree from SampleRecords with each sample containing
38 //    (cpu-cycles, function name). When building the tree, we should merge
39 //    samples containing the same function name.
40 // 2. Sort the SampleTree by cpu-cycles in the sample. As we want to display the
41 //    samples in a decreasing order of cpu-cycles, we should sort it like this.
42 // 3. Display the SampleTree, each sample prints its (cpu-cycles, function name)
43 //    pair.
44 //
45 // We represent the three steps with three template classes.
46 // 1. A SampleTree is built by SampleTreeBuilder. The comparator passed in
47 //    SampleTreeBuilder's constructor decides the property of samples should be
48 //    merged together.
49 // 2. After a SampleTree is built and got from SampleTreeBuilder, it should be
50 //    sorted by SampleTreeSorter. The sort result decides the order to show
51 //    samples.
52 // 3. At last, the sorted SampleTree is passed to SampleTreeDisplayer, which
53 //    displays each sample in the SampleTree.
54 
55 template <typename EntryT, typename AccumulateInfoT>
56 class SampleTreeBuilder {
57  public:
SampleTreeBuilder(const SampleComparator<EntryT> & comparator)58   explicit SampleTreeBuilder(const SampleComparator<EntryT>& comparator)
59       : sample_set_(comparator),
60         accumulate_callchain_(false),
61         sample_comparator_(comparator),
62         callchain_sample_set_(comparator),
63         use_branch_address_(false),
64         build_callchain_(false),
65         use_caller_as_callchain_root_(false) {}
66 
~SampleTreeBuilder()67   virtual ~SampleTreeBuilder() {}
68 
SetBranchSampleOption(bool use_branch_address)69   void SetBranchSampleOption(bool use_branch_address) { use_branch_address_ = use_branch_address; }
70 
SetCallChainSampleOptions(bool accumulate_callchain,bool build_callchain,bool use_caller_as_callchain_root)71   void SetCallChainSampleOptions(bool accumulate_callchain, bool build_callchain,
72                                  bool use_caller_as_callchain_root) {
73     accumulate_callchain_ = accumulate_callchain;
74     build_callchain_ = build_callchain;
75     use_caller_as_callchain_root_ = use_caller_as_callchain_root;
76     if (accumulate_callchain_) {
77       offline_unwinder_ = OfflineUnwinder::Create(false);
78     }
79   }
80 
GetUnwinder()81   OfflineUnwinder* GetUnwinder() { return offline_unwinder_.get(); }
82 
ProcessSampleRecord(const SampleRecord & r)83   void ProcessSampleRecord(const SampleRecord& r) {
84     if (use_branch_address_ && (r.sample_type & PERF_SAMPLE_BRANCH_STACK)) {
85       for (uint64_t i = 0; i < r.branch_stack_data.stack_nr; ++i) {
86         auto& item = r.branch_stack_data.stack[i];
87         if (item.from != 0 && item.to != 0) {
88           CreateBranchSample(r, item);
89         }
90       }
91       return;
92     }
93     bool in_kernel = r.InKernel();
94     AccumulateInfoT acc_info;
95     EntryT* sample = CreateSample(r, in_kernel, &acc_info);
96     if (sample == nullptr) {
97       return;
98     }
99     if (accumulate_callchain_) {
100       std::vector<uint64_t> ips;
101       if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
102         ips.insert(ips.end(), r.callchain_data.ips, r.callchain_data.ips + r.callchain_data.ip_nr);
103       }
104       const ThreadEntry* thread = GetThreadOfSample(sample);
105       // Use stack_user_data.data.size() instead of stack_user_data.dyn_size, to
106       // make up for the missing kernel patch in N9. See b/22612370.
107       if (thread != nullptr && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
108           (r.regs_user_data.reg_mask != 0) && (r.sample_type & PERF_SAMPLE_STACK_USER) &&
109           (r.GetValidStackSize() > 0)) {
110         RegSet regs(r.regs_user_data.abi, r.regs_user_data.reg_mask, r.regs_user_data.regs);
111         std::vector<uint64_t> user_ips;
112         std::vector<uint64_t> sps;
113         if (offline_unwinder_->UnwindCallChain(*thread, regs, r.stack_user_data.data,
114                                                r.GetValidStackSize(), &user_ips, &sps)) {
115           ips.push_back(PERF_CONTEXT_USER);
116           ips.insert(ips.end(), user_ips.begin(), user_ips.end());
117         }
118       }
119 
120       std::vector<EntryT*> callchain;
121       callchain.push_back(sample);
122 
123       bool first_ip = true;
124       for (auto& ip : ips) {
125         if (ip >= PERF_CONTEXT_MAX) {
126           switch (ip) {
127             case PERF_CONTEXT_KERNEL:
128               in_kernel = true;
129               break;
130             case PERF_CONTEXT_USER:
131               in_kernel = false;
132               break;
133             default:
134               LOG(DEBUG) << "Unexpected perf_context in callchain: " << ip;
135           }
136         } else {
137           if (first_ip) {
138             first_ip = false;
139             // Remove duplication with sampled ip.
140             if (ip == r.ip_data.ip) {
141               continue;
142             }
143           }
144           EntryT* callchain_sample =
145               CreateCallChainSample(thread, sample, ip, in_kernel, callchain, acc_info);
146           if (callchain_sample == nullptr) {
147             break;
148           }
149           callchain.push_back(callchain_sample);
150         }
151       }
152 
153       if (build_callchain_) {
154         std::set<EntryT*> added_set;
155         if (use_caller_as_callchain_root_) {
156           std::reverse(callchain.begin(), callchain.end());
157         }
158         EntryT* parent = nullptr;
159         while (callchain.size() >= 2) {
160           EntryT* sample = callchain[0];
161           callchain.erase(callchain.begin());
162           // Add only once for recursive calls on callchain.
163           if (added_set.find(sample) != added_set.end()) {
164             continue;
165           }
166           added_set.insert(sample);
167           InsertCallChainForSample(sample, callchain, acc_info);
168           UpdateCallChainParentInfo(sample, parent);
169           parent = sample;
170         }
171       }
172     }
173   }
174 
GetSamples()175   std::vector<EntryT*> GetSamples() const {
176     std::vector<EntryT*> result;
177     for (auto& entry : sample_set_) {
178       result.push_back(entry);
179     }
180     return result;
181   }
182 
183  protected:
184   virtual EntryT* CreateSample(const SampleRecord& r, bool in_kernel,
185                                AccumulateInfoT* acc_info) = 0;
186   virtual EntryT* CreateBranchSample(const SampleRecord& r, const BranchStackItemType& item) = 0;
187   virtual EntryT* CreateCallChainSample(const ThreadEntry* thread, const EntryT* sample,
188                                         uint64_t ip, bool in_kernel,
189                                         const std::vector<EntryT*>& callchain,
190                                         const AccumulateInfoT& acc_info) = 0;
191   virtual const ThreadEntry* GetThreadOfSample(EntryT*) = 0;
192   virtual uint64_t GetPeriodForCallChain(const AccumulateInfoT& acc_info) = 0;
FilterSample(const EntryT *)193   virtual bool FilterSample(const EntryT*) { return true; }
194 
UpdateSummary(const EntryT *)195   virtual void UpdateSummary(const EntryT*) {}
196 
197   virtual void MergeSample(EntryT* sample1, EntryT* sample2) = 0;
198 
InsertSample(std::unique_ptr<EntryT> sample)199   EntryT* InsertSample(std::unique_ptr<EntryT> sample) {
200     if (sample == nullptr || !FilterSample(sample.get())) {
201       return nullptr;
202     }
203     UpdateSummary(sample.get());
204     EntryT* result;
205     auto it = sample_set_.find(sample.get());
206     if (it == sample_set_.end()) {
207       result = sample.get();
208       sample_set_.insert(sample.get());
209       sample_storage_.push_back(std::move(sample));
210     } else {
211       result = *it;
212       MergeSample(*it, sample.get());
213     }
214     return result;
215   }
216 
InsertCallChainSample(std::unique_ptr<EntryT> sample,const std::vector<EntryT * > & callchain)217   EntryT* InsertCallChainSample(std::unique_ptr<EntryT> sample,
218                                 const std::vector<EntryT*>& callchain) {
219     if (sample == nullptr) {
220       return nullptr;
221     }
222     if (!FilterSample(sample.get())) {
223       // Store in callchain_sample_set_ for use in other EntryT's callchain.
224       auto it = callchain_sample_set_.find(sample.get());
225       if (it != callchain_sample_set_.end()) {
226         return *it;
227       }
228       EntryT* result = sample.get();
229       callchain_sample_set_.insert(sample.get());
230       sample_storage_.push_back(std::move(sample));
231       return result;
232     }
233 
234     auto it = sample_set_.find(sample.get());
235     if (it != sample_set_.end()) {
236       EntryT* sample = *it;
237       // Process only once for recursive function call.
238       if (std::find(callchain.begin(), callchain.end(), sample) != callchain.end()) {
239         return sample;
240       }
241     }
242     return InsertSample(std::move(sample));
243   }
244 
InsertCallChainForSample(EntryT * sample,const std::vector<EntryT * > & callchain,const AccumulateInfoT & acc_info)245   void InsertCallChainForSample(EntryT* sample, const std::vector<EntryT*>& callchain,
246                                 const AccumulateInfoT& acc_info) {
247     uint64_t period = GetPeriodForCallChain(acc_info);
248     sample->callchain.AddCallChain(callchain, period, [&](const EntryT* s1, const EntryT* s2) {
249       return sample_comparator_.IsSameSample(s1, s2);
250     });
251   }
252 
AddCallChainDuplicateInfo()253   void AddCallChainDuplicateInfo() {
254     if (build_callchain_) {
255       for (EntryT* sample : sample_set_) {
256         auto it = callchain_parent_map_.find(sample);
257         if (it != callchain_parent_map_.end() && !it->second.has_multiple_parents) {
258           sample->callchain.duplicated = true;
259         }
260       }
261     }
262   }
263 
264   std::set<EntryT*, SampleComparator<EntryT>> sample_set_;
265   bool accumulate_callchain_;
266 
267  private:
UpdateCallChainParentInfo(EntryT * sample,EntryT * parent)268   void UpdateCallChainParentInfo(EntryT* sample, EntryT* parent) {
269     if (parent == nullptr) {
270       return;
271     }
272     auto it = callchain_parent_map_.find(sample);
273     if (it == callchain_parent_map_.end()) {
274       CallChainParentInfo info;
275       info.parent = parent;
276       info.has_multiple_parents = false;
277       callchain_parent_map_[sample] = info;
278     } else if (it->second.parent != parent) {
279       it->second.has_multiple_parents = true;
280     }
281   }
282 
283   const SampleComparator<EntryT> sample_comparator_;
284   // If a CallChainSample is filtered out, it is stored in callchain_sample_set_
285   // and only used in other EntryT's callchain.
286   std::set<EntryT*, SampleComparator<EntryT>> callchain_sample_set_;
287   std::vector<std::unique_ptr<EntryT>> sample_storage_;
288 
289   struct CallChainParentInfo {
290     EntryT* parent;
291     bool has_multiple_parents;
292   };
293   std::unordered_map<EntryT*, CallChainParentInfo> callchain_parent_map_;
294 
295   bool use_branch_address_;
296   bool build_callchain_;
297   bool use_caller_as_callchain_root_;
298   std::unique_ptr<OfflineUnwinder> offline_unwinder_;
299 };
300 
301 template <typename EntryT>
302 class SampleTreeSorter {
303  public:
SampleTreeSorter(SampleComparator<EntryT> comparator)304   explicit SampleTreeSorter(SampleComparator<EntryT> comparator) : comparator_(comparator) {}
305 
~SampleTreeSorter()306   virtual ~SampleTreeSorter() {}
307 
Sort(std::vector<EntryT * > & v,bool sort_callchain)308   void Sort(std::vector<EntryT*>& v, bool sort_callchain) {
309     if (sort_callchain) {
310       for (auto& sample : v) {
311         SortCallChain(sample);
312       }
313     }
314     if (!comparator_.empty()) {
315       std::sort(v.begin(), v.end(),
316                 [this](const EntryT* s1, const EntryT* s2) { return comparator_(s1, s2); });
317     }
318   }
319 
320  protected:
SortCallChain(EntryT * sample)321   void SortCallChain(EntryT* sample) { sample->callchain.SortByPeriod(); }
322 
323  private:
324   SampleComparator<EntryT> comparator_;
325 };
326 
327 template <typename EntryT, typename InfoT>
328 class SampleTreeDisplayer {
329  public:
SampleTreeDisplayer(SampleDisplayer<EntryT,InfoT> displayer)330   explicit SampleTreeDisplayer(SampleDisplayer<EntryT, InfoT> displayer) : displayer_(displayer) {}
331 
~SampleTreeDisplayer()332   virtual ~SampleTreeDisplayer() {}
333 
DisplaySamples(FILE * fp,const std::vector<EntryT * > & samples,const InfoT * info)334   void DisplaySamples(FILE* fp, const std::vector<EntryT*>& samples, const InfoT* info) {
335     displayer_.SetInfo(info);
336     for (const auto& sample : samples) {
337       displayer_.AdjustWidth(sample);
338     }
339     displayer_.PrintNames(fp);
340     for (const auto& sample : samples) {
341       displayer_.PrintSample(fp, sample);
342     }
343   }
344 
345  private:
346   SampleDisplayer<EntryT, InfoT> displayer_;
347 };
348 
349 }  // namespace simpleperf
350 
351 #endif  // SIMPLE_PERF_SAMPLE_TREE_H_
352