1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SIMPLE_PERF_SAMPLE_TREE_H_
18 #define SIMPLE_PERF_SAMPLE_TREE_H_
19 
20 #include "callchain.h"
21 #include "dwarf_unwind.h"
22 #include "perf_regs.h"
23 #include "record.h"
24 #include "SampleComparator.h"
25 #include "SampleDisplayer.h"
26 #include "thread_tree.h"
27 
28 // A SampleTree is a collection of samples. A profiling report is mainly about
29 // constructing a SampleTree and display it. There are three steps involved:
30 // build the tree, sort the tree, and display it. For example, if we want to
31 // show how many cpu-cycles are spent in different functions, we should do as
32 // follows:
33 // 1. Build a SampleTree from SampleRecords with each sample containing
34 //    (cpu-cycles, function name). When building the tree, we should merge
35 //    samples containing the same function name.
36 // 2. Sort the SampleTree by cpu-cycles in the sample. As we want to display the
37 //    samples in a decreasing order of cpu-cycles, we should sort it like this.
38 // 3. Display the SampleTree, each sample prints its (cpu-cycles, function name)
39 //    pair.
40 //
41 // We represent the three steps with three template classes.
42 // 1. A SampleTree is built by SampleTreeBuilder. The comparator passed in
43 //    SampleTreeBuilder's constructor decides the property of samples should be
44 //    merged together.
45 // 2. After a SampleTree is built and got from SampleTreeBuilder, it should be
46 //    sorted by SampleTreeSorter. The sort result decides the order to show
47 //    samples.
48 // 3. At last, the sorted SampleTree is passed to SampleTreeDisplayer, which
49 //    displays each sample in the SampleTree.
50 
51 template <typename EntryT, typename AccumulateInfoT>
52 class SampleTreeBuilder {
53  public:
SampleTreeBuilder(SampleComparator<EntryT> comparator)54   explicit SampleTreeBuilder(SampleComparator<EntryT> comparator)
55       : sample_set_(comparator),
56         accumulate_callchain_(false),
57         sample_comparator_(comparator),
58         callchain_sample_set_(comparator),
59         use_branch_address_(false),
60         build_callchain_(false),
61         use_caller_as_callchain_root_(false),
62         strict_unwind_arch_check_(false) {}
63 
~SampleTreeBuilder()64   virtual ~SampleTreeBuilder() {}
65 
SetBranchSampleOption(bool use_branch_address)66   void SetBranchSampleOption(bool use_branch_address) {
67     use_branch_address_ = use_branch_address;
68   }
69 
SetCallChainSampleOptions(bool accumulate_callchain,bool build_callchain,bool use_caller_as_callchain_root,bool strict_unwind_arch_check)70   void SetCallChainSampleOptions(bool accumulate_callchain,
71                                  bool build_callchain,
72                                  bool use_caller_as_callchain_root,
73                                  bool strict_unwind_arch_check) {
74     accumulate_callchain_ = accumulate_callchain;
75     build_callchain_ = build_callchain;
76     use_caller_as_callchain_root_ = use_caller_as_callchain_root;
77     strict_unwind_arch_check_ = strict_unwind_arch_check;
78   }
79 
ProcessSampleRecord(const SampleRecord & r)80   void ProcessSampleRecord(const SampleRecord& r) {
81     if (use_branch_address_ && (r.sample_type & PERF_SAMPLE_BRANCH_STACK)) {
82       for (uint64_t i = 0; i < r.branch_stack_data.stack_nr; ++i) {
83         auto& item = r.branch_stack_data.stack[i];
84         if (item.from != 0 && item.to != 0) {
85           CreateBranchSample(r, item);
86         }
87       }
88       return;
89     }
90     bool in_kernel = r.InKernel();
91     AccumulateInfoT acc_info;
92     EntryT* sample = CreateSample(r, in_kernel, &acc_info);
93     if (sample == nullptr) {
94       return;
95     }
96     if (accumulate_callchain_) {
97       std::vector<uint64_t> ips;
98       if (r.sample_type & PERF_SAMPLE_CALLCHAIN) {
99         ips.insert(ips.end(), r.callchain_data.ips,
100                    r.callchain_data.ips + r.callchain_data.ip_nr);
101       }
102       const ThreadEntry* thread = GetThreadOfSample(sample);
103       // Use stack_user_data.data.size() instead of stack_user_data.dyn_size, to
104       // make up for the missing kernel patch in N9. See b/22612370.
105       if (thread != nullptr && (r.sample_type & PERF_SAMPLE_REGS_USER) &&
106           (r.regs_user_data.reg_mask != 0) &&
107           (r.sample_type & PERF_SAMPLE_STACK_USER) &&
108           (r.GetValidStackSize() > 0)) {
109         RegSet regs = CreateRegSet(r.regs_user_data.abi,
110                                    r.regs_user_data.reg_mask,
111                                    r.regs_user_data.regs);
112         std::vector<uint64_t> unwind_ips =
113             UnwindCallChain(r.regs_user_data.abi, *thread, regs,
114                             r.stack_user_data.data,
115                             r.GetValidStackSize(), strict_unwind_arch_check_);
116         if (!unwind_ips.empty()) {
117           ips.push_back(PERF_CONTEXT_USER);
118           ips.insert(ips.end(), unwind_ips.begin(), unwind_ips.end());
119         }
120       }
121 
122       std::vector<EntryT*> callchain;
123       callchain.push_back(sample);
124 
125       bool first_ip = true;
126       for (auto& ip : ips) {
127         if (ip >= PERF_CONTEXT_MAX) {
128           switch (ip) {
129             case PERF_CONTEXT_KERNEL:
130               in_kernel = true;
131               break;
132             case PERF_CONTEXT_USER:
133               in_kernel = false;
134               break;
135             default:
136               LOG(DEBUG) << "Unexpected perf_context in callchain: " << ip;
137           }
138         } else {
139           if (first_ip) {
140             first_ip = false;
141             // Remove duplication with sampled ip.
142             if (ip == r.ip_data.ip) {
143               continue;
144             }
145           }
146           EntryT* callchain_sample =
147               CreateCallChainSample(sample, ip, in_kernel, callchain, acc_info);
148           if (callchain_sample == nullptr) {
149             break;
150           }
151           callchain.push_back(callchain_sample);
152         }
153       }
154 
155       if (build_callchain_) {
156         std::set<EntryT*> added_set;
157         if (use_caller_as_callchain_root_) {
158           std::reverse(callchain.begin(), callchain.end());
159         }
160         while (callchain.size() >= 2) {
161           EntryT* sample = callchain[0];
162           callchain.erase(callchain.begin());
163           // Add only once for recursive calls on callchain.
164           if (added_set.find(sample) != added_set.end()) {
165             continue;
166           }
167           added_set.insert(sample);
168           InsertCallChainForSample(sample, callchain, acc_info);
169         }
170       }
171     }
172   }
173 
GetSamples()174   std::vector<EntryT*> GetSamples() const {
175     std::vector<EntryT*> result;
176     for (auto& entry : sample_set_) {
177       result.push_back(entry);
178     }
179     return result;
180   }
181 
182  protected:
183   virtual EntryT* CreateSample(const SampleRecord& r, bool in_kernel,
184                                AccumulateInfoT* acc_info) = 0;
185   virtual EntryT* CreateBranchSample(const SampleRecord& r,
186                                      const BranchStackItemType& item) = 0;
187   virtual EntryT* CreateCallChainSample(const EntryT* sample, uint64_t ip,
188                                         bool in_kernel,
189                                         const std::vector<EntryT*>& callchain,
190                                         const AccumulateInfoT& acc_info) = 0;
191   virtual const ThreadEntry* GetThreadOfSample(EntryT*) = 0;
192   virtual uint64_t GetPeriodForCallChain(const AccumulateInfoT& acc_info) = 0;
FilterSample(const EntryT *)193   virtual bool FilterSample(const EntryT*) { return true; }
194 
UpdateSummary(const EntryT *)195   virtual void UpdateSummary(const EntryT*) {}
196 
197   virtual void MergeSample(EntryT* sample1, EntryT* sample2) = 0;
198 
InsertSample(std::unique_ptr<EntryT> sample)199   EntryT* InsertSample(std::unique_ptr<EntryT> sample) {
200     if (sample == nullptr || !FilterSample(sample.get())) {
201       return nullptr;
202     }
203     UpdateSummary(sample.get());
204     EntryT* result;
205     auto it = sample_set_.find(sample.get());
206     if (it == sample_set_.end()) {
207       result = sample.get();
208       sample_set_.insert(sample.get());
209       sample_storage_.push_back(std::move(sample));
210     } else {
211       result = *it;
212       MergeSample(*it, sample.get());
213     }
214     return result;
215   }
216 
InsertCallChainSample(std::unique_ptr<EntryT> sample,const std::vector<EntryT * > & callchain)217   EntryT* InsertCallChainSample(std::unique_ptr<EntryT> sample,
218                                 const std::vector<EntryT*>& callchain) {
219     if (sample == nullptr) {
220       return nullptr;
221     }
222     if (!FilterSample(sample.get())) {
223       // Store in callchain_sample_set_ for use in other EntryT's callchain.
224       auto it = callchain_sample_set_.find(sample.get());
225       if (it != callchain_sample_set_.end()) {
226         return *it;
227       }
228       EntryT* result = sample.get();
229       callchain_sample_set_.insert(sample.get());
230       sample_storage_.push_back(std::move(sample));
231       return result;
232     }
233 
234     auto it = sample_set_.find(sample.get());
235     if (it != sample_set_.end()) {
236       EntryT* sample = *it;
237       // Process only once for recursive function call.
238       if (std::find(callchain.begin(), callchain.end(), sample) !=
239           callchain.end()) {
240         return sample;
241       }
242     }
243     return InsertSample(std::move(sample));
244   }
245 
InsertCallChainForSample(EntryT * sample,const std::vector<EntryT * > & callchain,const AccumulateInfoT & acc_info)246   void InsertCallChainForSample(EntryT* sample,
247                                 const std::vector<EntryT*>& callchain,
248                                 const AccumulateInfoT& acc_info) {
249     uint64_t period = GetPeriodForCallChain(acc_info);
250     sample->callchain.AddCallChain(
251         callchain, period, [&](const EntryT* s1, const EntryT* s2) {
252           return sample_comparator_.IsSameSample(s1, s2);
253         });
254   }
255 
256   std::set<EntryT*, SampleComparator<EntryT>> sample_set_;
257   bool accumulate_callchain_;
258 
259  private:
260   const SampleComparator<EntryT> sample_comparator_;
261   // If a CallChainSample is filtered out, it is stored in callchain_sample_set_
262   // and only used in other EntryT's callchain.
263   std::set<EntryT*, SampleComparator<EntryT>> callchain_sample_set_;
264   std::vector<std::unique_ptr<EntryT>> sample_storage_;
265 
266   bool use_branch_address_;
267   bool build_callchain_;
268   bool use_caller_as_callchain_root_;
269   bool strict_unwind_arch_check_;
270 };
271 
272 template <typename EntryT>
273 class SampleTreeSorter {
274  public:
SampleTreeSorter(SampleComparator<EntryT> comparator)275   explicit SampleTreeSorter(SampleComparator<EntryT> comparator)
276       : comparator_(comparator) {}
277 
~SampleTreeSorter()278   virtual ~SampleTreeSorter() {}
279 
Sort(std::vector<EntryT * > & v,bool sort_callchain)280   void Sort(std::vector<EntryT*>& v, bool sort_callchain) {
281     if (sort_callchain) {
282       for (auto& sample : v) {
283         SortCallChain(sample);
284       }
285     }
286     if (!comparator_.empty()) {
287       std::sort(v.begin(), v.end(), [this](const EntryT* s1, const EntryT* s2) {
288         return comparator_(s1, s2);
289       });
290     }
291   }
292 
293  protected:
SortCallChain(EntryT * sample)294   void SortCallChain(EntryT* sample) { sample->callchain.SortByPeriod(); }
295 
296  private:
297   SampleComparator<EntryT> comparator_;
298 };
299 
300 template <typename EntryT, typename InfoT>
301 class SampleTreeDisplayer {
302  public:
SampleTreeDisplayer(SampleDisplayer<EntryT,InfoT> displayer)303   explicit SampleTreeDisplayer(SampleDisplayer<EntryT, InfoT> displayer)
304       : displayer_(displayer) {}
305 
~SampleTreeDisplayer()306   virtual ~SampleTreeDisplayer() {}
307 
DisplaySamples(FILE * fp,const std::vector<EntryT * > & samples,const InfoT * info)308   void DisplaySamples(FILE* fp, const std::vector<EntryT*>& samples,
309                       const InfoT* info) {
310     displayer_.SetInfo(info);
311     for (const auto& sample : samples) {
312       displayer_.AdjustWidth(sample);
313     }
314     displayer_.PrintNames(fp);
315     for (const auto& sample : samples) {
316       displayer_.PrintSample(fp, sample);
317     }
318   }
319 
320  private:
321   SampleDisplayer<EntryT, InfoT> displayer_;
322 };
323 
324 #endif  // SIMPLE_PERF_SAMPLE_TREE_H_
325