1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <math.h>
20 #include <sys/types.h>
21 
22 #include <algorithm>
23 #include <string>
24 #include <unordered_map>
25 #include <vector>
26 
27 #include <android-base/stringprintf.h>
28 
29 #include "event_selection_set.h"
30 
31 namespace simpleperf {
32 
33 struct CounterSum {
34   uint64_t value = 0;
35   uint64_t time_enabled = 0;
36   uint64_t time_running = 0;
37 
38   void FromCounter(const PerfCounter& counter) {
39     value = counter.value;
40     time_enabled = counter.time_enabled;
41     time_running = counter.time_running;
42   }
43 
44   void ToCounter(PerfCounter& counter) const {
45     counter.value = value;
46     counter.time_enabled = time_enabled;
47     counter.time_running = time_running;
48   }
49 
50   CounterSum operator+(const CounterSum& other) const {
51     CounterSum res;
52     res.value = value + other.value;
53     res.time_enabled = time_enabled + other.time_enabled;
54     res.time_running = time_running + other.time_running;
55     return res;
56   }
57 
58   CounterSum operator-(const CounterSum& other) const {
59     CounterSum res;
60     res.value = value - other.value;
61     res.time_enabled = time_enabled - other.time_enabled;
62     res.time_running = time_running - other.time_running;
63     return res;
64   }
65 };
66 
// Identity of one thread: its thread id, the id of its process, and a name.
// NOTE(review): |name| is presumably the thread's name as reported by the
// kernel -- confirm against the code that fills the thread map.
struct ThreadInfo {
  // The ids default to 0 so that a default-initialized ThreadInfo never holds
  // indeterminate values. The struct stays an aggregate (C++14 and later), so
  // brace initialization such as ThreadInfo{tid, pid, name} keeps working.
  pid_t tid = 0;
  pid_t pid = 0;
  std::string name;
};
72 
73 struct CounterSummary {
74   std::string type_name;
75   std::string modifier;
76   uint32_t group_id;
77   const ThreadInfo* thread;
78   int cpu;  // -1 represents all cpus
79   uint64_t count;
80   uint64_t runtime_in_ns;
81   double scale;
82   std::string readable_count;
83   std::string comment;
84   bool auto_generated;
85 
86   CounterSummary(const std::string& type_name, const std::string& modifier, uint32_t group_id,
87                  const ThreadInfo* thread, int cpu, uint64_t count, uint64_t runtime_in_ns,
88                  double scale, bool auto_generated, bool csv)
89       : type_name(type_name),
90         modifier(modifier),
91         group_id(group_id),
92         thread(thread),
93         cpu(cpu),
94         count(count),
95         runtime_in_ns(runtime_in_ns),
96         scale(scale),
97         auto_generated(auto_generated) {
98     readable_count = ReadableCountValue(csv);
99   }
100 
101   bool IsMonitoredAtTheSameTime(const CounterSummary& other) const {
102     // Two summaries are monitored at the same time if they are in the same
103     // group or are monitored all the time.
104     if (group_id == other.group_id) {
105       return true;
106     }
107     return IsMonitoredAllTheTime() && other.IsMonitoredAllTheTime();
108   }
109 
110   std::string Name() const {
111     if (modifier.empty()) {
112       return type_name;
113     }
114     return type_name + ":" + modifier;
115   }
116 
117   bool IsMonitoredAllTheTime() const {
118     // If an event runs all the time it is enabled (by not sharing hardware
119     // counters with other events), the scale of its summary is usually within
120     // [1, 1 + 1e-5]. By setting SCALE_ERROR_LIMIT to 1e-5, We can identify
121     // events monitored all the time in most cases while keeping the report
122     // error rate <= 1e-5.
123     constexpr double SCALE_ERROR_LIMIT = 1e-5;
124     return (fabs(scale - 1.0) < SCALE_ERROR_LIMIT);
125   }
126 
127  private:
128   std::string ReadableCountValue(bool csv) {
129     if (type_name == "cpu-clock" || type_name == "task-clock") {
130       // Convert nanoseconds to milliseconds.
131       double value = count / 1e6;
132       return android::base::StringPrintf("%lf(ms)", value);
133     } else {
134       // Convert big numbers to human friendly mode. For example,
135       // 1000000 will be converted to 1,000,000.
136       std::string s = android::base::StringPrintf("%" PRIu64, count);
137       if (csv) {
138         return s;
139       } else {
140         for (size_t i = s.size() - 1, j = 1; i > 0; --i, ++j) {
141           if (j == 3) {
142             s.insert(s.begin() + i, ',');
143             j = 0;
144           }
145         }
146         return s;
147       }
148     }
149   }
150 };
151 
152 // Build a vector of CounterSummary.
153 class CounterSummaryBuilder {
154  public:
155   CounterSummaryBuilder(bool report_per_thread, bool report_per_core, bool csv,
156                         const std::unordered_map<pid_t, ThreadInfo>& thread_map)
157       : report_per_thread_(report_per_thread),
158         report_per_core_(report_per_core),
159         csv_(csv),
160         thread_map_(thread_map) {}
161 
162   void AddCountersForOneEventType(const CountersInfo& info) {
163     std::unordered_map<uint64_t, CounterSum> sum_map;
164     for (const auto& counter : info.counters) {
165       uint64_t key = 0;
166       if (report_per_thread_) {
167         key |= counter.tid;
168       }
169       if (report_per_core_) {
170         key |= static_cast<uint64_t>(counter.cpu) << 32;
171       }
172       CounterSum& sum = sum_map[key];
173       CounterSum add;
174       add.FromCounter(counter.counter);
175       sum = sum + add;
176     }
177     size_t pre_sum_count = summaries_.size();
178     for (const auto& pair : sum_map) {
179       pid_t tid = report_per_thread_ ? static_cast<pid_t>(pair.first & UINT32_MAX) : 0;
180       int cpu = report_per_core_ ? static_cast<int>(pair.first >> 32) : -1;
181       const CounterSum& sum = pair.second;
182       AddSummary(info, tid, cpu, sum);
183     }
184     if (report_per_thread_ || report_per_core_) {
185       SortSummaries(summaries_.begin() + pre_sum_count, summaries_.end());
186     }
187   }
188 
189   std::vector<CounterSummary> Build() {
190     std::vector<CounterSummary> res = std::move(summaries_);
191     summaries_.clear();
192     return res;
193   }
194 
195  private:
196   void AddSummary(const CountersInfo& info, pid_t tid, int cpu, const CounterSum& sum) {
197     double scale = 1.0;
198     if (sum.time_running < sum.time_enabled && sum.time_running != 0) {
199       scale = static_cast<double>(sum.time_enabled) / sum.time_running;
200     }
201     if ((report_per_thread_ || report_per_core_) && sum.time_running == 0) {
202       // No need to report threads or cpus not running.
203       return;
204     }
205     const ThreadInfo* thread = nullptr;
206     if (report_per_thread_) {
207       auto it = thread_map_.find(tid);
208       CHECK(it != thread_map_.end());
209       thread = &it->second;
210     }
211     summaries_.emplace_back(info.event_name, info.event_modifier, info.group_id, thread, cpu,
212                             sum.value, sum.time_running, scale, false, csv_);
213   }
214 
215   void SortSummaries(std::vector<CounterSummary>::iterator begin,
216                      std::vector<CounterSummary>::iterator end) {
217     if (report_per_thread_ && report_per_core_) {
218       // First sort by event count for all cpus in a thread, then sort by event count of each cpu.
219       std::unordered_map<pid_t, uint64_t> count_per_thread;
220       for (auto it = begin; it != end; ++it) {
221         count_per_thread[it->thread->tid] += it->count;
222       }
223       std::sort(begin, end, [&](const CounterSummary& s1, const CounterSummary& s2) {
224         pid_t tid1 = s1.thread->tid;
225         pid_t tid2 = s2.thread->tid;
226         if (tid1 != tid2) {
227           if (count_per_thread[tid1] != count_per_thread[tid2]) {
228             return count_per_thread[tid1] > count_per_thread[tid2];
229           }
230           return tid1 < tid2;
231         }
232         return s1.count > s2.count;
233       });
234     } else {
235       std::sort(begin, end, [](const CounterSummary& s1, const CounterSummary& s2) {
236         return s1.count > s2.count;
237       });
238     }
239   };
240 
241   const bool report_per_thread_;
242   const bool report_per_core_;
243   const bool csv_;
244   const std::unordered_map<pid_t, ThreadInfo>& thread_map_;
245   std::vector<CounterSummary> summaries_;
246 };
247 
248 class CounterSummaries {
249  public:
250   explicit CounterSummaries(std::vector<CounterSummary>&& summaries, bool csv)
251       : summaries_(std::move(summaries)), csv_(csv) {}
252   const std::vector<CounterSummary>& Summaries() { return summaries_; }
253 
254   const CounterSummary* FindSummary(const std::string& type_name, const std::string& modifier,
255                                     const ThreadInfo* thread, int cpu);
256 
257   // If we have two summaries monitoring the same event type at the same time,
258   // that one is for user space only, and the other is for kernel space only;
259   // then we can automatically generate a summary combining the two results.
260   // For example, a summary of branch-misses:u and a summary for branch-misses:k
261   // can generate a summary of branch-misses.
262   void AutoGenerateSummaries();
263   void GenerateComments(double duration_in_sec);
264   void Show(FILE* fp);
265   void ShowCSV(FILE* fp);
266   void ShowText(FILE* fp);
267 
268  private:
269   std::string GetCommentForSummary(const CounterSummary& s, double duration_in_sec);
270   std::string GetRateComment(const CounterSummary& s, char sep);
271   bool FindRunningTimeForSummary(const CounterSummary& summary, double* running_time_in_sec);
272 
273  private:
274   std::vector<CounterSummary> summaries_;
275   bool csv_;
276 };
277 
278 }  // namespace simpleperf