1 /* 2 * Copyright (C) 2020 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include <math.h> 20 #include <sys/types.h> 21 22 #include <algorithm> 23 #include <string> 24 #include <unordered_map> 25 #include <vector> 26 27 #include <android-base/stringprintf.h> 28 29 #include "event_selection_set.h" 30 31 namespace simpleperf { 32 33 struct CounterSum { 34 uint64_t value = 0; 35 uint64_t time_enabled = 0; 36 uint64_t time_running = 0; 37 38 void FromCounter(const PerfCounter& counter) { 39 value = counter.value; 40 time_enabled = counter.time_enabled; 41 time_running = counter.time_running; 42 } 43 44 void ToCounter(PerfCounter& counter) const { 45 counter.value = value; 46 counter.time_enabled = time_enabled; 47 counter.time_running = time_running; 48 } 49 50 CounterSum operator+(const CounterSum& other) const { 51 CounterSum res; 52 res.value = value + other.value; 53 res.time_enabled = time_enabled + other.time_enabled; 54 res.time_running = time_running + other.time_running; 55 return res; 56 } 57 58 CounterSum operator-(const CounterSum& other) const { 59 CounterSum res; 60 res.value = value - other.value; 61 res.time_enabled = time_enabled - other.time_enabled; 62 res.time_running = time_running - other.time_running; 63 return res; 64 } 65 }; 66 67 struct ThreadInfo { 68 pid_t tid; 69 pid_t pid; 70 std::string name; 71 }; 72 73 struct CounterSummary { 74 std::string type_name; 75 std::string modifier; 76 uint32_t group_id; 77 const ThreadInfo* thread; 78 int cpu; // -1 represents all cpus 79 uint64_t count; 80 uint64_t runtime_in_ns; 81 double scale; 82 std::string readable_count; 83 std::string comment; 84 bool auto_generated; 85 86 CounterSummary(const std::string& type_name, const std::string& modifier, uint32_t group_id, 87 const ThreadInfo* thread, int cpu, uint64_t count, uint64_t runtime_in_ns, 88 double scale, bool auto_generated, bool csv) 89 : type_name(type_name), 90 modifier(modifier), 91 group_id(group_id), 92 thread(thread), 93 cpu(cpu), 94 count(count), 95 runtime_in_ns(runtime_in_ns), 96 scale(scale), 97 auto_generated(auto_generated) { 98 readable_count = ReadableCountValue(csv); 99 } 100 101 bool IsMonitoredAtTheSameTime(const CounterSummary& other) const { 102 // Two summaries are monitored at the same time if they are in the same 103 // group or are monitored all the time. 104 if (group_id == other.group_id) { 105 return true; 106 } 107 return IsMonitoredAllTheTime() && other.IsMonitoredAllTheTime(); 108 } 109 110 std::string Name() const { 111 if (modifier.empty()) { 112 return type_name; 113 } 114 return type_name + ":" + modifier; 115 } 116 117 bool IsMonitoredAllTheTime() const { 118 // If an event runs all the time it is enabled (by not sharing hardware 119 // counters with other events), the scale of its summary is usually within 120 // [1, 1 + 1e-5]. By setting SCALE_ERROR_LIMIT to 1e-5, We can identify 121 // events monitored all the time in most cases while keeping the report 122 // error rate <= 1e-5. 123 constexpr double SCALE_ERROR_LIMIT = 1e-5; 124 return (fabs(scale - 1.0) < SCALE_ERROR_LIMIT); 125 } 126 127 private: 128 std::string ReadableCountValue(bool csv) { 129 if (type_name == "cpu-clock" || type_name == "task-clock") { 130 // Convert nanoseconds to milliseconds. 131 double value = count / 1e6; 132 return android::base::StringPrintf("%lf(ms)", value); 133 } else { 134 // Convert big numbers to human friendly mode. For example, 135 // 1000000 will be converted to 1,000,000. 136 std::string s = android::base::StringPrintf("%" PRIu64, count); 137 if (csv) { 138 return s; 139 } else { 140 for (size_t i = s.size() - 1, j = 1; i > 0; --i, ++j) { 141 if (j == 3) { 142 s.insert(s.begin() + i, ','); 143 j = 0; 144 } 145 } 146 return s; 147 } 148 } 149 } 150 }; 151 152 // Build a vector of CounterSummary. 153 class CounterSummaryBuilder { 154 public: 155 CounterSummaryBuilder(bool report_per_thread, bool report_per_core, bool csv, 156 const std::unordered_map<pid_t, ThreadInfo>& thread_map) 157 : report_per_thread_(report_per_thread), 158 report_per_core_(report_per_core), 159 csv_(csv), 160 thread_map_(thread_map) {} 161 162 void AddCountersForOneEventType(const CountersInfo& info) { 163 std::unordered_map<uint64_t, CounterSum> sum_map; 164 for (const auto& counter : info.counters) { 165 uint64_t key = 0; 166 if (report_per_thread_) { 167 key |= counter.tid; 168 } 169 if (report_per_core_) { 170 key |= static_cast<uint64_t>(counter.cpu) << 32; 171 } 172 CounterSum& sum = sum_map[key]; 173 CounterSum add; 174 add.FromCounter(counter.counter); 175 sum = sum + add; 176 } 177 size_t pre_sum_count = summaries_.size(); 178 for (const auto& pair : sum_map) { 179 pid_t tid = report_per_thread_ ? static_cast<pid_t>(pair.first & UINT32_MAX) : 0; 180 int cpu = report_per_core_ ? static_cast<int>(pair.first >> 32) : -1; 181 const CounterSum& sum = pair.second; 182 AddSummary(info, tid, cpu, sum); 183 } 184 if (report_per_thread_ || report_per_core_) { 185 SortSummaries(summaries_.begin() + pre_sum_count, summaries_.end()); 186 } 187 } 188 189 std::vector<CounterSummary> Build() { 190 std::vector<CounterSummary> res = std::move(summaries_); 191 summaries_.clear(); 192 return res; 193 } 194 195 private: 196 void AddSummary(const CountersInfo& info, pid_t tid, int cpu, const CounterSum& sum) { 197 double scale = 1.0; 198 if (sum.time_running < sum.time_enabled && sum.time_running != 0) { 199 scale = static_cast<double>(sum.time_enabled) / sum.time_running; 200 } 201 if ((report_per_thread_ || report_per_core_) && sum.time_running == 0) { 202 // No need to report threads or cpus not running. 203 return; 204 } 205 const ThreadInfo* thread = nullptr; 206 if (report_per_thread_) { 207 auto it = thread_map_.find(tid); 208 CHECK(it != thread_map_.end()); 209 thread = &it->second; 210 } 211 summaries_.emplace_back(info.event_name, info.event_modifier, info.group_id, thread, cpu, 212 sum.value, sum.time_running, scale, false, csv_); 213 } 214 215 void SortSummaries(std::vector<CounterSummary>::iterator begin, 216 std::vector<CounterSummary>::iterator end) { 217 if (report_per_thread_ && report_per_core_) { 218 // First sort by event count for all cpus in a thread, then sort by event count of each cpu. 219 std::unordered_map<pid_t, uint64_t> count_per_thread; 220 for (auto it = begin; it != end; ++it) { 221 count_per_thread[it->thread->tid] += it->count; 222 } 223 std::sort(begin, end, [&](const CounterSummary& s1, const CounterSummary& s2) { 224 pid_t tid1 = s1.thread->tid; 225 pid_t tid2 = s2.thread->tid; 226 if (tid1 != tid2) { 227 if (count_per_thread[tid1] != count_per_thread[tid2]) { 228 return count_per_thread[tid1] > count_per_thread[tid2]; 229 } 230 return tid1 < tid2; 231 } 232 return s1.count > s2.count; 233 }); 234 } else { 235 std::sort(begin, end, [](const CounterSummary& s1, const CounterSummary& s2) { 236 return s1.count > s2.count; 237 }); 238 } 239 }; 240 241 const bool report_per_thread_; 242 const bool report_per_core_; 243 const bool csv_; 244 const std::unordered_map<pid_t, ThreadInfo>& thread_map_; 245 std::vector<CounterSummary> summaries_; 246 }; 247 248 class CounterSummaries { 249 public: 250 explicit CounterSummaries(std::vector<CounterSummary>&& summaries, bool csv) 251 : summaries_(std::move(summaries)), csv_(csv) {} 252 const std::vector<CounterSummary>& Summaries() { return summaries_; } 253 254 const CounterSummary* FindSummary(const std::string& type_name, const std::string& modifier, 255 const ThreadInfo* thread, int cpu); 256 257 // If we have two summaries monitoring the same event type at the same time, 258 // that one is for user space only, and the other is for kernel space only; 259 // then we can automatically generate a summary combining the two results. 260 // For example, a summary of branch-misses:u and a summary for branch-misses:k 261 // can generate a summary of branch-misses. 262 void AutoGenerateSummaries(); 263 void GenerateComments(double duration_in_sec); 264 void Show(FILE* fp); 265 void ShowCSV(FILE* fp); 266 void ShowText(FILE* fp); 267 268 private: 269 std::string GetCommentForSummary(const CounterSummary& s, double duration_in_sec); 270 std::string GetRateComment(const CounterSummary& s, char sep); 271 bool FindRunningTimeForSummary(const CounterSummary& summary, double* running_time_in_sec); 272 273 private: 274 std::vector<CounterSummary> summaries_; 275 bool csv_; 276 }; 277 278 } // namespace simpleperf