1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <inttypes.h>
18 #include <signal.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <sys/prctl.h>
22
23 #include <algorithm>
24 #include <chrono>
25 #include <set>
26 #include <string>
27 #include <vector>
28
29 #include <android-base/logging.h>
30 #include <android-base/parsedouble.h>
31 #include <android-base/strings.h>
32
33 #include "command.h"
34 #include "environment.h"
35 #include "event_attr.h"
36 #include "event_fd.h"
37 #include "event_selection_set.h"
38 #include "event_type.h"
39 #include "IOEventLoop.h"
40 #include "utils.h"
41 #include "workload.h"
42
43 namespace {
44
45 static std::vector<std::string> default_measured_event_types{
46 "cpu-cycles", "stalled-cycles-frontend", "stalled-cycles-backend",
47 "instructions", "branch-instructions", "branch-misses",
48 "task-clock", "context-switches", "page-faults",
49 };
50
51 struct CounterSummary {
52 std::string type_name;
53 std::string modifier;
54 uint32_t group_id;
55 uint64_t count;
56 double scale;
57 std::string readable_count;
58 std::string comment;
59 bool auto_generated;
60
CounterSummary__anon3ff1e7bf0111::CounterSummary61 CounterSummary(const std::string& type_name, const std::string& modifier,
62 uint32_t group_id, uint64_t count, double scale,
63 bool auto_generated, bool csv)
64 : type_name(type_name),
65 modifier(modifier),
66 group_id(group_id),
67 count(count),
68 scale(scale),
69 auto_generated(auto_generated) {
70 readable_count = ReadableCountValue(csv);
71 }
72
IsMonitoredAtTheSameTime__anon3ff1e7bf0111::CounterSummary73 bool IsMonitoredAtTheSameTime(const CounterSummary& other) const {
74 // Two summaries are monitored at the same time if they are in the same
75 // group or are monitored all the time.
76 if (group_id == other.group_id) {
77 return true;
78 }
79 return IsMonitoredAllTheTime() && other.IsMonitoredAllTheTime();
80 }
81
Name__anon3ff1e7bf0111::CounterSummary82 std::string Name() const {
83 if (modifier.empty()) {
84 return type_name;
85 }
86 return type_name + ":" + modifier;
87 }
88
89 private:
ReadableCountValue__anon3ff1e7bf0111::CounterSummary90 std::string ReadableCountValue(bool csv) {
91 if (type_name == "cpu-clock" || type_name == "task-clock") {
92 // Convert nanoseconds to milliseconds.
93 double value = count / 1e6;
94 return android::base::StringPrintf("%lf(ms)", value);
95 } else {
96 // Convert big numbers to human friendly mode. For example,
97 // 1000000 will be converted to 1,000,000.
98 std::string s = android::base::StringPrintf("%" PRIu64, count);
99 if (csv) {
100 return s;
101 } else {
102 for (size_t i = s.size() - 1, j = 1; i > 0; --i, ++j) {
103 if (j == 3) {
104 s.insert(s.begin() + i, ',');
105 j = 0;
106 }
107 }
108 return s;
109 }
110 }
111 }
112
IsMonitoredAllTheTime__anon3ff1e7bf0111::CounterSummary113 bool IsMonitoredAllTheTime() const {
114 // If an event runs all the time it is enabled (by not sharing hardware
115 // counters with other events), the scale of its summary is usually within
116 // [1, 1 + 1e-5]. By setting SCALE_ERROR_LIMIT to 1e-5, We can identify
117 // events monitored all the time in most cases while keeping the report
118 // error rate <= 1e-5.
119 constexpr double SCALE_ERROR_LIMIT = 1e-5;
120 return (fabs(scale - 1.0) < SCALE_ERROR_LIMIT);
121 }
122 };
123
124 class CounterSummaries {
125 public:
CounterSummaries(bool csv)126 explicit CounterSummaries(bool csv) : csv_(csv) {}
AddSummary(const CounterSummary & summary)127 void AddSummary(const CounterSummary& summary) {
128 summaries_.push_back(summary);
129 }
130
FindSummary(const std::string & type_name,const std::string & modifier)131 const CounterSummary* FindSummary(const std::string& type_name,
132 const std::string& modifier) {
133 for (const auto& s : summaries_) {
134 if (s.type_name == type_name && s.modifier == modifier) {
135 return &s;
136 }
137 }
138 return nullptr;
139 }
140
141 // If we have two summaries monitoring the same event type at the same time,
142 // that one is for user space only, and the other is for kernel space only;
143 // then we can automatically generate a summary combining the two results.
144 // For example, a summary of branch-misses:u and a summary for branch-misses:k
145 // can generate a summary of branch-misses.
AutoGenerateSummaries()146 void AutoGenerateSummaries() {
147 for (size_t i = 0; i < summaries_.size(); ++i) {
148 const CounterSummary& s = summaries_[i];
149 if (s.modifier == "u") {
150 const CounterSummary* other = FindSummary(s.type_name, "k");
151 if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
152 if (FindSummary(s.type_name, "") == nullptr) {
153 AddSummary(CounterSummary(s.type_name, "", s.group_id,
154 s.count + other->count, s.scale, true,
155 csv_));
156 }
157 }
158 }
159 }
160 }
161
GenerateComments(double duration_in_sec)162 void GenerateComments(double duration_in_sec) {
163 for (auto& s : summaries_) {
164 s.comment = GetCommentForSummary(s, duration_in_sec);
165 }
166 }
167
Show(FILE * fp)168 void Show(FILE* fp) {
169 size_t count_column_width = 0;
170 size_t name_column_width = 0;
171 size_t comment_column_width = 0;
172 for (auto& s : summaries_) {
173 count_column_width =
174 std::max(count_column_width, s.readable_count.size());
175 name_column_width = std::max(name_column_width, s.Name().size());
176 comment_column_width = std::max(comment_column_width, s.comment.size());
177 }
178
179 for (auto& s : summaries_) {
180 if (csv_) {
181 fprintf(fp, "%s,%s,%s,(%.0lf%%)%s\n", s.readable_count.c_str(),
182 s.Name().c_str(), s.comment.c_str(), 1.0 / s.scale * 100,
183 (s.auto_generated ? " (generated)," : ","));
184 } else {
185 fprintf(fp, " %*s %-*s # %-*s (%.0lf%%)%s\n",
186 static_cast<int>(count_column_width), s.readable_count.c_str(),
187 static_cast<int>(name_column_width), s.Name().c_str(),
188 static_cast<int>(comment_column_width), s.comment.c_str(),
189 1.0 / s.scale * 100, (s.auto_generated ? " (generated)" : ""));
190 }
191 }
192 }
193
194 private:
GetCommentForSummary(const CounterSummary & s,double duration_in_sec)195 std::string GetCommentForSummary(const CounterSummary& s,
196 double duration_in_sec) {
197 char sap_mid;
198 if (csv_) {
199 sap_mid = ',';
200 } else {
201 sap_mid = ' ';
202 }
203 if (s.type_name == "task-clock") {
204 double run_sec = s.count / 1e9;
205 double used_cpus = run_sec / (duration_in_sec / s.scale);
206 return android::base::StringPrintf("%lf%ccpus used", used_cpus, sap_mid);
207 }
208 if (s.type_name == "cpu-clock") {
209 return "";
210 }
211 if (s.type_name == "cpu-cycles") {
212 double hz = s.count / (duration_in_sec / s.scale);
213 return android::base::StringPrintf("%lf%cGHz", hz / 1e9, sap_mid);
214 }
215 if (s.type_name == "instructions" && s.count != 0) {
216 const CounterSummary* other = FindSummary("cpu-cycles", s.modifier);
217 if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
218 double cpi = static_cast<double>(other->count) / s.count;
219 return android::base::StringPrintf("%lf%ccycles per instruction", cpi,
220 sap_mid);
221 }
222 }
223 if (android::base::EndsWith(s.type_name, "-misses")) {
224 std::string other_name;
225 if (s.type_name == "cache-misses") {
226 other_name = "cache-references";
227 } else if (s.type_name == "branch-misses") {
228 other_name = "branch-instructions";
229 } else {
230 other_name =
231 s.type_name.substr(0, s.type_name.size() - strlen("-misses")) + "s";
232 }
233 const CounterSummary* other = FindSummary(other_name, s.modifier);
234 if (other != nullptr && other->IsMonitoredAtTheSameTime(s) &&
235 other->count != 0) {
236 double miss_rate = static_cast<double>(s.count) / other->count;
237 return android::base::StringPrintf("%lf%%%cmiss rate", miss_rate * 100,
238 sap_mid);
239 }
240 }
241 double rate = s.count / (duration_in_sec / s.scale);
242 if (rate > 1e9) {
243 return android::base::StringPrintf("%.3lf%cG/sec", rate / 1e9, sap_mid);
244 }
245 if (rate > 1e6) {
246 return android::base::StringPrintf("%.3lf%cM/sec", rate / 1e6, sap_mid);
247 }
248 if (rate > 1e3) {
249 return android::base::StringPrintf("%.3lf%cK/sec", rate / 1e3, sap_mid);
250 }
251 return android::base::StringPrintf("%.3lf%c/sec", rate, sap_mid);
252 }
253
254 private:
255 std::vector<CounterSummary> summaries_;
256 bool csv_;
257 };
258
259 class StatCommand : public Command {
260 public:
StatCommand()261 StatCommand()
262 : Command("stat", "gather performance counter information",
263 // clang-format off
264 "Usage: simpleperf stat [options] [command [command-args]]\n"
265 " Gather performance counter information of running [command].\n"
266 " And -a/-p/-t option can be used to change target of counter information.\n"
267 "-a Collect system-wide information.\n"
268 "--cpu cpu_item1,cpu_item2,...\n"
269 " Collect information only on the selected cpus. cpu_item can\n"
270 " be a cpu number like 1, or a cpu range like 0-3.\n"
271 "--csv Write report in comma separate form.\n"
272 "--duration time_in_sec Monitor for time_in_sec seconds instead of running\n"
273 " [command]. Here time_in_sec may be any positive\n"
274 " floating point number.\n"
275 "--interval time_in_ms Print stat for every time_in_ms milliseconds.\n"
276 " Here time_in_ms may be any positive floating point\n"
277 " number.\n"
278 "-e event1[:modifier1],event2[:modifier2],...\n"
279 " Select the event list to count. Use `simpleperf list` to find\n"
280 " all possible event names. Modifiers can be added to define\n"
281 " how the event should be monitored. Possible modifiers are:\n"
282 " u - monitor user space events only\n"
283 " k - monitor kernel space events only\n"
284 "--group event1[:modifier],event2[:modifier2],...\n"
285 " Similar to -e option. But events specified in the same --group\n"
286 " option are monitored as a group, and scheduled in and out at the\n"
287 " same time.\n"
288 "--no-inherit Don't stat created child threads/processes.\n"
289 "-o output_filename Write report to output_filename instead of standard output.\n"
290 "-p pid1,pid2,... Stat events on existing processes. Mutually exclusive with -a.\n"
291 "-t tid1,tid2,... Stat events on existing threads. Mutually exclusive with -a.\n"
292 "--verbose Show result in verbose mode.\n"
293 // clang-format on
294 ),
295 verbose_mode_(false),
296 system_wide_collection_(false),
297 child_inherit_(true),
298 duration_in_sec_(0),
299 interval_in_ms_(0),
300 event_selection_set_(true),
301 csv_(false) {
302 // Die if parent exits.
303 prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
304 }
305
306 bool Run(const std::vector<std::string>& args);
307
308 private:
309 bool ParseOptions(const std::vector<std::string>& args,
310 std::vector<std::string>* non_option_args);
311 bool AddDefaultMeasuredEventTypes();
312 void SetEventSelectionFlags();
313 bool ShowCounters(const std::vector<CountersInfo>& counters,
314 double duration_in_sec, FILE* fp);
315
316 bool verbose_mode_;
317 bool system_wide_collection_;
318 bool child_inherit_;
319 double duration_in_sec_;
320 double interval_in_ms_;
321 std::vector<int> cpus_;
322 EventSelectionSet event_selection_set_;
323 std::string output_filename_;
324 bool csv_;
325 };
326
Run(const std::vector<std::string> & args)327 bool StatCommand::Run(const std::vector<std::string>& args) {
328 if (!CheckPerfEventLimit()) {
329 return false;
330 }
331
332 // 1. Parse options, and use default measured event types if not given.
333 std::vector<std::string> workload_args;
334 if (!ParseOptions(args, &workload_args)) {
335 return false;
336 }
337 if (event_selection_set_.empty()) {
338 if (!AddDefaultMeasuredEventTypes()) {
339 return false;
340 }
341 }
342 SetEventSelectionFlags();
343
344 // 2. Create workload.
345 std::unique_ptr<Workload> workload;
346 if (!workload_args.empty()) {
347 workload = Workload::CreateWorkload(workload_args);
348 if (workload == nullptr) {
349 return false;
350 }
351 }
352 bool need_to_check_targets = false;
353 if (system_wide_collection_) {
354 event_selection_set_.AddMonitoredThreads({-1});
355 } else if (!event_selection_set_.HasMonitoredTarget()) {
356 if (workload != nullptr) {
357 event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
358 event_selection_set_.SetEnableOnExec(true);
359 } else {
360 LOG(ERROR)
361 << "No threads to monitor. Try `simpleperf help stat` for help\n";
362 return false;
363 }
364 } else {
365 need_to_check_targets = true;
366 }
367
368 // 3. Open perf_event_files and output file if defined.
369 if (!system_wide_collection_ && cpus_.empty()) {
370 cpus_.push_back(-1); // Monitor on all cpus.
371 }
372 if (!event_selection_set_.OpenEventFiles(cpus_)) {
373 return false;
374 }
375 std::unique_ptr<FILE, decltype(&fclose)> fp_holder(nullptr, fclose);
376 FILE* fp = stdout;
377 if (!output_filename_.empty()) {
378 fp_holder.reset(fopen(output_filename_.c_str(), "w"));
379 if (fp_holder == nullptr) {
380 PLOG(ERROR) << "failed to open " << output_filename_;
381 return false;
382 }
383 fp = fp_holder.get();
384 }
385
386 // 4. Add signal/periodic Events.
387 std::chrono::time_point<std::chrono::steady_clock> start_time;
388 std::vector<CountersInfo> counters;
389 if (system_wide_collection_ || (!cpus_.empty() && cpus_[0] != -1)) {
390 if (!event_selection_set_.HandleCpuHotplugEvents(cpus_)) {
391 return false;
392 }
393 }
394 if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
395 return false;
396 }
397 IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
398 if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP},
399 [&]() { return loop->ExitLoop(); })) {
400 return false;
401 }
402 if (duration_in_sec_ != 0) {
403 if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
404 [&]() { return loop->ExitLoop(); })) {
405 return false;
406 }
407 }
408 auto print_counters = [&]() {
409 auto end_time = std::chrono::steady_clock::now();
410 if (!event_selection_set_.ReadCounters(&counters)) {
411 return false;
412 }
413 double duration_in_sec =
414 std::chrono::duration_cast<std::chrono::duration<double>>(end_time -
415 start_time)
416 .count();
417 if (!ShowCounters(counters, duration_in_sec, fp)) {
418 return false;
419 }
420 return true;
421 };
422
423 if (interval_in_ms_ != 0) {
424 if (!loop->AddPeriodicEvent(SecondToTimeval(interval_in_ms_ / 1000.0),
425 print_counters)) {
426 return false;
427 }
428 }
429
430 // 5. Count events while workload running.
431 start_time = std::chrono::steady_clock::now();
432 if (workload != nullptr && !workload->Start()) {
433 return false;
434 }
435 if (!loop->RunLoop()) {
436 return false;
437 }
438
439 // 6. Read and print counters.
440
441 return print_counters();
442 }
443
ParseOptions(const std::vector<std::string> & args,std::vector<std::string> * non_option_args)444 bool StatCommand::ParseOptions(const std::vector<std::string>& args,
445 std::vector<std::string>* non_option_args) {
446 std::set<pid_t> tid_set;
447 size_t i;
448 for (i = 0; i < args.size() && args[i].size() > 0 && args[i][0] == '-'; ++i) {
449 if (args[i] == "-a") {
450 system_wide_collection_ = true;
451 } else if (args[i] == "--cpu") {
452 if (!NextArgumentOrError(args, &i)) {
453 return false;
454 }
455 cpus_ = GetCpusFromString(args[i]);
456 } else if (args[i] == "--csv") {
457 csv_ = true;
458 } else if (args[i] == "--duration") {
459 if (!NextArgumentOrError(args, &i)) {
460 return false;
461 }
462 if (!android::base::ParseDouble(args[i].c_str(), &duration_in_sec_,
463 1e-9)) {
464 LOG(ERROR) << "Invalid duration: " << args[i].c_str();
465 return false;
466 }
467 } else if (args[i] == "--interval") {
468 if (!NextArgumentOrError(args, &i)) {
469 return false;
470 }
471 if (!android::base::ParseDouble(args[i].c_str(), &interval_in_ms_,
472 1e-9)) {
473 LOG(ERROR) << "Invalid interval: " << args[i].c_str();
474 return false;
475 }
476 } else if (args[i] == "-e") {
477 if (!NextArgumentOrError(args, &i)) {
478 return false;
479 }
480 std::vector<std::string> event_types = android::base::Split(args[i], ",");
481 for (auto& event_type : event_types) {
482 if (!event_selection_set_.AddEventType(event_type)) {
483 return false;
484 }
485 }
486 } else if (args[i] == "--group") {
487 if (!NextArgumentOrError(args, &i)) {
488 return false;
489 }
490 std::vector<std::string> event_types = android::base::Split(args[i], ",");
491 if (!event_selection_set_.AddEventGroup(event_types)) {
492 return false;
493 }
494 } else if (args[i] == "--no-inherit") {
495 child_inherit_ = false;
496 } else if (args[i] == "-o") {
497 if (!NextArgumentOrError(args, &i)) {
498 return false;
499 }
500 output_filename_ = args[i];
501 } else if (args[i] == "-p") {
502 if (!NextArgumentOrError(args, &i)) {
503 return false;
504 }
505 std::set<pid_t> pids;
506 if (!GetValidThreadsFromThreadString(args[i], &pids)) {
507 return false;
508 }
509 event_selection_set_.AddMonitoredProcesses(pids);
510 } else if (args[i] == "-t") {
511 if (!NextArgumentOrError(args, &i)) {
512 return false;
513 }
514 std::set<pid_t> tids;
515 if (!GetValidThreadsFromThreadString(args[i], &tids)) {
516 return false;
517 }
518 event_selection_set_.AddMonitoredThreads(tids);
519 } else if (args[i] == "--verbose") {
520 verbose_mode_ = true;
521 } else {
522 ReportUnknownOption(args, i);
523 return false;
524 }
525 }
526
527 if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
528 LOG(ERROR) << "Stat system wide and existing processes/threads can't be "
529 "used at the same time.";
530 return false;
531 }
532 if (system_wide_collection_ && !IsRoot()) {
533 LOG(ERROR) << "System wide profiling needs root privilege.";
534 return false;
535 }
536
537 non_option_args->clear();
538 for (; i < args.size(); ++i) {
539 non_option_args->push_back(args[i]);
540 }
541 return true;
542 }
543
AddDefaultMeasuredEventTypes()544 bool StatCommand::AddDefaultMeasuredEventTypes() {
545 for (auto& name : default_measured_event_types) {
546 // It is not an error when some event types in the default list are not
547 // supported by the kernel.
548 const EventType* type = FindEventTypeByName(name);
549 if (type != nullptr &&
550 IsEventAttrSupported(CreateDefaultPerfEventAttr(*type))) {
551 if (!event_selection_set_.AddEventType(name)) {
552 return false;
553 }
554 }
555 }
556 if (event_selection_set_.empty()) {
557 LOG(ERROR) << "Failed to add any supported default measured types";
558 return false;
559 }
560 return true;
561 }
562
SetEventSelectionFlags()563 void StatCommand::SetEventSelectionFlags() {
564 event_selection_set_.SetInherit(child_inherit_);
565 }
566
ShowCounters(const std::vector<CountersInfo> & counters,double duration_in_sec,FILE * fp)567 bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters,
568 double duration_in_sec, FILE* fp) {
569 if (csv_) {
570 fprintf(fp, "Performance counter statistics,\n");
571 } else {
572 fprintf(fp, "Performance counter statistics:\n\n");
573 }
574
575 if (verbose_mode_) {
576 for (auto& counters_info : counters) {
577 for (auto& counter_info : counters_info.counters) {
578 if (csv_) {
579 fprintf(fp, "%s,tid,%d,cpu,%d,count,%" PRIu64 ",time_enabled,%" PRIu64
580 ",time running,%" PRIu64 ",id,%" PRIu64 ",\n",
581 counters_info.event_name.c_str(), counter_info.tid,
582 counter_info.cpu, counter_info.counter.value,
583 counter_info.counter.time_enabled,
584 counter_info.counter.time_running, counter_info.counter.id);
585 } else {
586 fprintf(fp,
587 "%s(tid %d, cpu %d): count %" PRIu64 ", time_enabled %" PRIu64
588 ", time running %" PRIu64 ", id %" PRIu64 "\n",
589 counters_info.event_name.c_str(), counter_info.tid,
590 counter_info.cpu, counter_info.counter.value,
591 counter_info.counter.time_enabled,
592 counter_info.counter.time_running, counter_info.counter.id);
593 }
594 }
595 }
596 }
597
598 CounterSummaries summaries(csv_);
599 for (auto& counters_info : counters) {
600 uint64_t value_sum = 0;
601 uint64_t time_enabled_sum = 0;
602 uint64_t time_running_sum = 0;
603 for (auto& counter_info : counters_info.counters) {
604 value_sum += counter_info.counter.value;
605 time_enabled_sum += counter_info.counter.time_enabled;
606 time_running_sum += counter_info.counter.time_running;
607 }
608 double scale = 1.0;
609 if (time_running_sum < time_enabled_sum && time_running_sum != 0) {
610 scale = static_cast<double>(time_enabled_sum) / time_running_sum;
611 }
612 summaries.AddSummary(
613 CounterSummary(counters_info.event_name, counters_info.event_modifier,
614 counters_info.group_id, value_sum, scale, false, csv_));
615 }
616 summaries.AutoGenerateSummaries();
617 summaries.GenerateComments(duration_in_sec);
618 summaries.Show(fp);
619
620 if (csv_)
621 fprintf(fp, "Total test time,%lf,seconds,\n", duration_in_sec);
622 else
623 fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
624 return true;
625 }
626
627 } // namespace
628
RegisterStatCommand()629 void RegisterStatCommand() {
630 RegisterCommand("stat",
631 [] { return std::unique_ptr<Command>(new StatCommand); });
632 }
633