1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <inttypes.h>
18 #include <signal.h>
19 #include <stdio.h>
20 #include <string.h>
21 #include <sys/prctl.h>
22 
23 #include <algorithm>
24 #include <chrono>
25 #include <set>
26 #include <string>
27 #include <vector>
28 
29 #include <android-base/logging.h>
30 #include <android-base/parsedouble.h>
31 #include <android-base/strings.h>
32 
33 #include "command.h"
34 #include "environment.h"
35 #include "event_attr.h"
36 #include "event_fd.h"
37 #include "event_selection_set.h"
38 #include "event_type.h"
39 #include "IOEventLoop.h"
40 #include "utils.h"
41 #include "workload.h"
42 
43 namespace {
44 
45 static std::vector<std::string> default_measured_event_types{
46     "cpu-cycles",   "stalled-cycles-frontend", "stalled-cycles-backend",
47     "instructions", "branch-instructions",     "branch-misses",
48     "task-clock",   "context-switches",        "page-faults",
49 };
50 
51 struct CounterSummary {
52   std::string type_name;
53   std::string modifier;
54   uint32_t group_id;
55   uint64_t count;
56   double scale;
57   std::string readable_count;
58   std::string comment;
59   bool auto_generated;
60 
CounterSummary__anon3ff1e7bf0111::CounterSummary61   CounterSummary(const std::string& type_name, const std::string& modifier,
62                  uint32_t group_id, uint64_t count, double scale,
63                  bool auto_generated, bool csv)
64       : type_name(type_name),
65         modifier(modifier),
66         group_id(group_id),
67         count(count),
68         scale(scale),
69         auto_generated(auto_generated) {
70     readable_count = ReadableCountValue(csv);
71   }
72 
IsMonitoredAtTheSameTime__anon3ff1e7bf0111::CounterSummary73   bool IsMonitoredAtTheSameTime(const CounterSummary& other) const {
74     // Two summaries are monitored at the same time if they are in the same
75     // group or are monitored all the time.
76     if (group_id == other.group_id) {
77       return true;
78     }
79     return IsMonitoredAllTheTime() && other.IsMonitoredAllTheTime();
80   }
81 
Name__anon3ff1e7bf0111::CounterSummary82   std::string Name() const {
83     if (modifier.empty()) {
84       return type_name;
85     }
86     return type_name + ":" + modifier;
87   }
88 
89  private:
ReadableCountValue__anon3ff1e7bf0111::CounterSummary90   std::string ReadableCountValue(bool csv) {
91     if (type_name == "cpu-clock" || type_name == "task-clock") {
92       // Convert nanoseconds to milliseconds.
93       double value = count / 1e6;
94       return android::base::StringPrintf("%lf(ms)", value);
95     } else {
96       // Convert big numbers to human friendly mode. For example,
97       // 1000000 will be converted to 1,000,000.
98       std::string s = android::base::StringPrintf("%" PRIu64, count);
99       if (csv) {
100         return s;
101       } else {
102         for (size_t i = s.size() - 1, j = 1; i > 0; --i, ++j) {
103           if (j == 3) {
104             s.insert(s.begin() + i, ',');
105             j = 0;
106           }
107         }
108         return s;
109       }
110     }
111   }
112 
IsMonitoredAllTheTime__anon3ff1e7bf0111::CounterSummary113   bool IsMonitoredAllTheTime() const {
114     // If an event runs all the time it is enabled (by not sharing hardware
115     // counters with other events), the scale of its summary is usually within
116     // [1, 1 + 1e-5]. By setting SCALE_ERROR_LIMIT to 1e-5, We can identify
117     // events monitored all the time in most cases while keeping the report
118     // error rate <= 1e-5.
119     constexpr double SCALE_ERROR_LIMIT = 1e-5;
120     return (fabs(scale - 1.0) < SCALE_ERROR_LIMIT);
121   }
122 };
123 
124 class CounterSummaries {
125  public:
CounterSummaries(bool csv)126   explicit CounterSummaries(bool csv) : csv_(csv) {}
AddSummary(const CounterSummary & summary)127   void AddSummary(const CounterSummary& summary) {
128     summaries_.push_back(summary);
129   }
130 
FindSummary(const std::string & type_name,const std::string & modifier)131   const CounterSummary* FindSummary(const std::string& type_name,
132                                     const std::string& modifier) {
133     for (const auto& s : summaries_) {
134       if (s.type_name == type_name && s.modifier == modifier) {
135         return &s;
136       }
137     }
138     return nullptr;
139   }
140 
141   // If we have two summaries monitoring the same event type at the same time,
142   // that one is for user space only, and the other is for kernel space only;
143   // then we can automatically generate a summary combining the two results.
144   // For example, a summary of branch-misses:u and a summary for branch-misses:k
145   // can generate a summary of branch-misses.
AutoGenerateSummaries()146   void AutoGenerateSummaries() {
147     for (size_t i = 0; i < summaries_.size(); ++i) {
148       const CounterSummary& s = summaries_[i];
149       if (s.modifier == "u") {
150         const CounterSummary* other = FindSummary(s.type_name, "k");
151         if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
152           if (FindSummary(s.type_name, "") == nullptr) {
153             AddSummary(CounterSummary(s.type_name, "", s.group_id,
154                                       s.count + other->count, s.scale, true,
155                                       csv_));
156           }
157         }
158       }
159     }
160   }
161 
GenerateComments(double duration_in_sec)162   void GenerateComments(double duration_in_sec) {
163     for (auto& s : summaries_) {
164       s.comment = GetCommentForSummary(s, duration_in_sec);
165     }
166   }
167 
Show(FILE * fp)168   void Show(FILE* fp) {
169     size_t count_column_width = 0;
170     size_t name_column_width = 0;
171     size_t comment_column_width = 0;
172     for (auto& s : summaries_) {
173       count_column_width =
174           std::max(count_column_width, s.readable_count.size());
175       name_column_width = std::max(name_column_width, s.Name().size());
176       comment_column_width = std::max(comment_column_width, s.comment.size());
177     }
178 
179     for (auto& s : summaries_) {
180       if (csv_) {
181         fprintf(fp, "%s,%s,%s,(%.0lf%%)%s\n", s.readable_count.c_str(),
182                 s.Name().c_str(), s.comment.c_str(), 1.0 / s.scale * 100,
183                 (s.auto_generated ? " (generated)," : ","));
184       } else {
185         fprintf(fp, "  %*s  %-*s   # %-*s  (%.0lf%%)%s\n",
186                 static_cast<int>(count_column_width), s.readable_count.c_str(),
187                 static_cast<int>(name_column_width), s.Name().c_str(),
188                 static_cast<int>(comment_column_width), s.comment.c_str(),
189                 1.0 / s.scale * 100, (s.auto_generated ? " (generated)" : ""));
190       }
191     }
192   }
193 
194  private:
GetCommentForSummary(const CounterSummary & s,double duration_in_sec)195   std::string GetCommentForSummary(const CounterSummary& s,
196                                    double duration_in_sec) {
197     char sap_mid;
198     if (csv_) {
199       sap_mid = ',';
200     } else {
201       sap_mid = ' ';
202     }
203     if (s.type_name == "task-clock") {
204       double run_sec = s.count / 1e9;
205       double used_cpus = run_sec / (duration_in_sec / s.scale);
206       return android::base::StringPrintf("%lf%ccpus used", used_cpus, sap_mid);
207     }
208     if (s.type_name == "cpu-clock") {
209       return "";
210     }
211     if (s.type_name == "cpu-cycles") {
212       double hz = s.count / (duration_in_sec / s.scale);
213       return android::base::StringPrintf("%lf%cGHz", hz / 1e9, sap_mid);
214     }
215     if (s.type_name == "instructions" && s.count != 0) {
216       const CounterSummary* other = FindSummary("cpu-cycles", s.modifier);
217       if (other != nullptr && other->IsMonitoredAtTheSameTime(s)) {
218         double cpi = static_cast<double>(other->count) / s.count;
219         return android::base::StringPrintf("%lf%ccycles per instruction", cpi,
220                                            sap_mid);
221       }
222     }
223     if (android::base::EndsWith(s.type_name, "-misses")) {
224       std::string other_name;
225       if (s.type_name == "cache-misses") {
226         other_name = "cache-references";
227       } else if (s.type_name == "branch-misses") {
228         other_name = "branch-instructions";
229       } else {
230         other_name =
231             s.type_name.substr(0, s.type_name.size() - strlen("-misses")) + "s";
232       }
233       const CounterSummary* other = FindSummary(other_name, s.modifier);
234       if (other != nullptr && other->IsMonitoredAtTheSameTime(s) &&
235           other->count != 0) {
236         double miss_rate = static_cast<double>(s.count) / other->count;
237         return android::base::StringPrintf("%lf%%%cmiss rate", miss_rate * 100,
238                                            sap_mid);
239       }
240     }
241     double rate = s.count / (duration_in_sec / s.scale);
242     if (rate > 1e9) {
243       return android::base::StringPrintf("%.3lf%cG/sec", rate / 1e9, sap_mid);
244     }
245     if (rate > 1e6) {
246       return android::base::StringPrintf("%.3lf%cM/sec", rate / 1e6, sap_mid);
247     }
248     if (rate > 1e3) {
249       return android::base::StringPrintf("%.3lf%cK/sec", rate / 1e3, sap_mid);
250     }
251     return android::base::StringPrintf("%.3lf%c/sec", rate, sap_mid);
252   }
253 
254  private:
255   std::vector<CounterSummary> summaries_;
256   bool csv_;
257 };
258 
259 class StatCommand : public Command {
260  public:
StatCommand()261   StatCommand()
262       : Command("stat", "gather performance counter information",
263                 // clang-format off
264 "Usage: simpleperf stat [options] [command [command-args]]\n"
265 "       Gather performance counter information of running [command].\n"
266 "       And -a/-p/-t option can be used to change target of counter information.\n"
267 "-a           Collect system-wide information.\n"
268 "--cpu cpu_item1,cpu_item2,...\n"
269 "                 Collect information only on the selected cpus. cpu_item can\n"
270 "                 be a cpu number like 1, or a cpu range like 0-3.\n"
271 "--csv            Write report in comma separate form.\n"
272 "--duration time_in_sec  Monitor for time_in_sec seconds instead of running\n"
273 "                        [command]. Here time_in_sec may be any positive\n"
274 "                        floating point number.\n"
275 "--interval time_in_ms   Print stat for every time_in_ms milliseconds.\n"
276 "                        Here time_in_ms may be any positive floating point\n"
277 "                        number.\n"
278 "-e event1[:modifier1],event2[:modifier2],...\n"
279 "                 Select the event list to count. Use `simpleperf list` to find\n"
280 "                 all possible event names. Modifiers can be added to define\n"
281 "                 how the event should be monitored. Possible modifiers are:\n"
282 "                   u - monitor user space events only\n"
283 "                   k - monitor kernel space events only\n"
284 "--group event1[:modifier],event2[:modifier2],...\n"
285 "             Similar to -e option. But events specified in the same --group\n"
286 "             option are monitored as a group, and scheduled in and out at the\n"
287 "             same time.\n"
288 "--no-inherit     Don't stat created child threads/processes.\n"
289 "-o output_filename  Write report to output_filename instead of standard output.\n"
290 "-p pid1,pid2,... Stat events on existing processes. Mutually exclusive with -a.\n"
291 "-t tid1,tid2,... Stat events on existing threads. Mutually exclusive with -a.\n"
292 "--verbose        Show result in verbose mode.\n"
293                 // clang-format on
294                 ),
295         verbose_mode_(false),
296         system_wide_collection_(false),
297         child_inherit_(true),
298         duration_in_sec_(0),
299         interval_in_ms_(0),
300         event_selection_set_(true),
301         csv_(false) {
302     // Die if parent exits.
303     prctl(PR_SET_PDEATHSIG, SIGHUP, 0, 0, 0);
304   }
305 
306   bool Run(const std::vector<std::string>& args);
307 
308  private:
309   bool ParseOptions(const std::vector<std::string>& args,
310                     std::vector<std::string>* non_option_args);
311   bool AddDefaultMeasuredEventTypes();
312   void SetEventSelectionFlags();
313   bool ShowCounters(const std::vector<CountersInfo>& counters,
314                     double duration_in_sec, FILE* fp);
315 
316   bool verbose_mode_;
317   bool system_wide_collection_;
318   bool child_inherit_;
319   double duration_in_sec_;
320   double interval_in_ms_;
321   std::vector<int> cpus_;
322   EventSelectionSet event_selection_set_;
323   std::string output_filename_;
324   bool csv_;
325 };
326 
Run(const std::vector<std::string> & args)327 bool StatCommand::Run(const std::vector<std::string>& args) {
328   if (!CheckPerfEventLimit()) {
329     return false;
330   }
331 
332   // 1. Parse options, and use default measured event types if not given.
333   std::vector<std::string> workload_args;
334   if (!ParseOptions(args, &workload_args)) {
335     return false;
336   }
337   if (event_selection_set_.empty()) {
338     if (!AddDefaultMeasuredEventTypes()) {
339       return false;
340     }
341   }
342   SetEventSelectionFlags();
343 
344   // 2. Create workload.
345   std::unique_ptr<Workload> workload;
346   if (!workload_args.empty()) {
347     workload = Workload::CreateWorkload(workload_args);
348     if (workload == nullptr) {
349       return false;
350     }
351   }
352   bool need_to_check_targets = false;
353   if (system_wide_collection_) {
354     event_selection_set_.AddMonitoredThreads({-1});
355   } else if (!event_selection_set_.HasMonitoredTarget()) {
356     if (workload != nullptr) {
357       event_selection_set_.AddMonitoredProcesses({workload->GetPid()});
358       event_selection_set_.SetEnableOnExec(true);
359     } else {
360       LOG(ERROR)
361           << "No threads to monitor. Try `simpleperf help stat` for help\n";
362       return false;
363     }
364   } else {
365     need_to_check_targets = true;
366   }
367 
368   // 3. Open perf_event_files and output file if defined.
369   if (!system_wide_collection_ && cpus_.empty()) {
370     cpus_.push_back(-1);  // Monitor on all cpus.
371   }
372   if (!event_selection_set_.OpenEventFiles(cpus_)) {
373     return false;
374   }
375   std::unique_ptr<FILE, decltype(&fclose)> fp_holder(nullptr, fclose);
376   FILE* fp = stdout;
377   if (!output_filename_.empty()) {
378     fp_holder.reset(fopen(output_filename_.c_str(), "w"));
379     if (fp_holder == nullptr) {
380       PLOG(ERROR) << "failed to open " << output_filename_;
381       return false;
382     }
383     fp = fp_holder.get();
384   }
385 
386   // 4. Add signal/periodic Events.
387   std::chrono::time_point<std::chrono::steady_clock> start_time;
388   std::vector<CountersInfo> counters;
389   if (system_wide_collection_ || (!cpus_.empty() && cpus_[0] != -1)) {
390     if (!event_selection_set_.HandleCpuHotplugEvents(cpus_)) {
391       return false;
392     }
393   }
394   if (need_to_check_targets && !event_selection_set_.StopWhenNoMoreTargets()) {
395     return false;
396   }
397   IOEventLoop* loop = event_selection_set_.GetIOEventLoop();
398   if (!loop->AddSignalEvents({SIGCHLD, SIGINT, SIGTERM, SIGHUP},
399                              [&]() { return loop->ExitLoop(); })) {
400     return false;
401   }
402   if (duration_in_sec_ != 0) {
403     if (!loop->AddPeriodicEvent(SecondToTimeval(duration_in_sec_),
404                                 [&]() { return loop->ExitLoop(); })) {
405       return false;
406     }
407   }
408   auto print_counters = [&]() {
409       auto end_time = std::chrono::steady_clock::now();
410       if (!event_selection_set_.ReadCounters(&counters)) {
411         return false;
412       }
413       double duration_in_sec =
414       std::chrono::duration_cast<std::chrono::duration<double>>(end_time -
415                                                                 start_time)
416       .count();
417       if (!ShowCounters(counters, duration_in_sec, fp)) {
418         return false;
419       }
420       return true;
421   };
422 
423   if (interval_in_ms_ != 0) {
424     if (!loop->AddPeriodicEvent(SecondToTimeval(interval_in_ms_ / 1000.0),
425                                 print_counters)) {
426       return false;
427     }
428   }
429 
430   // 5. Count events while workload running.
431   start_time = std::chrono::steady_clock::now();
432   if (workload != nullptr && !workload->Start()) {
433     return false;
434   }
435   if (!loop->RunLoop()) {
436     return false;
437   }
438 
439   // 6. Read and print counters.
440 
441   return print_counters();
442 }
443 
ParseOptions(const std::vector<std::string> & args,std::vector<std::string> * non_option_args)444 bool StatCommand::ParseOptions(const std::vector<std::string>& args,
445                                std::vector<std::string>* non_option_args) {
446   std::set<pid_t> tid_set;
447   size_t i;
448   for (i = 0; i < args.size() && args[i].size() > 0 && args[i][0] == '-'; ++i) {
449     if (args[i] == "-a") {
450       system_wide_collection_ = true;
451     } else if (args[i] == "--cpu") {
452       if (!NextArgumentOrError(args, &i)) {
453         return false;
454       }
455       cpus_ = GetCpusFromString(args[i]);
456     } else if (args[i] == "--csv") {
457       csv_ = true;
458     } else if (args[i] == "--duration") {
459       if (!NextArgumentOrError(args, &i)) {
460         return false;
461       }
462       if (!android::base::ParseDouble(args[i].c_str(), &duration_in_sec_,
463                                       1e-9)) {
464         LOG(ERROR) << "Invalid duration: " << args[i].c_str();
465         return false;
466       }
467     } else if (args[i] == "--interval") {
468       if (!NextArgumentOrError(args, &i)) {
469         return false;
470       }
471       if (!android::base::ParseDouble(args[i].c_str(), &interval_in_ms_,
472                                       1e-9)) {
473         LOG(ERROR) << "Invalid interval: " << args[i].c_str();
474         return false;
475       }
476     } else if (args[i] == "-e") {
477       if (!NextArgumentOrError(args, &i)) {
478         return false;
479       }
480       std::vector<std::string> event_types = android::base::Split(args[i], ",");
481       for (auto& event_type : event_types) {
482         if (!event_selection_set_.AddEventType(event_type)) {
483           return false;
484         }
485       }
486     } else if (args[i] == "--group") {
487       if (!NextArgumentOrError(args, &i)) {
488         return false;
489       }
490       std::vector<std::string> event_types = android::base::Split(args[i], ",");
491       if (!event_selection_set_.AddEventGroup(event_types)) {
492         return false;
493       }
494     } else if (args[i] == "--no-inherit") {
495       child_inherit_ = false;
496     } else if (args[i] == "-o") {
497       if (!NextArgumentOrError(args, &i)) {
498         return false;
499       }
500       output_filename_ = args[i];
501     } else if (args[i] == "-p") {
502       if (!NextArgumentOrError(args, &i)) {
503         return false;
504       }
505       std::set<pid_t> pids;
506       if (!GetValidThreadsFromThreadString(args[i], &pids)) {
507         return false;
508       }
509       event_selection_set_.AddMonitoredProcesses(pids);
510     } else if (args[i] == "-t") {
511       if (!NextArgumentOrError(args, &i)) {
512         return false;
513       }
514       std::set<pid_t> tids;
515       if (!GetValidThreadsFromThreadString(args[i], &tids)) {
516         return false;
517       }
518       event_selection_set_.AddMonitoredThreads(tids);
519     } else if (args[i] == "--verbose") {
520       verbose_mode_ = true;
521     } else {
522       ReportUnknownOption(args, i);
523       return false;
524     }
525   }
526 
527   if (system_wide_collection_ && event_selection_set_.HasMonitoredTarget()) {
528     LOG(ERROR) << "Stat system wide and existing processes/threads can't be "
529                   "used at the same time.";
530     return false;
531   }
532   if (system_wide_collection_ && !IsRoot()) {
533     LOG(ERROR) << "System wide profiling needs root privilege.";
534     return false;
535   }
536 
537   non_option_args->clear();
538   for (; i < args.size(); ++i) {
539     non_option_args->push_back(args[i]);
540   }
541   return true;
542 }
543 
AddDefaultMeasuredEventTypes()544 bool StatCommand::AddDefaultMeasuredEventTypes() {
545   for (auto& name : default_measured_event_types) {
546     // It is not an error when some event types in the default list are not
547     // supported by the kernel.
548     const EventType* type = FindEventTypeByName(name);
549     if (type != nullptr &&
550         IsEventAttrSupported(CreateDefaultPerfEventAttr(*type))) {
551       if (!event_selection_set_.AddEventType(name)) {
552         return false;
553       }
554     }
555   }
556   if (event_selection_set_.empty()) {
557     LOG(ERROR) << "Failed to add any supported default measured types";
558     return false;
559   }
560   return true;
561 }
562 
SetEventSelectionFlags()563 void StatCommand::SetEventSelectionFlags() {
564   event_selection_set_.SetInherit(child_inherit_);
565 }
566 
ShowCounters(const std::vector<CountersInfo> & counters,double duration_in_sec,FILE * fp)567 bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters,
568                                double duration_in_sec, FILE* fp) {
569   if (csv_) {
570     fprintf(fp, "Performance counter statistics,\n");
571   } else {
572     fprintf(fp, "Performance counter statistics:\n\n");
573   }
574 
575   if (verbose_mode_) {
576     for (auto& counters_info : counters) {
577       for (auto& counter_info : counters_info.counters) {
578         if (csv_) {
579           fprintf(fp, "%s,tid,%d,cpu,%d,count,%" PRIu64 ",time_enabled,%" PRIu64
580                       ",time running,%" PRIu64 ",id,%" PRIu64 ",\n",
581                   counters_info.event_name.c_str(), counter_info.tid,
582                   counter_info.cpu, counter_info.counter.value,
583                   counter_info.counter.time_enabled,
584                   counter_info.counter.time_running, counter_info.counter.id);
585         } else {
586           fprintf(fp,
587                   "%s(tid %d, cpu %d): count %" PRIu64 ", time_enabled %" PRIu64
588                   ", time running %" PRIu64 ", id %" PRIu64 "\n",
589                   counters_info.event_name.c_str(), counter_info.tid,
590                   counter_info.cpu, counter_info.counter.value,
591                   counter_info.counter.time_enabled,
592                   counter_info.counter.time_running, counter_info.counter.id);
593         }
594       }
595     }
596   }
597 
598   CounterSummaries summaries(csv_);
599   for (auto& counters_info : counters) {
600     uint64_t value_sum = 0;
601     uint64_t time_enabled_sum = 0;
602     uint64_t time_running_sum = 0;
603     for (auto& counter_info : counters_info.counters) {
604       value_sum += counter_info.counter.value;
605       time_enabled_sum += counter_info.counter.time_enabled;
606       time_running_sum += counter_info.counter.time_running;
607     }
608     double scale = 1.0;
609     if (time_running_sum < time_enabled_sum && time_running_sum != 0) {
610       scale = static_cast<double>(time_enabled_sum) / time_running_sum;
611     }
612     summaries.AddSummary(
613         CounterSummary(counters_info.event_name, counters_info.event_modifier,
614                        counters_info.group_id, value_sum, scale, false, csv_));
615   }
616   summaries.AutoGenerateSummaries();
617   summaries.GenerateComments(duration_in_sec);
618   summaries.Show(fp);
619 
620   if (csv_)
621     fprintf(fp, "Total test time,%lf,seconds,\n", duration_in_sec);
622   else
623     fprintf(fp, "\nTotal test time: %lf seconds.\n", duration_in_sec);
624   return true;
625 }
626 
627 }  // namespace
628 
RegisterStatCommand()629 void RegisterStatCommand() {
630   RegisterCommand("stat",
631                   [] { return std::unique_ptr<Command>(new StatCommand); });
632 }
633